Some I/O hang problems are caused by missed wakeups, and these cases can be mitigated if io_schedule() is replaced with io_schedule_timeout(). By default, io_schedule() is still used; when such problems occur, the workaround can be enabled by setting io_schedule_timeout_msecs to a non-zero value. Signed-off-by: Fengnan Chang <changfengnan@xxxxxxxxxxxxx> --- block/blk-core.c | 33 +++++++++++++++++++++++++++++++++ block/blk-iocost.c | 2 +- block/blk-mq-tag.c | 2 +- block/blk-rq-qos.c | 2 +- include/linux/blkdev.h | 2 ++ 5 files changed, 38 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index fdac48aec5ef..722a32f98b95 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -62,6 +62,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert); static DEFINE_IDA(blk_queue_ida); +static unsigned long __read_mostly sysctl_io_schedule_timeout_msecs; + /* * For queue allocation */ @@ -72,6 +74,18 @@ static struct kmem_cache *blk_requestq_cachep; */ static struct workqueue_struct *kblockd_workqueue; +#ifdef CONFIG_SYSCTL +static const struct ctl_table kernel_io_schedule_timeout_table[] = { + { + .procname = "io_schedule_timeout_msecs", + .data = &sysctl_io_schedule_timeout_msecs, + .maxlen = sizeof(sysctl_io_schedule_timeout_msecs), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, +}; +#endif + /** * blk_queue_flag_set - atomically set a queue flag * @flag: flag to be set @@ -1250,6 +1264,21 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +/** + * Maybe it can be integrated into blk_io_schedule? 
+ */ +void blk_io_schedule_timeout(void) +{ + /* Sysctl io_schedule_timeout_msecs converted to jiffies; a zero result falls back to plain io_schedule() */ + unsigned long timeout = msecs_to_jiffies(sysctl_io_schedule_timeout_msecs); + + if (timeout) + io_schedule_timeout(timeout); + else + io_schedule(); +} +EXPORT_SYMBOL_GPL(blk_io_schedule_timeout); + void blk_io_schedule(void) { /* Prevent hang_check timer from firing at us during very long I/O */ @@ -1280,5 +1309,9 @@ int __init blk_dev_init(void) blk_debugfs_root = debugfs_create_dir("block", NULL); +#ifdef CONFIG_SYSCTL + register_sysctl_init("kernel", kernel_io_schedule_timeout_table); +#endif + return 0; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 5bfd70311359..56aac7ac7a71 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2732,7 +2732,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) set_current_state(TASK_UNINTERRUPTIBLE); if (wait.committed) break; - io_schedule(); + blk_io_schedule_timeout(); } /* waker already committed us, proceed */ diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d880c50629d6..892704292005 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -185,7 +185,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) break; bt_prev = bt; - io_schedule(); + blk_io_schedule_timeout(); sbitmap_finish_wait(bt, ws, &wait); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 848591fb3c57..5c81e863c092 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -306,7 +306,7 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, do { if (data.got_token) break; - io_schedule(); + blk_io_schedule_timeout(); set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..e58353c8523e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1170,6 +1170,8 @@ static inline long nr_blockdev_pages(void) extern void blk_io_schedule(void); 
+extern void blk_io_schedule_timeout(void); + int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask); int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, -- 2.39.2 (Apple Git-143)