Updating nr_hw_queues is usually done from error-handling code. It does not make sense to allow a blk-mq elevator switch while that is in progress, since nr_hw_queues may change and the elevator tags depend on nr_hw_queues. Prevent elevator switching while nr_hw_queues is being updated by setting the BLK_MQ_F_UPDATE_HW_QUEUES flag, and use SRCU to fail any elevator switch attempted during that period. Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq-debugfs.c | 1 + block/blk-mq.c | 32 ++++++++++++++++++++------------ block/elevator.c | 12 +++++++++++- include/linux/blk-mq.h | 9 ++++++++- 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index c308699ded58..27f984311bb7 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -180,6 +180,7 @@ static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(TAG_RR), HCTX_FLAG_NAME(NO_SCHED_BY_DEFAULT), + HCTX_FLAG_NAME(UPDATE_HW_QUEUES), }; #undef HCTX_FLAG_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index d7a103dc258b..c1e7e1823369 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4776,14 +4776,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) set->nr_hw_queues = nr_cpu_ids; - if (set->flags & BLK_MQ_F_BLOCKING) { - set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); - if (!set->srcu) - return -ENOMEM; - ret = init_srcu_struct(set->srcu); - if (ret) - goto out_free_srcu; - } + set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); + if (!set->srcu) + return -ENOMEM; + ret = init_srcu_struct(set->srcu); + if (ret) + goto out_free_srcu; ret = -ENOMEM; set->tags = kcalloc_node(set->nr_hw_queues, @@ -4864,10 +4862,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) kfree(set->tags); set->tags = NULL; - if (set->flags & BLK_MQ_F_BLOCKING) { - cleanup_srcu_struct(set->srcu); - kfree(set->srcu); - } + + cleanup_srcu_struct(set->srcu); + kfree(set->srcu); } 
EXPORT_SYMBOL(blk_mq_free_tag_set); @@ -5081,7 +5078,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) { mutex_lock(&set->tag_list_lock); + /* + * Flag that nr_hw_queues is being updated so that elevator switching + * fails until we are done; the flag is cleared again below. + * + * Elevator switch code can _not_ acquire ->tag_list_lock + */ + set->flags |= BLK_MQ_F_UPDATE_HW_QUEUES; + synchronize_srcu(set->srcu); + __blk_mq_update_nr_hw_queues(set, nr_hw_queues); + + set->flags &= ~BLK_MQ_F_UPDATE_HW_QUEUES; mutex_unlock(&set->tag_list_lock); } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); diff --git a/block/elevator.c b/block/elevator.c index cf48613c6e62..e50e04ed15a0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -718,9 +718,10 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, { char elevator_name[ELV_NAME_MAX]; char *name; - int ret; + int ret, idx; unsigned int memflags; struct request_queue *q = disk->queue; + struct blk_mq_tag_set *set = q->tag_set; /* * If the attribute needs to load a module, do it before freezing the @@ -732,6 +733,13 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, elv_iosched_load_module(name); + idx = srcu_read_lock(set->srcu); + + if (set->flags & BLK_MQ_F_UPDATE_HW_QUEUES) { + ret = -EBUSY; + goto exit; + } + memflags = blk_mq_freeze_queue(q); mutex_lock(&q->elevator_lock); ret = elevator_change(q, name); @@ -739,6 +747,8 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, ret = count; mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); +exit: + srcu_read_unlock(set->srcu, idx); return ret; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8eb9b3310167..71e05245af9d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -681,7 +681,14 @@ enum { */ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, - BLK_MQ_F_MAX = 1 << 7, + /* + * True when updating nr_hw_queues is in-progress + * + * 
tag_set only flag, not usable for hctx + */ + BLK_MQ_F_UPDATE_HW_QUEUES = 1 << 7, + + BLK_MQ_F_MAX = 1 << 8, }; #define BLK_MQ_MAX_DEPTH (10240) -- 2.44.0 --1HVwzvZ8lKiJPGhS--