Commit 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update") reintroduced a lockdep warning by calling blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler. The function blk_mq_elv_switch_none() calls elevator_change_done(). Running this while the queue is frozen causes a lockdep warning. Fix this by reordering the operations: first, switch the I/O scheduler to 'none', and then freeze the queue. This ensures that elevator_change_done() is not called on an already frozen queue. And this way is safe because elevator_set_none() does freeze queue before switching to none. Also we still have to rely on blk_mq_elv_switch_back() for switching back, and it has to cover unfrozen queue case. Cc: Nilay Shroff <nilay@xxxxxxxxxxxxx> Cc: Yu Kuai <yukuai3@xxxxxxxxxx> Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update") Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq.c | 13 +++++++------ block/blk.h | 2 +- block/elevator.c | 12 +++++++++--- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b67d6c02eceb..9c62781c6b8c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4974,13 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) * Switch back to the elevator type stored in the xarray. */ static void blk_mq_elv_switch_back(struct request_queue *q, - struct xarray *elv_tbl, struct xarray *et_tbl) + struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen) { struct elevator_type *e = xa_load(elv_tbl, q->id); struct elevator_tags *t = xa_load(et_tbl, q->id); /* The elv_update_nr_hw_queues unfreezes the queue. */ - elv_update_nr_hw_queues(q, e, t); + elv_update_nr_hw_queues(q, e, t, frozen); /* Drop the reference acquired in blk_mq_elv_switch_none. */ if (e) @@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl, et_tbl; + bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister_hctxs(q); } - list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue_nomemsave(q); - /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_freeze_queue_nomemsave(q); + queues_frozen = true; if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) goto switch_back; @@ -5092,7 +5093,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, switch_back: /* The blk_mq_elv_switch_back unfreezes queue for us. */ list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); + blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); diff --git a/block/blk.h b/block/blk.h index 0a2eccf28ca4..601db258c00d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -332,7 +332,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, bool blk_insert_flush(struct request *rq); void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, - struct elevator_tags *t); + struct elevator_tags *t, bool frozen); void elevator_set_default(struct request_queue *q); void elevator_set_none(struct request_queue *q); diff --git a/block/elevator.c b/block/elevator.c index fe96c6f4753c..0644b2d35ecb 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -706,24 +706,30 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) * reattachment when nr_hw_queues changes. */ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, - struct elevator_tags *t) + struct elevator_tags *t, bool frozen) { struct blk_mq_tag_set *set = q->tag_set; struct elv_change_ctx ctx = {}; int ret = -ENODEV; - WARN_ON_ONCE(q->mq_freeze_depth == 0); + WARN_ON_ONCE(frozen == (q->mq_freeze_depth == 0)); if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { ctx.name = e->elevator_name; ctx.et = t; + /* elevator switch requires queue to be frozen */ + if (!frozen) { + blk_mq_freeze_queue_nomemsave(q); + frozen = true; + } mutex_lock(&q->elevator_lock); /* force to reattach elevator after nr_hw_queue is updated */ ret = elevator_switch(q, &ctx); mutex_unlock(&q->elevator_lock); } - blk_mq_unfreeze_queue_nomemrestore(q); + if (frozen) + blk_mq_unfreeze_queue_nomemrestore(q); if (!ret) WARN_ON_ONCE(elevator_change_done(q, &ctx)); /* -- 2.50.1