Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

在 2025/08/15 15:56, Ming Lei 写道:
Commit 5989bfe6ac6b ("block: restore two stage elevator switch while
running nr_hw_queue update") reintroduced a lockdep warning by calling
blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler.

The function blk_mq_elv_switch_none() calls elevator_change_done().
Running this while the queue is frozen causes a lockdep warning.

Fix this by reordering the operations: first, switch the I/O scheduler
to 'none', and then freeze the queue. This ensures that elevator_change_done()
is not called on an already frozen queue. And this way is safe because
elevator_set_none() does freeze queue before switching to none.

Also we still have to rely on blk_mq_elv_switch_back() for switching
back, and it has to cover unfrozen queue case.

Cc: Nilay Shroff <nilay@xxxxxxxxxxxxx>
Cc: Yu Kuai <yukuai3@xxxxxxxxxx>
Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update")
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
  block/blk-mq.c   | 13 +++++++------
  block/blk.h      |  2 +-
  block/elevator.c | 12 +++++++++---
  3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b67d6c02eceb..9c62781c6b8c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4974,13 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
   * Switch back to the elevator type stored in the xarray.
   */
  static void blk_mq_elv_switch_back(struct request_queue *q,
-		struct xarray *elv_tbl, struct xarray *et_tbl)
+		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
  {
  	struct elevator_type *e = xa_load(elv_tbl, q->id);
  	struct elevator_tags *t = xa_load(et_tbl, q->id);
/* The elv_update_nr_hw_queues unfreezes the queue. */
-	elv_update_nr_hw_queues(q, e, t);
+	elv_update_nr_hw_queues(q, e, t, frozen);
/* Drop the reference acquired in blk_mq_elv_switch_none. */
  	if (e)
@@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
  	unsigned int memflags;
  	int i;
  	struct xarray elv_tbl, et_tbl;
+	bool queues_frozen = false;
  
  	lockdep_assert_held(&set->tag_list_lock);
  
@@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
  		blk_mq_sysfs_unregister_hctxs(q);
  	}
- list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue_nomemsave(q);
-
  	/*
  	 * Switch IO scheduler to 'none', cleaning up the data associated
  	 * with the previous scheduler. We will switch back once we are done
@@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
  		if (blk_mq_elv_switch_none(q, &elv_tbl))
  			goto switch_back;
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue_nomemsave(q);
+	queues_frozen = true;
  	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
  		goto switch_back;
Would it be simpler to move blk_mq_freeze_queue_nomemsave() into
blk_mq_elv_switch_none(), so that the queue is frozen right after the
elevator has been successfully switched to none?

Then, in blk_mq_elv_switch_back(), if xa_load() returns a valid
elevator_type, we know the related queue is already frozen.

Thanks,
Kuai

@@ -5092,7 +5093,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
  switch_back:
  	/* The blk_mq_elv_switch_back unfreezes queue for us. */
  	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_sysfs_register_hctxs(q);
diff --git a/block/blk.h b/block/blk.h
index 0a2eccf28ca4..601db258c00d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -332,7 +332,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
  bool blk_insert_flush(struct request *rq);
void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
-		struct elevator_tags *t);
+		struct elevator_tags *t, bool frozen);
  void elevator_set_default(struct request_queue *q);
  void elevator_set_none(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index fe96c6f4753c..0644b2d35ecb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -706,24 +706,30 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
   * reattachment when nr_hw_queues changes.
   */
  void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
-		struct elevator_tags *t)
+		struct elevator_tags *t, bool frozen)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	struct elv_change_ctx ctx = {};
  	int ret = -ENODEV;
- WARN_ON_ONCE(q->mq_freeze_depth == 0);
+	WARN_ON_ONCE(frozen == (q->mq_freeze_depth == 0));
if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
  		ctx.name = e->elevator_name;
  		ctx.et = t;
+ /* elevator switch requires queue to be frozen */
+		if (!frozen) {
+			blk_mq_freeze_queue_nomemsave(q);
+			frozen = true;
+		}
  		mutex_lock(&q->elevator_lock);
  		/* force to reattach elevator after nr_hw_queue is updated */
  		ret = elevator_switch(q, &ctx);
  		mutex_unlock(&q->elevator_lock);
  	}
-	blk_mq_unfreeze_queue_nomemrestore(q);
+	if (frozen)
+		blk_mq_unfreeze_queue_nomemrestore(q);
  	if (!ret)
  		WARN_ON_ONCE(elevator_change_done(q, &ctx));
  	/*






[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux