Both the disk-adding and disk-deleting code paths are readers of `nr_hw_queues`, so we can't allow them to be in progress while nr_hw_queues is being updated; a kernel panic and a KASAN report have been reported in [1]. Prevent adding/deleting a disk while nr_hw_queues is being updated by setting set->updating_nr_hwq, and use SRCU so that adding/deleting a disk fails and is retried once the update has finished. This approach avoids a lot of trouble. [1] https://lore.kernel.org/linux-block/a5896cdb-a59a-4a37-9f99-20522f5d2987@xxxxxxxxxxxxx/ Reported-by: Nilay Shroff <nilay@xxxxxxxxxxxxx> Closes: https://lore.kernel.org/linux-block/a5896cdb-a59a-4a37-9f99-20522f5d2987@xxxxxxxxxxxxx/ Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq.c | 22 +++++++++++++++++++++- block/genhd.c | 36 ++++++++++++++++++++++++++++++++---- include/linux/blk-mq.h | 5 +++++ 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 7cda919fafba..e1662617cc7a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4782,12 +4782,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) goto out_free_srcu; } + mutex_init(&set->update_nr_hwq_lock); + init_waitqueue_head(&set->update_nr_hwq_wq); + ret = init_srcu_struct(&set->update_nr_hwq_srcu); + if (ret) + goto out_cleanup_srcu; + ret = -ENOMEM; set->tags = kcalloc_node(set->nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!set->tags) - goto out_cleanup_srcu; + goto out_cleanup_hwq_srcu; for (i = 0; i < set->nr_maps; i++) { set->map[i].mq_map = kcalloc_node(nr_cpu_ids, @@ -4816,6 +4822,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) } kfree(set->tags); set->tags = NULL; +out_cleanup_hwq_srcu: + cleanup_srcu_struct(&set->update_nr_hwq_srcu); out_cleanup_srcu: if (set->flags & BLK_MQ_F_BLOCKING) cleanup_srcu_struct(set->srcu); @@ -5077,9 +5085,21 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) { + mutex_lock(&set->update_nr_hwq_lock); + /* + * Mark us in updating nr_hw_queues for preventing reader of + * nr_hw_queues, such as adding/deleting disk. 
+ */ + set->updating_nr_hwq = true; + synchronize_srcu(&set->update_nr_hwq_srcu); + mutex_lock(&set->tag_list_lock); __blk_mq_update_nr_hw_queues(set, nr_hw_queues); mutex_unlock(&set->tag_list_lock); + + set->updating_nr_hwq = false; + wake_up_all(&set->update_nr_hwq_wq); + mutex_unlock(&set->update_nr_hwq_lock); } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); diff --git a/block/genhd.c b/block/genhd.c index 4370c5be1f34..d22fdc0d5383 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -396,6 +396,33 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) return ret; } +static int retry_on_updating_nr_hwq(struct gendisk_data *data, + int (*cb)(struct gendisk_data *data)) +{ + struct gendisk *disk = data->disk; + struct blk_mq_tag_set *set; + + if (!queue_is_mq(disk->queue)) + return cb(data); + + set = disk->queue->tag_set; + do { + int idx, ret; + + idx = srcu_read_lock(&set->update_nr_hwq_srcu); + if (set->updating_nr_hwq) { + srcu_read_unlock(&set->update_nr_hwq_srcu, idx); + goto wait; + } + ret = cb(data); + srcu_read_unlock(&set->update_nr_hwq_srcu, idx); + return ret; + wait: + wait_event_interruptible(set->update_nr_hwq_wq, + !set->updating_nr_hwq); + } while (true); +} + static int __add_disk_fwnode(struct gendisk_data *data) { struct gendisk *disk = data->disk; @@ -589,7 +616,7 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, .fwnode = fwnode, }; - return __add_disk_fwnode(&data); + return retry_on_updating_nr_hwq(&data, __add_disk_fwnode); } EXPORT_SYMBOL_GPL(add_disk_fwnode); @@ -671,7 +698,7 @@ void blk_mark_disk_dead(struct gendisk *disk) } EXPORT_SYMBOL_GPL(blk_mark_disk_dead); -static void __del_gendisk(struct gendisk_data *data) +static int __del_gendisk(struct gendisk_data *data) { struct gendisk *disk = data->disk; struct request_queue *q = disk->queue; @@ -682,7 +709,7 @@ static void __del_gendisk(struct gendisk_data *data) might_sleep(); if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & 
GENHD_FL_HIDDEN))) - return; + return 0; disk_del_events(disk); @@ -764,6 +791,7 @@ static void __del_gendisk(struct gendisk_data *data) if (start_drain) blk_unfreeze_release_lock(q); + return 0; } EXPORT_SYMBOL(del_gendisk); @@ -792,7 +820,7 @@ void del_gendisk(struct gendisk *disk) .disk = disk, }; - __del_gendisk(&data); + retry_on_updating_nr_hwq(&data, __del_gendisk); } /** diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8eb9b3310167..afe76dcfaa3c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -527,6 +527,11 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + + bool updating_nr_hwq; + struct mutex update_nr_hwq_lock; + struct srcu_struct update_nr_hwq_srcu; + wait_queue_head_t update_nr_hwq_wq; }; /** -- 2.47.0