Both the disk-adding and disk-deleting code paths are readers of `nr_hw_queues`, so neither may be in progress while nr_hw_queues is being updated; a kernel panic and a KASAN splat caused by this race were reported in the thread linked under Closes: below. Prevent adding/deleting a disk while nr_hw_queues is being updated by adding an rw_semaphore to the tag set: the write lock is taken in blk_mq_update_nr_hw_queues(), and the read lock is taken when adding/deleting a disk. Also mark a GFP_NOIO allocation scope around adding/deleting a disk, because blk_mq_update_nr_hw_queues() is part of some drivers' error handlers: an allocation made under the read lock must not wait on I/O that can only complete once the error handler, which is blocked on the write lock, has run. This approach avoids a lot of trouble. Suggested-by: Nilay Shroff <nilay@xxxxxxxxxxxxx> Reported-by: Nilay Shroff <nilay@xxxxxxxxxxxxx> Closes: https://lore.kernel.org/linux-block/a5896cdb-a59a-4a37-9f99-20522f5d2987@xxxxxxxxxxxxx/ Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq.c | 4 ++ block/genhd.c | 94 +++++++++++++++++++++++++++++++----------- include/linux/blk-mq.h | 3 ++ 3 files changed, 78 insertions(+), 23 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 29cfc7ce2e0a..1ed2d183f912 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4802,6 +4802,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) goto out_free_srcu; } + init_rwsem(&set->update_nr_hwq_sema); + ret = -ENOMEM; set->tags = kcalloc_node(set->nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, @@ -5097,9 +5099,11 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) { + down_write(&set->update_nr_hwq_sema); mutex_lock(&set->tag_list_lock); __blk_mq_update_nr_hw_queues(set, nr_hw_queues); mutex_unlock(&set->tag_list_lock); + up_write(&set->update_nr_hwq_sema); } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); diff --git a/block/genhd.c b/block/genhd.c index c2bd86cd09de..7f3ae3d23b26 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -399,9 +399,9 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) * This function registers the partitioning information in 
@disk * with the kernel. Also attach a fwnode to the disk device. */ -int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups, - struct fwnode_handle *fwnode) +static int __add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode) { struct device *ddev = disk_to_dev(disk); @@ -572,6 +572,37 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, } return ret; } + +/** + * add_disk_fwnode - add disk information to kernel list with fwnode + * @parent: parent device for the disk + * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups + * @fwnode: attached disk fwnode + * + * This function registers the partitioning information in @disk + * with the kernel. Also attach a fwnode to the disk device. + */ +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode) +{ + struct blk_mq_tag_set *set; + unsigned int memflags; + int ret; + + if (!queue_is_mq(disk->queue)) + return __add_disk_fwnode(parent, disk, groups, fwnode); + + set = disk->queue->tag_set; + memflags = memalloc_noio_save(); + down_read(&set->update_nr_hwq_sema); + ret = __add_disk_fwnode(parent, disk, groups, fwnode); + up_read(&set->update_nr_hwq_sema); + memalloc_noio_restore(memflags); + + return ret; +} EXPORT_SYMBOL_GPL(add_disk_fwnode); /** @@ -652,26 +683,7 @@ void blk_mark_disk_dead(struct gendisk *disk) } EXPORT_SYMBOL_GPL(blk_mark_disk_dead); -/** - * del_gendisk - remove the gendisk - * @disk: the struct gendisk to remove - * - * Removes the gendisk and all its associated resources. This deletes the - * partitions associated with the gendisk, and unregisters the associated - * request_queue. - * - * This is the counter to the respective __device_add_disk() call. 
- * - * The final removal of the struct gendisk happens when its refcount reaches 0 - * with put_disk(), which should be called after del_gendisk(), if - * __device_add_disk() was used. - * - * Drivers exist which depend on the release of the gendisk to be synchronous, - * it should not be deferred. - * - * Context: can sleep - */ -void del_gendisk(struct gendisk *disk) +static void __del_gendisk(struct gendisk *disk) { struct request_queue *q = disk->queue; struct block_device *part; @@ -764,6 +776,42 @@ void del_gendisk(struct gendisk *disk) if (start_drain) blk_unfreeze_release_lock(q); } + +/** + * del_gendisk - remove the gendisk + * @disk: the struct gendisk to remove + * + * Removes the gendisk and all its associated resources. This deletes the + * partitions associated with the gendisk, and unregisters the associated + * request_queue. + * + * This is the counter to the respective __device_add_disk() call. + * + * The final removal of the struct gendisk happens when its refcount reaches 0 + * with put_disk(), which should be called after del_gendisk(), if + * __device_add_disk() was used. + * + * Drivers exist which depend on the release of the gendisk to be synchronous, + * it should not be deferred. 
+ * + * Context: can sleep + */ +void del_gendisk(struct gendisk *disk) +{ + struct blk_mq_tag_set *set; + unsigned int memflags; + + if (!queue_is_mq(disk->queue)) { + __del_gendisk(disk); + } else { + set = disk->queue->tag_set; + memflags = memalloc_noio_save(); + down_read(&set->update_nr_hwq_sema); + __del_gendisk(disk); + up_read(&set->update_nr_hwq_sema); + memalloc_noio_restore(memflags); + } +} EXPORT_SYMBOL(del_gendisk); /** diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8eb9b3310167..28bc03b2b0dc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,7 @@ #include <linux/prefetch.h> #include <linux/srcu.h> #include <linux/rw_hint.h> +#include <linux/rwsem.h> struct blk_mq_tags; struct blk_flush_queue; @@ -527,6 +528,8 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + + struct rw_semaphore update_nr_hwq_sema; }; /** -- 2.47.0