Dear Xiao,
Thank you for your patch.
Am 12.08.25 um 09:49 schrieb Xiao Ni:
commit 9e59d609763f ("md: call del_gendisk in control path") changes the
async way to sync way of calling del_gendisk. But it breaks mdadm
--assemble command. The assemble command runs like this:
1. create the array
2. stop the array
3. access the sysfs files after stopping
The sync way calls del_gendisk in step2, so all sysfs files are removed.
Now to avoid breaking mdadm assemble command, this patch adds a parameter
… the parameter legacy_async_del_gendisk …
that can be used to choose which way. The default is async way. In future,
we can remove this parameter when users upgrade to mdadm 4.5 which removes
step2.
step 2
Maybe say to first change the default, and then remove it.
Fixes: 9e59d609763f ("md: call del_gendisk in control path")
Reported-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>
Closes: https://lore.kernel.org/linux-raid/CAMw=ZnQ=ET2St-+hnhsuq34rRPnebqcXqP1QqaHW5Bh4aaaZ4g@xxxxxxxxxxxxxx/T/#t
Suggested-and-reviewed-by: Yu Kuai <yukuai3@xxxxxxxxxx>
Signed-off-by: Xiao Ni <xni@xxxxxxxxxx>
---
v2: minor changes on format and log content
drivers/md/md.c | 56 ++++++++++++++++++++++++++++++++++++-------------
1 file changed, 42 insertions(+), 14 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ac85ec73a409..44827f841927 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -339,6 +339,7 @@ static int start_readonly;
* so all the races disappear.
*/
static bool create_on_open = true;
+static bool legacy_async_del_gendisk = true;
/*
* We have a system wide 'event count' that is incremented
@@ -877,15 +878,18 @@ void mddev_unlock(struct mddev *mddev)
export_rdev(rdev, mddev);
}
- /* Call del_gendisk after release reconfig_mutex to avoid
- * deadlock (e.g. call del_gendisk under the lock and an
- * access to sysfs files waits the lock)
- * And MD_DELETED is only used for md raid which is set in
- * do_md_stop. dm raid only uses md_stop to stop. So dm raid
- * doesn't need to check MD_DELETED when getting reconfig lock
- */
- if (test_bit(MD_DELETED, &mddev->flags))
- del_gendisk(mddev->gendisk);
+ if (!legacy_async_del_gendisk) {
+ /*
+ * Call del_gendisk after release reconfig_mutex to avoid
+ * deadlock (e.g. call del_gendisk under the lock and an
+ * access to sysfs files waits the lock)
+ * And MD_DELETED is only used for md raid which is set in
+ * do_md_stop. dm raid only uses md_stop to stop. So dm raid
+ * doesn't need to check MD_DELETED when getting reconfig lock
+ */
+ if (test_bit(MD_DELETED, &mddev->flags))
+ del_gendisk(mddev->gendisk);
+ }
}
EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -5818,6 +5822,13 @@ static void md_kobj_release(struct kobject *ko)
{
struct mddev *mddev = container_of(ko, struct mddev, kobj);
+ if (legacy_async_del_gendisk) {
+ if (mddev->sysfs_state)
+ sysfs_put(mddev->sysfs_state);
+ if (mddev->sysfs_level)
+ sysfs_put(mddev->sysfs_level);
+ del_gendisk(mddev->gendisk);
+ }
put_disk(mddev->gendisk);
}
@@ -6021,6 +6032,9 @@ static int md_alloc_and_put(dev_t dev, char *name)
{
struct mddev *mddev = md_alloc(dev, name);
+ if (legacy_async_del_gendisk)
+ pr_warn("md: async del_gendisk mode will be removed please upgrade to mdadm-4.5+\n");
Maybe add a timeframe?
md: async del_gendisk mode will be removed in Linux 6.18. Please upgrade
to mdadm 4.5+
+
if (IS_ERR(mddev))
return PTR_ERR(mddev);
mddev_put(mddev);
@@ -6431,10 +6445,22 @@ static void md_clean(struct mddev *mddev)
mddev->persistent = 0;
mddev->level = LEVEL_NONE;
mddev->clevel[0] = 0;
- /* if UNTIL_STOP is set, it's cleared here */
- mddev->hold_active = 0;
- /* Don't clear MD_CLOSING, or mddev can be opened again. */
- mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
+
+ /*
+ * For legacy_async_del_gendisk mode, it can stop the array in the
+ * middle of assembling it, then it still can access the array. So
+ * it needs to clear MD_CLOSING. If not legacy_async_del_gendisk,
+ * it can't open the array again after stopping it. So it doesn't
+ * clear MD_CLOSING.
+ */
+ if (legacy_async_del_gendisk && mddev->hold_active) {
+ clear_bit(MD_CLOSING, &mddev->flags);
+ } else {
+ /* if UNTIL_STOP is set, it's cleared here */
+ mddev->hold_active = 0;
+ /* Don't clear MD_CLOSING, or mddev can be opened again. */
+ mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
+ }
mddev->sb_flags = 0;
mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
@@ -6658,7 +6684,8 @@ static int do_md_stop(struct mddev *mddev, int mode)
export_array(mddev);
md_clean(mddev);
- set_bit(MD_DELETED, &mddev->flags);
+ if (!legacy_async_del_gendisk)
+ set_bit(MD_DELETED, &mddev->flags);
}
md_new_event();
sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -10392,6 +10419,7 @@ module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
+module_param(legacy_async_del_gendisk, bool, 0600);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
Kind regards,
Paul