From: Li Nan <linan122@xxxxxxxxxx>

When a new disk is added while recovery is running, the kernel may
restart recovery from the beginning of the device and submit write I/O
to ranges that have already been synchronized.

Reproduce:
  mdadm -CR /dev/md0 -l1 -n3 /dev/sda missing missing
  mdadm --add /dev/md0 /dev/sdb
  sleep 10
  cat /proc/mdstat	# partially synchronized
  mdadm --add /dev/md0 /dev/sdc
  cat /proc/mdstat	# start from 0
  iostat 1 sdb sdc	# sdb has io, too

If 'rdev->recovery_offset' is ahead of the current recovery sector, read
from that device instead of issuing a write. This prevents unnecessary
writes while still preserving the chance to back up the data if that
device holds the last copy.

Signed-off-by: Li Nan <linan122@xxxxxxxxxx>
---
 drivers/md/raid1.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3e422854cafb..ac5a9b73157a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2894,7 +2894,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		    test_bit(Faulty, &rdev->flags)) {
 			if (i < conf->raid_disks)
 				still_degraded = true;
-		} else if (!test_bit(In_sync, &rdev->flags)) {
+		} else if (!test_bit(In_sync, &rdev->flags) &&
+			   rdev->recovery_offset <= sector_nr) {
 			bio->bi_opf = REQ_OP_WRITE;
 			bio->bi_end_io = end_sync_write;
 			write_targets ++;
@@ -2903,6 +2904,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 			sector_t first_bad = MaxSector;
 			sector_t bad_sectors;
 
+			if (!test_bit(In_sync, &rdev->flags))
+				good_sectors = min(rdev->recovery_offset - sector_nr,
+						   (u64)good_sectors);
 			if (is_badblock(rdev, sector_nr, good_sectors,
 					&first_bad, &bad_sectors)) {
 				if (first_bad > sector_nr)
-- 
2.39.2