Hi,
在 2025/08/17 23:26, colyli@xxxxxxxxxx 写道:
From: Coly Li <colyli@xxxxxxxxxx>
This patch adds a new BLK_FLAG_STACK_IO_OPT flag for stack block devices.
If a stack block device like md raid5 declares its own io_opt and doesn't
want blk_stack_limits() to change it with the io_opt of underlying
non-stack block devices, BLK_FLAG_STACK_IO_OPT can be set on limits.flags.
Then in blk_stack_limits(), lcm_not_zero(t->io_opt, b->io_opt) will be avoided.
For background, please refer to the thread:
https://lore.kernel.org/all/ywsfp3lqnijgig6yrlv2ztxram6ohf5z4yfeebswjkvp2dzisd@f5ikoyo3sfq5/
In short, scsi and mdraid have different definitions of io_opt.
For md raid5, it is necessary to keep a proper io_opt size for better
I/O throughput.
Signed-off-by: Coly Li <colyli@xxxxxxxxxx>
---
block/blk-settings.c | 6 +++++-
drivers/md/raid5.c | 1 +
include/linux/blkdev.h | 3 +++
3 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 07874e9b609f..46ee538b2be9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -782,6 +782,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->features &= ~BLK_FEAT_POLL;
t->flags |= (b->flags & BLK_FLAG_MISALIGNED);
+ t->flags |= (b->flags & BLK_FLAG_STACK_IO_OPT);
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_user_sectors = min_not_zero(t->max_user_sectors,
@@ -839,7 +840,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->physical_block_size);
t->io_min = max(t->io_min, b->io_min);
- t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
+ if (!t->io_opt || !(t->flags & BLK_FLAG_STACK_IO_OPT) ||
+ (b->flags & BLK_FLAG_STACK_IO_OPT))
+ t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
+
t->dma_alignment = max(t->dma_alignment, b->dma_alignment);
/* Set non-power-of-2 compatible chunk_sectors boundary */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 023649fe2476..989acd8abd98 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7730,6 +7730,7 @@ static int raid5_set_limits(struct mddev *mddev)
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
+ lim.flags |= BLK_FLAG_STACK_IO_OPT;
lim.discard_granularity = stripe;
lim.max_write_zeroes_sectors = 0;
mddev_stack_rdev_limits(mddev, &lim, 0);
And I think raid0/raid1/raid10 should all set this flag as well.
Thanks,
Kuai
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95886b404b16..a22c7cea9836 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -366,6 +366,9 @@ typedef unsigned int __bitwise blk_flags_t;
/* passthrough command IO accounting */
#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2))
+/* ignore underlying non-stack devices io_opt */
+#define BLK_FLAG_STACK_IO_OPT ((__force blk_flags_t)(1u << 3))
+
struct queue_limits {
blk_features_t features;
blk_flags_t flags;