From: Coly Li <colyli@xxxxxxxxxx> This patch adds a new BLK_FLAG_STACK_IO_OPT flag for stacked block devices. If a stacked block device like md raid5 declares its own io_opt and does not want blk_stack_limits() to change it based on the io_opt of underlying non-stacked block devices, BLK_FLAG_STACK_IO_OPT can be set in limits.flags. Then, in blk_stack_limits(), the lcm_not_zero(t->io_opt, b->io_opt) calculation will be skipped. For md raid5, it is necessary to keep a proper io_opt size for better I/O throughput. Signed-off-by: Coly Li <colyli@xxxxxxxxxx> --- block/blk-settings.c | 6 +++++- drivers/md/raid5.c | 1 + include/linux/blkdev.h | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 07874e9b609f..46ee538b2be9 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -782,6 +782,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->features &= ~BLK_FEAT_POLL; t->flags |= (b->flags & BLK_FLAG_MISALIGNED); + t->flags |= (b->flags & BLK_FLAG_STACK_IO_OPT); t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_user_sectors = min_not_zero(t->max_user_sectors, @@ -839,7 +840,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->physical_block_size); t->io_min = max(t->io_min, b->io_min); - t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); + if (!t->io_opt || !(t->flags & BLK_FLAG_STACK_IO_OPT) || + (b->flags & BLK_FLAG_STACK_IO_OPT)) + t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); + t->dma_alignment = max(t->dma_alignment, b->dma_alignment); /* Set non-power-of-2 compatible chunk_sectors boundary */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 023649fe2476..989acd8abd98 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7730,6 +7730,7 @@ static int raid5_set_limits(struct mddev *mddev) lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded); lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE; + 
lim.flags |= BLK_FLAG_STACK_IO_OPT; lim.discard_granularity = stripe; lim.max_write_zeroes_sectors = 0; mddev_stack_rdev_limits(mddev, &lim, 0); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..a22c7cea9836 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -366,6 +366,9 @@ typedef unsigned int __bitwise blk_flags_t; /* passthrough command IO accounting */ #define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2)) +/* ignore underlying non-stack devices io_opt */ +#define BLK_FLAG_STACK_IO_OPT ((__force blk_flags_t)(1u << 3)) + struct queue_limits { blk_features_t features; blk_flags_t flags; -- 2.47.2