[PATCH v23 01/16] block: Support block devices that preserve the order of write requests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Some storage controllers preserve the request order per hardware queue.
Some but not all device mapper drivers preserve the bio order. Introduce
the feature flag BLK_FEAT_ORDERED_HWQ to allow block drivers and stacked
drivers to indicate that the order of write commands is preserved per
hardware queue and hence that serialization of writes per zone is not
required if all pending writes are submitted to the same hardware queue.
Add a sysfs attribute for controlling write pipelining support.

Cc: Damien Le Moal <dlemoal@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 Documentation/ABI/stable/sysfs-block | 15 +++++++++++++++
 block/blk-settings.c                 | 10 ++++++++++
 block/blk-sysfs.c                    |  7 +++++++
 include/linux/blkdev.h               |  9 +++++++++
 4 files changed, 41 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 803f578dc023..5a42d99cf39a 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -637,6 +637,21 @@ Description:
 		I/O size is reported this file contains 0.
 
 
+What:		/sys/block/<disk>/queue/pipeline_zoned_writes
+Date:		August 2025
+Contact:	Bart Van Assche <bvanassche@xxxxxxx>
+Description:
+		[RW] This attribute is only present if the block driver and
+		the storage controller both preserve the order of zoned writes
+		per hardware queue. It controls whether or not pipelining of
+		zoned writes is enabled. If the value of this attribute is
+		zero, the block layer restricts the queue depth for sequential
+		writes per zone to one (zone append operations are not
+		affected). If the value of this attribute is one, the block
+		layer does not restrict the queue depth of sequential writes
+		per zone to one.
+
+
 What:		/sys/block/<disk>/queue/physical_block_size
 Date:		May 2009
 Contact:	Martin K. Petersen <martin.petersen@xxxxxxxxxx>
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 07874e9b609f..01c0edf2308a 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -119,6 +119,14 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
 	lim->max_zone_append_sectors =
 		min_not_zero(lim->max_hw_zone_append_sectors,
 			min(lim->chunk_sectors, lim->max_hw_sectors));
+
+	/*
+	 * If both the block driver and the block device preserve the write
+	 * order per hwq, enable zoned write pipelining.
+	 */
+	if (lim->features & BLK_FEAT_ORDERED_HWQ)
+		lim->features |= BLK_FEAT_PIPELINE_ZWR;
+
 	return 0;
 }
 
@@ -780,6 +788,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		t->features &= ~BLK_FEAT_NOWAIT;
 	if (!(b->features & BLK_FEAT_POLL))
 		t->features &= ~BLK_FEAT_POLL;
+	if (!(b->features & BLK_FEAT_ORDERED_HWQ))
+		t->features &= ~BLK_FEAT_ORDERED_HWQ;
 
 	t->flags |= (b->flags & BLK_FLAG_MISALIGNED);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 78ee8d324c7f..4bf0b663f25d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -270,6 +270,7 @@ QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
 QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
 QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
 QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
+QUEUE_SYSFS_FEATURE(pipeline_zwr, BLK_FEAT_PIPELINE_ZWR);
 
 #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature)			\
 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
@@ -554,6 +555,7 @@ QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
 QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");
+QUEUE_LIM_RW_ENTRY(queue_pipeline_zwr, "pipeline_zoned_writes");
 
 /* legacy alias for logical_block_size: */
 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
@@ -700,6 +702,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_dax_entry.attr,
 	&queue_virt_boundary_mask_entry.attr,
 	&queue_dma_alignment_entry.attr,
+	&queue_pipeline_zwr_entry.attr,
 	&queue_ra_entry.attr,
 
 	/*
@@ -746,6 +749,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 	    !blk_queue_is_zoned(q))
 		return 0;
 
+	if (attr == &queue_pipeline_zwr_entry.attr &&
+	    !(q->limits.features & BLK_FEAT_ORDERED_HWQ))
+		return 0;
+
 	return attr->mode;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95886b404b16..79d14b3d3309 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -338,6 +338,15 @@ typedef unsigned int __bitwise blk_features_t;
 /* skip this queue in blk_mq_(un)quiesce_tagset */
 #define BLK_FEAT_SKIP_TAGSET_QUIESCE	((__force blk_features_t)(1u << 13))
 
+/*
+ * The request order is preserved per hardware queue by the block driver and by
+ * the block device. Set by the block driver.
+ */
+#define BLK_FEAT_ORDERED_HWQ		((__force blk_features_t)(1u << 14))
+
+/* Whether to pipeline zoned writes. Needs a bit no other BLK_FEAT_* uses. */
+#define BLK_FEAT_PIPELINE_ZWR		((__force blk_features_t)(1u << 17))
+
 /* undocumented magic for bcache */
 #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
 	((__force blk_features_t)(1u << 15))




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux