Re: [PATCH 1/2] block: Make __submit_bio_noacct() preserve the bio submission order

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 6/11/25 12:43 PM, Bart Van Assche wrote:
On 6/11/25 11:15 AM, Eric Biggers wrote:
Well, again it needs to work on any block device.  If the encryption might just not be done and plaintext ends up on-disk, then blk-crypto-fallback would be
unsafe to use.

It would be preferable to have blk-crypto-fallback continue to be handled in the
block layer so that drivers don't need to worry about it.

This concern could be addressed by introducing a new flag in struct
block_device_operations or struct queue_limits - a flag that indicates
that bio_split_to_limits() will be called by the block driver. If that
flag is set, blk_crypto_bio_prep() can be called from
bio_submit_split(). If that flag is not set, blk_crypto_bio_prep()
should be called from __submit_bio(). The latter behavior is the
current behavior for the upstream kernel.

(replying to my own email)

The patch below seems to work but needs further review and testing:


block: Rework splitting of encrypted bios

    Modify splitting of encrypted bios as follows:
    - Introduce a new block device flag BD_SPLITS_BIO_TO_LIMITS that allows
      bio-based drivers to report that they call bio_split_to_limits() for
      every bio.
    - For request-based block drivers and for bio-based block drivers that call
      bio_split_to_limits(), call blk_crypto_bio_prep() after bio splitting
      happened instead of before bio splitting happened.
    - For bio-based block drivers of which it is not known whether they call
      bio_split_to_limits(), call blk_crypto_bio_prep() before .submit_bio() is
      called.
    - In blk_crypto_fallback_encrypt_bio(), prevent infinite recursion by only
      trying to split a bio if this function is not called from inside
      bio_split_to_limits().
    - Since blk_crypto_fallback_encrypt_bio() may clear *bio_ptr, in its caller
      (__blk_crypto_bio_prep()), check whether or not this pointer is NULL.
    - In bio_split_rw(), restrict the bio size to the smaller size of what is
      supported by the block driver and the crypto fallback code.

    The advantages of these changes are as follows:
    - This patch fixes write errors on zoned storage caused by out-of-order
      submission of bios. This out-of-order submission happens if both the
      crypto fallback code and bio_split_to_limits() split a bio.
    - Less code duplication. The crypto fallback code now calls
      bio_split_to_limits() instead of open-coding it.

diff --git a/block/bio.c b/block/bio.c
index 3c0a558c90f5..d597cef6d228 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1682,6 +1682,11 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	if (!split)
 		return ERR_PTR(-ENOMEM);

+	/*
+	 * Tell blk_crypto_fallback_encrypt_bio() not to split this bio further.
+	 */
+	bio_set_flag(split, BIO_BEING_SPLIT);
+
 	split->bi_iter.bi_size = sectors << 9;

 	if (bio_integrity(split))
diff --git a/block/blk-core.c b/block/blk-core.c
index fdac48aec5ef..78b555ceea77 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -626,16 +626,22 @@ static void __submit_bio(struct bio *bio)
 	/* If plug is not used, add new plug here to cache nsecs time. */
 	struct blk_plug plug;

-	if (unlikely(!blk_crypto_bio_prep(&bio)))
-		return;
-
 	blk_start_plug(&plug);

 	if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
 		blk_mq_submit_bio(bio);
 	} else if (likely(bio_queue_enter(bio) == 0)) {
 		struct gendisk *disk = bio->bi_bdev->bd_disk;
-	
+
+		/*
+		 * Only call blk_crypto_bio_prep() before .submit_bio() if
+		 * the block driver won't call bio_split_to_limits().
+		 */
+		if (unlikely(!bdev_test_flag(bio->bi_bdev,
+					     BD_SPLITS_BIO_TO_LIMITS) &&
+			     !blk_crypto_bio_prep(&bio)))
+			goto exit_queue;
+
 		if ((bio->bi_opf & REQ_POLLED) &&
 		    !(disk->queue->limits.features & BLK_FEAT_POLL)) {
 			bio->bi_status = BLK_STS_NOTSUPP;
@@ -643,6 +649,8 @@ static void __submit_bio(struct bio *bio)
 		} else {
 			disk->fops->submit_bio(bio);
 		}
+
+exit_queue:
 		blk_queue_exit(disk->queue);
 	}

diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 005c9157ffb3..f2012368ac9e 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -209,9 +209,12 @@ blk_crypto_fallback_alloc_cipher_req(struct blk_crypto_keyslot *slot,
 	return true;
 }

-static bool blk_crypto_fallback_split_bio_if_needed(struct bio **bio_ptr)
+/*
+ * The encryption fallback code allocates bounce pages individually. Hence this
+ * function that calculates an upper limit for the bio size.
+ */
+unsigned int blk_crypto_max_io_size(struct bio *bio)
 {
-	struct bio *bio = *bio_ptr;
 	unsigned int i = 0;
 	unsigned int num_sectors = 0;
 	struct bio_vec bv;
@@ -222,21 +225,8 @@ static bool blk_crypto_fallback_split_bio_if_needed(struct bio **bio_ptr)
 		if (++i == BIO_MAX_VECS)
 			break;
 	}
-	if (num_sectors < bio_sectors(bio)) {
-		struct bio *split_bio;
-
-		split_bio = bio_split(bio, num_sectors, GFP_NOIO,
-				      &crypto_bio_split);
-		if (IS_ERR(split_bio)) {
-			bio->bi_status = BLK_STS_RESOURCE;
-			return false;
-		}
-		bio_chain(split_bio, bio);
-		submit_bio_noacct(bio);
-		*bio_ptr = split_bio;
-	}

-	return true;
+	return num_sectors;
 }

 union blk_crypto_iv {
@@ -257,8 +247,10 @@ static void blk_crypto_dun_to_iv(const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
  * The crypto API fallback's encryption routine.
  * Allocate a bounce bio for encryption, encrypt the input bio using crypto API,
  * and replace *bio_ptr with the bounce bio. May split input bio if it's too
- * large. Returns true on success. Returns false and sets bio->bi_status on
- * error.
+ * large. Returns %true on success. On error, %false is returned and one of
+ * these two actions is taken:
+ * - Either @bio_ptr->bi_status is set and *@bio_ptr is not modified.
+ * - Or bio_endio() is called and *@bio_ptr is changed into %NULL.
  */
 static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
 {
@@ -275,11 +267,17 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
 	bool ret = false;
 	blk_status_t blk_st;

-	/* Split the bio if it's too big for single page bvec */
-	if (!blk_crypto_fallback_split_bio_if_needed(bio_ptr))
-		return false;
+	if (!bio_flagged(*bio_ptr, BIO_BEING_SPLIT)) {
+		/* Split the bio if it's too big for single page bvec */
+		src_bio = bio_split_to_limits(*bio_ptr);
+		if (!src_bio) {
+			*bio_ptr = NULL;
+			return false;
+		}
+	} else {
+		src_bio = *bio_ptr;
+	}

-	src_bio = *bio_ptr;
 	bc = src_bio->bi_crypt_context;
 	data_unit_size = bc->bc_key->crypto_cfg.data_unit_size;

@@ -475,9 +473,8 @@ static void blk_crypto_fallback_decrypt_endio(struct bio *bio)
  * @bio_ptr: pointer to the bio to prepare
  *
  * If bio is doing a WRITE operation, this splits the bio into two parts if it's
- * too big (see blk_crypto_fallback_split_bio_if_needed()). It then allocates a
- * bounce bio for the first part, encrypts it, and updates bio_ptr to point to
- * the bounce bio.
+ * too big. It then allocates a bounce bio for the first part, encrypts it, and
+ * updates bio_ptr to point to the bounce bio.
  *
  * For a READ operation, we mark the bio for decryption by using bi_private and
  * bi_end_io.
@@ -495,6 +492,12 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
 	struct bio_crypt_ctx *bc = bio->bi_crypt_context;
 	struct bio_fallback_crypt_ctx *f_ctx;

+	/*
+	 * Check whether blk_crypto_fallback_bio_prep() has already been called.
+	 */
+	if (bio->bi_end_io == blk_crypto_fallback_encrypt_endio)
+		return true;
+
 	if (WARN_ON_ONCE(!tfms_inited[bc->bc_key->crypto_cfg.crypto_mode])) {
 		/* User didn't call blk_crypto_start_using_key() first */
 		bio->bi_status = BLK_STS_IOERR;
diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
index ccf6dff6ff6b..443ba1fd82e6 100644
--- a/block/blk-crypto-internal.h
+++ b/block/blk-crypto-internal.h
@@ -223,6 +223,8 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr);

 int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key);

+unsigned int blk_crypto_max_io_size(struct bio *bio);
+
 #else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */

 static inline int
@@ -245,6 +247,11 @@ blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
 	return 0;
 }

+static inline unsigned int blk_crypto_max_io_size(struct bio *bio)
+{
+	return UINT_MAX;
+}
+
 #endif /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */

 #endif /* __LINUX_BLK_CRYPTO_INTERNAL_H */
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 4b1ad84d1b5a..76278e23193d 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -306,7 +306,8 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
 	if (blk_crypto_fallback_bio_prep(bio_ptr))
 		return true;
 fail:
-	bio_endio(*bio_ptr);
+	if (*bio_ptr)
+		bio_endio(*bio_ptr);
 	return false;
 }

diff --git a/block/blk-merge.c b/block/blk-merge.c
index e55a8ec219c9..df65231be543 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,6 +9,7 @@
 #include <linux/blk-integrity.h>
 #include <linux/part_stat.h>
 #include <linux/blk-cgroup.h>
+#include <linux/blk-crypto.h>

 #include <trace/events/block.h>

@@ -125,9 +126,14 @@ static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
 				  bio->bi_iter.bi_sector);
 		WARN_ON_ONCE(bio_zone_write_plugging(bio));
 		submit_bio_noacct(bio);
-		return split;
+
+		bio = split;
 	}

+	WARN_ON_ONCE(!bio);
+	if (unlikely(!blk_crypto_bio_prep(&bio)))
+		return NULL;
+
 	return bio;
 error:
 	bio->bi_status = errno_to_blk_status(split_sectors);
@@ -356,9 +362,12 @@ EXPORT_SYMBOL_GPL(bio_split_rw_at);
 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
 		unsigned *nr_segs)
 {
+	u32 max_sectors =
+		min(get_max_io_size(bio, lim), blk_crypto_max_io_size(bio));
+
 	return bio_submit_split(bio,
 		bio_split_rw_at(bio, lim, nr_segs,
-			get_max_io_size(bio, lim) << SECTOR_SHIFT));
+				(u64)max_sectors << SECTOR_SHIFT));
 }

 /*
diff --git a/block/blk.h b/block/blk.h
index 37ec459fe656..8db804f32896 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -367,6 +367,11 @@ static inline bool bio_may_need_split(struct bio *bio,
 		lim->min_segment_size;
 }

+static void bio_clear_split_flag(struct bio **bio)
+{
+	bio_clear_flag(*bio, BIO_BEING_SPLIT);
+}
+
 /**
  * __bio_split_to_limits - split a bio to fit the queue limits
  * @bio:     bio to be split
@@ -383,6 +388,10 @@ static inline bool bio_may_need_split(struct bio *bio,
 static inline struct bio *__bio_split_to_limits(struct bio *bio,
 		const struct queue_limits *lim, unsigned int *nr_segs)
 {
+	struct bio *clear_split_flag __cleanup(bio_clear_split_flag) = bio;
+
+	bio_set_flag(bio, BIO_BEING_SPLIT);
+
 	switch (bio_op(bio)) {
 	case REQ_OP_READ:
 	case REQ_OP_WRITE:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5facb06e5924..c79195de2669 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2316,6 +2316,8 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);

+	bdev_set_flag(md->disk->part0, BD_SPLITS_BIO_TO_LIMITS);
+
 	dax_dev = alloc_dax(md, &dm_dax_ops);
 	if (IS_ERR(dax_dev)) {
 		if (PTR_ERR(dax_dev) != -EOPNOTSUPP)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3d1577f07c1c..25b789830b75 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -54,6 +54,8 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 #define BD_MAKE_IT_FAIL		(1u<<12)
 #endif
+	/* Set this flag if the driver calls bio_split_to_limits(). */
+#define BD_SPLITS_BIO_TO_LIMITS	(1u<<13)
 	dev_t			bd_dev;
 	struct address_space	*bd_mapping;	/* page cache */

@@ -308,6 +310,7 @@ enum {
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
 	BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
+	BIO_BEING_SPLIT,
 	BIO_FLAG_LAST
 };







[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux