Re: [PATCH 1/2] block: accumulate segment page gaps per bio

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Aug 5, 2025 at 3:59 PM Keith Busch <kbusch@xxxxxxxx> wrote:
>
> From: Keith Busch <kbusch@xxxxxxxxxx>
>
> The blk-mq dma iteration has an optimization for requests that align to
> the device's iommu merge boundary. This boundary may be larger than the
> device's virtual boundary, but the code had been depending on that queue
> limit to know ahead of time if the request aligns to the optimization.
>
> Rather than rely on that queue limit, which many devices may not even
> have, store the virtual boundary gaps of each segment into the bio as a
> mask while checking the segments and merging. We can then quickly check
> per io if the request can use the optimization or not.
>
> Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
> ---
>  block/blk-merge.c         | 30 +++++++++++++++++++++++++++---
>  block/blk-mq-dma.c        |  3 +--
>  block/blk-mq.c            |  5 +++++
>  include/linux/blk-mq.h    |  6 ++++++
>  include/linux/blk_types.h |  2 ++
>  5 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 81bdad915699a..d63389c063006 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -278,6 +278,9 @@ static unsigned int bio_split_alignment(struct bio *bio,
>         return lim->logical_block_size;
>  }
>
> +#define bv_seg_gap(bv, bvprv) \
> +       bv.bv_offset | ((bvprv.bv_offset + bvprv.bv_len) & (PAGE_SIZE - 1));

The bv_seg_gap() macro has a stray trailing semicolon and is missing
parentheses around its arguments and around the whole expansion. Is
there a reason not to make this a static inline function rather than a
macro?

Best,
Caleb

> +
>  /**
>   * bio_split_rw_at - check if and where to split a read/write bio
>   * @bio:  [in] bio to be split
> @@ -293,9 +296,9 @@ static unsigned int bio_split_alignment(struct bio *bio,
>  int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
>                 unsigned *segs, unsigned max_bytes)
>  {
> +       unsigned nsegs = 0, bytes = 0, page_gaps = 0;
>         struct bio_vec bv, bvprv, *bvprvp = NULL;
>         struct bvec_iter iter;
> -       unsigned nsegs = 0, bytes = 0;
>
>         bio_for_each_bvec(bv, bio, iter) {
>                 if (bv.bv_offset & lim->dma_alignment)
> @@ -305,8 +308,11 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
>                  * If the queue doesn't support SG gaps and adding this
>                  * offset would create a gap, disallow it.
>                  */
> -               if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
> -                       goto split;
> +               if (bvprvp) {
> +                       if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
> +                               goto split;
> +                       page_gaps |= bv_seg_gap(bv, bvprv);
> +               }
>
>                 if (nsegs < lim->max_segments &&
>                     bytes + bv.bv_len <= max_bytes &&
> @@ -324,6 +330,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
>         }
>
>         *segs = nsegs;
> +       bio->page_gaps = page_gaps;
>         return 0;
>  split:
>         if (bio->bi_opf & REQ_ATOMIC)
> @@ -353,6 +360,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
>          * big IO can be trival, disable iopoll when split needed.
>          */
>         bio_clear_polled(bio);
> +       bio->page_gaps = page_gaps;
>         return bytes >> SECTOR_SHIFT;
>  }
>  EXPORT_SYMBOL_GPL(bio_split_rw_at);
> @@ -696,6 +704,8 @@ static bool blk_atomic_write_mergeable_rqs(struct request *rq,
>  static struct request *attempt_merge(struct request_queue *q,
>                                      struct request *req, struct request *next)
>  {
> +       struct bio_vec bv, bvprv;
> +
>         if (!rq_mergeable(req) || !rq_mergeable(next))
>                 return NULL;
>
> @@ -753,6 +763,10 @@ static struct request *attempt_merge(struct request_queue *q,
>         if (next->start_time_ns < req->start_time_ns)
>                 req->start_time_ns = next->start_time_ns;
>
> +       bv = next->bio->bi_io_vec[0];
> +       bvprv = req->biotail->bi_io_vec[req->biotail->bi_vcnt - 1];
> +       req->__page_gaps |= blk_rq_page_gaps(next) | bv_seg_gap(bv, bvprv);
> +
>         req->biotail->bi_next = next->bio;
>         req->biotail = next->biotail;
>
> @@ -861,6 +875,7 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
>                 struct bio *bio, unsigned int nr_segs)
>  {
>         const blk_opf_t ff = bio_failfast(bio);
> +       struct bio_vec bv, bvprv;
>
>         if (!ll_back_merge_fn(req, bio, nr_segs))
>                 return BIO_MERGE_FAILED;
> @@ -876,6 +891,10 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
>         if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
>                 blk_zone_write_plug_bio_merged(bio);
>
> +       bv = bio->bi_io_vec[0];
> +       bvprv = req->biotail->bi_io_vec[req->biotail->bi_vcnt - 1];
> +       req->__page_gaps |= bio->page_gaps | bv_seg_gap(bv, bvprv);
> +
>         req->biotail->bi_next = bio;
>         req->biotail = bio;
>         req->__data_len += bio->bi_iter.bi_size;
> @@ -890,6 +909,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
>                 struct bio *bio, unsigned int nr_segs)
>  {
>         const blk_opf_t ff = bio_failfast(bio);
> +       struct bio_vec bv, bvprv;
>
>         /*
>          * A front merge for writes to sequential zones of a zoned block device
> @@ -910,6 +930,10 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
>
>         blk_update_mixed_merge(req, bio, true);
>
> +       bv = req->bio->bi_io_vec[0];
> +       bvprv = bio->bi_io_vec[bio->bi_vcnt - 1];
> +       req->__page_gaps |= bio->page_gaps | bv_seg_gap(bv, bvprv);
> +
>         bio->bi_next = req->bio;
>         req->bio = bio;
>
> diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
> index faa36ff6465ee..a03067c4a268f 100644
> --- a/block/blk-mq-dma.c
> +++ b/block/blk-mq-dma.c
> @@ -73,8 +73,7 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter)
>  static inline bool blk_can_dma_map_iova(struct request *req,
>                 struct device *dma_dev)
>  {
> -       return !((queue_virt_boundary(req->q) + 1) &
> -               dma_get_merge_boundary(dma_dev));
> +       return !(blk_rq_page_gaps(req) & dma_get_merge_boundary(dma_dev));
>  }
>
>  static bool blk_dma_map_bus(struct blk_dma_iter *iter)
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b67d6c02ecebd..09134a66c5666 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -376,6 +376,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
>         INIT_LIST_HEAD(&rq->queuelist);
>         rq->q = q;
>         rq->__sector = (sector_t) -1;
> +       rq->__page_gaps = 0;
>         INIT_HLIST_NODE(&rq->hash);
>         RB_CLEAR_NODE(&rq->rb_node);
>         rq->tag = BLK_MQ_NO_TAG;
> @@ -659,6 +660,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
>                         goto out_queue_exit;
>         }
>         rq->__data_len = 0;
> +       rq->__page_gaps = 0;
>         rq->__sector = (sector_t) -1;
>         rq->bio = rq->biotail = NULL;
>         return rq;
> @@ -739,6 +741,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>         rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
>         blk_mq_rq_time_init(rq, alloc_time_ns);
>         rq->__data_len = 0;
> +       rq->__page_gaps = 0;
>         rq->__sector = (sector_t) -1;
>         rq->bio = rq->biotail = NULL;
>         return rq;
> @@ -2665,6 +2668,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
>         rq->bio = rq->biotail = bio;
>         rq->__sector = bio->bi_iter.bi_sector;
>         rq->__data_len = bio->bi_iter.bi_size;
> +       rq->__page_gaps = bio->page_gaps;
>         rq->nr_phys_segments = nr_segs;
>         if (bio_integrity(bio))
>                 rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
> @@ -3363,6 +3367,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
>
>         /* Copy attributes of the original request to the clone request. */
>         rq->__sector = blk_rq_pos(rq_src);
> +       rq->__page_gaps = blk_rq_page_gaps(rq_src);
>         rq->__data_len = blk_rq_bytes(rq_src);
>         if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
>                 rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 2a5a828f19a0b..d8f491867adc0 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -115,6 +115,7 @@ struct request {
>
>         /* the following two fields are internal, NEVER access directly */
>         unsigned int __data_len;        /* total data len */
> +       unsigned int __page_gaps;       /* a mask of all the segment page gaps */
>         sector_t __sector;              /* sector cursor */
>
>         struct bio *bio;
> @@ -1080,6 +1081,11 @@ static inline sector_t blk_rq_pos(const struct request *rq)
>         return rq->__sector;
>  }
>
> +static inline unsigned int blk_rq_page_gaps(const struct request *rq)
> +{
> +       return rq->__page_gaps;
> +}
> +
>  static inline unsigned int blk_rq_bytes(const struct request *rq)
>  {
>         return rq->__data_len;
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 0a29b20939d17..d0ed28d40fe02 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -264,6 +264,8 @@ struct bio {
>
>         unsigned short          bi_max_vecs;    /* max bvl_vecs we can hold */
>
> +       unsigned int            page_gaps;      /* a mask of all the vector gaps */
> +
>         atomic_t                __bi_cnt;       /* pin count */
>
>         struct bio_vec          *bi_io_vec;     /* the actual vec list */
> --
> 2.47.3
>
>





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux