On Tue, Aug 5, 2025 at 3:59 PM Keith Busch <kbusch@xxxxxxxx> wrote: > > From: Keith Busch <kbusch@xxxxxxxxxx> > > The blk-mq dma iteration has an optimization for requests that align to > the device's iommu merge boundary. This boundary may be larger than the > device's virtual boundary, but the code had been depending on that queue > limit to know ahead of time if the request aligns to the optimization. > > Rather than rely on that queue limit, which many devices may not even > have, store the virtual boundary gaps of each segment into the bio as a > mask while checking the segments and merging. We can then quickly check > per io if the request can use the optimization or not. > > Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx> > --- > block/blk-merge.c | 30 +++++++++++++++++++++++++++--- > block/blk-mq-dma.c | 3 +-- > block/blk-mq.c | 5 +++++ > include/linux/blk-mq.h | 6 ++++++ > include/linux/blk_types.h | 2 ++ > 5 files changed, 41 insertions(+), 5 deletions(-) > > diff --git a/block/blk-merge.c b/block/blk-merge.c > index 81bdad915699a..d63389c063006 100644 > --- a/block/blk-merge.c > +++ b/block/blk-merge.c > @@ -278,6 +278,9 @@ static unsigned int bio_split_alignment(struct bio *bio, > return lim->logical_block_size; > } > > +#define bv_seg_gap(bv, bvprv) \ > + bv.bv_offset | ((bvprv.bv_offset + bvprv.bv_len) & (PAGE_SIZE - 1)); The macro has an extra trailing semicolon and is missing parentheses around its arguments and around the whole expansion. Is there a reason not to make this a static inline function rather than a macro? 
Best, Caleb > + > /** > * bio_split_rw_at - check if and where to split a read/write bio > * @bio: [in] bio to be split > @@ -293,9 +296,9 @@ static unsigned int bio_split_alignment(struct bio *bio, > int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, > unsigned *segs, unsigned max_bytes) > { > + unsigned nsegs = 0, bytes = 0, page_gaps = 0; > struct bio_vec bv, bvprv, *bvprvp = NULL; > struct bvec_iter iter; > - unsigned nsegs = 0, bytes = 0; > > bio_for_each_bvec(bv, bio, iter) { > if (bv.bv_offset & lim->dma_alignment) > @@ -305,8 +308,11 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, > * If the queue doesn't support SG gaps and adding this > * offset would create a gap, disallow it. > */ > - if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset)) > - goto split; > + if (bvprvp) { > + if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset)) > + goto split; > + page_gaps |= bv_seg_gap(bv, bvprv); > + } > > if (nsegs < lim->max_segments && > bytes + bv.bv_len <= max_bytes && > @@ -324,6 +330,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, > } > > *segs = nsegs; > + bio->page_gaps = page_gaps; > return 0; > split: > if (bio->bi_opf & REQ_ATOMIC) > @@ -353,6 +360,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, > * big IO can be trival, disable iopoll when split needed. 
> */ > bio_clear_polled(bio); > + bio->page_gaps = page_gaps; > return bytes >> SECTOR_SHIFT; > } > EXPORT_SYMBOL_GPL(bio_split_rw_at); > @@ -696,6 +704,8 @@ static bool blk_atomic_write_mergeable_rqs(struct request *rq, > static struct request *attempt_merge(struct request_queue *q, > struct request *req, struct request *next) > { > + struct bio_vec bv, bvprv; > + > if (!rq_mergeable(req) || !rq_mergeable(next)) > return NULL; > > @@ -753,6 +763,10 @@ static struct request *attempt_merge(struct request_queue *q, > if (next->start_time_ns < req->start_time_ns) > req->start_time_ns = next->start_time_ns; > > + bv = next->bio->bi_io_vec[0]; > + bvprv = req->biotail->bi_io_vec[req->biotail->bi_vcnt - 1]; > + req->__page_gaps |= blk_rq_page_gaps(next) | bv_seg_gap(bv, bvprv); > + > req->biotail->bi_next = next->bio; > req->biotail = next->biotail; > > @@ -861,6 +875,7 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req, > struct bio *bio, unsigned int nr_segs) > { > const blk_opf_t ff = bio_failfast(bio); > + struct bio_vec bv, bvprv; > > if (!ll_back_merge_fn(req, bio, nr_segs)) > return BIO_MERGE_FAILED; > @@ -876,6 +891,10 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req, > if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING) > blk_zone_write_plug_bio_merged(bio); > > + bv = bio->bi_io_vec[0]; > + bvprv = req->biotail->bi_io_vec[req->biotail->bi_vcnt - 1]; > + req->__page_gaps |= bio->page_gaps | bv_seg_gap(bv, bvprv); > + > req->biotail->bi_next = bio; > req->biotail = bio; > req->__data_len += bio->bi_iter.bi_size; > @@ -890,6 +909,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req, > struct bio *bio, unsigned int nr_segs) > { > const blk_opf_t ff = bio_failfast(bio); > + struct bio_vec bv, bvprv; > > /* > * A front merge for writes to sequential zones of a zoned block device > @@ -910,6 +930,10 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req, > > blk_update_mixed_merge(req, bio, 
true); > > + bv = req->bio->bi_io_vec[0]; > + bvprv = bio->bi_io_vec[bio->bi_vcnt - 1]; > + req->__page_gaps |= bio->page_gaps | bv_seg_gap(bv, bvprv); > + > bio->bi_next = req->bio; > req->bio = bio; > > diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c > index faa36ff6465ee..a03067c4a268f 100644 > --- a/block/blk-mq-dma.c > +++ b/block/blk-mq-dma.c > @@ -73,8 +73,7 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter) > static inline bool blk_can_dma_map_iova(struct request *req, > struct device *dma_dev) > { > - return !((queue_virt_boundary(req->q) + 1) & > - dma_get_merge_boundary(dma_dev)); > + return !(blk_rq_page_gaps(req) & dma_get_merge_boundary(dma_dev)); > } > > static bool blk_dma_map_bus(struct blk_dma_iter *iter) > diff --git a/block/blk-mq.c b/block/blk-mq.c > index b67d6c02ecebd..09134a66c5666 100644 > --- a/block/blk-mq.c > +++ b/block/blk-mq.c > @@ -376,6 +376,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) > INIT_LIST_HEAD(&rq->queuelist); > rq->q = q; > rq->__sector = (sector_t) -1; > + rq->__page_gaps = 0; > INIT_HLIST_NODE(&rq->hash); > RB_CLEAR_NODE(&rq->rb_node); > rq->tag = BLK_MQ_NO_TAG; > @@ -659,6 +660,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, > goto out_queue_exit; > } > rq->__data_len = 0; > + rq->__page_gaps = 0; > rq->__sector = (sector_t) -1; > rq->bio = rq->biotail = NULL; > return rq; > @@ -739,6 +741,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, > rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag); > blk_mq_rq_time_init(rq, alloc_time_ns); > rq->__data_len = 0; > + rq->__page_gaps = 0; > rq->__sector = (sector_t) -1; > rq->bio = rq->biotail = NULL; > return rq; > @@ -2665,6 +2668,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, > rq->bio = rq->biotail = bio; > rq->__sector = bio->bi_iter.bi_sector; > rq->__data_len = bio->bi_iter.bi_size; > + rq->__page_gaps = 
bio->page_gaps; > rq->nr_phys_segments = nr_segs; > if (bio_integrity(bio)) > rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, > @@ -3363,6 +3367,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, > > /* Copy attributes of the original request to the clone request. */ > rq->__sector = blk_rq_pos(rq_src); > + rq->__page_gaps = blk_rq_page_gaps(rq_src); > rq->__data_len = blk_rq_bytes(rq_src); > if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { > rq->rq_flags |= RQF_SPECIAL_PAYLOAD; > diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h > index 2a5a828f19a0b..d8f491867adc0 100644 > --- a/include/linux/blk-mq.h > +++ b/include/linux/blk-mq.h > @@ -115,6 +115,7 @@ struct request { > > /* the following two fields are internal, NEVER access directly */ > unsigned int __data_len; /* total data len */ > + unsigned int __page_gaps; /* a mask of all the segment page gaps */ > sector_t __sector; /* sector cursor */ > > struct bio *bio; > @@ -1080,6 +1081,11 @@ static inline sector_t blk_rq_pos(const struct request *rq) > return rq->__sector; > } > > +static inline unsigned int blk_rq_page_gaps(const struct request *rq) > +{ > + return rq->__page_gaps; > +} > + > static inline unsigned int blk_rq_bytes(const struct request *rq) > { > return rq->__data_len; > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > index 0a29b20939d17..d0ed28d40fe02 100644 > --- a/include/linux/blk_types.h > +++ b/include/linux/blk_types.h > @@ -264,6 +264,8 @@ struct bio { > > unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ > > + unsigned int page_gaps; /* a mask of all the vector gaps */ > + > atomic_t __bi_cnt; /* pin count */ > > struct bio_vec *bi_io_vec; /* the actual vec list */ > -- > 2.47.3 > >