On 7/29/25 15:17, Matthew Wilcox wrote: > Hm. Maybe something like this would be more clear? > > (contents and indeed name of iomap_should_split_ioend() very much TBD) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 9f541c05103b..429890fb7763 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -1684,6 +1684,7 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, > struct iomap_folio_state *ifs = folio->private; > size_t poff = offset_in_folio(folio, pos); > unsigned int ioend_flags = 0; > + unsigned thislen; > int error; > > if (wpc->iomap.type == IOMAP_UNWRITTEN) > @@ -1704,8 +1705,16 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, > ioend_flags); > } > > - if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) > + thislen = iomap_should_split_ioend(wpc, pos, len); > + > + if (!bio_add_folio(&wpc->ioend->io_bio, folio, thislen, poff)) > + goto new_ioend; > + if (thislen < len) { > + pos += thislen; > + len -= thislen; > + wbc_account_cgroup_owner(wbc, folio, thislen); > goto new_ioend; > + } > > if (ifs) > atomic_add(len, &ifs->write_bytes_pending); How is this? Does ioend_flags need to be recomputed (particularly IOMAP_IOEND_BOUNDARY) when processing the remainder of the folio? diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fb4519158f3a..0967e6fd62a1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1669,6 +1669,39 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, return true; } +static unsigned int iomap_should_split_ioend(struct iomap_writepage_ctx *wpc, + loff_t pos, unsigned int len) +{ + struct queue_limits *lim = bdev_limits(wpc->iomap.bdev); + + if ((lim->features & BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) && + !(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) { + unsigned int io_align = lim->io_opt >> SECTOR_SHIFT; + + /* Split sequential writes along io_align boundaries. 
+ */ + if (io_align) { + sector_t lba = bio_end_sector(&wpc->ioend->io_bio); + unsigned int mod = lba % io_align; + unsigned int max_len; + + /* + * If the end sector is already aligned and the bio is + * nonempty, then start a new bio for the remainder. + */ + if (!mod && wpc->ioend->io_bio.bi_iter.bi_size) + return 0; + + /* + * Clip the end of the bio to the alignment boundary. + */ + max_len = (io_align - mod) << SECTOR_SHIFT; + if (len > max_len) + len = max_len; + } + } + return len; +} + /* * Test to see if we have an existing ioend structure that we could append to * first; otherwise finish off the current ioend and start another. @@ -1688,6 +1721,7 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); unsigned int ioend_flags = 0; + unsigned int thislen; int error; if (wpc->iomap.type == IOMAP_UNWRITTEN) @@ -1708,11 +1742,14 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, ioend_flags); } - if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) + thislen = iomap_should_split_ioend(wpc, pos, len); + if (!thislen) + goto new_ioend; + if (!bio_add_folio(&wpc->ioend->io_bio, folio, thislen, poff)) goto new_ioend; if (ifs) - atomic_add(len, &ifs->write_bytes_pending); + atomic_add(thislen, &ifs->write_bytes_pending); /* * Clamp io_offset and io_size to the incore EOF so that ondisk @@ -1755,11 +1792,18 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, * Note that this defeats the ability to chain the ioends of * appending writes. */ - wpc->ioend->io_size += len; + wpc->ioend->io_size += thislen; if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; - wbc_account_cgroup_owner(wbc, folio, len); + wbc_account_cgroup_owner(wbc, folio, thislen); + + if (thislen < len) { + pos += thislen; + len -= thislen; + goto new_ioend; + } + return 0; } -- 2.43.0