On Fri, Jun 27, 2025 at 09:02:37AM +0200, Christoph Hellwig wrote: > Replace the ioend pointer in iomap_writeback_ctx with a void *wb_ctx > one to facilitate non-block, non-ioend writeback for use. Rename > the submit_ioend method to writeback_submit and make it mandatory so > that the generic writeback code stops seeing ioends and bios. > > Co-developed-by: Joanne Koong <joannelkoong@xxxxxxxxx> > Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx> > .../filesystems/iomap/operations.rst | 16 +--- > block/fops.c | 1 + > fs/gfs2/bmap.c | 1 + > fs/iomap/buffered-io.c | 91 ++++++++++--------- > fs/xfs/xfs_aops.c | 60 ++++++------ > fs/zonefs/file.c | 1 + > include/linux/iomap.h | 19 ++-- > 7 files changed, 93 insertions(+), 96 deletions(-) > > diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst > index 3c7989ee84ff..7073c1a3ede3 100644 > --- a/Documentation/filesystems/iomap/operations.rst > +++ b/Documentation/filesystems/iomap/operations.rst > @@ -285,7 +285,7 @@ The ``ops`` structure must be specified and is as follows: > struct iomap_writeback_ops { > int (*writeback_range)(struct iomap_writeback_ctx *wpc, > struct folio *folio, u64 pos, unsigned int len, u64 end_pos); > - int (*submit_ioend)(struct iomap_writeback_ctx *wpc, int status); > + int (*writeback_submit)(struct iomap_writeback_ctx *wpc, int error); > }; > > The fields are as follows: > @@ -307,13 +307,7 @@ The fields are as follows: > purpose. > This function must be supplied by the filesystem. > > - - ``submit_ioend``: Allows the file systems to hook into writeback bio > - submission. 
> - This might include pre-write space accounting updates, or installing > - a custom ``->bi_end_io`` function for internal purposes, such as > - deferring the ioend completion to a workqueue to run metadata update > - transactions from process context before submitting the bio. > - This function is optional. > + - ``writeback_submit``: Submit the previously built writeback context. > > Pagecache Writeback Completion > ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > @@ -328,12 +322,6 @@ the address space. > This can happen in interrupt or process context, depending on the > storage device. > > -Filesystems that need to update internal bookkeeping (e.g. unwritten > -extent conversions) should provide a ``->submit_ioend`` function to > -set ``struct iomap_end::bio::bi_end_io`` to its own function. > -This function should call ``iomap_finish_ioends`` after finishing its > -own work (e.g. unwritten extent conversion). > - > Some filesystems may wish to `amortize the cost of running metadata > transactions > <https://lore.kernel.org/all/20220120034733.221737-1-david@xxxxxxxxxxxxx/>`_ > diff --git a/block/fops.c b/block/fops.c > index 692be63a4aa0..777f2318eca2 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -560,6 +560,7 @@ static ssize_t blkdev_writeback_range(struct iomap_writeback_ctx *wpc, > > static const struct iomap_writeback_ops blkdev_writeback_ops = { > .writeback_range = blkdev_writeback_range, > + .writeback_submit = ioend_writeback_submit, > }; > > static int blkdev_writepages(struct address_space *mapping, > diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c > index ff72e04a4788..eb49e49f2db4 100644 > --- a/fs/gfs2/bmap.c > +++ b/fs/gfs2/bmap.c > @@ -2490,4 +2490,5 @@ static ssize_t gfs2_writeback_range(struct iomap_writeback_ctx *wpc, > > const struct iomap_writeback_ops gfs2_writeback_ops = { > .writeback_range = gfs2_writeback_range, > + .writeback_submit = ioend_writeback_submit, > }; > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 
a54b14817cd0..a72ab487c8ab 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -1579,7 +1579,7 @@ u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) > return folio_count; > } > > -static void iomap_writepage_end_bio(struct bio *bio) > +static void ioend_writeback_end_bio(struct bio *bio) > { > struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); > > @@ -1588,42 +1588,30 @@ static void iomap_writepage_end_bio(struct bio *bio) > } > > /* > - * Submit an ioend. > - * > - * If @error is non-zero, it means that we have a situation where some part of > - * the submission process has failed after we've marked pages for writeback. > - * We cannot cancel ioend directly in that case, so call the bio end I/O handler > - * with the error status here to run the normal I/O completion handler to clear > - * the writeback bit and let the file system proess the errors. > + * We cannot cancel the ioend directly in case of an error, so call the bio end > + * I/O handler with the error status here to run the normal I/O completion > + * handler. > */ > -static int iomap_submit_ioend(struct iomap_writeback_ctx *wpc, int error) > +int ioend_writeback_submit(struct iomap_writeback_ctx *wpc, int error) > { > - if (!wpc->ioend) > - return error; > + struct iomap_ioend *ioend = wpc->wb_ctx; > > - /* > - * Let the file systems prepare the I/O submission and hook in an I/O > - * comletion handler. This also needs to happen in case after a > - * failure happened so that the file system end I/O handler gets called > - * to clean up. 
> - */ > - if (wpc->ops->submit_ioend) { > - error = wpc->ops->submit_ioend(wpc, error); > - } else { > - if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) > - error = -EIO; > - if (!error) > - submit_bio(&wpc->ioend->io_bio); > - } > + if (!ioend->io_bio.bi_end_io) > + ioend->io_bio.bi_end_io = ioend_writeback_end_bio; > + > + if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) > + error = -EIO; > > if (error) { > - wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); > - bio_endio(&wpc->ioend->io_bio); > + ioend->io_bio.bi_status = errno_to_blk_status(error); > + bio_endio(&ioend->io_bio); > + return error; > } > > - wpc->ioend = NULL; > - return error; > + submit_bio(&ioend->io_bio); > + return 0; > } > +EXPORT_SYMBOL_GPL(ioend_writeback_submit); > > static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc, > loff_t pos, u16 ioend_flags) > @@ -1634,7 +1622,6 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc, > REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), > GFP_NOFS, &iomap_ioend_bioset); > bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); > - bio->bi_end_io = iomap_writepage_end_bio; > bio->bi_write_hint = wpc->inode->i_write_hint; > wbc_init_bio(wpc->wbc, bio); > wpc->nr_folios = 0; > @@ -1644,16 +1631,17 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc, > static bool iomap_can_add_to_ioend(struct iomap_writeback_ctx *wpc, loff_t pos, > u16 ioend_flags) > { > + struct iomap_ioend *ioend = wpc->wb_ctx; > + > if (ioend_flags & IOMAP_IOEND_BOUNDARY) > return false; > if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != > - (wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) > + (ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) > return false; > - if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) > + if (pos != ioend->io_offset + ioend->io_size) > return false; > if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && > - iomap_sector(&wpc->iomap, pos) != > - 
bio_end_sector(&wpc->ioend->io_bio)) > + iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio)) > return false; > /* > * Limit ioend bio chain lengths to minimise IO completion latency. This > @@ -1679,6 +1667,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writeback_ctx *wpc, loff_t pos, > ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, > loff_t pos, loff_t end_pos, unsigned int dirty_len) > { > + struct iomap_ioend *ioend = wpc->wb_ctx; > struct iomap_folio_state *ifs = folio->private; > size_t poff = offset_in_folio(folio, pos); > unsigned int ioend_flags = 0; > @@ -1709,15 +1698,17 @@ ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, > if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) > ioend_flags |= IOMAP_IOEND_BOUNDARY; > > - if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { > + if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { > new_ioend: > - error = iomap_submit_ioend(wpc, 0); > - if (error) > - return error; > - wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); > + if (ioend) { > + error = wpc->ops->writeback_submit(wpc, 0); > + if (error) > + return error; > + } > + wpc->wb_ctx = ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); > } > > - if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff)) > + if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) > goto new_ioend; > > if (ifs) > @@ -1764,9 +1755,9 @@ ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, > * Note that this defeats the ability to chain the ioends of > * appending writes. 
> */ > - wpc->ioend->io_size += map_len; > - if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) > - wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; > + ioend->io_size += map_len; > + if (ioend->io_offset + ioend->io_size > end_pos) > + ioend->io_size = end_pos - ioend->io_offset; > > wbc_account_cgroup_owner(wpc->wbc, folio, map_len); > return map_len; > @@ -1956,6 +1947,18 @@ iomap_writepages(struct iomap_writeback_ctx *wpc) > > while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) > error = iomap_writepage_map(wpc, folio); > - return iomap_submit_ioend(wpc, error); > + > + /* > + * If @error is non-zero, it means that we have a situation where some > + * part of the submission process has failed after we've marked pages > + * for writeback. > + * > + * We cannot cancel the writeback directly in that case, so always call > + * ->writeback_submit to run the I/O completion handler to clear the > + * writeback bit and let the file system process the errors. > + */ > + if (wpc->wb_ctx) > + return wpc->ops->writeback_submit(wpc, error); > + return error; > } > EXPORT_SYMBOL_GPL(iomap_writepages); > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index ce4ed03db21d..193000e9ca7b 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c > @@ -514,41 +514,40 @@ xfs_ioend_needs_wq_completion( > } > > static int > -xfs_submit_ioend( > - struct iomap_writeback_ctx *wpc, > - int status) > +xfs_writeback_submit( > + struct iomap_writeback_ctx *wpc, > + int error) > { > - struct iomap_ioend *ioend = wpc->ioend; > - unsigned int nofs_flag; > + struct iomap_ioend *ioend = wpc->wb_ctx; > > /* > - * We can allocate memory here while doing writeback on behalf of > - * memory reclaim. To avoid memory allocation deadlocks set the > - * task-wide nofs context for the following operations. > + * Convert CoW extents to regular. > + * > + * We can allocate memory here while doing writeback on behalf of memory > + * reclaim. 
To avoid memory allocation deadlocks, set the task-wide > + * nofs context. > */ > - nofs_flag = memalloc_nofs_save(); > + if (!error && (ioend->io_flags & IOMAP_IOEND_SHARED)) { > + unsigned int nofs_flag; > > - /* Convert CoW extents to regular */ > - if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) { > - status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), > + nofs_flag = memalloc_nofs_save(); > + error = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), > ioend->io_offset, ioend->io_size); > + memalloc_nofs_restore(nofs_flag); > } > > - memalloc_nofs_restore(nofs_flag); > - > - /* send ioends that might require a transaction to the completion wq */ > + /* > + * Send ioends that might require a transaction to the completion wq. > + */ > if (xfs_ioend_needs_wq_completion(ioend)) > ioend->io_bio.bi_end_io = xfs_end_bio; > > - if (status) > - return status; > - submit_bio(&ioend->io_bio); > - return 0; > + return ioend_writeback_submit(wpc, error); > } > > static const struct iomap_writeback_ops xfs_writeback_ops = { > .writeback_range = xfs_writeback_range, > - .submit_ioend = xfs_submit_ioend, > + .writeback_submit = xfs_writeback_submit, > }; > > struct xfs_zoned_writepage_ctx { > @@ -646,20 +645,25 @@ xfs_zoned_writeback_range( > } > > static int > -xfs_zoned_submit_ioend( > - struct iomap_writeback_ctx *wpc, > - int status) > +xfs_zoned_writeback_submit( > + struct iomap_writeback_ctx *wpc, > + int error) > { > - wpc->ioend->io_bio.bi_end_io = xfs_end_bio; > - if (status) > - return status; > - xfs_zone_alloc_and_submit(wpc->ioend, &XFS_ZWPC(wpc)->open_zone); > + struct iomap_ioend *ioend = wpc->wb_ctx; > + > + ioend->io_bio.bi_end_io = xfs_end_bio; > + if (error) { > + ioend->io_bio.bi_status = errno_to_blk_status(error); > + bio_endio(&ioend->io_bio); > + return error; > + } > + xfs_zone_alloc_and_submit(ioend, &XFS_ZWPC(wpc)->open_zone); > return 0; > } > > static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { > .writeback_range = 
xfs_zoned_writeback_range, > - .submit_ioend = xfs_zoned_submit_ioend, > + .writeback_submit = xfs_zoned_writeback_submit, > }; > > STATIC int > diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c > index cb3d380c4651..a0ce6c97b9e5 100644 > --- a/fs/zonefs/file.c > +++ b/fs/zonefs/file.c > @@ -151,6 +151,7 @@ static ssize_t zonefs_writeback_range(struct iomap_writeback_ctx *wpc, > > static const struct iomap_writeback_ops zonefs_writeback_ops = { > .writeback_range = zonefs_writeback_range, > + .writeback_submit = ioend_writeback_submit, > }; > > static int zonefs_writepages(struct address_space *mapping, > diff --git a/include/linux/iomap.h b/include/linux/iomap.h > index e346475a023d..b65951cdb0b5 100644 > --- a/include/linux/iomap.h > +++ b/include/linux/iomap.h > @@ -391,8 +391,7 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, > /* > * Structure for writeback I/O completions. > * > - * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io > - * for direct I/O) can split a bio generated by iomap. In that case the parent > + * File systems can split a bio generated by iomap. In that case the parent > * ioend it was split from is recorded in ioend->io_parent. > */ > struct iomap_ioend { > @@ -416,7 +415,7 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) > > struct iomap_writeback_ops { > /* > - * Required, performs writeback on the passed in range > + * Performs writeback on the passed in range > * > * Can map arbitrarily large regions, but we need to call into it at > * least once per folio to allow the file systems to synchronize with > @@ -432,23 +431,22 @@ struct iomap_writeback_ops { > u64 end_pos); > > /* > - * Optional, allows the file systems to hook into bio submission, > - * including overriding the bi_end_io handler. > + * Submit a writeback context previously built up by ->writeback_range. 
> * > - * Returns 0 if the bio was successfully submitted, or a negative > - * error code if status was non-zero or another error happened and > - * the bio could not be submitted. > + * Returns 0 if the context was successfully submitted, or a negative > + * error code if not. If @error is non-zero a failure occurred, and > + * the writeback context should be completed with an error. > */ > - int (*submit_ioend)(struct iomap_writeback_ctx *wpc, int status); > + int (*writeback_submit)(struct iomap_writeback_ctx *wpc, int error); > }; > > struct iomap_writeback_ctx { > struct iomap iomap; > struct inode *inode; > struct writeback_control *wbc; > - struct iomap_ioend *ioend; > const struct iomap_writeback_ops *ops; > u32 nr_folios; /* folios added to the ioend */ > + void *wb_ctx; /* pending writeback context */ > }; > > struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, > @@ -461,6 +459,7 @@ void iomap_ioend_try_merge(struct iomap_ioend *ioend, > void iomap_sort_ioends(struct list_head *ioend_list); > ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, > loff_t pos, loff_t end_pos, unsigned int dirty_len); > +int ioend_writeback_submit(struct iomap_writeback_ctx *wpc, int error); > int iomap_writepages(struct iomap_writeback_ctx *wpc); > > /* > -- > 2.47.2 > >