Replace the ioend pointer in iomap_writeback_ctx with a void *wb_ctx pointer so that the context can also be used for non-block, non-ioend writeback. Rename the submit_ioend method to writeback_submit and make it mandatory so that the generic writeback code stops seeing ioends and bios. Co-developed-by: Joanne Koong <joannelkoong@xxxxxxxxx> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- .../filesystems/iomap/operations.rst | 16 +--- block/fops.c | 1 + fs/gfs2/bmap.c | 1 + fs/iomap/buffered-io.c | 91 ++++++++++--------- fs/xfs/xfs_aops.c | 60 ++++++------ fs/zonefs/file.c | 1 + include/linux/iomap.h | 19 ++-- 7 files changed, 93 insertions(+), 96 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 3c7989ee84ff..7073c1a3ede3 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -285,7 +285,7 @@ The ``ops`` structure must be specified and is as follows: struct iomap_writeback_ops { int (*writeback_range)(struct iomap_writeback_ctx *wpc, struct folio *folio, u64 pos, unsigned int len, u64 end_pos); - int (*submit_ioend)(struct iomap_writeback_ctx *wpc, int status); + int (*writeback_submit)(struct iomap_writeback_ctx *wpc, int error); }; The fields are as follows: @@ -307,13 +307,7 @@ The fields are as follows: purpose. This function must be supplied by the filesystem. - - ``submit_ioend``: Allows the file systems to hook into writeback bio - submission. - This might include pre-write space accounting updates, or installing - a custom ``->bi_end_io`` function for internal purposes, such as - deferring the ioend completion to a workqueue to run metadata update - transactions from process context before submitting the bio. - This function is optional. + - ``writeback_submit``: Submit the previously built writeback context. 
Pagecache Writeback Completion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -328,12 +322,6 @@ the address space. This can happen in interrupt or process context, depending on the storage device. -Filesystems that need to update internal bookkeeping (e.g. unwritten -extent conversions) should provide a ``->submit_ioend`` function to -set ``struct iomap_end::bio::bi_end_io`` to its own function. -This function should call ``iomap_finish_ioends`` after finishing its -own work (e.g. unwritten extent conversion). - Some filesystems may wish to `amortize the cost of running metadata transactions <https://lore.kernel.org/all/20220120034733.221737-1-david@xxxxxxxxxxxxx/>`_ diff --git a/block/fops.c b/block/fops.c index 692be63a4aa0..777f2318eca2 100644 --- a/block/fops.c +++ b/block/fops.c @@ -560,6 +560,7 @@ static ssize_t blkdev_writeback_range(struct iomap_writeback_ctx *wpc, static const struct iomap_writeback_ops blkdev_writeback_ops = { .writeback_range = blkdev_writeback_range, + .writeback_submit = ioend_writeback_submit, }; static int blkdev_writepages(struct address_space *mapping, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index ff72e04a4788..eb49e49f2db4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2490,4 +2490,5 @@ static ssize_t gfs2_writeback_range(struct iomap_writeback_ctx *wpc, const struct iomap_writeback_ops gfs2_writeback_ops = { .writeback_range = gfs2_writeback_range, + .writeback_submit = ioend_writeback_submit, }; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a54b14817cd0..a72ab487c8ab 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1579,7 +1579,7 @@ u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) return folio_count; } -static void iomap_writepage_end_bio(struct bio *bio) +static void ioend_writeback_end_bio(struct bio *bio) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); @@ -1588,42 +1588,30 @@ static void iomap_writepage_end_bio(struct bio *bio) } /* - * Submit an ioend. 
- * - * If @error is non-zero, it means that we have a situation where some part of - * the submission process has failed after we've marked pages for writeback. - * We cannot cancel ioend directly in that case, so call the bio end I/O handler - * with the error status here to run the normal I/O completion handler to clear - * the writeback bit and let the file system proess the errors. + * We cannot cancel the ioend directly in case of an error, so call the bio end + * I/O handler with the error status here to run the normal I/O completion + * handler. */ -static int iomap_submit_ioend(struct iomap_writeback_ctx *wpc, int error) +int ioend_writeback_submit(struct iomap_writeback_ctx *wpc, int error) { - if (!wpc->ioend) - return error; + struct iomap_ioend *ioend = wpc->wb_ctx; - /* - * Let the file systems prepare the I/O submission and hook in an I/O - * comletion handler. This also needs to happen in case after a - * failure happened so that the file system end I/O handler gets called - * to clean up. 
- */ - if (wpc->ops->submit_ioend) { - error = wpc->ops->submit_ioend(wpc, error); - } else { - if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) - error = -EIO; - if (!error) - submit_bio(&wpc->ioend->io_bio); - } + if (!ioend->io_bio.bi_end_io) + ioend->io_bio.bi_end_io = ioend_writeback_end_bio; + + if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) + error = -EIO; if (error) { - wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); - bio_endio(&wpc->ioend->io_bio); + ioend->io_bio.bi_status = errno_to_blk_status(error); + bio_endio(&ioend->io_bio); + return error; } - wpc->ioend = NULL; - return error; + submit_bio(&ioend->io_bio); + return 0; } +EXPORT_SYMBOL_GPL(ioend_writeback_submit); static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc, loff_t pos, u16 ioend_flags) @@ -1634,7 +1622,6 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc, REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), GFP_NOFS, &iomap_ioend_bioset); bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); - bio->bi_end_io = iomap_writepage_end_bio; bio->bi_write_hint = wpc->inode->i_write_hint; wbc_init_bio(wpc->wbc, bio); wpc->nr_folios = 0; @@ -1644,16 +1631,17 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc, static bool iomap_can_add_to_ioend(struct iomap_writeback_ctx *wpc, loff_t pos, u16 ioend_flags) { + struct iomap_ioend *ioend = wpc->wb_ctx; + if (ioend_flags & IOMAP_IOEND_BOUNDARY) return false; if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != - (wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) + (ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) return false; - if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) + if (pos != ioend->io_offset + ioend->io_size) return false; if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && - iomap_sector(&wpc->iomap, pos) != - bio_end_sector(&wpc->ioend->io_bio)) + iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio)) return false; /* * 
Limit ioend bio chain lengths to minimise IO completion latency. This @@ -1679,6 +1667,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writeback_ctx *wpc, loff_t pos, ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len) { + struct iomap_ioend *ioend = wpc->wb_ctx; struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); unsigned int ioend_flags = 0; @@ -1709,15 +1698,17 @@ ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) ioend_flags |= IOMAP_IOEND_BOUNDARY; - if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { + if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { new_ioend: - error = iomap_submit_ioend(wpc, 0); - if (error) - return error; - wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); + if (ioend) { + error = wpc->ops->writeback_submit(wpc, 0); + if (error) + return error; + } + wpc->wb_ctx = ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); } - if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff)) + if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) goto new_ioend; if (ifs) @@ -1764,9 +1755,9 @@ ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, * Note that this defeats the ability to chain the ioends of * appending writes. 
*/ - wpc->ioend->io_size += map_len; - if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) - wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; + ioend->io_size += map_len; + if (ioend->io_offset + ioend->io_size > end_pos) + ioend->io_size = end_pos - ioend->io_offset; wbc_account_cgroup_owner(wpc->wbc, folio, map_len); return map_len; @@ -1956,6 +1947,18 @@ iomap_writepages(struct iomap_writeback_ctx *wpc) while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) error = iomap_writepage_map(wpc, folio); - return iomap_submit_ioend(wpc, error); + + /* + * If @error is non-zero, it means that we have a situation where some + * part of the submission process has failed after we've marked pages + * for writeback. + * + * We cannot cancel the writeback directly in that case, so always call + * ->writeback_submit to run the I/O completion handler to clear the + * writeback bit and let the file system process the errors. + */ + if (wpc->wb_ctx) + return wpc->ops->writeback_submit(wpc, error); + return error; } EXPORT_SYMBOL_GPL(iomap_writepages); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index ce4ed03db21d..193000e9ca7b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -514,41 +514,40 @@ xfs_ioend_needs_wq_completion( } static int -xfs_submit_ioend( - struct iomap_writeback_ctx *wpc, - int status) +xfs_writeback_submit( + struct iomap_writeback_ctx *wpc, + int error) { - struct iomap_ioend *ioend = wpc->ioend; - unsigned int nofs_flag; + struct iomap_ioend *ioend = wpc->wb_ctx; /* - * We can allocate memory here while doing writeback on behalf of - * memory reclaim. To avoid memory allocation deadlocks set the - * task-wide nofs context for the following operations. + * Convert CoW extents to regular. + * + * We can allocate memory here while doing writeback on behalf of memory + * reclaim. To avoid memory allocation deadlocks, set the task-wide + * nofs context. 
*/ - nofs_flag = memalloc_nofs_save(); + if (!error && (ioend->io_flags & IOMAP_IOEND_SHARED)) { + unsigned int nofs_flag; - /* Convert CoW extents to regular */ - if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) { - status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), + nofs_flag = memalloc_nofs_save(); + error = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); + memalloc_nofs_restore(nofs_flag); } - memalloc_nofs_restore(nofs_flag); - - /* send ioends that might require a transaction to the completion wq */ + /* + * Send ioends that might require a transaction to the completion wq. + */ if (xfs_ioend_needs_wq_completion(ioend)) ioend->io_bio.bi_end_io = xfs_end_bio; - if (status) - return status; - submit_bio(&ioend->io_bio); - return 0; + return ioend_writeback_submit(wpc, error); } static const struct iomap_writeback_ops xfs_writeback_ops = { .writeback_range = xfs_writeback_range, - .submit_ioend = xfs_submit_ioend, + .writeback_submit = xfs_writeback_submit, }; struct xfs_zoned_writepage_ctx { @@ -646,20 +645,25 @@ xfs_zoned_writeback_range( } static int -xfs_zoned_submit_ioend( - struct iomap_writeback_ctx *wpc, - int status) +xfs_zoned_writeback_submit( + struct iomap_writeback_ctx *wpc, + int error) { - wpc->ioend->io_bio.bi_end_io = xfs_end_bio; - if (status) - return status; - xfs_zone_alloc_and_submit(wpc->ioend, &XFS_ZWPC(wpc)->open_zone); + struct iomap_ioend *ioend = wpc->wb_ctx; + + ioend->io_bio.bi_end_io = xfs_end_bio; + if (error) { + ioend->io_bio.bi_status = errno_to_blk_status(error); + bio_endio(&ioend->io_bio); + return error; + } + xfs_zone_alloc_and_submit(ioend, &XFS_ZWPC(wpc)->open_zone); return 0; } static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { .writeback_range = xfs_zoned_writeback_range, - .submit_ioend = xfs_zoned_submit_ioend, + .writeback_submit = xfs_zoned_writeback_submit, }; STATIC int diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 
cb3d380c4651..a0ce6c97b9e5 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -151,6 +151,7 @@ static ssize_t zonefs_writeback_range(struct iomap_writeback_ctx *wpc, static const struct iomap_writeback_ops zonefs_writeback_ops = { .writeback_range = zonefs_writeback_range, + .writeback_submit = ioend_writeback_submit, }; static int zonefs_writepages(struct address_space *mapping, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e346475a023d..b65951cdb0b5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -391,8 +391,7 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, /* * Structure for writeback I/O completions. * - * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io - * for direct I/O) can split a bio generated by iomap. In that case the parent + * File systems can split a bio generated by iomap. In that case the parent * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { @@ -416,7 +415,7 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) struct iomap_writeback_ops { /* - * Required, performs writeback on the passed in range + * Performs writeback on the passed in range * * Can map arbitrarily large regions, but we need to call into it at * least once per folio to allow the file systems to synchronize with @@ -432,23 +431,22 @@ struct iomap_writeback_ops { u64 end_pos); /* - * Optional, allows the file systems to hook into bio submission, - * including overriding the bi_end_io handler. + * Submit a writeback context previously built up by ->writeback_range. * - * Returns 0 if the bio was successfully submitted, or a negative - * error code if status was non-zero or another error happened and - * the bio could not be submitted. + * Returns 0 if the context was successfully submitted, or a negative + * error code if not. If @error is non-zero a failure occurred, and + * the writeback context should be completed with an error. 
*/ - int (*submit_ioend)(struct iomap_writeback_ctx *wpc, int status); + int (*writeback_submit)(struct iomap_writeback_ctx *wpc, int error); }; struct iomap_writeback_ctx { struct iomap iomap; struct inode *inode; struct writeback_control *wbc; - struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; u32 nr_folios; /* folios added to the ioend */ + void *wb_ctx; /* pending writeback context */ }; struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, @@ -461,6 +459,7 @@ void iomap_ioend_try_merge(struct iomap_ioend *ioend, void iomap_sort_ioends(struct list_head *ioend_list); ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); +int ioend_writeback_submit(struct iomap_writeback_ctx *wpc, int error); int iomap_writepages(struct iomap_writeback_ctx *wpc); /* -- 2.47.2