As part of the larger effort to have iomap buffered io code support generic io, replace map_blocks() with writeback_folio() and move the bio writeback code into a helper function, iomap_bio_writeback_folio(), that callers using bios can directly invoke. No functional changes. Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> --- .../filesystems/iomap/operations.rst | 38 +++++----- block/fops.c | 7 +- fs/gfs2/bmap.c | 7 +- fs/iomap/buffered-io-bio.c | 49 +++++++++++- fs/iomap/buffered-io.c | 74 +++++-------------- fs/iomap/internal.h | 4 - fs/xfs/xfs_aops.c | 14 +++- fs/zonefs/file.c | 7 +- include/linux/iomap.h | 42 ++++++++--- 9 files changed, 148 insertions(+), 94 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 9f0e8a46cc8c..5d018d504145 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -278,7 +278,7 @@ writeback. It does not lock ``i_rwsem`` or ``invalidate_lock``. The dirty bit will be cleared for all folios run through the -``->map_blocks`` machinery described below even if the writeback fails. +``->writeback_folio`` machinery described below even if the writeback fails. This is to prevent dirty folio clots when storage devices fail; an ``-EIO`` is recorded for userspace to collect via ``fsync``. @@ -290,29 +290,33 @@ The ``ops`` structure must be specified and is as follows: .. code-block:: c struct iomap_writeback_ops { - int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned len); + int (*writeback_folio)(struct iomap_writeback_folio_range *ctx); int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); void (*discard_folio)(struct folio *folio, loff_t pos); }; The fields are as follows: - - ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file - range (in bytes) given by ``offset`` and ``len``. 
- iomap calls this function for each dirty fs block in each dirty folio, - though it will `reuse mappings + - ``writeback_folio``: iomap calls this function for each dirty fs block + in each dirty folio, though it will `reuse mappings <https://lore.kernel.org/all/20231207072710.176093-15-hch@xxxxxx/>`_ for runs of contiguous dirty fsblocks within a folio. - Do not return ``IOMAP_INLINE`` mappings here; the ``->iomap_end`` - function must deal with persisting written data. - Do not return ``IOMAP_DELALLOC`` mappings here; iomap currently - requires mapping to allocated space. - Filesystems can skip a potentially expensive mapping lookup if the - mappings have not changed. - This revalidation must be open-coded by the filesystem; it is - unclear if ``iomap::validity_cookie`` can be reused for this - purpose. + For blocks that need to be mapped first, please take a look at + ``iomap_bio_writeback_folio`` which takes in an ``iomap_map_blocks_t`` + mapping function. For that mapping function, + + * Set ``wpc->iomap`` to the space mapping of the file range (in bytes) + given by ``offset`` and ``len``. + * Do not return ``IOMAP_INLINE`` mappings here; the ``->iomap_end`` + function must deal with persisting written data. + * Do not return ``IOMAP_DELALLOC`` mappings here; iomap currently + requires mapping to allocated space. + * Filesystems can skip a potentially expensive mapping lookup if the + mappings have not changed. + * This revalidation must be open-coded by the filesystem; it is + unclear if ``iomap::validity_cookie`` can be reused for this + purpose. + This function must be supplied by the filesystem. - ``submit_ioend``: Allows the file systems to hook into writeback bio submission. This might include pre-write space accounting updates, or installing a custom ->bi_end_io function for internal purposes, such as deferring the ioend completion to a workqueue to run metadata update transactions from process context before submitting the bio. This function is optional. 
- - ``discard_folio``: iomap calls this function after ``->map_blocks`` + - ``discard_folio``: iomap calls this function after ``->writeback_folio`` fails to schedule I/O for any part of a dirty folio. The function should throw away any reservations that may have been made for the write. diff --git a/block/fops.c b/block/fops.c index 1309861d4c2c..c35fe1495fd2 100644 --- a/block/fops.c +++ b/block/fops.c @@ -551,8 +551,13 @@ static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc, IOMAP_WRITE, &wpc->iomap, NULL); } +static int blkdev_writeback_folio(struct iomap_writeback_folio_range *ctx) +{ + return iomap_bio_writeback_folio(ctx, blkdev_map_blocks); +} + static const struct iomap_writeback_ops blkdev_writeback_ops = { - .map_blocks = blkdev_map_blocks, + .writeback_folio = blkdev_writeback_folio, }; static int blkdev_writepages(struct address_space *mapping, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 7703d0471139..d13dfa986e18 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2486,6 +2486,11 @@ static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode, return ret; } +static int gfs2_writeback_folio(struct iomap_writeback_folio_range *ctx) +{ + return iomap_bio_writeback_folio(ctx, gfs2_map_blocks); +} + const struct iomap_writeback_ops gfs2_writeback_ops = { - .map_blocks = gfs2_map_blocks, + .writeback_folio = gfs2_writeback_folio, }; diff --git a/fs/iomap/buffered-io-bio.c b/fs/iomap/buffered-io-bio.c index d5dfa1b3eef7..e052fc8b46c1 100644 --- a/fs/iomap/buffered-io-bio.c +++ b/fs/iomap/buffered-io-bio.c @@ -9,6 +9,7 @@ #include <linux/writeback.h> #include "internal.h" +#include "trace.h" static void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, int error) @@ -208,7 +209,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, * At the end of a writeback pass, there will be a cached ioend remaining on the * writepage context that the caller will need to submit. 
*/ -int iomap_bio_add_to_ioend(struct iomap_writepage_ctx *wpc, +static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct folio *folio, struct inode *inode, loff_t pos, loff_t end_pos, unsigned len) @@ -290,3 +291,49 @@ int iomap_bio_add_to_ioend(struct iomap_writepage_ctx *wpc, wbc_account_cgroup_owner(wbc, folio, len); return 0; } + +int iomap_bio_writeback_folio(struct iomap_writeback_folio_range *ctx, + iomap_map_blocks_t map_blocks) +{ + struct iomap_writepage_ctx *wpc = ctx->wpc; + struct folio *folio = ctx->folio; + u64 pos = ctx->pos; + u64 end_pos = ctx->end_pos; + u32 dirty_len = ctx->dirty_len; + struct writeback_control *wbc = ctx->wbc; + struct inode *inode = folio->mapping->host; + int error; + + do { + unsigned map_len; + + error = map_blocks(wpc, inode, pos, dirty_len); + if (error) + break; + trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap); + + map_len = min_t(u64, dirty_len, + wpc->iomap.offset + wpc->iomap.length - pos); + WARN_ON_ONCE(!folio->private && map_len < dirty_len); + + switch (wpc->iomap.type) { + case IOMAP_INLINE: + WARN_ON_ONCE(1); + error = -EIO; + break; + case IOMAP_HOLE: + break; + default: + error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos, + end_pos, map_len); + if (!error) + ctx->async_writeback = true; + break; + } + dirty_len -= map_len; + pos += map_len; + } while (dirty_len && !error); + + return error; +} +EXPORT_SYMBOL_GPL(iomap_bio_writeback_folio); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 2f620ebe20e2..2b8d733f65da 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1486,57 +1486,6 @@ int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) return error; } -static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct folio *folio, - struct inode *inode, u64 pos, u64 end_pos, - unsigned dirty_len, bool *async_writeback) -{ - int error; - - do { - unsigned 
map_len; - - error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len); - if (error) - break; - trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap); - - map_len = min_t(u64, dirty_len, - wpc->iomap.offset + wpc->iomap.length - pos); - WARN_ON_ONCE(!folio->private && map_len < dirty_len); - - switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - error = -EIO; - break; - case IOMAP_HOLE: - break; - default: - error = iomap_bio_add_to_ioend(wpc, wbc, folio, inode, - pos, end_pos, map_len); - if (!error) - *async_writeback = true; - break; - } - dirty_len -= map_len; - pos += map_len; - } while (dirty_len && !error); - - /* - * We cannot cancel the ioend directly here on error. We may have - * already set other pages under writeback and hence we have to run I/O - * completion to mark the error state of the pages under writeback - * appropriately. - * - * Just let the file system know what portion of the folio failed to - * map. - */ - if (error && wpc->ops->discard_folio) - wpc->ops->discard_folio(folio, pos); - return error; -} - /* * Check interaction of the folio with the file end. 
* @@ -1603,9 +1552,14 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); u64 end_aligned = 0; - bool async_writeback = false; int error = 0; u32 rlen; + struct iomap_writeback_folio_range ctx = { + .wpc = wpc, + .wbc = wbc, + .folio = folio, + .end_pos = end_pos, + }; WARN_ON_ONCE(!folio_test_locked(folio)); WARN_ON_ONCE(folio_test_dirty(folio)); @@ -1646,14 +1600,20 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, */ end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { - error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, - pos, end_pos, rlen, &async_writeback); - if (error) + ctx.pos = pos; + ctx.dirty_len = rlen; + WARN_ON(!wpc->ops->writeback_folio); + error = wpc->ops->writeback_folio(&ctx); + + if (error) { + if (wpc->ops->discard_folio) + wpc->ops->discard_folio(folio, pos); break; + } pos += rlen; } - if (async_writeback) + if (ctx.async_writeback) wpc->nr_folios++; /* @@ -1675,7 +1635,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, if (atomic_dec_and_test(&ifs->write_bytes_pending)) folio_end_writeback(folio); } else { - if (!async_writeback) + if (!ctx.async_writeback) folio_end_writeback(folio); } mapping_set_error(inode->i_mapping, error); diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h index 27e8a174dc3f..6efb5905bf4f 100644 --- a/fs/iomap/internal.h +++ b/fs/iomap/internal.h @@ -37,9 +37,6 @@ int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error); #ifdef CONFIG_BLOCK int iomap_bio_read_folio_sync(loff_t block_start, struct folio *folio, size_t poff, size_t plen, const struct iomap *iomap); -int iomap_bio_add_to_ioend(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct folio *folio, - struct inode *inode, loff_t pos, loff_t end_pos, unsigned len); void iomap_bio_readpage(const struct iomap *iomap, loff_t pos, struct 
iomap_readpage_ctx *ctx, size_t poff, size_t plen, loff_t length); @@ -47,7 +44,6 @@ void iomap_bio_ioend_error(struct iomap_writepage_ctx *wpc, int error); void iomap_submit_bio(struct bio *bio); #else #define iomap_bio_read_folio_sync(...) (-ENOSYS) -#define iomap_bio_add_to_ioend(...) (-ENOSYS) #define iomap_bio_readpage(...) ((void)0) #define iomap_bio_ioend_error(...) ((void)0) #define iomap_submit_bio(...) ((void)0) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 63151feb9c3f..8878c015bd48 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -455,6 +455,11 @@ xfs_ioend_needs_wq_completion( return false; } +static int xfs_writeback_folio(struct iomap_writeback_folio_range *ctx) +{ + return iomap_bio_writeback_folio(ctx, xfs_map_blocks); +} + static int xfs_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -526,7 +531,7 @@ xfs_discard_folio( } static const struct iomap_writeback_ops xfs_writeback_ops = { - .map_blocks = xfs_map_blocks, + .writeback_folio = xfs_writeback_folio, .submit_ioend = xfs_submit_ioend, .discard_folio = xfs_discard_folio, }; @@ -608,6 +613,11 @@ xfs_zoned_map_blocks( return 0; } +static int xfs_zoned_writeback_folio(struct iomap_writeback_folio_range *ctx) +{ + return iomap_bio_writeback_folio(ctx, xfs_zoned_map_blocks); +} + static int xfs_zoned_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -621,7 +631,7 @@ xfs_zoned_submit_ioend( } static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { - .map_blocks = xfs_zoned_map_blocks, + .writeback_folio = xfs_zoned_writeback_folio, .submit_ioend = xfs_zoned_submit_ioend, .discard_folio = xfs_discard_folio, }; diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 42e2c0065bb3..11901e40e810 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -145,8 +145,13 @@ static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, IOMAP_WRITE, &wpc->iomap, NULL); } +static int zonefs_writeback_folio(struct iomap_writeback_folio_range *ctx) +{ + return 
iomap_bio_writeback_folio(ctx, zonefs_write_map_blocks); +} + static const struct iomap_writeback_ops zonefs_writeback_ops = { - .map_blocks = zonefs_write_map_blocks, + .writeback_folio = zonefs_writeback_folio, }; static int zonefs_writepages(struct address_space *mapping, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 51cf3e863caf..fe827948035d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -16,6 +16,7 @@ struct inode; struct iomap_iter; struct iomap_dio; struct iomap_writepage_ctx; +struct iomap_writeback_folio_range; struct iov_iter; struct kiocb; struct page; @@ -427,18 +428,13 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) struct iomap_writeback_ops { /* - * Required, maps the blocks so that writeback can be performed on - * the range starting at offset. + * Required. * - * Can return arbitrarily large regions, but we need to call into it at - * least once per folio to allow the file systems to synchronize with - * the write path that could be invalidating mappings. - * - * An existing mapping from a previous call to this method can be reused - * by the file system if it is still valid. + * If the writeback is done asynchronously, the caller is responsible + * for ending writeback on the folio once all the dirty ranges have been + * written out and the caller should set ctx->async_writeback to true. 
*/ - int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned len); + int (*writeback_folio)(struct iomap_writeback_folio_range *ctx); /* * Optional, allows the file systems to hook into bio submission, @@ -464,6 +460,16 @@ struct iomap_writepage_ctx { u32 nr_folios; /* folios added to the ioend */ }; +struct iomap_writeback_folio_range { + struct iomap_writepage_ctx *wpc; + struct writeback_control *wbc; + struct folio *folio; + u64 pos; + u64 end_pos; + u32 dirty_len; + bool async_writeback; /* should get set to true if writeback is async */ +}; + struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, loff_t file_offset, u16 ioend_flags); struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, @@ -541,4 +547,20 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, extern struct bio_set iomap_ioend_bioset; +/* + * Maps the blocks so that writeback can be performed on the range + * starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. + */ +typedef int iomap_map_blocks_t(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset, unsigned int len); +int iomap_bio_writeback_folio(struct iomap_writeback_folio_range *ctx, + iomap_map_blocks_t map_blocks); + #endif /* LINUX_IOMAP_H */ -- 2.47.1