On Tue, Jun 17, 2025 at 3:55 AM Christoph Hellwig <hch@xxxxxx> wrote: > > Replace ->map_blocks with a new ->writeback_range, which differs in the > following ways: > > - it must also queue up the I/O for writeback, that is called into the > slightly refactored and extended in scope iomap_add_to_ioend for > each region > - can handle only a part of the requested region, that is the retry > loop for partial mappings moves to the caller > - handles cleanup on failures as well, and thus also replaces the > discard_folio method only implemented by XFS. > > This will allow to use the iomap writeback code also for file systems > that are not block based like fuse. > > Co-developed-by: Joanne Koong <joannelkoong@xxxxxxxxx> > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > .../filesystems/iomap/operations.rst | 23 +-- > block/fops.c | 25 ++- > fs/gfs2/bmap.c | 26 +-- > fs/iomap/buffered-io.c | 93 +++++------ > fs/iomap/trace.h | 2 +- > fs/xfs/xfs_aops.c | 154 ++++++++++-------- > fs/zonefs/file.c | 28 ++-- > include/linux/iomap.h | 20 +-- > 8 files changed, 187 insertions(+), 184 deletions(-) > > diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst > index 3b628e370d88..b28f215db6e5 100644 > --- a/Documentation/filesystems/iomap/operations.rst > +++ b/Documentation/filesystems/iomap/operations.rst > @@ -271,7 +271,7 @@ writeback. > It does not lock ``i_rwsem`` or ``invalidate_lock``. > > The dirty bit will be cleared for all folios run through the > -``->map_blocks`` machinery described below even if the writeback fails. > +``->writeback_range`` machinery described below even if the writeback fails. > This is to prevent dirty folio clots when storage devices fail; an > ``-EIO`` is recorded for userspace to collect via ``fsync``. > > @@ -283,15 +283,14 @@ The ``ops`` structure must be specified and is as follows: > .. code-block:: c > > struct iomap_writeback_ops { > - int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, > - loff_t offset, unsigned len); > - int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); > - void (*discard_folio)(struct folio *folio, loff_t pos); > + int (*writeback_range)(struct iomap_writepage_ctx *wpc, > + struct folio *folio, u64 pos, unsigned int len, u64 end_pos); end_pos only gets used in iomap_add_to_ioend() but it looks like end_pos can be deduced there by doing something like "end_pos = min(folio_pos(folio) + folio_size(folio), i_size_read(wpc->inode))". Would it be cleaner for ->writeback_range() to just pass in pos and len instead of also passing in end_pos? I find the end_pos arg kind of confusing anyways, like I think most people would think end_pos is the end of the dirty range (eg pos + len), not the end position of the folio. > + int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); > }; > > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index 65485a52df3b..8157b6d92c8e 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c > -static int > -xfs_map_blocks( > +static ssize_t > +xfs_writeback_range( > struct iomap_writepage_ctx *wpc, > - struct inode *inode, > - loff_t offset, > - unsigned int len) > + struct folio *folio, > + u64 offset, > + unsigned int len, > + u64 end_pos) > { > - struct xfs_inode *ip = XFS_I(inode); > + struct xfs_inode *ip = XFS_I(wpc->inode); > struct xfs_mount *mp = ip->i_mount; > - ssize_t count = i_blocksize(inode); > + ssize_t count = i_blocksize(wpc->inode); > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); > xfs_fileoff_t cow_fsb; > @@ -292,7 +334,7 @@ xfs_map_blocks( > struct xfs_bmbt_irec imap; > struct xfs_iext_cursor icur; > int retries = 0; > - int error = 0; > + ssize_t ret = 0; > unsigned int *seq; > > if (xfs_is_shutdown(mp)) > @@ -316,7 +358,7 @@ xfs_map_blocks( > * out that ensures that we always see the current value. > */ > if (xfs_imap_valid(wpc, ip, offset)) > - return 0; > + goto map_blocks; > > /* > * If we don't have a valid map, now it's time to get a new one for this > @@ -351,7 +393,7 @@ xfs_map_blocks( > */ > if (xfs_imap_valid(wpc, ip, offset)) { > xfs_iunlock(ip, XFS_ILOCK_SHARED); > - return 0; > + goto map_blocks; > } > > /* > @@ -389,7 +431,12 @@ xfs_map_blocks( > > xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); > trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); > - return 0; > +map_blocks: nit: should this be called map_blocks or changed to something like "add_to_ioend"? afaict, the mapping has already been done by this point? > + ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len); > + if (ret < 0) > + goto out_error; > + return ret; > + > allocate_blocks: > /* > * Convert a dellalloc extent to a real one. The current page is held > @@ -402,9 +449,9 @@ xfs_map_blocks( > else > > -static int > -xfs_zoned_map_blocks( > +static ssize_t > +xfs_zoned_writeback_range( > struct iomap_writepage_ctx *wpc, > - struct inode *inode, > - loff_t offset, > - unsigned int len) > + struct folio *folio, > + u64 offset, > + unsigned int len, > + u64 end_pos) > { > - struct xfs_inode *ip = XFS_I(inode); > + struct xfs_inode *ip = XFS_I(wpc->inode); > struct xfs_mount *mp = ip->i_mount; > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + len); > xfs_filblks_t count_fsb; > struct xfs_bmbt_irec imap, del; > struct xfs_iext_cursor icur; > + ssize_t ret; > > if (xfs_is_shutdown(mp)) > return -EIO; > @@ -586,7 +601,7 @@ xfs_zoned_map_blocks( > imap.br_state = XFS_EXT_NORM; > xfs_iunlock(ip, XFS_ILOCK_EXCL); > xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, 0); > - return 0; > + goto map_blocks; > } > end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount); > count_fsb = end_fsb - offset_fsb; > @@ -603,9 +618,13 @@ xfs_zoned_map_blocks( > wpc->iomap.offset = offset; > wpc->iomap.length = XFS_FSB_TO_B(mp, count_fsb); > wpc->iomap.flags = IOMAP_F_ANON_WRITE; > - > trace_xfs_zoned_map_blocks(ip, offset, wpc->iomap.length); > - return 0; > + > +map_blocks: Same question here > + ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len); > + if (ret < 0) > + xfs_discard_folio(folio, offset); > + return ret; > }