Re: [PATCH v4 09/15] iomap: add caller-provided callbacks for read and readahead

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Sep 22, 2025 at 05:23:47PM -0700, Joanne Koong wrote:
> Add caller-provided callbacks for read and readahead so that it can be
> used generically, especially by filesystems that are not block-based.
> 
> In particular, this:
> * Modifies the read and readahead interface to take in a
>   struct iomap_read_folio_ctx that is publicly defined as:
> 
>   struct iomap_read_folio_ctx {
> 	const struct iomap_read_ops *ops;
> 	struct folio *cur_folio;
> 	struct readahead_control *rac;
> 	void *read_ctx;
>   };

I'm starting to wonder if struct iomap_read_ops should contain a struct
iomap_ops object, but that might result in more churn through this
patchset.

<shrug> What do you think?

> 
>   where struct iomap_read_ops is defined as:
> 
>   struct iomap_read_ops {
>       int (*read_folio_range)(const struct iomap_iter *iter,
>                              struct iomap_read_folio_ctx *ctx,
>                              size_t len);
>       void (*read_submit)(struct iomap_read_folio_ctx *ctx);
>   };
> 
>   read_folio_range() reads in the folio range and is required by the
>   caller to provide. read_submit() is optional and is used for
>   submitting any pending read requests.
> 
> * Modifies existing filesystems that use iomap for read and readahead to
>   use the new API, through the new statically inlined helpers
>   iomap_bio_read_folio() and iomap_bio_readahead(). There is no change
>   in functinality for those filesystems.

Nit: functionality

> 
> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> ---
>  .../filesystems/iomap/operations.rst          | 45 ++++++++++++
>  block/fops.c                                  |  5 +-
>  fs/erofs/data.c                               |  5 +-
>  fs/gfs2/aops.c                                |  6 +-
>  fs/iomap/buffered-io.c                        | 68 +++++++++++--------
>  fs/xfs/xfs_aops.c                             |  5 +-
>  fs/zonefs/file.c                              |  5 +-
>  include/linux/iomap.h                         | 62 ++++++++++++++++-
>  8 files changed, 158 insertions(+), 43 deletions(-)
> 
> diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst
> index 067ed8e14ef3..dbb193415c0e 100644
> --- a/Documentation/filesystems/iomap/operations.rst
> +++ b/Documentation/filesystems/iomap/operations.rst
> @@ -135,6 +135,29 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap:
>  
>   * ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``.
>  
> +``struct iomap_read_ops``
> +--------------------------
> +
> +.. code-block:: c
> +
> + struct iomap_read_ops {
> +     int (*read_folio_range)(const struct iomap_iter *iter,
> +                             struct iomap_read_folio_ctx *ctx, size_t len);
> +     void (*submit_read)(struct iomap_read_folio_ctx *ctx);
> + };
> +
> +iomap calls these functions:
> +
> +  - ``read_folio_range``: Called to read in the range. This must be provided
> +    by the caller. The caller is responsible for calling
> +    iomap_start_folio_read() and iomap_finish_folio_read() before and after
> +    reading in the folio range. This should be done even if an error is
> +    encountered during the read. This returns 0 on success or a negative error
> +    on failure.
> +
> +  - ``submit_read``: Submit any pending read requests. This function is
> +    optional.
> +
>  Internal per-Folio State
>  ------------------------
>  
> @@ -182,6 +205,28 @@ The ``flags`` argument to ``->iomap_begin`` will be set to zero.
>  The pagecache takes whatever locks it needs before calling the
>  filesystem.
>  
> +Both ``iomap_readahead`` and ``iomap_read_folio`` pass in a ``struct
> +iomap_read_folio_ctx``:
> +
> +.. code-block:: c
> +
> + struct iomap_read_folio_ctx {
> +    const struct iomap_read_ops *ops;
> +    struct folio *cur_folio;
> +    struct readahead_control *rac;
> +    void *read_ctx;
> + };
> +
> +``iomap_readahead`` must set:
> + * ``ops->read_folio_range()`` and ``rac``
> +
> +``iomap_read_folio`` must set:
> + * ``ops->read_folio_range()`` and ``cur_folio``

Hrmm, so we're multiplexing read and readahead through the same
iomap_read_folio_ctx.  Is there ever a case where cur_folio and rac can
both be used by the underlying machinery?  I think the answer to that
question is "no" but I don't think the struct definition makes that
obvious.

> +
> +``ops->submit_read()`` and ``read_ctx`` are optional. ``read_ctx`` is used to
> +pass in any custom data the caller needs accessible in the ops callbacks for
> +fulfilling reads.
> +
>  Buffered Writes
>  ---------------
>  
> diff --git a/block/fops.c b/block/fops.c
> index ddbc69c0922b..a2c2391d8dfa 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -533,12 +533,13 @@ const struct address_space_operations def_blk_aops = {
>  #else /* CONFIG_BUFFER_HEAD */
>  static int blkdev_read_folio(struct file *file, struct folio *folio)
>  {
> -	return iomap_read_folio(folio, &blkdev_iomap_ops);
> +	iomap_bio_read_folio(folio, &blkdev_iomap_ops);
> +	return 0;
>  }
>  
>  static void blkdev_readahead(struct readahead_control *rac)
>  {
> -	iomap_readahead(rac, &blkdev_iomap_ops);
> +	iomap_bio_readahead(rac, &blkdev_iomap_ops);
>  }
>  
>  static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index 3b1ba571c728..be4191b33321 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -371,7 +371,8 @@ static int erofs_read_folio(struct file *file, struct folio *folio)
>  {
>  	trace_erofs_read_folio(folio, true);
>  
> -	return iomap_read_folio(folio, &erofs_iomap_ops);
> +	iomap_bio_read_folio(folio, &erofs_iomap_ops);
> +	return 0;
>  }
>  
>  static void erofs_readahead(struct readahead_control *rac)
> @@ -379,7 +380,7 @@ static void erofs_readahead(struct readahead_control *rac)
>  	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
>  					readahead_count(rac), true);
>  
> -	return iomap_readahead(rac, &erofs_iomap_ops);
> +	iomap_bio_readahead(rac, &erofs_iomap_ops);
>  }
>  
>  static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
> index 47d74afd63ac..38d4f343187a 100644
> --- a/fs/gfs2/aops.c
> +++ b/fs/gfs2/aops.c
> @@ -424,11 +424,11 @@ static int gfs2_read_folio(struct file *file, struct folio *folio)
>  	struct inode *inode = folio->mapping->host;
>  	struct gfs2_inode *ip = GFS2_I(inode);
>  	struct gfs2_sbd *sdp = GFS2_SB(inode);
> -	int error;
> +	int error = 0;
>  
>  	if (!gfs2_is_jdata(ip) ||
>  	    (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
> -		error = iomap_read_folio(folio, &gfs2_iomap_ops);
> +		iomap_bio_read_folio(folio, &gfs2_iomap_ops);
>  	} else if (gfs2_is_stuffed(ip)) {
>  		error = stuffed_read_folio(ip, folio);
>  	} else {
> @@ -503,7 +503,7 @@ static void gfs2_readahead(struct readahead_control *rac)
>  	else if (gfs2_is_jdata(ip))
>  		mpage_readahead(rac, gfs2_block_map);
>  	else
> -		iomap_readahead(rac, &gfs2_iomap_ops);
> +		iomap_bio_readahead(rac, &gfs2_iomap_ops);
>  }
>  
>  /**
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 469079524208..81ba0cc7705a 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -373,12 +373,6 @@ static void iomap_read_end_io(struct bio *bio)
>  	bio_put(bio);
>  }
>  
> -struct iomap_read_folio_ctx {
> -	struct folio		*cur_folio;
> -	void			*read_ctx;
> -	struct readahead_control *rac;
> -};
> -
>  static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
>  {
>  	struct bio *bio = ctx->read_ctx;
> @@ -387,11 +381,12 @@ static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
>  		submit_bio(bio);
>  }
>  
> -static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen)
> +static int iomap_bio_read_folio_range(const struct iomap_iter *iter,
> +		struct iomap_read_folio_ctx *ctx, size_t plen)
>  {
>  	struct folio *folio = ctx->cur_folio;
>  	const struct iomap *iomap = &iter->iomap;
> +	loff_t pos = iter->pos;
>  	size_t poff = offset_in_folio(folio, pos);
>  	loff_t length = iomap_length(iter);
>  	sector_t sector;
> @@ -426,8 +421,15 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
>  		bio_add_folio_nofail(bio, folio, plen, poff);
>  		ctx->read_ctx = bio;
>  	}
> +	return 0;
>  }
>  
> +const struct iomap_read_ops iomap_bio_read_ops = {
> +	.read_folio_range	= iomap_bio_read_folio_range,
> +	.submit_read		= iomap_bio_submit_read,
> +};
> +EXPORT_SYMBOL_GPL(iomap_bio_read_ops);
> +
>  static int iomap_read_folio_iter(struct iomap_iter *iter,
>  		struct iomap_read_folio_ctx *ctx, bool *folio_owned)
>  {
> @@ -436,7 +438,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
>  	loff_t length = iomap_length(iter);
>  	struct folio *folio = ctx->cur_folio;
>  	size_t poff, plen;
> -	loff_t count;
> +	loff_t pos_diff;
>  	int ret;
>  
>  	if (iomap->type == IOMAP_INLINE) {
> @@ -454,12 +456,16 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
>  		iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff,
>  				&plen);
>  
> -		count = pos - iter->pos + plen;
> -		if (WARN_ON_ONCE(count > length))
> +		pos_diff = pos - iter->pos;
> +		if (WARN_ON_ONCE(pos_diff + plen > length))
>  			return -EIO;

Er, can these changes get their own patch describing why the count ->
pos_diff change was made?

--D

>  
> +		ret = iomap_iter_advance(iter, pos_diff);
> +		if (ret)
> +			return ret;
> +
>  		if (plen == 0)
> -			return iomap_iter_advance(iter, count);
> +			return 0;
>  
>  		/* zero post-eof blocks as the page may be mapped */
>  		if (iomap_block_needs_zeroing(iter, pos)) {
> @@ -467,28 +473,29 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
>  			iomap_set_range_uptodate(folio, poff, plen);
>  		} else {
>  			*folio_owned = true;
> -			iomap_bio_read_folio_range(iter, ctx, pos, plen);
> +			ret = ctx->ops->read_folio_range(iter, ctx, plen);
> +			if (ret)
> +				return ret;
>  		}
>  
> -		ret = iomap_iter_advance(iter, count);
> +		ret = iomap_iter_advance(iter, plen);
>  		if (ret)
>  			return ret;
> -		length -= count;
> +		length -= pos_diff + plen;
>  		pos = iter->pos;
>  	}
>  	return 0;
>  }
>  
> -int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
> +int iomap_read_folio(const struct iomap_ops *ops,
> +		struct iomap_read_folio_ctx *ctx)
>  {
> +	struct folio *folio = ctx->cur_folio;
>  	struct iomap_iter iter = {
>  		.inode		= folio->mapping->host,
>  		.pos		= folio_pos(folio),
>  		.len		= folio_size(folio),
>  	};
> -	struct iomap_read_folio_ctx ctx = {
> -		.cur_folio	= folio,
> -	};
>  	/*
>  	 * If an IO helper takes ownership of the folio, it is responsible for
>  	 * unlocking it when the read completes.
> @@ -499,10 +506,11 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
>  	trace_iomap_readpage(iter.inode, 1);
>  
>  	while ((ret = iomap_iter(&iter, ops)) > 0)
> -		iter.status = iomap_read_folio_iter(&iter, &ctx,
> +		iter.status = iomap_read_folio_iter(&iter, ctx,
>  				&folio_owned);
>  
> -	iomap_bio_submit_read(&ctx);
> +	if (ctx->ops->submit_read)
> +		ctx->ops->submit_read(ctx);
>  
>  	if (!folio_owned)
>  		folio_unlock(folio);
> @@ -545,8 +553,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
>  
>  /**
>   * iomap_readahead - Attempt to read pages from a file.
> - * @rac: Describes the pages to be read.
>   * @ops: The operations vector for the filesystem.
> + * @ctx: The ctx used for issuing readahead.
>   *
>   * This function is for filesystems to call to implement their readahead
>   * address_space operation.
> @@ -558,16 +566,15 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
>   * function is called with memalloc_nofs set, so allocations will not cause
>   * the filesystem to be reentered.
>   */
> -void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
> +void iomap_readahead(const struct iomap_ops *ops,
> +		struct iomap_read_folio_ctx *ctx)
>  {
> +	struct readahead_control *rac = ctx->rac;
>  	struct iomap_iter iter = {
>  		.inode	= rac->mapping->host,
>  		.pos	= readahead_pos(rac),
>  		.len	= readahead_length(rac),
>  	};
> -	struct iomap_read_folio_ctx ctx = {
> -		.rac	= rac,
> -	};
>  	/*
>  	 * If an IO helper takes ownership of the folio, it is responsible for
>  	 * unlocking it when the read completes.
> @@ -577,13 +584,14 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
>  	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
>  
>  	while (iomap_iter(&iter, ops) > 0)
> -		iter.status = iomap_readahead_iter(&iter, &ctx,
> +		iter.status = iomap_readahead_iter(&iter, ctx,
>  					&cur_folio_owned);
>  
> -	iomap_bio_submit_read(&ctx);
> +	if (ctx->ops->submit_read)
> +		ctx->ops->submit_read(ctx);
>  
> -	if (ctx.cur_folio && !cur_folio_owned)
> -		folio_unlock(ctx.cur_folio);
> +	if (ctx->cur_folio && !cur_folio_owned)
> +		folio_unlock(ctx->cur_folio);
>  }
>  EXPORT_SYMBOL_GPL(iomap_readahead);
>  
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index a26f79815533..0c2ed00733f2 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -742,14 +742,15 @@ xfs_vm_read_folio(
>  	struct file		*unused,
>  	struct folio		*folio)
>  {
> -	return iomap_read_folio(folio, &xfs_read_iomap_ops);
> +	iomap_bio_read_folio(folio, &xfs_read_iomap_ops);
> +	return 0;
>  }
>  
>  STATIC void
>  xfs_vm_readahead(
>  	struct readahead_control	*rac)
>  {
> -	iomap_readahead(rac, &xfs_read_iomap_ops);
> +	iomap_bio_readahead(rac, &xfs_read_iomap_ops);
>  }
>  
>  static int
> diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
> index fd3a5922f6c3..4d6e7eb52966 100644
> --- a/fs/zonefs/file.c
> +++ b/fs/zonefs/file.c
> @@ -112,12 +112,13 @@ static const struct iomap_ops zonefs_write_iomap_ops = {
>  
>  static int zonefs_read_folio(struct file *unused, struct folio *folio)
>  {
> -	return iomap_read_folio(folio, &zonefs_read_iomap_ops);
> +	iomap_bio_read_folio(folio, &zonefs_read_iomap_ops);
> +	return 0;
>  }
>  
>  static void zonefs_readahead(struct readahead_control *rac)
>  {
> -	iomap_readahead(rac, &zonefs_read_iomap_ops);
> +	iomap_bio_readahead(rac, &zonefs_read_iomap_ops);
>  }
>  
>  /*
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index edc7b3682903..c1a7613bca6e 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -16,6 +16,7 @@ struct inode;
>  struct iomap_iter;
>  struct iomap_dio;
>  struct iomap_writepage_ctx;
> +struct iomap_read_folio_ctx;
>  struct iov_iter;
>  struct kiocb;
>  struct page;
> @@ -337,8 +338,10 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		const struct iomap_ops *ops,
>  		const struct iomap_write_ops *write_ops, void *private);
> -int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
> -void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
> +int iomap_read_folio(const struct iomap_ops *ops,
> +		struct iomap_read_folio_ctx *ctx);
> +void iomap_readahead(const struct iomap_ops *ops,
> +		struct iomap_read_folio_ctx *ctx);
>  bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
>  struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
>  bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
> @@ -476,6 +479,35 @@ void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
>  int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio);
>  int iomap_writepages(struct iomap_writepage_ctx *wpc);
>  
> +struct iomap_read_folio_ctx {
> +	const struct iomap_read_ops *ops;
> +	struct folio		*cur_folio;
> +	struct readahead_control *rac;
> +	void			*read_ctx;
> +};
> +
> +struct iomap_read_ops {
> +	/*
> +	 * Read in a folio range.
> +	 *
> +	 * The caller is responsible for calling iomap_start_folio_read() and
> +	 * iomap_finish_folio_read() before and after reading in the folio
> +	 * range. This should be done even if an error is encountered during the
> +	 * read.
> +	 *
> +	 * Returns 0 on success or a negative error on failure.
> +	 */
> +	int (*read_folio_range)(const struct iomap_iter *iter,
> +			struct iomap_read_folio_ctx *ctx, size_t len);
> +
> +	/*
> +	 * Submit any pending read requests.
> +	 *
> +	 * This is optional.
> +	 */
> +	void (*submit_read)(struct iomap_read_folio_ctx *ctx);
> +};
> +
>  /*
>   * Flags for direct I/O ->end_io:
>   */
> @@ -541,4 +573,30 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
>  
>  extern struct bio_set iomap_ioend_bioset;
>  
> +#ifdef CONFIG_BLOCK
> +extern const struct iomap_read_ops iomap_bio_read_ops;
> +
> +static inline void iomap_bio_read_folio(struct folio *folio,
> +		const struct iomap_ops *ops)
> +{
> +	struct iomap_read_folio_ctx ctx = {
> +		.ops		= &iomap_bio_read_ops,
> +		.cur_folio	= folio,
> +	};
> +
> +	iomap_read_folio(ops, &ctx);
> +}
> +
> +static inline void iomap_bio_readahead(struct readahead_control *rac,
> +		const struct iomap_ops *ops)
> +{
> +	struct iomap_read_folio_ctx ctx = {
> +		.ops		= &iomap_bio_read_ops,
> +		.rac		= rac,
> +	};
> +
> +	iomap_readahead(ops, &ctx);
> +}
> +#endif /* CONFIG_BLOCK */
> +
>  #endif /* LINUX_IOMAP_H */
> -- 
> 2.47.3
> 
> 




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux