Re: [PATCH RFC 02/29] iomap: introduce iomap_read/write_region interface

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jul 28, 2025 at 10:30:06PM +0200, Andrey Albershteyn wrote:
> From: Andrey Albershteyn <aalbersh@xxxxxxxxxx>
> 
> Interface for writing data beyond EOF into offsetted region in
> page cache.
> 
> Signed-off-by: Andrey Albershteyn <aalbersh@xxxxxxxxxx>
> ---
>  include/linux/iomap.h  | 16 ++++++++
>  fs/iomap/buffered-io.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 114 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 4a0b5ebb79e9..73288f28543f 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -83,6 +83,11 @@ struct vm_fault;
>   */
>  #define IOMAP_F_PRIVATE		(1U << 12)
>  
> +/*
> + * Writes happens beyound inode EOF
> + */
> +#define IOMAP_F_BEYOND_EOF	(1U << 13)
> +
>  /*
>   * Flags set by the core iomap code during operations:
>   *
> @@ -533,4 +538,15 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
>  
>  extern struct bio_set iomap_ioend_bioset;
>  
> +struct ioregion {
> +	struct inode *inode;
> +	loff_t pos;				/* IO position */
> +	const void *buf;			/* Data to be written (in only) */
> +	size_t length;				/* Length of the date */

Length of the data ?

> +	const struct iomap_ops *ops;
> +};

This sounds like a kiocb and a kvec...

> +
> +struct folio *iomap_read_region(struct ioregion *region);
> +int iomap_write_region(struct ioregion *region);

...and these sound a lot like filemap_read and iomap_write_iter.
Why not use those?  You'd get readahead for free.  Though I guess
filemap_read cuts off at i_size so maybe that's why this is necessary?

(and by extension, is this why the existing fsverity implementations
seem to do their own readahead and reading?)

((and now I guess I see why this isn't done through the regular kiocb
interface, because then we'd be exposing post-EOF data hiding to
everyone in the system))

>  #endif /* LINUX_IOMAP_H */
> 
> -- 
> 2.50.0
> 
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 7bef232254a3..e959a206cba9 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -321,6 +321,7 @@ struct iomap_readpage_ctx {
>  	bool			cur_folio_in_bio;
>  	struct bio		*bio;
>  	struct readahead_control *rac;
> +	int			flags;

What flags go in here?

>  };
>  
>  /**
> @@ -387,7 +388,8 @@ static int iomap_readpage_iter(struct iomap_iter *iter,
>  	if (plen == 0)
>  		goto done;
>  
> -	if (iomap_block_needs_zeroing(iter, pos)) {
> +	if (iomap_block_needs_zeroing(iter, pos) &&
> +	    !(iomap->flags & IOMAP_F_BEYOND_EOF)) {
>  		folio_zero_range(folio, poff, plen);
>  		iomap_set_range_uptodate(folio, poff, plen);
>  		goto done;
> @@ -2007,3 +2009,98 @@ iomap_writepages_unbound(struct address_space *mapping, struct writeback_control
>  	return iomap_submit_ioend(wpc, error);
>  }
>  EXPORT_SYMBOL_GPL(iomap_writepages_unbound);
> +
> +struct folio *
> +iomap_read_region(struct ioregion *region)
> +{
> +	struct inode *inode = region->inode;
> +	fgf_t fgp = FGP_CREAT | FGP_LOCK | fgf_set_order(region->length);
> +	pgoff_t index = (region->pos) >> PAGE_SHIFT;
> +	struct folio *folio = __filemap_get_folio(inode->i_mapping, index, fgp,
> +				    mapping_gfp_mask(inode->i_mapping));
> +	int ret;
> +	struct iomap_iter iter = {
> +		.inode		= folio->mapping->host,
> +		.pos		= region->pos,
> +		.len		= region->length,
> +	};
> +	struct iomap_readpage_ctx ctx = {
> +		.cur_folio	= folio,
> +	};
> +
> +	if (folio_test_uptodate(folio)) {
> +		folio_unlock(folio);
> +		return folio;
> +	}
> +
> +	while ((ret = iomap_iter(&iter, region->ops)) > 0)
> +		iter.status = iomap_read_folio_iter(&iter, &ctx);

Huh, we don't read into region->buf?  Oh, I see, this gets iomap to
install an uptodate folio in the pagecache, and then later we can
just hand it to fsverity.  Maybe?

--D

> +
> +	if (ctx.bio) {
> +		submit_bio(ctx.bio);
> +		WARN_ON_ONCE(!ctx.cur_folio_in_bio);
> +	} else {
> +		WARN_ON_ONCE(ctx.cur_folio_in_bio);
> +		folio_unlock(folio);
> +	}
> +
> +	return folio;
> +}
> +EXPORT_SYMBOL_GPL(iomap_read_region);
> +
> +static int iomap_write_region_iter(struct iomap_iter *iter, const void *buf)
> +{
> +	loff_t pos = iter->pos;
> +	u64 bytes = iomap_length(iter);
> +	int status;
> +
> +	do {
> +		struct folio *folio;
> +		size_t offset;
> +		bool ret;

Is balance_dirty_pages_ratelimited_flags need here if we're at the dirty
thresholds?

> +
> +		bytes = min_t(u64, SIZE_MAX, bytes);
> +		status = iomap_write_begin(iter, &folio, &offset, &bytes);
> +		if (status)
> +			return status;
> +		if (iter->iomap.flags & IOMAP_F_STALE)
> +			break;
> +
> +		offset = offset_in_folio(folio, pos);
> +		if (bytes > folio_size(folio) - offset)
> +			bytes = folio_size(folio) - offset;
> +
> +		memcpy_to_folio(folio, offset, buf, bytes);
> +
> +		ret = iomap_write_end(iter, bytes, bytes, folio);
> +		if (WARN_ON_ONCE(!ret))
> +			return -EIO;
> +
> +		__iomap_put_folio(iter, bytes, folio);
> +		if (WARN_ON_ONCE(!ret))
> +			return -EIO;
> +
> +		status = iomap_iter_advance(iter, &bytes);
> +		if (status)
> +			break;
> +	} while (bytes > 0);
> +
> +	return status;
> +}

Hrm, stripped down version of iomap_write_iter without the isize
updates.

--D

> +
> +int
> +iomap_write_region(struct ioregion *region)
> +{
> +	struct iomap_iter iter = {
> +		.inode		= region->inode,
> +		.pos		= region->pos,
> +		.len		= region->length,
> +	};
> +	ssize_t ret;
> +
> +	while ((ret = iomap_iter(&iter, region->ops)) > 0)
> +		iter.status = iomap_write_region_iter(&iter, region->buf);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(iomap_write_region);




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux