On 2025-07-29 15:22:52, Darrick J. Wong wrote:
> On Mon, Jul 28, 2025 at 10:30:06PM +0200, Andrey Albershteyn wrote:
> > From: Andrey Albershteyn <aalbersh@xxxxxxxxxx>
> > 
> > Interface for writing data beyond EOF into offsetted region in
> > page cache.
> > 
> > Signed-off-by: Andrey Albershteyn <aalbersh@xxxxxxxxxx>
> > ---
> >  include/linux/iomap.h  | 16 ++++++++
> >  fs/iomap/buffered-io.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 114 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index 4a0b5ebb79e9..73288f28543f 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -83,6 +83,11 @@ struct vm_fault;
> >   */
> >  #define IOMAP_F_PRIVATE (1U << 12)
> > 
> > +/*
> > + * Writes happens beyound inode EOF
> > + */
> > +#define IOMAP_F_BEYOND_EOF (1U << 13)
> > +
> >  /*
> >   * Flags set by the core iomap code during operations:
> >   *
> > @@ -533,4 +538,15 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
> > 
> >  extern struct bio_set iomap_ioend_bioset;
> > 
> > +struct ioregion {
> > +        struct inode *inode;
> > +        loff_t pos;        /* IO position */
> > +        const void *buf;   /* Data to be written (in only) */
> > +        size_t length;     /* Length of the date */
> 
> Length of the data ?

thanks!

> 
> > +        const struct iomap_ops *ops;
> > +};
> 
> This sounds like a kiocb and a kvec...
> 
> > +
> > +struct folio *iomap_read_region(struct ioregion *region);
> > +int iomap_write_region(struct ioregion *region);
> 
> ...and these sound a lot like filemap_read and iomap_write_iter.
> Why not use those?  You'd get readahead for free.  Though I guess
> filemap_read cuts off at i_size so maybe that's why this is necessary?
> 
> (and by extension, is this why the existing fsverity implementations
> seem to do their own readahead and reading?)
> 
> ((and now I guess I see why this isn't done through the regular kiocb
> interface, because then we'd be exposing post-EOF data hiding to
> everyone in the system))
> 
> >  #endif /* LINUX_IOMAP_H */
> > 
> > --
> > 2.50.0
> > 
> > 
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 7bef232254a3..e959a206cba9 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -321,6 +321,7 @@ struct iomap_readpage_ctx {
> >          bool cur_folio_in_bio;
> >          struct bio *bio;
> >          struct readahead_control *rac;
> > +        int flags;
> 
> What flags go in here?

IOMAP_F_BEYOND_EOF
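It isn't actually set in the hunks below yet (iomap_read_region() only
fills in .cur_folio), so just as a sketch of the intent, the caller side
would look something like:

	struct iomap_readpage_ctx ctx = {
		.cur_folio = folio,
		/* hint that this read targets a post-EOF region */
		.flags = IOMAP_F_BEYOND_EOF,
	};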
> 
> >  };
> > 
> >  /**
> > @@ -387,7 +388,8 @@ static int iomap_readpage_iter(struct iomap_iter *iter,
> >          if (plen == 0)
> >                  goto done;
> > 
> > -        if (iomap_block_needs_zeroing(iter, pos)) {
> > +        if (iomap_block_needs_zeroing(iter, pos) &&
> > +            !(iomap->flags & IOMAP_F_BEYOND_EOF)) {
> >                  folio_zero_range(folio, poff, plen);
> >                  iomap_set_range_uptodate(folio, poff, plen);
> >                  goto done;
> > @@ -2007,3 +2009,98 @@ iomap_writepages_unbound(struct address_space *mapping, struct writeback_control
> >          return iomap_submit_ioend(wpc, error);
> >  }
> >  EXPORT_SYMBOL_GPL(iomap_writepages_unbound);
> > +
> > +struct folio *
> > +iomap_read_region(struct ioregion *region)
> > +{
> > +        struct inode *inode = region->inode;
> > +        fgf_t fgp = FGP_CREAT | FGP_LOCK | fgf_set_order(region->length);
> > +        pgoff_t index = (region->pos) >> PAGE_SHIFT;
> > +        struct folio *folio = __filemap_get_folio(inode->i_mapping, index, fgp,
> > +                        mapping_gfp_mask(inode->i_mapping));
> > +        int ret;
> > +        struct iomap_iter iter = {
> > +                .inode = folio->mapping->host,
> > +                .pos = region->pos,
> > +                .len = region->length,
> > +        };
> > +        struct iomap_readpage_ctx ctx = {
> > +                .cur_folio = folio,
> > +        };
> > +
> > +        if (folio_test_uptodate(folio)) {
> > +                folio_unlock(folio);
> > +                return folio;
> > +        }
> > +
> > +        while ((ret = iomap_iter(&iter, region->ops)) > 0)
> > +                iter.status = iomap_read_folio_iter(&iter, &ctx);
> 
> Huh, we don't read into region->buf?  Oh, I see, this gets iomap to
> install an uptodate folio in the pagecache, and then later we can
> just hand it to fsverity.  Maybe?

yes

> 
> --D
> 
> > +
> > +        if (ctx.bio) {
> > +                submit_bio(ctx.bio);
> > +                WARN_ON_ONCE(!ctx.cur_folio_in_bio);
> > +        } else {
> > +                WARN_ON_ONCE(ctx.cur_folio_in_bio);
> > +                folio_unlock(folio);
> > +        }
> > +
> > +        return folio;
> > +}
> > +EXPORT_SYMBOL_GPL(iomap_read_region);
> > +
> > +static int iomap_write_region_iter(struct iomap_iter *iter, const void *buf)
> > +{
> > +        loff_t pos = iter->pos;
> > +        u64 bytes = iomap_length(iter);
> > +        int status;
> > +
> > +        do {
> > +                struct folio *folio;
> > +                size_t offset;
> > +                bool ret;
> 
> Is balance_dirty_pages_ratelimited_flags need here if we're at the dirty
> thresholds?

Ah I see, missed that, I will add it, thanks!

> 
> > +
> > +                bytes = min_t(u64, SIZE_MAX, bytes);
> > +                status = iomap_write_begin(iter, &folio, &offset, &bytes);
> > +                if (status)
> > +                        return status;
> > +                if (iter->iomap.flags & IOMAP_F_STALE)
> > +                        break;
> > +
> > +                offset = offset_in_folio(folio, pos);
> > +                if (bytes > folio_size(folio) - offset)
> > +                        bytes = folio_size(folio) - offset;
> > +
> > +                memcpy_to_folio(folio, offset, buf, bytes);
> > +
> > +                ret = iomap_write_end(iter, bytes, bytes, folio);
> > +                if (WARN_ON_ONCE(!ret))
> > +                        return -EIO;
> > +
> > +                __iomap_put_folio(iter, bytes, folio);
> > +                if (WARN_ON_ONCE(!ret))
> > +                        return -EIO;
> > +
> > +                status = iomap_iter_advance(iter, &bytes);
> > +                if (status)
> > +                        break;
> > +        } while (bytes > 0);
> > +
> > +        return status;
> > +}
> 
> Hrm, stripped down version of iomap_write_iter without the isize
> updates.

yes, it can probably be merged with a flag to skip the i_size updates;
I can look into this if it makes more sense.
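Roughly, the idea would be to keep iomap_write_iter()'s copy loop as is
and only gate the size update, e.g. (untested sketch; reusing
IOMAP_F_BEYOND_EOF as the "don't move i_size" signal is just one option,
it could also be an explicit argument to the helper):

		written = iomap_write_end(iter, bytes, copied, folio) ?
				copied : 0;

		/*
		 * Post-EOF regions (e.g. verity metadata) must stay
		 * invisible to normal I/O, so never move i_size for them.
		 */
		if (!(iter->iomap.flags & IOMAP_F_BEYOND_EOF)) {
			old_size = iter->inode->i_size;
			if (pos + written > old_size) {
				i_size_write(iter->inode, pos + written);
				iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
			}
		}
		__iomap_put_folio(iter, written, folio);

Merging would also pick up the balance_dirty_pages_ratelimited_flags()
call you pointed out above for free.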
> 
> --D
> 
> > +
> > +int
> > +iomap_write_region(struct ioregion *region)
> > +{
> > +        struct iomap_iter iter = {
> > +                .inode = region->inode,
> > +                .pos = region->pos,
> > +                .len = region->length,
> > +        };
> > +        ssize_t ret;
> > +
> > +        while ((ret = iomap_iter(&iter, region->ops)) > 0)
> > +                iter.status = iomap_write_region_iter(&iter, region->buf);
> > +
> > +        return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(iomap_write_region);
> 

-- 
- Andrey