On Wed, Jul 2, 2025 at 11:13 AM Darrick J. Wong <djwong@xxxxxxxxxx> wrote: > > On Mon, Jun 23, 2025 at 07:21:32PM -0700, Joanne Koong wrote: > > Use iomap for dirty folio writeback in ->writepages(). > > This allows for granular dirty writeback of large folios. > > > > Only the dirty portions of the large folio will be written instead of > > having to write out the entire folio. For example if there is a 1 MB > > large folio and only 2 bytes in it are dirty, only the page for those > > dirty bytes will be written out. > > > > .dirty_folio needs to be set to iomap_dirty_folio so that the bitmap > > iomap uses for dirty tracking correctly reflects dirty regions that need > > to be written back. > > > > Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> > > --- > > fs/fuse/file.c | 124 ++++++++++++++++++++++++++++++------------------- > > 1 file changed, 75 insertions(+), 49 deletions(-) > > > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > > index a7f11c1a4f89..2b4b950eaeed 100644 > > --- a/fs/fuse/file.c > > +++ b/fs/fuse/file.c > > @@ -1837,7 +1837,7 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa) > > * scope of the fi->lock alleviates xarray lock > > * contention and noticeably improves performance. > > */ > > - folio_end_writeback(ap->folios[i]); > > + iomap_finish_folio_write(inode, ap->folios[i], 1); > > dec_wb_stat(&bdi->wb, WB_WRITEBACK); > > wb_writeout_inc(&bdi->wb); > > } > > @@ -2024,19 +2024,20 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc, > > } > > > > static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio, > > - uint32_t folio_index) > > + uint32_t folio_index, loff_t offset, unsigned len) > > { > > struct inode *inode = folio->mapping->host; > > struct fuse_args_pages *ap = &wpa->ia.ap; > > > > ap->folios[folio_index] = folio; > > - ap->descs[folio_index].offset = 0; > > - ap->descs[folio_index].length = folio_size(folio); > > + ap->descs[folio_index].offset = offset; > > + ap->descs[folio_index].length = len; > > > > inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); > > } > > > > static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio, > > + size_t offset, > > struct fuse_file *ff) > > { > > struct inode *inode = folio->mapping->host; > > @@ -2049,7 +2050,7 @@ static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio > > return NULL; > > > > fuse_writepage_add_to_bucket(fc, wpa); > > - fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0); > > + fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio) + offset, 0); > > wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; > > wpa->inode = inode; > > wpa->ia.ff = ff; > > @@ -2075,7 +2076,7 @@ static int fuse_writepage_locked(struct folio *folio) > > if (!ff) > > goto err; > > > > - wpa = fuse_writepage_args_setup(folio, ff); > > + wpa = fuse_writepage_args_setup(folio, 0, ff); > > error = -ENOMEM; > > if (!wpa) > > goto err_writepage_args; > > @@ -2084,7 +2085,7 @@ static int fuse_writepage_locked(struct folio *folio) > > ap->num_folios = 1; > > > > folio_start_writeback(folio); > > - fuse_writepage_args_page_fill(wpa, folio, 0); > > + fuse_writepage_args_page_fill(wpa, folio, 0, 0, folio_size(folio)); > > > > spin_lock(&fi->lock); > > list_add_tail(&wpa->queue_entry, &fi->queued_writes); > > @@ -2105,7 +2106,7 @@ struct fuse_fill_wb_data { > > struct fuse_file *ff; > > struct inode *inode; > > unsigned int max_folios; > > - unsigned int nr_pages; > > + unsigned int nr_bytes; > > }; > > > > static bool fuse_pages_realloc(struct fuse_fill_wb_data *data) > > @@ -2147,21 +2148,28 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) > > } > > > > static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio, > > + loff_t offset, unsigned len, > > struct fuse_args_pages *ap, > > struct fuse_fill_wb_data *data) > > { > > + struct folio *prev_folio; > > + struct fuse_folio_desc prev_desc; > > + > > WARN_ON(!ap->num_folios); > > > > /* Reached max pages */ > > - if (data->nr_pages + folio_nr_pages(folio) > fc->max_pages) > > + if ((data->nr_bytes + len) / PAGE_SIZE > fc->max_pages) > > return true; > > > > /* Reached max write bytes */ > > - if ((data->nr_pages * PAGE_SIZE) + folio_size(folio) > fc->max_write) > > + if (data->nr_bytes + len > fc->max_write) > > return true; > > > > /* Discontinuity */ > > - if (folio_next_index(ap->folios[ap->num_folios - 1]) != folio->index) > > + prev_folio = ap->folios[ap->num_folios - 1]; > > + prev_desc = ap->descs[ap->num_folios - 1]; > > + if ((folio_pos(prev_folio) + prev_desc.offset + prev_desc.length) != > > + folio_pos(folio) + offset) > > return true; > > > > /* Need to grow the pages array? If so, did the expansion fail? */ > > @@ -2171,85 +2179,103 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio, > > return false; > > } > > > > -static int fuse_writepages_fill(struct folio *folio, > > - struct writeback_control *wbc, void *_data) > > +static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc, > > + struct folio *folio, u64 pos, > > + unsigned len, u64 end_pos) > > { > > - struct fuse_fill_wb_data *data = _data; > > + struct fuse_fill_wb_data *data = wpc->wb_ctx; > > struct fuse_writepage_args *wpa = data->wpa; > > struct fuse_args_pages *ap = &wpa->ia.ap; > > struct inode *inode = data->inode; > > struct fuse_inode *fi = get_fuse_inode(inode); > > struct fuse_conn *fc = get_fuse_conn(inode); > > - int err; > > + loff_t offset = offset_in_folio(folio, pos); > > + > > + WARN_ON_ONCE(!data); > > + /* len will always be page aligned */ > > + WARN_ON_ONCE(len & (PAGE_SIZE - 1)); > > > > if (!data->ff) { > > - err = -EIO; > > data->ff = fuse_write_file_get(fi); > > if (!data->ff) > > - goto out_unlock; > > + return -EIO; > > } > > > > - if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) { > > + iomap_start_folio_write(inode, folio, 1); > > + > > + if (wpa && fuse_writepage_need_send(fc, folio, offset, len, ap, data)) { > > fuse_writepages_send(data); > > data->wpa = NULL; > > - data->nr_pages = 0; > > + data->nr_bytes = 0; > > } > > > > if (data->wpa == NULL) { > > - err = -ENOMEM; > > - wpa = fuse_writepage_args_setup(folio, data->ff); > > + wpa = fuse_writepage_args_setup(folio, offset, data->ff); > > if (!wpa) > > - goto out_unlock; > > + return -ENOMEM; > > If we error out here, what subtracts from write_bytes_pending the > quantity that we just added in iomap_start_folio_write? > > (It would have helped a lot if the cover letter had linked to a git > branch so I could go look at the final product for myself...) > Ugh you're right, this needs to be accounted for in the error cases. I'll fix this in v4. Thanks for spotting this. I'll make a git branch and include a link to it in v4 as well. > --D >