Re: [PATCH v4 1/5] fuse: use iomap for buffered writes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Jul 12, 2025 at 6:46 AM Darrick J. Wong <djwong@xxxxxxxxxx> wrote:
>
> On Wed, Jul 09, 2025 at 03:10:19PM -0700, Joanne Koong wrote:
> > Have buffered writes go through iomap. This has two advantages:
> > * granular large folio synchronous reads
> > * granular large folio dirty tracking
> >
> > If for example there is a 1 MB large folio and a write issued at pos 1
> > to pos 1 MB - 2, only the head and tail pages will need to be read in
> > and marked uptodate instead of the entire folio needing to be read in.
> > Non-relevant trailing pages are also skipped (eg if for a 1 MB large
> > folio a write is issued at pos 1 to 4099, only the first two pages are
> > read in and the ones after that are skipped).
> >
> > iomap also has granular dirty tracking. This is useful in that when it
> > comes to writeback time, only the dirty portions of the large folio will
> > be written instead of having to write out the entire folio. For example
> > if there is a 1 MB large folio and only 2 bytes in it are dirty, only
> > the page for those dirty bytes gets written out. Please note that
> > granular writeback is only done once fuse also uses iomap in writeback
> > (separate commit).
> >
> > .release_folio needs to be set to iomap_release_folio so that any
> > allocated iomap ifs structs get freed.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> > ---
> >  fs/fuse/Kconfig |   1 +
> >  fs/fuse/file.c  | 148 ++++++++++++++++++------------------------------
> >  2 files changed, 55 insertions(+), 94 deletions(-)
> >
> > diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
> > index ca215a3cba3e..a774166264de 100644
> > --- a/fs/fuse/Kconfig
> > +++ b/fs/fuse/Kconfig
> > @@ -2,6 +2,7 @@
> >  config FUSE_FS
> >       tristate "FUSE (Filesystem in Userspace) support"
> >       select FS_POSIX_ACL
> > +     select FS_IOMAP
> >       help
> >         With FUSE it is possible to implement a fully functional filesystem
> >         in a userspace program.
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index 47006d0753f1..cadad61ef7df 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -21,6 +21,7 @@
> >  #include <linux/filelock.h>
> >  #include <linux/splice.h>
> >  #include <linux/task_io_accounting_ops.h>
> > +#include <linux/iomap.h>
> >
> >  static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
> >                         unsigned int open_flags, int opcode,
> > @@ -788,12 +789,16 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
> >       }
> >  }
> >
> > -static int fuse_do_readfolio(struct file *file, struct folio *folio)
> > +static int fuse_do_readfolio(struct file *file, struct folio *folio,
> > +                          size_t off, size_t len)
> >  {
> >       struct inode *inode = folio->mapping->host;
> >       struct fuse_mount *fm = get_fuse_mount(inode);
> > -     loff_t pos = folio_pos(folio);
> > -     struct fuse_folio_desc desc = { .length = folio_size(folio) };
> > +     loff_t pos = folio_pos(folio) + off;
> > +     struct fuse_folio_desc desc = {
> > +             .offset = off,
> > +             .length = len,
> > +     };
> >       struct fuse_io_args ia = {
> >               .ap.args.page_zeroing = true,
> >               .ap.args.out_pages = true,
> > @@ -820,8 +825,6 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
> >       if (res < desc.length)
> >               fuse_short_read(inode, attr_ver, res, &ia.ap);
> >
> > -     folio_mark_uptodate(folio);
> > -
> >       return 0;
> >  }
> >
> > @@ -834,13 +837,26 @@ static int fuse_read_folio(struct file *file, struct folio *folio)
> >       if (fuse_is_bad(inode))
> >               goto out;
> >
> > -     err = fuse_do_readfolio(file, folio);
> > +     err = fuse_do_readfolio(file, folio, 0, folio_size(folio));
> > +     if (!err)
> > +             folio_mark_uptodate(folio);
> > +
> >       fuse_invalidate_atime(inode);
> >   out:
> >       folio_unlock(folio);
> >       return err;
> >  }
> >
> > +static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
> > +                                    struct folio *folio, loff_t pos,
> > +                                    size_t len)
> > +{
> > +     struct file *file = iter->private;
> > +     size_t off = offset_in_folio(folio, pos);
> > +
> > +     return fuse_do_readfolio(file, folio, off, len);
> > +}
> > +
> >  static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
> >                              int err)
> >  {
> > @@ -1374,6 +1390,24 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
> >       }
> >  }
> >
> > +static const struct iomap_write_ops fuse_iomap_write_ops = {
> > +     .read_folio_range = fuse_iomap_read_folio_range,
> > +};
> > +
> > +static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > +                         unsigned int flags, struct iomap *iomap,
> > +                         struct iomap *srcmap)
> > +{
> > +     iomap->type = IOMAP_MAPPED;
> > +     iomap->length = length;
> > +     iomap->offset = offset;
> > +     return 0;
> > +}
> > +
> > +static const struct iomap_ops fuse_iomap_ops = {
> > +     .iomap_begin    = fuse_iomap_begin,
> > +};
> > +
> >  static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
> >  {
> >       struct file *file = iocb->ki_filp;
> > @@ -1383,6 +1417,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
> >       struct inode *inode = mapping->host;
> >       ssize_t err, count;
> >       struct fuse_conn *fc = get_fuse_conn(inode);
> > +     bool writeback = false;
> >
> >       if (fc->writeback_cache) {
> >               /* Update size (EOF optimization) and mode (SUID clearing) */
> > @@ -1391,16 +1426,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
> >               if (err)
> >                       return err;
> >
> > -             if (fc->handle_killpriv_v2 &&
> > -                 setattr_should_drop_suidgid(idmap,
> > -                                             file_inode(file))) {
> > -                     goto writethrough;
> > -             }
> > -
> > -             return generic_file_write_iter(iocb, from);
> > +             if (!fc->handle_killpriv_v2 ||
> > +                 !setattr_should_drop_suidgid(idmap, file_inode(file)))
> > +                     writeback = true;
> >       }
> >
> > -writethrough:
> >       inode_lock(inode);
> >
> >       err = count = generic_write_checks(iocb, from);
> > @@ -1419,6 +1449,15 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
> >                       goto out;
> >               written = direct_write_fallback(iocb, from, written,
> >                               fuse_perform_write(iocb, from));
>
> Random unrelated question: does anyone know why fuse handles IOCB_DIRECT
> in its fuse_cache_{read,write}_iter functions and /also/ sets
> ->direct_IO?  I thought filesystems only did one or the other, not both.
>

I think it has to do with the difference in handling async aio and sync aio
and the difference between user requested O_DIRECT and server
requested FOPEN_DIRECT_IO.

I think Bernd had some patches to further unify the related code.

Thanks,
Amir.





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux