On 4/23/25 01:56, Joanne Koong wrote: > For direct io writes, splice is disabled when forwarding pages from the > client to the server. This is because the pages in the pipe buffer are > user pages, which is controlled by the client. Thus if a server replies > to the request and then keeps accessing the pages afterwards, there is > the possibility that the client may have modified the contents of the > pages in the meantime. More context on this can be found in commit > 0c4bcfdecb1a ("fuse: fix pipe buffer lifetime for direct_io"). > > For servers that do not need to access pages after answering the > request, splice gives a non-trivial improvement in performance. > Benchmarks show roughly a 40% speedup. > > Allow servers with CAP_SYS_ADMIN privileges to enable zero-copy splice > for servicing client direct io writes. By enabling this, the server > understands that they should not continue accessing the pipe buffer > after completing the request or may face incorrect data if they do so. > Only servers with CAP_SYS_ADMIN may enable this, since having access to > the underlying user pages may allow servers to snoop or modify the > user pages after completing the request. > > Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> Reviewed-by: Bernd Schubert <bschubert@xxxxxxx> > > --- > > Changes from v1 -> v2: > * Gate this behind CAP_SYS_ADMIN (Bernd) > v1: https://lore.kernel.org/linux-fsdevel/20250419000614.3795331-1-joannelkoong@xxxxxxxxx/ > > --- > fs/fuse/dev.c | 18 ++++++++++-------- > fs/fuse/dev_uring.c | 4 ++-- > fs/fuse/fuse_dev_i.h | 5 +++-- > fs/fuse/fuse_i.h | 3 +++ > fs/fuse/inode.c | 5 ++++- > include/uapi/linux/fuse.h | 9 +++++++++ > 6 files changed, 31 insertions(+), 13 deletions(-) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 67d07b4c778a..e4949c379eaf 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -816,12 +816,13 @@ static int unlock_request(struct fuse_req *req) > return err; > } > > -void fuse_copy_init(struct fuse_copy_state *cs, bool write, > - struct iov_iter *iter) > +void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, > + bool write, struct iov_iter *iter) > { > memset(cs, 0, sizeof(*cs)); > cs->write = write; > cs->iter = iter; > + cs->splice_read_user_pages = fc->splice_read_user_pages; > } > > /* Unmap and put previous page of userspace buffer */ > @@ -1105,9 +1106,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, > if (cs->write && cs->pipebufs && page) { > /* > * Can't control lifetime of pipe buffers, so always > - * copy user pages. > + * copy user pages if server does not support reading > + * user pages through splice. > */ > - if (cs->req->args->user_pages) { > + if (cs->req->args->user_pages && !cs->splice_read_user_pages) { > err = fuse_copy_fill(cs); > if (err) > return err; > @@ -1538,7 +1540,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) > if (!user_backed_iter(to)) > return -EINVAL; > > - fuse_copy_init(&cs, true, to); > + fuse_copy_init(&cs, fud->fc, true, to); > > return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to)); > } > @@ -1561,7 +1563,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, > if (!bufs) > return -ENOMEM; > > - fuse_copy_init(&cs, true, NULL); > + fuse_copy_init(&cs, fud->fc, true, NULL); > cs.pipebufs = bufs; > cs.pipe = pipe; > ret = fuse_dev_do_read(fud, in, &cs, len); > @@ -2227,7 +2229,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) > if (!user_backed_iter(from)) > return -EINVAL; > > - fuse_copy_init(&cs, false, from); > + fuse_copy_init(&cs, fud->fc, false, from); > > return fuse_dev_do_write(fud, &cs, iov_iter_count(from)); > } > @@ -2301,7 +2303,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, > } > pipe_unlock(pipe); > > - fuse_copy_init(&cs, false, NULL); > + fuse_copy_init(&cs, fud->fc, false, NULL); > cs.pipebufs = bufs; > cs.nr_segs = nbuf; > cs.pipe = pipe; > diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c > index ef470c4a9261..52b883a6a79d 100644 > --- a/fs/fuse/dev_uring.c > +++ b/fs/fuse/dev_uring.c > @@ -593,7 +593,7 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring, > if (err) > return err; > > - fuse_copy_init(&cs, false, &iter); > + fuse_copy_init(&cs, ring->fc, false, &iter); > cs.is_uring = true; > cs.req = req; > > @@ -623,7 +623,7 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, > return err; > } > > - fuse_copy_init(&cs, true, &iter); > + fuse_copy_init(&cs, ring->fc, true, &iter); > cs.is_uring = true; > cs.req = req; > > diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h > index db136e045925..25e593e64c67 100644 > --- a/fs/fuse/fuse_dev_i.h > +++ b/fs/fuse/fuse_dev_i.h > @@ -32,6 +32,7 @@ struct fuse_copy_state { > bool write:1; > bool move_pages:1; > bool is_uring:1; > + bool splice_read_user_pages:1; > struct { > unsigned int copied_sz; /* copied size into the user buffer */ > } ring; > @@ -51,8 +52,8 @@ struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique); > > void fuse_dev_end_requests(struct list_head *head); > > -void fuse_copy_init(struct fuse_copy_state *cs, bool write, > - struct iov_iter *iter); > +void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *conn, > + bool write, struct iov_iter *iter); > int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs, > unsigned int argpages, struct fuse_arg *args, > int zeroing); > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index 3d5289cb82a5..e21875f16220 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -898,6 +898,9 @@ struct fuse_conn { > /* Use io_uring for communication */ > bool io_uring:1; > > + /* Allow splice for reading user pages */ > + bool splice_read_user_pages:1; > + > /** Maximum stack depth for passthrough backing files */ > int max_stack_depth; > > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index 43b6643635ee..8b78dacf6c97 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -1439,6 +1439,9 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, > > if (flags & FUSE_REQUEST_TIMEOUT) > timeout = arg->request_timeout; > + > + if (flags & FUSE_SPLICE_READ_USER_PAGES && capable(CAP_SYS_ADMIN)) > + fc->splice_read_user_pages = true; > } else { > ra_pages = fc->max_read / PAGE_SIZE; > fc->no_lock = true; > @@ -1489,7 +1492,7 @@ void fuse_send_init(struct fuse_mount *fm) > FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | > FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | > FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP | > - FUSE_REQUEST_TIMEOUT; > + FUSE_REQUEST_TIMEOUT | FUSE_SPLICE_READ_USER_PAGES; > #ifdef CONFIG_FUSE_DAX > if (fm->fc->dax) > flags |= FUSE_MAP_ALIGNMENT; > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h > index 122d6586e8d4..fecb06921da9 100644 > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -235,6 +235,9 @@ > * > * 7.44 > * - add FUSE_NOTIFY_INC_EPOCH > + * > + * 7.45 > + * - add FUSE_SPLICE_READ_USER_PAGES > */ > > #ifndef _LINUX_FUSE_H > @@ -443,6 +446,11 @@ struct fuse_file_lock { > * FUSE_OVER_IO_URING: Indicate that client supports io-uring > * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests. > * init_out.request_timeout contains the timeout (in secs) > + * FUSE_SPLICE_READ_USER_PAGES: kernel supports splice on the device for reading > + * user pages. If the server enables this, then the > + * server should not access the pipe buffer after > + * completing the request. Only servers with > + * CAP_SYS_ADMIN privileges can enable this. > */ > #define FUSE_ASYNC_READ (1 << 0) > #define FUSE_POSIX_LOCKS (1 << 1) > @@ -490,6 +498,7 @@ struct fuse_file_lock { > #define FUSE_ALLOW_IDMAP (1ULL << 40) > #define FUSE_OVER_IO_URING (1ULL << 41) > #define FUSE_REQUEST_TIMEOUT (1ULL << 42) > +#define FUSE_SPLICE_READ_USER_PAGES (1ULL << 43) > > /** > * CUSE INIT request/reply flags