Re: [PATCH v2] fuse: use splice for reading user pages on servers that enable it

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 4/23/25 01:56, Joanne Koong wrote:
> For direct io writes, splice is disabled when forwarding pages from the
> client to the server. This is because the pages in the pipe buffer are
> user pages, which is controlled by the client. Thus if a server replies
> to the request and then keeps accessing the pages afterwards, there is
> the possibility that the client may have modified the contents of the
> pages in the meantime. More context on this can be found in commit
> 0c4bcfdecb1a ("fuse: fix pipe buffer lifetime for direct_io").
> 
> For servers that do not need to access pages after answering the
> request, splice gives a non-trivial improvement in performance.
> Benchmarks show roughly a 40% speedup.
> 
> Allow servers with CAP_SYS_ADMIN privileges to enable zero-copy splice
> for servicing client direct io writes. By enabling this, the server
> understands that they should not continue accessing the pipe buffer
> after completing the request or may face incorrect data if they do so.
> Only servers with CAP_SYS_ADMIN may enable this, since having access to
> the underlying user pages may allow servers to snoop or modify the
> user pages after completing the request.
> 
> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>

Reviewed-by: Bernd Schubert <bschubert@xxxxxxx>

> 
> ---
> 
> Changes from v1 -> v2:
> * Gate this behind CAP_SYS_ADMIN (Bernd)
> v1: https://lore.kernel.org/linux-fsdevel/20250419000614.3795331-1-joannelkoong@xxxxxxxxx/
> 
> ---
>  fs/fuse/dev.c             | 18 ++++++++++--------
>  fs/fuse/dev_uring.c       |  4 ++--
>  fs/fuse/fuse_dev_i.h      |  5 +++--
>  fs/fuse/fuse_i.h          |  3 +++
>  fs/fuse/inode.c           |  5 ++++-
>  include/uapi/linux/fuse.h |  9 +++++++++
>  6 files changed, 31 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 67d07b4c778a..e4949c379eaf 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -816,12 +816,13 @@ static int unlock_request(struct fuse_req *req)
>  	return err;
>  }
>  
> -void fuse_copy_init(struct fuse_copy_state *cs, bool write,
> -		    struct iov_iter *iter)
> +void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
> +		    bool write, struct iov_iter *iter)
>  {
>  	memset(cs, 0, sizeof(*cs));
>  	cs->write = write;
>  	cs->iter = iter;
> +	cs->splice_read_user_pages = fc->splice_read_user_pages;
>  }
>  
>  /* Unmap and put previous page of userspace buffer */
> @@ -1105,9 +1106,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
>  		if (cs->write && cs->pipebufs && page) {
>  			/*
>  			 * Can't control lifetime of pipe buffers, so always
> -			 * copy user pages.
> +			 * copy user pages if server does not support reading
> +			 * user pages through splice.
>  			 */
> -			if (cs->req->args->user_pages) {
> +			if (cs->req->args->user_pages && !cs->splice_read_user_pages) {
>  				err = fuse_copy_fill(cs);
>  				if (err)
>  					return err;
> @@ -1538,7 +1540,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
>  	if (!user_backed_iter(to))
>  		return -EINVAL;
>  
> -	fuse_copy_init(&cs, true, to);
> +	fuse_copy_init(&cs, fud->fc, true, to);
>  
>  	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
>  }
> @@ -1561,7 +1563,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
>  	if (!bufs)
>  		return -ENOMEM;
>  
> -	fuse_copy_init(&cs, true, NULL);
> +	fuse_copy_init(&cs, fud->fc, true, NULL);
>  	cs.pipebufs = bufs;
>  	cs.pipe = pipe;
>  	ret = fuse_dev_do_read(fud, in, &cs, len);
> @@ -2227,7 +2229,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
>  	if (!user_backed_iter(from))
>  		return -EINVAL;
>  
> -	fuse_copy_init(&cs, false, from);
> +	fuse_copy_init(&cs, fud->fc, false, from);
>  
>  	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
>  }
> @@ -2301,7 +2303,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>  	}
>  	pipe_unlock(pipe);
>  
> -	fuse_copy_init(&cs, false, NULL);
> +	fuse_copy_init(&cs, fud->fc, false, NULL);
>  	cs.pipebufs = bufs;
>  	cs.nr_segs = nbuf;
>  	cs.pipe = pipe;
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index ef470c4a9261..52b883a6a79d 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -593,7 +593,7 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
>  	if (err)
>  		return err;
>  
> -	fuse_copy_init(&cs, false, &iter);
> +	fuse_copy_init(&cs, ring->fc, false, &iter);
>  	cs.is_uring = true;
>  	cs.req = req;
>  
> @@ -623,7 +623,7 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
>  		return err;
>  	}
>  
> -	fuse_copy_init(&cs, true, &iter);
> +	fuse_copy_init(&cs, ring->fc, true, &iter);
>  	cs.is_uring = true;
>  	cs.req = req;
>  
> diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
> index db136e045925..25e593e64c67 100644
> --- a/fs/fuse/fuse_dev_i.h
> +++ b/fs/fuse/fuse_dev_i.h
> @@ -32,6 +32,7 @@ struct fuse_copy_state {
>  	bool write:1;
>  	bool move_pages:1;
>  	bool is_uring:1;
> +	bool splice_read_user_pages:1;
>  	struct {
>  		unsigned int copied_sz; /* copied size into the user buffer */
>  	} ring;
> @@ -51,8 +52,8 @@ struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique);
>  
>  void fuse_dev_end_requests(struct list_head *head);
>  
> -void fuse_copy_init(struct fuse_copy_state *cs, bool write,
> -			   struct iov_iter *iter);
> +void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *conn,
> +		    bool write, struct iov_iter *iter);
>  int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs,
>  		   unsigned int argpages, struct fuse_arg *args,
>  		   int zeroing);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 3d5289cb82a5..e21875f16220 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -898,6 +898,9 @@ struct fuse_conn {
>  	/* Use io_uring for communication */
>  	bool io_uring:1;
>  
> +	/* Allow splice for reading user pages */
> +	bool splice_read_user_pages:1;
> +
>  	/** Maximum stack depth for passthrough backing files */
>  	int max_stack_depth;
>  
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 43b6643635ee..8b78dacf6c97 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -1439,6 +1439,9 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
>  
>  			if (flags & FUSE_REQUEST_TIMEOUT)
>  				timeout = arg->request_timeout;
> +
> +			if (flags & FUSE_SPLICE_READ_USER_PAGES && capable(CAP_SYS_ADMIN))
> +				fc->splice_read_user_pages = true;
>  		} else {
>  			ra_pages = fc->max_read / PAGE_SIZE;
>  			fc->no_lock = true;
> @@ -1489,7 +1492,7 @@ void fuse_send_init(struct fuse_mount *fm)
>  		FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
>  		FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
>  		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP |
> -		FUSE_REQUEST_TIMEOUT;
> +		FUSE_REQUEST_TIMEOUT | FUSE_SPLICE_READ_USER_PAGES;
>  #ifdef CONFIG_FUSE_DAX
>  	if (fm->fc->dax)
>  		flags |= FUSE_MAP_ALIGNMENT;
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 122d6586e8d4..fecb06921da9 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -235,6 +235,9 @@
>   *
>   *  7.44
>   *  - add FUSE_NOTIFY_INC_EPOCH
> + *
> + *  7.45
> + *  - add FUSE_SPLICE_READ_USER_PAGES
>   */
>  
>  #ifndef _LINUX_FUSE_H
> @@ -443,6 +446,11 @@ struct fuse_file_lock {
>   * FUSE_OVER_IO_URING: Indicate that client supports io-uring
>   * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests.
>   *			 init_out.request_timeout contains the timeout (in secs)
> + * FUSE_SPLICE_READ_USER_PAGES: kernel supports splice on the device for reading
> + *				user pages. If the server enables this, then the
> + *				server should not access the pipe buffer after
> + *				completing the request. Only servers with
> + *				CAP_SYS_ADMIN privileges can enable this.
>   */
>  #define FUSE_ASYNC_READ		(1 << 0)
>  #define FUSE_POSIX_LOCKS	(1 << 1)
> @@ -490,6 +498,7 @@ struct fuse_file_lock {
>  #define FUSE_ALLOW_IDMAP	(1ULL << 40)
>  #define FUSE_OVER_IO_URING	(1ULL << 41)
>  #define FUSE_REQUEST_TIMEOUT	(1ULL << 42)
> +#define FUSE_SPLICE_READ_USER_PAGES (1ULL << 43)
>  
>  /**
>   * CUSE INIT request/reply flags





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux