Re: [PATCH v1 15/16] fuse: use iomap for readahead

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Aug 29, 2025 at 04:56:26PM -0700, Joanne Koong wrote:
> Do readahead in fuse using iomap. This gives us granular uptodate
> tracking for large folios, which optimizes how much data needs to be
> read in. If some portions of the folio are already uptodate (eg through
> a prior write), we only need to read in the non-uptodate portions.
> 
> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> ---
>  fs/fuse/file.c | 214 +++++++++++++++++++++++++++----------------------
>  1 file changed, 118 insertions(+), 96 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index bdfb13cdee4b..1659603f4cb6 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -844,8 +844,73 @@ static const struct iomap_ops fuse_iomap_ops = {
>  
>  struct fuse_fill_read_data {
>  	struct file *file;
> +	/*
> +	 * We need to track this because non-readahead requests can't be sent
> +	 * asynchronously.
> +	 */
> +	bool readahead : 1;
> +
> +	/*
> +	 * Fields below are used if sending the read request
> +	 * asynchronously.
> +	 */
> +	struct fuse_conn *fc;
> +	struct readahead_control *rac;
> +	struct fuse_io_args *ia;
> +	unsigned int nr_bytes;
>  };
>  
> +/* forward declarations */
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> +				  unsigned len, struct fuse_args_pages *ap,
> +				  unsigned cur_bytes, bool write);
> +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> +				unsigned int count, bool async);
> +
> +static int fuse_handle_readahead(struct folio *folio,
> +				 struct fuse_fill_read_data *data, loff_t pos,
> +				 size_t len)
> +{
> +	struct fuse_io_args *ia = data->ia;
> +	size_t off = offset_in_folio(folio, pos);
> +	struct fuse_conn *fc = data->fc;
> +	struct fuse_args_pages *ap;
> +
> +	if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
> +					false)) {
> +		fuse_send_readpages(ia, data->file, data->nr_bytes,
> +				    fc->async_read);
> +		data->nr_bytes = 0;
> +		ia = NULL;
> +	}
> +	if (!ia) {
> +		struct readahead_control *rac = data->rac;
> +		unsigned nr_pages = min(fc->max_pages, readahead_count(rac));
> +
> +		if (fc->num_background >= fc->congestion_threshold &&
> +		    rac->ra->async_size >= readahead_count(rac))
> +			/*
> +			 * Congested and only async pages left, so skip the
> +			 * rest.
> +			 */
> +			return -EAGAIN;
> +
> +		data->ia = fuse_io_alloc(NULL, nr_pages);
> +		if (!data->ia)
> +			return -ENOMEM;
> +		ia = data->ia;
> +	}
> +	folio_get(folio);
> +	ap = &ia->ap;
> +	ap->folios[ap->num_folios] = folio;
> +	ap->descs[ap->num_folios].offset = off;
> +	ap->descs[ap->num_folios].length = len;
> +	data->nr_bytes += len;
> +	ap->num_folios++;
> +
> +	return 0;
> +}
> +
>  static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
>  					     struct folio *folio, loff_t pos,
>  					     size_t len)
> @@ -855,13 +920,24 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
>  	size_t off = offset_in_folio(folio, pos);
>  	int ret;
>  
> -	/*
> -	 *  for non-readahead read requests, do reads synchronously since
> -	 *  it's not guaranteed that the server can handle out-of-order reads
> -	 */
>  	iomap_start_folio_read(folio, len);
> -	ret = fuse_do_readfolio(file, folio, off, len);
> -	iomap_finish_folio_read(folio, off, len, ret);
> +	if (data->readahead) {
> +		ret = fuse_handle_readahead(folio, data, pos, len);
> +		/*
> +		 * If fuse_handle_readahead was successful, fuse_readpages_end
> +		 * will do the iomap_finish_folio_read, else we need to call it
> +		 * here
> +		 */
> +		if (ret)
> +			iomap_finish_folio_read(folio, off, len, ret);
> +	} else {
> +		/*
> +		 *  for non-readahead read requests, do reads synchronously since
> +		 *  it's not guaranteed that the server can handle out-of-order reads
> +		 */
> +		ret = fuse_do_readfolio(file, folio, off, len);
> +		iomap_finish_folio_read(folio, off, len, ret);
> +	}
>  	return ret;
>  }
>  
> @@ -923,7 +999,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
>  	}
>  
>  	for (i = 0; i < ap->num_folios; i++) {
> -		folio_end_read(ap->folios[i], !err);
> +		iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
> +					ap->descs[i].length, err);
>  		folio_put(ap->folios[i]);
>  	}
>  	if (ia->ff)
> @@ -933,7 +1010,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
>  }
>  
>  static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> -				unsigned int count)
> +				unsigned int count, bool async)
>  {
>  	struct fuse_file *ff = file->private_data;
>  	struct fuse_mount *fm = ff->fm;
> @@ -955,7 +1032,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
>  
>  	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
>  	ia->read.attr_ver = fuse_get_attr_version(fm->fc);
> -	if (fm->fc->async_read) {
> +	if (async) {
>  		ia->ff = fuse_file_get(ff);
>  		ap->args.end = fuse_readpages_end;
>  		err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
> @@ -972,81 +1049,20 @@ static void fuse_readahead(struct readahead_control *rac)
>  {
>  	struct inode *inode = rac->mapping->host;
>  	struct fuse_conn *fc = get_fuse_conn(inode);
> -	unsigned int max_pages, nr_pages;
> -	struct folio *folio = NULL;
> +	struct fuse_fill_read_data data = {
> +		.file = rac->file,
> +		.readahead = true,
> +		.fc = fc,
> +		.rac = rac,
> +	};
>  
>  	if (fuse_is_bad(inode))
>  		return;
>  
> -	max_pages = min_t(unsigned int, fc->max_pages,
> -			fc->max_read / PAGE_SIZE);
> -
> -	/*
> -	 * This is only accurate the first time through, since readahead_folio()
> -	 * doesn't update readahead_count() from the previous folio until the
> -	 * next call.  Grab nr_pages here so we know how many pages we're going
> -	 * to have to process.  This means that we will exit here with
> -	 * readahead_count() == folio_nr_pages(last_folio), but we will have
> -	 * consumed all of the folios, and read_pages() will call
> -	 * readahead_folio() again which will clean up the rac.
> -	 */
> -	nr_pages = readahead_count(rac);
> -
> -	while (nr_pages) {
> -		struct fuse_io_args *ia;
> -		struct fuse_args_pages *ap;
> -		unsigned cur_pages = min(max_pages, nr_pages);
> -		unsigned int pages = 0;
> -
> -		if (fc->num_background >= fc->congestion_threshold &&
> -		    rac->ra->async_size >= readahead_count(rac))
> -			/*
> -			 * Congested and only async pages left, so skip the
> -			 * rest.
> -			 */
> -			break;
> -
> -		ia = fuse_io_alloc(NULL, cur_pages);
> -		if (!ia)
> -			break;
> -		ap = &ia->ap;
> -
> -		while (pages < cur_pages) {
> -			unsigned int folio_pages;
> -
> -			/*
> -			 * This returns a folio with a ref held on it.
> -			 * The ref needs to be held until the request is
> -			 * completed, since the splice case (see
> -			 * fuse_try_move_page()) drops the ref after it's
> -			 * replaced in the page cache.
> -			 */
> -			if (!folio)
> -				folio =  __readahead_folio(rac);
> -
> -			folio_pages = folio_nr_pages(folio);
> -			if (folio_pages > cur_pages - pages) {
> -				/*
> -				 * Large folios belonging to fuse will never
> -				 * have more pages than max_pages.
> -				 */
> -				WARN_ON(!pages);
> -				break;
> -			}
> -
> -			ap->folios[ap->num_folios] = folio;
> -			ap->descs[ap->num_folios].length = folio_size(folio);
> -			ap->num_folios++;
> -			pages += folio_pages;
> -			folio = NULL;
> -		}
> -		fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
> -		nr_pages -= pages;
> -	}
> -	if (folio) {
> -		folio_end_read(folio, false);
> -		folio_put(folio);
> -	}
> +	iomap_readahead(rac, &fuse_iomap_ops, &fuse_iomap_read_ops, &data);
> +	if (data.ia)
> +		fuse_send_readpages(data.ia, data.file, data.nr_bytes,
> +				    fc->async_read);
>  }
>  
>  static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
> @@ -2077,7 +2093,7 @@ struct fuse_fill_wb_data {
>  	struct fuse_file *ff;
>  	unsigned int max_folios;
>  	/*
> -	 * nr_bytes won't overflow since fuse_writepage_need_send() caps
> +	 * nr_bytes won't overflow since fuse_folios_need_send() caps
>  	 * wb requests to never exceed fc->max_pages (which has an upper bound
>  	 * of U16_MAX).
>  	 */
> @@ -2122,14 +2138,15 @@ static void fuse_writepages_send(struct inode *inode,
>  	spin_unlock(&fi->lock);
>  }
>  
> -static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> -				     unsigned len, struct fuse_args_pages *ap,
> -				     struct fuse_fill_wb_data *data)
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> +				  unsigned len, struct fuse_args_pages *ap,
> +				  unsigned cur_bytes, bool write)
>  {
>  	struct folio *prev_folio;
>  	struct fuse_folio_desc prev_desc;
> -	unsigned bytes = data->nr_bytes + len;
> +	unsigned bytes = cur_bytes + len;
>  	loff_t prev_pos;
> +	size_t max_bytes = write ? fc->max_write : fc->max_read;
>  
>  	WARN_ON(!ap->num_folios);
>  
> @@ -2137,8 +2154,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
>  	if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
>  		return true;
>  
> -	/* Reached max write bytes */
> -	if (bytes > fc->max_write)
> +	if (bytes > max_bytes)
>  		return true;
>  
>  	/* Discontinuity */
> @@ -2148,11 +2164,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
>  	if (prev_pos != pos)
>  		return true;
>  
> -	/* Need to grow the pages array?  If so, did the expansion fail? */
> -	if (ap->num_folios == data->max_folios &&
> -	    !fuse_pages_realloc(data, fc->max_pages))
> -		return true;
> -
>  	return false;
>  }
>  
> @@ -2176,10 +2187,21 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
>  			return -EIO;
>  	}
>  
> -	if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
> -		fuse_writepages_send(inode, data);
> -		data->wpa = NULL;
> -		data->nr_bytes = 0;
> +	if (wpa) {
> +		bool send = fuse_folios_need_send(fc, pos, len, ap, data->nr_bytes,
> +						  true);
> +
> +		if (!send) {
> +			/* Need to grow the pages array?  If so, did the expansion fail? */
> +			send = (ap->num_folios == data->max_folios) &&
> +				!fuse_pages_realloc(data, fc->max_pages);
> +		}

What purpose does this code relocation serve?  I gather the idea here is that
writes need to reallocate the pages array, whereas readahead can simply
constrain itself to whatever's already allocated?

--D

> +
> +		if (send) {
> +			fuse_writepages_send(inode, data);
> +			data->wpa = NULL;
> +			data->nr_bytes = 0;
> +		}
>  	}
>  
>  	if (data->wpa == NULL) {
> -- 
> 2.47.3
> 
> 




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux