[PATCH v1 12/16] iomap: add iomap_read_ops for read and readahead

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a "struct iomap_read_ops" that contains a read_folio_range()
callback that callers can provide as a custom handler for reading in a
folio range, if the caller does not wish to issue bio read requests
(which otherwise is the default behavior). read_folio_range() may read
the request asynchronously or synchronously. The caller is responsible
for calling iomap_start_folio_read()/iomap_finish_folio_read() when
reading the folio range.

This makes it so that non-block based filesystems may use iomap for
reads.

Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
---
 .../filesystems/iomap/operations.rst          | 19 +++++
 block/fops.c                                  |  4 +-
 fs/erofs/data.c                               |  4 +-
 fs/gfs2/aops.c                                |  4 +-
 fs/iomap/buffered-io.c                        | 79 +++++++++++++------
 fs/xfs/xfs_aops.c                             |  4 +-
 fs/zonefs/file.c                              |  4 +-
 include/linux/iomap.h                         | 21 ++++-
 8 files changed, 105 insertions(+), 34 deletions(-)

diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst
index 067ed8e14ef3..215053f0779d 100644
--- a/Documentation/filesystems/iomap/operations.rst
+++ b/Documentation/filesystems/iomap/operations.rst
@@ -57,6 +57,25 @@ The following address space operations can be wrapped easily:
  * ``bmap``
  * ``swap_activate``
 
+``struct iomap_read_ops``
+--------------------------
+
+.. code-block:: c
+
+ struct iomap_read_ops {
+     int (*read_folio_range)(const struct iomap_iter *iter,
+                        struct folio *folio, loff_t pos, size_t len);
+ };
+
+iomap calls these functions:
+
+  - ``read_folio_range``: Called to read in the range (read does not need to
+    be synchronous). The caller is responsible for calling
+    iomap_start_folio_read() and iomap_finish_folio_read() when reading the
+    folio range. This should be done even if an error is encountered during
+    the read. If this function is not provided by the caller, then iomap
+    will default to issuing asynchronous bio read requests.
+
 ``struct iomap_write_ops``
 --------------------------
 
diff --git a/block/fops.c b/block/fops.c
index ddbc69c0922b..b42e16d0eb35 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -533,12 +533,12 @@ const struct address_space_operations def_blk_aops = {
 #else /* CONFIG_BUFFER_HEAD */
 static int blkdev_read_folio(struct file *file, struct folio *folio)
 {
-	return iomap_read_folio(folio, &blkdev_iomap_ops);
+	return iomap_read_folio(folio, &blkdev_iomap_ops, NULL);
 }
 
 static void blkdev_readahead(struct readahead_control *rac)
 {
-	iomap_readahead(rac, &blkdev_iomap_ops);
+	iomap_readahead(rac, &blkdev_iomap_ops, NULL);
 }
 
 static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 3b1ba571c728..ea451f233263 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -371,7 +371,7 @@ static int erofs_read_folio(struct file *file, struct folio *folio)
 {
 	trace_erofs_read_folio(folio, true);
 
-	return iomap_read_folio(folio, &erofs_iomap_ops);
+	return iomap_read_folio(folio, &erofs_iomap_ops, NULL);
 }
 
 static void erofs_readahead(struct readahead_control *rac)
@@ -379,7 +379,7 @@ static void erofs_readahead(struct readahead_control *rac)
 	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
 					readahead_count(rac), true);
 
-	return iomap_readahead(rac, &erofs_iomap_ops);
+	return iomap_readahead(rac, &erofs_iomap_ops, NULL);
 }
 
 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 47d74afd63ac..bf531bcfd8a0 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -428,7 +428,7 @@ static int gfs2_read_folio(struct file *file, struct folio *folio)
 
 	if (!gfs2_is_jdata(ip) ||
 	    (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
-		error = iomap_read_folio(folio, &gfs2_iomap_ops);
+		error = iomap_read_folio(folio, &gfs2_iomap_ops, NULL);
 	} else if (gfs2_is_stuffed(ip)) {
 		error = stuffed_read_folio(ip, folio);
 	} else {
@@ -503,7 +503,7 @@ static void gfs2_readahead(struct readahead_control *rac)
 	else if (gfs2_is_jdata(ip))
 		mpage_readahead(rac, gfs2_block_map);
 	else
-		iomap_readahead(rac, &gfs2_iomap_ops);
+		iomap_readahead(rac, &gfs2_iomap_ops, NULL);
 }
 
 /**
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 5d153c6b16b6..06f2c857de64 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -335,8 +335,8 @@ void iomap_start_folio_read(struct folio *folio, size_t len)
 }
 EXPORT_SYMBOL_GPL(iomap_start_folio_read);
 
-void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
-		int error)
+static void __iomap_finish_folio_read(struct folio *folio, size_t off,
+		size_t len, int error, bool update_bitmap)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	bool uptodate = !error;
@@ -346,7 +346,7 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
 		unsigned long flags;
 
 		spin_lock_irqsave(&ifs->state_lock, flags);
-		if (!error)
+		if (!error && update_bitmap)
 			uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
 		ifs->read_bytes_pending -= len;
 		finished = !ifs->read_bytes_pending;
@@ -356,6 +356,12 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
 	if (finished)
 		folio_end_read(folio, uptodate);
 }
+
+void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
+		int error)
+{
+	return __iomap_finish_folio_read(folio, off, len, error, true);
+}
 EXPORT_SYMBOL_GPL(iomap_finish_folio_read);
 
 #ifdef CONFIG_BLOCK
@@ -379,7 +385,6 @@ static void iomap_read_folio_range_async(struct iomap_iter *iter,
 	struct bio *bio = iter->private;
 	sector_t sector;
 
-	ctx->folio_unlocked = true;
 	iomap_start_folio_read(folio, plen);
 
 	sector = iomap_sector(iomap, pos);
@@ -453,15 +458,17 @@ static void iomap_readfolio_submit(const struct iomap_iter *iter)
 #endif /* CONFIG_BLOCK */
 
 static int iomap_readfolio_iter(struct iomap_iter *iter,
-		struct iomap_readfolio_ctx *ctx)
+		struct iomap_readfolio_ctx *ctx,
+		const struct iomap_read_ops *read_ops)
 {
 	const struct iomap *iomap = &iter->iomap;
+	struct iomap_folio_state *ifs;
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
 	struct folio *folio = ctx->cur_folio;
 	size_t poff, plen;
 	loff_t count;
-	int ret;
+	int ret = 0;
 
 	if (iomap->type == IOMAP_INLINE) {
 		ret = iomap_read_inline_data(iter, folio);
@@ -471,7 +478,14 @@ static int iomap_readfolio_iter(struct iomap_iter *iter,
 	}
 
 	/* zero post-eof blocks as the page may be mapped */
-	ifs_alloc(iter->inode, folio, iter->flags);
+	ifs = ifs_alloc(iter->inode, folio, iter->flags);
+
+	/*
+	 * Add a bias to ifs->read_bytes_pending so that a read is ended only
+	 * after all the ranges have been read in.
+	 */
+	if (ifs)
+		iomap_start_folio_read(folio, 1);
 
 	length = min_t(loff_t, length,
 			folio_size(folio) - offset_in_folio(folio, pos));
@@ -479,35 +493,53 @@ static int iomap_readfolio_iter(struct iomap_iter *iter,
 		iomap_adjust_read_range(iter->inode, folio, &pos,
 				length, &poff, &plen);
 		count = pos - iter->pos + plen;
-		if (plen == 0)
-			return iomap_iter_advance(iter, &count);
+		if (plen == 0) {
+			ret = iomap_iter_advance(iter, &count);
+			break;
+		}
 
 		if (iomap_block_needs_zeroing(iter, pos)) {
 			folio_zero_range(folio, poff, plen);
 			iomap_set_range_uptodate(folio, poff, plen);
 		} else {
-			iomap_read_folio_range_async(iter, ctx, pos, plen);
+			ctx->folio_unlocked = true;
+			if (read_ops && read_ops->read_folio_range) {
+				ret = read_ops->read_folio_range(iter, folio, pos, plen);
+				if (ret)
+					break;
+			} else {
+				iomap_read_folio_range_async(iter, ctx, pos, plen);
+			}
 		}
 
 		length -= count;
 		ret = iomap_iter_advance(iter, &count);
 		if (ret)
-			return ret;
+			break;
 		pos = iter->pos;
 	}
-	return 0;
+
+	if (ifs) {
+		__iomap_finish_folio_read(folio, 0, 1, ret, false);
+		ctx->folio_unlocked = true;
+	}
+
+	return ret;
 }
 
 static void iomap_readfolio_complete(const struct iomap_iter *iter,
-		const struct iomap_readfolio_ctx *ctx)
+		const struct iomap_readfolio_ctx *ctx,
+		const struct iomap_read_ops *read_ops)
 {
-	iomap_readfolio_submit(iter);
+	if (!read_ops || !read_ops->read_folio_range)
+		iomap_readfolio_submit(iter);
 
 	if (ctx->cur_folio && !ctx->folio_unlocked)
 		folio_unlock(ctx->cur_folio);
 }
 
-int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
+int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
+		const struct iomap_read_ops *read_ops)
 {
 	struct iomap_iter iter = {
 		.inode		= folio->mapping->host,
@@ -522,16 +554,17 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
 	trace_iomap_readpage(iter.inode, 1);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.status = iomap_readfolio_iter(&iter, &ctx);
+		iter.status = iomap_readfolio_iter(&iter, &ctx, read_ops);
 
-	iomap_readfolio_complete(&iter, &ctx);
+	iomap_readfolio_complete(&iter, &ctx, read_ops);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_read_folio);
 
 static int iomap_readahead_iter(struct iomap_iter *iter,
-		struct iomap_readfolio_ctx *ctx)
+		struct iomap_readfolio_ctx *ctx,
+		const struct iomap_read_ops *read_ops)
 {
 	int ret;
 
@@ -545,7 +578,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
 		 */
 		WARN_ON(!ctx->cur_folio);
 		ctx->folio_unlocked = false;
-		ret = iomap_readfolio_iter(iter, ctx);
+		ret = iomap_readfolio_iter(iter, ctx, read_ops);
 		if (ret)
 			return ret;
 	}
@@ -557,6 +590,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
  * iomap_readahead - Attempt to read pages from a file.
  * @rac: Describes the pages to be read.
  * @ops: The operations vector for the filesystem.
+ * @read_ops: Optional ops callers can pass in if they want custom handling.
  *
  * This function is for filesystems to call to implement their readahead
  * address_space operation.
@@ -568,7 +602,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
  * function is called with memalloc_nofs set, so allocations will not cause
  * the filesystem to be reentered.
  */
-void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
+void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops,
+		const struct iomap_read_ops *read_ops)
 {
 	struct iomap_iter iter = {
 		.inode	= rac->mapping->host,
@@ -582,9 +617,9 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
 	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
 
 	while (iomap_iter(&iter, ops) > 0)
-		iter.status = iomap_readahead_iter(&iter, &ctx);
+		iter.status = iomap_readahead_iter(&iter, &ctx, read_ops);
 
-	iomap_readfolio_complete(&iter, &ctx);
+	iomap_readfolio_complete(&iter, &ctx, read_ops);
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1ee4f835ac3c..fb2150c0825a 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -742,14 +742,14 @@ xfs_vm_read_folio(
 	struct file		*unused,
 	struct folio		*folio)
 {
-	return iomap_read_folio(folio, &xfs_read_iomap_ops);
+	return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL);
 }
 
 STATIC void
 xfs_vm_readahead(
 	struct readahead_control	*rac)
 {
-	iomap_readahead(rac, &xfs_read_iomap_ops);
+	iomap_readahead(rac, &xfs_read_iomap_ops, NULL);
 }
 
 static int
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index fd3a5922f6c3..96470daf4d3f 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -112,12 +112,12 @@ static const struct iomap_ops zonefs_write_iomap_ops = {
 
 static int zonefs_read_folio(struct file *unused, struct folio *folio)
 {
-	return iomap_read_folio(folio, &zonefs_read_iomap_ops);
+	return iomap_read_folio(folio, &zonefs_read_iomap_ops, NULL);
 }
 
 static void zonefs_readahead(struct readahead_control *rac)
 {
-	iomap_readahead(rac, &zonefs_read_iomap_ops);
+	iomap_readahead(rac, &zonefs_read_iomap_ops, NULL);
 }
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 0938c4a57f4c..a7247439aeb5 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -178,6 +178,21 @@ struct iomap_write_ops {
 			struct folio *folio, loff_t pos, size_t len);
 };
 
+struct iomap_read_ops {
+	/*
+	 * If the filesystem doesn't provide a custom handler for reading in the
+	 * contents of a folio, iomap will default to issuing asynchronous bio
+	 * read requests.
+	 *
+	 * The read does not need to be done synchronously. The caller is
+	 * responsible for calling iomap_start_folio_read() and
+	 * iomap_finish_folio_read() when reading the folio range. This should
+	 * be done even if an error is encountered during the read.
+	 */
+	int (*read_folio_range)(const struct iomap_iter *iter,
+			struct folio *folio, loff_t pos, size_t len);
+};
+
 /*
  * Flags for iomap_begin / iomap_end.  No flag implies a read.
  */
@@ -339,8 +354,10 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops,
 		const struct iomap_write_ops *write_ops, void *private);
-int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
-void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
+int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops,
+		const struct iomap_read_ops *read_ops);
+void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops,
+		const struct iomap_read_ops *read_ops);
 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
-- 
2.47.3





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux