From: Darrick J. Wong <djwong@xxxxxxxxxx>

When iomap is in use for the page cache, the kernel will take care of
all the file data block IO for us, including zeroing of punched ranges
and post-EOF bytes. fuse2fs only needs to do IO for inline data.

Therefore, set the NOBLOCKIO ext2_file flag so that libext2fs will not
do any regular file IO to or from disk blocks at all.

Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>
---
 misc/fuse2fs.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 misc/fuse4fs.c |   11 ++++++++-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index dcf002f380b843..588b0053f43c95 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -3158,15 +3158,72 @@ static int fuse2fs_punch_posteof(struct fuse2fs *ff, ext2_ino_t ino,
 	return 0;
 }
 
+/*
+ * Decide if file IO for this inode can use iomap.
+ *
+ * It turns out that libfuse creates internal node ids that have nothing to do
+ * with the ext2_ino_t that we give it. These internal node ids are what
+ * actually gets igetted in the kernel, which means that there can be multiple
+ * fuse_inode objects in the kernel for a single hardlinked ondisk ext2 inode.
+ *
+ * What this means, horrifyingly, is that on a fuse filesystem that supports
+ * hard links, the in-kernel i_rwsem does not protect against concurrent writes
+ * between files that point to the same inode. That in turn means that the
+ * file mode and size can get desynchronized between the multiple fuse_inode
+ * objects. This also means that we cannot cache iomaps in the kernel AT ALL
+ * because the caches will get out of sync, leading to WARN_ONs from the iomap
+ * zeroing code and probably data corruption after that.
+ *
+ * Therefore, libfuse won't let us create hardlinks of iomap files, and we must
+ * never turn on iomap for existing hardlinked files. Long term it means we
+ * have to find a way around this loss of functionality. fuse4fs gets around
+ * this by being a low level fuse driver and controlling the nodeids itself.
+ *
+ * Returns 0 for no, 1 for yes, or a negative errno.
+ */
+#ifdef HAVE_FUSE_IOMAP
+static int fuse2fs_file_uses_iomap(struct fuse2fs *ff, ext2_ino_t ino)
+{
+	struct stat statbuf;
+	int ret;
+
+	if (!fuse2fs_iomap_enabled(ff))
+		return 0;
+
+	ret = stat_inode(ff->fs, ino, &statbuf);
+	if (ret)
+		return ret;
+
+	/* the kernel handles all block IO for us in iomap mode */
+	return fuse_fs_can_enable_iomap(&statbuf);
+}
+#else
+# define fuse2fs_file_uses_iomap(...) (0)
+#endif
+
 static int fuse2fs_truncate(struct fuse2fs *ff, ext2_ino_t ino, off_t new_size)
 {
 	ext2_filsys fs = ff->fs;
 	ext2_file_t file;
 	__u64 old_isize;
 	errcode_t err;
+	int flags = EXT2_FILE_WRITE;
 	int ret = 0;
 
-	err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &file);
+	/* the kernel handles all eof zeroing for us in iomap mode */
+	ret = fuse2fs_file_uses_iomap(ff, ino);
+	switch (ret) {
+	case 0:
+		break;
+	case 1:
+		flags |= EXT2_FILE_NOBLOCKIO;
+		ret = 0;
+		break;
+	default:
+		return ret;
+	}
+
+	err = ext2fs_file_open(fs, ino, flags, &file);
 	if (err)
 		return translate_error(fs, ino, err);
 
@@ -3324,6 +3381,19 @@ static int __op_open(struct fuse2fs *ff, const char *path,
 		goto out;
 	}
 
+	/* the kernel handles all block IO for us in iomap mode */
+	ret = fuse2fs_file_uses_iomap(ff, file->ino);
+	switch (ret) {
+	case 0:
+		break;
+	case 1:
+		file->open_flags |= EXT2_FILE_NOBLOCKIO;
+		ret = 0;
+		break;
+	default:
+		goto out;
+	}
+
 	if (fp->flags & O_TRUNC) {
 		ret = fuse2fs_truncate(ff, file->ino, 0);
 		if (ret)
diff --git a/misc/fuse4fs.c b/misc/fuse4fs.c
index 3082c23e398adf..e08c5af5abfd27 100644
--- a/misc/fuse4fs.c
+++ b/misc/fuse4fs.c
@@ -3375,9 +3375,14 @@ static int fuse4fs_truncate(struct fuse4fs *ff, ext2_ino_t ino, off_t new_size)
 	ext2_file_t file;
 	__u64 old_isize;
 	errcode_t err;
+	int flags = EXT2_FILE_WRITE;
 	int ret = 0;
 
-	err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &file);
+	/* the kernel handles all eof zeroing for us in iomap mode */
+	if (fuse4fs_iomap_enabled(ff))
+		flags |= EXT2_FILE_NOBLOCKIO;
+
+	err = ext2fs_file_open(fs, ino, flags, &file);
 	if (err)
 		return translate_error(fs, ino, err);
 
@@ -3472,6 +3477,10 @@ static int fuse4fs_open_file(struct fuse4fs *ff, const struct fuse_ctx *ctxt,
 	if (linked)
 		check |= L_OK;
 
+	/* the kernel handles all block IO for us in iomap mode */
+	if (fuse4fs_iomap_enabled(ff))
+		file->open_flags |= EXT2_FILE_NOBLOCKIO;
+
 	/*
 	 * If the caller wants to truncate the file, we need to ask for full
 	 * write access even if the caller claims to be appending.