On regular fuse filesystems, i_blkbits is set to PAGE_SHIFT which means any iomap partial writes will mark the entire folio as uptodate. However fuseblk filesystems work differently and allow the blocksize to be less than the page size. As such, this may lead to data corruption if fuseblk sets its blocksize to less than the page size, uses the writeback cache, and does a partial write, then a read and the read happens before the write has undergone writeback, since the folio will not be marked uptodate from the partial write so the read will read in the entire folio from disk, which will overwrite the partial write. The long-term solution for this, which will also be needed for fuse to enable large folios with the writeback cache on, is to have fuse also use iomap for folio reads, but until that is done, the cleanest workaround is to use the page size for fuseblk's internal kernel inode blksize/blkbits values while maintaining current behavior for stat(). This was verified using ntfs-3g: $ sudo mkfs.ntfs -f -c 512 /dev/vdd1 $ sudo ntfs-3g /dev/vdd1 ~/fuseblk $ stat ~/fuseblk/hi.txt IO Block: 512 Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> Fixes: a4c9ab1d4975 ("fuse: use iomap for buffered writes") --- fs/fuse/dir.c | 2 +- fs/fuse/fuse_i.h | 8 ++++++++ fs/fuse/inode.c | 12 +++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ebee7e0b1cd3..5c569c3cb53f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1199,7 +1199,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, if (attr->blksize != 0) blkbits = ilog2(attr->blksize); else - blkbits = inode->i_sb->s_blocksize_bits; + blkbits = fc->blkbits; stat->blksize = 1 << blkbits; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index db44d05c8d02..a6aa16422c30 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -975,6 +975,14 @@ struct fuse_conn { /* Request timeout (in jiffies). 0 = no timeout */ unsigned int req_timeout; } timeout; + + /* + * This is a workaround until fuse uses iomap for reads. + * For fuseblk servers, this represents the blocksize passed in at + * mount time and for regular fuse servers, this is equivalent to + * inode->i_blkbits. + */ + unsigned char blkbits; }; /* diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3bfd83469d9f..7e66419f4556 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -292,7 +292,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, if (attr->blksize) fi->cached_i_blkbits = ilog2(attr->blksize); else - fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; + fi->cached_i_blkbits = fc->blkbits; /* * Don't set the sticky bit in i_mode, unless we want the VFS @@ -1810,10 +1810,20 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err = -EINVAL; if (!sb_set_blocksize(sb, ctx->blksize)) goto err; + /* + * This is a workaround until fuse hooks into iomap for reads. + * Use PAGE_SIZE for the blocksize else if the writeback cache + * is enabled, buffered writes go through iomap and a read + * may overwrite partially written data if blocksize < PAGE_SIZE + */ + fc->blkbits = sb->s_blocksize_bits; + if (!sb_set_blocksize(sb, PAGE_SIZE)) + goto err; #endif } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; + fc->blkbits = sb->s_blocksize_bits; } sb->s_subtype = ctx->subtype; -- 2.47.3