From: Darrick J. Wong <djwong@xxxxxxxxxx> Implement inline data file IO by issuing FUSE_READ/FUSE_WRITE commands in response to an inline data mapping. Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> --- fs/fuse/fuse_trace.h | 45 +++++++++++++ fs/fuse/file_iomap.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+) diff --git a/fs/fuse/fuse_trace.h b/fs/fuse/fuse_trace.h index 2f4c78ba498177..4ebd9a9e697ce2 100644 --- a/fs/fuse/fuse_trace.h +++ b/fs/fuse/fuse_trace.h @@ -234,6 +234,7 @@ DEFINE_FUSE_BACKING_EVENT(fuse_backing_close); #if IS_ENABLED(CONFIG_FUSE_IOMAP) struct iomap_writepage_ctx; struct iomap_ioend; +struct iomap; /* tracepoint boilerplate so we don't have to keep doing this */ #define FUSE_IOMAP_OPFLAGS_FIELD \ @@ -1071,6 +1072,50 @@ TRACE_EVENT(fuse_iomap_dev_inval, __entry->offset, __entry->length) ); + +DECLARE_EVENT_CLASS(fuse_iomap_inline_class, + TP_PROTO(const struct inode *inode, loff_t pos, uint64_t count, + const struct iomap *map), + TP_ARGS(inode, pos, count, map), + + TP_STRUCT__entry( + FUSE_IO_RANGE_FIELDS() + FUSE_IOMAP_MAP_FIELDS(map) + __field(bool, has_buf) + __field(uint64_t, validity_cookie) + ), + + TP_fast_assign( + FUSE_INODE_ASSIGN(inode, fi, fm); + __entry->offset = pos; + __entry->length = count; + + __entry->mapdev = FUSE_IOMAP_DEV_NULL; + __entry->mapaddr = map->addr; + __entry->mapoffset = map->offset; + __entry->maplength = map->length; + __entry->maptype = map->type; + __entry->mapflags = map->flags; + + __entry->has_buf = map->inline_data != NULL; + __entry->validity_cookie= map->validity_cookie; + ), + + TP_printk(FUSE_IO_RANGE_FMT() FUSE_IOMAP_MAP_FMT() " has_buf? 
%d cookie 0x%llx", + FUSE_IO_RANGE_PRINTK_ARGS(), + FUSE_IOMAP_MAP_PRINTK_ARGS(map), + __entry->has_buf, + __entry->validity_cookie) +); +#define DEFINE_FUSE_IOMAP_INLINE_EVENT(name) \ +DEFINE_EVENT(fuse_iomap_inline_class, name, \ + TP_PROTO(const struct inode *inode, loff_t pos, uint64_t count, \ + const struct iomap *map), \ + TP_ARGS(inode, pos, count, map)) +DEFINE_FUSE_IOMAP_INLINE_EVENT(fuse_iomap_inline_read); +DEFINE_FUSE_IOMAP_INLINE_EVENT(fuse_iomap_inline_write); +DEFINE_FUSE_IOMAP_INLINE_EVENT(fuse_iomap_set_inline_iomap); +DEFINE_FUSE_IOMAP_INLINE_EVENT(fuse_iomap_set_inline_srcmap); #endif /* CONFIG_FUSE_IOMAP */ #endif /* _TRACE_FUSE_H */ diff --git a/fs/fuse/file_iomap.c b/fs/fuse/file_iomap.c index 1b389d7792e965..4c8fef25b0749b 100644 --- a/fs/fuse/file_iomap.c +++ b/fs/fuse/file_iomap.c @@ -421,6 +421,157 @@ fuse_iomap_find_dev(struct fuse_conn *fc, const struct fuse_iomap_io *map) return ret; } +static inline int fuse_iomap_inline_alloc(struct iomap *iomap) +{ + ASSERT(iomap->inline_data == NULL); + ASSERT(iomap->length > 0); + + iomap->inline_data = kvzalloc(iomap->length, GFP_KERNEL); + return iomap->inline_data ? 0 : -ENOMEM; +} + +static inline void fuse_iomap_inline_free(struct iomap *iomap) +{ + kvfree(iomap->inline_data); + iomap->inline_data = NULL; +} + +/* + * Use the FUSE_READ command to read inline file data from the fuse server. + * Note that there's no file handle attached, so the fuse server must be able + * to reconnect to the inode via the nodeid. 
+ */ +static int fuse_iomap_inline_read(struct inode *inode, loff_t pos, + loff_t count, struct iomap *iomap) +{ + struct fuse_read_in in = { + .offset = pos, + .size = count, + }; + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_mount *fm = get_fuse_mount(inode); + FUSE_ARGS(args); + ssize_t ret; + + if (BAD_DATA(!iomap_inline_data_valid(iomap))) + return -EFSCORRUPTED; + + trace_fuse_iomap_inline_read(inode, pos, count, iomap); + + args.opcode = FUSE_READ; + args.nodeid = fi->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(in); + args.in_args[0].value = &in; + args.out_argvar = true; + args.out_numargs = 1; + args.out_args[0].size = count; + args.out_args[0].value = iomap_inline_data(iomap, pos); + + ret = fuse_simple_request(fm, &args); + if (ret < 0) { + fuse_iomap_inline_free(iomap); + return ret; + } + /* no readahead means something bad happened */ + if (ret == 0) { + fuse_iomap_inline_free(iomap); + return -EIO; + } + + return 0; +} + +/* + * Use the FUSE_WRITE command to write inline file data to the fuse server. + * Note that there's no file handle attached, so the fuse server must be able + * to reconnect to the inode via the nodeid.
+ */ +static int fuse_iomap_inline_write(struct inode *inode, loff_t pos, + loff_t count, struct iomap *iomap) +{ + struct fuse_write_in in = { + .offset = pos, + .size = count, + }; + struct fuse_write_out out = { }; + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_mount *fm = get_fuse_mount(inode); + FUSE_ARGS(args); + ssize_t ret; + + if (BAD_DATA(!iomap_inline_data_valid(iomap))) + return -EFSCORRUPTED; + + trace_fuse_iomap_inline_write(inode, pos, count, iomap); + + args.opcode = FUSE_WRITE; + args.nodeid = fi->nodeid; + args.in_numargs = 2; + args.in_args[0].size = sizeof(in); + args.in_args[0].value = &in; + args.in_args[1].size = count; + args.in_args[1].value = iomap_inline_data(iomap, pos); + args.out_numargs = 1; + args.out_args[0].size = sizeof(out); + args.out_args[0].value = &out; + + ret = fuse_simple_request(fm, &args); + if (ret < 0) { + fuse_iomap_inline_free(iomap); + return ret; + } + /* short write means something bad happened */ + if (out.size < count) { + fuse_iomap_inline_free(iomap); + return -EIO; + } + + return 0; +} + +/* Set up inline data buffers for iomap_begin */ +static int fuse_iomap_set_inline(struct inode *inode, unsigned opflags, + loff_t pos, loff_t count, + struct iomap *iomap, struct iomap *srcmap) +{ + int err; + + if (opflags & IOMAP_REPORT) + return 0; + + if (fuse_is_iomap_file_write(opflags)) { + if (iomap->type == IOMAP_INLINE) { + err = fuse_iomap_inline_alloc(iomap); + if (err) + return err; + } + + if (srcmap->type == IOMAP_INLINE) { + err = fuse_iomap_inline_alloc(srcmap); + if (!err) + err = fuse_iomap_inline_read(inode, pos, count, + srcmap); + if (err) { + fuse_iomap_inline_free(iomap); + return err; + } + } + } else if (iomap->type == IOMAP_INLINE) { + /* inline data read */ + err = fuse_iomap_inline_alloc(iomap); + if (!err) + err = fuse_iomap_inline_read(inode, pos, count, iomap); + if (err) + return err; + } + + trace_fuse_iomap_set_inline_iomap(inode, pos, count, iomap); +
trace_fuse_iomap_set_inline_srcmap(inode, pos, count, srcmap); + + return 0; +} + static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t count, unsigned opflags, struct iomap *iomap, struct iomap *srcmap) @@ -490,12 +641,20 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t count, fuse_iomap_from_server(inode, iomap, read_dev, &outarg.read); } + if (iomap->type == IOMAP_INLINE || srcmap->type == IOMAP_INLINE) { + err = fuse_iomap_set_inline(inode, opflags, pos, count, iomap, + srcmap); + if (err) + goto out_write_dev; + } + /* * XXX: if we ever want to support closing devices, we need a way to * track the fuse_backing refcount all the way through bio endios. * For now we put the refcount here because you can't remove an iomap * device until unmount time. */ +out_write_dev: fuse_backing_put(write_dev); out_read_dev: fuse_backing_put(read_dev); @@ -534,8 +693,28 @@ static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t count, { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_mount *fm = get_fuse_mount(inode); + struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap); + struct iomap *srcmap = &iter->srcmap; int err = 0; + if (srcmap->inline_data) + fuse_iomap_inline_free(srcmap); + + if (iomap->inline_data) { + if (fuse_is_iomap_file_write(opflags) && written > 0) { + err = fuse_iomap_inline_write(inode, pos, written, + iomap); + fuse_iomap_inline_free(iomap); + if (err) + return err; + } else { + fuse_iomap_inline_free(iomap); + } + + /* fuse server should already be aware of what happened */ + return 0; + } + if (fuse_should_send_iomap_end(fm, iomap, opflags, count, written)) { struct fuse_iomap_end_in inarg = { .opflags = fuse_iomap_op_to_server(opflags),