Add 'io_cache_read' to NFSD's debugfs interface so that: Any data read by NFSD will either be: - cached using page cache (NFSD_IO_BUFFERED=1) - cached but removed from the page cache upon completion (NFSD_IO_DONTCACHE=2). - not cached (NFSD_IO_DIRECT=3) io_cache_read may be set by writing to: /sys/kernel/debug/nfsd/io_cache_read If NFSD_IO_DONTCACHE is specified using 2, FOP_DONTCACHE must be advertised as supported by the underlying filesystem (e.g. XFS), otherwise all IO flagged with RWF_DONTCACHE will fail with -EOPNOTSUPP. If NFSD_IO_DIRECT is specified using 3, the IO must be aligned relative to the underlying block device's logical_block_size. Also the memory buffer used to store the read must be aligned relative to the underlying block device's dma_alignment. Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> --- fs/nfsd/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsd.h | 9 ++++++++ fs/nfsd/vfs.c | 18 +++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c index 84b0c8b559dc9..3cadd45868b48 100644 --- a/fs/nfsd/debugfs.c +++ b/fs/nfsd/debugfs.c @@ -27,11 +27,65 @@ static int nfsd_dsr_get(void *data, u64 *val) static int nfsd_dsr_set(void *data, u64 val) { nfsd_disable_splice_read = (val > 0) ? true : false; + if (!nfsd_disable_splice_read) { + /* + * Cannot use NFSD_IO_DONTCACHE or NFSD_IO_DIRECT + * if splice_read is enabled. + */ + nfsd_io_cache_read = NFSD_IO_BUFFERED; + } return 0; } DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n"); +/* + * /sys/kernel/debug/nfsd/io_cache_read + * + * Contents: + * %1: NFS READ will use buffered IO + * %2: NFS READ will use dontcache (buffered IO w/ dropbehind) + * %3: NFS READ will use direct IO + * + * The default value of this setting is zero (UNSPECIFIED). + * This setting takes immediate effect for all NFS versions, + * all exports, and in all NFSD net namespaces. + */ + +static int nfsd_io_cache_read_get(void *data, u64 *val) +{ + *val = nfsd_io_cache_read; + return 0; +} + +static int nfsd_io_cache_read_set(void *data, u64 val) +{ + int ret = 0; + + switch (val) { + case NFSD_IO_BUFFERED: + nfsd_io_cache_read = NFSD_IO_BUFFERED; + break; + case NFSD_IO_DONTCACHE: + case NFSD_IO_DIRECT: + /* + * Must disable splice_read when enabling + * NFSD_IO_DONTCACHE or NFSD_IO_DIRECT. + */ + nfsd_disable_splice_read = true; + nfsd_io_cache_read = val; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_read_fops, nfsd_io_cache_read_get, + nfsd_io_cache_read_set, "%llu\n"); + void nfsd_debugfs_exit(void) { debugfs_remove_recursive(nfsd_top_dir); @@ -44,4 +98,7 @@ void nfsd_debugfs_init(void) debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO, nfsd_top_dir, NULL, &nfsd_dsr_fops); + + debugfs_create_file("io_cache_read", S_IWUSR | S_IRUGO, + nfsd_top_dir, NULL, &nfsd_io_cache_read_fops); } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 1cd0bed57bc2f..6ef799405145f 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -153,6 +153,15 @@ static inline void nfsd_debugfs_exit(void) {} extern bool nfsd_disable_splice_read __read_mostly; +enum { + NFSD_IO_UNSPECIFIED = 0, + NFSD_IO_BUFFERED, + NFSD_IO_DONTCACHE, + NFSD_IO_DIRECT, +}; + +extern u64 nfsd_io_cache_read __read_mostly; + extern int nfsd_max_blksize; static inline int nfsd_v4client(struct svc_rqst *rq) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 79439ad93880a..8ea8b80097195 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -49,6 +49,7 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP bool nfsd_disable_splice_read __read_mostly; +u64 nfsd_io_cache_read __read_mostly; /** * nfserrno - Map Linux errnos to NFS errnos @@ -1099,6 +1100,23 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, size_t len; init_sync_kiocb(&kiocb, file); + + switch (nfsd_io_cache_read) { + case NFSD_IO_DIRECT: + /* Verify ondisk and memory DIO alignment */ + if (nf->nf_dio_mem_align && nf->nf_dio_read_offset_align && + (((offset | *count) & (nf->nf_dio_read_offset_align - 1)) == 0) && + (base & (nf->nf_dio_mem_align - 1)) == 0) + kiocb.ki_flags = IOCB_DIRECT; + break; + case NFSD_IO_DONTCACHE: + kiocb.ki_flags = IOCB_DONTCACHE; + fallthrough; + case NFSD_IO_UNSPECIFIED: + case NFSD_IO_BUFFERED: + break; + } + kiocb.ki_pos = offset; v = 0; -- 2.44.0