On 7/10/25 10:06 AM, Jeff Layton wrote: > On Tue, 2025-07-08 at 12:06 -0400, Mike Snitzer wrote: >> Add 'io_cache_read' to NFSD's debugfs interface so that: Any data >> read by NFSD will either be: >> - cached using page cache (NFSD_IO_BUFFERED=0) >> - cached but removed from the page cache upon completion >> (NFSD_IO_DONTCACHE=1). >> - not cached (NFSD_IO_DIRECT=2) >> >> io_cache_read is 0 by default. It may be set by writing to: >> /sys/kernel/debug/nfsd/io_cache_read >> >> If NFSD_IO_DONTCACHE is specified using 1, FOP_DONTCACHE must be >> advertised as supported by the underlying filesystem (e.g. XFS), >> otherwise all IO flagged with RWF_DONTCACHE will fail with >> -EOPNOTSUPP. >> >> If NFSD_IO_DIRECT is specified using 2, the IO must be aligned >> relative to the underlying block device's logical_block_size. Also the >> memory buffer used to store the read must be aligned relative to the >> underlying block device's dma_alignment. >> >> Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> >> --- >> fs/nfsd/debugfs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfsd.h | 8 +++++++ >> fs/nfsd/vfs.c | 15 ++++++++++++++ >> 3 files changed, 76 insertions(+) >> >> diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c >> index 84b0c8b559dc..709646af797a 100644 >> --- a/fs/nfsd/debugfs.c >> +++ b/fs/nfsd/debugfs.c >> @@ -27,11 +27,61 @@ static int nfsd_dsr_get(void *data, u64 *val) >> static int nfsd_dsr_set(void *data, u64 val) >> { >> nfsd_disable_splice_read = (val > 0) ? true : false; >> + if (!nfsd_disable_splice_read) { >> + /* >> + * Cannot use NFSD_IO_DONTCACHE or NFSD_IO_DIRECT >> + * if splice_read is enabled. >> + */ >> + nfsd_io_cache_read = NFSD_IO_BUFFERED; >> + } >> return 0; >> } >> >> DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n"); >> >> +/* >> + * /sys/kernel/debug/nfsd/io_cache_read >> + * >> + * Contents: >> + * %0: NFS READ will use buffered IO (default) >> + * %1: NFS READ will use dontcache (buffered IO w/ dropbehind) >> + * %2: NFS READ will use direct IO >> + * >> + * The default value of this setting is zero (buffered IO is >> + * used). This setting takes immediate effect for all NFS >> + * versions, all exports, and in all NFSD net namespaces. >> + */ >> + > > Could we switch this to use a string instead? Maybe > buffered/dontcache/direct ? That thought occurred to me too, since it would make the API a little more self-documenting, and might be a harbinger of what a future export option might look like. >> +static int nfsd_io_cache_read_get(void *data, u64 *val) >> +{ >> + *val = nfsd_io_cache_read; >> + return 0; >> +} >> + >> +static int nfsd_io_cache_read_set(void *data, u64 val) >> +{ >> + switch (val) { >> + case NFSD_IO_DONTCACHE: >> + case NFSD_IO_DIRECT: >> + /* >> + * Must disable splice_read when enabling >> + * NFSD_IO_DONTCACHE or NFSD_IO_DIRECT. >> + */ >> + nfsd_disable_splice_read = true; >> + nfsd_io_cache_read = val; >> + break; >> + case NFSD_IO_BUFFERED: >> + default: >> + nfsd_io_cache_read = NFSD_IO_BUFFERED; >> + break; > > I think the default case should leave nfsd_io_cache_read alone and > return an error. If we add new values later, and someone tries to use > them on an old kernel, it's better to make that attempt error out. > > Ditto for the write side controls. +1 on both accounts. >> + } >> + >> + return 0; >> +} >> + >> +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_read_fops, nfsd_io_cache_read_get, >> + nfsd_io_cache_read_set, "%llu\n"); >> + >> void nfsd_debugfs_exit(void) >> { >> debugfs_remove_recursive(nfsd_top_dir); >> @@ -44,4 +94,7 @@ void nfsd_debugfs_init(void) >> >> debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO, >> nfsd_top_dir, NULL, &nfsd_dsr_fops); >> + >> + debugfs_create_file("io_cache_read", S_IWUSR | S_IRUGO, >> + nfsd_top_dir, NULL, &nfsd_io_cache_read_fops); >> } >> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h >> index 1cd0bed57bc2..4740567f4e7e 100644 >> --- a/fs/nfsd/nfsd.h >> +++ b/fs/nfsd/nfsd.h >> @@ -153,6 +153,14 @@ static inline void nfsd_debugfs_exit(void) {} >> >> extern bool nfsd_disable_splice_read __read_mostly; >> >> +enum { >> + NFSD_IO_BUFFERED = 0, >> + NFSD_IO_DONTCACHE, >> + NFSD_IO_DIRECT >> +}; >> + >> +extern u64 nfsd_io_cache_read __read_mostly; >> + >> extern int nfsd_max_blksize; >> >> static inline int nfsd_v4client(struct svc_rqst *rq) >> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c >> index 845c212ad10b..632ce417f4ef 100644 >> --- a/fs/nfsd/vfs.c >> +++ b/fs/nfsd/vfs.c >> @@ -49,6 +49,7 @@ >> #define NFSDDBG_FACILITY NFSDDBG_FILEOP >> >> bool nfsd_disable_splice_read __read_mostly; >> +u64 nfsd_io_cache_read __read_mostly; >> >> /** >> * nfserrno - Map Linux errnos to NFS errnos >> @@ -1107,6 +1108,20 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, >> >> trace_nfsd_read_vector(rqstp, fhp, offset, *count); >> iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); >> + >> + switch (nsfd_io_cache_read) { >> + case NFSD_IO_DIRECT: >> + if (iov_iter_is_aligned(&iter, nf->nf_dio_mem_align - 1, >> + nf->nf_dio_read_offset_align - 1)) >> + kiocb.ki_flags = IOCB_DIRECT; >> + break; >> + case NFSD_IO_DONTCACHE: >> + kiocb.ki_flags = IOCB_DONTCACHE; >> + break; >> + case NFSD_IO_BUFFERED: >> + break; >> + } >> + >> host_err = vfs_iocb_iter_read(file, &kiocb, &iter); >> return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); >> } > -- Chuck Lever