On 6/10/25 4:57 PM, Mike Snitzer wrote: > Add 'enable-dontcache' to NFSD's debugfs interface so that: Any data > read or written by NFSD will either not be cached (thanks to O_DIRECT) > or will be removed from the page cache upon completion (DONTCACHE). I thought we were going to do two switches: One for reads and one for writes? I could be misremembering. After all, you are describing two different facilities here: a form of direct I/O for READs, and RWF_DONTCACHE for WRITEs (I think?). > enable-dontcache is 0 by default. It may be enabled with: > echo 1 > /sys/kernel/debug/nfsd/enable-dontcache > > FOP_DONTCACHE must be advertised as supported by the underlying > filesystem (e.g. XFS), otherwise if/when 'enable-dontcache' is 1 > all IO flagged with RWF_DONTCACHE will fail with -EOPNOTSUPP. > Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> > --- > fs/nfsd/debugfs.c | 39 +++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfsd.h | 1 + > fs/nfsd/vfs.c | 12 +++++++++++- > 3 files changed, 51 insertions(+), 1 deletion(-) > > diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c > index 84b0c8b559dc..8decdec60a8e 100644 > --- a/fs/nfsd/debugfs.c > +++ b/fs/nfsd/debugfs.c > @@ -32,6 +32,42 @@ static int nfsd_dsr_set(void *data, u64 val) > > DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n"); > > +/* > + * /sys/kernel/debug/nfsd/enable-dontcache > + * > + * Contents: > + * %0: NFS READ and WRITE are not allowed to use dontcache > + * %1: NFS READ and WRITE are allowed to use dontcache > + * > + * NFSD's dontcache support reserves the right to use O_DIRECT > + * if it chooses (instead of dontcache's usual pagecache-based > + * dropbehind semantics). > + * > + * The default value of this setting is zero (dontcache is > + * disabled). This setting takes immediate effect for all NFS > + * versions, all exports, and in all NFSD net namespaces. > + */ > + > +static int nfsd_dontcache_get(void *data, u64 *val) > +{ > + *val = nfsd_enable_dontcache ? 
1 : 0; > + return 0; > +} > + > +static int nfsd_dontcache_set(void *data, u64 val) > +{ > + if (val > 0) { > + /* Must first also disable-splice-read */ > + nfsd_disable_splice_read = true; > + nfsd_enable_dontcache = true; > + } else > + nfsd_enable_dontcache = false; > + return 0; > +} > + > +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dontcache_fops, nfsd_dontcache_get, > + nfsd_dontcache_set, "%llu\n"); > + > void nfsd_debugfs_exit(void) > { > debugfs_remove_recursive(nfsd_top_dir); > @@ -44,4 +80,7 @@ void nfsd_debugfs_init(void) > > debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO, > nfsd_top_dir, NULL, &nfsd_dsr_fops); > + > + debugfs_create_file("enable-dontcache", S_IWUSR | S_IRUGO, > + nfsd_top_dir, NULL, &nfsd_dontcache_fops); > } > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 1bfd0b4e9af7..00546547eae6 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -155,6 +155,7 @@ static inline void nfsd_debugfs_exit(void) {} > #endif > > extern bool nfsd_disable_splice_read __read_mostly; > +extern bool nfsd_enable_dontcache __read_mostly; > > extern int nfsd_max_blksize; > > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 7d94fae1dee8..bba3e6f4f56b 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -49,6 +49,7 @@ > #define NFSDDBG_FACILITY NFSDDBG_FILEOP > > bool nfsd_disable_splice_read __read_mostly; > +bool nfsd_enable_dontcache __read_mostly; > > /** > * nfserrno - Map Linux errnos to NFS errnos > @@ -1086,6 +1087,7 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned long v, total; > struct iov_iter iter; > loff_t ppos = offset; > + rwf_t flags = 0; > ssize_t host_err; > size_t len; > > @@ -1103,7 +1105,11 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > > trace_nfsd_read_vector(rqstp, fhp, offset, *count); > iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); > - host_err = vfs_iter_read(file, &iter, &ppos, 0); > + > + if (nfsd_enable_dontcache) > + flags |= RWF_DONTCACHE; Two 
things: - Maybe NFSD should record whether the file system is DONTCACHE-enabled in @fhp or in the export it is associated with, and then check that setting here before asserting RWF_DONTCACHE. - I thought we were going with O_DIRECT for READs. > + > + host_err = vfs_iter_read(file, &iter, &ppos, flags); > return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); > } > > @@ -1209,6 +1215,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, > > nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload); > iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); > + > + if (nfsd_enable_dontcache) > + flags |= RWF_DONTCACHE; > + > since = READ_ONCE(file->f_wb_err); > if (verf) > nfsd_copy_write_verifier(verf, nn); -- Chuck Lever