On Thu, 2025-08-07 at 12:25 -0400, Mike Snitzer wrote: > Add 'io_cache_write' to NFSD's debugfs interface so that: Any data > written by NFSD will either be: > - cached using page cache (NFSD_IO_BUFFERED=1) > - cached but removed from the page cache upon completion > (NFSD_IO_DONTCACHE=2). > - not cached (NFSD_IO_DIRECT=3) > > io_cache_write may be set by writing to: > /sys/kernel/debug/nfsd/io_cache_write > > If NFSD_IO_DONTCACHE is specified using 2, FOP_DONTCACHE must be > advertised as supported by the underlying filesystem (e.g. XFS), > otherwise all IO flagged with RWF_DONTCACHE will fail with > -EOPNOTSUPP. > > If NFSD_IO_DIRECT is specified using 3, the IO must be aligned > relative to the underlying block device's logical_block_size. Also the > memory buffer used to store the WRITE payload must be aligned relative > to the underlying block device's dma_alignment. And if it isn't, it looks like it falls back on doing regular buffered I/O (at least until patch #7)? > Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> > --- > fs/nfsd/debugfs.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfsd.h | 1 + > fs/nfsd/vfs.c | 16 ++++++++++++++++ > 3 files changed, 61 insertions(+) > > diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c > index c07f71d4e84f4..872de65f0e9ac 100644 > --- a/fs/nfsd/debugfs.c > +++ b/fs/nfsd/debugfs.c > @@ -87,6 +87,47 @@ static int nfsd_io_cache_read_set(void *data, u64 val) > DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_read_fops, nfsd_io_cache_read_get, > nfsd_io_cache_read_set, "%llu\n"); > > +/* > + * /sys/kernel/debug/nfsd/io_cache_write > + * > + * Contents: > + * %1: NFS WRITE will use buffered IO > + * %2: NFS WRITE will use dontcache (buffered IO w/ dropbehind) > + * %3: NFS WRITE will use direct IO > + * > + * The default value of this setting is zero (UNSPECIFIED). > + * This setting takes immediate effect for all NFS versions, > + * all exports, and in all NFSD net namespaces. 
> + */ > + > +static int nfsd_io_cache_write_get(void *data, u64 *val) > +{ > + *val = nfsd_io_cache_write; > + return 0; > +} > + > +static int nfsd_io_cache_write_set(void *data, u64 val) > +{ > + int ret = 0; > + > + switch (val) { > + case NFSD_IO_BUFFERED: > + case NFSD_IO_DONTCACHE: > + case NFSD_IO_DIRECT: > + nfsd_io_cache_write = val; > + break; > + default: > + nfsd_io_cache_write = NFSD_IO_UNSPECIFIED; > + ret = -EINVAL; > + break; > + } > + > + return ret; > +} > + > +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_write_fops, nfsd_io_cache_write_get, > + nfsd_io_cache_write_set, "%llu\n"); > + > void nfsd_debugfs_exit(void) > { > debugfs_remove_recursive(nfsd_top_dir); > @@ -102,4 +143,7 @@ void nfsd_debugfs_init(void) > > debugfs_create_file("io_cache_read", S_IWUSR | S_IRUGO, > nfsd_top_dir, NULL, &nfsd_io_cache_read_fops); > + > + debugfs_create_file("io_cache_write", S_IWUSR | S_IRUGO, > + nfsd_top_dir, NULL, &nfsd_io_cache_write_fops); > } > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 6ef799405145f..fe935b4cda538 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -161,6 +161,7 @@ enum { > }; > > extern u64 nfsd_io_cache_read __read_mostly; > +extern u64 nfsd_io_cache_write __read_mostly; > > extern int nfsd_max_blksize; > > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 26b6d96258711..5768244c7a3c3 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -50,6 +50,7 @@ > > bool nfsd_disable_splice_read __read_mostly; > u64 nfsd_io_cache_read __read_mostly; > +u64 nfsd_io_cache_write __read_mostly; > > /** > * nfserrno - Map Linux errnos to NFS errnos > @@ -1234,6 +1235,21 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, > > nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload); > iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); > + > + switch (nfsd_io_cache_write) { > + case NFSD_IO_DIRECT: > + /* direct I/O must be aligned to device logical sector size */ > + if (nf->nf_dio_mem_align && 
nf->nf_dio_offset_align && > + (((offset | *cnt) & (nf->nf_dio_offset_align-1)) == 0)) > + kiocb.ki_flags |= IOCB_DIRECT; > + break; > + case NFSD_IO_DONTCACHE: > + kiocb.ki_flags |= IOCB_DONTCACHE; > + break; > + case NFSD_IO_BUFFERED: > + break; > + } > + > since = READ_ONCE(file->f_wb_err); > if (verf) > nfsd_copy_write_verifier(verf, nn); Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>