Add 'io_cache_write' to NFSD's debugfs interface so that any data written by NFSD will be either: - cached using the page cache (NFSD_IO_BUFFERED=0), - cached but removed from the page cache upon completion (NFSD_IO_DONTCACHE=1), or - not cached (NFSD_IO_DIRECT=2). io_cache_write may be set by writing to: /sys/kernel/debug/nfsd/io_cache_write The default value for io_cache_write reflects NFSD's current default IO mode (NFSD_IO_BUFFERED=0). If NFSD_IO_DONTCACHE is specified using 1, FOP_DONTCACHE must be advertised as supported by the underlying filesystem (e.g. XFS); otherwise all IO flagged with RWF_DONTCACHE will fail with -EOPNOTSUPP. If NFSD_IO_DIRECT is specified using 2, the IO must be aligned relative to the underlying block device's logical_block_size. In addition, the memory buffer used to store the WRITE payload must be aligned relative to the underlying block device's dma_alignment. Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> --- fs/nfsd/debugfs.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsd.h | 1 + fs/nfsd/vfs.c | 15 +++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c index dd1dc28a53784..173032a04cdec 100644 --- a/fs/nfsd/debugfs.c +++ b/fs/nfsd/debugfs.c @@ -85,6 +85,45 @@ static int nfsd_io_cache_read_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_read_fops, nfsd_io_cache_read_get, nfsd_io_cache_read_set, "%llu\n"); +/* + * /sys/kernel/debug/nfsd/io_cache_write + * + * Contents: + * %0: NFS WRITE will use buffered IO + * %1: NFS WRITE will use dontcache (buffered IO w/ dropbehind) + * %2: NFS WRITE will use direct IO + * + * This setting takes immediate effect for all NFS versions, + * all exports, and in all NFSD net namespaces. 
+ */ +/* Debugfs getter: store the current nfsd_io_cache_write mode in *val; data is unused and the result is always 0. */ +static int nfsd_io_cache_write_get(void *data, u64 *val) +{ + *val = nfsd_io_cache_write; + return 0; +} +/* Debugfs setter: accept only NFSD_IO_BUFFERED, NFSD_IO_DONTCACHE or NFSD_IO_DIRECT; any other value returns -EINVAL. data is unused. */ +static int nfsd_io_cache_write_set(void *data, u64 val) +{ + int ret = 0; + + switch (val) { + case NFSD_IO_BUFFERED: + case NFSD_IO_DONTCACHE: + case NFSD_IO_DIRECT: + nfsd_io_cache_write = val; /* recognised mode: store it in the global */ + break; + default: + ret = -EINVAL; /* unrecognised mode: leave the current setting untouched */ + break; + } + + return ret; +} +/* Binds the two handlers above into nfsd_io_cache_write_fops, which nfsd_debugfs_init() passes to debugfs_create_file(). */ +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_write_fops, nfsd_io_cache_write_get, + nfsd_io_cache_write_set, "%llu\n"); + void nfsd_debugfs_exit(void) { debugfs_remove_recursive(nfsd_top_dir); @@ -100,4 +139,7 @@ void nfsd_debugfs_init(void) debugfs_create_file("io_cache_read", S_IWUSR | S_IRUGO, nfsd_top_dir, NULL, &nfsd_io_cache_read_fops); + + debugfs_create_file("io_cache_write", S_IWUSR | S_IRUGO, + nfsd_top_dir, NULL, &nfsd_io_cache_write_fops); } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 41cb7c7feff3e..c491eb258ecd3 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -161,6 +161,7 @@ enum { }; extern u64 nfsd_io_cache_read __read_mostly; +extern u64 nfsd_io_cache_write __read_mostly; extern int nfsd_max_blksize; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 21441745df69a..358d10a0665f6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -50,6 +50,7 @@ bool nfsd_disable_splice_read __read_mostly; u64 nfsd_io_cache_read __read_mostly = NFSD_IO_BUFFERED; +u64 nfsd_io_cache_write __read_mostly = NFSD_IO_BUFFERED; /** * nfserrno - Map Linux errnos to NFS errnos @@ -1241,6 +1242,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); + + switch (nfsd_io_cache_write) { + case NFSD_IO_DIRECT: + /* direct I/O must be aligned to device logical sector size */ + if (nf->nf_dio_mem_align && nf->nf_dio_offset_align && + (((offset | *cnt) & (nf->nf_dio_offset_align-1)) == 0)) + kiocb.ki_flags |= IOCB_DIRECT; + break; + case NFSD_IO_DONTCACHE: + kiocb.ki_flags |= IOCB_DONTCACHE; + break; + case 
NFSD_IO_BUFFERED: + break; + } host_err = vfs_iocb_iter_write(file, &kiocb, &iter); if (host_err < 0) { commit_reset_write_verifier(nn, rqstp, host_err); -- 2.44.0