On Mon, 2025-04-28 at 09:49 -0700, trondmy@xxxxxxxxxx wrote: > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > The Linux client assumes that all filehandles are non-volatile for > renames within the same directory (otherwise sillyrename cannot work). > However, the existence of the Linux 'subtree_check' export option has > meant that nfs_rename() has always assumed it needs to flush writes > before attempting to rename. > > Since NFSv4 does allow the client to query whether or not the server > exhibits this behaviour, and since knfsd does actually set the > appropriate flag when 'subtree_check' is enabled on an export, it > should be OK to optimise away the write flushing behaviour in the cases > where it is clearly not needed. > > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > --- > fs/nfs/client.c | 2 ++ > fs/nfs/dir.c | 15 ++++++++++++++- > include/linux/nfs_fs_sb.h | 12 +++++++++--- > 3 files changed, 25 insertions(+), 4 deletions(-) > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > index 2115c1189c2d..6d63b958c4bb 100644 > --- a/fs/nfs/client.c > +++ b/fs/nfs/client.c > @@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) > if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) > server->namelen = NFS2_MAXNAMLEN; > } > + /* Linux 'subtree_check' borkenness mandates this setting */ > + server->fh_expire_type = NFS_FH_VOL_RENAME; > > if (!(fattr->valid & NFS_ATTR_FATTR)) { > error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index bd23fc736b39..d0e0b435a843 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) > unblock_revalidate(new_dentry); > } > > +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry, > + struct dentry *new_dentry) > +{ > + struct nfs_server *server = NFS_SB(old_dentry->d_sb); > + > + if (old_dentry->d_parent != new_dentry->d_parent) > + return false; > + if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE) > + return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN); > + return true; > +} > + > /* > * RENAME > * FIXME: Some nfsds, like the Linux user space nfsd, may generate a > @@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, > > } > > - if (S_ISREG(old_inode->i_mode)) > + if (S_ISREG(old_inode->i_mode) && > + nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry)) > nfs_sync_inode(old_inode); > task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, > must_unblock ? nfs_unblock_rename : NULL); > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h > index 71319637a84e..ee03f3cef30c 100644 > --- a/include/linux/nfs_fs_sb.h > +++ b/include/linux/nfs_fs_sb.h > @@ -213,6 +213,15 @@ struct nfs_server { > char *fscache_uniq; /* Uniquifier (or NULL) */ > #endif > > + /* The following #defines numerically match the NFSv4 equivalents */ > +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) > +#define NFS_FH_VOLATILE_ANY (0x2) > +#define NFS_FH_VOL_MIGRATION (0x4) > +#define NFS_FH_VOL_RENAME (0x8) > +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) > + u32 fh_expire_type; /* V4 bitmask representing file > + handle volatility type for > + this filesystem */ > u32 pnfs_blksize; /* layout_blksize attr */ > #if IS_ENABLED(CONFIG_NFS_V4) > u32 attr_bitmask[3];/* V4 bitmask representing the set > @@ -236,9 +245,6 @@ struct nfs_server { > u32 acl_bitmask; /* V4 bitmask representing the ACEs > that are supported on this > filesystem */ > - u32 fh_expire_type; /* V4 bitmask representing file > - handle volatility type for > - this filesystem */ > struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ > struct rpc_wait_queue roc_rpcwaitq; > void *pnfs_ld_data; /* per mount point data */ Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>