Re: [PATCH v2] NFS: Avoid flushing data while holding directory locks in nfs_rename()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2025-04-28 at 09:49 -0700, trondmy@xxxxxxxxxx wrote:
> From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> 
> The Linux client assumes that all filehandles are non-volatile for
> renames within the same directory (otherwise sillyrename cannot work).
> However, the existence of the Linux 'subtree_check' export option has
> meant that nfs_rename() has always assumed it needs to flush writes
> before attempting to rename.
> 
> Since NFSv4 does allow the client to query whether or not the server
> exhibits this behaviour, and since knfsd does actually set the
> appropriate flag when 'subtree_check' is enabled on an export, it
> should be OK to optimise away the write flushing behaviour in the cases
> where it is clearly not needed.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> ---
>  fs/nfs/client.c           |  2 ++
>  fs/nfs/dir.c              | 15 ++++++++++++++-
>  include/linux/nfs_fs_sb.h | 12 +++++++++---
>  3 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 2115c1189c2d..6d63b958c4bb 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
>  		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
>  			server->namelen = NFS2_MAXNAMLEN;
>  	}
> +	/* Linux 'subtree_check' borkenness mandates this setting */
> +	server->fh_expire_type = NFS_FH_VOL_RENAME;
>  
>  	if (!(fattr->valid & NFS_ATTR_FATTR)) {
>  		error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index bd23fc736b39..d0e0b435a843 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
>  	unblock_revalidate(new_dentry);
>  }
>  
> +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
> +					   struct dentry *new_dentry)
> +{
> +	struct nfs_server *server = NFS_SB(old_dentry->d_sb);
> +
> +	if (old_dentry->d_parent != new_dentry->d_parent)
> +		return false;
> +	if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
> +		return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
> +	return true;
> +}
> +
>  /*
>   * RENAME
>   * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
> @@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
>  
>  	}
>  
> -	if (S_ISREG(old_inode->i_mode))
> +	if (S_ISREG(old_inode->i_mode) &&
> +	    nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
>  		nfs_sync_inode(old_inode);
>  	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
>  				must_unblock ? nfs_unblock_rename : NULL);
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 71319637a84e..ee03f3cef30c 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -213,6 +213,15 @@ struct nfs_server {
>  	char			*fscache_uniq;	/* Uniquifier (or NULL) */
>  #endif
>  
> +	/* The following #defines numerically match the NFSv4 equivalents */
> +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1)
> +#define NFS_FH_VOLATILE_ANY (0x2)
> +#define NFS_FH_VOL_MIGRATION (0x4)
> +#define NFS_FH_VOL_RENAME (0x8)
> +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME)
> +	u32			fh_expire_type;	/* V4 bitmask representing file
> +						   handle volatility type for
> +						   this filesystem */
>  	u32			pnfs_blksize;	/* layout_blksize attr */
>  #if IS_ENABLED(CONFIG_NFS_V4)
>  	u32			attr_bitmask[3];/* V4 bitmask representing the set
> @@ -236,9 +245,6 @@ struct nfs_server {
>  	u32			acl_bitmask;	/* V4 bitmask representing the ACEs
>  						   that are supported on this
>  						   filesystem */
> -	u32			fh_expire_type;	/* V4 bitmask representing file
> -						   handle volatility type for
> -						   this filesystem */
>  	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
>  	struct rpc_wait_queue	roc_rpcwaitq;
>  	void			*pnfs_ld_data;	/* per mount point data */

Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux