Re: [PATCH 4/4] NFS: use a hash table for delegation lookup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2025-07-14 at 13:16 +0200, Christoph Hellwig wrote:
> nfs_delegation_find_inode currently has to walk the entire list of
> delegations per inode, which can become pretty large, and can become even
> larger when increasing the delegation watermark.
> 
> Add a hash table to speed up the delegation lookup, sized as a fraction
> of the delegation watermark.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/nfs/client.c           | 23 +++++++++++++++++++----
>  fs/nfs/delegation.c       | 15 +++++++++++++--
>  fs/nfs/delegation.h       |  3 +++
>  include/linux/nfs_fs_sb.h |  2 ++
>  4 files changed, 37 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index f55188928f67..94684a476dd8 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -994,6 +994,7 @@ static DEFINE_IDA(s_sysfs_ids);
>  struct nfs_server *nfs_alloc_server(void)
>  {
>  	struct nfs_server *server;
> +	int delegation_buckets, i;
>  
>  	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
>  	if (!server)
> @@ -1019,11 +1020,18 @@ struct nfs_server *nfs_alloc_server(void)
>  	atomic_set(&server->active, 0);
>  	atomic_long_set(&server->nr_active_delegations, 0);
>  
> +	delegation_buckets = roundup_pow_of_two(nfs_delegation_watermark / 16);
> +	server->delegation_hash_mask = delegation_buckets - 1;
> +	server->delegation_hash_table = kmalloc_array(delegation_buckets,
> +			sizeof(*server->delegation_hash_table), GFP_KERNEL);
> +	if (!server->delegation_hash_table)
> +		goto out_free_server;
> +	for (i = 0; i < delegation_buckets; i++)
> +		INIT_HLIST_HEAD(&server->delegation_hash_table[i]);
> +

This is going to get created for any mount, even v3 ones. It might be
better to only bother with this for v4 mounts. Maybe do this in
nfs4_server_common_setup() instead?

Also, I wonder if you'd be better off using the rhashtable
infrastructure instead of adding a fixed-size one? The number of
delegations in flight is very workload-dependent, so a resizeable
hashtable may be a better option.

>  	server->io_stats = nfs_alloc_iostats();
> -	if (!server->io_stats) {
> -		kfree(server);
> -		return NULL;
> -	}
> +	if (!server->io_stats)
> +		goto out_free_delegation_hash;
>  
>  	server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
>  
> @@ -1036,6 +1044,12 @@ struct nfs_server *nfs_alloc_server(void)
>  	rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
>  
>  	return server;
> +
> +out_free_delegation_hash:
> +	kfree(server->delegation_hash_table);
> +out_free_server:
> +	kfree(server);
> +	return NULL;
>  }
>  EXPORT_SYMBOL_GPL(nfs_alloc_server);
>  
> @@ -1044,6 +1058,7 @@ static void delayed_free(struct rcu_head *p)
>  	struct nfs_server *server = container_of(p, struct nfs_server, rcu);
>  
>  	nfs_free_iostats(server->io_stats);
> +	kfree(server->delegation_hash_table);
>  	kfree(server);
>  }
>  
> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> index 621b635d1c56..ca830ceb466e 100644
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -27,9 +27,16 @@
>  
>  #define NFS_DEFAULT_DELEGATION_WATERMARK (15000U)
>  
> -static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
> +unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
>  module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
>  
> +static struct hlist_head *nfs_delegation_hash(struct nfs_server *server,
> +		const struct nfs_fh *fhandle)
> +{
> +	return server->delegation_hash_table +
> +		(nfs_fhandle_hash(fhandle) & server->delegation_hash_mask);
> +}
> +
>  static void __nfs_free_delegation(struct nfs_delegation *delegation)
>  {
>  	put_cred(delegation->cred);
> @@ -367,6 +374,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
>  		spin_unlock(&delegation->lock);
>  		return NULL;
>  	}
> +	hlist_del_init_rcu(&delegation->hash);
>  	list_del_rcu(&delegation->super_list);
>  	delegation->inode = NULL;
>  	rcu_assign_pointer(nfsi->delegation, NULL);
> @@ -529,6 +537,8 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
>  	spin_unlock(&inode->i_lock);
>  
>  	list_add_tail_rcu(&delegation->super_list, &server->delegations);
> +	hlist_add_head_rcu(&delegation->hash,
> +			nfs_delegation_hash(server, &NFS_I(inode)->fh));
>  	rcu_assign_pointer(nfsi->delegation, delegation);
>  	delegation = NULL;
>  
> @@ -1166,11 +1176,12 @@ static struct inode *
>  nfs_delegation_find_inode_server(struct nfs_server *server,
>  				 const struct nfs_fh *fhandle)
>  {
> +	struct hlist_head *head = nfs_delegation_hash(server, fhandle);
>  	struct nfs_delegation *delegation;
>  	struct super_block *freeme = NULL;
>  	struct inode *res = NULL;
>  
> -	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
> +	hlist_for_each_entry_rcu(delegation, head, hash) {
>  		spin_lock(&delegation->lock);
>  		if (delegation->inode != NULL &&
>  		    !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
> diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
> index 8ff5ab9c5c25..9f1fb9b39c43 100644
> --- a/fs/nfs/delegation.h
> +++ b/fs/nfs/delegation.h
> @@ -14,6 +14,7 @@
>   * NFSv4 delegation
>   */
>  struct nfs_delegation {
> +	struct hlist_node hash;
>  	struct list_head super_list;
>  	const struct cred *cred;
>  	struct inode *inode;
> @@ -123,4 +124,6 @@ static inline int nfs_have_delegated_mtime(struct inode *inode)
>  						 NFS_DELEGATION_FLAG_TIME);
>  }
>  
> +extern unsigned nfs_delegation_watermark;
> +
>  #endif
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index fe930d685780..88212306fd87 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -256,6 +256,8 @@ struct nfs_server {
>  	struct list_head	layouts;
>  	struct list_head	delegations;
>  	atomic_long_t		nr_active_delegations;
> +	unsigned int		delegation_hash_mask;
> +	struct hlist_head	*delegation_hash_table;
>  	struct list_head	ss_copies;
>  	struct list_head	ss_src_copies;
>  

-- 
Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux