Re: [PATCH v2 1/4] fuse: Make the fuse unique value a per-cpu counter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks for your review, Joanne!

On 4/3/25 20:27, Joanne Koong wrote:
> On Thu, Apr 3, 2025 at 6:05 AM Bernd Schubert <bschubert@xxxxxxx> wrote:
>>
>> No need to take lock, we can have that per cpu and
>> add in the current cpu as offset.
>>
>> fuse-io-uring and virtiofs especially benefit from it
>> as they don't need the fiq lock at all.
>>
>> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
>> ---
>>  fs/fuse/dev.c        | 24 +++---------------------
>>  fs/fuse/fuse_dev_i.h |  4 ----
>>  fs/fuse/fuse_i.h     | 23 ++++++++++++++++++-----
>>  fs/fuse/inode.c      |  1 +
>>  4 files changed, 22 insertions(+), 30 deletions(-)
>>
>> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
>> index 51e31df4c54613280a9c295f530b18e1d461a974..e9592ab092b948bacb5034018bd1f32c917d5c9f 100644
>> --- a/fs/fuse/dev.c
>> +++ b/fs/fuse/dev.c
>> @@ -204,24 +204,6 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
>>  }
>>  EXPORT_SYMBOL_GPL(fuse_len_args);
>>
>> -static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
>> -{
>> -       fiq->reqctr += FUSE_REQ_ID_STEP;
>> -       return fiq->reqctr;
>> -}
>> -
>> -u64 fuse_get_unique(struct fuse_iqueue *fiq)
>> -{
>> -       u64 ret;
>> -
>> -       spin_lock(&fiq->lock);
>> -       ret = fuse_get_unique_locked(fiq);
>> -       spin_unlock(&fiq->lock);
>> -
>> -       return ret;
>> -}
>> -EXPORT_SYMBOL_GPL(fuse_get_unique);
>> -
>>  unsigned int fuse_req_hash(u64 unique)
>>  {
>>         return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
>> @@ -278,7 +260,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
>>         spin_lock(&fiq->lock);
>>         if (fiq->connected) {
>>                 if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
>> -                       req->in.h.unique = fuse_get_unique_locked(fiq);
>> +                       req->in.h.unique = fuse_get_unique(fiq);
>>                 list_add_tail(&req->list, &fiq->pending);
>>                 fuse_dev_wake_and_unlock(fiq);
>>         } else {
>> @@ -1177,7 +1159,7 @@ __releases(fiq->lock)
>>         struct fuse_in_header ih = {
>>                 .opcode = FUSE_FORGET,
>>                 .nodeid = forget->forget_one.nodeid,
>> -               .unique = fuse_get_unique_locked(fiq),
>> +               .unique = fuse_get_unique(fiq),
>>                 .len = sizeof(ih) + sizeof(arg),
>>         };
>>
>> @@ -1208,7 +1190,7 @@ __releases(fiq->lock)
>>         struct fuse_batch_forget_in arg = { .count = 0 };
>>         struct fuse_in_header ih = {
>>                 .opcode = FUSE_BATCH_FORGET,
>> -               .unique = fuse_get_unique_locked(fiq),
>> +               .unique = fuse_get_unique(fiq),
>>                 .len = sizeof(ih) + sizeof(arg),
>>         };
>>
>> diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
>> index 3b2bfe1248d3573abe3b144a6d4bf6a502f56a40..e0afd837a8024450bab77312c7eebdcc7a39bd36 100644
>> --- a/fs/fuse/fuse_dev_i.h
>> +++ b/fs/fuse/fuse_dev_i.h
>> @@ -8,10 +8,6 @@
>>
>>  #include <linux/types.h>
>>
>> -/* Ordinary requests have even IDs, while interrupts IDs are odd */
>> -#define FUSE_INT_REQ_BIT (1ULL << 0)
>> -#define FUSE_REQ_ID_STEP (1ULL << 1)
>> -
>>  struct fuse_arg;
>>  struct fuse_args;
>>  struct fuse_pqueue;
>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> index fee96fe7887b30cd57b8a6bbda11447a228cf446..73c612dd58e45ecde0b8f72fd58ac603d12cf202 100644
>> --- a/fs/fuse/fuse_i.h
>> +++ b/fs/fuse/fuse_i.h
>> @@ -9,6 +9,8 @@
>>  #ifndef _FS_FUSE_I_H
>>  #define _FS_FUSE_I_H
>>
>> +#include "linux/percpu-defs.h"
> 
> Think the convention is #include <linux/percpu-defs.h> though I wonder
> if you even need this. I see other filesystems using percpu counters
> but they don't explicitly include this header. Compilation seems fine
> without it.
> 
>> +#include "linux/threads.h"
> 
> Do you need threads.h?

Oh, I have fixed my .clangd settings now; clangd had added those headers by itself.

> 
>>  #ifndef pr_fmt
>>  # define pr_fmt(fmt) "fuse: " fmt
>>  #endif
>> @@ -44,6 +46,10 @@
>>  /** Number of dentries for each connection in the control filesystem */
>>  #define FUSE_CTL_NUM_DENTRIES 5
>>
>> +/* Ordinary requests have even IDs, while interrupts IDs are odd */
>> +#define FUSE_INT_REQ_BIT (1ULL << 0)
>> +#define FUSE_REQ_ID_STEP (1ULL << 1)
>> +
>>  /** Maximum of max_pages received in init_out */
>>  extern unsigned int fuse_max_pages_limit;
>>
>> @@ -490,7 +496,7 @@ struct fuse_iqueue {
>>         wait_queue_head_t waitq;
>>
>>         /** The next unique request id */
>> -       u64 reqctr;
>> +       u64 __percpu *reqctr;
>>
>>         /** The list of pending requests */
>>         struct list_head pending;
>> @@ -1065,6 +1071,17 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
>>         rcu_read_unlock();
>>  }
>>
>> +/**
>> + * Get the next unique ID for a request
>> + */
>> +static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
>> +{
>> +       int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1);
> 
> I don't think you need the + 1 here. This works fine even if
> task_cpu() returns 0.

Yeah, right - I had an earlier version that was multiplying by the step.

> 
>> +       u64 cntr = this_cpu_inc_return(*fiq->reqctr);
>> +
>> +       return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;
> 
> if you want to save a multiplication, I think we could just do
> 
>  static inline u64 fuse_get_unique(struct fuse_iqueue *fiq) {
>    u64 cntr = this_cpu_inc_return(*fiq->reqctr);
>    return (cntr * NR_CPUS + task_cpu(current)) * FUSE_REQ_ID_STEP;
> }
> 

I find this harder to read - won't the compiler optimize that anyway?



>> +}
>> +
>>  /** Device operations */
>>  extern const struct file_operations fuse_dev_operations;
>>
>> @@ -1415,10 +1432,6 @@ int fuse_readdir(struct file *file, struct dir_context *ctx);
>>   */
>>  unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
>>
>> -/**
>> - * Get the next unique ID for a request
>> - */
>> -u64 fuse_get_unique(struct fuse_iqueue *fiq);
>>  void fuse_free_conn(struct fuse_conn *fc);
>>
>>  /* dax.c */
>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>> index e9db2cb8c150878634728685af0fa15e7ade628f..12012bfbf59a93deb9d27e0e0641e4ea2ec4c233 100644
>> --- a/fs/fuse/inode.c
>> +++ b/fs/fuse/inode.c
>> @@ -930,6 +930,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
>>         memset(fiq, 0, sizeof(struct fuse_iqueue));
>>         spin_lock_init(&fiq->lock);
>>         init_waitqueue_head(&fiq->waitq);
>> +       fiq->reqctr = alloc_percpu(u64);
>>         INIT_LIST_HEAD(&fiq->pending);
>>         INIT_LIST_HEAD(&fiq->interrupts);
>>         fiq->forget_list_tail = &fiq->forget_list_head;
>>
> 
> I think we need a free_percpu(fiq->reqctr); as well when the last ref
> on the connection is dropped or else this is leaked

Right, I totally forgot about that.


Thanks,
Bernd




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux