On Tue, May 20 2025, Luis Henriques wrote: > This patch adds a new module parameter 'inval_wq' which is used to start a > workqueue to periodically invalidate expired dentries. The value of this > new parameter is the period, in seconds, of the workqueue. When it is set, > every new dentry will be added to an rbtree, sorted by the dentry's expiry > time. > > When the workqueue is executed, it will check the dentries in this tree and > invalidate them if: > > - The dentry has timed-out, or if > - The connection epoch has been incremented. > > The workqueue will run for, at most, 5 seconds each time. It will > reschedule itself if the dentries tree isn't empty. > > The workqueue period is set per filesystem with the 'inval_wq' parameter > value when it is mounted. This value can not be less than 5 seconds. If > this module parameter is changed later on, the mounted filesystems will > keep using the old value until they are remounted. > > Signed-off-by: Luis Henriques <luis@xxxxxxxxxx> > --- > Hi! > > I've been staring at this patch for a while and hopefully it's ready for a > fresh set of eyes to find the bugs that are hiding from me. I believe I > addressed all Miklos comments, and decided to drop the RFC from the subject. > > As before, this patch depends on my previous patch (already in Miklos tree) > with subject: "fuse: add more control over cache invalidation behaviour". > > Feedback is welcome. Gentle ping, feedback on the generic approach would be great. Cheers, -- Luís > > Cheers, > -- > Luis > > Changes since v2: > > - Major rework, the dentries tree nodes are now in fuse_dentry and they are > tied to the actual dentry lifetime > - Mount option is now a module parameter > - workqueue now runs for at most 5 seconds before rescheduling > > fs/fuse/dir.c | 180 +++++++++++++++++++++++++++++++++++++++++------ > fs/fuse/fuse_i.h | 12 ++++ > fs/fuse/inode.c | 21 ++++++ > 3 files changed, 190 insertions(+), 23 deletions(-) > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 1fb0b15a6088..257ca2b36b94 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -34,33 +34,153 @@ static void fuse_advise_use_readdirplus(struct inode *dir) > set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); > } > > -#if BITS_PER_LONG >= 64 > -static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) > +struct fuse_dentry { > + u64 time; > + struct rcu_head rcu; > + struct rb_node node; > + struct dentry *dentry; > +}; > + > +static void __fuse_dentry_tree_del_node(struct fuse_conn *fc, > + struct fuse_dentry *fd) > { > - entry->d_fsdata = (void *) time; > + if (!RB_EMPTY_NODE(&fd->node)) { > + rb_erase(&fd->node, &fc->dentry_tree); > + RB_CLEAR_NODE(&fd->node); > + } > } > > -static inline u64 fuse_dentry_time(const struct dentry *entry) > +static void fuse_dentry_tree_del_node(struct dentry *dentry) > { > - return (u64)entry->d_fsdata; > + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); > + struct fuse_dentry *fd = dentry->d_fsdata; > + > + if (!fc->inval_wq) > + return; > + > + spin_lock(&fc->dentry_tree_lock); > + __fuse_dentry_tree_del_node(fc, fd); > + spin_unlock(&fc->dentry_tree_lock); > } > > -#else > -union fuse_dentry { > - u64 time; > - struct rcu_head rcu; > -}; > +static void fuse_dentry_tree_add_node(struct dentry *dentry) > +{ > + struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); > + struct fuse_dentry *fd = dentry->d_fsdata; > + struct fuse_dentry *cur; > + struct rb_node **p, *parent = NULL; > + bool start_work = false; > + > + if (!fc->inval_wq) > + return; > + > + spin_lock(&fc->dentry_tree_lock); > + > + if (!fc->inval_wq) { > + spin_unlock(&fc->dentry_tree_lock); > + return; > + } > + > + start_work = RB_EMPTY_ROOT(&fc->dentry_tree); > + __fuse_dentry_tree_del_node(fc, fd); > + > + p = &fc->dentry_tree.rb_node; > + while (*p) { > + parent = *p; > + cur = rb_entry(*p, struct fuse_dentry, node); > + if (fd->time > cur->time) > + p = &(*p)->rb_left; > + else > + p = &(*p)->rb_right; > + } > + rb_link_node(&fd->node, parent, p); > + rb_insert_color(&fd->node, &fc->dentry_tree); > + spin_unlock(&fc->dentry_tree_lock); > + > + if (start_work) > + schedule_delayed_work(&fc->dentry_tree_work, > + secs_to_jiffies(fc->inval_wq)); > +} > + > +void fuse_dentry_tree_prune(struct fuse_conn *fc) > +{ > + struct rb_node *n; > + > + if (!fc->inval_wq) > + return; > + > + fc->inval_wq = 0; > + cancel_delayed_work_sync(&fc->dentry_tree_work); > + > + spin_lock(&fc->dentry_tree_lock); > + while (!RB_EMPTY_ROOT(&fc->dentry_tree)) { > + n = rb_first(&fc->dentry_tree); > + rb_erase(n, &fc->dentry_tree); > + RB_CLEAR_NODE(&rb_entry(n, struct fuse_dentry, node)->node); > + } > + spin_unlock(&fc->dentry_tree_lock); > +} > + > +/* > + * workqueue that, when enabled, will periodically check for expired dentries in > + * the dentries tree. > + * > + * A dentry has expired if: > + * > + * 1) it has been around for too long (timeout) or if > + * > + * 2) the connection epoch has been incremented. > + * > + * The workqueue will be rescheduled itself as long as the dentries tree is not > + * empty. Also, it will not spend more than 5 seconds invalidating dentries on > + * each run. > + */ > +void fuse_dentry_tree_work(struct work_struct *work) > +{ > + struct fuse_conn *fc = container_of(work, struct fuse_conn, > + dentry_tree_work.work); > + struct fuse_dentry *fd; > + struct rb_node *node; > + u64 start, end; > + int epoch; > + bool reschedule; > + > + spin_lock(&fc->dentry_tree_lock); > + start = get_jiffies_64(); > + /* Don't spend too much time invalidating dentries */ > + end = start + secs_to_jiffies(5); > + epoch = atomic_read(&fc->epoch); > + > + node = rb_first(&fc->dentry_tree); > + while (node && time_after64(end, get_jiffies_64())) { > + fd = rb_entry(node, struct fuse_dentry, node); > + if ((fd->dentry->d_time < epoch) || (fd->time < start)) { > + rb_erase(&fd->node, &fc->dentry_tree); > + RB_CLEAR_NODE(&fd->node); > + spin_unlock(&fc->dentry_tree_lock); > + d_invalidate(fd->dentry); > + spin_lock(&fc->dentry_tree_lock); > + } else > + break; > + node = rb_first(&fc->dentry_tree); > + } > + reschedule = !RB_EMPTY_ROOT(&fc->dentry_tree); > + spin_unlock(&fc->dentry_tree_lock); > + > + if (reschedule) > + schedule_delayed_work(&fc->dentry_tree_work, > + secs_to_jiffies(fc->inval_wq)); > +} > > static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) > { > - ((union fuse_dentry *) dentry->d_fsdata)->time = time; > + ((struct fuse_dentry *) dentry->d_fsdata)->time = time; > } > > static inline u64 fuse_dentry_time(const struct dentry *entry) > { > - return ((union fuse_dentry *) entry->d_fsdata)->time; > + return ((struct fuse_dentry *) entry->d_fsdata)->time; > } > -#endif > > static void fuse_dentry_settime(struct dentry *dentry, u64 time) > { > @@ -81,6 +201,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time) > } > > __fuse_dentry_settime(dentry, time); > + fuse_dentry_tree_add_node(dentry); > } > > /* > @@ -283,21 +404,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, > goto out; > } > > -#if BITS_PER_LONG < 64 > static int fuse_dentry_init(struct dentry *dentry) > { > - dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), > - GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); > + struct fuse_dentry *fd; > + > + fd = kzalloc(sizeof(struct fuse_dentry), > + GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); > + if (!fd) > + return -ENOMEM; > + > + fd->dentry = dentry; > + RB_CLEAR_NODE(&fd->node); > + dentry->d_fsdata = fd; > > - return dentry->d_fsdata ? 0 : -ENOMEM; > + return 0; > +} > + > +static void fuse_dentry_prune(struct dentry *dentry) > +{ > + struct fuse_dentry *fd = dentry->d_fsdata; > + > + if (!RB_EMPTY_NODE(&fd->node)) > + fuse_dentry_tree_del_node(dentry); > } > + > static void fuse_dentry_release(struct dentry *dentry) > { > - union fuse_dentry *fd = dentry->d_fsdata; > + struct fuse_dentry *fd = dentry->d_fsdata; > > kfree_rcu(fd, rcu); > } > -#endif > > static int fuse_dentry_delete(const struct dentry *dentry) > { > @@ -334,18 +470,16 @@ static struct vfsmount *fuse_dentry_automount(struct path *path) > const struct dentry_operations fuse_dentry_operations = { > .d_revalidate = fuse_dentry_revalidate, > .d_delete = fuse_dentry_delete, > -#if BITS_PER_LONG < 64 > .d_init = fuse_dentry_init, > + .d_prune = fuse_dentry_prune, > .d_release = fuse_dentry_release, > -#endif > .d_automount = fuse_dentry_automount, > }; > > const struct dentry_operations fuse_root_dentry_operations = { > -#if BITS_PER_LONG < 64 > .d_init = fuse_dentry_init, > + .d_prune = fuse_dentry_prune, > .d_release = fuse_dentry_release, > -#endif > }; > > int fuse_valid_type(int m) > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index f870d53a1bcf..3b7794f21573 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -978,6 +978,15 @@ struct fuse_conn { > /* Request timeout (in jiffies). 0 = no timeout */ > unsigned int req_timeout; > } timeout; > + > + /** Cache dentries tree */ > + struct rb_root dentry_tree; > + /** Look to protect dentry_tree access */ > + spinlock_t dentry_tree_lock; > + /** Periodic delayed work to invalidate expired dentries */ > + struct delayed_work dentry_tree_work; > + /** Period for the invalidation workqueue */ > + unsigned int inval_wq; > }; > > /* > @@ -1262,6 +1271,9 @@ void fuse_wait_aborted(struct fuse_conn *fc); > /* Check if any requests timed out */ > void fuse_check_timeout(struct work_struct *work); > > +void fuse_dentry_tree_prune(struct fuse_conn *fc); > +void fuse_dentry_tree_work(struct work_struct *work); > + > /** > * Invalidate inode attributes > */ > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index b399784cca5f..7dbb11937344 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -57,6 +57,20 @@ MODULE_PARM_DESC(max_user_congthresh, > "Global limit for the maximum congestion threshold an " > "unprivileged user can set"); > > +static unsigned __read_mostly inval_wq; > +static int inval_wq_set(const char *val, const struct kernel_param *kp) > +{ > + return param_set_uint_minmax(val, kp, 5, (unsigned int)(-1)); > +} > +static const struct kernel_param_ops inval_wq_ops = { > + .set = inval_wq_set, > + .get = param_get_uint, > +}; > +module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644); > +__MODULE_PARM_TYPE(inval_wq, "uint"); > +MODULE_PARM_DESC(inval_wq, > + "Dentries invalidation workqueue period in secs (>= 5)."); > + > #define FUSE_DEFAULT_BLKSIZE 512 > > /** Maximum number of outstanding background requests */ > @@ -959,6 +973,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, > memset(fc, 0, sizeof(*fc)); > spin_lock_init(&fc->lock); > spin_lock_init(&fc->bg_lock); > + spin_lock_init(&fc->dentry_tree_lock); > init_rwsem(&fc->killsb); > refcount_set(&fc->count, 1); > atomic_set(&fc->dev_count, 1); > @@ -968,6 +983,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, > INIT_LIST_HEAD(&fc->bg_queue); > INIT_LIST_HEAD(&fc->entry); > INIT_LIST_HEAD(&fc->devices); > + fc->dentry_tree = RB_ROOT; > + fc->inval_wq = 0; > atomic_set(&fc->num_waiting, 0); > fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; > fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; > @@ -1844,6 +1861,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) > fc->group_id = ctx->group_id; > fc->legacy_opts_show = ctx->legacy_opts_show; > fc->max_read = max_t(unsigned int, 4096, ctx->max_read); > + fc->inval_wq = inval_wq; > + if (fc->inval_wq > 0) > + INIT_DELAYED_WORK(&fc->dentry_tree_work, fuse_dentry_tree_work); > fc->destroy = ctx->destroy; > fc->no_control = ctx->no_control; > fc->no_force_umount = ctx->no_force_umount; > @@ -2048,6 +2068,7 @@ void fuse_conn_destroy(struct fuse_mount *fm) > > fuse_abort_conn(fc); > fuse_wait_aborted(fc); > + fuse_dentry_tree_prune(fc); > > if (!list_empty(&fc->entry)) { > mutex_lock(&fuse_mutex); >