Some sysctl tables can provide an is_seen() function which reports if the sysctl should be visible to the current process. This is currently used to cause d_compare to fail for invisible sysctls. This technique might have worked in 2.6.26 when it was implemented, but it cannot work now. In particular if ->d_compare always fails for a particular name, then d_alloc_parallel() will always create a new dentry and pass it to lookup() resulting in a new inode for every lookup. I tested this by changing sysctl_is_seen() to always return 0. When all sysctls were still visible and repeated lookups (ls -li) reported different inode numbers. This patch discards proc_sys_compare (the d_compare function) and instead adds the is_seen functionality into proc_sys_revalidate (the d_revalidate function). If the sysctl table is unregistering, 0 is returned. Otherwise if is_seen() exists but fails, -ENOENT is returned. The rcu_dereference() and RCU_INIT_POINTER() for ->sysctl are removed as the field is not rcu-managed. It is only changed when the inode is created and when it is evicted, and in these cases there can be no possible concurrent access. Signed-off-by: NeilBrown <neil@xxxxxxxxxx> --- fs/proc/inode.c | 2 +- fs/proc/proc_sysctl.c | 35 ++++------------------------------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a3eb3b740f76..c3991dd314d9 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode) head = ei->sysctl; if (head) { - RCU_INIT_POINTER(ei->sysctl, NULL); + ei->sysctl = NULL; proc_sys_evict_inode(inode, head); } } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index cc9d74a06ff0..dbf652b50909 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -884,21 +884,15 @@ static const struct inode_operations proc_sys_dir_operations = { .getattr = proc_sys_getattr, }; -static int proc_sys_revalidate(struct inode *dir, const struct qstr *name, - struct dentry *dentry, unsigned int flags) -{ - if (flags & LOOKUP_RCU) - return -ECHILD; - return !PROC_I(d_inode(dentry))->sysctl->unregistering; -} - static int proc_sys_delete(const struct dentry *dentry) { return !!PROC_I(d_inode(dentry))->sysctl->unregistering; } -static int sysctl_is_seen(struct ctl_table_header *p) +static int proc_sys_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { + struct ctl_table_header *p = PROC_I(d_inode(dentry))->sysctl; struct ctl_table_set *set = p->set; int res; spin_lock(&sysctl_lock); @@ -907,35 +901,14 @@ static int sysctl_is_seen(struct ctl_table_header *p) else if (!set->is_seen) res = 1; else - res = set->is_seen(set); + res = set->is_seen(set) ? 1 : -ENOENT; spin_unlock(&sysctl_lock); return res; } -static int proc_sys_compare(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name) -{ - struct ctl_table_header *head; - struct inode *inode; - - /* Although proc doesn't have negative dentries, rcu-walk means - * that inode here can be NULL */ - /* AV: can it, indeed? */ - inode = d_inode_rcu(dentry); - if (!inode) - return 1; - if (name->len != len) - return 1; - if (memcmp(name->name, str, len)) - return 1; - head = rcu_dereference(PROC_I(inode)->sysctl); - return !head || !sysctl_is_seen(head); -} - static const struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, .d_delete = proc_sys_delete, - .d_compare = proc_sys_compare, }; static struct ctl_dir *find_subdir(struct ctl_dir *dir, -- 2.49.0