On Tue, Aug 26, 2025 at 12:44:13AM +0100, Al Viro wrote:

> As for the second, couldn't we simply use !list_empty(&ns->mnt_ns_list)
> as a condition? And avoiding an RCU delay... nice, in principle, but
> the case when that would've saved us anything is CLONE_NEWNS clone(2) or
> unshare(2) failing due to severe OOM. Do we give a damn about one extra
> call_rcu() for each of such failures?
>
> mnt_ns_tree handling is your code; do you see any problems with

... this (on top of the posted series, needs to be carved into several
parts - dropping pointless lock_mount_hash() in open_detached_copy(),
making mnt_ns_tree_remove() and thus free_mnt_ns() safe to use on ns
not in mnt_ns_tree yet, then dealing with open_detached_copy() and
copy_mnt_ns() separately):

diff --git a/fs/namespace.c b/fs/namespace.c
index 63b74d7384fd..b77469789f82 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -195,7 +195,7 @@ static void mnt_ns_release_rcu(struct rcu_head *rcu)
 static void mnt_ns_tree_remove(struct mnt_namespace *ns)
 {
 	/* remove from global mount namespace list */
-	if (!is_anon_ns(ns)) {
+	if (!list_empty(&ns->mnt_ns_list)) {
 		mnt_ns_tree_write_lock();
 		rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
 		list_bidir_del_rcu(&ns->mnt_ns_list);
@@ -3053,18 +3053,17 @@ static int do_loopback(const struct path *path, const char *old_name,
 	return err;
 }
 
-static struct file *open_detached_copy(struct path *path, bool recursive)
+static struct mnt_namespace *get_detached_copy(const struct path *path, bool recursive)
 {
 	struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns;
 	struct user_namespace *user_ns = mnt_ns->user_ns;
 	struct mount *mnt, *p;
-	struct file *file;
 
 	ns = alloc_mnt_ns(user_ns, true);
 	if (IS_ERR(ns))
-		return ERR_CAST(ns);
+		return ns;
 
-	namespace_lock();
+	guard(namespace_excl)();
 
 	/*
 	 * Record the sequence number of the source mount namespace.
@@ -3081,23 +3080,28 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
 
 	mnt = __do_loopback(path, recursive);
 	if (IS_ERR(mnt)) {
-		namespace_unlock();
-		free_mnt_ns(ns);
+		emptied_ns = ns;
 		return ERR_CAST(mnt);
 	}
 
-	lock_mount_hash();
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		mnt_add_to_ns(ns, p);
 		ns->nr_mounts++;
 	}
 	ns->root = mnt;
-	mntget(&mnt->mnt);
-	unlock_mount_hash();
-	namespace_unlock();
+	return ns;
+}
+
+static struct file *open_detached_copy(struct path *path, bool recursive)
+{
+	struct mnt_namespace *ns = get_detached_copy(path, recursive);
+	struct file *file;
+
+	if (IS_ERR(ns))
+		return ERR_CAST(ns);
 
 	mntput(path->mnt);
-	path->mnt = &mnt->mnt;
+	path->mnt = mntget(&ns->root->mnt);
 	file = dentry_open(path, O_PATH, current_cred());
 	if (IS_ERR(file))
 		dissolve_on_fput(path->mnt);
@@ -4165,7 +4169,8 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
-	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
+	struct vfsmount *rootmnt __free(mntput)= NULL;
+	struct vfsmount *pwdmnt __free(mntput) = NULL;
 	struct mount *p, *q;
 	struct mount *old;
 	struct mount *new;
@@ -4184,23 +4189,20 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	if (IS_ERR(new_ns))
 		return new_ns;
 
-	namespace_lock();
+	guard(namespace_excl)();
 	/* First pass: copy the tree topology */
 	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
 	if (user_ns != ns->user_ns)
 		copy_flags |= CL_SLAVE;
 	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
 	if (IS_ERR(new)) {
-		namespace_unlock();
-		ns_free_inum(&new_ns->ns);
-		dec_mnt_namespaces(new_ns->ucounts);
-		mnt_ns_release(new_ns);
+		emptied_ns = new_ns;
 		return ERR_CAST(new);
 	}
+
 	if (user_ns != ns->user_ns) {
-		lock_mount_hash();
-		lock_mnt_tree(new);
-		unlock_mount_hash();
+		scoped_guard(mount_writer)
+			lock_mnt_tree(new);
 	}
 	new_ns->root = new;
 
@@ -4232,12 +4234,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 		while (p->mnt.mnt_root != q->mnt.mnt_root)
 			p = next_mnt(skip_mnt_tree(p), old);
 	}
-	namespace_unlock();
-
-	if (rootmnt)
-		mntput(rootmnt);
-	if (pwdmnt)
-		mntput(pwdmnt);
 	mnt_ns_tree_add(new_ns);
 
 	return new_ns;