On Mon, Apr 21, 2025 at 06:03:19PM +0100, Al Viro wrote: > On Mon, Apr 21, 2025 at 05:29:47PM +0100, Al Viro wrote: > > > What's to prevent the 'beneath' case from getting mnt mount --move'd > > away *AND* the ex-parent from getting unmounted while we are blocked > > in inode_lock? At this point we are not holding any locks whatsoever > > (and all mount-related locks nest inside inode_lock(), so we couldn't > > hold them there anyway). > > > > Hit that race and watch a very unhappy umount... > > While we are at it, in normal case inode_unlock() in unlock_mount() > is safe since we have dentry (and associated mount) pinned by > struct path we'd fed to matching lock_mount(). No longer true for > the 'beneath' case, AFAICS... Completely untested patch follows; 'beneath' case in do_lock_mount() is made to grab a mount reference to match the dentry one (same lifetime; dropped simultaneously), unlock_mount() unlocks the inode *before* namespace_unlock(), so we don't depend upon the externally held references. Comments? 
diff --git a/fs/namespace.c b/fs/namespace.c index fa17762268f5..91d28f283f1c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2827,56 +2827,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) struct vfsmount *mnt = path->mnt; struct dentry *dentry; struct mountpoint *mp = ERR_PTR(-ENOENT); + struct path under = {}; for (;;) { - struct mount *m; + struct mount *m = real_mount(mnt); if (beneath) { - m = real_mount(mnt); + path_put(&under); read_seqlock_excl(&mount_lock); - dentry = dget(m->mnt_mountpoint); + under.mnt = mntget(&m->mnt_parent->mnt); + under.dentry = dget(m->mnt_mountpoint); read_sequnlock_excl(&mount_lock); + dentry = under.dentry; } else { dentry = path->dentry; } inode_lock(dentry->d_inode); - if (unlikely(cant_mount(dentry))) { - inode_unlock(dentry->d_inode); - goto out; - } - namespace_lock(); - if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { + if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) + break; // not to be mounted on + + if (beneath && unlikely(m->mnt_mountpoint != dentry || + &m->mnt_parent->mnt != under.mnt)) { namespace_unlock(); inode_unlock(dentry->d_inode); - goto out; + continue; // got moved } mnt = lookup_mnt(path); - if (likely(!mnt)) + if (unlikely(mnt)) { + namespace_unlock(); + inode_unlock(dentry->d_inode); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + continue; // got overmounted + } + mp = get_mountpoint(dentry); + if (IS_ERR(mp)) break; - - namespace_unlock(); - inode_unlock(dentry->d_inode); - if (beneath) - dput(dentry); - path_put(path); - path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); - } - - mp = get_mountpoint(dentry); - if (IS_ERR(mp)) { - namespace_unlock(); - inode_unlock(dentry->d_inode); + if (beneath) { + /* + * @under duplicates the references that will stay + * at least until namespace_unlock(), so the path_put() + * below is safe (and OK to do under namespace_lock - + * we are not dropping the final references 
here). + */ + path_put(&under); + } + return mp; } - -out: + namespace_unlock(); + inode_unlock(dentry->d_inode); if (beneath) - dput(dentry); - + path_put(&under); return mp; } @@ -2892,9 +2898,9 @@ static void unlock_mount(struct mountpoint *where) read_seqlock_excl(&mount_lock); put_mountpoint(where); read_sequnlock_excl(&mount_lock); + inode_unlock(dentry->d_inode); namespace_unlock(); - inode_unlock(dentry->d_inode); } static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)