On Wed, Jun 25, 2025 at 1:07 AM NeilBrown <neil@xxxxxxxxxx> wrote: > > ovl_writeout() relies on the workdir i_rwsem to provide exclusive access > to ofs->writeout which it manipulates. Rather than depending on this, typo writeout/whiteout all over this commit message > add a new mutex, "writeout_lock" to explicitly provide the required > locking. > > Then take the lock on workdir only when needed - to lookup the temp name > and to do the whiteout or link. So ovl_writeout() and similarly > ovl_cleanup_and_writeout() and ovl_workdir_cleanup() are now called > without the lock held. This requires changes to > ovl_remove_and_whiteout(), ovl_cleanup_index(), ovl_indexdir_cleanup(), > and ovl_workdir_create(). > > Signed-off-by: NeilBrown <neil@xxxxxxxxxx> > --- > fs/overlayfs/dir.c | 71 +++++++++++++++++++++------------------- > fs/overlayfs/overlayfs.h | 2 +- > fs/overlayfs/ovl_entry.h | 1 + > fs/overlayfs/params.c | 2 ++ > fs/overlayfs/readdir.c | 31 ++++++++++-------- > fs/overlayfs/super.c | 9 +++-- > fs/overlayfs/util.c | 7 ++-- > 7 files changed, 67 insertions(+), 56 deletions(-) > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index 5afe17cee305..78b0d956b0ac 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -77,7 +77,6 @@ struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) > return temp; > } > > -/* caller holds i_mutex on workdir */ > static struct dentry *ovl_whiteout(struct ovl_fs *ofs) > { > int err; > @@ -85,47 +84,51 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) > struct dentry *workdir = ofs->workdir; > struct inode *wdir = workdir->d_inode; > > + mutex_lock(&ofs->whiteout_lock); > if (!ofs->whiteout) { > + inode_lock(wdir); > whiteout = ovl_lookup_temp(ofs, workdir); > + if (!IS_ERR(whiteout)) { > + err = ovl_do_whiteout(ofs, wdir, whiteout); > + if (err) { > + dput(whiteout); > + whiteout = ERR_PTR(err); > + } > + } > + inode_unlock(wdir); > if (IS_ERR(whiteout)) > goto out; > - > - err = ovl_do_whiteout(ofs, wdir, whiteout); > - if (err) { > - dput(whiteout); > - whiteout = ERR_PTR(err); > - goto out; > - } > ofs->whiteout = whiteout; > } > > if (!ofs->no_shared_whiteout) { > + inode_lock(wdir); > whiteout = ovl_lookup_temp(ofs, workdir); > - if (IS_ERR(whiteout)) > - goto out; > - > - err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); > - if (!err) > + if (!IS_ERR(whiteout)) { > + err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); > + if (err) { > + dput(whiteout); > + whiteout = ERR_PTR(err); > + } > + } > + inode_unlock(wdir); > + if (!IS_ERR(whiteout) || PTR_ERR(whiteout) != -EMLINK) > goto out; > > - if (err != -EMLINK) { > - pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n", > - ofs->whiteout->d_inode->i_nlink, err); > - ofs->no_shared_whiteout = true; > - } > - dput(whiteout); Where did this dput go? > + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n", > + ofs->whiteout->d_inode->i_nlink, err); > + ofs->no_shared_whiteout = true; > } > whiteout = ofs->whiteout; > ofs->whiteout = NULL; > out: > + mutex_unlock(&ofs->whiteout_lock); > return whiteout; > } > > -/* Caller must hold i_mutex on both workdir and dir */ > int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, > struct dentry *dentry) > { > - struct inode *wdir = ofs->workdir->d_inode; > struct dentry *whiteout; > int err; > int flags = 0; > @@ -138,18 +141,26 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, > if (d_is_dir(dentry)) > flags = RENAME_EXCHANGE; > > - err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags); > + err = ovl_lock_rename_workdir(ofs->workdir, dir); > + if (!err) { > + if (whiteout->d_parent == ofs->workdir) > + err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, > + dentry, flags); > + else > + err = -EINVAL; > + unlock_rename(ofs->workdir, dir); > + } > if (err) > goto kill_whiteout; > if (flags) > - ovl_cleanup(ofs, wdir, dentry); > + ovl_cleanup_unlocked(ofs, ofs->workdir, dentry); > > out: > dput(whiteout); > return err; > > kill_whiteout: > - ovl_cleanup(ofs, wdir, whiteout); > + ovl_cleanup_unlocked(ofs, ofs->workdir, whiteout); > goto out; > } > > @@ -777,15 +788,11 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, > goto out; > } > > - err = ovl_lock_rename_workdir(workdir, upperdir); > - if (err) > - goto out_dput; > - > - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, > - dentry->d_name.len); > + upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir, > + dentry->d_name.len); > err = PTR_ERR(upper); > if (IS_ERR(upper)) > - goto out_unlock; > + goto out_dput; > > err = -ESTALE; > if ((opaquedir && upper != opaquedir) || > @@ -803,8 +810,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, > d_drop(dentry); > out_dput_upper: > dput(upper); > -out_unlock: > - unlock_rename(workdir, upperdir); > out_dput: > dput(opaquedir); > out: > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 508003e26e08..25378b81251e 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -732,7 +732,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, > void ovl_cache_free(struct list_head *list); > void ovl_dir_cache_free(struct inode *inode); > int ovl_check_d_type_supported(const struct path *realpath); > -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, > +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, > struct vfsmount *mnt, struct dentry *dentry, int level); > int ovl_indexdir_cleanup(struct ovl_fs *ofs); > > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h > index afb7762f873f..4c1bae935ced 100644 > --- a/fs/overlayfs/ovl_entry.h > +++ b/fs/overlayfs/ovl_entry.h > @@ -88,6 +88,7 @@ struct ovl_fs { > /* Shared whiteout cache */ > struct dentry *whiteout; > bool no_shared_whiteout; > + struct mutex whiteout_lock; > /* r/o snapshot of upperdir sb's only taken on volatile mounts */ > errseq_t errseq; > }; > diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c > index f42488c01957..cb1a17c066cd 100644 > --- a/fs/overlayfs/params.c > +++ b/fs/overlayfs/params.c > @@ -797,6 +797,8 @@ int ovl_init_fs_context(struct fs_context *fc) > fc->s_fs_info = ofs; > fc->fs_private = ctx; > fc->ops = &ovl_context_ops; > + > + mutex_init(&ofs->whiteout_lock); > return 0; > > out_err: > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c > index 2a222b8185a3..fd98444dacef 100644 > --- a/fs/overlayfs/readdir.c > +++ b/fs/overlayfs/readdir.c > @@ -1141,7 +1141,8 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa > if (IS_ERR(dentry)) > continue; > if (dentry->d_inode) > - err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level); > + err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt, > + dentry, level); > dput(dentry); > if (err) > break; > @@ -1152,24 +1153,27 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa > return err; > } > > -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, > +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, > struct vfsmount *mnt, struct dentry *dentry, int level) > { > int err; > > if (!d_is_dir(dentry) || level > 1) { > - return ovl_cleanup(ofs, dir, dentry); > + return ovl_cleanup_unlocked(ofs, parent, dentry); > } > > - err = ovl_do_rmdir(ofs, dir, dentry); > + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); > + if (dentry->d_parent == parent) > + err = ovl_do_rmdir(ofs, parent->d_inode, dentry); > + else > + err = -EINVAL; > + inode_unlock(parent->d_inode); > if (err) { > struct path path = { .mnt = mnt, .dentry = dentry }; > > - inode_unlock(dir); > err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); > - inode_lock_nested(dir, I_MUTEX_PARENT); > if (!err) > - err = ovl_cleanup(ofs, dir, dentry); > + err = ovl_cleanup_unlocked(ofs, parent, dentry); > } > > return err; > @@ -1180,7 +1184,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > int err; > struct dentry *indexdir = ofs->workdir; > struct dentry *index = NULL; > - struct inode *dir = indexdir->d_inode; > struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; > LIST_HEAD(list); > struct ovl_cache_entry *p; > @@ -1194,7 +1197,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > if (err) > goto out; > > - inode_lock_nested(dir, I_MUTEX_PARENT); > list_for_each_entry(p, &list, l_node) { > if (p->name[0] == '.') { > if (p->len == 1) > @@ -1202,7 +1204,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > if (p->len == 2 && p->name[1] == '.') > continue; > } > - index = ovl_lookup_upper(ofs, p->name, indexdir, p->len); > + index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, > + p->len); > if (IS_ERR(index)) { > err = PTR_ERR(index); > index = NULL; > @@ -1210,7 +1213,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > } > /* Cleanup leftover from index create/cleanup attempt */ > if (index->d_name.name[0] == '#') { > - err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1); > + err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, > + index, 1); > if (err) > break; > goto next; > @@ -1220,7 +1224,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > goto next; > } else if (err == -ESTALE) { > /* Cleanup stale index entries */ > - err = ovl_cleanup(ofs, dir, index); > + err = ovl_cleanup_unlocked(ofs, indexdir, index); > } else if (err != -ENOENT) { > /* > * Abort mount to avoid corrupting the index if > @@ -1236,7 +1240,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > err = ovl_cleanup_and_whiteout(ofs, indexdir, index); > } else { > /* Cleanup orphan index entries */ > - err = ovl_cleanup(ofs, dir, index); > + err = ovl_cleanup_unlocked(ofs, indexdir, index); > } > > if (err) > @@ -1247,7 +1251,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > index = NULL; > } > dput(index); > - inode_unlock(dir); > out: > ovl_cache_free(&list); > if (err) > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 576b5c3b537c..3583e359655f 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -299,8 +299,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > int err; > bool retried = false; > > - inode_lock_nested(dir, I_MUTEX_PARENT); > retry: > + inode_lock_nested(dir, I_MUTEX_PARENT); > work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name)); > > if (!IS_ERR(work)) { > @@ -311,6 +311,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > if (work->d_inode) { > err = -EEXIST; > + inode_unlock(dir); > if (retried) > goto out_dput; > > @@ -318,7 +319,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > goto out_unlock; > > retried = true; > - err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0); > + err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, > + work, 0); > dput(work); > if (err == -EINVAL) { > work = ERR_PTR(err); > @@ -328,6 +330,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > } > > work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); > + inode_unlock(dir); > err = PTR_ERR(work); > if (IS_ERR(work)) > goto out_err; > @@ -365,11 +368,11 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > if (err) > goto out_dput; > } else { > + inode_unlock(dir); > err = PTR_ERR(work); > goto out_err; > } > out_unlock: This label name is now misleading > - inode_unlock(dir); > return work; > Thanks, Amir.