On Thu, 26 Jun 2025, Amir Goldstein wrote: > On Wed, Jun 25, 2025 at 1:07 AM NeilBrown <neil@xxxxxxxxxx> wrote: > > > > ovl_writeout() relies on the workdir i_rwsem to provide exclusive access > > to ofs->writeout which it manipulates. Rather than depending on this, > > typo writeout/whiteout all over this commit message Fixed - thanks. > > > add a new mutex, "writeout_lock" to explicitly provide the required > > locking. > > > > Then take the lock on workdir only when needed - to lookup the temp name > > and to do the whiteout or link. So ovl_writeout() and similarly > > ovl_cleanup_and_writeout() and ovl_workdir_cleanup() are now called > > without the lock held. This requires changes to > > ovl_remove_and_whiteout(), ovl_cleanup_index(), ovl_indexdir_cleanup(), > > and ovl_workdir_create(). > > > > Signed-off-by: NeilBrown <neil@xxxxxxxxxx> > > --- > > fs/overlayfs/dir.c | 71 +++++++++++++++++++++------------------- > > fs/overlayfs/overlayfs.h | 2 +- > > fs/overlayfs/ovl_entry.h | 1 + > > fs/overlayfs/params.c | 2 ++ > > fs/overlayfs/readdir.c | 31 ++++++++++-------- > > fs/overlayfs/super.c | 9 +++-- > > fs/overlayfs/util.c | 7 ++-- > > 7 files changed, 67 insertions(+), 56 deletions(-) > > > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > > index 5afe17cee305..78b0d956b0ac 100644 > > --- a/fs/overlayfs/dir.c > > +++ b/fs/overlayfs/dir.c > > @@ -77,7 +77,6 @@ struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) > > return temp; > > } > > > > -/* caller holds i_mutex on workdir */ > > static struct dentry *ovl_whiteout(struct ovl_fs *ofs) > > { > > int err; > > @@ -85,47 +84,51 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) > > struct dentry *workdir = ofs->workdir; > > struct inode *wdir = workdir->d_inode; > > > > + mutex_lock(&ofs->whiteout_lock); > > if (!ofs->whiteout) { > > + inode_lock(wdir); > > whiteout = ovl_lookup_temp(ofs, workdir); > > + if (!IS_ERR(whiteout)) { > > + err = ovl_do_whiteout(ofs, wdir, whiteout); > > + if (err) { > > + dput(whiteout); > > + whiteout = ERR_PTR(err); > > + } > > + } > > + inode_unlock(wdir); > > if (IS_ERR(whiteout)) > > goto out; > > - > > - err = ovl_do_whiteout(ofs, wdir, whiteout); > > - if (err) { > > - dput(whiteout); > > - whiteout = ERR_PTR(err); > > - goto out; > > - } > > ofs->whiteout = whiteout; > > } > > > > if (!ofs->no_shared_whiteout) { > > + inode_lock(wdir); > > whiteout = ovl_lookup_temp(ofs, workdir); > > - if (IS_ERR(whiteout)) > > - goto out; > > - > > - err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); > > - if (!err) > > + if (!IS_ERR(whiteout)) { > > + err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); > > + if (err) { > > + dput(whiteout); > > + whiteout = ERR_PTR(err); > > + } > > + } > > + inode_unlock(wdir); > > + if (!IS_ERR(whiteout) || PTR_ERR(whiteout) != -EMLINK) > > goto out; > > > > - if (err != -EMLINK) { > > - pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n", > > - ofs->whiteout->d_inode->i_nlink, err); > > - ofs->no_shared_whiteout = true; > > - } > > - dput(whiteout); > > Where did this dput go? 13 lines up in the patch. It is called when ovl_do_link() fails. It is now closer to the ovl_do_link() call. > > > + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n", > > + ofs->whiteout->d_inode->i_nlink, err); > > + ofs->no_shared_whiteout = true; > > } > > whiteout = ofs->whiteout; > > ofs->whiteout = NULL; > > out: > > + mutex_unlock(&ofs->whiteout_lock); > > return whiteout; > > } > > > > -/* Caller must hold i_mutex on both workdir and dir */ > > int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, > > struct dentry *dentry) > > { > > - struct inode *wdir = ofs->workdir->d_inode; > > struct dentry *whiteout; > > int err; > > int flags = 0; > > @@ -138,18 +141,26 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, > > if (d_is_dir(dentry)) > > flags = RENAME_EXCHANGE; > > > > - err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags); > > + err = ovl_lock_rename_workdir(ofs->workdir, dir); > > + if (!err) { > > + if (whiteout->d_parent == ofs->workdir) > > + err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, > > + dentry, flags); > > + else > > + err = -EINVAL; > > + unlock_rename(ofs->workdir, dir); > > + } > > if (err) > > goto kill_whiteout; > > if (flags) > > - ovl_cleanup(ofs, wdir, dentry); > > + ovl_cleanup_unlocked(ofs, ofs->workdir, dentry); > > > > out: > > dput(whiteout); > > return err; > > > > kill_whiteout: > > - ovl_cleanup(ofs, wdir, whiteout); > > + ovl_cleanup_unlocked(ofs, ofs->workdir, whiteout); > > goto out; > > } > > > > @@ -777,15 +788,11 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, > > goto out; > > } > > > > - err = ovl_lock_rename_workdir(workdir, upperdir); > > - if (err) > > - goto out_dput; > > - > > - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, > > - dentry->d_name.len); > > + upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir, > > + dentry->d_name.len); > > err = PTR_ERR(upper); > > if (IS_ERR(upper)) > > - goto out_unlock; > > + goto out_dput; > > > > err = -ESTALE; > > if ((opaquedir && upper != opaquedir) || > > @@ -803,8 +810,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, > > d_drop(dentry); > > out_dput_upper: > > dput(upper); > > -out_unlock: > > - unlock_rename(workdir, upperdir); > > out_dput: > > dput(opaquedir); > > out: > > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > > index 508003e26e08..25378b81251e 100644 > > --- a/fs/overlayfs/overlayfs.h > > +++ b/fs/overlayfs/overlayfs.h > > @@ -732,7 +732,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, > > void ovl_cache_free(struct list_head *list); > > void ovl_dir_cache_free(struct inode *inode); > > int ovl_check_d_type_supported(const struct path *realpath); > > -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, > > +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, > > struct vfsmount *mnt, struct dentry *dentry, int level); > > int ovl_indexdir_cleanup(struct ovl_fs *ofs); > > > > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h > > index afb7762f873f..4c1bae935ced 100644 > > --- a/fs/overlayfs/ovl_entry.h > > +++ b/fs/overlayfs/ovl_entry.h > > @@ -88,6 +88,7 @@ struct ovl_fs { > > /* Shared whiteout cache */ > > struct dentry *whiteout; > > bool no_shared_whiteout; > > + struct mutex whiteout_lock; > > /* r/o snapshot of upperdir sb's only taken on volatile mounts */ > > errseq_t errseq; > > }; > > diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c > > index f42488c01957..cb1a17c066cd 100644 > > --- a/fs/overlayfs/params.c > > +++ b/fs/overlayfs/params.c > > @@ -797,6 +797,8 @@ int ovl_init_fs_context(struct fs_context *fc) > > fc->s_fs_info = ofs; > > fc->fs_private = ctx; > > fc->ops = &ovl_context_ops; > > + > > + mutex_init(&ofs->whiteout_lock); > > return 0; > > > > out_err: > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c > > index 2a222b8185a3..fd98444dacef 100644 > > --- a/fs/overlayfs/readdir.c > > +++ b/fs/overlayfs/readdir.c > > @@ -1141,7 +1141,8 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa > > if (IS_ERR(dentry)) > > continue; > > if (dentry->d_inode) > > - err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level); > > + err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt, > > + dentry, level); > > dput(dentry); > > if (err) > > break; > > @@ -1152,24 +1153,27 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa > > return err; > > } > > > > -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, > > +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent, > > struct vfsmount *mnt, struct dentry *dentry, int level) > > { > > int err; > > > > if (!d_is_dir(dentry) || level > 1) { > > - return ovl_cleanup(ofs, dir, dentry); > > + return ovl_cleanup_unlocked(ofs, parent, dentry); > > } > > > > - err = ovl_do_rmdir(ofs, dir, dentry); > > + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); > > + if (dentry->d_parent == parent) > > + err = ovl_do_rmdir(ofs, parent->d_inode, dentry); > > + else > > + err = -EINVAL; > > + inode_unlock(parent->d_inode); > > if (err) { > > struct path path = { .mnt = mnt, .dentry = dentry }; > > > > - inode_unlock(dir); > > err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); > > - inode_lock_nested(dir, I_MUTEX_PARENT); > > if (!err) > > - err = ovl_cleanup(ofs, dir, dentry); > > + err = ovl_cleanup_unlocked(ofs, parent, dentry); > > } > > > > return err; > > @@ -1180,7 +1184,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > int err; > > struct dentry *indexdir = ofs->workdir; > > struct dentry *index = NULL; > > - struct inode *dir = indexdir->d_inode; > > struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; > > LIST_HEAD(list); > > struct ovl_cache_entry *p; > > @@ -1194,7 +1197,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > if (err) > > goto out; > > > > - inode_lock_nested(dir, I_MUTEX_PARENT); > > list_for_each_entry(p, &list, l_node) { > > if (p->name[0] == '.') { > > if (p->len == 1) > > @@ -1202,7 +1204,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > if (p->len == 2 && p->name[1] == '.') > > continue; > > } > > - index = ovl_lookup_upper(ofs, p->name, indexdir, p->len); > > + index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, > > + p->len); > > if (IS_ERR(index)) { > > err = PTR_ERR(index); > > index = NULL; > > @@ -1210,7 +1213,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > } > > /* Cleanup leftover from index create/cleanup attempt */ > > if (index->d_name.name[0] == '#') { > > - err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1); > > + err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, > > + index, 1); > > if (err) > > break; > > goto next; > > @@ -1220,7 +1224,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > goto next; > > } else if (err == -ESTALE) { > > /* Cleanup stale index entries */ > > - err = ovl_cleanup(ofs, dir, index); > > + err = ovl_cleanup_unlocked(ofs, indexdir, index); > > } else if (err != -ENOENT) { > > /* > > * Abort mount to avoid corrupting the index if > > @@ -1236,7 +1240,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > err = ovl_cleanup_and_whiteout(ofs, indexdir, index); > > } else { > > /* Cleanup orphan index entries */ > > - err = ovl_cleanup(ofs, dir, index); > > + err = ovl_cleanup_unlocked(ofs, indexdir, index); > > } > > > > if (err) > > @@ -1247,7 +1251,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) > > index = NULL; > > } > > dput(index); > > - inode_unlock(dir); > > out: > > ovl_cache_free(&list); > > if (err) > > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > > index 576b5c3b537c..3583e359655f 100644 > > --- a/fs/overlayfs/super.c > > +++ b/fs/overlayfs/super.c > > @@ -299,8 +299,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > int err; > > bool retried = false; > > > > - inode_lock_nested(dir, I_MUTEX_PARENT); > > retry: > > + inode_lock_nested(dir, I_MUTEX_PARENT); > > work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name)); > > > > if (!IS_ERR(work)) { > > @@ -311,6 +311,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > > > if (work->d_inode) { > > err = -EEXIST; > > + inode_unlock(dir); > > if (retried) > > goto out_dput; > > > > @@ -318,7 +319,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > goto out_unlock; > > > > retried = true; > > - err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0); > > + err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, > > + work, 0); > > dput(work); > > if (err == -EINVAL) { > > work = ERR_PTR(err); > > @@ -328,6 +330,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > } > > > > work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); > > + inode_unlock(dir); > > err = PTR_ERR(work); > > if (IS_ERR(work)) > > goto out_err; > > @@ -365,11 +368,11 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > if (err) > > goto out_dput; > > } else { > > + inode_unlock(dir); > > err = PTR_ERR(work); > > goto out_err; > > } > > out_unlock: > > This label name is now misleading Yep - I'll fix it. Thanks, NeilBrown > > > - inode_unlock(dir); > > return work; > > > > Thanks, > Amir. >