Re: [PATCH 09/12] ovl: whiteout locking changes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jun 25, 2025 at 1:07 AM NeilBrown <neil@xxxxxxxxxx> wrote:
>
> ovl_writeout() relies on the workdir i_rwsem to provide exclusive access
> to ofs->writeout which it manipulates.  Rather than depending on this,

typo writeout/whiteout all over this commit message

> add a new mutex, "writeout_lock" to explicitly provide the required
> locking.
>
> Then take the lock on workdir only when needed - to lookup the temp name
> and to do the whiteout or link.  So ovl_writeout() and similarly
> ovl_cleanup_and_writeout() and ovl_workdir_cleanup() are now called
> without the lock held.  This requires changes to
> ovl_remove_and_whiteout(), ovl_cleanup_index(), ovl_indexdir_cleanup(),
> and ovl_workdir_create().
>
> Signed-off-by: NeilBrown <neil@xxxxxxxxxx>
> ---
>  fs/overlayfs/dir.c       | 71 +++++++++++++++++++++-------------------
>  fs/overlayfs/overlayfs.h |  2 +-
>  fs/overlayfs/ovl_entry.h |  1 +
>  fs/overlayfs/params.c    |  2 ++
>  fs/overlayfs/readdir.c   | 31 ++++++++++--------
>  fs/overlayfs/super.c     |  9 +++--
>  fs/overlayfs/util.c      |  7 ++--
>  7 files changed, 67 insertions(+), 56 deletions(-)
>
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 5afe17cee305..78b0d956b0ac 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -77,7 +77,6 @@ struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
>         return temp;
>  }
>
> -/* caller holds i_mutex on workdir */
>  static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
>  {
>         int err;
> @@ -85,47 +84,51 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
>         struct dentry *workdir = ofs->workdir;
>         struct inode *wdir = workdir->d_inode;
>
> +       mutex_lock(&ofs->whiteout_lock);
>         if (!ofs->whiteout) {
> +               inode_lock(wdir);
>                 whiteout = ovl_lookup_temp(ofs, workdir);
> +               if (!IS_ERR(whiteout)) {
> +                       err = ovl_do_whiteout(ofs, wdir, whiteout);
> +                       if (err) {
> +                               dput(whiteout);
> +                               whiteout = ERR_PTR(err);
> +                       }
> +               }
> +               inode_unlock(wdir);
>                 if (IS_ERR(whiteout))
>                         goto out;
> -
> -               err = ovl_do_whiteout(ofs, wdir, whiteout);
> -               if (err) {
> -                       dput(whiteout);
> -                       whiteout = ERR_PTR(err);
> -                       goto out;
> -               }
>                 ofs->whiteout = whiteout;
>         }
>
>         if (!ofs->no_shared_whiteout) {
> +               inode_lock(wdir);
>                 whiteout = ovl_lookup_temp(ofs, workdir);
> -               if (IS_ERR(whiteout))
> -                       goto out;
> -
> -               err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
> -               if (!err)
> +               if (!IS_ERR(whiteout)) {
> +                       err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
> +                       if (err) {
> +                               dput(whiteout);
> +                               whiteout = ERR_PTR(err);
> +                       }
> +               }
> +               inode_unlock(wdir);
> +               if (!IS_ERR(whiteout) || PTR_ERR(whiteout) != -EMLINK)
>                         goto out;
>
> -               if (err != -EMLINK) {
> -                       pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> -                               ofs->whiteout->d_inode->i_nlink, err);
> -                       ofs->no_shared_whiteout = true;
> -               }
> -               dput(whiteout);

Where did this dput go?

> +               pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> +                       ofs->whiteout->d_inode->i_nlink, err);
> +               ofs->no_shared_whiteout = true;
>         }
>         whiteout = ofs->whiteout;
>         ofs->whiteout = NULL;
>  out:
> +       mutex_unlock(&ofs->whiteout_lock);
>         return whiteout;
>  }
>
> -/* Caller must hold i_mutex on both workdir and dir */
>  int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
>                              struct dentry *dentry)
>  {
> -       struct inode *wdir = ofs->workdir->d_inode;
>         struct dentry *whiteout;
>         int err;
>         int flags = 0;
> @@ -138,18 +141,26 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
>         if (d_is_dir(dentry))
>                 flags = RENAME_EXCHANGE;
>
> -       err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags);
> +       err = ovl_lock_rename_workdir(ofs->workdir, dir);
> +       if (!err) {
> +               if (whiteout->d_parent == ofs->workdir)
> +                       err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir,
> +                                           dentry, flags);
> +               else
> +                       err = -EINVAL;
> +               unlock_rename(ofs->workdir, dir);
> +       }
>         if (err)
>                 goto kill_whiteout;
>         if (flags)
> -               ovl_cleanup(ofs, wdir, dentry);
> +               ovl_cleanup_unlocked(ofs, ofs->workdir, dentry);
>
>  out:
>         dput(whiteout);
>         return err;
>
>  kill_whiteout:
> -       ovl_cleanup(ofs, wdir, whiteout);
> +       ovl_cleanup_unlocked(ofs, ofs->workdir, whiteout);
>         goto out;
>  }
>
> @@ -777,15 +788,11 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
>                         goto out;
>         }
>
> -       err = ovl_lock_rename_workdir(workdir, upperdir);
> -       if (err)
> -               goto out_dput;
> -
> -       upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
> -                                dentry->d_name.len);
> +       upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir,
> +                                         dentry->d_name.len);
>         err = PTR_ERR(upper);
>         if (IS_ERR(upper))
> -               goto out_unlock;
> +               goto out_dput;
>
>         err = -ESTALE;
>         if ((opaquedir && upper != opaquedir) ||
> @@ -803,8 +810,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
>         d_drop(dentry);
>  out_dput_upper:
>         dput(upper);
> -out_unlock:
> -       unlock_rename(workdir, upperdir);
>  out_dput:
>         dput(opaquedir);
>  out:
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 508003e26e08..25378b81251e 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -732,7 +732,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
>  void ovl_cache_free(struct list_head *list);
>  void ovl_dir_cache_free(struct inode *inode);
>  int ovl_check_d_type_supported(const struct path *realpath);
> -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
> +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
>                         struct vfsmount *mnt, struct dentry *dentry, int level);
>  int ovl_indexdir_cleanup(struct ovl_fs *ofs);
>
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index afb7762f873f..4c1bae935ced 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -88,6 +88,7 @@ struct ovl_fs {
>         /* Shared whiteout cache */
>         struct dentry *whiteout;
>         bool no_shared_whiteout;
> +       struct mutex whiteout_lock;
>         /* r/o snapshot of upperdir sb's only taken on volatile mounts */
>         errseq_t errseq;
>  };
> diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
> index f42488c01957..cb1a17c066cd 100644
> --- a/fs/overlayfs/params.c
> +++ b/fs/overlayfs/params.c
> @@ -797,6 +797,8 @@ int ovl_init_fs_context(struct fs_context *fc)
>         fc->s_fs_info           = ofs;
>         fc->fs_private          = ctx;
>         fc->ops                 = &ovl_context_ops;
> +
> +       mutex_init(&ofs->whiteout_lock);
>         return 0;
>
>  out_err:
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 2a222b8185a3..fd98444dacef 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -1141,7 +1141,8 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
>                 if (IS_ERR(dentry))
>                         continue;
>                 if (dentry->d_inode)
> -                       err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
> +                       err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
> +                                                 dentry, level);
>                 dput(dentry);
>                 if (err)
>                         break;
> @@ -1152,24 +1153,27 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
>         return err;
>  }
>
> -int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
> +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
>                         struct vfsmount *mnt, struct dentry *dentry, int level)
>  {
>         int err;
>
>         if (!d_is_dir(dentry) || level > 1) {
> -               return ovl_cleanup(ofs, dir, dentry);
> +               return ovl_cleanup_unlocked(ofs, parent, dentry);
>         }
>
> -       err = ovl_do_rmdir(ofs, dir, dentry);
> +       inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
> +       if (dentry->d_parent == parent)
> +               err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
> +       else
> +               err = -EINVAL;
> +       inode_unlock(parent->d_inode);
>         if (err) {
>                 struct path path = { .mnt = mnt, .dentry = dentry };
>
> -               inode_unlock(dir);
>                 err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
> -               inode_lock_nested(dir, I_MUTEX_PARENT);
>                 if (!err)
> -                       err = ovl_cleanup(ofs, dir, dentry);
> +                       err = ovl_cleanup_unlocked(ofs, parent, dentry);
>         }
>
>         return err;
> @@ -1180,7 +1184,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>         int err;
>         struct dentry *indexdir = ofs->workdir;
>         struct dentry *index = NULL;
> -       struct inode *dir = indexdir->d_inode;
>         struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
>         LIST_HEAD(list);
>         struct ovl_cache_entry *p;
> @@ -1194,7 +1197,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>         if (err)
>                 goto out;
>
> -       inode_lock_nested(dir, I_MUTEX_PARENT);
>         list_for_each_entry(p, &list, l_node) {
>                 if (p->name[0] == '.') {
>                         if (p->len == 1)
> @@ -1202,7 +1204,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>                         if (p->len == 2 && p->name[1] == '.')
>                                 continue;
>                 }
> -               index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
> +               index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir,
> +                                                 p->len);
>                 if (IS_ERR(index)) {
>                         err = PTR_ERR(index);
>                         index = NULL;
> @@ -1210,7 +1213,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>                 }
>                 /* Cleanup leftover from index create/cleanup attempt */
>                 if (index->d_name.name[0] == '#') {
> -                       err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
> +                       err = ovl_workdir_cleanup(ofs, indexdir, path.mnt,
> +                                                 index, 1);
>                         if (err)
>                                 break;
>                         goto next;
> @@ -1220,7 +1224,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>                         goto next;
>                 } else if (err == -ESTALE) {
>                         /* Cleanup stale index entries */
> -                       err = ovl_cleanup(ofs, dir, index);
> +                       err = ovl_cleanup_unlocked(ofs, indexdir, index);
>                 } else if (err != -ENOENT) {
>                         /*
>                          * Abort mount to avoid corrupting the index if
> @@ -1236,7 +1240,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>                         err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
>                 } else {
>                         /* Cleanup orphan index entries */
> -                       err = ovl_cleanup(ofs, dir, index);
> +                       err = ovl_cleanup_unlocked(ofs, indexdir, index);
>                 }
>
>                 if (err)
> @@ -1247,7 +1251,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>                 index = NULL;
>         }
>         dput(index);
> -       inode_unlock(dir);
>  out:
>         ovl_cache_free(&list);
>         if (err)
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 576b5c3b537c..3583e359655f 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -299,8 +299,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>         int err;
>         bool retried = false;
>
> -       inode_lock_nested(dir, I_MUTEX_PARENT);
>  retry:
> +       inode_lock_nested(dir, I_MUTEX_PARENT);
>         work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
>
>         if (!IS_ERR(work)) {
> @@ -311,6 +311,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>
>                 if (work->d_inode) {
>                         err = -EEXIST;
> +                       inode_unlock(dir);
>                         if (retried)
>                                 goto out_dput;
>
> @@ -318,7 +319,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>                                 goto out_unlock;
>
>                         retried = true;
> -                       err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
> +                       err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt,
> +                                                 work, 0);
>                         dput(work);
>                         if (err == -EINVAL) {
>                                 work = ERR_PTR(err);
> @@ -328,6 +330,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>                 }
>
>                 work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
> +               inode_unlock(dir);
>                 err = PTR_ERR(work);
>                 if (IS_ERR(work))
>                         goto out_err;
> @@ -365,11 +368,11 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
>                 if (err)
>                         goto out_dput;
>         } else {
> +               inode_unlock(dir);
>                 err = PTR_ERR(work);
>                 goto out_err;
>         }
>  out_unlock:

This label name is now misleading

> -       inode_unlock(dir);
>         return work;
>

Thanks,
Amir.





[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux