On Tue, 22 Jul 2025, Jeff Layton wrote: > On Mon, 2025-07-21 at 17:59 +1000, NeilBrown wrote: > > Proposed changes to directory-op locking will lock the dentry rather > > than the whole directory. So the dentry will need to be unlocked. > > > > vfs_mkdir() consumes the dentry on error, so there will be no dentry to > > be unlocked. > > > > So this patch changes vfs_mkdir() to unlock on error as well as > > releasing the dentry. This requires various other functions in various > > callers to also unlock on error. > > > > At present this results in some clumsy code. Once the transition to > > dentry locking is complete the clumsiness will be gone. > > > > Signed-off-by: NeilBrown <neil@xxxxxxxxxx> > > --- > > fs/cachefiles/namei.c | 9 +++++---- > > fs/ecryptfs/inode.c | 3 ++- > > fs/namei.c | 24 ++++++++++++++++-------- > > fs/nfsd/nfs4recover.c | 12 +++++------- > > fs/nfsd/vfs.c | 12 ++++++++++-- > > fs/overlayfs/dir.c | 13 +++++++------ > > fs/overlayfs/overlayfs.h | 1 + > > fs/overlayfs/super.c | 5 +++-- > > fs/xfs/scrub/orphanage.c | 2 +- > > 9 files changed, 50 insertions(+), 31 deletions(-) > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index d1edb2ac3837..732d78911bed 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -131,8 +131,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > ret = cachefiles_inject_write_error(); > > if (ret == 0) > > subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > - else > > + else { > > + /* vfs_mkdir() unlocks on failure so we must too */ > > + inode_unlock(d_inode(dir)); > > subdir = ERR_PTR(ret); > > + } > > if (IS_ERR(subdir)) { > > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > > cachefiles_trace_mkdir_error); > > @@ -196,9 +199,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > return ERR_PTR(-EBUSY); > > > > mkdir_error: > > - inode_unlock(d_inode(dir)); > > - if (!IS_ERR(subdir)) > > - dput(subdir); > > + done_dentry_lookup(subdir); > > pr_err("mkdir %s failed with error %d\n", dirname, ret); > > return ERR_PTR(ret); > > > > diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c > > index abd954c6a14e..5d8cb042aa57 100644 > > --- a/fs/ecryptfs/inode.c > > +++ b/fs/ecryptfs/inode.c > > @@ -520,7 +520,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > lower_dentry, mode); > > rc = PTR_ERR(lower_dentry); > > if (IS_ERR(lower_dentry)) > > - goto out; > > + goto out_unlocked; > > rc = 0; > > if (d_unhashed(lower_dentry)) > > goto out; > > @@ -532,6 +532,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > set_nlink(dir, lower_dir->i_nlink); > > out: > > inode_unlock(lower_dir); > > +out_unlocked: > > if (d_really_is_negative(dentry)) > > d_drop(dentry); > > return ERR_PTR(rc); > > diff --git a/fs/namei.c b/fs/namei.c > > index da160a01e23d..950a0d0d54da 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -1723,13 +1723,18 @@ EXPORT_SYMBOL(lookup_one_qstr_excl); > > * rmdir) a dentry. After this, done_dentry_lookup() can be used to both > > * unlock the parent directory and dput() the dentry. > > * > > + * If the dentry is an error - as can happen after vfs_mkdir() - > > + * the unlock is skipped as unneeded. > > + * > > * This interface allows a smooth transition from parent-dir based > > * locking to dentry based locking. > > */ > > void done_dentry_lookup(struct dentry *dentry) > > { > > - inode_unlock(dentry->d_parent->d_inode); > > - dput(dentry); > > + if (!IS_ERR(dentry)) { > > + inode_unlock(dentry->d_parent->d_inode); > > + dput(dentry); > > + } > > nit: could you introduce these versions of done_dentry_lookup() and > done_dentry_lookup_return() in patch #2, even if not strictly needed > yet? Better to introduce it as ERR_PTR-safe from the start. Maybe... an ERR_PTR-safe done_dentry_lookup() implies that you generally don't need to unlock if you have an error. But that isn't the case until this patch. But it is only a one-patch gap if I introduce that a bit earlier. Maybe it doesn't matter. I'll try it and see what I think. Thanks, NeilBrown > > > } > > EXPORT_SYMBOL(done_dentry_lookup); > > > > @@ -1742,12 +1747,16 @@ EXPORT_SYMBOL(done_dentry_lookup); > > * rmdir) a dentry. After this, done_dentry_lookup_return() can be used to > > * unlock the parent directory. The dentry is returned for further use. > > * > > + * If the dentry is an error - as can happen after vfs_mkdir() - > > + * the unlock is skipped as unneeded. > > + * > > * This interface allows a smooth transition from parent-dir based > > * locking to dentry based locking. > > */ > > struct dentry *done_dentry_lookup_return(struct dentry *dentry) > > { > > - inode_unlock(dentry->d_parent->d_inode); > > + if (!IS_ERR(dentry)) > > + inode_unlock(dentry->d_parent->d_inode); > > return dentry; > > } > > EXPORT_SYMBOL(done_dentry_lookup_return); > > @@ -4210,9 +4219,7 @@ EXPORT_SYMBOL(kern_path_create); > > > > void done_path_create(struct path *path, struct dentry *dentry) > > { > > - if (!IS_ERR(dentry)) > > - dput(dentry); > > - inode_unlock(path->dentry->d_inode); > > + done_dentry_lookup(dentry); > > mnt_drop_write(path->mnt); > > path_put(path); > > } > > @@ -4375,7 +4382,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d > > * negative or unhashes it and possibly splices a different one returning it, > > * the original dentry is dput() and the alternate is returned. > > * > > - * In case of an error the dentry is dput() and an ERR_PTR() is returned. > > + * In case of an error the dentry is dput(), the parent is unlocked, and > > + * an ERR_PTR() is returned. > > */ > > struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > struct dentry *dentry, umode_t mode) > > @@ -4413,7 +4421,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > return dentry; > > > > err: > > - dput(dentry); > > + done_dentry_lookup(dentry); > > return ERR_PTR(error); > > } > > EXPORT_SYMBOL(vfs_mkdir); > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > > index 82785db730d9..693fa95fa678 100644 > > --- a/fs/nfsd/nfs4recover.c > > +++ b/fs/nfsd/nfs4recover.c > > @@ -222,7 +222,8 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) > > dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir); > > if (IS_ERR(dentry)) { > > status = PTR_ERR(dentry); > > - goto out_unlock; > > + inode_unlock(d_inode(dir)); > > + goto out; > > } > > if (d_really_is_positive(dentry)) > > /* > > @@ -233,15 +234,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) > > * In the 4.0 case, we should never get here; but we may > > * as well be forgiving and just succeed silently. > > */ > > - goto out_put; > > + goto out; > > dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); > > if (IS_ERR(dentry)) > > status = PTR_ERR(dentry); > > -out_put: > > - if (!status) > > - dput(dentry); > > -out_unlock: > > - inode_unlock(d_inode(dir)); > > +out: > > + done_dentry_lookup(dentry); > > if (status == 0) { > > if (nn->in_grace) > > __nfsd4_create_reclaim_record_grace(clp, dname, > > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > > index a21940cadede..e85195e858a2 100644 > > --- a/fs/nfsd/vfs.c > > +++ b/fs/nfsd/vfs.c > > @@ -1489,7 +1489,9 @@ nfsd_check_ignore_resizing(struct iattr *iap) > > iap->ia_valid &= ~ATTR_SIZE; > > } > > > > -/* The parent directory should already be locked: */ > > +/* The parent directory should already be locked. The lock > > + * will be dropped on error. > > + */ > > __be32 > > nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > > struct nfsd_attrs *attrs, > > @@ -1555,8 +1557,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > > err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); > > > > out: > > - if (!IS_ERR(dchild)) > > + if (!IS_ERR(dchild)) { > > + if (err) > > + inode_unlock(dirp); > > dput(dchild); > > + } > > return err; > > > > out_nfserr: > > @@ -1613,6 +1618,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > > if (err != nfs_ok) > > goto out_unlock; > > err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); > > + if (err) > > + /* lock will have been dropped */ > > + return err; > > fh_fill_post_attrs(fhp); > > out_unlock: > > inode_unlock(dentry->d_inode); > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > > index 30619777f0f6..74b52595ea0e 100644 > > --- a/fs/overlayfs/dir.c > > +++ b/fs/overlayfs/dir.c > > @@ -161,14 +161,17 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir, > > goto out; > > } > > > > +/* dir will be unlocked on return */ > > struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, > > struct dentry *newdentry, struct ovl_cattr *attr) > > { > > struct inode *dir = parent->d_inode; > > int err; > > > > - if (IS_ERR(newdentry)) > > + if (IS_ERR(newdentry)) { > > + inode_unlock(dir); > > return newdentry; > > + } > > > > err = -ESTALE; > > if (newdentry->d_inode) > > @@ -213,11 +216,11 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, > > } > > out: > > if (err) { > > - if (!IS_ERR(newdentry)) > > - dput(newdentry); > > + done_dentry_lookup(newdentry); > > return ERR_PTR(err); > > + } else { > > + return done_dentry_lookup_return(newdentry); > > } > > - return newdentry; > > } > > > > struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, > > @@ -227,7 +230,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, > > inode_lock(workdir->d_inode); > > ret = ovl_create_real(ofs, workdir, > > ovl_lookup_temp(ofs, workdir), attr); > > - inode_unlock(workdir->d_inode); > > return ret; > > } > > > > @@ -335,7 +337,6 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, > > ovl_lookup_upper(ofs, dentry->d_name.name, > > upperdir, dentry->d_name.len), > > attr); > > - inode_unlock(udir); > > if (IS_ERR(newdentry)) > > return PTR_ERR(newdentry); > > > > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > > index 4f84abaa0d68..238c26142318 100644 > > --- a/fs/overlayfs/overlayfs.h > > +++ b/fs/overlayfs/overlayfs.h > > @@ -250,6 +250,7 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, > > > > ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > > pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); > > + /* Note: dir will have been unlocked on failure */ > > return ret; > > } > > > > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > > index 4afa91882075..df99a6fa17ef 100644 > > --- a/fs/overlayfs/super.c > > +++ b/fs/overlayfs/super.c > > @@ -328,11 +328,11 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > > } > > > > work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); > > - inode_unlock(dir); > > err = PTR_ERR(work); > > if (IS_ERR(work)) > > goto out_err; > > > > + done_dentry_lookup_return(work); > > /* Weird filesystem returning with hashed negative (kernfs)? */ > > err = -EINVAL; > > if (d_really_is_negative(work)) > > @@ -623,7 +623,8 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, > > child = ovl_lookup_upper(ofs, name, parent, len); > > if (!IS_ERR(child) && !child->d_inode) > > child = ovl_create_real(ofs, parent, child, OVL_CATTR(mode)); > > - inode_unlock(parent->d_inode); > > + else > > + inode_unlock(parent->d_inode); > > dput(parent); > > > > return child; > > diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c > > index 9c12cb844231..c95bded4e8a7 100644 > > --- a/fs/xfs/scrub/orphanage.c > > +++ b/fs/xfs/scrub/orphanage.c > > @@ -170,7 +170,7 @@ xrep_orphanage_create( > > orphanage_dentry, 0750); > > error = PTR_ERR(orphanage_dentry); > > if (IS_ERR(orphanage_dentry)) > > - goto out_unlock_root; > > + goto out_dput_root; > > } > > > > /* Not a directory? Bail out. */ > > This does make for some awkward code. Fortunately there aren't that > many vfs_mkdir() callers. > -- > Jeff Layton <jlayton@xxxxxxxxxx> >