lookup_one_qstr_excl() is used for lookups prior to directory modifications, whether create, unlink, rename, or whatever. To prepare for allowing modification to happen in parallel, change lookup_one_qstr_excl() to use d_alloc_parallel() and change its name to remove the _excl requirement. If LOOKUP_EXCL or LOOKUP_RENAME_TARGET is passed, the caller must ensure d_lookup_done() is called at an appropriate time, and must not assume that it can test for positive or negative dentries without confirming that the dentry is no longer d_in_lookup() - unless it is filesystem code acting one itself and *knows* that ->lookup() always completes the lookup (currently true for all filesystems other than NFS). Signed-off-by: NeilBrown <neil@xxxxxxxxxx> --- Documentation/filesystems/porting.rst | 11 +++++ fs/namei.c | 58 ++++++++++++++++++--------- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 385ca21e230e..1feeac9e1483 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1271,3 +1271,14 @@ completed - typically when it completes with failure. d_alloc_parallel() no longer requires a waitqueue_head. It used one from an internal table when needed. + +--- + +** mandatory** + +All lookup_and_lock* functions may return a d_in_lookup() dentry if +passed "O_CREATE|O_EXCL" or "O_RENAME_TARGET". dentry_unlock() calls +the necessary d_lookup_done(). If the caller *knows* which filesystem +is being used, it may know that this is not possible. Otherwise it must +be careful testing if the dentry is positive or negative as the lookup +may not have been performed yet. diff --git a/fs/namei.c b/fs/namei.c index 0703568339d3..e1749fb03cb5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1666,17 +1666,18 @@ static struct dentry *lookup_dcache(const struct qstr *name, } /* - * Parent directory has inode locked exclusive. This is one - * and only case when ->lookup() gets called on non in-lookup - * dentries - as the matter of fact, this only gets called - * when directory is guaranteed to have no in-lookup children - * at all. + * Parent directory has inode locked. + * d_lookup_done() must be called before the dentry is dput() + * if LOOKUP_EXCL or LOOKUP_RENAME_TARGET is set. + * If the dentry is not d_in_lookup(): * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. + * If it is d_in_lookup() then these conditions will be checked when + * the attempt make to create the act on the name. */ -static struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags) +static struct dentry *lookup_one_qstr(const struct qstr *name, + struct dentry *base, + unsigned int flags) { struct dentry *dentry; struct dentry *old; @@ -1691,18 +1692,27 @@ static struct dentry *lookup_one_qstr_excl(const struct qstr *name, if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); - dentry = d_alloc(base, name); - if (unlikely(!dentry)) - return ERR_PTR(-ENOMEM); + dentry = d_alloc_parallel(base, name); + if (unlikely(IS_ERR(dentry))) + return dentry; + if (unlikely(!d_in_lookup(dentry))) + /* Raced with another thread which did the lookup */ + goto found; old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { + d_lookup_done(dentry); dput(dentry); dentry = old; } found: if (IS_ERR(dentry)) return dentry; + if (d_in_lookup(dentry)) + /* We cannot check for errors - the caller will have to + * wait for any create-etc attempt to get relevant errors. + */ + return dentry; if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { dput(dentry); return ERR_PTR(-ENOENT); @@ -1725,7 +1735,10 @@ static struct dentry *lookup_one_qstr_excl(const struct qstr *name, * The "necessary locks" are currently the inode node lock on @base. * The name @last is expected to already have the hash calculated. * No permission checks are performed. + * * Returns: the dentry, suitably locked, or an ERR_PTR(). + * The dentry may be d_in_lookup() if LOOKUP_EXCL or LOOKUP_RENAME_TARGET + * is given, depending on the filesystem. */ struct dentry *lookup_and_lock_hashed(struct qstr *last, struct dentry *base, @@ -1735,7 +1748,7 @@ struct dentry *lookup_and_lock_hashed(struct qstr *last, inode_lock_nested(base->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(last, base, lookup_flags); + dentry = lookup_one_qstr(last, base, lookup_flags); if (IS_ERR(dentry)) inode_unlock(base->d_inode); return dentry; @@ -1755,7 +1768,7 @@ struct dentry *lookup_and_lock_noperm_locked(struct qstr *last, if (err < 0) return ERR_PTR(err); - return lookup_one_qstr_excl(last, base, lookup_flags); + return lookup_one_qstr(last, base, lookup_flags); } EXPORT_SYMBOL(lookup_and_lock_noperm_locked); @@ -1770,7 +1783,10 @@ EXPORT_SYMBOL(lookup_and_lock_noperm_locked); * The "necessary locks" are currently the inode node lock on @base. * The name @last is NOT expected to have the hash calculated. * No permission checks are performed. + * * Returns: the dentry, suitably locked, or an ERR_PTR(). + * The dentry may be d_in_lookup() if LOOKUP_EXCL or LOOKUP_RENAME_TARGET + * is given, depending on the filesystem. */ struct dentry *lookup_and_lock_noperm(struct qstr *last, struct dentry *base, @@ -1798,7 +1814,10 @@ EXPORT_SYMBOL(lookup_and_lock_noperm); * The "necessary locks" are currently the inode node lock on @base. * The name @last is NOT expected to already have the hash calculated. * Permission checks are performed to ensure %MAY_EXEC access to @base. + * * Returns: the dentry, suitably locked, or an ERR_PTR(). + * The dentry may be d_in_lookup() if LOOKUP_EXCL or LOOKUP_RENAME_TARGET + * is given, depending on the filesystem. */ struct dentry *lookup_and_lock(struct mnt_idmap *idmap, struct qstr *last, @@ -1826,7 +1845,10 @@ EXPORT_SYMBOL(lookup_and_lock); * If a fatal signal arrives or is already pending the operation is aborted. * The name @last is NOT expected to already have the hash calculated. * Permission checks are performed to ensure %MAY_EXEC access to @base. + * * Returns: the dentry, suitably locked, or an ERR_PTR(). + * The dentry may be d_in_lookup() if LOOKUP_EXCL or LOOKUP_RENAME_TARGET + * is given, depending on the filesystem. */ struct dentry *lookup_and_lock_killable(struct mnt_idmap *idmap, struct qstr *last, @@ -1843,7 +1865,7 @@ struct dentry *lookup_and_lock_killable(struct mnt_idmap *idmap, if (err < 0) return ERR_PTR(err); - dentry = lookup_one_qstr_excl(last, base, lookup_flags); + dentry = lookup_one_qstr(last, base, lookup_flags); if (IS_ERR(dentry)) inode_unlock(base->d_inode); return dentry; @@ -1894,6 +1916,7 @@ EXPORT_SYMBOL(dentry_unlock_dir_locked); void dentry_unlock(struct dentry *dentry) { if (!IS_ERR(dentry)) { + d_lookup_done(dentry); inode_unlock(dentry->d_parent->d_inode); dentry_unlock_dir_locked(dentry); } @@ -3500,8 +3523,7 @@ lookup_and_lock_rename_hashed(struct renamedata *rd, int lookup_flags) p = lock_rename(rd->old_dir, rd->new_dir); dget(rd->old_dir); - d1 = lookup_one_qstr_excl(&rd->old_last, rd->old_dir, - lookup_flags); + d1 = lookup_one_qstr(&rd->old_last, rd->old_dir, lookup_flags); if (IS_ERR(d1)) goto out_unlock_1; } @@ -3513,8 +3535,8 @@ lookup_and_lock_rename_hashed(struct renamedata *rd, int lookup_flags) } d2 = dget(rd->new_dentry); } else { - d2 = lookup_one_qstr_excl(&rd->new_last, rd->new_dir, - lookup_flags | target_flags); + d2 = lookup_one_qstr(&rd->new_last, rd->new_dir, + lookup_flags | target_flags); if (IS_ERR(d2)) goto out_unlock_2; } -- 2.49.0