The VFS no longer needs the directory to be locked when performing updates in the directory (create/remove/rename). We only lock directories during these ops because the filesystem might expect that. Some filesystems may not need it. Allow the filesystem to opt out by setting no_dir_lock in inode_operations. Signed-off-by: NeilBrown <neil@xxxxxxxxxx> --- fs/namei.c | 75 ++++++++++++++++++++++++++++++++-------------- include/linux/fs.h | 1 + 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 5c9279657b32..55ea67b4f891 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2001,7 +2001,8 @@ struct dentry *lookup_and_lock_hashed(struct qstr *last, { struct dentry *dentry; - inode_lock_nested(base->d_inode, I_MUTEX_PARENT); + if (!d_inode(base)->i_op->no_dir_lock) + inode_lock_nested(base->d_inode, I_MUTEX_PARENT); retry: dentry = lookup_one_qstr(last, base, lookup_flags); @@ -2011,7 +2012,8 @@ struct dentry *lookup_and_lock_hashed(struct qstr *last, goto retry; } - if (IS_ERR(dentry)) + if (IS_ERR(dentry) && + !d_inode(base)->i_op->no_dir_lock) inode_unlock(base->d_inode); return dentry; } @@ -2066,11 +2068,13 @@ struct dentry *lookup_and_lock_noperm(struct qstr *last, { struct dentry *dentry; - inode_lock_nested(base->d_inode, I_MUTEX_PARENT); + if (!d_inode(base)->i_op->no_dir_lock) + inode_lock_nested(base->d_inode, I_MUTEX_PARENT); dentry = lookup_and_lock_noperm_locked(last, base, lookup_flags, DLOCK_NORMAL); - if (IS_ERR(dentry)) + if (IS_ERR(dentry) && + !d_inode(base)->i_op->no_dir_lock) inode_unlock(base->d_inode); return dentry; } @@ -2097,9 +2101,11 @@ struct dentry *lookup_and_lock_noperm_nested(struct qstr *last, { struct dentry *dentry; - inode_lock_nested(base->d_inode, I_MUTEX_PARENT); + if (!d_inode(base)->i_op->no_dir_lock) + inode_lock_nested(base->d_inode, I_MUTEX_PARENT); dentry = lookup_and_lock_noperm_locked(last, base, lookup_flags, class); - if (IS_ERR(dentry)) + if (IS_ERR(dentry) && + !d_inode(base)->i_op->no_dir_lock) inode_unlock(base->d_inode); return dentry; } @@ -2160,9 +2166,12 @@ struct dentry *lookup_and_lock_killable(struct mnt_idmap *idmap, struct dentry *dentry; int err; - err = down_write_killable_nested(&base->d_inode->i_rwsem, I_MUTEX_PARENT); - if (err) - return ERR_PTR(err); + if (!d_inode(base)->i_op->no_dir_lock) { + err = down_write_killable_nested(&base->d_inode->i_rwsem, + I_MUTEX_PARENT); + if (err) + return ERR_PTR(err); + } err = lookup_one_common(idmap, last, base); if (err < 0) return ERR_PTR(err); @@ -2176,7 +2185,8 @@ struct dentry *lookup_and_lock_killable(struct mnt_idmap *idmap, return ERR_PTR(-ERESTARTSYS); goto retry; } - if (IS_ERR(dentry)) + if (IS_ERR(dentry) && + !d_inode(base)->i_op->no_dir_lock) inode_unlock(base->d_inode); return dentry; } @@ -2205,7 +2215,8 @@ bool lock_and_check_dentry(struct dentry *child, struct dentry *parent) } /* get the child to balance with dentry_unlock() which puts it. */ dget(child); - inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); + if (!d_inode(parent)->i_op->no_dir_lock) + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); return true; } EXPORT_SYMBOL(lock_and_check_dentry); @@ -2230,7 +2241,8 @@ void dentry_unlock(struct dentry *dentry) { if (!IS_ERR(dentry)) { d_lookup_done(dentry); - inode_unlock(dentry->d_parent->d_inode); + if (!dentry->d_parent->d_inode->i_op->no_dir_lock) + inode_unlock(dentry->d_parent->d_inode); dentry_unlock_dir_locked(dentry); } } @@ -2342,9 +2354,11 @@ static struct dentry *lookup_slow(const struct qstr *name, { struct inode *inode = dir->d_inode; struct dentry *res; - inode_lock_shared(inode); + if (!inode->i_op->no_dir_lock) + inode_lock_shared(inode); res = __lookup_slow(name, dir, flags); - inode_unlock_shared(inode); + if (!inode->i_op->no_dir_lock) + inode_unlock_shared(inode); return res; } @@ -3721,6 +3735,9 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) */ static struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) { + if (d_inode(p1)->i_op->no_dir_lock) + return NULL; + if (p1 == p2) { inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); return NULL; @@ -3735,6 +3752,9 @@ static struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) */ static struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) { + if (d_inode(c1)->i_op->no_dir_lock) + return NULL; + if (READ_ONCE(c1->d_parent) == p2) { /* * hopefully won't need to touch ->s_vfs_rename_mutex at all. @@ -3773,6 +3793,8 @@ static struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) static void unlock_rename(struct dentry *p1, struct dentry *p2) { + if (d_inode(p1)->i_op->no_dir_lock) + return; inode_unlock(p1->d_inode); if (p1 != p2) { inode_unlock(p2->d_inode); @@ -3880,6 +3902,10 @@ static struct dentry *lock_ancestors(struct dentry *d1, struct dentry *d2) { struct dentry *locked, *ancestor; + if (!d_inode(d1)->i_op->no_dir_lock) + /* s_vfs_rename_mutex is being used, so skip this locking */ + return NULL; + if (d1->d_parent == d2->d_parent) /* Nothing to lock */ return NULL; @@ -4194,6 +4220,7 @@ void dentry_unlock_rename(struct renamedata *rd) renaming_unlock(rd->old_dir, rd->new_dir, rd->ancestor, rd->old_dentry, rd->new_dentry); + if (!d_inode(rd->old_dir)->i_op->no_dir_lock) unlock_rename(rd->old_dir, rd->new_dir); dput(rd->old_dir); @@ -4697,19 +4724,23 @@ static const char *open_last_lookups(struct nameidata *nd, * dropping this one anyway. */ } - if (open_flag & O_CREAT) - inode_lock(dir->d_inode); - else - inode_lock_shared(dir->d_inode); + if (!d_inode(dir)->i_op->no_dir_lock) { + if (open_flag & O_CREAT) + inode_lock(dir->d_inode); + else + inode_lock_shared(dir->d_inode); + } dentry = lookup_open(nd, file, op, got_write); if (!IS_ERR(dentry)) { if (file->f_mode & FMODE_OPENED) fsnotify_open(file); } - if (open_flag & O_CREAT) - inode_unlock(dir->d_inode); - else - inode_unlock_shared(dir->d_inode); + if (!d_inode(dir)->i_op->no_dir_lock) { + if (open_flag & O_CREAT) + inode_unlock(dir->d_inode); + else + inode_unlock_shared(dir->d_inode); + } if (got_write) mnt_drop_write(nd->path.mnt); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6b4a1a1f4786..b213993c486a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2225,6 +2225,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *, { return wrap_directory_iterator(file, ctx, x); } struct inode_operations { + bool no_dir_lock:1; struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct mnt_idmap *, struct inode *, int); -- 2.49.0