We currently always chase a pointer inode->i_sb->s_user_ns whenever we need to map a uid/gid which is noticeable during path lookup as noticed by Linus in [1]. In the majority of cases we don't need to bother with that pointer chase because the inode won't be located on a filesystem that's mounted in a user namespace. The user namespace of the superblock cannot ever change once it's mounted. So introduce and raise IOP_USERNS on all inodes and check for that flag in i_user_ns() when we retrieve the user namespace. Link: https://lore.kernel.org/CAHk-=whJgRDtxTudTQ9HV8BFw5-bBsu+c8Ouwd_PrPqPB6_KEQ@xxxxxxxxxxxxxx [1] Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> --- fs/inode.c | 6 ++++++ fs/mnt_idmapping.c | 14 -------------- include/linux/fs.h | 5 ++++- include/linux/mnt_idmapping.h | 14 ++++++++++++++ 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 99318b157a9a..7335d05dd7d5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -245,6 +245,8 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp inode->i_opflags |= IOP_XATTR; if (sb->s_type->fs_flags & FS_MGTIME) inode->i_opflags |= IOP_MGTIME; + if (unlikely(!initial_idmapping(i_user_ns(inode)))) + inode->i_opflags |= IOP_USERNS; i_uid_write(inode, 0); i_gid_write(inode, 0); atomic_set(&inode->i_writecount, 0); @@ -1864,6 +1866,10 @@ static void iput_final(struct inode *inode) WARN_ON(inode->i_state & I_NEW); + /* This is security sensitive so catch missing IOP_USERNS. */ + VFS_WARN_ON_ONCE(!initial_idmapping(i_user_ns(inode)) && + !(inode->i_opflags & IOP_USERNS)); + if (op->drop_inode) drop = op->drop_inode(inode); else diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index a37991fdb194..8f7ae908ea16 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -42,20 +42,6 @@ struct mnt_idmap invalid_mnt_idmap = { }; EXPORT_SYMBOL_GPL(invalid_mnt_idmap); -/** - * initial_idmapping - check whether this is the initial mapping - * @ns: idmapping to check - * - * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, - * [...], 1000 to 1000 [...]. - * - * Return: true if this is the initial mapping, false if not. - */ -static inline bool initial_idmapping(const struct user_namespace *ns) -{ - return ns == &init_user_ns; -} - /** * make_vfsuid - map a filesystem kuid according to an idmapping * @idmap: the mount's idmapping diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..d28384d5b752 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -663,6 +663,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_DEFAULT_READLINK 0x0010 #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 +#define IOP_USERNS 0x0080 /* * Keep mostly read-only and often accessed (especially for @@ -1454,7 +1455,9 @@ struct super_block { static inline struct user_namespace *i_user_ns(const struct inode *inode) { - return inode->i_sb->s_user_ns; + if (unlikely(inode->i_opflags & IOP_USERNS)) + return inode->i_sb->s_user_ns; + return &init_user_ns; } /* Helper functions so that in most cases filesystems will diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index e71a6070a8f8..85553b3a7904 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -25,6 +25,20 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); +/** + * initial_idmapping - check whether this is the initial mapping + * @ns: idmapping to check + * + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, + * [...], 1000 to 1000 [...]. + * + * Return: true if this is the initial mapping, false if not. + */ +static inline bool initial_idmapping(const struct user_namespace *ns) +{ + return ns == &init_user_ns; +} + static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) { return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; -- 2.47.2