We currently always chase the pointer inode->i_sb->s_user_ns whenever we need to map a uid/gid, which is noticeable during path lookup, as Linus pointed out in [1]. In the majority of cases we don't need to bother with that pointer chase because the inode won't be located on a filesystem that's mounted in a user namespace. The user namespace of the superblock cannot ever change once it's mounted. So introduce IOP_USERNS, raise it at inode initialization on every inode whose superblock does not belong to the initial user namespace, and only chase the pointer when that flag is set. Also add is_nop_mnt_idmap() and use it to short-circuit the uid/gid mapping helpers when the mount is not idmapped. Link: https://lore.kernel.org/CAHk-=whJgRDtxTudTQ9HV8BFw5-bBsu+c8Ouwd_PrPqPB6_KEQ@xxxxxxxxxxxxxx [1] Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> --- fs/inode.c | 8 ++++++++ include/linux/fs.h | 23 ++++++++++++++++++++--- include/linux/mnt_idmapping.h | 5 +++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 99318b157a9a..8824e462800b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -245,6 +245,8 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp inode->i_opflags |= IOP_XATTR; if (sb->s_type->fs_flags & FS_MGTIME) inode->i_opflags |= IOP_MGTIME; + if (unlikely(sb->s_user_ns != &init_user_ns)) + inode->i_opflags |= IOP_USERNS; i_uid_write(inode, 0); i_gid_write(inode, 0); atomic_set(&inode->i_writecount, 0); @@ -1864,6 +1866,12 @@ static void iput_final(struct inode *inode) WARN_ON(inode->i_state & I_NEW); + /* This is security sensitive so catch missing IOP_USERNS. 
*/ + VFS_WARN_ON_ONCE(!(inode->i_opflags & IOP_USERNS) && + (inode->i_sb->s_user_ns != &init_user_ns)); + VFS_WARN_ON_ONCE((inode->i_opflags & IOP_USERNS) && + (inode->i_sb->s_user_ns == &init_user_ns)); + if (op->drop_inode) drop = op->drop_inode(inode); else diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..eae1b992aef5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -663,6 +663,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_DEFAULT_READLINK 0x0010 #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 +#define IOP_USERNS 0x0080 /* * Keep mostly read-only and often accessed (especially for @@ -1454,7 +1455,13 @@ struct super_block { static inline struct user_namespace *i_user_ns(const struct inode *inode) { - return inode->i_sb->s_user_ns; + VFS_WARN_ON_ONCE(!(inode->i_opflags & IOP_USERNS) && + (inode->i_sb->s_user_ns != &init_user_ns)); + VFS_WARN_ON_ONCE((inode->i_opflags & IOP_USERNS) && + (inode->i_sb->s_user_ns == &init_user_ns)); + if (unlikely(inode->i_opflags & IOP_USERNS)) + return inode->i_sb->s_user_ns; + return &init_user_ns; } /* Helper functions so that in most cases filesystems will @@ -1493,6 +1500,8 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap, const struct inode *inode) { + if (likely(is_nop_mnt_idmap(idmap))) + return VFSUIDT_INIT(inode->i_uid); return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid); } @@ -1545,6 +1554,8 @@ static inline void i_uid_update(struct mnt_idmap *idmap, static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { + if (likely(is_nop_mnt_idmap(idmap))) + return VFSGIDT_INIT(inode->i_gid); return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid); } @@ -1597,7 +1608,10 @@ static inline void i_gid_update(struct mnt_idmap *idmap, static inline void inode_fsuid_set(struct inode *inode, struct mnt_idmap *idmap) { - inode->i_uid = mapped_fsuid(idmap, 
i_user_ns(inode)); + if (likely(is_nop_mnt_idmap(idmap))) + inode->i_uid = current_fsuid(); + else + inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } /** @@ -1611,7 +1625,10 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct mnt_idmap *idmap) { - inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); + if (likely(is_nop_mnt_idmap(idmap))) + inode->i_gid = current_fsgid(); + else + inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); } /** diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index e71a6070a8f8..22e6e2f08d77 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); +static __always_inline bool is_nop_mnt_idmap(const struct mnt_idmap *idmap) +{ + return idmap == &nop_mnt_idmap; +} + static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) { return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; -- 2.47.2