[PATCH v2 2/2] inode: add fastpath for filesystem user namespace retrieval

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We currently always chase the pointer inode->i_sb->s_user_ns whenever we
need to map a uid/gid. This is noticeable during path lookup, as pointed
out by Linus in [1]. In the majority of cases we don't need to bother with
that pointer chase because the inode won't be located on a filesystem
that's mounted in a non-initial user namespace. The user namespace of the
superblock cannot ever change once it's mounted. So introduce IOP_USERNS,
raise it at inode initialization on every inode whose superblock belongs
to a non-initial user namespace, and check for that flag in i_user_ns().
While at it, skip the mapping entirely in the uid/gid helpers when the
mount isn't idmapped.

Link: https://lore.kernel.org/CAHk-=whJgRDtxTudTQ9HV8BFw5-bBsu+c8Ouwd_PrPqPB6_KEQ@xxxxxxxxxxxxxx [1]
Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
---
 fs/inode.c                    |  8 ++++++++
 include/linux/fs.h            | 23 ++++++++++++++++++++---
 include/linux/mnt_idmapping.h |  5 +++++
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 99318b157a9a..8824e462800b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -245,6 +245,8 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
 		inode->i_opflags |= IOP_XATTR;
 	if (sb->s_type->fs_flags & FS_MGTIME)
 		inode->i_opflags |= IOP_MGTIME;
+	if (unlikely(sb->s_user_ns != &init_user_ns))
+		inode->i_opflags |= IOP_USERNS;
 	i_uid_write(inode, 0);
 	i_gid_write(inode, 0);
 	atomic_set(&inode->i_writecount, 0);
@@ -1864,6 +1866,12 @@ static void iput_final(struct inode *inode)
 
 	WARN_ON(inode->i_state & I_NEW);
 
+	/* Security sensitive: catch a missing or spurious IOP_USERNS. */
+	VFS_WARN_ON_ONCE(!(inode->i_opflags & IOP_USERNS) &&
+			 (inode->i_sb->s_user_ns != &init_user_ns));
+	VFS_WARN_ON_ONCE((inode->i_opflags & IOP_USERNS) &&
+			 (inode->i_sb->s_user_ns == &init_user_ns));
+
 	if (op->drop_inode)
 		drop = op->drop_inode(inode);
 	else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..eae1b992aef5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -663,6 +663,7 @@ is_uncached_acl(struct posix_acl *acl)
 #define IOP_DEFAULT_READLINK	0x0010
 #define IOP_MGTIME	0x0020
 #define IOP_CACHED_LINK	0x0040
+#define IOP_USERNS	0x0080
 
 /*
  * Keep mostly read-only and often accessed (especially for
@@ -1454,7 +1455,13 @@ struct super_block {
 
 static inline struct user_namespace *i_user_ns(const struct inode *inode)
 {
-	return inode->i_sb->s_user_ns;
+	VFS_WARN_ON_ONCE(!(inode->i_opflags & IOP_USERNS) &&
+			 (inode->i_sb->s_user_ns != &init_user_ns));
+	VFS_WARN_ON_ONCE((inode->i_opflags & IOP_USERNS) &&
+			 (inode->i_sb->s_user_ns == &init_user_ns));
+	if (unlikely(inode->i_opflags & IOP_USERNS))
+		return inode->i_sb->s_user_ns;
+	return &init_user_ns;
 }
 
 /* Helper functions so that in most cases filesystems will
@@ -1493,6 +1500,8 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
 static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap,
 					 const struct inode *inode)
 {
+	if (likely(is_nop_mnt_idmap(idmap)))
+		return VFSUIDT_INIT(inode->i_uid);
 	return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid);
 }
 
@@ -1545,6 +1554,8 @@ static inline void i_uid_update(struct mnt_idmap *idmap,
 static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap,
 					 const struct inode *inode)
 {
+	if (likely(is_nop_mnt_idmap(idmap)))
+		return VFSGIDT_INIT(inode->i_gid);
 	return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid);
 }
 
@@ -1597,7 +1608,10 @@ static inline void i_gid_update(struct mnt_idmap *idmap,
 static inline void inode_fsuid_set(struct inode *inode,
 				   struct mnt_idmap *idmap)
 {
-	inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
+	if (likely(is_nop_mnt_idmap(idmap)))
+		inode->i_uid = current_fsuid();
+	else
+		inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
 }
 
 /**
@@ -1611,7 +1625,10 @@ static inline void inode_fsuid_set(struct inode *inode,
 static inline void inode_fsgid_set(struct inode *inode,
 				   struct mnt_idmap *idmap)
 {
-	inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
+	if (likely(is_nop_mnt_idmap(idmap)))
+		inode->i_gid = current_fsgid();
+	else
+		inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
 }
 
 /**
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index e71a6070a8f8..22e6e2f08d77 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t));
 static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val));
 static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val));
 
+static __always_inline bool is_nop_mnt_idmap(const struct mnt_idmap *idmap)
+{
+	return idmap == &nop_mnt_idmap;
+}
+
 static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap)
 {
 	return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap;

-- 
2.47.2





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux