On Wed, Apr 16, 2025 at 3:17 PM Christian Brauner <brauner@xxxxxxxxxx> wrote: > > We currently always chase a pointer inode->i_sb->s_user_ns whenever we > need to map a uid/gid which is noticeable during path lookup as noticed > by Linus in [1]. In the majority of cases we don't need to bother with > that pointer chase because the inode won't be located on a filesystem > that's mounted in a user namespace. The user namespace of the superblock > cannot ever change once it's mounted. So introduce and raise IOP_USERNS > on all inodes and check for that flag in i_user_ns() when we retrieve > the user namespace. > > Link: https://lore.kernel.org/CAHk-=whJgRDtxTudTQ9HV8BFw5-bBsu+c8Ouwd_PrPqPB6_KEQ@xxxxxxxxxxxxxx [1] > Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> > --- > fs/inode.c | 6 ++++++ > fs/mnt_idmapping.c | 14 -------------- > include/linux/fs.h | 5 ++++- > include/linux/mnt_idmapping.h | 14 ++++++++++++++ > 4 files changed, 24 insertions(+), 15 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index 99318b157a9a..7335d05dd7d5 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -245,6 +245,8 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp > inode->i_opflags |= IOP_XATTR; > if (sb->s_type->fs_flags & FS_MGTIME) > inode->i_opflags |= IOP_MGTIME; > + if (unlikely(!initial_idmapping(i_user_ns(inode)))) > + inode->i_opflags |= IOP_USERNS; > i_uid_write(inode, 0); > i_gid_write(inode, 0); > atomic_set(&inode->i_writecount, 0); > @@ -1864,6 +1866,10 @@ static void iput_final(struct inode *inode) > > WARN_ON(inode->i_state & I_NEW); > > + /* This is security sensitive so catch missing IOP_USERNS. */ > + VFS_WARN_ON_ONCE(!initial_idmapping(i_user_ns(inode)) && > + !(inode->i_opflags & IOP_USERNS)); > + > if (op->drop_inode) > drop = op->drop_inode(inode); > else > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c > index a37991fdb194..8f7ae908ea16 100644 > --- a/fs/mnt_idmapping.c > +++ b/fs/mnt_idmapping.c > @@ -42,20 +42,6 @@ struct mnt_idmap invalid_mnt_idmap = { > }; > EXPORT_SYMBOL_GPL(invalid_mnt_idmap); > > -/** > - * initial_idmapping - check whether this is the initial mapping > - * @ns: idmapping to check > - * > - * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, > - * [...], 1000 to 1000 [...]. > - * > - * Return: true if this is the initial mapping, false if not. > - */ > -static inline bool initial_idmapping(const struct user_namespace *ns) > -{ > - return ns == &init_user_ns; > -} > - > /** > * make_vfsuid - map a filesystem kuid according to an idmapping > * @idmap: the mount's idmapping > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 016b0fe1536e..d28384d5b752 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -663,6 +663,7 @@ is_uncached_acl(struct posix_acl *acl) > #define IOP_DEFAULT_READLINK 0x0010 > #define IOP_MGTIME 0x0020 > #define IOP_CACHED_LINK 0x0040 > +#define IOP_USERNS 0x0080 > > /* > * Keep mostly read-only and often accessed (especially for > @@ -1454,7 +1455,9 @@ struct super_block { > > static inline struct user_namespace *i_user_ns(const struct inode *inode) > { > - return inode->i_sb->s_user_ns; > + if (unlikely(inode->i_opflags & IOP_USERNS)) > + return inode->i_sb->s_user_ns; > + return &init_user_ns; > } > > /* Helper functions so that in most cases filesystems will > diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h > index e71a6070a8f8..85553b3a7904 100644 > --- a/include/linux/mnt_idmapping.h > +++ b/include/linux/mnt_idmapping.h > @@ -25,6 +25,20 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); > static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); > static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); > > +/** > + * initial_idmapping - check whether this is the initial mapping > + * @ns: idmapping to check > + * > + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, > + * [...], 1000 to 1000 [...]. > + * > + * Return: true if this is the initial mapping, false if not. > + */ > +static inline bool initial_idmapping(const struct user_namespace *ns) > +{ > + return ns == &init_user_ns; > +} > + > static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) > { > return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; > > -- > 2.47.2 > I don't have an opinion. But if going this way, i think you want the assert for correctly applied flag when fetching the ns, not just iput_final. -- Mateusz Guzik <mjguzik gmail.com>