[CC += people related to memfd_{create,secret}(2) in the kernel] Hi Zhengyi, On Thu, Jun 12, 2025 at 02:17:05PM +0800, Zhengyi Fu wrote: > memfd_secret returns EINVAL when called with FD_CLOEXEC. The > correct flag should be O_CLOEXEC. Thanks for the report! It seems like a bug in the kernel. The documentation was written (relatively) consistent with memfd_create(2), but the implementation was made different. I say the documentation was relatively consistent, because memfd_create(2) uses MFD_CLOEXEC, and memfd_secret(2) documents FD_CLOEXEC, which could be confused, and since they have the same value, it could be considered just a typo. However, O_CLOEXEC is an entirely different flag, which doesn't seem to make sense here. $ grepc -tfld memfd_create . | grep -A4 -e '^[{}.]' -e CLOEXEC; ./mm/memfd.c:SYSCALL_DEFINE2(memfd_create, const char __user *, uname, unsigned int, flags) { struct file *file; int fd, error; char *name; -- fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); if (fd < 0) { error = fd; goto err_name; } -- } $ grepc -tfld memfd_secret . | grep -A3 -e '^[{}.]' -e CLOEXEC; ./mm/secretmem.c:SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) { struct file *file; int fd, err; -- BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); if (!secretmem_enable || !can_set_direct_map()) return -ENOSYS; -- if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) return -EINVAL; if (atomic_read(&secretmem_users) < 0) return -ENFILE; -- fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) return fd; -- } Let's see who added memfd_create(2): alx@devuan:~/src/linux/linux/master$ git blame -- ./mm/memfd.c | grep _CLOEXEC 105ff5339f498 (Jeff Xu 2022-12-15 00:12:03 +0000 306) #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_NOEXEC_SEAL | MFD_EXEC) f5dbcd90dacd3 (Isaac J. Manjarres 2025-01-10 08:58:59 -0800 475) fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? 
O_CLOEXEC : 0); alx@devuan:~/src/linux/linux/master$ git show f5dbcd90dacd3 | grep -e _CLOEXEC -e ^diff | grep -B1 -v ^d diff --git a/mm/memfd.c b/mm/memfd.c - fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); + fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); alx@devuan:~/src/linux/linux/master$ git blame f5dbcd90dacd3^ -- mm/memfd.c | grep _CLOEXEC 105ff5339f498 (Jeff Xu 2022-12-15 00:12:03 +0000 305) #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_NOEXEC_SEAL | MFD_EXEC) 5d752600a8c37 (Mike Kravetz 2018-06-07 17:06:01 -0700 423) fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); alx@devuan:~/src/linux/linux/master$ git show 5d752600a8c37 | grep -e _CLOEXEC -e ^diff | grep -B1 -v ^d diff --git a/mm/memfd.c b/mm/memfd.c +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) + fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); diff --git a/mm/shmem.c b/mm/shmem.c -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) - fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); alx@devuan:~/src/linux/linux/master$ git blame 5d752600a8c37^ -- mm/shmem.c | grep _CLOEXEC 749df87bd7bee (Mike Kravetz 2017-09-06 16:24:16 -0700 3684) #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) 9183df25fe7b1 (David Rheinsberg 2014-08-08 14:25:29 -0700 3729) fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); alx@devuan:~/src/linux/linux/master$ git show 9183df25fe7b1 | grep -e _CLOEXEC -e ^diff | grep -B1 -v ^d diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h +#define MFD_CLOEXEC 0x0001U -- diff --git a/mm/shmem.c b/mm/shmem.c +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) + fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? 
O_CLOEXEC : 0); alx@devuan:~/src/linux/linux/master$ git show 9183df25fe7b1 | head -n5 commit 9183df25fe7b194563db3fec6dc3202a5855839c Author: David Rheinsberg <david@xxxxxxxxxxxx> Date: Fri Aug 8 14:25:29 2014 -0700 shm: add memfd_create() syscall alx@devuan:~/src/linux/linux/master$ git log -1 9183df25fe7b1 | grep @ Author: David Rheinsberg <david@xxxxxxxxxxxx> Signed-off-by: David Herrmann <dh.herrmann@xxxxxxxxx> Acked-by: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx> Cc: Ryan Lortie <desrt@xxxxxxxx> Cc: Lennart Poettering <lennart@xxxxxxxxxxxxxx> Cc: Daniel Mack <zonque@xxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> And memfd_secret(2): alx@devuan:~/src/linux/linux/master$ git blame -- ./mm/secretmem.c | grep _CLOEXEC 1507f51255c9f (Mike Rapoport 2021-07-07 18:08:03 -0700 238) BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); 1507f51255c9f (Mike Rapoport 2021-07-07 18:08:03 -0700 243) if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) 1507f51255c9f (Mike Rapoport 2021-07-07 18:08:03 -0700 248) fd = get_unused_fd_flags(flags & O_CLOEXEC); alx@devuan:~/src/linux/linux/master$ git show 1507f51255c9f | grep -e _CLOEXEC -e ^diff | grep -B1 -v ^d diff --git a/mm/secretmem.c b/mm/secretmem.c + BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); + if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) + fd = get_unused_fd_flags(flags & O_CLOEXEC); alx@devuan:~/src/linux/linux/master$ git show 1507f51255c9f | head -n5 commit 1507f51255c9ff07d75909a84e7c0d7f3c4b2f49 Author: Mike Rapoport <rppt@xxxxxxxxxx> Date: Wed Jul 7 18:08:03 2021 -0700 mm: introduce memfd_secret system call to create "secret" memory areas alx@devuan:~/src/linux/linux/master$ git log -1 1507f51255c9f | grep @ Author: Mike Rapoport <rppt@xxxxxxxxxx> [1] https://lore.kernel.org/linux-mm/213b4567-46ce-f116-9cdf-bbd0c884eb3c@xxxxxxxxxxxxxxx/ 
[akpm@xxxxxxxxxxxxxxxxxxxx: suppress Kconfig whine] Link: https://lkml.kernel.org/r/20210518072034.31572-5-rppt@xxxxxxxxxx Signed-off-by: Mike Rapoport <rppt@xxxxxxxxxxxxx> Acked-by: Hagen Paul Pfeifer <hagen@xxxxxxxx> Acked-by: James Bottomley <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx> Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Christopher Lameter <cl@xxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Elena Reshetova <elena.reshetova@xxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: James Bottomley <jejb@xxxxxxxxxxxxx> Cc: "Kirill A. Shutemov" <kirill@xxxxxxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Mark Rutland <mark.rutland@xxxxxxx> Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx> Cc: Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx> Cc: Roman Gushchin <guro@xxxxxx> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx> Cc: Shuah Khan <shuah@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Tycho Andersen <tycho@xxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: kernel test robot <lkp@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> I've added to CC everyone who appears in either commit with a tag other than Cc, and everyone who was Cc'd in both commits. Now about the situation: it seems there is only one user of CLOEXEC with memfd_secret(2) in Debian: systemtap. <https://codesearch.debian.net/search?q=memfd_secret.*CLOEXEC&literal=0> Do we want to fix the bug, or do we want to document it? That is for the kernel people to answer. 
Also, was O_CLOEXEC used on purpose, or was it by accident? I would have expected either MFD_CLOEXEC to be used, imitating memfd_create(2), or a new MFDS_CLOEXEC to be invented; O_CLOEXEC doesn't make much sense here, IMO. Have a lovely day! Alex > > Signed-off-by: Zhengyi Fu <i@xxxxxxx> > --- > man/man2/memfd_secret.2 | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/man/man2/memfd_secret.2 b/man/man2/memfd_secret.2 > index 5ba7813c1..c6abd2f5f 100644 > --- a/man/man2/memfd_secret.2 > +++ b/man/man2/memfd_secret.2 > @@ -51,7 +51,7 @@ The following values may be bitwise ORed in > to control the behavior of > .BR memfd_secret (): > .TP > -.B FD_CLOEXEC > +.B O_CLOEXEC > Set the close-on-exec flag on the new file descriptor, > which causes the region to be removed from the process on > .BR execve (2). > -- <https://www.alejandro-colomar.es/>
Attachment:
signature.asc
Description: PGP signature