On Wed, Aug 13, 2025 at 02:55:41PM +0100, Usama Arif wrote: > The test will set the global system THP setting to never, madvise > or always depending on the fixture variant and the 2M setting to > inherit before it starts (and reset to original at teardown). > The fixture setup will also test if PR_SET_THP_DISABLE prctl call can > be made to disable all THPs and skip if it fails. > > This tests if the process can: > - successfully get the policy to disable THPs completely. > - never get a hugepage when the THPs are completely disabled > with the prctl, including with MADV_HUGE and MADV_COLLAPSE. > - successfully reset the policy of the process. > - after reset, only get hugepages with: > - MADV_COLLAPSE when policy is set to never. > - MADV_HUGE and MADV_COLLAPSE when policy is set to madvise. > - always when policy is set to "always". > - repeat the above tests in a forked process to make sure > the policy is carried across forks. > > Signed-off-by: Usama Arif <usamaarif642@xxxxxxxxx> > Acked-by: David Hildenbrand <david@xxxxxxxxxx> Some nits below but this looks sensible, so: Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> > --- > tools/testing/selftests/mm/.gitignore | 1 + > tools/testing/selftests/mm/Makefile | 1 + > .../testing/selftests/mm/prctl_thp_disable.c | 168 ++++++++++++++++++ > tools/testing/selftests/mm/thp_settings.c | 9 +- > tools/testing/selftests/mm/thp_settings.h | 1 + > 5 files changed, 179 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/mm/prctl_thp_disable.c > > diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore > index e7b23a8a05fe2..eb023ea857b31 100644 > --- a/tools/testing/selftests/mm/.gitignore > +++ b/tools/testing/selftests/mm/.gitignore > @@ -58,3 +58,4 @@ pkey_sighandler_tests_32 > pkey_sighandler_tests_64 > guard-regions > merge > +prctl_thp_disable > diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile > index 
d75f1effcb791..bd5d17beafa64 100644 > --- a/tools/testing/selftests/mm/Makefile > +++ b/tools/testing/selftests/mm/Makefile > @@ -87,6 +87,7 @@ TEST_GEN_FILES += on-fault-limit > TEST_GEN_FILES += pagemap_ioctl > TEST_GEN_FILES += pfnmap > TEST_GEN_FILES += process_madv > +TEST_GEN_FILES += prctl_thp_disable > TEST_GEN_FILES += thuge-gen > TEST_GEN_FILES += transhuge-stress > TEST_GEN_FILES += uffd-stress > diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c > new file mode 100644 > index 0000000000000..8845e9f414560 > --- /dev/null > +++ b/tools/testing/selftests/mm/prctl_thp_disable.c > @@ -0,0 +1,168 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Basic tests for PR_GET/SET_THP_DISABLE prctl calls > + * > + * Author(s): Usama Arif <usamaarif642@xxxxxxxxx> > + */ > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <sys/mman.h> > +#include <sys/prctl.h> > +#include <sys/wait.h> > + > +#include "../kselftest_harness.h" > +#include "thp_settings.h" > +#include "vm_util.h" > + > +enum thp_collapse_type { > + THP_COLLAPSE_NONE, > + THP_COLLAPSE_MADV_HUGEPAGE, /* MADV_HUGEPAGE before access */ > + THP_COLLAPSE_MADV_COLLAPSE, /* MADV_COLLAPSE after access */ > +}; > + > +/* > + * Function to mmap a buffer, fault it in, madvise it appropriately (before > + * page fault for MADV_HUGE, and after for MADV_COLLAPSE), and check if the > + * mmap region is huge. > + * Returns: > + * 0 if test doesn't give hugepage > + * 1 if test gives a hugepage > + * -errno if mmap fails > + */ > +static int test_mmap_thp(enum thp_collapse_type madvise_buf, size_t pmdsize) > +{ > + char *mem, *mmap_mem; > + size_t mmap_size; > + int ret; > + > + /* For alignment purposes, we need twice the THP size. 
*/ > + mmap_size = 2 * pmdsize; > + mmap_mem = (char *)mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > + if (mmap_mem == MAP_FAILED) > + return -errno; > + > + /* We need a THP-aligned memory area. */ > + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); > + > + if (madvise_buf == THP_COLLAPSE_MADV_HUGEPAGE) > + madvise(mem, pmdsize, MADV_HUGEPAGE); > + > + /* Ensure memory is allocated */ > + memset(mem, 1, pmdsize); > + > + if (madvise_buf == THP_COLLAPSE_MADV_COLLAPSE) > + madvise(mem, pmdsize, MADV_COLLAPSE); > + > + /* HACK: make sure we have a separate VMA that we can check reliably. */ > + mprotect(mem, pmdsize, PROT_READ); I mean you won't be _absolutely_ sure of this, as you might merge with an adjacent read-only VMA. The best way is always to map a PROT_NONE mapping first, then perform a MAP_FIXED mapping into it. Given that 2 * PMD should guarantee at least 1 aligned PMD you can use, you could do: char *reserve, *mem, *mmap_mem; ... (set mmap_size) /* Reserve space so we don't get any unexpected merges around us. */ reserve = mmap(NULL, 2 * pagesize + mmap_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); if (reserve == MAP_FAILED) return -errno; mmap_mem = mmap(&reserve[pagesize], mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); ... You could then do your 'hack' (which is not really a hack, just fine I think).
> + > + ret = check_huge_anon(mem, 1, pmdsize); > + munmap(mmap_mem, mmap_size); > + return ret; > +} > + > +static void prctl_thp_disable_completely_test(struct __test_metadata *const _metadata, > + size_t pmdsize, > + enum thp_enabled thp_policy) > +{ > + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 1); > + > + /* tests after prctl overrides global policy */ > + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); > + > + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), 0); > + > + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 0); > + > + /* Reset to global policy */ > + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); > + > + /* tests after prctl is cleared, and only global policy is effective */ > + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), > + thp_policy == THP_ALWAYS ? 1 : 0); > + > + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), > + thp_policy == THP_NEVER ? 0 : 1); > + > + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); > +} > + > +FIXTURE(prctl_thp_disable_completely) > +{ > + struct thp_settings settings; > + size_t pmdsize; > +}; > + > +FIXTURE_VARIANT(prctl_thp_disable_completely) > +{ > + enum thp_enabled thp_policy; > +}; > + > +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, never) > +{ > + .thp_policy = THP_NEVER, > +}; > + > +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, madvise) > +{ > + .thp_policy = THP_MADVISE, > +}; > + > +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, always) > +{ > + .thp_policy = THP_ALWAYS, > +}; > + Nice! 
> +FIXTURE_SETUP(prctl_thp_disable_completely) > +{ > + if (!thp_available()) > + SKIP(return, "Transparent Hugepages not available\n"); > + > + self->pmdsize = read_pmd_pagesize(); > + if (!self->pmdsize) > + SKIP(return, "Unable to read PMD size\n"); > + > + if (prctl(PR_SET_THP_DISABLE, 1, NULL, NULL, NULL)) > + SKIP(return, "Unable to disable THPs completely for the process\n"); Hm, shouldn't this be a test failure? > + > + thp_save_settings(); > + thp_read_settings(&self->settings); > + self->settings.thp_enabled = variant->thp_policy; Ugh this variable name is horrid, not your fault. I see you've renamed it at least in the variant field. That's not one for this series though, one for a follow up. > + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; > + thp_write_settings(&self->settings); > +} > + > +FIXTURE_TEARDOWN(prctl_thp_disable_completely) > +{ > + thp_restore_settings(); > +} > + > +TEST_F(prctl_thp_disable_completely, nofork) > +{ > + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); > +} > + > +TEST_F(prctl_thp_disable_completely, fork) > +{ > + int ret = 0; > + pid_t pid; > + > + /* Make sure prctl changes are carried across fork */ > + pid = fork(); > + ASSERT_GE(pid, 0); > + > + if (!pid) > + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); > + > + wait(&ret); > + if (WIFEXITED(ret)) > + ret = WEXITSTATUS(ret); > + else > + ret = -EINVAL; > + ASSERT_EQ(ret, 0); > +} > + > +TEST_HARNESS_MAIN > diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c > index bad60ac52874a..574bd0f8ae480 100644 > --- a/tools/testing/selftests/mm/thp_settings.c > +++ b/tools/testing/selftests/mm/thp_settings.c > @@ -382,10 +382,17 @@ unsigned long thp_shmem_supported_orders(void) > return __thp_supported_orders(true); > } > > -bool thp_is_enabled(void) > +bool thp_available(void) > { > if (access(THP_SYSFS, F_OK) != 0) > 
return false; > + return true; > +} > + > +bool thp_is_enabled(void) > +{ > + if (!thp_available()) > + return false; > > int mode = thp_read_string("enabled", thp_enabled_strings); > > diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h > index 6c07f70beee97..76eeb712e5f10 100644 > --- a/tools/testing/selftests/mm/thp_settings.h > +++ b/tools/testing/selftests/mm/thp_settings.h > @@ -84,6 +84,7 @@ void thp_set_read_ahead_path(char *path); > unsigned long thp_supported_orders(void); > unsigned long thp_shmem_supported_orders(void); > > +bool thp_available(void); > bool thp_is_enabled(void); > > #endif /* __THP_SETTINGS_H__ */ > -- > 2.47.3 >