On Wed, Sep 10, 2025 at 10:44:38AM +0800, Yafang Shao wrote: > Since a task with MMF_DISABLE_THP_COMPLETELY cannot use THP, remove it from > the khugepaged_mm_slot to stop khugepaged from processing it. > > After this change, the following semantic relationship always holds: > > MMF_VM_HUGEPAGE is set == task is in khugepaged mm_slot > MMF_VM_HUGEPAGE is not set == task is not in khugepaged mm_slot > > Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> > Cc: Lance Yang <ioworker0@xxxxxxxxx> (Obviously on the basis of fixing the issue the bot reported). > --- > include/linux/khugepaged.h | 1 + > kernel/sys.c | 6 ++++++ > mm/khugepaged.c | 19 +++++++++---------- > 3 files changed, 16 insertions(+), 10 deletions(-) > > diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h > index eb1946a70cff..6cb9107f1006 100644 > --- a/include/linux/khugepaged.h > +++ b/include/linux/khugepaged.h > @@ -19,6 +19,7 @@ extern void khugepaged_min_free_kbytes_update(void); > extern bool current_is_khugepaged(void); > extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, > bool install_pmd); > +bool hugepage_pmd_enabled(void); Need to provide a !CONFIG_TRANSPARENT_HUGEPAGE version, or to not invoke this in a context where CONFIG_TRANSPARENT_HUGEPAGE is not specified. 
> > static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) > { > diff --git a/kernel/sys.c b/kernel/sys.c > index a46d9b75880b..a1c1e8007f2d 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -8,6 +8,7 @@ > #include <linux/export.h> > #include <linux/mm.h> > #include <linux/mm_inline.h> > +#include <linux/khugepaged.h> > #include <linux/utsname.h> > #include <linux/mman.h> > #include <linux/reboot.h> > @@ -2493,6 +2494,11 @@ static int prctl_set_thp_disable(bool thp_disable, unsigned long flags, > mm_flags_clear(MMF_DISABLE_THP_COMPLETELY, mm); > mm_flags_clear(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); > } > + > + if (!mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm) && > + !mm_flags_test(MMF_VM_HUGEPAGE, mm) && > + hugepage_pmd_enabled()) > + __khugepaged_enter(mm); Let's refactor this so it's not open-coded. We can have: void khugepaged_enter_mm(struct mm_struct *mm) { if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm)) return; if (mm_flags_test(MMF_VM_HUGEPAGE, mm)) return; if (!hugepage_pmd_enabled()) return; __khugepaged_enter(mm); } void khugepaged_enter_vma(struct vm_area_struct *vma, vm_flags_t vm_flags) { if (!thp_vma_allowable_order(vma, vm_flags, TVA_KHUGEPAGED, PMD_ORDER)) return; khugepaged_enter_mm(vma->vm_mm); } Then just invoke khugepaged_enter_mm() here. 
> mmap_write_unlock(current->mm); > return 0; > } > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 4ec324a4c1fe..88ac482fb3a0 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -413,7 +413,7 @@ static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm) > mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm); > } > > -static bool hugepage_pmd_enabled(void) > +bool hugepage_pmd_enabled(void) > { > /* > * We cover the anon, shmem and the file-backed case here; file-backed > @@ -445,6 +445,7 @@ void __khugepaged_enter(struct mm_struct *mm) > > /* __khugepaged_exit() must not run from under us */ > VM_BUG_ON_MM(hpage_collapse_test_exit(mm), mm); > + WARN_ON_ONCE(mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm)); Not sure why this needs to be a naked WARN_ON_ONCE()? Seems that'd be a programmatic error, so VM_WARN_ON_ONCE() would be more appropriate? Can also change the VM_BUG_ON_MM() to VM_WARN_ON_ONCE_MM() while we're here. > if (unlikely(mm_flags_test_and_set(MMF_VM_HUGEPAGE, mm))) > return; > > @@ -472,7 +473,8 @@ void __khugepaged_enter(struct mm_struct *mm) > void khugepaged_enter_vma(struct vm_area_struct *vma, > vm_flags_t vm_flags) > { > - if (!mm_flags_test(MMF_VM_HUGEPAGE, vma->vm_mm) && > + if (!mm_flags_test(MMF_DISABLE_THP_COMPLETELY, vma->vm_mm) && > + !mm_flags_test(MMF_VM_HUGEPAGE, vma->vm_mm) && > hugepage_pmd_enabled()) { > if (thp_vma_allowable_order(vma, vm_flags, TVA_KHUGEPAGED, PMD_ORDER)) > __khugepaged_enter(vma->vm_mm); See above, we can refactor this. > @@ -1451,16 +1453,13 @@ static void collect_mm_slot(struct khugepaged_mm_slot *mm_slot) > > lockdep_assert_held(&khugepaged_mm_lock); > > - if (hpage_collapse_test_exit(mm)) { > + if (hpage_collapse_test_exit_or_disable(mm)) { > /* free mm_slot */ > hash_del(&slot->hash); > list_del(&slot->mm_node); > > - /* > - * Not strictly needed because the mm exited already. > - * > - * mm_flags_clear(MMF_VM_HUGEPAGE, mm); > - */ > + /* If the mm is disabled, this flag must be cleared. 
*/ > + mm_flags_clear(MMF_VM_HUGEPAGE, mm); > > /* khugepaged_mm_lock actually not necessary for the below */ > mm_slot_free(mm_slot_cache, mm_slot); > @@ -2507,9 +2506,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, > VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot); > /* > * Release the current mm_slot if this mm is about to die, or > - * if we scanned all vmas of this mm. > + * if we scanned all vmas of this mm, or if this mm is disabled. > */ > - if (hpage_collapse_test_exit(mm) || !vma) { > + if (hpage_collapse_test_exit_or_disable(mm) || !vma) { > /* > * Make sure that if mm_users is reaching zero while > * khugepaged runs here, khugepaged_exit will find Seems reasonable, but makes me wonder if we actually always want to invoke hpage_collapse_test_exit_or_disable()? I guess the VM_BUG_ON() (though it should be a VM_WARN_ON_ONCE()) in __khugepaged_enter() is a legit use of plain hpage_collapse_test_exit(), but the only other case is retract_page_tables(). I wonder if we should change this also? Seems reasonable to do so. > -- > 2.47.3 >