On Wed, Aug 13, 2025 at 9:57 PM Usama Arif <usamaarif642@xxxxxxxxx> wrote:
>
> From: David Hildenbrand <david@xxxxxxxxxx>
>
> When determining which THP orders are eligible for a VMA mapping, we
> previously specified tva_flags; however, it turns out it is really not
> necessary to treat these as flags.
>
> Rather, we distinguish between distinct modes.
>
> The only case where we previously combined flags was with
> TVA_ENFORCE_SYSFS, but we can avoid this by observing that it is the
> default, except for MADV_COLLAPSE or edge cases in
> collapse_pte_mapped_thp() and hugepage_vma_revalidate(), and by
> adding a mode specifically for this case - TVA_FORCED_COLLAPSE.
>
> We have:
> * smaps handling for showing "THPeligible"
> * Pagefault handling
> * khugepaged handling
> * Forced collapse handling: primarily MADV_COLLAPSE, but also for
>   an edge case in collapse_pte_mapped_thp()
>
> Disregarding the edge cases, we only want to ignore sysfs settings
> when we are forcing a collapse through MADV_COLLAPSE; otherwise we
> want to enforce them. Hence this patch makes the following
> flag-to-enum conversions:
>
> * TVA_SMAPS | TVA_ENFORCE_SYSFS -> TVA_SMAPS
> * TVA_IN_PF | TVA_ENFORCE_SYSFS -> TVA_PAGEFAULT
> * TVA_ENFORCE_SYSFS -> TVA_KHUGEPAGED
> * 0 -> TVA_FORCED_COLLAPSE
>
> With this change, we immediately know if we are in the forced collapse
> case, which will be valuable next.
>
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> Acked-by: Usama Arif <usamaarif642@xxxxxxxxx>
> Signed-off-by: Usama Arif <usamaarif642@xxxxxxxxx>
> Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>

Acked-by: Yafang Shao <laoar.shao@xxxxxxxxx>

Hello Usama,

This change is also required by my BPF-based THP order selection
series [0]. Since this patch appears to be independent of the series,
could we merge it first into mm-new or mm-everything if the series
itself won't be merged shortly?

Link: https://lwn.net/Articles/1031829/ [0]
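
For readers skimming the diff, the whole conversion at a typical call
site is a condensed before/after (taken from the page-fault hunks
quoted below):

    /* Before: each caller combined flags by hand. */
    orders = thp_vma_allowable_orders(vma, vma->vm_flags,
                                      TVA_IN_PF | TVA_ENFORCE_SYSFS,
                                      BIT(PMD_ORDER) - 1);

    /* After: a single mode names the caller; sysfs settings are
     * enforced for every mode except TVA_FORCED_COLLAPSE.
     */
    orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT,
                                      BIT(PMD_ORDER) - 1);
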
> ---
>  fs/proc/task_mmu.c      |  4 ++--
>  include/linux/huge_mm.h | 30 ++++++++++++++++++------------
>  mm/huge_memory.c        |  8 ++++----
>  mm/khugepaged.c         | 17 ++++++++---------
>  mm/memory.c             | 14 ++++++--------
>  5 files changed, 38 insertions(+), 35 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index e8e7bef345313..ced01cf3c5ab3 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1369,8 +1369,8 @@ static int show_smap(struct seq_file *m, void *v)
>          __show_smap(m, &mss, false);
>
>          seq_printf(m, "THPeligible: %8u\n",
> -                   !!thp_vma_allowable_orders(vma, vma->vm_flags,
> -                                              TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL));
> +                   !!thp_vma_allowable_orders(vma, vma->vm_flags, TVA_SMAPS,
> +                                              THP_ORDERS_ALL));
>
>          if (arch_pkeys_enabled())
>                  seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 22b8b067b295e..92ea0b9771fae 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -94,12 +94,15 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
>  #define THP_ORDERS_ALL \
>          (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT)
>
> -#define TVA_SMAPS          (1 << 0)  /* Will be used for procfs */
> -#define TVA_IN_PF          (1 << 1)  /* Page fault handler */
> -#define TVA_ENFORCE_SYSFS  (1 << 2)  /* Obey sysfs configuration */
> +enum tva_type {
> +        TVA_SMAPS,           /* Exposing "THPeligible:" in smaps. */
> +        TVA_PAGEFAULT,       /* Serving a page fault. */
> +        TVA_KHUGEPAGED,      /* Khugepaged collapse. */
> +        TVA_FORCED_COLLAPSE, /* Forced collapse (e.g. MADV_COLLAPSE). */
> +};
>
> -#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
> -        (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))
> +#define thp_vma_allowable_order(vma, vm_flags, type, order) \
> +        (!!thp_vma_allowable_orders(vma, vm_flags, type, BIT(order)))
>
>  #define split_folio(f) split_folio_to_list(f, NULL)
>
> @@ -264,14 +267,14 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
>
>  unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
>                                           vm_flags_t vm_flags,
> -                                         unsigned long tva_flags,
> +                                         enum tva_type type,
>                                           unsigned long orders);
>
>  /**
>   * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
>   * @vma:  the vm area to check
>   * @vm_flags: use these vm_flags instead of vma->vm_flags
> - * @tva_flags: Which TVA flags to honour
> + * @type: TVA type
>   * @orders: bitfield of all orders to consider
>   *
>   * Calculates the intersection of the requested hugepage orders and the allowed
> @@ -285,11 +288,14 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
>  static inline
>  unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
>                                         vm_flags_t vm_flags,
> -                                       unsigned long tva_flags,
> +                                       enum tva_type type,
>                                         unsigned long orders)
>  {
> -        /* Optimization to check if required orders are enabled early. */
> -        if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
> +        /*
> +         * Optimization to check if required orders are enabled early. Only
> +         * forced collapse ignores sysfs configs.
> +         */
> +        if (type != TVA_FORCED_COLLAPSE && vma_is_anonymous(vma)) {
>                  unsigned long mask = READ_ONCE(huge_anon_orders_always);
>
>                  if (vm_flags & VM_HUGEPAGE)
> @@ -303,7 +309,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
>                  return 0;
>          }
>
> -        return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
> +        return __thp_vma_allowable_orders(vma, vm_flags, type, orders);
>  }
>
>  struct thpsize {
> @@ -547,7 +553,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
>
>  static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
>                                                       vm_flags_t vm_flags,
> -                                                     unsigned long tva_flags,
> +                                                     enum tva_type type,
>                                                       unsigned long orders)
>  {
>          return 0;
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 6df1ed0cef5cf..9c716be949cbf 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -99,12 +99,12 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
>
>  unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
>                                           vm_flags_t vm_flags,
> -                                         unsigned long tva_flags,
> +                                         enum tva_type type,
>                                           unsigned long orders)
>  {
> -        bool smaps = tva_flags & TVA_SMAPS;
> -        bool in_pf = tva_flags & TVA_IN_PF;
> -        bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS;
> +        const bool smaps = type == TVA_SMAPS;
> +        const bool in_pf = type == TVA_PAGEFAULT;
> +        const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE;
>          unsigned long supported_orders;
>
>          /* Check the intersection of requested and supported orders. */
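
As a quick cross-check of the hunk above, the three predicates derived
from the new enum work out as follows (my summary, not part of the
patch):

    type                    smaps    in_pf    enforce_sysfs
    TVA_SMAPS               true     false    true
    TVA_PAGEFAULT           false    true     true
    TVA_KHUGEPAGED          false    false    true
    TVA_FORCED_COLLAPSE     false    false    false

which matches the old TVA_SMAPS | TVA_ENFORCE_SYSFS,
TVA_IN_PF | TVA_ENFORCE_SYSFS, TVA_ENFORCE_SYSFS and 0 combinations
one-to-one.
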
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 1a416b8659972..d3d4f116e14b6 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -474,8 +474,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
>  {
>          if (!mm_flags_test(MMF_VM_HUGEPAGE, vma->vm_mm) &&
>              hugepage_pmd_enabled()) {
> -                if (thp_vma_allowable_order(vma, vm_flags, TVA_ENFORCE_SYSFS,
> -                                            PMD_ORDER))
> +                if (thp_vma_allowable_order(vma, vm_flags, TVA_KHUGEPAGED, PMD_ORDER))
>                          __khugepaged_enter(vma->vm_mm);
>          }
>  }
> @@ -921,7 +920,8 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>                                     struct collapse_control *cc)
>  {
>          struct vm_area_struct *vma;
> -        unsigned long tva_flags = cc->is_khugepaged ? TVA_ENFORCE_SYSFS : 0;
> +        enum tva_type type = cc->is_khugepaged ? TVA_KHUGEPAGED :
> +                                TVA_FORCED_COLLAPSE;
>
>          if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
>                  return SCAN_ANY_PROCESS;
> @@ -932,7 +932,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>
>          if (!thp_vma_suitable_order(vma, address, PMD_ORDER))
>                  return SCAN_ADDRESS_RANGE;
> -        if (!thp_vma_allowable_order(vma, vma->vm_flags, tva_flags, PMD_ORDER))
> +        if (!thp_vma_allowable_order(vma, vma->vm_flags, type, PMD_ORDER))
>                  return SCAN_VMA_CHECK;
>          /*
>           * Anon VMA expected, the address may be unmapped then
> @@ -1533,9 +1533,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>           * in the page cache with a single hugepage. If a mm were to fault-in
>           * this memory (mapped by a suitably aligned VMA), we'd get the hugepage
>           * and map it by a PMD, regardless of sysfs THP settings. As such, let's
> -         * analogously elide sysfs THP settings here.
> +         * analogously elide sysfs THP settings here and force collapse.
>           */
> -        if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
> +        if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_FORCED_COLLAPSE, PMD_ORDER))
>                  return SCAN_VMA_CHECK;
>
>          /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
> @@ -2432,8 +2432,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
>                          progress++;
>                          break;
>                  }
> -                if (!thp_vma_allowable_order(vma, vma->vm_flags,
> -                                             TVA_ENFORCE_SYSFS, PMD_ORDER)) {
> +                if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_KHUGEPAGED, PMD_ORDER)) {
>  skip:
>                          progress++;
>                          continue;
> @@ -2767,7 +2766,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
>          BUG_ON(vma->vm_start > start);
>          BUG_ON(vma->vm_end < end);
>
> -        if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
> +        if (!thp_vma_allowable_order(vma, vma->vm_flags, TVA_FORCED_COLLAPSE, PMD_ORDER))
>                  return -EINVAL;
>
>          cc = kmalloc(sizeof(*cc), GFP_KERNEL);
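
As a reminder of what actually reaches the TVA_FORCED_COLLAPSE path in
madvise_collapse() above, here is a minimal userspace sketch (my
illustration, not part of the patch; MADV_COLLAPSE has been available
since Linux 6.1):

    #include <stdio.h>
    #include <sys/mman.h>

    /* Ask the kernel to collapse [addr, addr + len) into huge pages,
     * regardless of the sysfs THP settings - the forced-collapse path.
     */
    static void try_collapse(void *addr, size_t len)
    {
            if (madvise(addr, len, MADV_COLLAPSE))
                    perror("madvise(MADV_COLLAPSE)");
    }
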
> diff --git a/mm/memory.c b/mm/memory.c
> index 002c28795d8b7..7b1e8f137fa3f 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4515,8 +4515,8 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
>           * Get a list of all the (large) orders below PMD_ORDER that are enabled
>           * and suitable for swapping THP.
>           */
> -        orders = thp_vma_allowable_orders(vma, vma->vm_flags,
> -                                          TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1);
> +        orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT,
> +                                          BIT(PMD_ORDER) - 1);
>          orders = thp_vma_suitable_orders(vma, vmf->address, orders);
>          orders = thp_swap_suitable_orders(swp_offset(entry),
>                                            vmf->address, orders);
> @@ -5063,8 +5063,8 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
>           * for this vma. Then filter out the orders that can't be allocated over
>           * the faulting address and still be fully contained in the vma.
>           */
> -        orders = thp_vma_allowable_orders(vma, vma->vm_flags,
> -                                          TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1);
> +        orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT,
> +                                          BIT(PMD_ORDER) - 1);
>          orders = thp_vma_suitable_orders(vma, vmf->address, orders);
>
>          if (!orders)
> @@ -6254,8 +6254,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
>                  return VM_FAULT_OOM;
>  retry_pud:
>          if (pud_none(*vmf.pud) &&
> -            thp_vma_allowable_order(vma, vm_flags,
> -                                    TVA_IN_PF | TVA_ENFORCE_SYSFS, PUD_ORDER)) {
> +            thp_vma_allowable_order(vma, vm_flags, TVA_PAGEFAULT, PUD_ORDER)) {
>                  ret = create_huge_pud(&vmf);
>                  if (!(ret & VM_FAULT_FALLBACK))
>                          return ret;
> @@ -6289,8 +6288,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
>                  goto retry_pud;
>
>          if (pmd_none(*vmf.pmd) &&
> -            thp_vma_allowable_order(vma, vm_flags,
> -                                    TVA_IN_PF | TVA_ENFORCE_SYSFS, PMD_ORDER)) {
> +            thp_vma_allowable_order(vma, vm_flags, TVA_PAGEFAULT, PMD_ORDER)) {
>                  ret = create_huge_pmd(&vmf);
>                  if (!(ret & VM_FAULT_FALLBACK))
>                          return ret;
> --
> 2.47.3
>

--
Regards
Yafang