On Mon, Aug 04, 2025 at 04:40:46PM +0100, Usama Arif wrote: > From: David Hildenbrand <david@xxxxxxxxxx> > > Let's allow for making MADV_COLLAPSE succeed on areas that neither have > VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled > unless explicitly advised (PR_THP_DISABLE_EXCEPT_ADVISED). > > MADV_COLLAPSE is a clear advice that we want to collapse. > > Note that we still respect the VM_NOHUGEPAGE flag, just like > MADV_COLLAPSE always does. So consequently, MADV_COLLAPSE is now only > refused on VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED, > including for shmem. Hm, 'including for shmem' feels a bit brief here :) But it's probably fine. > > Co-developed-by: Usama Arif <usamaarif642@xxxxxxxxx> > Signed-off-by: Usama Arif <usamaarif642@xxxxxxxxx> > Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> LGTM, so: Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> > --- > include/linux/huge_mm.h | 8 +++++++- > include/uapi/linux/prctl.h | 2 +- > mm/huge_memory.c | 5 +++-- > mm/memory.c | 6 ++++-- > mm/shmem.c | 2 +- > 5 files changed, 16 insertions(+), 7 deletions(-) > > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h > index bd4f9e6327e0..1fd06ecbde72 100644 > --- a/include/linux/huge_mm.h > +++ b/include/linux/huge_mm.h > @@ -329,7 +329,7 @@ struct thpsize { > * through madvise or prctl. > */ > static inline bool vma_thp_disabled(struct vm_area_struct *vma, > - vm_flags_t vm_flags) > + vm_flags_t vm_flags, bool forced_collapse) > { > /* Are THPs disabled for this VMA? */ > if (vm_flags & VM_NOHUGEPAGE) > @@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma, > */ > if (vm_flags & VM_HUGEPAGE) > return false; > + /* > + * Forcing a collapse (e.g., madv_collapse), is a clear advice to > + * use THPs. 
> + */ > + if (forced_collapse) > + return false; > return test_bit(MMF_DISABLE_THP_EXCEPT_ADVISED, &vma->vm_mm->flags); > } > > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 9c1d6e49b8a9..cdda963a039a 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -185,7 +185,7 @@ struct prctl_mm_map { > #define PR_SET_THP_DISABLE 41 > /* > * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / > - * VM_HUGEPAGE). > + * VM_HUGEPAGE, MADV_COLLAPSE). > */ > # define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) > #define PR_GET_THP_DISABLE 42 > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 85252b468f80..ef5ccb0ec5d5 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -104,7 +104,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, > { > const bool smaps = type == TVA_SMAPS; > const bool in_pf = type == TVA_PAGEFAULT; > - const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE; > + const bool forced_collapse = type == TVA_FORCED_COLLAPSE; > + const bool enforce_sysfs = !forced_collapse; I guess, as discussed, we'll return to this. > unsigned long supported_orders; > > /* Check the intersection of requested and supported orders. */ > @@ -122,7 +123,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, > if (!vma->vm_mm) /* vdso */ > return 0; > > - if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags)) > + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags, forced_collapse)) > return 0; > > /* khugepaged doesn't collapse DAX vma, but page fault is fine. 
*/ > diff --git a/mm/memory.c b/mm/memory.c > index be761753f240..bd04212d6f79 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -5186,9 +5186,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa > * It is too late to allocate a small folio, we already have a large > * folio in the pagecache: especially s390 KVM cannot tolerate any > * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any > - * PMD mappings if THPs are disabled. > + * PMD mappings if THPs are disabled. As we already have a THP ... > + * behave as if we are forcing a collapse. > */ > - if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) > + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags, > + /* forced_collapse=*/ true)) > return ret; > > if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) > diff --git a/mm/shmem.c b/mm/shmem.c > index e6cdfda08aed..30609197a266 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1816,7 +1816,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, > vm_flags_t vm_flags = vma ? vma->vm_flags : 0; > unsigned int global_orders; > > - if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags))) > + if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))) > return 0; > > global_orders = shmem_huge_global_enabled(inode, index, write_end, > -- > 2.47.3 >