On Wed, Apr 23, 2025 at 12:44 AM Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx> wrote: > > > > On 2025/4/17 08:02, Nico Pache wrote: > > The khugepaged daemon and madvise_collapse have two different > > implementations that do almost the same thing. > > > > Create khugepaged_collapse_single_pmd to increase code > > reuse and create an entry point for future khugepaged changes. > > > > Refactor madvise_collapse and khugepaged_scan_mm_slot to use > > the new khugepaged_collapse_single_pmd function. > > > > Signed-off-by: Nico Pache <npache@xxxxxxxxxx> > > Can you add a prefix 'khugepaged:' for the subject line? I had that originally but the subject line is already extremely long. > > > --- > > mm/khugepaged.c | 92 ++++++++++++++++++++++++------------------------- > > 1 file changed, 46 insertions(+), 46 deletions(-) > > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > index b8838ba8207a..cecadc4239e7 100644 > > --- a/mm/khugepaged.c > > +++ b/mm/khugepaged.c > > @@ -2363,6 +2363,48 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, > > } > > #endif > > > > +/* > > + * Try to collapse a single PMD starting at a PMD aligned addr, and return > > + * the results. > > + */ > > +static int khugepaged_collapse_single_pmd(unsigned long addr, > > + struct vm_area_struct *vma, bool *mmap_locked, > > + struct collapse_control *cc) > > +{ > > + int result = SCAN_FAIL; > > + struct mm_struct *mm = vma->vm_mm; > > + unsigned long tva_flags = cc->is_khugepaged ? TVA_ENFORCE_SYSFS : 0; > > + > > + if (thp_vma_allowable_order(vma, vma->vm_flags, > > + tva_flags, PMD_ORDER)) { > > We've already checked the thp_vma_allowable_order() before calling this > function, why check again? 
> > > + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { > > + struct file *file = get_file(vma->vm_file); > > + pgoff_t pgoff = linear_page_index(vma, addr); > > + > > + mmap_read_unlock(mm); > > + *mmap_locked = false; > > + result = hpage_collapse_scan_file(mm, addr, file, pgoff, > > + cc); > > + fput(file); > > + if (result == SCAN_PTE_MAPPED_HUGEPAGE) { > > + mmap_read_lock(mm); > > + if (hpage_collapse_test_exit_or_disable(mm)) > > + goto end; > > + result = collapse_pte_mapped_thp(mm, addr, > > + !cc->is_khugepaged); > > why drop the following check? > if (*result == SCAN_PMD_MAPPED) > *result = SCAN_SUCCEED; Good catch! When generalizing this for madvise_collapse, I forgot to properly handle the khugepaged case, where SCAN_PMD_MAPPED is treated as SCAN_SUCCEED. > > > + mmap_read_unlock(mm); > > + } > > + } else { > > + result = hpage_collapse_scan_pmd(mm, vma, addr, > > + mmap_locked, cc); > > + } > > + if (cc->is_khugepaged && result == SCAN_SUCCEED) > > + ++khugepaged_pages_collapsed; > > + } > > +end: > > + return result; > > +} > > + > > static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, > > struct collapse_control *cc) > > __releases(&khugepaged_mm_lock) > > @@ -2437,33 +2479,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, > > VM_BUG_ON(khugepaged_scan.address < hstart || > > khugepaged_scan.address + HPAGE_PMD_SIZE > > > hend); > > - if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { > > - struct file *file = get_file(vma->vm_file); > > - pgoff_t pgoff = linear_page_index(vma, > > - khugepaged_scan.address); > > > > - mmap_read_unlock(mm); > > - mmap_locked = false; > > - *result = hpage_collapse_scan_file(mm, > > - khugepaged_scan.address, file, pgoff, cc); > > - fput(file); > > - if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > > - mmap_read_lock(mm); > > - if (hpage_collapse_test_exit_or_disable(mm)) > > - goto breakouterloop; > > - *result = collapse_pte_mapped_thp(mm, > > - khugepaged_scan.address, 
false); > > - if (*result == SCAN_PMD_MAPPED) > > - *result = SCAN_SUCCEED; > > - mmap_read_unlock(mm); > > - } > > - } else { > > - *result = hpage_collapse_scan_pmd(mm, vma, > > - khugepaged_scan.address, &mmap_locked, cc); > > - } > > - > > - if (*result == SCAN_SUCCEED) > > - ++khugepaged_pages_collapsed; > > + *result = khugepaged_collapse_single_pmd(khugepaged_scan.address, > > + vma, &mmap_locked, cc); > > If the khugepaged_collapse_single_pmd() returns a failure caused by > hpage_collapse_test_exit_or_disable(), we should break out of the loop > according to the original logic. But you've changed the action in this > patch, is this intentional? Nope, not intentional! Thanks for pointing that out. I'll get that fixed! Thanks for the in-depth review! I'll work on cleaning up these corner cases. > > > > > /* move to next address */ > > khugepaged_scan.address += HPAGE_PMD_SIZE; > > @@ -2783,36 +2801,18 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, > > mmap_assert_locked(mm); > > memset(cc->node_load, 0, sizeof(cc->node_load)); > > nodes_clear(cc->alloc_nmask); > > - if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { > > - struct file *file = get_file(vma->vm_file); > > - pgoff_t pgoff = linear_page_index(vma, addr); > > > > - mmap_read_unlock(mm); > > - mmap_locked = false; > > - result = hpage_collapse_scan_file(mm, addr, file, pgoff, > > - cc); > > - fput(file); > > - } else { > > - result = hpage_collapse_scan_pmd(mm, vma, addr, > > - &mmap_locked, cc); > > - } > > + result = khugepaged_collapse_single_pmd(addr, vma, &mmap_locked, cc); > > + > > if (!mmap_locked) > > *prev = NULL; /* Tell caller we dropped mmap_lock */ > > > > -handle_result: > > switch (result) { > > case SCAN_SUCCEED: > > case SCAN_PMD_MAPPED: > > ++thps; > > break; > > case SCAN_PTE_MAPPED_HUGEPAGE: > > - BUG_ON(mmap_locked); > > - BUG_ON(*prev); > > - mmap_read_lock(mm); > > - result = collapse_pte_mapped_thp(mm, addr, true); > > - 
mmap_read_unlock(mm); > > - goto handle_result; > > - /* Whitelisted set of results where continuing OK */ > > case SCAN_PMD_NULL: > > case SCAN_PTE_NON_PRESENT: > > case SCAN_PTE_UFFD_WP: >