On Tue, May 20, 2025 at 11:55:20AM +0800, Chengming Zhou wrote: > On 2025/5/19 16:51, Lorenzo Stoakes wrote: > > If a user wishes to enable KSM mergeability for an entire process and all > > fork/exec'd processes that come after it, they use the prctl() > > PR_SET_MEMORY_MERGE operation. > > > > This defaults all newly mapped VMAs to have the VM_MERGEABLE VMA flag set > > (in order to indicate they are KSM mergeable), as well as setting this flag > > for all existing VMAs. > > > > However it also entirely and completely breaks VMA merging for the process > > and all forked (and fork/exec'd) processes. > > > > This is because when a new mapping is proposed, the flags specified will > > never have VM_MERGEABLE set. However all adjacent VMAs will already have > > VM_MERGEABLE set, rendering VMAs unmergeable by default. > > > > To work around this, we try to set the VM_MERGEABLE flag prior to > > attempting a merge. In the case of brk() this can always be done. > > > > However on mmap() things are more complicated - while KSM is not supported > > for file-backed mappings, it is supported for MAP_PRIVATE file-backed > > mappings. > > > > And these mappings may have deprecated .mmap() callbacks specified which > > could, in theory, adjust flags and thus KSM merge eligibility. > > > > So we check to determine whether this is at all possible. If not, we set > > VM_MERGEABLE prior to the merge attempt on mmap(), otherwise we retain the > > previous behaviour. > > > > When .mmap_prepare() is more widely used, we can remove this precaution. > > > > While this doesn't quite cover all cases, it covers a great many (all > > anonymous memory, for instance), meaning we should already see a > > significant improvement in VMA mergeability. > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> > > Looks good to me with the build fix. And it seems that ksm_add_vma() > is not used anymore.. > > Reviewed-by: Chengming Zhou <chengming.zhou@xxxxxxxxx> > > Thanks! Thanks! 
Yeah in the fix I also drop that, will obviously send a v2 to clear things up and address comments :) > > > --- > > include/linux/ksm.h | 4 ++-- > > mm/ksm.c | 20 ++++++++++++------ > > mm/vma.c | 49 +++++++++++++++++++++++++++++++++++++++++++-- > > 3 files changed, 63 insertions(+), 10 deletions(-) > > > > diff --git a/include/linux/ksm.h b/include/linux/ksm.h > > index d73095b5cd96..ba5664daca6e 100644 > > --- a/include/linux/ksm.h > > +++ b/include/linux/ksm.h > > @@ -17,8 +17,8 @@ > > #ifdef CONFIG_KSM > > int ksm_madvise(struct vm_area_struct *vma, unsigned long start, > > unsigned long end, int advice, unsigned long *vm_flags); > > - > > -void ksm_add_vma(struct vm_area_struct *vma); > > +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file, > > + vm_flags_t vm_flags); > > int ksm_enable_merge_any(struct mm_struct *mm); > > int ksm_disable_merge_any(struct mm_struct *mm); > > int ksm_disable(struct mm_struct *mm); > > diff --git a/mm/ksm.c b/mm/ksm.c > > index d0c763abd499..022af14a95ea 100644 > > --- a/mm/ksm.c > > +++ b/mm/ksm.c > > @@ -2731,16 +2731,24 @@ static int __ksm_del_vma(struct vm_area_struct *vma) > > return 0; > > } > > /** > > - * ksm_add_vma - Mark vma as mergeable if compatible > > + * ksm_vma_flags - Update VMA flags to mark as mergeable if compatible > > * > > - * @vma: Pointer to vma > > + * @mm: Proposed VMA's mm_struct > > + * @file: Proposed VMA's file-backed mapping, if any. > > + * @vm_flags: Proposed VMA's flags. > > + * > > + * Returns: @vm_flags possibly updated to mark mergeable. 
> > */ > > -void ksm_add_vma(struct vm_area_struct *vma) > > +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file, > > + vm_flags_t vm_flags) > > { > > - struct mm_struct *mm = vma->vm_mm; > > + vm_flags_t ret = vm_flags; > > - if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) > > - __ksm_add_vma(vma); > > + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags) && > > + __ksm_should_add_vma(file, vm_flags)) > > + ret |= VM_MERGEABLE; > > + > > + return ret; > > } > > static void ksm_add_vmas(struct mm_struct *mm) > > diff --git a/mm/vma.c b/mm/vma.c > > index 3ff6cfbe3338..5bebe55ea737 100644 > > --- a/mm/vma.c > > +++ b/mm/vma.c > > @@ -2482,7 +2482,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap) > > */ > > if (!vma_is_anonymous(vma)) > > khugepaged_enter_vma(vma, map->flags); > > - ksm_add_vma(vma); > > *vmap = vma; > > return 0; > > @@ -2585,6 +2584,45 @@ static void set_vma_user_defined_fields(struct vm_area_struct *vma, > > vma->vm_private_data = map->vm_private_data; > > } > > +static void update_ksm_flags(struct mmap_state *map) > > +{ > > + map->flags = ksm_vma_flags(map->mm, map->file, map->flags); > > +} > > + > > +/* > > + * Are we guaranteed no driver can change state such as to preclude KSM merging? > > + * If so, let's set the KSM mergeable flag early so we don't break VMA merging. > > + * > > + * This is applicable when PR_SET_MEMORY_MERGE has been set on the mm_struct via > > + * prctl() causing newly mapped VMAs to have the KSM mergeable VMA flag set. > > + * > > + * If this is not the case, then we set the flag after considering mergeability, > > + * which will prevent mergeability as, when PR_SET_MEMORY_MERGE is set, a new > > + * VMA will not have the KSM mergeability VMA flag set, but all other VMAs will, > > + * preventing any merge. 
> > + */ > > +static bool can_set_ksm_flags_early(struct mmap_state *map) > > +{ > > + struct file *file = map->file; > > + > > + /* Anonymous mappings have no driver which can change them. */ > > + if (!file) > > + return true; > > + > > + /* shmem is safe. */ > > + if (shmem_file(file)) > > + return true; > > + > > + /* > > + * If .mmap_prepare() is specified, then the driver will have already > > + * manipulated state prior to updating KSM flags. > > + */ > > + if (file->f_op->mmap_prepare) > > + return true; > > + > > + return false; > > +} > > + > > static unsigned long __mmap_region(struct file *file, unsigned long addr, > > unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, > > struct list_head *uf) > > @@ -2595,6 +2633,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, > > bool have_mmap_prepare = file && file->f_op->mmap_prepare; > > VMA_ITERATOR(vmi, mm, addr); > > MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file); > > + bool check_ksm_early = can_set_ksm_flags_early(&map); > > error = __mmap_prepare(&map, uf); > > if (!error && have_mmap_prepare) > > @@ -2602,6 +2641,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, > > if (error) > > goto abort_munmap; > > + if (check_ksm_early) > > + update_ksm_flags(&map); > > + > > /* Attempt to merge with adjacent VMAs... */ > > if (map.prev || map.next) { > > VMG_MMAP_STATE(vmg, &map, /* vma = */ NULL); > > @@ -2611,6 +2653,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, > > /* ...but if we can't, allocate a new VMA. */ > > if (!vma) { > > + if (!check_ksm_early) > > + update_ksm_flags(&map); > > + > > error = __mmap_new_vma(&map, &vma); > > if (error) > > goto unacct_error; > > @@ -2713,6 +2758,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, > > * Note: This happens *after* clearing old mappings in some code paths. 
> > */ > > flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; > > + flags = ksm_vma_flags(mm, NULL, flags); > > if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) > > return -ENOMEM; > > @@ -2756,7 +2802,6 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, > > mm->map_count++; > > validate_mm(mm); > > - ksm_add_vma(vma); > > out: > > perf_event_mmap(vma); > > mm->total_vm += len >> PAGE_SHIFT;