* Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> [250428 11:28]: > There is functionality that overlaps the exec and memory mapping > subsystems. While it properly belongs in mm, it is important that exec > maintainers maintain oversight of this functionality correctly. > > We can establish both goals by adding a new mm/vma_exec.c file which > contains these 'glue' functions, and have fs/exec.c import them. > > As a part of this change, to ensure that proper oversight is achieved, add > the file to both the MEMORY MAPPING and EXEC & BINFMT API, ELF sections. > > scripts/get_maintainer.pl can correctly handle files in multiple entries > and this neatly handles the cross-over. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> > --- > MAINTAINERS | 2 + > fs/exec.c | 3 ++ > include/linux/mm.h | 1 - > mm/Makefile | 2 +- > mm/mmap.c | 83 ---------------------------- > mm/vma.h | 5 ++ > mm/vma_exec.c | 92 ++++++++++++++++++++++++++++++++ > tools/testing/vma/Makefile | 2 +- > tools/testing/vma/vma.c | 1 + > tools/testing/vma/vma_internal.h | 40 ++++++++++++++ > 10 files changed, 145 insertions(+), 86 deletions(-) > create mode 100644 mm/vma_exec.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index f5ee0390cdee..1ee1c22e6e36 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -8830,6 +8830,7 @@ F: include/linux/elf.h > F: include/uapi/linux/auxvec.h > F: include/uapi/linux/binfmts.h > F: include/uapi/linux/elf.h > +F: mm/vma_exec.c > F: tools/testing/selftests/exec/ > N: asm/elf.h > N: binfmt > @@ -15654,6 +15655,7 @@ F: mm/mremap.c > F: mm/mseal.c > F: mm/vma.c > F: mm/vma.h > +F: mm/vma_exec.c > F: mm/vma_internal.h > F: tools/testing/selftests/mm/merge.c > F: tools/testing/vma/ > diff --git a/fs/exec.c b/fs/exec.c > index 8e4ea5f1e64c..477bc3f2e966 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -78,6 +78,9 @@ > > #include <trace/events/sched.h> > > +/* For vma exec functions. */ > +#include "../mm/internal.h" > + > static int bprm_creds_from_file(struct linux_binprm *bprm); > > int suid_dumpable = 0; > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 21dd110b6655..4fc361df9ad7 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -3223,7 +3223,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); > extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); > extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); > extern void exit_mmap(struct mm_struct *); > -int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); > bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, > unsigned long addr, bool write); > > diff --git a/mm/Makefile b/mm/Makefile > index 9d7e5b5bb694..15a901bb431a 100644 > --- a/mm/Makefile > +++ b/mm/Makefile > @@ -37,7 +37,7 @@ mmu-y := nommu.o > mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ > mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ > msync.o page_vma_mapped.o pagewalk.o \ > - pgtable-generic.o rmap.o vmalloc.o vma.o > + pgtable-generic.o rmap.o vmalloc.o vma.o vma_exec.o > > > ifdef CONFIG_CROSS_MEMORY_ATTACH > diff --git a/mm/mmap.c b/mm/mmap.c > index bd210aaf7ebd..1794bf6f4dc0 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1717,89 +1717,6 @@ static int __meminit init_reserve_notifier(void) > } > subsys_initcall(init_reserve_notifier); > > -/* > - * Relocate a VMA downwards by shift bytes. 
There cannot be any VMAs between > - * this VMA and its relocated range, which will now reside at [vma->vm_start - > - * shift, vma->vm_end - shift). > - * > - * This function is almost certainly NOT what you want for anything other than > - * early executable temporary stack relocation. > - */ > -int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > -{ > - /* > - * The process proceeds as follows: > - * > - * 1) Use shift to calculate the new vma endpoints. > - * 2) Extend vma to cover both the old and new ranges. This ensures the > - * arguments passed to subsequent functions are consistent. > - * 3) Move vma's page tables to the new range. > - * 4) Free up any cleared pgd range. > - * 5) Shrink the vma to cover only the new range. > - */ > - > - struct mm_struct *mm = vma->vm_mm; > - unsigned long old_start = vma->vm_start; > - unsigned long old_end = vma->vm_end; > - unsigned long length = old_end - old_start; > - unsigned long new_start = old_start - shift; > - unsigned long new_end = old_end - shift; > - VMA_ITERATOR(vmi, mm, new_start); > - VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff); > - struct vm_area_struct *next; > - struct mmu_gather tlb; > - PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length); > - > - BUG_ON(new_start > new_end); > - > - /* > - * ensure there are no vmas between where we want to go > - * and where we are > - */ > - if (vma != vma_next(&vmi)) > - return -EFAULT; > - > - vma_iter_prev_range(&vmi); > - /* > - * cover the whole range: [new_start, old_end) > - */ > - vmg.middle = vma; > - if (vma_expand(&vmg)) > - return -ENOMEM; > - > - /* > - * move the page tables downwards, on failure we rely on > - * process cleanup to remove whatever mess we made. > - */ > - pmc.for_stack = true; > - if (length != move_page_tables(&pmc)) > - return -ENOMEM; > - > - tlb_gather_mmu(&tlb, mm); > - next = vma_next(&vmi); > - if (new_end > old_start) { > - /* > - * when the old and new regions overlap clear from new_end. > - */ > - free_pgd_range(&tlb, new_end, old_end, new_end, > - next ? next->vm_start : USER_PGTABLES_CEILING); > - } else { > - /* > - * otherwise, clean from old_start; this is done to not touch > - * the address space in [new_end, old_start) some architectures > - * have constraints on va-space that make this illegal (IA64) - > - * for the others its just a little faster. > - */ > - free_pgd_range(&tlb, old_start, old_end, new_end, > - next ? next->vm_start : USER_PGTABLES_CEILING); > - } > - tlb_finish_mmu(&tlb); > - > - vma_prev(&vmi); > - /* Shrink the vma to just the new range */ > - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); > -} > - > #ifdef CONFIG_MMU > /* > * Obtain a read lock on mm->mmap_lock, if the specified address is below the > diff --git a/mm/vma.h b/mm/vma.h > index 149926e8a6d1..1ce3e18f01b7 100644 > --- a/mm/vma.h > +++ b/mm/vma.h > @@ -548,4 +548,9 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address); > > int __vm_munmap(unsigned long start, size_t len, bool unlock); > > +/* vma_exec.h */ > +#ifdef CONFIG_MMU > +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); > +#endif > + > #endif /* __MM_VMA_H */ > diff --git a/mm/vma_exec.c b/mm/vma_exec.c > new file mode 100644 > index 000000000000..6736ae37f748 > --- /dev/null > +++ b/mm/vma_exec.c > @@ -0,0 +1,92 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +/* > + * Functions explicitly implemented for exec functionality which however are > + * explicitly VMA-only logic. 
> + */ > + > +#include "vma_internal.h" > +#include "vma.h" > + > +/* > + * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between > + * this VMA and its relocated range, which will now reside at [vma->vm_start - > + * shift, vma->vm_end - shift). > + * > + * This function is almost certainly NOT what you want for anything other than > + * early executable temporary stack relocation. > + */ > +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > +{ > + /* > + * The process proceeds as follows: > + * > + * 1) Use shift to calculate the new vma endpoints. > + * 2) Extend vma to cover both the old and new ranges. This ensures the > + * arguments passed to subsequent functions are consistent. > + * 3) Move vma's page tables to the new range. > + * 4) Free up any cleared pgd range. > + * 5) Shrink the vma to cover only the new range. > + */ > + > + struct mm_struct *mm = vma->vm_mm; > + unsigned long old_start = vma->vm_start; > + unsigned long old_end = vma->vm_end; > + unsigned long length = old_end - old_start; > + unsigned long new_start = old_start - shift; > + unsigned long new_end = old_end - shift; > + VMA_ITERATOR(vmi, mm, new_start); > + VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff); > + struct vm_area_struct *next; > + struct mmu_gather tlb; > + PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length); > + > + BUG_ON(new_start > new_end); > + > + /* > + * ensure there are no vmas between where we want to go > + * and where we are > + */ > + if (vma != vma_next(&vmi)) > + return -EFAULT; > + > + vma_iter_prev_range(&vmi); > + /* > + * cover the whole range: [new_start, old_end) > + */ > + vmg.middle = vma; > + if (vma_expand(&vmg)) > + return -ENOMEM; > + > + /* > + * move the page tables downwards, on failure we rely on > + * process cleanup to remove whatever mess we made. > + */ > + pmc.for_stack = true; > + if (length != move_page_tables(&pmc)) > + return -ENOMEM; > + > + tlb_gather_mmu(&tlb, mm); > + next = vma_next(&vmi); > + if (new_end > old_start) { > + /* > + * when the old and new regions overlap clear from new_end. > + */ > + free_pgd_range(&tlb, new_end, old_end, new_end, > + next ? next->vm_start : USER_PGTABLES_CEILING); > + } else { > + /* > + * otherwise, clean from old_start; this is done to not touch > + * the address space in [new_end, old_start) some architectures > + * have constraints on va-space that make this illegal (IA64) - > + * for the others its just a little faster. > + */ > + free_pgd_range(&tlb, old_start, old_end, new_end, > + next ? 
next->vm_start : USER_PGTABLES_CEILING); > + } > + tlb_finish_mmu(&tlb); > + > + vma_prev(&vmi); > + /* Shrink the vma to just the new range */ > + return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); > +} > diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile > index 860fd2311dcc..624040fcf193 100644 > --- a/tools/testing/vma/Makefile > +++ b/tools/testing/vma/Makefile > @@ -9,7 +9,7 @@ include ../shared/shared.mk > OFILES = $(SHARED_OFILES) vma.o maple-shim.o > TARGETS = vma > > -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h > +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h > > vma: $(OFILES) > $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS) > diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c > index 7cfd6e31db10..5832ae5d797d 100644 > --- a/tools/testing/vma/vma.c > +++ b/tools/testing/vma/vma.c > @@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; > * Directly import the VMA implementation here. Our vma_internal.h wrapper > * provides userland-equivalent functionality for everything vma.c uses. > */ > +#include "../../../mm/vma_exec.c" > #include "../../../mm/vma.c" > > const struct vm_operations_struct vma_dummy_vm_ops; > diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h > index 572ab2cea763..0df19ca0000a 100644 > --- a/tools/testing/vma/vma_internal.h > +++ b/tools/testing/vma/vma_internal.h > @@ -421,6 +421,28 @@ struct vm_unmapped_area_info { > unsigned long start_gap; > }; > > +struct pagetable_move_control { > + struct vm_area_struct *old; /* Source VMA. */ > + struct vm_area_struct *new; /* Destination VMA. */ > + unsigned long old_addr; /* Address from which the move begins. */ > + unsigned long old_end; /* Exclusive address at which old range ends. */ > + unsigned long new_addr; /* Address to move page tables to. */ > + unsigned long len_in; /* Bytes to remap specified by user. */ > + > + bool need_rmap_locks; /* Do rmap locks need to be taken? */ > + bool for_stack; /* Is this an early temp stack being moved? */ > +}; > + > +#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ > + struct pagetable_move_control name = { \ > + .old = old_, \ > + .new = new_, \ > + .old_addr = old_addr_, \ > + .old_end = (old_addr_) + (len_), \ > + .new_addr = new_addr_, \ > + .len_in = len_, \ > + } > + > static inline void vma_iter_invalidate(struct vma_iterator *vmi) > { > mas_pause(&vmi->mas); > @@ -1240,4 +1262,22 @@ static inline int mapping_map_writable(struct address_space *mapping) > return 0; > } > > +static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) > +{ > + (void)pmc; > + > + return 0; > +} > + > +static inline void free_pgd_range(struct mmu_gather *tlb, > + unsigned long addr, unsigned long end, > + unsigned long floor, unsigned long ceiling) > +{ > + (void)tlb; > + (void)addr; > + (void)end; > + (void)floor; > + (void)ceiling; > +} > + > #endif /* __MM_VMA_INTERNAL_H */ > -- > 2.49.0 >
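
A small aside for anyone reading relocate_vma_down() for the first time: the
expand/move/shrink sequence is easier to follow if you write out the interval
arithmetic. Below is a purely illustrative, standalone userspace sketch (made-up
addresses, no kernel code involved) of the ranges the function steps through,
matching the numbered comment at the top of the function:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical example values, purely illustrative. */
		unsigned long old_start = 0x7ffff0000000UL;
		unsigned long old_end   = 0x7ffff0021000UL;
		unsigned long shift     = 0x1000UL;

		/* Step 1: compute the new endpoints from the shift. */
		unsigned long new_start = old_start - shift;
		unsigned long new_end   = old_end - shift;

		/* Step 2: the VMA is first expanded to span both ranges. */
		printf("expanded VMA: [%#lx, %#lx)\n", new_start, old_end);

		/* Steps 3-4: page tables move down, any cleared pgd range is freed. */

		/* Step 5: the VMA is shrunk to cover only the new range. */
		printf("final VMA:    [%#lx, %#lx)\n", new_start, new_end);

		return 0;
	}

(If I'm reading fs/exec.c right, setup_arg_pages() remains the only caller,
handing in the temporary stack VMA and the shift it wants, which is exactly why
this helper is the one moving into mm/vma_exec.c here.)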