Now that we are able to use the mmap_prepare and mmap_complete callbacks for procfs implementations, update the vmcore implementation accordingly.

As part of this change, we must also update remap_vmalloc_range_partial() to optionally not update VMA flags. Other than the remap_vmalloc_range() wrapper, vmcore is the only user of this function, so we can simply go ahead and add a parameter.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>
---
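Note for reviewers (not part of the commit): the conversion follows the usual
mmap_prepare/mmap_complete split - flags and vm_ops are fixed up on the
descriptor before the VMA exists, and the VMA is only populated afterwards.
As a minimal sketch of the shape a converted procfs user takes (the foo_*
names and foo_buf are hypothetical, bounds checks omitted; everything else
mirrors the vmcore conversion below):

static void *foo_buf;	/* hypothetical vmalloc'd backing buffer */
static const struct vm_operations_struct foo_mmap_ops = { };

static int foo_mmap_prepare(struct vm_area_desc *desc)
{
	/* Runs before the VMA exists; only the descriptor is modified. */
	if (desc->vm_flags & (VM_WRITE | VM_EXEC))
		return -EPERM;

	desc->vm_flags |= VM_MIXEDMAP | VM_REMAP_FLAGS;
	desc->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
	desc->vm_ops = &foo_mmap_ops;
	return 0;
}

static int foo_mmap_complete(struct file *file, struct vm_area_struct *vma,
		const void *context)
{
	/*
	 * Runs once the VMA is in place; populate it without touching
	 * VMA flags, hence set_vma=false. The prepare step already set
	 * VM_MIXEDMAP and the VM_REMAP_FLAGS this function expects.
	 */
	return remap_vmalloc_range_partial(vma, vma->vm_start, foo_buf, 0,
					   vma->vm_end - vma->vm_start,
					   /* set_vma= */ false);
}

static const struct proc_ops foo_proc_ops = {
	.proc_mmap_prepare	= foo_mmap_prepare,
	.proc_mmap_complete	= foo_mmap_complete,
};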
 arch/s390/kernel/crash_dump.c |  6 ++--
 fs/proc/vmcore.c              | 53 +++++++++++++++++++++++++----------
 include/linux/vmalloc.h      | 10 +++----
 mm/vmalloc.c                  | 16 +++++++++--
 4 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d4839de8ce9d..44d7902f7e41 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -186,7 +186,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
 
 	if (pfn < oldmem_data.size >> PAGE_SHIFT) {
 		size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT));
-		rc = remap_pfn_range(vma, from,
+		rc = remap_pfn_range_complete(vma, from,
 				     pfn + (oldmem_data.start >> PAGE_SHIFT),
 				     size_old, prot);
 		if (rc || size == size_old)
@@ -195,7 +195,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
 		from += size_old;
 		pfn += size_old >> PAGE_SHIFT;
 	}
-	return remap_pfn_range(vma, from, pfn, size, prot);
+	return remap_pfn_range_complete(vma, from, pfn, size, prot);
 }
 
 /*
@@ -220,7 +220,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
 		from += size_hsa;
 		pfn += size_hsa >> PAGE_SHIFT;
 	}
-	return remap_pfn_range(vma, from, pfn, size, prot);
+	return remap_pfn_range_complete(vma, from, pfn, size, prot);
 }
 
 /*
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index f188bd900eb2..5e4e19c38d5e 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -254,7 +254,7 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
 				  unsigned long size, pgprot_t prot)
 {
 	prot = pgprot_encrypted(prot);
-	return remap_pfn_range(vma, from, pfn, size, prot);
+	return remap_pfn_range_complete(vma, from, pfn, size, prot);
 }
 
 /*
@@ -308,7 +308,7 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
 			tsz = min(offset + (u64)dump->size - start, (u64)size);
 			buf = dump->buf + start - offset;
 			if (remap_vmalloc_range_partial(vma, dst, buf, 0,
-							tsz))
+							tsz, /* set_vma= */false))
 				return -EFAULT;
 
 			size -= tsz;
@@ -588,24 +588,40 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
 	return ret;
 }
 
-static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+static int mmap_prepare_vmcore(struct vm_area_desc *desc)
 {
-	size_t size = vma->vm_end - vma->vm_start;
-	u64 start, end, len, tsz;
-	struct vmcore_range *m;
+	size_t size = vma_desc_size(desc);
+	u64 start, end;
 
-	start = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	start = (u64)desc->pgoff << PAGE_SHIFT;
 	end = start + size;
 
 	if (size > vmcore_size || end > vmcore_size)
 		return -EINVAL;
 
-	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+	if (desc->vm_flags & (VM_WRITE | VM_EXEC))
 		return -EPERM;
 
-	vm_flags_mod(vma, VM_MIXEDMAP, VM_MAYWRITE | VM_MAYEXEC);
-	vma->vm_ops = &vmcore_mmap_ops;
+	desc->vm_flags |= VM_MIXEDMAP | VM_REMAP_FLAGS;
+	desc->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+	desc->vm_ops = &vmcore_mmap_ops;
+
+	/*
+	 * No need for remap_pfn_range_prepare() as we ensure non-CoW by
+	 * clearing VM_MAYWRITE.
+	 */
+
+	return 0;
+}
+
+static int mmap_complete_vmcore(struct file *file, struct vm_area_struct *vma,
+		const void *context)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+	u64 start, len, tsz;
+	struct vmcore_range *m;
 
+	start = (u64)vma->vm_pgoff << PAGE_SHIFT;
 	len = 0;
 
 	if (start < elfcorebuf_sz) {
@@ -613,8 +629,8 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 
 		tsz = min(elfcorebuf_sz - (size_t)start, size);
 		pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
-		if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
-				    vma->vm_page_prot))
+		if (remap_pfn_range_complete(vma, vma->vm_start, pfn, tsz,
+					     vma->vm_page_prot))
 			return -EAGAIN;
 		size -= tsz;
 		start += tsz;
@@ -664,7 +680,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 		tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
 		kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
 		if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
-						kaddr, 0, tsz))
+						kaddr, 0, tsz, /* set_vma= */false))
 			goto fail;
 
 		size -= tsz;
@@ -701,7 +717,13 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 	return -EAGAIN;
 }
 #else
-static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+static int mmap_prepare_vmcore(struct vm_area_desc *desc)
+{
+	return -ENOSYS;
+}
+
+static int mmap_complete_vmcore(struct file *file, struct vm_area_struct *vma,
+		const void *context)
 {
 	return -ENOSYS;
 }
@@ -712,7 +734,8 @@ static const struct proc_ops vmcore_proc_ops = {
 	.proc_release	= release_vmcore,
 	.proc_read_iter	= read_vmcore,
 	.proc_lseek	= default_llseek,
-	.proc_mmap	= mmap_vmcore,
+	.proc_mmap_prepare	= mmap_prepare_vmcore,
+	.proc_mmap_complete	= mmap_complete_vmcore,
 };
 
 static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index eb54b7b3202f..588810e571aa 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -215,12 +215,12 @@ extern void *vmap(struct page **pages, unsigned int count,
 void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
 extern void vunmap(const void *addr);
 
-extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
-				       unsigned long uaddr, void *kaddr,
-				       unsigned long pgoff, unsigned long size);
+int remap_vmalloc_range_partial(struct vm_area_struct *vma,
+		unsigned long uaddr, void *kaddr, unsigned long pgoff,
+		unsigned long size, bool set_vma);
 
-extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
-							unsigned long pgoff);
+int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+		unsigned long pgoff);
 
 int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
 		     struct page **pages, unsigned int page_shift);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4249e1e01947..877b557b2482 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4528,6 +4528,7 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
  * @kaddr:	virtual address of vmalloc kernel memory
  * @pgoff:	offset from @kaddr to start at
  * @size:	size of map area
+ * @set_vma:	If true, update VMA flags
  *
  * Returns: 0 for success, -Exxx on failure
  *
@@ -4540,7 +4541,7 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
  */
 int remap_vmalloc_range_partial(struct vm_area_struct *vma,
 		unsigned long uaddr, void *kaddr, unsigned long pgoff,
-		unsigned long size)
+		unsigned long size, bool set_vma)
 {
 	struct vm_struct *area;
 	unsigned long off;
@@ -4566,6 +4567,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
 		return -EINVAL;
 	kaddr += off;
 
+	/* If we shouldn't modify VMA flags, vm_insert_page() mustn't. */
+	if (!set_vma && !(vma->vm_flags & VM_MIXEDMAP))
+		return -EINVAL;
+
 	do {
 		struct page *page = vmalloc_to_page(kaddr);
 		int ret;
@@ -4579,7 +4584,11 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
 		size -= PAGE_SIZE;
 	} while (size > 0);
 
-	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
+	if (set_vma)
+		vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
+	else
+		VM_WARN_ON_ONCE((vma->vm_flags & (VM_DONTEXPAND | VM_DONTDUMP)) !=
+				(VM_DONTEXPAND | VM_DONTDUMP));
 
 	return 0;
 }
@@ -4603,7 +4612,8 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 						unsigned long pgoff)
 {
 	return remap_vmalloc_range_partial(vma, vma->vm_start, addr, pgoff,
-					   vma->vm_end - vma->vm_start);
+					   vma->vm_end - vma->vm_start,
+					   /* set_vma= */ true);
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
-- 
2.51.0