Nested file systems, that is those which invoke call_mmap() within their own f_op->mmap() handlers, may encounter underlying file systems which provide the f_op->mmap_prepare() hook introduced by commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). We have a chicken-and-egg scenario here - until all file systems are converted to using .mmap_prepare(), we cannot convert these nested handlers, as we can't call f_op->mmap from an .mmap_prepare() hook. So we have to do it the other way round - invoke the .mmap_prepare() hook from an .mmap() one. in order to do so, we need to convert VMA state into a struct vm_area_desc descriptor, invoking the underlying file system's f_op->mmap_prepare() callback passing a pointer to this, and then setting VMA state accordingly and safely. This patch achieves this via the compat_vma_mmap_prepare() function, which we invoke from call_mmap() if f_op->mmap_prepare() is specified in the passed in file pointer. We place the fundamental logic into mm/vma.c where VMA manipulation belongs. We also update the VMA userland tests to accommodate the changes. The compat_vma_mmap_prepare() function and its associated machinery is temporary, and will be removed once the conversion of file systems is complete. Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> Reported-by: Jann Horn <jannh@xxxxxxxxxx> Closes: https://lore.kernel.org/linux-mm/CAG48ez04yOEVx1ekzOChARDDBZzAKwet8PEoPM4Ln3_rk91AzQ@xxxxxxxxxxxxxx/ Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). --- include/linux/fs.h | 6 +++-- mm/mmap.c | 39 +++++++++++++++++++++++++++ mm/vma.c | 46 +++++++++++++++++++++++++++++++- mm/vma.h | 4 +++ tools/testing/vma/vma_internal.h | 16 +++++++++++ 5 files changed, 108 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 05abdabe9db7..8fe41a2b7527 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2274,10 +2274,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } diff --git a/mm/mmap.c b/mm/mmap.c index 09c563c95112..0755cb5d89d1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1891,3 +1891,42 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) vm_unacct_memory(charge); goto loop_out; } + +/** + * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an + * existing VMA + * @file: The file which possesss an f_op->mmap_prepare() hook + * @vma; The VMA to apply the .mmap_prepare() hook to. + * + * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain + * 'wrapper' file systems invoke a nested mmap hook of an underlying file. + * + * Until all filesystems are converted to use .mmap_prepare(), we must be + * conservative and continue to invoke these 'wrapper' filesystems using the + * deprecated .mmap() hook. + * + * However we have a problem if the underlying file system possesses an + * .mmap_prepare() hook, as we are in a different context when we invoke the + * .mmap() hook, already having a VMA to deal with. + * + * compat_vma_mmap_prepare() is a compatibility function that takes VMA state, + * establishes a struct vm_area_desc descriptor, passes to the underlying + * .mmap_prepare() hook and applies any changes performed by it. + * + * Once the conversion of filesystems is complete this function will no longer + * be required and will be removed. + * + * Returns: 0 on success or error. + */ +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} diff --git a/mm/vma.c b/mm/vma.c index 01b1d26d87b4..d771750f8f76 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -3153,7 +3153,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock) return ret; } - /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. @@ -3195,3 +3194,48 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) return 0; } + +/* + * Temporary helper functions for file systems which wrap an invocation of + * f_op->mmap() but which might have an underlying file system which implements + * f_op->mmap_prepare(). + */ + +struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + desc->mm = vma->vm_mm; + desc->start = vma->vm_start; + desc->end = vma->vm_end; + + desc->pgoff = vma->vm_pgoff; + desc->file = vma->vm_file; + desc->vm_flags = vma->vm_flags; + desc->page_prot = vma->vm_page_prot; + + desc->vm_ops = NULL; + desc->private_data = NULL; + + return desc; +} + +void set_vma_from_desc(struct vm_area_struct *vma, struct vm_area_desc *desc) +{ + /* + * Since we're invoking .mmap_prepare() despite having a partially + * established VMA, we must take care to handle setting fields + * correctly. + */ + + /* Mutable fields. Populated with initial state. */ + vma->vm_pgoff = desc->pgoff; + if (vma->vm_file != desc->file) + vma_set_file(vma, desc->file); + if (vma->vm_flags != desc->vm_flags) + vm_flags_set(vma, desc->vm_flags); + vma->vm_page_prot = desc->page_prot; + + /* User-defined fields. */ + vma->vm_ops = desc->vm_ops; + vma->vm_private_data = desc->private_data; +} diff --git a/mm/vma.h b/mm/vma.h index 0db066e7a45d..afd6cc026658 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -570,4 +570,8 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); #endif +struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); +void set_vma_from_desc(struct vm_area_struct *vma, struct vm_area_desc *desc); + #endif /* __MM_VMA_H */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 77b2949d874a..675a55216607 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1479,4 +1487,12 @@ static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct fi return vm_flags; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */ -- 2.49.0