On Fri, Aug 08, 2025 at 08:28:49AM -0700, Suren Baghdasaryan wrote:
> Utilize per-vma locks to stabilize vma after lookup without taking
> mmap_lock during PROCMAP_QUERY ioctl execution. If vma lock is
> contended, we fall back to mmap_lock but take it only momentarily
> to lock the vma and release the mmap_lock. In a very unlikely case
> of vm_refcnt overflow, this fall back path will fail and ioctl is
> done under mmap_lock protection.
>
> This change is designed to reduce mmap_lock contention and prevent
> PROCMAP_QUERY ioctl calls from blocking address space updates.
>
> Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
> Acked-by: SeongJae Park <sj@xxxxxxxxxx>

All LGTM, so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>

> ---
>  fs/proc/task_mmu.c | 103 +++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 85 insertions(+), 18 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index c0968d293b61..e64cf40ce9c4 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -132,6 +132,12 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
>
>  #ifdef CONFIG_PER_VMA_LOCK
>
> +static void reset_lock_ctx(struct proc_maps_locking_ctx *lock_ctx)
> +{
> +	lock_ctx->locked_vma = NULL;
> +	lock_ctx->mmap_locked = false;
> +}

Thanks!

> +
>  static void unlock_ctx_vma(struct proc_maps_locking_ctx *lock_ctx)
>  {
>  	if (lock_ctx->locked_vma) {
> @@ -157,8 +163,7 @@ static inline bool lock_vma_range(struct seq_file *m,
>  		lock_ctx->mmap_locked = true;
>  	} else {
>  		rcu_read_lock();
> -		lock_ctx->locked_vma = NULL;
> -		lock_ctx->mmap_locked = false;
> +		reset_lock_ctx(lock_ctx);
>  	}
>
>  	return true;
> @@ -522,28 +527,90 @@ static int pid_maps_open(struct inode *inode, struct file *file)
>  		PROCMAP_QUERY_VMA_FLAGS \
>  )
>
> -static int query_vma_setup(struct mm_struct *mm)
> +#ifdef CONFIG_PER_VMA_LOCK
> +
> +static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
>  {
> -	return mmap_read_lock_killable(mm);
> +	reset_lock_ctx(lock_ctx);
> +
> +	return 0;
>  }
>
> -static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
> +static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
>  {
> -	mmap_read_unlock(mm);
> +	if (lock_ctx->mmap_locked) {
> +		mmap_read_unlock(lock_ctx->mm);
> +		lock_ctx->mmap_locked = false;
> +	} else {
> +		unlock_ctx_vma(lock_ctx);
> +	}
> +}
> +
> +static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
> +						      unsigned long addr)
> +{
> +	struct mm_struct *mm = lock_ctx->mm;
> +	struct vm_area_struct *vma;
> +	struct vma_iterator vmi;
> +
> +	if (lock_ctx->mmap_locked)
> +		return find_vma(mm, addr);
> +
> +	/* Unlock previously locked VMA and find the next one under RCU */
> +	unlock_ctx_vma(lock_ctx);
> +	rcu_read_lock();
> +	vma_iter_init(&vmi, mm, addr);
> +	vma = lock_next_vma(mm, &vmi, addr);
> +	rcu_read_unlock();
> +
> +	if (!vma)
> +		return NULL;
> +
> +	if (!IS_ERR(vma)) {
> +		lock_ctx->locked_vma = vma;
> +		return vma;
> +	}
> +
> +	if (PTR_ERR(vma) == -EAGAIN) {
> +		/* Fallback to mmap_lock on vma->vm_refcnt overflow */
> +		mmap_read_lock(mm);
> +		vma = find_vma(mm, addr);
> +		lock_ctx->mmap_locked = true;
> +	}

Thanks this is great!

> +
> +	return vma;
>  }
>
> -static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
> +#else /* CONFIG_PER_VMA_LOCK */
> +
> +static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
> +{
> +	return mmap_read_lock_killable(lock_ctx->mm);
> +}
> +
> +static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
> +{
> +	mmap_read_unlock(lock_ctx->mm);
> +}
> +
> +static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
> +						      unsigned long addr)
>  {
> -	return find_vma(mm, addr);
> +	return find_vma(lock_ctx->mm, addr);
>  }
>
> -static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
> +#endif /* CONFIG_PER_VMA_LOCK */
> +
> +static struct vm_area_struct *query_matching_vma(struct proc_maps_locking_ctx *lock_ctx,
>  						 unsigned long addr, u32 flags)
>  {
>  	struct vm_area_struct *vma;
>
>  next_vma:
> -	vma = query_vma_find_by_addr(mm, addr);
> +	vma = query_vma_find_by_addr(lock_ctx, addr);
> +	if (IS_ERR(vma))
> +		return vma;
> +
>  	if (!vma)
>  		goto no_vma;
>
> @@ -584,11 +651,11 @@ static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
>  	return ERR_PTR(-ENOENT);
>  }
>
> -static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
> +static int do_procmap_query(struct mm_struct *mm, void __user *uarg)
>  {
> +	struct proc_maps_locking_ctx lock_ctx = { .mm = mm };
>  	struct procmap_query karg;
>  	struct vm_area_struct *vma;
> -	struct mm_struct *mm;
>  	const char *name = NULL;
>  	char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
>  	__u64 usize;
> @@ -615,17 +682,16 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
>  	if (!!karg.build_id_size != !!karg.build_id_addr)
>  		return -EINVAL;
>
> -	mm = priv->lock_ctx.mm;
>  	if (!mm || !mmget_not_zero(mm))
>  		return -ESRCH;
>
> -	err = query_vma_setup(mm);
> +	err = query_vma_setup(&lock_ctx);
>  	if (err) {
>  		mmput(mm);
>  		return err;
>  	}
>
> -	vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
> +	vma = query_matching_vma(&lock_ctx, karg.query_addr, karg.query_flags);
>  	if (IS_ERR(vma)) {
>  		err = PTR_ERR(vma);
>  		vma = NULL;
> @@ -710,7 +776,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
>  	}
>
>  	/* unlock vma or mmap_lock, and put mm_struct before copying data to user */
> -	query_vma_teardown(mm, vma);
> +	query_vma_teardown(&lock_ctx);
>  	mmput(mm);
>
>  	if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
> @@ -730,7 +796,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
>  	return 0;
>
>  out:
> -	query_vma_teardown(mm, vma);
> +	query_vma_teardown(&lock_ctx);
>  	mmput(mm);
>  	kfree(name_buf);
>  	return err;
> @@ -743,7 +809,8 @@ static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned l
>
>  	switch (cmd) {
>  	case PROCMAP_QUERY:
> -		return do_procmap_query(priv, (void __user *)arg);
> +		/* priv->lock_ctx.mm is set during file open operation */

Thanks!

> +		return do_procmap_query(priv->lock_ctx.mm, (void __user *)arg);
>  	default:
>  		return -ENOIOCTLCMD;
>  	}
> --
> 2.50.1.703.g449372360f-goog
>
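For anyone skimming the long diff, the heart of the change is the new CONFIG_PER_VMA_LOCK
lookup helper. The sketch below is a condensed restatement of the query_vma_find_by_addr()
hunk quoted above, with explanatory comments added; it is not additional code beyond what
the patch itself contains, and the quoted hunk remains the authoritative version.

/*
 * Condensed sketch of the new per-VMA-lock lookup path (restated from the
 * patch above, comments added for explanation).
 */
static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
						      unsigned long addr)
{
	struct mm_struct *mm = lock_ctx->mm;
	struct vm_area_struct *vma;
	struct vma_iterator vmi;

	/* A previous iteration already fell back to mmap_lock: keep using it. */
	if (lock_ctx->mmap_locked)
		return find_vma(mm, addr);

	/* Drop the VMA locked on the previous iteration, then walk under RCU. */
	unlock_ctx_vma(lock_ctx);
	rcu_read_lock();
	vma_iter_init(&vmi, mm, addr);
	vma = lock_next_vma(mm, &vmi, addr);	/* takes the per-VMA read lock */
	rcu_read_unlock();

	if (!vma)
		return NULL;			/* no VMA at or after addr */

	if (!IS_ERR(vma)) {
		lock_ctx->locked_vma = vma;	/* query_vma_teardown() unlocks it */
		return vma;
	}

	if (PTR_ERR(vma) == -EAGAIN) {
		/* vm_refcnt overflow: finish the rest of the query under mmap_lock. */
		mmap_read_lock(mm);
		vma = find_vma(mm, addr);
		lock_ctx->mmap_locked = true;
	}

	return vma;
}

Without CONFIG_PER_VMA_LOCK the helpers keep the old behaviour: setup takes
mmap_read_lock_killable(), the lookup is a plain find_vma(), and teardown drops the
mmap_lock.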