Christian, I will actually read your patch tomorrow, but at first glance... On 04/09, Christian Brauner wrote: > > The seqcounter might be > useful independent of pidfs. Are you sure? ;) To me, the new pid->pid_seq needs more justification... Again, can't we use pid->wait_pidfd.lock if we want to avoid the (minor) problem with the wrong ENOENT? Or even signal->siglock, although in this case we will need pid_task() + lock_task_sighand()... Oleg. > Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> > --- > include/linux/pid.h | 1 + > kernel/exit.c | 11 +++++++++++ > kernel/fork.c | 22 ++++++++++++---------- > kernel/pid.c | 1 + > 4 files changed, 25 insertions(+), 10 deletions(-) > > diff --git a/include/linux/pid.h b/include/linux/pid.h > index 311ecebd7d56..b54a4c1ef602 100644 > --- a/include/linux/pid.h > +++ b/include/linux/pid.h > @@ -65,6 +65,7 @@ struct pid > struct hlist_head inodes; > /* wait queue for pidfd notifications */ > wait_queue_head_t wait_pidfd; > + seqcount_rwlock_t pid_seq; > struct rcu_head rcu; > struct upid numbers[]; > }; > diff --git a/kernel/exit.c b/kernel/exit.c > index 1b51dc099f1e..8050572fe682 100644 > --- a/kernel/exit.c > +++ b/kernel/exit.c > @@ -133,17 +133,28 @@ struct release_task_post { > static void __unhash_process(struct release_task_post *post, struct task_struct *p, > bool group_dead) > { > + struct pid *pid; > + > + lockdep_assert_held_write(&tasklist_lock); > + > nr_threads--; > + > + pid = task_pid(p); > + raw_write_seqcount_begin(&pid->pid_seq); > detach_pid(post->pids, p, PIDTYPE_PID); > if (group_dead) { > detach_pid(post->pids, p, PIDTYPE_TGID); > detach_pid(post->pids, p, PIDTYPE_PGID); > detach_pid(post->pids, p, PIDTYPE_SID); > + } > + raw_write_seqcount_end(&pid->pid_seq); > > + if (group_dead) { > list_del_rcu(&p->tasks); > list_del_init(&p->sibling); > __this_cpu_dec(process_counts); > } > + > list_del_rcu(&p->thread_node); > } > > diff --git a/kernel/fork.c b/kernel/fork.c > index 4a2080b968c8..1480bf6f5f38 
100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -2109,24 +2109,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) > { > int err = 0; > + unsigned int seq; > > - if (!(flags & PIDFD_THREAD)) { > + do { > + seq = raw_seqcount_begin(&pid->pid_seq); > /* > * If this is struct pid isn't used as a thread-group > * leader pid but the caller requested to create a > * thread-group leader pidfd then report ENOENT to the > * caller as a hint. > */ > - if (!pid_has_task(pid, PIDTYPE_TGID)) > + if (!(flags & PIDFD_THREAD) && !pid_has_task(pid, PIDTYPE_TGID)) > err = -ENOENT; > - } > - > - /* > - * If this wasn't a thread-group leader struct pid or the task > - * got reaped in the meantime report -ESRCH to userspace. > - */ > - if (!pid_has_task(pid, PIDTYPE_PID)) > - err = -ESRCH; > + /* > + * If this wasn't a thread-group leader struct pid or > + * the task got reaped in the meantime report -ESRCH to > + * userspace. > + */ > + if (!pid_has_task(pid, PIDTYPE_PID)) > + err = -ESRCH; > + } while (read_seqcount_retry(&pid->pid_seq, seq)); > if (err) > return err; > > diff --git a/kernel/pid.c b/kernel/pid.c > index 4ac2ce46817f..bbca61f62faa 100644 > --- a/kernel/pid.c > +++ b/kernel/pid.c > @@ -271,6 +271,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, > upid = pid->numbers + ns->level; > idr_preload(GFP_KERNEL); > spin_lock(&pidmap_lock); > + seqcount_rwlock_init(&pid->pid_seq, &tasklist_lock); > if (!(ns->pid_allocated & PIDNS_ADDING)) > goto out_unlock; > pidfs_add_pid(pid); > -- > 2.47.2 >