[RFC PATCH] pidfs: ensure consistent ENOENT/ESRCH reporting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In a prior patch series we tried to cleanly differentiate between:

(1) The task has already been reaped.
(2) The caller requested a pidfd for a thread-group leader but the pid
    actually references a struct pid that isn't used as a thread-group
    leader.

as this was causing issues for non-threaded workloads.

But there's cases where the current simple logic is wrong. Specifically,
if the pid was a leader pid and the check races with __unhash_process().

Stabilize this by using a sequence counter associated with tasklist_lock
and retry while we're in __unhash_process(). The seqcounter might be
useful independent of pidfs.

Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
---
 include/linux/pid.h |  1 +
 kernel/exit.c       | 11 +++++++++++
 kernel/fork.c       | 22 ++++++++++++----------
 kernel/pid.c        |  1 +
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 311ecebd7d56..b54a4c1ef602 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -65,6 +65,7 @@ struct pid
 	struct hlist_head inodes;
 	/* wait queue for pidfd notifications */
 	wait_queue_head_t wait_pidfd;
+	seqcount_rwlock_t pid_seq;
 	struct rcu_head rcu;
 	struct upid numbers[];
 };
diff --git a/kernel/exit.c b/kernel/exit.c
index 1b51dc099f1e..8050572fe682 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -133,17 +133,28 @@ struct release_task_post {
 static void __unhash_process(struct release_task_post *post, struct task_struct *p,
 			     bool group_dead)
 {
+	struct pid *pid;
+
+	lockdep_assert_held_write(&tasklist_lock);
+
 	nr_threads--;
+
+	pid = task_pid(p);
+	raw_write_seqcount_begin(&pid->pid_seq);
 	detach_pid(post->pids, p, PIDTYPE_PID);
 	if (group_dead) {
 		detach_pid(post->pids, p, PIDTYPE_TGID);
 		detach_pid(post->pids, p, PIDTYPE_PGID);
 		detach_pid(post->pids, p, PIDTYPE_SID);
+	}
+	raw_write_seqcount_end(&pid->pid_seq);
 
+	if (group_dead) {
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
 		__this_cpu_dec(process_counts);
 	}
+
 	list_del_rcu(&p->thread_node);
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 4a2080b968c8..1480bf6f5f38 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2109,24 +2109,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
 int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 {
 	int err = 0;
+	unsigned int seq;
 
-	if (!(flags & PIDFD_THREAD)) {
+	do {
+		seq = raw_seqcount_begin(&pid->pid_seq);
 		/*
 		 * If this is struct pid isn't used as a thread-group
 		 * leader pid but the caller requested to create a
 		 * thread-group leader pidfd then report ENOENT to the
 		 * caller as a hint.
 		 */
-		if (!pid_has_task(pid, PIDTYPE_TGID))
+		if (!(flags & PIDFD_THREAD) && !pid_has_task(pid, PIDTYPE_TGID))
 			err = -ENOENT;
-	}
-
-	/*
-	 * If this wasn't a thread-group leader struct pid or the task
-	 * got reaped in the meantime report -ESRCH to userspace.
-	 */
-	if (!pid_has_task(pid, PIDTYPE_PID))
-		err = -ESRCH;
+		/*
+		 * If this wasn't a thread-group leader struct pid or
+		 * the task got reaped in the meantime report -ESRCH to
+		 * userspace.
+		 */
+		if (!pid_has_task(pid, PIDTYPE_PID))
+			err = -ESRCH;
+	} while (read_seqcount_retry(&pid->pid_seq, seq));
 	if (err)
 		return err;
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 4ac2ce46817f..bbca61f62faa 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -271,6 +271,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 	upid = pid->numbers + ns->level;
 	idr_preload(GFP_KERNEL);
 	spin_lock(&pidmap_lock);
+	seqcount_rwlock_init(&pid->pid_seq, &tasklist_lock);
 	if (!(ns->pid_allocated & PIDNS_ADDING))
 		goto out_unlock;
 	pidfs_add_pid(pid);
-- 
2.47.2





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux