check_unsafe_exec() sets fs->in_exec under cred_guard_mutex, then execve() paths clear fs->in_exec lockless. This is fine if exec succeeds, but if it fails we have the following race: T1 sets fs->in_exec = 1, fails, drops cred_guard_mutex T2 sets fs->in_exec = 1 T1 clears fs->in_exec T2 continues with fs->in_exec == 0 Change fs/exec.c to clear fs->in_exec with cred_guard_mutex held. Reported-by: syzbot+1c486d0b62032c82a968@xxxxxxxxxxxxxxxxxxxxxxxxx Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001f.GAE@xxxxxxxxxx/ Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx> --- fs/exec.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 506cd411f4ac..17047210be46 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1229,13 +1229,12 @@ int begin_new_exec(struct linux_binprm * bprm) */ bprm->point_of_no_return = true; - /* - * Make this the only thread in the thread group. - */ + /* Make this the only thread in the thread group */ retval = de_thread(me); if (retval) goto out; - + /* see the comment in check_unsafe_exec() */ + current->fs->in_exec = 0; /* * Cancel any io_uring activity across execve */ @@ -1497,6 +1496,8 @@ static void free_bprm(struct linux_binprm *bprm) } free_arg_pages(bprm); if (bprm->cred) { + /* in case exec fails before de_thread() succeeds */ + current->fs->in_exec = 0; mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } @@ -1618,6 +1619,10 @@ static void check_unsafe_exec(struct linux_binprm *bprm) * suid exec because the differently privileged task * will be able to manipulate the current directory, etc. * It would be nice to force an unshare instead... + * + * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS) + * from another sub-thread until de_thread() succeeds, this + * state is protected by cred_guard_mutex we hold. */ n_fs = 1; spin_lock(&p->fs->lock); @@ -1862,7 +1867,6 @@ static int bprm_execve(struct linux_binprm *bprm) sched_mm_cid_after_execve(current); /* execve succeeded */ - current->fs->in_exec = 0; current->in_execve = 0; rseq_execve(current); user_events_execve(current); @@ -1881,7 +1885,6 @@ static int bprm_execve(struct linux_binprm *bprm) force_fatal_sig(SIGSEGV); sched_mm_cid_after_execve(current); - current->fs->in_exec = 0; current->in_execve = 0; return retval; -- 2.25.1.362.g51ebf55