There has always been a small race condition between the time a children status is checked, and the arrival of a SIGCHLD that would alert us of their demise, hopefully interrupt poll(). To close the gap, add the reading side of a pipe to the poll and use the signal handler to write to it for each child. Suggested=by: Phillip Wood <phillip.wood123@xxxxxxxxx> Signed-off-by: Carlo Marcelo Arenas Belón <carenas@xxxxxxxxx> --- Implements the "self pipe" trick Hillip suggested. I tried to make self healing and optional, as I also presume the race condition it is meant to address is very unlikely. An obvious disadvantage (at least of this implementation), is that it actually doubles the number of events that need to be handled for each children process on most cases (ex: when `poll()` gets interrupted) I suspect that if fixing that last race condition is so important with the current foundation, it might be better to reintroduce some sort of timeout to poll(), so that they will be cleared periodically. I had a prototype (only the bare minimum) that I thought was more efficient and that would instead remove completely the need for a signal handler which I would post (only for RFC) later. daemon.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/daemon.c b/daemon.c index d1be61fd57..d3b9421575 100644 --- a/daemon.c +++ b/daemon.c @@ -912,14 +912,17 @@ static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen) add_child(&cld, addr, addrlen); } -static void child_handler(int signo UNUSED) +int poll_pipe[2] = { -1, -1 }; + +static void child_handler(int signo) { /* - * Otherwise empty handler because systemcalls will get interrupted - * upon signal receipt * SysV needs the handler to be rearmed */ signal(SIGCHLD, child_handler); + + if (poll_pipe[1] >= 0) + write(poll_pipe[1], &signo, 1); } static int set_reuse_addr(int sockfd) @@ -1121,20 +1124,43 @@ static void socksetup(struct string_list *listen_addr, int listen_port, struct s static int service_loop(struct socketlist *socklist) { struct pollfd *pfd; + unsigned long nfds = 1 + socklist->nr; + + ALLOC_ARRAY(pfd, nfds); + if (!pipe(poll_pipe)) { + for (int i = 0; i < 2; i++) { + int flags; + + flags = fcntl(poll_pipe[i], F_GETFD, 0); + if (flags >= 0) + fcntl(poll_pipe[i], F_SETFD, flags | FD_CLOEXEC); + + flags = fcntl(poll_pipe[i], F_GETFL, 0); + if (flags < 0 || fcntl(poll_pipe[i], F_SETFL, + flags | O_NONBLOCK) == -1) { + close(poll_pipe[0]); + close(poll_pipe[1]); + poll_pipe[0] = poll_pipe[1] = -1; + break; + } + } + } + pfd[0].fd = poll_pipe[0]; + pfd[0].events = POLLIN; - CALLOC_ARRAY(pfd, socklist->nr); - - for (size_t i = 0; i < socklist->nr; i++) { - pfd[i].fd = socklist->list[i]; + for (size_t i = 1; i < nfds; i++) { + pfd[i].fd = socklist->list[i - 1]; pfd[i].events = POLLIN; } signal(SIGCHLD, child_handler); for (;;) { + int nevents, scratch; + check_dead_children(); - if (poll(pfd, socklist->nr, -1) < 0) { + if ((nevents = poll(pfd, nfds, -1)) <= 0) { if (errno != EINTR) { logerror("Poll failed, resuming: %s", strerror(errno)); @@ -1143,7 +1169,17 @@ static int service_loop(struct socketlist *socklist) continue; } - for (size_t i = 0; i < socklist->nr; i++) { + if ((pfd[0].revents & POLLIN) && pfd[0].fd != -1) { + if (nevents == 1 && read(pfd[0].fd, &scratch, 1) > 0) + continue; + else if (!read(pfd[0].fd, &scratch, 1)) { + close(pfd[0].fd); + pfd[0].fd = -1; + } + nevents--; + } + + for (size_t i = 1; nevents && i < nfds; i++) { if (pfd[i].revents & POLLIN) { union { struct sockaddr sa; @@ -1165,6 +1201,7 @@ static int service_loop(struct socketlist *socklist) } } handle(incoming, &ss.sa, sslen); + nevents--; } } } -- 2.50.0.131.gcf6f63ea6b.dirty