1/****************************************************************************
2**
3** Copyright (C) 2016 Intel Corporation.
4** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com
5**
6** Permission is hereby granted, free of charge, to any person obtaining a copy
7** of this software and associated documentation files (the "Software"), to deal
8** in the Software without restriction, including without limitation the rights
9** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10** copies of the Software, and to permit persons to whom the Software is
11** furnished to do so, subject to the following conditions:
12**
13** The above copyright notice and this permission notice shall be included in
14** all copies or substantial portions of the Software.
15**
16** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22** THE SOFTWARE.
23**
24****************************************************************************/
25
26#ifndef _GNU_SOURCE
27# define _GNU_SOURCE
28#endif
29
30#include "forkfd.h"
31
32#include <sys/types.h>
33#if defined(__OpenBSD__) || defined(__NetBSD__)
34# include <sys/param.h>
35#endif
36#include <sys/time.h>
37#include <sys/resource.h>
38#include <sys/wait.h>
39#include <assert.h>
40#include <errno.h>
41#include <pthread.h>
42#include <signal.h>
43#include <stdlib.h>
44#include <string.h>
45#include <time.h>
46#include <unistd.h>
47
48#ifdef __linux__
49# define HAVE_WAIT4 1
50# if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x208 && \
51 (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201)))
52# include <sys/eventfd.h>
53# ifdef EFD_CLOEXEC
54# define HAVE_EVENTFD 1
55# endif
56# endif
57# if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x209 && \
58 (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201)))
59# define HAVE_PIPE2 1
60# endif
61#endif
62#if defined(__FreeBSD__) && __FreeBSD__ >= 9
63# include <sys/procdesc.h>
64#endif
65
66#if _POSIX_VERSION-0 >= 200809L || _XOPEN_VERSION-0 >= 500
67# define HAVE_WAITID 1
68#endif
69#if !defined(WEXITED) || !defined(WNOWAIT)
70# undef HAVE_WAITID
71#endif
72
73#if (defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1000032) || \
74 (defined(__OpenBSD__) && OpenBSD >= 201505) || \
75 (defined(__NetBSD__) && __NetBSD_Version__ >= 600000000)
76# define HAVE_PIPE2 1
77#endif
78#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) || \
79 defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__)
80# define HAVE_WAIT4 1
81#endif
82
83#if defined(__APPLE__)
/* Up until OS X 10.7, waitid(P_ALL, ...) will return success, but will not
 * fill in the details of the dead child. That means waitid is not useful to us.
 * Therefore, we only enable waitid() support if we're targeting OS X 10.8 or
 * later.
 */
89# include <Availability.h>
90# include <AvailabilityMacros.h>
91# if MAC_OS_X_VERSION_MIN_REQUIRED <= 1070
92# define HAVE_BROKEN_WAITID 1
93# endif
94#endif
95
96#ifndef FFD_ATOMIC_RELAXED
97# include "forkfd_gcc.h"
98#endif
99
/* Number of ProcessInfo slots in the statically allocated SmallArray. */
#define CHILDREN_IN_SMALL_ARRAY     16
/* Number of ProcessInfo slots in each BigArray. */
#define CHILDREN_IN_BIG_ARRAY       256

/* Element count of a true array object (meaningless when given a pointer). */
#define sizeofarray(array)          (sizeof(array) / sizeof((array)[0]))

/*
 * Evaluates `call` and stores its result in `ret`, repeating for as long as
 * the result is -1 and errno is EINTR. `call` is re-evaluated on every retry.
 * Both arguments are parenthesized so that arbitrary expressions expand safely.
 */
#define EINTR_LOOP(ret, call) \
    do {                      \
        (ret) = (call);       \
    } while ((ret) == -1 && errno == EINTR)
107
108struct pipe_payload
109{
110 struct forkfd_info info;
111 struct rusage rusage;
112};
113
114typedef struct process_info
115{
116 ffd_atomic_int pid;
117 int deathPipe;
118} ProcessInfo;
119
120struct BigArray;
121typedef struct Header
122{
123 ffd_atomic_pointer(struct BigArray) nextArray;
124 ffd_atomic_int busyCount;
125} Header;
126
127typedef struct BigArray
128{
129 Header header;
130 ProcessInfo entries[CHILDREN_IN_BIG_ARRAY];
131} BigArray;
132
133typedef struct SmallArray
134{
135 Header header;
136 ProcessInfo entries[CHILDREN_IN_SMALL_ARRAY];
137} SmallArray;
138static SmallArray children;
139
140static struct sigaction old_sigaction;
141static pthread_once_t forkfd_initialization = PTHREAD_ONCE_INIT;
142static ffd_atomic_int forkfd_status = FFD_ATOMIC_INIT(0);
143
144#ifdef HAVE_BROKEN_WAITID
145static int waitid_works = 0;
146#else
147static const int waitid_works = 1;
148#endif
149
150static ProcessInfo *tryAllocateInSection(Header *header, ProcessInfo entries[], int maxCount)
151{
152 /* we use ACQUIRE here because the signal handler might have released the PID */
153 int busyCount = ffd_atomic_add_fetch(&header->busyCount, 1, FFD_ATOMIC_ACQUIRE);
154 if (busyCount <= maxCount) {
155 /* there's an available entry in this section, find it and take it */
156 int i;
157 for (i = 0; i < maxCount; ++i) {
158 /* if the PID is 0, it's free; mark it as used by swapping it with -1 */
159 int expected_pid = 0;
160 if (ffd_atomic_compare_exchange(&entries[i].pid, &expected_pid,
161 -1, FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
162 return &entries[i];
163 }
164 }
165
166 /* there isn't an available entry, undo our increment */
167 (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELAXED);
168 return NULL;
169}
170
171static ProcessInfo *allocateInfo(Header **header)
172{
173 Header *currentHeader = &children.header;
174
175 /* try to find an available entry in the small array first */
176 ProcessInfo *info =
177 tryAllocateInSection(currentHeader, children.entries, sizeofarray(children.entries));
178
179 /* go on to the next arrays */
180 while (info == NULL) {
181 BigArray *array = ffd_atomic_load(&currentHeader->nextArray, FFD_ATOMIC_ACQUIRE);
182 if (array == NULL) {
183 /* allocate an array and try to use it */
184 BigArray *allocatedArray = (BigArray *)calloc(1, sizeof(BigArray));
185 if (allocatedArray == NULL)
186 return NULL;
187
188 if (ffd_atomic_compare_exchange(&currentHeader->nextArray, &array, allocatedArray,
189 FFD_ATOMIC_RELEASE, FFD_ATOMIC_ACQUIRE)) {
190 /* success */
191 array = allocatedArray;
192 } else {
193 /* failed, the atomic updated 'array' */
194 free(allocatedArray);
195 }
196 }
197
198 currentHeader = &array->header;
199 info = tryAllocateInSection(currentHeader, array->entries, sizeofarray(array->entries));
200 }
201
202 *header = currentHeader;
203 return info;
204}
205
#ifdef HAVE_WAITID
/*
 * Asks waitid(2), without blocking (WNOHANG) and without reaping (WNOWAIT),
 * whether child `pid` has exited. The answer is also left in *info.
 *
 * Returns non-zero only if waitid() succeeded and reported exactly `pid`.
 */
static int isChildReady(pid_t pid, siginfo_t *info)
{
    /* si_pid stays 0 when waitid() succeeds but has nothing to report */
    info->si_pid = 0;
    if (waitid(P_PID, pid, info, WEXITED | WNOHANG | WNOWAIT) != 0)
        return 0;
    return info->si_pid == pid;
}
#endif
213
214static void convertStatusToForkfdInfo(int status, struct forkfd_info *info)
215{
216 if (WIFEXITED(status)) {
217 info->code = CLD_EXITED;
218 info->status = WEXITSTATUS(status);
219 } else if (WIFSIGNALED(status)) {
220 info->code = CLD_KILLED;
221# ifdef WCOREDUMP
222 if (WCOREDUMP(status))
223 info->code = CLD_DUMPED;
224# endif
225 info->status = WTERMSIG(status);
226 }
227}
228
229static int tryReaping(pid_t pid, struct pipe_payload *payload)
230{
231 /* reap the child */
232#if defined(HAVE_WAIT4)
233 int status;
234 if (wait4(pid, &status, WNOHANG, &payload->rusage) <= 0)
235 return 0;
236 convertStatusToForkfdInfo(status, &payload->info);
237#else
238# if defined(HAVE_WAITID)
239 if (waitid_works) {
240 /* we have waitid(2), which gets us some payload values on some systems */
241 siginfo_t info;
242 info.si_pid = 0;
243 int ret = waitid(P_PID, pid, &info, WEXITED | WNOHANG) == 0 && info.si_pid == pid;
244 if (!ret)
245 return ret;
246
247 payload->info.code = info.si_code;
248 payload->info.status = info.si_status;
249# ifdef __linux__
250 payload->rusage.ru_utime.tv_sec = info.si_utime / CLOCKS_PER_SEC;
251 payload->rusage.ru_utime.tv_usec = info.si_utime % CLOCKS_PER_SEC;
252 payload->rusage.ru_stime.tv_sec = info.si_stime / CLOCKS_PER_SEC;
253 payload->rusage.ru_stime.tv_usec = info.si_stime % CLOCKS_PER_SEC;
254# endif
255 return 1;
256 }
257# endif // HAVE_WAITID
258 int status;
259 if (waitpid(pid, &status, WNOHANG) <= 0)
260 return 0; // child did not change state
261 convertStatusToForkfdInfo(status, &payload->info);
262#endif // !HAVE_WAIT4
263
264 return 1;
265}
266
267static void freeInfo(Header *header, ProcessInfo *entry)
268{
269 entry->deathPipe = -1;
270 ffd_atomic_store(&entry->pid, 0, FFD_ATOMIC_RELEASE);
271
272 (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELEASE);
273 assert(header->busyCount >= 0);
274}
275
276static void notifyAndFreeInfo(Header *header, ProcessInfo *entry,
277 const struct pipe_payload *payload)
278{
279 ssize_t ret;
280 EINTR_LOOP(ret, write(entry->deathPipe, payload, sizeof(*payload)));
281 EINTR_LOOP(ret, close(entry->deathPipe));
282
283 freeInfo(header, entry);
284}
285
286static void reapChildProcesses();
287static void sigchld_handler(int signum, siginfo_t *handler_info, void *handler_context)
288{
289 /*
290 * This is a signal handler, so we need to be careful about which functions
291 * we can call. See the full, official listing in the POSIX.1-2008
292 * specification at:
293 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
294 *
295 * The handler_info and handler_context parameters may not be valid, if
296 * we're a chained handler from another handler that did not use
297 * SA_SIGINFO. Therefore, we must obtain the siginfo ourselves directly by
298 * calling waitid.
299 *
300 * But we pass them anyway. Let's call the chained handler first, while
301 * those two arguments have a chance of being correct.
302 */
303 if (old_sigaction.sa_handler != SIG_IGN && old_sigaction.sa_handler != SIG_DFL) {
304 if (old_sigaction.sa_flags & SA_SIGINFO)
305 old_sigaction.sa_sigaction(signum, handler_info, handler_context);
306 else
307 old_sigaction.sa_handler(signum);
308 }
309
310 if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 1) {
311 int saved_errno = errno;
312 reapChildProcesses();
313 errno = saved_errno;
314 }
315}
316
317static inline void reapChildProcesses()
318{
319 /* is this one of our children? */
320 BigArray *array;
321 siginfo_t info;
322 struct pipe_payload payload;
323 int i;
324
325 memset(&info, 0, sizeof info);
326 memset(&payload, 0, sizeof payload);
327
328#ifdef HAVE_WAITID
329 if (waitid_works) {
330 /* be optimistic: try to see if we can get the child that exited */
331search_next_child:
332 /* waitid returns -1 ECHILD if there are no further children at all;
333 * it returns 0 and sets si_pid to 0 if there are children but they are not ready
334 * to be waited (we're passing WNOHANG). We should not get EINTR because
335 * we're passing WNOHANG and we should definitely not get EINVAL or anything else.
336 * That means we can actually ignore the return code and only inspect si_pid.
337 */
338 info.si_pid = 0;
339 waitid(P_ALL, 0, &info, WNOHANG | WNOWAIT | WEXITED);
340 if (info.si_pid == 0) {
341 /* there are no further un-waited-for children, so we can just exit.
342 */
343 return;
344 }
345
346 for (i = 0; i < (int)sizeofarray(children.entries); ++i) {
347 /* acquire the child first: swap the PID with -1 to indicate it's busy */
348 int pid = info.si_pid;
349 if (ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1,
350 FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) {
351 /* this is our child, send notification and free up this entry */
352 /* ### FIXME: what if tryReaping returns false? */
353 if (tryReaping(pid, &payload))
354 notifyAndFreeInfo(&children.header, &children.entries[i], &payload);
355 goto search_next_child;
356 }
357 }
358
359 /* try the arrays */
360 array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
361 while (array != NULL) {
362 for (i = 0; i < (int)sizeofarray(array->entries); ++i) {
363 int pid = info.si_pid;
364 if (ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1,
365 FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) {
366 /* this is our child, send notification and free up this entry */
367 /* ### FIXME: what if tryReaping returns false? */
368 if (tryReaping(pid, &payload))
369 notifyAndFreeInfo(&array->header, &array->entries[i], &payload);
370 goto search_next_child;
371 }
372 }
373
374 array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
375 }
376
377 /* if we got here, we couldn't find this child in our list. That means this child
378 * belongs to one of the chained SIGCHLD handlers. However, there might be another
379 * child that exited and does belong to us, so we need to check each one individually.
380 */
381 }
382#endif
383
384 for (i = 0; i < (int)sizeofarray(children.entries); ++i) {
385 int pid = ffd_atomic_load(&children.entries[i].pid, FFD_ATOMIC_ACQUIRE);
386 if (pid <= 0)
387 continue;
388#ifdef HAVE_WAITID
389 if (waitid_works) {
390 /* The child might have been reaped by the block above in another thread,
391 * so first check if it's ready and, if it is, lock it */
392 if (!isChildReady(pid, &info) ||
393 !ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1,
394 FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
395 continue;
396 }
397#endif
398 if (tryReaping(pid, &payload)) {
399 /* this is our child, send notification and free up this entry */
400 notifyAndFreeInfo(&children.header, &children.entries[i], &payload);
401 }
402 }
403
404 /* try the arrays */
405 array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
406 while (array != NULL) {
407 for (i = 0; i < (int)sizeofarray(array->entries); ++i) {
408 int pid = ffd_atomic_load(&array->entries[i].pid, FFD_ATOMIC_ACQUIRE);
409 if (pid <= 0)
410 continue;
411#ifdef HAVE_WAITID
412 if (waitid_works) {
413 /* The child might have been reaped by the block above in another thread,
414 * so first check if it's ready and, if it is, lock it */
415 if (!isChildReady(pid, &info) ||
416 !ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1,
417 FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
418 continue;
419 }
420#endif
421 if (tryReaping(pid, &payload)) {
422 /* this is our child, send notification and free up this entry */
423 notifyAndFreeInfo(&array->header, &array->entries[i], &payload);
424 }
425 }
426
427 array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
428 }
429}
430
431static void ignore_sigpipe()
432{
433#ifdef O_NOSIGPIPE
434 static ffd_atomic_int done = FFD_ATOMIC_INIT(0);
435 if (ffd_atomic_load(&done, FFD_ATOMIC_RELAXED))
436 return;
437#endif
438
439 struct sigaction action;
440 memset(&action, 0, sizeof action);
441 sigemptyset(&action.sa_mask);
442 action.sa_handler = SIG_IGN;
443 action.sa_flags = 0;
444 sigaction(SIGPIPE, &action, NULL);
445
446#ifdef O_NOSIGPIPE
447 ffd_atomic_store(&done, 1, FFD_ATOMIC_RELAXED);
448#endif
449}
450
451static void forkfd_initialize()
452{
453#if defined(HAVE_BROKEN_WAITID)
454 pid_t pid = fork();
455 if (pid == 0) {
456 _exit(0);
457 } else if (pid > 0) {
458 siginfo_t info;
459 waitid(P_ALL, 0, &info, WNOWAIT | WEXITED);
460 waitid_works = (info.si_pid != 0);
461 info.si_pid = 0;
462
463 // now really reap the child
464 waitid(P_PID, pid, &info, WEXITED);
465 waitid_works = waitid_works && (info.si_pid != 0);
466 }
467#endif
468
469 /* install our signal handler */
470 struct sigaction action;
471 memset(&action, 0, sizeof action);
472 sigemptyset(&action.sa_mask);
473 action.sa_flags = SA_NOCLDSTOP | SA_SIGINFO;
474 action.sa_sigaction = sigchld_handler;
475
476 /* ### RACE CONDITION
477 * The sigaction function does a memcpy from an internal buffer
478 * to old_sigaction, which we use in the SIGCHLD handler. If a
479 * SIGCHLD is delivered before or during that memcpy, the handler will
480 * see an inconsistent state.
481 *
482 * There is no solution. pthread_sigmask doesn't work here because the
483 * signal could be delivered to another thread.
484 */
485 sigaction(SIGCHLD, &action, &old_sigaction);
486
487#ifndef O_NOSIGPIPE
488 /* disable SIGPIPE too */
489 ignore_sigpipe();
490#endif
491
492#ifndef __GNUC__
493 atexit(cleanup);
494#endif
495
496 ffd_atomic_store(&forkfd_status, 1, FFD_ATOMIC_RELAXED);
497}
498
499#ifdef __GNUC__
500__attribute((destructor, unused)) static void cleanup();
501#endif
502
503static void cleanup()
504{
505 BigArray *array;
506 /* This function is not thread-safe!
507 * It must only be called when the process is shutting down.
508 * At shutdown, we expect no one to be calling forkfd(), so we don't
509 * need to be thread-safe with what is done there.
510 *
511 * But SIGCHLD might be delivered to any thread, including this one.
512 * There's no way to prevent that. The correct solution would be to
513 * cooperatively delete. We don't do that.
514 */
515 if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 0)
516 return;
517
518 /* notify the handler that we're no longer in operation */
519 ffd_atomic_store(&forkfd_status, 0, FFD_ATOMIC_RELAXED);
520
521 /* free any arrays we might have */
522 array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
523 while (array != NULL) {
524 BigArray *next = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
525 free(array);
526 array = next;
527 }
528}
529
530static int create_pipe(int filedes[], int flags)
531{
532 int ret = -1;
533#ifdef HAVE_PIPE2
534 /* use pipe2(2) whenever possible, since it can thread-safely create a
535 * cloexec pair of pipes. Without it, we have a race condition setting
536 * FD_CLOEXEC
537 */
538
539# ifdef O_NOSIGPIPE
540 /* try first with O_NOSIGPIPE */
541 ret = pipe2(filedes, O_CLOEXEC | O_NOSIGPIPE);
542 if (ret == -1) {
543 /* O_NOSIGPIPE not supported, ignore SIGPIPE */
544 ignore_sigpipe();
545 }
546# endif
547 if (ret == -1)
548 ret = pipe2(filedes, O_CLOEXEC);
549 if (ret == -1)
550 return ret;
551
552 if ((flags & FFD_CLOEXEC) == 0)
553 fcntl(filedes[0], F_SETFD, 0);
554#else
555 ret = pipe(filedes);
556 if (ret == -1)
557 return ret;
558
559 fcntl(filedes[1], F_SETFD, FD_CLOEXEC);
560 if (flags & FFD_CLOEXEC)
561 fcntl(filedes[0], F_SETFD, FD_CLOEXEC);
562#endif
563 if (flags & FFD_NONBLOCK)
564 fcntl(filedes[0], F_SETFL, fcntl(filedes[0], F_GETFL) | O_NONBLOCK);
565 return ret;
566}
567
#if defined(FORKFD_NO_SPAWNFD) && defined(__FreeBSD__) && __FreeBSD__ >= 9
#  if __FreeBSD__ == 9
/* PROCDESC is an optional kernel feature that was not enabled by default on
 * FreeBSD 9, so its presence has to be detected at runtime. */
static ffd_atomic_int system_has_forkfd = FFD_ATOMIC_INIT(1);
#  else
/* PROCDESC was enabled by default on FreeBSD 10; on v11 it is no longer an
 * option and can't be disabled. */
static const int system_has_forkfd = 1;
#  endif

/*
 * forkfd() implemented with the system's pdfork(). Returns the process
 * descriptor in the parent, FFD_CHILD_PROCESS in the child and -1 on failure.
 * In the parent, the child's PID is stored in *ppid if ppid is not NULL.
 */
static int system_forkfd(int flags, pid_t *ppid)
{
    int procdesc;
    pid_t child = pdfork(&procdesc, PD_DAEMON);

    if (__builtin_expect(child == -1, 0)) {
#  if __FreeBSD__ == 9
        if (errno == ENOSYS) {
            /* PROCDESC wasn't compiled into the kernel: don't try it again. */
            ffd_atomic_store(&system_has_forkfd, 0, FFD_ATOMIC_RELAXED);
        }
#  endif
        return -1;
    }

    if (child == 0)
        return FFD_CHILD_PROCESS;   /* child process */

    /* parent process */
    if (flags & FFD_CLOEXEC)
        fcntl(procdesc, F_SETFD, FD_CLOEXEC);
    if (flags & FFD_NONBLOCK) {
        const int status_flags = fcntl(procdesc, F_GETFL);
        fcntl(procdesc, F_SETFL, status_flags | O_NONBLOCK);
    }
    if (ppid)
        *ppid = child;
    return procdesc;
}
#else
/* No system-provided equivalent is used in this configuration. */
static const int system_has_forkfd = 0;

/* Placeholder that always fails; both arguments are ignored. */
static int system_forkfd(int flags, pid_t *ppid)
{
    (void)ppid;
    (void)flags;
    return -1;
}
#endif
616
617#ifndef FORKFD_NO_FORKFD
618/**
619 * @brief forkfd returns a file descriptor representing a child process
620 * @return a file descriptor, or -1 in case of failure
621 *
622 * forkfd() creates a file descriptor that can be used to be notified of when a
623 * child process exits. This file descriptor can be monitored using select(2),
624 * poll(2) or similar mechanisms.
625 *
626 * The @a flags parameter can contain the following values ORed to change the
627 * behaviour of forkfd():
628 *
629 * @li @c FFD_NONBLOCK Set the O_NONBLOCK file status flag on the new open file
630 * descriptor. Using this flag saves extra calls to fnctl(2) to achieve the same
631 * result.
632 *
633 * @li @c FFD_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file
634 * descriptor. You probably want to set this flag, since forkfd() does not work
635 * if the original parent process dies.
636 *
637 * The file descriptor returned by forkfd() supports the following operations:
638 *
639 * @li read(2) When the child process exits, then the buffer supplied to
640 * read(2) is used to return information about the status of the child in the
641 * form of one @c siginfo_t structure. The buffer must be at least
642 * sizeof(siginfo_t) bytes. The return value of read(2) is the total number of
643 * bytes read.
644 *
645 * @li poll(2), select(2) (and similar) The file descriptor is readable (the
646 * select(2) readfds argument; the poll(2) POLLIN flag) if the child has exited
647 * or signalled via SIGCHLD.
648 *
649 * @li close(2) When the file descriptor is no longer required it should be closed.
650 */
651int forkfd(int flags, pid_t *ppid)
652{
653 Header *header;
654 ProcessInfo *info;
655 pid_t pid;
656 int fd = -1;
657 int death_pipe[2];
658 int sync_pipe[2];
659 int ret;
660#ifdef __linux__
661 int efd;
662#endif
663
664 if (system_has_forkfd) {
665 ret = system_forkfd(flags, ppid);
666 if (system_has_forkfd)
667 return ret;
668 }
669
670 (void) pthread_once(&forkfd_initialization, forkfd_initialize);
671
672 info = allocateInfo(&header);
673 if (info == NULL) {
674 errno = ENOMEM;
675 return -1;
676 }
677
678 /* create the pipes before we fork */
679 if (create_pipe(death_pipe, flags) == -1)
680 goto err_free; /* failed to create the pipes, pass errno */
681
682#ifdef HAVE_EVENTFD
683 /* try using an eventfd, which consumes less resources */
684 efd = eventfd(0, EFD_CLOEXEC);
685 if (efd == -1)
686#endif
687 {
688 /* try a pipe */
689 if (create_pipe(sync_pipe, FFD_CLOEXEC) == -1) {
690 /* failed both at eventfd and pipe; fail and pass errno */
691 goto err_close;
692 }
693 }
694
695 /* now fork */
696 pid = fork();
697 if (pid == -1)
698 goto err_close2; /* failed to fork, pass errno */
699 if (ppid)
700 *ppid = pid;
701
702 /*
703 * We need to store the child's PID in the info structure, so
704 * the SIGCHLD handler knows that this child is present and it
705 * knows the writing end of the pipe to pass information on.
706 * However, the child process could exit before we stored the
707 * information (or the handler could run for other children exiting).
708 * We prevent that from happening by blocking the child process in
709 * a read(2) until we're finished storing the information.
710 */
711 if (pid == 0) {
712 /* this is the child process */
713 /* first, wait for the all clear */
714#ifdef HAVE_EVENTFD
715 if (efd != -1) {
716 eventfd_t val64;
717 EINTR_LOOP(ret, eventfd_read(efd, &val64));
718 EINTR_LOOP(ret, close(efd));
719 } else
720#endif
721 {
722 char c;
723 EINTR_LOOP(ret, close(sync_pipe[1]));
724 EINTR_LOOP(ret, read(sync_pipe[0], &c, sizeof c));
725 EINTR_LOOP(ret, close(sync_pipe[0]));
726 }
727
728 /* now close the pipes and return to the caller */
729 EINTR_LOOP(ret, close(death_pipe[0]));
730 EINTR_LOOP(ret, close(death_pipe[1]));
731 fd = FFD_CHILD_PROCESS;
732 } else {
733 /* parent process */
734 info->deathPipe = death_pipe[1];
735 fd = death_pipe[0];
736 ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);
737
738 /* release the child */
739#ifdef HAVE_EVENTFD
740 if (efd != -1) {
741 eventfd_t val64 = 42;
742 EINTR_LOOP(ret, eventfd_write(efd, val64));
743 EINTR_LOOP(ret, close(efd));
744 } else
745#endif
746 {
747 /*
748 * Usually, closing would be enough to make read(2) return and the child process
749 * continue. We need to write here: another thread could be calling forkfd at the
750 * same time, which means auxpipe[1] might be open in another child process.
751 */
752 EINTR_LOOP(ret, close(sync_pipe[0]));
753 EINTR_LOOP(ret, write(sync_pipe[1], "", 1));
754 EINTR_LOOP(ret, close(sync_pipe[1]));
755 }
756 }
757
758 return fd;
759
760err_close2:
761#ifdef HAVE_EVENTFD
762 if (efd != -1) {
763 EINTR_LOOP(ret, close(efd));
764 } else
765#endif
766 {
767 EINTR_LOOP(ret, close(sync_pipe[0]));
768 EINTR_LOOP(ret, close(sync_pipe[1]));
769 }
770err_close:
771 EINTR_LOOP(ret, close(death_pipe[0]));
772 EINTR_LOOP(ret, close(death_pipe[1]));
773err_free:
774 /* free the info pointer */
775 freeInfo(header, info);
776 return -1;
777}
778#endif // FORKFD_NO_FORKFD
779
780#if _POSIX_SPAWN > 0 && !defined(FORKFD_NO_SPAWNFD)
781int spawnfd(int flags, pid_t *ppid, const char *path, const posix_spawn_file_actions_t *file_actions,
782 posix_spawnattr_t *attrp, char *const argv[], char *const envp[])
783{
784 Header *header;
785 ProcessInfo *info;
786 struct pipe_payload payload;
787 pid_t pid;
788 int death_pipe[2];
789 int ret = -1;
790 /* we can only do work if we have a way to start the child in stopped mode;
791 * otherwise, we have a major race condition. */
792
793 assert(!system_has_forkfd);
794
795 (void) pthread_once(&forkfd_initialization, forkfd_initialize);
796
797 info = allocateInfo(&header);
798 if (info == NULL) {
799 errno = ENOMEM;
800 goto out;
801 }
802
803 /* create the pipe before we spawn */
804 if (create_pipe(death_pipe, flags) == -1)
805 goto err_free; /* failed to create the pipes, pass errno */
806
807 /* start the process */
808 if (flags & FFD_SPAWN_SEARCH_PATH) {
809 /* use posix_spawnp */
810 if (posix_spawnp(&pid, path, file_actions, attrp, argv, envp) != 0)
811 goto err_close;
812 } else {
813 if (posix_spawn(&pid, path, file_actions, attrp, argv, envp) != 0)
814 goto err_close;
815 }
816
817 if (ppid)
818 *ppid = pid;
819
820 /* Store the child's PID in the info structure.
821 */
822 info->deathPipe = death_pipe[1];
823 ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);
824
825 /* check if the child has already exited */
826 if (tryReaping(pid, &payload))
827 notifyAndFreeInfo(header, info, &payload);
828
829 ret = death_pipe[0];
830 return ret;
831
832err_close:
833 EINTR_LOOP(ret, close(death_pipe[0]));
834 EINTR_LOOP(ret, close(death_pipe[1]));
835
836err_free:
837 /* free the info pointer */
838 freeInfo(header, info);
839
840out:
841 return -1;
842}
843#endif // _POSIX_SPAWN && !FORKFD_NO_SPAWNFD
844
845
846int forkfd_wait(int ffd, forkfd_info *info, struct rusage *rusage)
847{
848 struct pipe_payload payload;
849 int ret;
850
851 if (system_has_forkfd) {
852#if defined(__FreeBSD__) && __FreeBSD__ >= 9
853 pid_t pid;
854 int status;
855 int options = WEXITED;
856
857 ret = pdgetpid(ffd, &pid);
858 if (ret == -1)
859 return ret;
860 ret = fcntl(ffd, F_GETFL);
861 if (ret == -1)
862 return ret;
863 options |= (ret & O_NONBLOCK) ? WNOHANG : 0;
864 ret = wait4(pid, &status, options, rusage);
865 if (ret != -1 && info)
866 convertStatusToForkfdInfo(status, info);
867 return ret == -1 ? -1 : 0;
868#endif
869 }
870
871 ret = read(ffd, &payload, sizeof(payload));
872 if (ret == -1)
873 return ret; /* pass errno, probably EINTR, EBADF or EWOULDBLOCK */
874
875 assert(ret == sizeof(payload));
876 if (info)
877 *info = payload.info;
878 if (rusage)
879 *rusage = payload.rusage;
880
881 return 0; /* success */
882}
883
884
/*
 * Closes a descriptor with close(2) and returns that call's result unchanged.
 */
int forkfd_close(int ffd)
{
    const int rc = close(ffd);
    return rc;
}
889