1 /*        $NetBSD: work_fork.c,v 1.15 2020/10/10 13:41:14 christos Exp $        */
2 
3 /*
4  * work_fork.c - fork implementation for blocking worker child.
5  */
6 #include <config.h>
7 #include "ntp_workimpl.h"
8 
9 #ifdef WORK_FORK
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <signal.h>
13 #include <sys/wait.h>
14 
15 #include "iosignal.h"
16 #include "ntp_stdlib.h"
17 #include "ntp_malloc.h"
18 #include "ntp_syslog.h"
19 #include "ntpd.h"
20 #include "ntp_io.h"
21 #include "ntp_assert.h"
22 #include "ntp_unixtime.h"
23 #include "ntp_worker.h"
24 
25 /* === variables === */
26           int                           worker_process;
27           addremove_io_fd_func          addremove_io_fd;
28 static    volatile int                  worker_sighup_received;
29 int       saved_argc = 0;
30 char      **saved_argv;
31 
32 /* === function prototypes === */
33 static    void                fork_blocking_child(blocking_child *);
34 static    RETSIGTYPE          worker_sighup(int);
35 static    void                send_worker_home_atexit(void);
36 static    void                cleanup_after_child(blocking_child *);
37 
38 /* === I/O helpers === */
39 /* Since we have signals enabled, there's a good chance that blocking IO
40  * via pipe suffers from EINTR -- and this goes for both directions.
41  * The next two wrappers will loop until either all the data is written
42  * or read, plus handling the EOF condition on read. They may return
43  * zero if no data was transferred at all, and effectively every return
44  * value that differs from the given transfer length signifies an error
45  * condition.
46  */
47 
/*
 * netread() - read up to l octets from fd into vb, restarting after
 * EINTR.  Stops early on EOF or on any other read error.
 *
 * Returns the number of octets actually stored in vb; any value other
 * than l means EOF or an error occurred.
 */
static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char * const	start = vb;
	char *		cursor = start;
	size_t		remaining = l;

	while (remaining > 0) {
		ssize_t got = read(fd, cursor, remaining);

		if (got > 0) {
			cursor += got;
			remaining -= got;
			continue;
		}
		/* only an interrupted call is worth retrying */
		if (got < 0 && EINTR == errno)
			continue;
		break;		/* EOF or hard error */
	}
	return (size_t)(cursor - start);
}
69 
70 
/*
 * netwrite() - write l octets from vb to fd, restarting after EINTR.
 * Stops early when write() fails to make progress for any other
 * reason.
 *
 * Returns the number of octets actually written; any value other than
 * l means an error occurred.
 */
static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char * const	start = vb;
	const char *		cursor = start;
	size_t			remaining = l;

	while (remaining > 0) {
		ssize_t put = write(fd, cursor, remaining);

		if (put > 0) {
			cursor += put;
			remaining -= put;
			continue;
		}
		/* no progress: retry only when errno says EINTR */
		if (EINTR == errno)
			continue;
		break;
	}
	return (size_t)(cursor - start);
}
92 
93 
94 #if defined(HAVE_DROPROOT)
95 extern int set_user_group_ids(void);
96 #endif
97 
98 /* === functions === */
99 /*
100  * exit_worker()
101  *
102  * On some systems _exit() is preferred to exit() for forked children.
103  * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
104  * recommends _exit() to avoid double-flushing C runtime stream buffers
105  * and also to avoid calling the parent's atexit() routines in the
106  * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
107  * bypasses CRT cleanup, fflush() files we know might have output
108  * buffered.
109  */
110 void
exit_worker(int exitcode)111 exit_worker(
112           int       exitcode
113           )
114 {
115           if (syslog_file != NULL)
116                     fflush(syslog_file);
117           fflush(stdout);
118           fflush(stderr);
119           WORKER_CHILD_EXIT (exitcode); /* space before ( required */
120 }
121 
122 
123 static RETSIGTYPE
worker_sighup(int sig)124 worker_sighup(
125           int sig
126           )
127 {
128           if (SIGHUP == sig)
129                     worker_sighup_received = 1;
130 }
131 
132 
133 int
worker_sleep(blocking_child * c,time_t seconds)134 worker_sleep(
135           blocking_child *    c,
136           time_t                        seconds
137           )
138 {
139           u_int sleep_remain;
140 
141           sleep_remain = (u_int)seconds;
142           do {
143                     if (!worker_sighup_received)
144                               sleep_remain = sleep(sleep_remain);
145                     if (worker_sighup_received) {
146                               TRACE(1, ("worker SIGHUP with %us left to sleep",
147                                           sleep_remain));
148                               worker_sighup_received = 0;
149                               return -1;
150                     }
151           } while (sleep_remain);
152 
153           return 0;
154 }
155 
156 
157 void
interrupt_worker_sleep(void)158 interrupt_worker_sleep(void)
159 {
160           u_int                         idx;
161           blocking_child *    c;
162           int                           rc;
163 
164           for (idx = 0; idx < blocking_children_alloc; idx++) {
165                     c = blocking_children[idx];
166 
167                     if (NULL == c || c->reusable == TRUE)
168                               continue;
169 
170                     rc = kill(c->pid, SIGHUP);
171                     if (rc < 0)
172                               msyslog(LOG_ERR,
173                                         "Unable to signal HUP to wake child pid %d: %m",
174                                         c->pid);
175           }
176 }
177 
178 
179 /*
180  * harvest_child_status() runs in the parent.
181  *
182  * Note the error handling -- this is an interaction with SIGCHLD.
183  * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
184  * automatically. Since we're not really interested in the result code,
185  * we simply ignore the error.
186  */
187 static void
harvest_child_status(blocking_child * c)188 harvest_child_status(
189           blocking_child *    c
190           )
191 {
192           if (c->pid) {
193                     /* Wait on the child so it can finish terminating */
194                     if (waitpid(c->pid, NULL, 0) == c->pid)
195                               TRACE(4, ("harvested child %d\n", c->pid));
196                     else if (errno != ECHILD)
197                               msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
198                     c->pid = 0;
199           }
200 }
201 
202 /*
203  * req_child_exit() runs in the parent.
204  */
205 int
req_child_exit(blocking_child * c)206 req_child_exit(
207           blocking_child *    c
208           )
209 {
210           if (-1 != c->req_write_pipe) {
211                     close(c->req_write_pipe);
212                     c->req_write_pipe = -1;
213                     return 0;
214           }
215           /* Closing the pipe forces the child to exit */
216           harvest_child_status(c);
217           return -1;
218 }
219 
220 
221 /*
222  * cleanup_after_child() runs in parent.
223  */
224 static void
cleanup_after_child(blocking_child * c)225 cleanup_after_child(
226           blocking_child *    c
227           )
228 {
229           harvest_child_status(c);
230           if (-1 != c->resp_read_pipe) {
231                     (*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
232                     close(c->resp_read_pipe);
233                     c->resp_read_pipe = -1;
234           }
235           c->resp_read_ctx = NULL;
236           DEBUG_INSIST(-1 == c->req_read_pipe);
237           DEBUG_INSIST(-1 == c->resp_write_pipe);
238           c->reusable = TRUE;
239 }
240 
241 
242 static void
send_worker_home_atexit(void)243 send_worker_home_atexit(void)
244 {
245           u_int                         idx;
246           blocking_child *    c;
247 
248           if (worker_process)
249                     return;
250 
251           for (idx = 0; idx < blocking_children_alloc; idx++) {
252                     c = blocking_children[idx];
253                     if (NULL == c)
254                               continue;
255                     req_child_exit(c);
256           }
257 }
258 
259 
260 int
send_blocking_req_internal(blocking_child * c,blocking_pipe_header * hdr,void * data)261 send_blocking_req_internal(
262           blocking_child *    c,
263           blocking_pipe_header *        hdr,
264           void *                        data
265           )
266 {
267           size_t    octets;
268           size_t    rc;
269 
270           DEBUG_REQUIRE(hdr != NULL);
271           DEBUG_REQUIRE(data != NULL);
272           DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);
273 
274           if (-1 == c->req_write_pipe) {
275                     fork_blocking_child(c);
276                     DEBUG_INSIST(-1 != c->req_write_pipe);
277           }
278 
279           octets = sizeof(*hdr);
280           rc = netwrite(c->req_write_pipe, hdr, octets);
281 
282           if (rc == octets) {
283                     octets = hdr->octets - sizeof(*hdr);
284                     rc = netwrite(c->req_write_pipe, data, octets);
285                     if (rc == octets)
286                               return 0;
287           }
288 
289           msyslog(LOG_ERR,
290                     "send_blocking_req_internal: short write (%zu of %zu), %m",
291                     rc, octets);
292 
293           /* Fatal error.  Clean up the child process.  */
294           req_child_exit(c);
295           exit(1);  /* otherwise would be return -1 */
296 }
297 
298 
299 blocking_pipe_header *
receive_blocking_req_internal(blocking_child * c)300 receive_blocking_req_internal(
301           blocking_child *    c
302           )
303 {
304           blocking_pipe_header          hdr;
305           blocking_pipe_header *        req;
306           size_t                        rc;
307           size_t                        octets;
308 
309           DEBUG_REQUIRE(-1 != c->req_read_pipe);
310 
311           req = NULL;
312           rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));
313 
314           if (0 == rc) {
315                     TRACE(4, ("parent closed request pipe, child %d terminating\n",
316                                 c->pid));
317           } else if (rc != sizeof(hdr)) {
318                     msyslog(LOG_ERR,
319                               "receive_blocking_req_internal: short header read (%zu of %zu), %m",
320                               rc, sizeof(hdr));
321           } else {
322                     INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
323                     req = emalloc(hdr.octets);
324                     memcpy(req, &hdr, sizeof(*req));
325                     octets = hdr.octets - sizeof(hdr);
326                     rc = netread(c->req_read_pipe, (char *)(req + 1),
327                                    octets);
328 
329                     if (rc != octets)
330                               msyslog(LOG_ERR,
331                                         "receive_blocking_req_internal: short read (%zu of %zu), %m",
332                                         rc, octets);
333                     else if (BLOCKING_REQ_MAGIC != req->magic_sig)
334                               msyslog(LOG_ERR,
335                                         "receive_blocking_req_internal: packet header mismatch (0x%x)",
336                                         req->magic_sig);
337                     else
338                               return req;
339           }
340 
341           if (req != NULL)
342                     free(req);
343 
344           return NULL;
345 }
346 
347 
348 int
send_blocking_resp_internal(blocking_child * c,blocking_pipe_header * resp)349 send_blocking_resp_internal(
350           blocking_child *    c,
351           blocking_pipe_header *        resp
352           )
353 {
354           size_t    octets;
355           size_t    rc;
356 
357           DEBUG_REQUIRE(-1 != c->resp_write_pipe);
358 
359           octets = resp->octets;
360           rc = netwrite(c->resp_write_pipe, resp, octets);
361           free(resp);
362 
363           if (octets == rc)
364                     return 0;
365 
366           TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
367                       rc, octets));
368           return -1;
369 }
370 
371 
372 blocking_pipe_header *
receive_blocking_resp_internal(blocking_child * c)373 receive_blocking_resp_internal(
374           blocking_child *    c
375           )
376 {
377           blocking_pipe_header          hdr;
378           blocking_pipe_header *        resp;
379           size_t                        rc;
380           size_t                        octets;
381 
382           DEBUG_REQUIRE(c->resp_read_pipe != -1);
383 
384           resp = NULL;
385           rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));
386 
387           if (0 == rc) {
388                     /* this is the normal child exited indication */
389           } else if (rc != sizeof(hdr)) {
390                     TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
391                                 rc, sizeof(hdr)));
392           } else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
393                     TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
394                                 hdr.magic_sig));
395           } else {
396                     INSIST(sizeof(hdr) < hdr.octets &&
397                            hdr.octets < 16 * 1024);
398                     resp = emalloc(hdr.octets);
399                     memcpy(resp, &hdr, sizeof(*resp));
400                     octets = hdr.octets - sizeof(hdr);
401                     rc = netread(c->resp_read_pipe, (char *)(resp + 1),
402                                    octets);
403 
404                     if (rc != octets)
405                               TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
406                                           rc, octets));
407                     else
408                               return resp;
409           }
410 
411           cleanup_after_child(c);
412 
413           if (resp != NULL)
414                     free(resp);
415 
416           return NULL;
417 }
418 
419 
#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
/*
 * fork_deferred_worker() - fork the children whose creation was put
 * off until root privileges were dropped.  Those are the slots that
 * already have a request pipe but no child pid yet.
 */
void
fork_deferred_worker(void)
{
	u_int	slot;

	REQUIRE(droproot && root_dropped);

	for (slot = 0; slot < blocking_children_alloc; slot++) {
		blocking_child *child = blocking_children[slot];

		if (child != NULL &&
		    child->req_write_pipe != -1 &&
		    0 == child->pid)
			fork_blocking_child(child);
	}
}
#endif
438 
439 #if HAVE_SETPROCTITLE == 0
440 static void
setproctitle(const char * fmt,...)441 setproctitle(const char *fmt, ...)
442 {
443           va_list ap;
444           char b1[128];
445           int argcc, argvlen, l;
446 
447           if (saved_argc == 0)
448                     return;
449 
450           va_start(ap, fmt);
451           vsnprintf(b1, sizeof(b1), fmt, ap);
452           va_end(ap);
453 
454           /* Clear argv */
455           for (argvlen = 0, argcc = 0; argcc < saved_argc; argcc++) {
456                     l = strlen(saved_argv[argcc]);
457                     argvlen += l + 1;
458                     memset(saved_argv[argcc], 0, l);
459           }
460           l = snprintf(saved_argv[0], argvlen, "ntpd: %s", b1);
461           for (argcc = 1; argcc < saved_argc; argcc++)
462                     saved_argv[argcc] = &saved_argv[0][l];
463 }
464 #endif
465 
466 static void
fork_blocking_child(blocking_child * c)467 fork_blocking_child(
468           blocking_child *    c
469           )
470 {
471           static int          atexit_installed;
472           static int          blocking_pipes[4] = { -1, -1, -1, -1 };
473           int                 rc;
474           int                 was_pipe;
475           int                 is_pipe;
476           int                 saved_errno = 0;
477           int                 childpid;
478           int                 keep_fd;
479           int                 fd;
480 
481           /*
482            * parent and child communicate via a pair of pipes.
483            *
484            * 0 child read request
485            * 1 parent write request
486            * 2 parent read response
487            * 3 child write response
488            */
489           if (-1 == c->req_write_pipe) {
490                     rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
491                     if (0 != rc) {
492                               saved_errno = errno;
493                     } else {
494                               rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
495                               if (0 != rc) {
496                                         saved_errno = errno;
497                                         close(blocking_pipes[0]);
498                                         close(blocking_pipes[1]);
499                               } else {
500                                         INSIST(was_pipe == is_pipe);
501                               }
502                     }
503                     if (0 != rc) {
504                               errno = saved_errno;
505                               msyslog(LOG_ERR, "unable to create worker pipes: %m");
506                               exit(1);
507                     }
508 
509                     /*
510                      * Move the descriptors the parent will keep open out of the
511                      * low descriptors preferred by C runtime buffered FILE *.
512                      */
513                     c->req_write_pipe = move_fd(blocking_pipes[1]);
514                     c->resp_read_pipe = move_fd(blocking_pipes[2]);
515                     /*
516                      * wake any worker child on orderly shutdown of the
517                      * daemon so that it can notice the broken pipes and
518                      * go away promptly.
519                      */
520                     if (!atexit_installed) {
521                               atexit(&send_worker_home_atexit);
522                               atexit_installed = TRUE;
523                     }
524           }
525 
526 #if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
527           /* defer the fork until after root is dropped */
528           if (droproot && !root_dropped)
529                     return;
530 #endif
531           if (syslog_file != NULL)
532                     fflush(syslog_file);
533           fflush(stdout);
534           fflush(stderr);
535 
536           /* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
537            * or undefined effects. We don't do it and leave SIGCHLD alone.
538            */
539           /* signal_no_reset(SIGCHLD, SIG_IGN); */
540 
541           childpid = fork();
542           if (-1 == childpid) {
543                     msyslog(LOG_ERR, "unable to fork worker: %m");
544                     exit(1);
545           }
546 
547           if (childpid) {
548                     /* this is the parent */
549                     TRACE(1, ("forked worker child (pid %d)\n", childpid));
550                     c->pid = childpid;
551                     c->ispipe = is_pipe;
552 
553                     /* close the child's pipe descriptors. */
554                     close(blocking_pipes[0]);
555                     close(blocking_pipes[3]);
556 
557                     memset(blocking_pipes, -1, sizeof(blocking_pipes));
558 
559                     /* wire into I/O loop */
560                     (*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);
561 
562                     /* wait until child is done */
563                     rc = netread(c->resp_read_pipe, &rc, sizeof(rc));
564 
565                     return;             /* parent returns */
566           }
567 
568           /*
569            * The parent gets the child pid as the return value of fork().
570            * The child must work for it.
571            */
572           c->pid = getpid();
573           worker_process = TRUE;
574 
575           /*
576            * Change the process name of the child to avoid confusion
577            * about ntpd trunning twice.
578            */
579           setproctitle("asynchronous dns resolver");
580 
581           /*
582            * In the child, close all files except stdin, stdout, stderr,
583            * and the two child ends of the pipes.
584            */
585           DEBUG_INSIST(-1 == c->req_read_pipe);
586           DEBUG_INSIST(-1 == c->resp_write_pipe);
587           c->req_read_pipe = blocking_pipes[0];
588           c->resp_write_pipe = blocking_pipes[3];
589 
590           kill_asyncio(0);
591 
592           /* Tell parent we are ready */
593           rc = netwrite(c->resp_write_pipe, &rc, sizeof(rc));
594 
595           closelog();
596           if (syslog_file != NULL) {
597                     fclose(syslog_file);
598                     syslog_file = NULL;
599                     syslogit = TRUE;
600           }
601           keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
602           for (fd = 3; fd < keep_fd; fd++)
603                     if (fd != c->req_read_pipe &&
604                         fd != c->resp_write_pipe)
605                               close(fd);
606           close_all_beyond(keep_fd);
607           /*
608            * We get signals from refclock serial I/O on NetBSD in the
609            * worker if we do not reset SIGIO's handler to the default.
610            * It is not conditionalized for NetBSD alone because on
611            * systems where it is not needed, it is harmless, and that
612            * allows us to handle unknown others with NetBSD behavior.
613            * [Bug 1386]
614            */
615 #if defined(USE_SIGIO)
616           signal_no_reset(SIGIO, SIG_DFL);
617 #elif defined(USE_SIGPOLL)
618           signal_no_reset(SIGPOLL, SIG_DFL);
619 #endif
620           signal_no_reset(SIGHUP, worker_sighup);
621           init_logging("ntp_intres", 0, FALSE);
622           setup_logfile(NULL);
623 
624 #ifdef HAVE_DROPROOT
625           (void) set_user_group_ids();
626 #endif
627 
628           /*
629            * And now back to the portable code
630            */
631           exit_worker(blocking_child_common(c));
632 }
633 
634 
/*
 * worker_global_lock() - does nothing in this implementation; the
 * argument is accepted and ignored.
 */
void
worker_global_lock(
	int	inOrOut
	)
{
	(void)inOrOut;
}
639 
640 #else     /* !WORK_FORK follows */
641 char work_fork_nonempty_compilation_unit;
642 #endif
643