1 /*        $NetBSD: ntp_intres.c,v 1.13 2024/08/18 20:47:13 christos Exp $       */
2 
3 /*
4  * ntp_intres.c - Implements a generic blocking worker child or thread,
5  *                    initially to provide a nonblocking solution for DNS
6  *                    name to address lookups available with getaddrinfo().
7  *
8  * This is a new implementation as of 2009 sharing the filename and
9  * very little else with the prior implementation, which used a
10  * temporary file to receive a single set of requests from the parent,
11  * and a NTP mode 7 authenticated request to push back responses.
12  *
13  * A primary goal in rewriting this code was the need to support the
14  * pool configuration directive's requirement to retrieve multiple
15  * addresses resolving a single name, which has previously been
16  * satisfied with blocking resolver calls from the ntpd mainline code.
17  *
18  * A secondary goal is to provide a generic mechanism for other
19  * blocking operations to be delegated to a worker using a common
20  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
21  * and work_thread.c implement the generic mechanism.  This file
22  * implements the two current consumers, getaddrinfo_sometime() and the
23  * presently unused getnameinfo_sometime().
24  *
25  * Both routines deliver results to a callback and manage memory
26  * allocation, meaning there is no freeaddrinfo_sometime().
27  *
28  * The initial implementation for Unix uses a pair of unidirectional
29  * pipes, one each for requests and responses, connecting the forked
30  * blocking child worker with the ntpd mainline.  The threaded code
31  * uses arrays of pointers to queue requests and responses.
32  *
33  * The parent drives the process, including scheduling sleeps between
34  * retries.
35  *
36  * Memory is managed differently for a child process, which mallocs
37  * request buffers to read from the pipe into, whereas the threaded
38  * code mallocs a copy of the request to hand off to the worker via
39  * the queueing array.  The resulting request buffer is free()d by
40  * platform-independent code.  A wrinkle is the request needs to be
41  * available to the requestor during response processing.
42  *
43  * Response memory allocation is also platform-dependent.  With a
44  * separate process and pipes, the response is free()d after being
45  * written to the pipe.  With threads, the same memory is handed
46  * over and the requestor frees it after processing is completed.
47  *
48  * The code should be generalized to support threads on Unix using
49  * much of the same code used for Windows initially.
50  *
51  */
52 #ifdef HAVE_CONFIG_H
53 # include <config.h>
54 #endif
55 
56 #include "ntp_workimpl.h"
57 
58 #ifdef WORKER
59 
60 #include <stdio.h>
61 #include <ctype.h>
62 #include <signal.h>
63 
64 /**/
65 #ifdef HAVE_SYS_TYPES_H
66 # include <sys/types.h>
67 #endif
68 #ifdef HAVE_NETINET_IN_H
69 #include <netinet/in.h>
70 #endif
71 #include <arpa/inet.h>
72 /**/
73 #ifdef HAVE_SYS_PARAM_H
74 # include <sys/param.h>
75 #endif
76 
77 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
78 # define HAVE_RES_INIT
79 #endif
80 
81 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
82 # ifdef HAVE_ARPA_NAMESER_H
83 #  include <arpa/nameser.h> /* DNS HEADER struct */
84 # endif
85 # ifdef HAVE_NETDB_H
86 #  include <netdb.h>
87 # endif
88 # include <resolv.h>
89 #endif
90 
91 #include "ntp.h"
92 #include "ntp_debug.h"
93 #include "ntp_malloc.h"
94 #include "ntp_syslog.h"
95 #include "ntp_unixtime.h"
96 #include "ntp_intres.h"
97 #include "intreswork.h"
98 
99 
100 /*
101  * Following are implementations of getaddrinfo_sometime() and
102  * getnameinfo_sometime().  Each is implemented in three routines:
103  *
104  * getaddrinfo_sometime()               getnameinfo_sometime()
105  * blocking_getaddrinfo()               blocking_getnameinfo()
106  * getaddrinfo_sometime_complete()      getnameinfo_sometime_complete()
107  *
108  * The first runs in the parent and marshalls (or serializes) request
109  * parameters into a request blob which is processed in the child by
110  * the second routine, blocking_*(), which serializes the results into
111  * a response blob unpacked by the third routine, *_complete(), which
112  * calls the callback routine provided with the request and frees
113  * _request_ memory allocated by the first routine.  Response memory
114  * is managed by the code which calls the *_complete routines.
115  */
116 
117 
118 /* === typedefs === */
119 typedef struct blocking_gai_req_tag {   /* marshalled args */
120           size_t                        octets;
121           u_int                         dns_idx;
122           time_t                        scheduled;
123           time_t                        earliest;
124           int                           retry;
125           struct addrinfo               hints;
126           u_int                         qflags;
127           gai_sometime_callback         callback;
128           void *                        context;
129           size_t                        nodesize;
130           size_t                        servsize;
131 } blocking_gai_req;
132 
/*
 * blocking_gai_resp - fixed-size head of a getaddrinfo() response as
 * built by blocking_getaddrinfo().
 *
 * It is followed by ai_count struct addrinfo, then ai_count
 * sockaddr_u, and finally the canonical name strings.
 */
typedef struct blocking_gai_resp_tag blocking_gai_resp;

struct blocking_gai_resp_tag {
	size_t	octets;		/* this struct plus all trailing data */
	int	retcode;	/* value returned by getaddrinfo() */
	int	retry;		/* retry value copied from the request */
	int	gai_errno;	/* errno, saved only for the EAI_SYSTEM case */
	int	ai_count;	/* number of addrinfo entries that follow */
};
144 
145 typedef struct blocking_gni_req_tag {
146           size_t                        octets;
147           u_int                         dns_idx;
148           time_t                        scheduled;
149           time_t                        earliest;
150           int                           retry;
151           size_t                        hostoctets;
152           size_t                        servoctets;
153           int                           flags;
154           gni_sometime_callback         callback;
155           void *                        context;
156           sockaddr_u                    socku;
157 } blocking_gni_req;
158 
/*
 * blocking_gni_resp - fixed-size head of a getnameinfo() response.
 *
 * It is followed by hostoctets bytes of null-terminated host, then
 * servoctets bytes of null-terminated service.
 */
typedef struct blocking_gni_resp_tag blocking_gni_resp;

struct blocking_gni_resp_tag {
	size_t	octets;		/* size of the response */
	int	retcode;	/* value returned by getnameinfo() */
	int	gni_errno;	/* errno, saved only for the EAI_SYSTEM case */
	int	retry;		/* retry value copied from the request */
	size_t	hostoctets;	/* strlen(host) + 1 on success, 0 on failure */
	size_t	servoctets;	/* strlen(service) + 1 on success, 0 on failure */
};
171 
172 /* per-DNS-worker state in parent */
173 typedef struct dnschild_ctx_tag {
174           u_int     index;
175           time_t    next_dns_timeslot;
176 } dnschild_ctx;
177 
178 /* per-DNS-worker state in worker */
179 typedef struct dnsworker_ctx_tag {
180           blocking_child *    c;
181           time_t                        ignore_scheduled_before;
182 #ifdef HAVE_RES_INIT
183           time_t    next_res_init;
184 #endif
185 } dnsworker_ctx;
186 
187 
188 /* === variables === */
189 dnschild_ctx **               dnschild_contexts;            /* parent */
190 u_int                         dnschild_contexts_alloc;
191 dnsworker_ctx **    dnsworker_contexts;           /* child */
192 u_int                         dnsworker_contexts_alloc;
193 
194 #ifdef HAVE_RES_INIT
195 static    time_t              next_res_init;
196 #endif
197 
198 
199 /* === forward declarations === */
/* context management */
static u_int		reserve_dnschild_ctx(void);
static u_int		get_dnschild_ctx(void);
static dnsworker_ctx *	get_worker_context(blocking_child *child,
					   u_int dns_idx);

/* scheduling and retry policy */
static void		scheduled_sleep(time_t scheduled, time_t earliest,
					dnsworker_ctx *worker_ctx);
static void		manage_dns_retry_interval(time_t *pscheduled,
						  time_t *pearliest,
						  int *pretry,
						  time_t *pnext_timeslot,
						  int/*BOOL*/ noerr);
static int		should_retry_dns(int retcode, int res_errno);

/* resolver configuration reload, a no-op without res_init() */
#ifdef HAVE_RES_INIT
static void		reload_resolv_conf(dnsworker_ctx *worker_ctx);
#else
# define reload_resolv_conf(wc)	do { (void)(wc); } while (FALSE)
#endif

/* response handlers run in the parent */
static void		getaddrinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
static void		getnameinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
223 
224 
225 /* === functions === */
226 /*
227  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
228  *                              invokes provided callback completion function.
229  */
230 int
getaddrinfo_sometime_ex(const char * node,const char * service,const struct addrinfo * hints,int retry,gai_sometime_callback callback,void * context,u_int qflags)231 getaddrinfo_sometime_ex(
232           const char *                  node,
233           const char *                  service,
234           const struct addrinfo *       hints,
235           int                           retry,
236           gai_sometime_callback         callback,
237           void *                        context,
238           u_int                         qflags
239           )
240 {
241           blocking_gai_req *  gai_req;
242           u_int                         idx;
243           dnschild_ctx *                child_ctx;
244           size_t                        req_size;
245           size_t                        nodesize;
246           size_t                        servsize;
247           time_t                        now;
248 
249           REQUIRE(NULL != node);
250           if (NULL != hints) {
251                     REQUIRE(0 == hints->ai_addrlen);
252                     REQUIRE(NULL == hints->ai_addr);
253                     REQUIRE(NULL == hints->ai_canonname);
254                     REQUIRE(NULL == hints->ai_next);
255           }
256 
257           idx = get_dnschild_ctx();
258           child_ctx = dnschild_contexts[idx];
259 
260           nodesize = strlen(node) + 1;
261           servsize = strlen(service) + 1;
262           req_size = sizeof(*gai_req) + nodesize + servsize;
263 
264           gai_req = emalloc_zero(req_size);
265 
266           gai_req->octets = req_size;
267           gai_req->dns_idx = idx;
268           now = time(NULL);
269           gai_req->scheduled = now;
270           gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
271           child_ctx->next_dns_timeslot = gai_req->earliest;
272           if (hints != NULL)
273                     gai_req->hints = *hints;
274           gai_req->retry = retry;
275           gai_req->callback = callback;
276           gai_req->context = context;
277           gai_req->nodesize = nodesize;
278           gai_req->servsize = servsize;
279           gai_req->qflags = qflags;
280 
281           memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
282           memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
283                  servsize);
284 
285           if (queue_blocking_request(
286                     BLOCKING_GETADDRINFO,
287                     gai_req,
288                     req_size,
289                     &getaddrinfo_sometime_complete,
290                     gai_req)) {
291 
292                     msyslog(LOG_ERR, "unable to queue getaddrinfo request");
293                     errno = EFAULT;
294                     return -1;
295           }
296 
297           return 0;
298 }
299 
300 int
blocking_getaddrinfo(blocking_child * c,blocking_pipe_header * req)301 blocking_getaddrinfo(
302           blocking_child *    c,
303           blocking_pipe_header *        req
304           )
305 {
306           blocking_gai_req *  gai_req;
307           dnsworker_ctx *               worker_ctx;
308           blocking_pipe_header *        resp;
309           blocking_gai_resp * gai_resp;
310           char *                        node;
311           char *                        service;
312           struct addrinfo *   ai_res;
313           struct addrinfo *   ai;
314           struct addrinfo *   serialized_ai;
315           size_t                        canons_octets;
316           size_t                        this_octets;
317           size_t                        resp_octets;
318           char *                        cp;
319           time_t                        time_now;
320 
321           gai_req = (void *)((char *)req + sizeof(*req));
322           node = (char *)gai_req + sizeof(*gai_req);
323           service = node + gai_req->nodesize;
324 
325           worker_ctx = get_worker_context(c, gai_req->dns_idx);
326           scheduled_sleep(gai_req->scheduled, gai_req->earliest,
327                               worker_ctx);
328           reload_resolv_conf(worker_ctx);
329 
330           /*
331            * Take a shot at the final size, better to overestimate
332            * at first and then realloc to a smaller size.
333            */
334 
335           resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
336                           16 * (sizeof(struct addrinfo) +
337                                   sizeof(sockaddr_u)) +
338                           256;
339           resp = emalloc_zero(resp_octets);
340           gai_resp = (void *)(resp + 1);
341 
342           TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
343                       node, service, gai_req->hints.ai_family,
344                       gai_req->hints.ai_flags));
345 #ifdef DEBUG
346           if (debug >= 2)
347                     fflush(stdout);
348 #endif
349           ai_res = NULL;
350           gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
351                                                   &ai_res);
352           gai_resp->retry = gai_req->retry;
353 #ifdef EAI_SYSTEM
354           if (EAI_SYSTEM == gai_resp->retcode)
355                     gai_resp->gai_errno = errno;
356 #endif
357           canons_octets = 0;
358 
359           if (0 == gai_resp->retcode) {
360                     ai = ai_res;
361                     while (NULL != ai) {
362                               gai_resp->ai_count++;
363                               if (ai->ai_canonname)
364                                         canons_octets += strlen(ai->ai_canonname) + 1;
365                               ai = ai->ai_next;
366                     }
367                     /*
368                      * If this query succeeded only after retrying, DNS may have
369                      * just become responsive.  Ignore previously-scheduled
370                      * retry sleeps once for each pending request, similar to
371                      * the way scheduled_sleep() does when its worker_sleep()
372                      * is interrupted.
373                      */
374                     if (gai_resp->retry > INITIAL_DNS_RETRY) {
375                               time_now = time(NULL);
376                               worker_ctx->ignore_scheduled_before = time_now;
377                               TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
378                                           humantime(time_now)));
379                     }
380           }
381 
382           /*
383            * Our response consists of a header, followed by ai_count
384            * addrinfo structs followed by ai_count sockaddr_storage
385            * structs followed by the canonical names.
386            */
387           gai_resp->octets = sizeof(*gai_resp)
388                                   + gai_resp->ai_count
389                                         * (sizeof(gai_req->hints)
390                                            + sizeof(sockaddr_u))
391                                   + canons_octets;
392 
393           resp_octets = sizeof(*resp) + gai_resp->octets;
394           resp = erealloc(resp, resp_octets);
395           gai_resp = (void *)(resp + 1);
396 
397           /* cp serves as our current pointer while serializing */
398           cp = (void *)(gai_resp + 1);
399           canons_octets = 0;
400 
401           if (0 == gai_resp->retcode) {
402                     ai = ai_res;
403                     while (NULL != ai) {
404                               memcpy(cp, ai, sizeof(*ai));
405                               serialized_ai = (void *)cp;
406                               cp += sizeof(*ai);
407 
408                               /* transform ai_canonname into offset */
409                               if (NULL != ai->ai_canonname) {
410                                         serialized_ai->ai_canonname = (char *)canons_octets;
411                                         canons_octets += strlen(ai->ai_canonname) + 1;
412                               }
413 
414                               /* leave fixup of ai_addr pointer for receiver */
415 
416                               ai = ai->ai_next;
417                     }
418 
419                     ai = ai_res;
420                     while (NULL != ai) {
421                               INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
422                               memcpy(cp, ai->ai_addr, ai->ai_addrlen);
423                               cp += sizeof(sockaddr_u);
424 
425                               ai = ai->ai_next;
426                     }
427 
428                     ai = ai_res;
429                     while (NULL != ai) {
430                               if (NULL != ai->ai_canonname) {
431                                         this_octets = strlen(ai->ai_canonname) + 1;
432                                         memcpy(cp, ai->ai_canonname, this_octets);
433                                         cp += this_octets;
434                               }
435 
436                               ai = ai->ai_next;
437                     }
438                     freeaddrinfo(ai_res);
439           }
440 
441           /*
442            * make sure our walk and earlier calc match
443            */
444           DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
445 
446           if (queue_blocking_response(c, resp, resp_octets, req)) {
447                     msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
448                     return -1;
449           }
450 
451           return 0;
452 }
453 
454 int
getaddrinfo_sometime(const char * node,const char * service,const struct addrinfo * hints,int retry,gai_sometime_callback callback,void * context)455 getaddrinfo_sometime(
456           const char *                  node,
457           const char *                  service,
458           const struct addrinfo *       hints,
459           int                           retry,
460           gai_sometime_callback         callback,
461           void *                        context
462           )
463 {
464           return getaddrinfo_sometime_ex(node, service, hints, retry,
465                                                callback, context, 0);
466 }
467 
468 
469 static void
getaddrinfo_sometime_complete(blocking_work_req rtype,void * context,size_t respsize,void * resp)470 getaddrinfo_sometime_complete(
471           blocking_work_req   rtype,
472           void *                        context,
473           size_t                        respsize,
474           void *                        resp
475           )
476 {
477           blocking_gai_req *  gai_req;
478           blocking_gai_resp * gai_resp;
479           dnschild_ctx *                child_ctx;
480           struct addrinfo *   ai;
481           struct addrinfo *   next_ai;
482           sockaddr_u *                  psau;
483           char *                        node;
484           char *                        service;
485           char *                        canon_start;
486           time_t                        time_now;
487           int                           again, noerr;
488           int                           af;
489           const char *                  fam_spec;
490           int                           i;
491 
492           gai_req = context;
493           gai_resp = resp;
494 
495           DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
496           DEBUG_REQUIRE(respsize == gai_resp->octets);
497 
498           node = (char *)gai_req + sizeof(*gai_req);
499           service = node + gai_req->nodesize;
500 
501           child_ctx = dnschild_contexts[gai_req->dns_idx];
502 
503           if (0 == gai_resp->retcode) {
504                     /*
505                      * If this query succeeded only after retrying, DNS may have
506                      * just become responsive.
507                      */
508                     if (gai_resp->retry > INITIAL_DNS_RETRY) {
509                               time_now = time(NULL);
510                               child_ctx->next_dns_timeslot = time_now;
511                               TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
512                                           gai_req->dns_idx, humantime(time_now)));
513                     }
514           } else {
515                     noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
516                     again = noerr || should_retry_dns(
517                                                   gai_resp->retcode, gai_resp->gai_errno);
518                     /*
519                      * exponential backoff of DNS retries to 64s
520                      */
521                     if (gai_req->retry > 0 && again) {
522                               /* log the first retry only */
523                               if (INITIAL_DNS_RETRY == gai_req->retry)
524                                         NLOG(NLOG_SYSINFO) {
525                                                   af = gai_req->hints.ai_family;
526                                                   fam_spec = (AF_INET6 == af)
527                                                                    ? " (AAAA)"
528                                                                    : (AF_INET == af)
529                                                                            ? " (A)"
530                                                                            : "";
531 #ifdef EAI_SYSTEM
532                                                   if (EAI_SYSTEM == gai_resp->retcode) {
533                                                             errno = gai_resp->gai_errno;
534                                                             msyslog(LOG_INFO,
535                                                                       "retrying DNS %s%s: EAI_SYSTEM %d: %m",
536                                                                       node, fam_spec,
537                                                                       gai_resp->gai_errno);
538                                                   } else
539 #endif
540                                                             msyslog(LOG_INFO,
541                                                                       "retrying DNS %s%s: %s (%d)",
542                                                                       node, fam_spec,
543                                                                       gai_strerror(gai_resp->retcode),
544                                                                       gai_resp->retcode);
545                                         }
546                               manage_dns_retry_interval(
547                                         &gai_req->scheduled, &gai_req->earliest,
548                                         &gai_req->retry, &child_ctx->next_dns_timeslot,
549                                         noerr);
550                               if (!queue_blocking_request(
551                                                   BLOCKING_GETADDRINFO,
552                                                   gai_req,
553                                                   gai_req->octets,
554                                                   &getaddrinfo_sometime_complete,
555                                                   gai_req))
556                                         return;
557                               else
558                                         msyslog(LOG_ERR,
559                                                   "unable to retry hostname %s",
560                                                   node);
561                     }
562           }
563 
564           /*
565            * fixup pointers in returned addrinfo array
566            */
567           ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
568           next_ai = NULL;
569           for (i = gai_resp->ai_count - 1; i >= 0; i--) {
570                     ai[i].ai_next = next_ai;
571                     next_ai = &ai[i];
572           }
573 
574           psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
575           canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
576 
577           for (i = 0; i < gai_resp->ai_count; i++) {
578                     if (NULL != ai[i].ai_addr)
579                               ai[i].ai_addr = &psau->sa;
580                     psau++;
581                     if (NULL != ai[i].ai_canonname)
582                               ai[i].ai_canonname += (size_t)canon_start;
583           }
584 
585           ENSURE((char *)psau == canon_start);
586 
587           if (!gai_resp->ai_count)
588                     ai = NULL;
589 
590           (*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
591                                    gai_req->context, node, service,
592                                    &gai_req->hints, ai);
593 
594           free(gai_req);
595           /* gai_resp is part of block freed by process_blocking_resp() */
596 }
597 
598 
#ifdef TEST_BLOCKING_WORKER
/*
 * gai_test_callback - test-only getaddrinfo_sometime() callback.
 *
 * On error it traces the failure and returns.  Otherwise it traces
 * every returned address and starts a getnameinfo_sometime() lookup
 * for each one, completing in gni_test_callback.
 */
void
gai_test_callback(
	int			rescode,
	int			gai_errno,
	void *			context,
	const char *		name,
	const char *		service,
	const struct addrinfo *	hints,
	const struct addrinfo *	ai_res
	)
{
	sockaddr_u addr;

	if (0 != rescode) {
		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
			  context, rescode, name, service));
		return;
	}

	/* rescode is known to be zero from here on */
	for (; NULL != ai_res; ai_res = ai_res->ai_next) {
		ZERO_SOCK(&addr);
		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
			  context,
			  AF(&addr),
			  stoa(&addr),
			  (ai_res->ai_canonname)
			      ? ai_res->ai_canonname
			      : "",
			  (SOCK_DGRAM == ai_res->ai_socktype)
			      ? "DGRAM"
			      : (SOCK_STREAM == ai_res->ai_socktype)
				    ? "STREAM"
				    : "(other)",
			  ai_res,
			  ai_res->ai_addr,
			  ai_res->ai_next));

		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr,
				     128, 32, 0,
				     gni_test_callback, context);
	}
}
#endif	/* TEST_BLOCKING_WORKER */
634 
635 
636 int
getnameinfo_sometime(sockaddr_u * psau,size_t hostoctets,size_t servoctets,int flags,gni_sometime_callback callback,void * context)637 getnameinfo_sometime(
638           sockaddr_u *                  psau,
639           size_t                        hostoctets,
640           size_t                        servoctets,
641           int                           flags,
642           gni_sometime_callback         callback,
643           void *                        context
644           )
645 {
646           blocking_gni_req *  gni_req;
647           u_int                         idx;
648           dnschild_ctx *                child_ctx;
649           time_t                        time_now;
650 
651           REQUIRE(hostoctets);
652           REQUIRE(hostoctets + servoctets < 1024);
653 
654           idx = get_dnschild_ctx();
655           child_ctx = dnschild_contexts[idx];
656 
657           gni_req = emalloc_zero(sizeof(*gni_req));
658 
659           gni_req->octets = sizeof(*gni_req);
660           gni_req->dns_idx = idx;
661           time_now = time(NULL);
662           gni_req->scheduled = time_now;
663           gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
664           child_ctx->next_dns_timeslot = gni_req->earliest;
665           memcpy(&gni_req->socku, psau, SOCKLEN(psau));
666           gni_req->hostoctets = hostoctets;
667           gni_req->servoctets = servoctets;
668           gni_req->flags = flags;
669           gni_req->retry = INITIAL_DNS_RETRY;
670           gni_req->callback = callback;
671           gni_req->context = context;
672 
673           if (queue_blocking_request(
674                     BLOCKING_GETNAMEINFO,
675                     gni_req,
676                     sizeof(*gni_req),
677                     &getnameinfo_sometime_complete,
678                     gni_req)) {
679 
680                     msyslog(LOG_ERR, "unable to queue getnameinfo request");
681                     errno = EFAULT;
682                     return -1;
683           }
684 
685           return 0;
686 }
687 
688 
689 int
blocking_getnameinfo(blocking_child * c,blocking_pipe_header * req)690 blocking_getnameinfo(
691           blocking_child *    c,
692           blocking_pipe_header *        req
693           )
694 {
695           blocking_gni_req *  gni_req;
696           dnsworker_ctx *               worker_ctx;
697           blocking_pipe_header *        resp;
698           blocking_gni_resp * gni_resp;
699           size_t                        octets;
700           size_t                        resp_octets;
701           char *                        service;
702           char *                        cp;
703           int                           rc;
704           time_t                        time_now;
705           char                          host[1024];
706 
707           gni_req = (void *)((char *)req + sizeof(*req));
708 
709           octets = gni_req->hostoctets + gni_req->servoctets;
710 
711           /*
712            * Some alloca() implementations are fragile regarding
713            * large allocations.  We only need room for the host
714            * and service names.
715            */
716           REQUIRE(octets < sizeof(host));
717           service = host + gni_req->hostoctets;
718 
719           worker_ctx = get_worker_context(c, gni_req->dns_idx);
720           scheduled_sleep(gni_req->scheduled, gni_req->earliest,
721                               worker_ctx);
722           reload_resolv_conf(worker_ctx);
723 
724           /*
725            * Take a shot at the final size, better to overestimate
726            * then realloc to a smaller size.
727            */
728 
729           resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
730           resp = emalloc_zero(resp_octets);
731           gni_resp = (void *)((char *)resp + sizeof(*resp));
732 
733           TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
734                       stoa(&gni_req->socku), gni_req->flags,
735                       (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
736 
737           gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
738                                                   SOCKLEN(&gni_req->socku),
739                                                   host,
740                                                   gni_req->hostoctets,
741                                                   service,
742                                                   gni_req->servoctets,
743                                                   gni_req->flags);
744           gni_resp->retry = gni_req->retry;
745 #ifdef EAI_SYSTEM
746           if (EAI_SYSTEM == gni_resp->retcode)
747                     gni_resp->gni_errno = errno;
748 #endif
749 
750           if (0 != gni_resp->retcode) {
751                     gni_resp->hostoctets = 0;
752                     gni_resp->servoctets = 0;
753           } else {
754                     gni_resp->hostoctets = strlen(host) + 1;
755                     gni_resp->servoctets = strlen(service) + 1;
756                     /*
757                      * If this query succeeded only after retrying, DNS may have
758                      * just become responsive.  Ignore previously-scheduled
759                      * retry sleeps once for each pending request, similar to
760                      * the way scheduled_sleep() does when its worker_sleep()
761                      * is interrupted.
762                      */
763                     if (gni_req->retry > INITIAL_DNS_RETRY) {
764                               time_now = time(NULL);
765                               worker_ctx->ignore_scheduled_before = time_now;
766                               TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
767                                         humantime(time_now)));
768                     }
769           }
770           octets = gni_resp->hostoctets + gni_resp->servoctets;
771           /*
772            * Our response consists of a header, followed by the host and
773            * service strings, each null-terminated.
774            */
775           resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
776 
777           resp = erealloc(resp, resp_octets);
778           gni_resp = (void *)(resp + 1);
779 
780           gni_resp->octets = sizeof(*gni_resp) + octets;
781 
782           /* cp serves as our current pointer while serializing */
783           cp = (void *)(gni_resp + 1);
784 
785           if (0 == gni_resp->retcode) {
786                     memcpy(cp, host, gni_resp->hostoctets);
787                     cp += gni_resp->hostoctets;
788                     memcpy(cp, service, gni_resp->servoctets);
789                     cp += gni_resp->servoctets;
790           }
791 
792           INSIST((size_t)(cp - (char *)resp) == resp_octets);
793           INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
794 
795           rc = queue_blocking_response(c, resp, resp_octets, req);
796           if (rc)
797                     msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
798           return rc;
799 }
800 
801 
802 static void
getnameinfo_sometime_complete(blocking_work_req rtype,void * context,size_t respsize,void * resp)803 getnameinfo_sometime_complete(
804           blocking_work_req   rtype,
805           void *                        context,
806           size_t                        respsize,
807           void *                        resp
808           )
809 {
810           blocking_gni_req *  gni_req;
811           blocking_gni_resp * gni_resp;
812           dnschild_ctx *                child_ctx;
813           char *                        host;
814           char *                        service;
815           time_t                        time_now;
816           int                           again;
817 
818           gni_req = context;
819           gni_resp = resp;
820 
821           DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
822           DEBUG_REQUIRE(respsize == gni_resp->octets);
823 
824           child_ctx = dnschild_contexts[gni_req->dns_idx];
825 
826           if (0 == gni_resp->retcode) {
827                     /*
828                      * If this query succeeded only after retrying, DNS may have
829                      * just become responsive.
830                      */
831                     if (gni_resp->retry > INITIAL_DNS_RETRY) {
832                               time_now = time(NULL);
833                               child_ctx->next_dns_timeslot = time_now;
834                               TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
835                                           gni_req->dns_idx, humantime(time_now)));
836                     }
837           } else {
838                     again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
839                     /*
840                      * exponential backoff of DNS retries to 64s
841                      */
842                     if (gni_req->retry > 0)
843                               manage_dns_retry_interval(&gni_req->scheduled,
844                                   &gni_req->earliest, &gni_req->retry,
845                                                               &child_ctx->next_dns_timeslot, FALSE);
846 
847                     if (gni_req->retry > 0 && again) {
848                               if (!queue_blocking_request(
849                                         BLOCKING_GETNAMEINFO,
850                                         gni_req,
851                                         gni_req->octets,
852                                         &getnameinfo_sometime_complete,
853                                         gni_req))
854                                         return;
855 
856                               msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
857                     }
858           }
859 
860           if (!gni_resp->hostoctets) {
861                     host = NULL;
862                     service = NULL;
863           } else {
864                     host = (char *)gni_resp + sizeof(*gni_resp);
865                     service = (gni_resp->servoctets)
866                                     ? host + gni_resp->hostoctets
867                                     : NULL;
868           }
869 
870           (*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
871                                    &gni_req->socku, gni_req->flags, host,
872                                    service, gni_req->context);
873 
874           free(gni_req);
875           /* gni_resp is part of block freed by process_blocking_resp() */
876 }
877 
878 
#ifdef TEST_BLOCKING_WORKER
/*
 * gni_test_callback - test-only getnameinfo completion callback that
 * merely traces the outcome it was handed.
 */
void
gni_test_callback(
	int		rescode,
	int		gni_errno,
	sockaddr_u *	psau,
	int		flags,
	const char *	host,
	const char *	service,
	void *		context
	)
{
	if (rescode) {
		/* lookup failed: report the error details */
		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
			  context, rescode, gni_errno, flags, stoa(psau)));
	} else {
		/* lookup succeeded: report the names obtained */
		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
			  host, service, stoa(psau), context));
	}
}
#endif	/* TEST_BLOCKING_WORKER */
890 
891 
#ifdef HAVE_RES_INIT
/*
 * reload_resolv_conf - call res_init() at most once per minute.
 *
 * This is ad-hoc.  /etc/resolv.conf is reloaded once per minute to
 * pick up on changes from the DHCP client.  [Bug 1226]
 * When using threads for the workers, this needs to happen only once
 * per minute process-wide, so the deadline is mirrored in the
 * file-scope next_res_init.
 */
static void
reload_resolv_conf(
	dnsworker_ctx *	worker_ctx
	)
{
	time_t	now;

	now = time(NULL);
# ifdef WORK_THREAD
	/* adopt the process-wide deadline */
	worker_ctx->next_res_init = next_res_init;
# endif
	if (now < worker_ctx->next_res_init)
		return;		/* not due yet */

	/* a deadline of 0 only arms the timer without reloading */
	if (0 != worker_ctx->next_res_init)
		res_init();
	worker_ctx->next_res_init = now + 60;
# ifdef WORK_THREAD
	/* publish the new deadline */
	next_res_init = worker_ctx->next_res_init;
# endif
}
#endif	/* HAVE_RES_INIT */
920 
921 
922 static u_int
reserve_dnschild_ctx(void)923 reserve_dnschild_ctx(void)
924 {
925           const size_t        ps = sizeof(dnschild_contexts[0]);
926           const size_t        cs = sizeof(*dnschild_contexts[0]);
927           u_int               c;
928           u_int               new_alloc;
929           size_t              octets;
930           size_t              new_octets;
931 
932           c = 0;
933           while (TRUE) {
934                     for ( ; c < dnschild_contexts_alloc; c++) {
935                               if (NULL == dnschild_contexts[c]) {
936                                         dnschild_contexts[c] = emalloc_zero(cs);
937 
938                                         return c;
939                               }
940                     }
941                     new_alloc = dnschild_contexts_alloc + 20;
942                     new_octets = new_alloc * ps;
943                     octets = dnschild_contexts_alloc * ps;
944                     dnschild_contexts = erealloc_zero(dnschild_contexts,
945                                                               new_octets, octets);
946                     dnschild_contexts_alloc = new_alloc;
947           }
948 }
949 
950 
951 static u_int
get_dnschild_ctx(void)952 get_dnschild_ctx(void)
953 {
954           static u_int        shared_ctx = UINT_MAX;
955 
956           if (worker_per_query)
957                     return reserve_dnschild_ctx();
958 
959           if (UINT_MAX == shared_ctx)
960                     shared_ctx = reserve_dnschild_ctx();
961 
962           return shared_ctx;
963 }
964 
965 
966 static dnsworker_ctx *
get_worker_context(blocking_child * c,u_int idx)967 get_worker_context(
968           blocking_child *    c,
969           u_int                         idx
970           )
971 {
972           u_int               min_new_alloc;
973           u_int               new_alloc;
974           size_t              octets;
975           size_t              new_octets;
976           dnsworker_ctx *     retv;
977 
978           worker_global_lock(TRUE);
979 
980           if (dnsworker_contexts_alloc <= idx) {
981                     min_new_alloc = 1 + idx;
982                     /* round new_alloc up to nearest multiple of 4 */
983                     new_alloc = (min_new_alloc + 4) & ~(4 - 1);
984                     new_octets = new_alloc * sizeof(dnsworker_ctx*);
985                     octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
986                     dnsworker_contexts = erealloc_zero(dnsworker_contexts,
987                                                                new_octets, octets);
988                     dnsworker_contexts_alloc = new_alloc;
989                     retv = emalloc_zero(sizeof(dnsworker_ctx));
990                     dnsworker_contexts[idx] = retv;
991           } else if (NULL == (retv = dnsworker_contexts[idx])) {
992                     retv = emalloc_zero(sizeof(dnsworker_ctx));
993                     dnsworker_contexts[idx] = retv;
994           }
995 
996           worker_global_lock(FALSE);
997 
998           ZERO(*retv);
999           retv->c = c;
1000           return retv;
1001 }
1002 
1003 
1004 static void
scheduled_sleep(time_t scheduled,time_t earliest,dnsworker_ctx * worker_ctx)1005 scheduled_sleep(
1006           time_t              scheduled,
1007           time_t              earliest,
1008           dnsworker_ctx *     worker_ctx
1009           )
1010 {
1011           time_t now;
1012 
1013           if (scheduled < worker_ctx->ignore_scheduled_before) {
1014                     TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1015                                 humantime(earliest), humantime(scheduled),
1016                                 humantime(worker_ctx->ignore_scheduled_before)));
1017                     return;
1018           }
1019 
1020           now = time(NULL);
1021 
1022           if (now < earliest) {
1023                     TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1024                                 humantime(earliest), humantime(scheduled),
1025                                 humantime(worker_ctx->ignore_scheduled_before)));
1026                     if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1027                               /* our sleep was interrupted */
1028                               now = time(NULL);
1029                               worker_ctx->ignore_scheduled_before = now;
1030 #ifdef HAVE_RES_INIT
1031                               worker_ctx->next_res_init = now + 60;
1032                               next_res_init = worker_ctx->next_res_init;
1033                               res_init();
1034 #endif
1035                               TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1036                                           humantime(worker_ctx->ignore_scheduled_before)));
1037                     }
1038           }
1039 }
1040 
1041 
/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * On entry *pretry holds the current retry interval in seconds and
 * *pnext_timeslot the earliest time the next query may be issued.
 * On return:
 *   *pscheduled      is the current time,
 *   *pwhen           is the time the retry should run,
 *   *pnext_timeslot  equals *pwhen,
 *   *pretry          is the next, longer interval, capped at 64
 *                    (or 1024 when 'forever' is nonzero).
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	const time_t	now = time(NULL);
	const int	cur_retry = *pretry;
	const time_t	run_at = max(now + cur_retry, *pnext_timeslot);
	const int	limit = forever ? 1024 : 64;
	int		next_retry;

	/* this exponential backoff is slower than doubling up: The
	 * sequence goes 2-3-4-6-8-12-16-24-32... and the upper limit is
	 * 64 seconds for things that should not repeat forever, and
	 * 1024 when repeated forever.
	 */
	next_retry = cur_retry << 1;
	if (0 == (next_retry & (next_retry - 1))) {
		/* single bit set: step to 3/4 of the doubled value */
		next_retry -= (next_retry >> 2);
	} else {
		/* otherwise drop the lowest set bit */
		next_retry &= (next_retry - 1);
	}
	next_retry = min(limit, next_retry);

	*pnext_timeslot = run_at;
	*pscheduled = now;
	*pwhen = run_at;
	*pretry = next_retry;
}
1083 
/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * Given the resolver's result code (and the errno saved alongside an
 * EAI_SYSTEM failure) it returns nonzero when the failure looks
 * temporary and the query should be tried again, zero otherwise.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* set once any lookup has reported EAI_AGAIN; never cleared */
	static int	eai_again_seen;
	int		again;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		again = 1;
		break;

	case EAI_FAIL:
		again = 1;
		break;

#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
	case EAI_NONAME:
		/* retried only as long as EAI_AGAIN was never seen */
		again = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		again = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		/* anything else is not retried */
		again = 0;
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, again ? "" : "not "));

	return again;
}
1147 
1148 #else     /* !WORKER follows */
1149 int ntp_intres_nonempty_compilation_unit;
1150 #endif
1151