1 /*
2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /**
37 * \file
38 *
39 * This file has functions to get queries from clients.
40 */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include <ctype.h>
51 #include "services/listen_dnsport.h"
52 #include "services/outside_network.h"
53 #include "util/netevent.h"
54 #include "util/log.h"
55 #include "util/config_file.h"
56 #include "util/net_help.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/parseutil.h"
59 #include "sldns/wire2str.h"
60 #include "services/mesh.h"
61 #include "util/fptr_wlist.h"
62 #include "util/locks.h"
63 #include "util/timeval_func.h"
64
65 #ifdef HAVE_NETDB_H
66 #include <netdb.h>
67 #endif
68 #include <fcntl.h>
69
70 #ifdef HAVE_SYS_UN_H
71 #include <sys/un.h>
72 #endif
73
74 #ifdef HAVE_SYSTEMD
75 #include <systemd/sd-daemon.h>
76 #endif
77
78 #ifdef HAVE_IFADDRS_H
79 #include <ifaddrs.h>
80 #endif
81 #ifdef HAVE_NET_IF_H
82 #include <net/if.h>
83 #endif
84
85 #ifdef HAVE_TIME_H
86 #include <time.h>
87 #endif
88 #include <sys/time.h>
89
90 #ifdef HAVE_NGTCP2
91 #include <ngtcp2/ngtcp2.h>
92 #include <ngtcp2/ngtcp2_crypto.h>
93 #ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H
94 #include <ngtcp2/ngtcp2_crypto_quictls.h>
95 #else
96 #include <ngtcp2/ngtcp2_crypto_openssl.h>
97 #endif
98 #endif
99
100 #ifdef HAVE_OPENSSL_SSL_H
101 #include <openssl/ssl.h>
102 #endif
103
104 #ifdef HAVE_LINUX_NET_TSTAMP_H
105 #include <linux/net_tstamp.h>
106 #endif
107
/**
 * Number of queued TCP connections for listen(). Passed as the backlog
 * argument when the stream and local accept sockets in this file are put
 * into the listening state.
 */
#define TCP_BACKLOG 256

/* The locks below only exist when threads are compiled in. */
#ifndef THREADS_DISABLED
/** lock on the counter of stream buffer memory */
static lock_basic_type stream_wait_count_lock;
/** lock on the counter of HTTP2 query buffer memory */
static lock_basic_type http2_query_buffer_count_lock;
/** lock on the counter of HTTP2 response buffer memory */
static lock_basic_type http2_response_buffer_count_lock;
#endif
/** size (in bytes) of stream wait buffers */
static size_t stream_wait_count = 0;
/** is the lock initialised for stream wait buffers (0 is not initialised) */
static int stream_wait_lock_inited = 0;
/** size (in bytes) of HTTP2 query buffers */
static size_t http2_query_buffer_count = 0;
/** is the lock initialised for HTTP2 query buffers (0 is not initialised) */
static int http2_query_buffer_lock_inited = 0;
/** size (in bytes) of HTTP2 response buffers */
static size_t http2_response_buffer_count = 0;
/** is the lock initialised for HTTP2 response buffers (0 is not initialised) */
static int http2_response_buffer_lock_inited = 0;
131
132 /**
133 * Debug print of the getaddrinfo returned address.
134 * @param addr: the address returned.
135 * @param additional: additional text that describes the type of socket,
136 * or NULL for no text.
137 */
138 static void
verbose_print_addr(struct addrinfo * addr,const char * additional)139 verbose_print_addr(struct addrinfo *addr, const char* additional)
140 {
141 if(verbosity >= VERB_ALGO) {
142 char buf[100];
143 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
144 #ifdef INET6
145 if(addr->ai_family == AF_INET6)
146 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
147 sin6_addr;
148 #endif /* INET6 */
149 if(inet_ntop(addr->ai_family, sinaddr, buf,
150 (socklen_t)sizeof(buf)) == 0) {
151 (void)strlcpy(buf, "(null)", sizeof(buf));
152 }
153 buf[sizeof(buf)-1] = 0;
154 verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s",
155 addr->ai_socktype==SOCK_DGRAM?"udp":
156 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
157 addr->ai_family==AF_INET?"4":
158 addr->ai_family==AF_INET6?"6":
159 "_otherfam", buf,
160 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port),
161 (additional?" ":""), (additional?additional:""));
162 }
163 }
164
165 void
verbose_print_unbound_socket(struct unbound_socket * ub_sock)166 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
167 {
168 if(verbosity >= VERB_ALGO) {
169 char buf[256];
170 log_info("listing of unbound_socket structure:");
171 addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf,
172 sizeof(buf));
173 log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s,
174 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6",
175 ub_sock->acl?"yes":"no");
176 }
177 }
178
#ifdef HAVE_SYSTEMD
/**
 * Look for a socket that systemd passed to this process by socket
 * activation and that matches the requested properties.
 * The first passed descriptor that sd_is_socket() accepts is used; the
 * address is not compared, it is only used for the error message.
 * @param family: address family the socket must have.
 * @param socktype: socket type the socket must have.
 * @param listen: listening state to require; use it only for stream
 *	protocols, pass -1 for UDP.
 * @param addr: address that is wanted, only used for logging. May be NULL,
 *	then path is logged instead.
 * @param addrlen: length of addr.
 * @param path: path of a local socket, only used for logging when addr is
 *	NULL. May be NULL.
 * @return the file descriptor, or -1 if systemd is not running, the
 *	LISTEN_PID or LISTEN_FDS environment variables are missing, or no
 *	matching socket was passed.
 */
static int
systemd_get_activated(int family, int socktype, int listen,
		      struct sockaddr *addr, socklen_t addrlen,
		      const char *path)
{
	int i = 0;
	int r = 0;
	int s = -1;
	const char* listen_pid, *listen_fds;

	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */

	if((r = sd_booted()) < 1) {
		if(r == 0)
			log_warn("systemd is not running");
		else
			log_err("systemd sd_booted(): %s", strerror(-r));
		return -1;
	}

	listen_pid = getenv("LISTEN_PID");
	listen_fds = getenv("LISTEN_FDS");

	if (!listen_pid) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
		return -1;
	}

	if (!listen_fds) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
		return -1;
	}

	if((r = sd_listen_fds(0)) < 1) {
		if(r == 0)
			log_warn("systemd: did not return socket, check unit configuration");
		else
			log_err("systemd sd_listen_fds(): %s", strerror(-r));
		return -1;
	}

	for(i = 0; i < r; i++) {
		/* sd_is_socket() returns a positive value on a match, zero
		 * on a mismatch and a negative errno-style value on failure;
		 * only a positive value means this descriptor can be used. */
		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype,
			listen) > 0) {
			s = SD_LISTEN_FDS_START + i;
			break;
		}
	}
	if (s == -1) {
		if (addr)
			log_err_addr("systemd sd_listen_fds()",
				     "no such socket",
				     (struct sockaddr_storage *)addr, addrlen);
		else
			/* do not hand a NULL pointer to the %s conversion */
			log_err("systemd sd_listen_fds(): %s",
				path?path:"(null)");
	}
	return s;
}
#endif
238
239 int
create_udp_sock(int family,int socktype,struct sockaddr * addr,socklen_t addrlen,int v6only,int * inuse,int * noproto,int rcv,int snd,int listen,int * reuseport,int transparent,int freebind,int use_systemd,int dscp)240 create_udp_sock(int family, int socktype, struct sockaddr* addr,
241 socklen_t addrlen, int v6only, int* inuse, int* noproto,
242 int rcv, int snd, int listen, int* reuseport, int transparent,
243 int freebind, int use_systemd, int dscp)
244 {
245 int s;
246 char* err;
247 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
248 int on=1;
249 #endif
250 #ifdef IPV6_MTU
251 int mtu = IPV6_MIN_MTU;
252 #endif
253 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
254 (void)rcv;
255 #endif
256 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
257 (void)snd;
258 #endif
259 #ifndef IPV6_V6ONLY
260 (void)v6only;
261 #endif
262 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
263 (void)transparent;
264 #endif
265 #if !defined(IP_FREEBIND)
266 (void)freebind;
267 #endif
268 #ifdef HAVE_SYSTEMD
269 int got_fd_from_systemd = 0;
270
271 if (!use_systemd
272 || (use_systemd
273 && (s = systemd_get_activated(family, socktype, -1, addr,
274 addrlen, NULL)) == -1)) {
275 #else
276 (void)use_systemd;
277 #endif
278 if((s = socket(family, socktype, 0)) == -1) {
279 *inuse = 0;
280 #ifndef USE_WINSOCK
281 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
282 *noproto = 1;
283 return -1;
284 }
285 #else
286 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
287 WSAGetLastError() == WSAEPROTONOSUPPORT) {
288 *noproto = 1;
289 return -1;
290 }
291 #endif
292 log_err("can't create socket: %s", sock_strerror(errno));
293 *noproto = 0;
294 return -1;
295 }
296 #ifdef HAVE_SYSTEMD
297 } else {
298 got_fd_from_systemd = 1;
299 }
300 #endif
301 if(listen) {
302 #ifdef SO_REUSEADDR
303 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
304 (socklen_t)sizeof(on)) < 0) {
305 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
306 sock_strerror(errno));
307 #ifndef USE_WINSOCK
308 if(errno != ENOSYS) {
309 close(s);
310 *noproto = 0;
311 *inuse = 0;
312 return -1;
313 }
314 #else
315 closesocket(s);
316 *noproto = 0;
317 *inuse = 0;
318 return -1;
319 #endif
320 }
321 #endif /* SO_REUSEADDR */
322 #ifdef SO_REUSEPORT
323 # ifdef SO_REUSEPORT_LB
324 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
325 * like SO_REUSEPORT on Linux. This is what the users want
326 * with the config option in unbound.conf; if we actually
327 * need local address and port reuse they'll also need to
328 * have SO_REUSEPORT set for them, assume it was _LB they want.
329 */
330 if (reuseport && *reuseport &&
331 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
332 (socklen_t)sizeof(on)) < 0) {
333 #ifdef ENOPROTOOPT
334 if(errno != ENOPROTOOPT || verbosity >= 3)
335 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
336 strerror(errno));
337 #endif
338 /* this option is not essential, we can continue */
339 *reuseport = 0;
340 }
341 # else /* no SO_REUSEPORT_LB */
342
343 /* try to set SO_REUSEPORT so that incoming
344 * queries are distributed evenly among the receiving threads.
345 * Each thread must have its own socket bound to the same port,
346 * with SO_REUSEPORT set on each socket.
347 */
348 if (reuseport && *reuseport &&
349 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
350 (socklen_t)sizeof(on)) < 0) {
351 #ifdef ENOPROTOOPT
352 if(errno != ENOPROTOOPT || verbosity >= 3)
353 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
354 strerror(errno));
355 #endif
356 /* this option is not essential, we can continue */
357 *reuseport = 0;
358 }
359 # endif /* SO_REUSEPORT_LB */
360 #else
361 (void)reuseport;
362 #endif /* defined(SO_REUSEPORT) */
363 #ifdef IP_TRANSPARENT
364 if (transparent &&
365 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
366 (socklen_t)sizeof(on)) < 0) {
367 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
368 strerror(errno));
369 }
370 #elif defined(IP_BINDANY)
371 if (transparent &&
372 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
373 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
374 (void*)&on, (socklen_t)sizeof(on)) < 0) {
375 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
376 (family==AF_INET6?"V6":""), strerror(errno));
377 }
378 #elif defined(SO_BINDANY)
379 if (transparent &&
380 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
381 (socklen_t)sizeof(on)) < 0) {
382 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
383 strerror(errno));
384 }
385 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
386 }
387 #ifdef IP_FREEBIND
388 if(freebind &&
389 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
390 (socklen_t)sizeof(on)) < 0) {
391 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
392 strerror(errno));
393 }
394 #endif /* IP_FREEBIND */
395 if(rcv) {
396 #ifdef SO_RCVBUF
397 int got;
398 socklen_t slen = (socklen_t)sizeof(got);
399 # ifdef SO_RCVBUFFORCE
400 /* Linux specific: try to use root permission to override
401 * system limits on rcvbuf. The limit is stored in
402 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
403 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
404 (socklen_t)sizeof(rcv)) < 0) {
405 if(errno != EPERM) {
406 log_err("setsockopt(..., SO_RCVBUFFORCE, "
407 "...) failed: %s", sock_strerror(errno));
408 sock_close(s);
409 *noproto = 0;
410 *inuse = 0;
411 return -1;
412 }
413 # endif /* SO_RCVBUFFORCE */
414 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
415 (socklen_t)sizeof(rcv)) < 0) {
416 log_err("setsockopt(..., SO_RCVBUF, "
417 "...) failed: %s", sock_strerror(errno));
418 sock_close(s);
419 *noproto = 0;
420 *inuse = 0;
421 return -1;
422 }
423 /* check if we got the right thing or if system
424 * reduced to some system max. Warn if so */
425 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
426 &slen) >= 0 && got < rcv/2) {
427 log_warn("so-rcvbuf %u was not granted. "
428 "Got %u. To fix: start with "
429 "root permissions(linux) or sysctl "
430 "bigger net.core.rmem_max(linux) or "
431 "kern.ipc.maxsockbuf(bsd) values.",
432 (unsigned)rcv, (unsigned)got);
433 }
434 # ifdef SO_RCVBUFFORCE
435 }
436 # endif
437 #endif /* SO_RCVBUF */
438 }
439 /* first do RCVBUF as the receive buffer is more important */
440 if(snd) {
441 #ifdef SO_SNDBUF
442 int got;
443 socklen_t slen = (socklen_t)sizeof(got);
444 # ifdef SO_SNDBUFFORCE
445 /* Linux specific: try to use root permission to override
446 * system limits on sndbuf. The limit is stored in
447 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
448 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
449 (socklen_t)sizeof(snd)) < 0) {
450 if(errno != EPERM) {
451 log_err("setsockopt(..., SO_SNDBUFFORCE, "
452 "...) failed: %s", sock_strerror(errno));
453 sock_close(s);
454 *noproto = 0;
455 *inuse = 0;
456 return -1;
457 }
458 # endif /* SO_SNDBUFFORCE */
459 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
460 (socklen_t)sizeof(snd)) < 0) {
461 log_err("setsockopt(..., SO_SNDBUF, "
462 "...) failed: %s", sock_strerror(errno));
463 sock_close(s);
464 *noproto = 0;
465 *inuse = 0;
466 return -1;
467 }
468 /* check if we got the right thing or if system
469 * reduced to some system max. Warn if so */
470 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
471 &slen) >= 0 && got < snd/2) {
472 log_warn("so-sndbuf %u was not granted. "
473 "Got %u. To fix: start with "
474 "root permissions(linux) or sysctl "
475 "bigger net.core.wmem_max(linux) or "
476 "kern.ipc.maxsockbuf(bsd) values.",
477 (unsigned)snd, (unsigned)got);
478 }
479 # ifdef SO_SNDBUFFORCE
480 }
481 # endif
482 #endif /* SO_SNDBUF */
483 }
484 err = set_ip_dscp(s, family, dscp);
485 if(err != NULL)
486 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
487 if(family == AF_INET6) {
488 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
489 int omit6_set = 0;
490 int action;
491 # endif
492 # if defined(IPV6_V6ONLY)
493 if(v6only
494 # ifdef HAVE_SYSTEMD
495 /* Systemd wants to control if the socket is v6 only
496 * or both, with BindIPv6Only=default, ipv6-only or
497 * both in systemd.socket, so it is not set here. */
498 && !got_fd_from_systemd
499 # endif
500 ) {
501 int val=(v6only==2)?0:1;
502 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
503 (void*)&val, (socklen_t)sizeof(val)) < 0) {
504 log_err("setsockopt(..., IPV6_V6ONLY"
505 ", ...) failed: %s", sock_strerror(errno));
506 sock_close(s);
507 *noproto = 0;
508 *inuse = 0;
509 return -1;
510 }
511 }
512 # endif
513 # if defined(IPV6_USE_MIN_MTU)
514 /*
515 * There is no fragmentation of IPv6 datagrams
516 * during forwarding in the network. Therefore
517 * we do not send UDP datagrams larger than
518 * the minimum IPv6 MTU of 1280 octets. The
519 * EDNS0 message length can be larger if the
520 * network stack supports IPV6_USE_MIN_MTU.
521 */
522 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
523 (void*)&on, (socklen_t)sizeof(on)) < 0) {
524 log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
525 "...) failed: %s", sock_strerror(errno));
526 sock_close(s);
527 *noproto = 0;
528 *inuse = 0;
529 return -1;
530 }
531 # elif defined(IPV6_MTU)
532 # ifndef USE_WINSOCK
533 /*
534 * On Linux, to send no larger than 1280, the PMTUD is
535 * disabled by default for datagrams anyway, so we set
536 * the MTU to use.
537 */
538 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
539 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
540 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
541 sock_strerror(errno));
542 sock_close(s);
543 *noproto = 0;
544 *inuse = 0;
545 return -1;
546 }
547 # elif defined(IPV6_USER_MTU)
548 /* As later versions of the mingw crosscompiler define
549 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU
550 * instead which is writable; IPV6_MTU is readonly there. */
551 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU,
552 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
553 if (WSAGetLastError() != WSAENOPROTOOPT) {
554 log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s",
555 wsa_strerror(WSAGetLastError()));
556 sock_close(s);
557 *noproto = 0;
558 *inuse = 0;
559 return -1;
560 }
561 }
562 # endif /* USE_WINSOCK */
563 # endif /* IPv6 MTU */
564 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
565 # if defined(IP_PMTUDISC_OMIT)
566 action = IP_PMTUDISC_OMIT;
567 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
568 &action, (socklen_t)sizeof(action)) < 0) {
569
570 if (errno != EINVAL) {
571 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
572 strerror(errno));
573 sock_close(s);
574 *noproto = 0;
575 *inuse = 0;
576 return -1;
577 }
578 }
579 else
580 {
581 omit6_set = 1;
582 }
583 # endif
584 if (omit6_set == 0) {
585 action = IP_PMTUDISC_DONT;
586 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
587 &action, (socklen_t)sizeof(action)) < 0) {
588 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
589 strerror(errno));
590 sock_close(s);
591 *noproto = 0;
592 *inuse = 0;
593 return -1;
594 }
595 }
596 # endif /* IPV6_MTU_DISCOVER */
597 } else if(family == AF_INET) {
598 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
599 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
600 * PMTU information is not accepted, but fragmentation is allowed
601 * if and only if the packet size exceeds the outgoing interface MTU
602 * (and also uses the interface mtu to determine the size of the packets).
603 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks.
604 * FreeBSD already has same semantics without setting the option. */
605 int omit_set = 0;
606 int action;
607 # if defined(IP_PMTUDISC_OMIT)
608 action = IP_PMTUDISC_OMIT;
609 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
610 &action, (socklen_t)sizeof(action)) < 0) {
611
612 if (errno != EINVAL) {
613 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
614 strerror(errno));
615 sock_close(s);
616 *noproto = 0;
617 *inuse = 0;
618 return -1;
619 }
620 }
621 else
622 {
623 omit_set = 1;
624 }
625 # endif
626 if (omit_set == 0) {
627 action = IP_PMTUDISC_DONT;
628 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
629 &action, (socklen_t)sizeof(action)) < 0) {
630 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
631 strerror(errno));
632 sock_close(s);
633 *noproto = 0;
634 *inuse = 0;
635 return -1;
636 }
637 }
638 # elif defined(IP_DONTFRAG) && !defined(__APPLE__)
639 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
640 * but does not work on that version, so we exclude it */
641 /* a nonzero value disables fragmentation, according to
642 * docs.oracle.com for ip(4). */
643 int off = 1;
644 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
645 &off, (socklen_t)sizeof(off)) < 0) {
646 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
647 strerror(errno));
648 sock_close(s);
649 *noproto = 0;
650 *inuse = 0;
651 return -1;
652 }
653 # endif /* IPv4 MTU */
654 }
655 if(
656 #ifdef HAVE_SYSTEMD
657 !got_fd_from_systemd &&
658 #endif
659 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
660 *noproto = 0;
661 *inuse = 0;
662 #ifndef USE_WINSOCK
663 #ifdef EADDRINUSE
664 *inuse = (errno == EADDRINUSE);
665 /* detect freebsd jail with no ipv6 permission */
666 if(family==AF_INET6 && errno==EINVAL)
667 *noproto = 1;
668 else if(errno != EADDRINUSE &&
669 !(errno == EACCES && verbosity < 4 && !listen)
670 #ifdef EADDRNOTAVAIL
671 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
672 #endif
673 ) {
674 log_err_addr("can't bind socket", strerror(errno),
675 (struct sockaddr_storage*)addr, addrlen);
676 }
677 #endif /* EADDRINUSE */
678 #else /* USE_WINSOCK */
679 if(WSAGetLastError() != WSAEADDRINUSE &&
680 WSAGetLastError() != WSAEADDRNOTAVAIL &&
681 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
682 log_err_addr("can't bind socket",
683 wsa_strerror(WSAGetLastError()),
684 (struct sockaddr_storage*)addr, addrlen);
685 }
686 #endif /* USE_WINSOCK */
687 sock_close(s);
688 return -1;
689 }
690 if(!fd_set_nonblock(s)) {
691 *noproto = 0;
692 *inuse = 0;
693 sock_close(s);
694 return -1;
695 }
696 return s;
697 }
698
699 int
create_tcp_accept_sock(struct addrinfo * addr,int v6only,int * noproto,int * reuseport,int transparent,int mss,int nodelay,int freebind,int use_systemd,int dscp,const char * additional)700 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
701 int* reuseport, int transparent, int mss, int nodelay, int freebind,
702 int use_systemd, int dscp, const char* additional)
703 {
704 int s = -1;
705 char* err;
706 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) \
707 || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) \
708 || defined(IP_BINDANY) || defined(IP_FREEBIND) \
709 || defined(SO_BINDANY) || defined(TCP_NODELAY)
710 int on = 1;
711 #endif
712 #ifdef HAVE_SYSTEMD
713 int got_fd_from_systemd = 0;
714 #endif
715 #ifdef USE_TCP_FASTOPEN
716 int qlen;
717 #endif
718 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
719 (void)transparent;
720 #endif
721 #if !defined(IP_FREEBIND)
722 (void)freebind;
723 #endif
724 verbose_print_addr(addr, additional);
725 *noproto = 0;
726 #ifdef HAVE_SYSTEMD
727 if (!use_systemd ||
728 (use_systemd
729 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
730 addr->ai_addr, addr->ai_addrlen,
731 NULL)) == -1)) {
732 #else
733 (void)use_systemd;
734 #endif
735 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
736 #ifndef USE_WINSOCK
737 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
738 *noproto = 1;
739 return -1;
740 }
741 #else
742 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
743 WSAGetLastError() == WSAEPROTONOSUPPORT) {
744 *noproto = 1;
745 return -1;
746 }
747 #endif
748 log_err("can't create socket: %s", sock_strerror(errno));
749 return -1;
750 }
751 if(nodelay) {
752 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
753 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
754 (socklen_t)sizeof(on)) < 0) {
755 #ifndef USE_WINSOCK
756 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
757 strerror(errno));
758 #else
759 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
760 wsa_strerror(WSAGetLastError()));
761 #endif
762 }
763 #else
764 log_warn(" setsockopt(TCP_NODELAY) unsupported");
765 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
766 }
767 if (mss > 0) {
768 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
769 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
770 (socklen_t)sizeof(mss)) < 0) {
771 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
772 sock_strerror(errno));
773 } else {
774 verbose(VERB_ALGO,
775 " tcp socket mss set to %d", mss);
776 }
777 #else
778 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
779 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
780 }
781 #ifdef HAVE_SYSTEMD
782 } else {
783 got_fd_from_systemd = 1;
784 }
785 #endif
786 #ifdef SO_REUSEADDR
787 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
788 (socklen_t)sizeof(on)) < 0) {
789 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
790 sock_strerror(errno));
791 sock_close(s);
792 return -1;
793 }
794 #endif /* SO_REUSEADDR */
795 #ifdef IP_FREEBIND
796 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
797 (socklen_t)sizeof(on)) < 0) {
798 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
799 strerror(errno));
800 }
801 #endif /* IP_FREEBIND */
802 #ifdef SO_REUSEPORT
803 /* try to set SO_REUSEPORT so that incoming
804 * connections are distributed evenly among the receiving threads.
805 * Each thread must have its own socket bound to the same port,
806 * with SO_REUSEPORT set on each socket.
807 */
808 if (reuseport && *reuseport &&
809 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
810 (socklen_t)sizeof(on)) < 0) {
811 #ifdef ENOPROTOOPT
812 if(errno != ENOPROTOOPT || verbosity >= 3)
813 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
814 strerror(errno));
815 #endif
816 /* this option is not essential, we can continue */
817 *reuseport = 0;
818 }
819 #else
820 (void)reuseport;
821 #endif /* defined(SO_REUSEPORT) */
822 #if defined(IPV6_V6ONLY)
823 if(addr->ai_family == AF_INET6 && v6only
824 # ifdef HAVE_SYSTEMD
825 /* Systemd wants to control if the socket is v6 only
826 * or both, with BindIPv6Only=default, ipv6-only or
827 * both in systemd.socket, so it is not set here. */
828 && !got_fd_from_systemd
829 # endif
830 ) {
831 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
832 (void*)&on, (socklen_t)sizeof(on)) < 0) {
833 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
834 sock_strerror(errno));
835 sock_close(s);
836 return -1;
837 }
838 }
839 #else
840 (void)v6only;
841 #endif /* IPV6_V6ONLY */
842 #ifdef IP_TRANSPARENT
843 if (transparent &&
844 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
845 (socklen_t)sizeof(on)) < 0) {
846 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
847 strerror(errno));
848 }
849 #elif defined(IP_BINDANY)
850 if (transparent &&
851 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
852 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
853 (void*)&on, (socklen_t)sizeof(on)) < 0) {
854 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
855 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
856 }
857 #elif defined(SO_BINDANY)
858 if (transparent &&
859 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
860 sizeof(on)) < 0) {
861 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
862 strerror(errno));
863 }
864 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
865 err = set_ip_dscp(s, addr->ai_family, dscp);
866 if(err != NULL)
867 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
868 if(
869 #ifdef HAVE_SYSTEMD
870 !got_fd_from_systemd &&
871 #endif
872 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
873 #ifndef USE_WINSOCK
874 /* detect freebsd jail with no ipv6 permission */
875 if(addr->ai_family==AF_INET6 && errno==EINVAL)
876 *noproto = 1;
877 else {
878 log_err_addr("can't bind socket", strerror(errno),
879 (struct sockaddr_storage*)addr->ai_addr,
880 addr->ai_addrlen);
881 }
882 #else
883 log_err_addr("can't bind socket",
884 wsa_strerror(WSAGetLastError()),
885 (struct sockaddr_storage*)addr->ai_addr,
886 addr->ai_addrlen);
887 #endif
888 sock_close(s);
889 return -1;
890 }
891 if(!fd_set_nonblock(s)) {
892 sock_close(s);
893 return -1;
894 }
895 if(listen(s, TCP_BACKLOG) == -1) {
896 log_err("can't listen: %s", sock_strerror(errno));
897 sock_close(s);
898 return -1;
899 }
900 #ifdef USE_TCP_FASTOPEN
901 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
902 against IP spoofing attacks as suggested in RFC7413 */
903 #ifdef __APPLE__
904 /* OS X implementation only supports qlen of 1 via this call. Actual
905 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
906 qlen = 1;
907 #else
908 /* 5 is recommended on linux */
909 qlen = 5;
910 #endif
911 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
912 sizeof(qlen))) == -1 ) {
913 #ifdef ENOPROTOOPT
914 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
915 disabled, except when verbosity enabled for debugging */
916 if(errno != ENOPROTOOPT || verbosity >= 3) {
917 #endif
918 if(errno == EPERM) {
919 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
920 } else {
921 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
922 }
923 #ifdef ENOPROTOOPT
924 }
925 #endif
926 }
927 #endif
928 return s;
929 }
930
931 char*
set_ip_dscp(int socket,int addrfamily,int dscp)932 set_ip_dscp(int socket, int addrfamily, int dscp)
933 {
934 int ds;
935
936 if(dscp == 0)
937 return NULL;
938 ds = dscp << 2;
939 switch(addrfamily) {
940 case AF_INET6:
941 #ifdef IPV6_TCLASS
942 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds,
943 sizeof(ds)) < 0)
944 return sock_strerror(errno);
945 break;
946 #else
947 return "IPV6_TCLASS not defined on this system";
948 #endif
949 default:
950 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
951 return sock_strerror(errno);
952 break;
953 }
954 return NULL;
955 }
956
/**
 * Create a local (unix domain) stream socket that accepts connections.
 * With systemd support compiled in and use_systemd set, a local stream
 * socket passed in by systemd is returned when one matches. Otherwise the
 * socket is created here: an existing file at path is removed, the socket
 * is bound to path, made nonblocking and put in the listening state with
 * a backlog of TCP_BACKLOG.
 * @param path: file system path of the socket. A path that does not fit in
 *	sun_path is an error; it is not truncated.
 * @param noproto: set to 1 if local sockets are not supported in this
 *	build; not touched otherwise.
 * @param use_systemd: if nonzero, try to get the socket from systemd first.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	int ret;

	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM,
		1, NULL, 0, path)) != -1)
		return ret;
	/* no socket from systemd, continue and create one */
#else
	(void)use_systemd;
#endif
#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);
	/* start from a clean address, bind gets the whole structure */
	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	if (strlcpy(usock.sun_path, path, sizeof(usock.sun_path))
		>= sizeof(usock.sun_path)) {
		/* a truncated name would bind a different path than the
		 * one that is unlinked below and that the user asked for */
		log_err("Local socket path %s is too long", path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
1031
1032
/**
 * Create a listening socket from getaddrinfo results.
 *
 * The name and port are resolved with getaddrinfo and the first result is
 * used. For SOCK_DGRAM a UDP socket is made with create_udp_sock, for any
 * other type a TCP accept socket is made with create_tcp_accept_sock.
 *
 * @param stype: socket type; stored in hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, NULL is logged as "default".
 * @param port: port number, passed to getaddrinfo in decimal.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten here.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 1 if creation failed for lack of protocol support
 *	while hints->ai_family is AF_INET6 (on winsock also for EAI_NONAME
 *	with AF_INET6); set to 0 otherwise.
 * @param rcv: receive buffer size, used for UDP.
 * @param snd: send buffer size, used for UDP.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: passed on for TCP.
 * @param nodelay: passed on for TCP.
 * @param freebind: passed on to the socket creation routine.
 * @param use_systemd: passed on to the socket creation routine.
 * @param dscp: passed on to the socket creation routine.
 * @param ub_sock: on success filled with a malloced copy of the address,
 *	its length, the fd and the address family; acl is set to NULL.
 *	Not touched when the socket could not be created.
 * @param additional: extra description, passed on for verbose output.
 * @return the fd, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	struct addrinfo *res = NULL;
	int r, s, inuse = 0, noproto = 0;
	char portbuf[32];
	snprintf(portbuf, sizeof(portbuf), "%d", port);
	hints->ai_socktype = stype;
	*noip6 = 0;
	if((r=getaddrinfo(ifname, portbuf, hints, &res)) != 0 || !res) {
#ifdef USE_WINSOCK
		if(r == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", portbuf, gai_strerror(r),
#ifdef EAI_SYSTEM
			(r==EAI_SYSTEM?(char*)strerror(errno):"")
#else
			""
#endif
		);
		return -1;
	}
	/* check the address before it is handed to the socket routines,
	 * not after they have already used it */
	if(!res->ai_addr) {
		log_err("getaddrinfo returned no address");
		freeaddrinfo(res);
		return -1;
	}
	if(stype == SOCK_DGRAM) {
		verbose_print_addr(res, additional);
		s = create_udp_sock(res->ai_family, res->ai_socktype,
			(struct sockaddr*)res->ai_addr, res->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(s == -1 && inuse) {
			log_err("bind: address already in use");
		} else if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else {
		s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp, additional);
		if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	}
	if(s == -1) {
		/* no socket: nothing to record, and nothing to close */
		freeaddrinfo(res);
		return -1;
	}

	ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen);
	ub_sock->addrlen = res->ai_addrlen;
	freeaddrinfo(res);
	if(!ub_sock->addr) {
		log_err("out of memory: allocate listening address");
		sock_close(s);
		return -1;
	}

	ub_sock->s = s;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return s;
}
1108
/**
 * Make socket, and first see if ifname contains port override info.
 *
 * An interface written as "ifspec@port" is split at the first '@': the
 * part before it becomes the interface name and the number after it
 * replaces the port argument. Without an '@' (or with a NULL ifname) the
 * arguments go to make_sock unchanged. All other arguments are passed
 * through to make_sock.
 *
 * @return the fd from make_sock, or -1 (with *noip6 set to 0) if the
 *	interface part is too long or the port is not in 1..65535.
 */
static int
make_sock_port(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	const char* at = ifname ? strchr(ifname, '@') : NULL;
	char newif[128];
	size_t iflen;
	long portnum;

	if(!at)
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock, additional);

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(newif)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* strtol clamps out-of-range input, so the range check below also
	 * rejects numbers that do not fit; no digits at all parses as 0 */
	portnum = strtol(at+1, NULL, 10);
	if(portnum <= 0 || portnum > 65535) {
		log_err("invalid portnumber in interface: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	memcpy(newif, ifname, iflen);
	newif[iflen] = 0;
	return make_sock(stype, newif, (int)portnum, hints, v6only, noip6,
		rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp, ub_sock, additional);
}
1143
1144 /**
1145 * Add port to open ports list.
1146 * @param list: list head. changed.
1147 * @param s: fd.
1148 * @param ftype: if fd is UDP.
1149 * @param pp2_enabled: if PROXYv2 is enabled for this port.
1150 * @param ub_sock: socket with address.
1151 * @return false on failure. list in unchanged then.
1152 */
1153 static int
port_insert(struct listen_port ** list,int s,enum listen_type ftype,int pp2_enabled,struct unbound_socket * ub_sock)1154 port_insert(struct listen_port** list, int s, enum listen_type ftype,
1155 int pp2_enabled, struct unbound_socket* ub_sock)
1156 {
1157 struct listen_port* item = (struct listen_port*)malloc(
1158 sizeof(struct listen_port));
1159 if(!item)
1160 return 0;
1161 item->next = *list;
1162 item->fd = s;
1163 item->ftype = ftype;
1164 item->pp2_enabled = pp2_enabled;
1165 item->socket = ub_sock;
1166 *list = item;
1167 return 1;
1168 }
1169
/**
 * Set fd to receive software timestamps.
 * Uses the SO_TIMESTAMPNS socket option, which is only attempted when
 * linux/net_tstamp.h was found at configure time.
 * @param s: fd.
 * @return false, after logging, if the option could not be set or is not
 *	supported on this platform.
 */
static int
set_recvtimestamp(int s)
{
#ifdef HAVE_LINUX_NET_TSTAMP_H
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	if(setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&flags,
		(socklen_t)sizeof(flags)) >= 0)
		return 1;
	log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
		strerror(errno));
	return 0;
#else
	log_err("packets timestamping is not supported on this platform");
	(void)s;
	return 0;
#endif
}
1188
/**
 * Set fd to receive source address packet info.
 * Which socket option is used depends on what the platform defines:
 * IP_PKTINFO or else IP_RECVDSTADDR for IPv4, IPV6_RECVPKTINFO or else
 * IPV6_PKTINFO for IPv6.
 * @param s: fd.
 * @param family: AF_INET or AF_INET6; for other values nothing is set.
 * @return false, after logging, if the option could not be set or the
 *	platform has no usable option for the family.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	if(family == AF_INET) {
#  ifdef IP_PKTINFO
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#  endif /* IP_PKTINFO */

	} else if(family == AF_INET6) {
#  ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#  endif /* defined IPV6_RECVPKTINFO */

	}
	return 1;
}
1243
1244 /**
1245 * Helper for ports_open. Creates one interface (or NULL for default).
1246 * @param ifname: The interface ip address.
1247 * @param do_auto: use automatic interface detection.
1248 * If enabled, then ifname must be the wildcard name.
1249 * @param do_udp: if udp should be used.
1250 * @param do_tcp: if tcp should be used.
1251 * @param hints: for getaddrinfo. family and flags have to be set by caller.
1252 * @param port: Port number to use.
1253 * @param list: list of open ports, appended to, changed to point to list head.
1254 * @param rcv: receive buffer size for UDP
1255 * @param snd: send buffer size for UDP
1256 * @param ssl_port: ssl service port number
1257 * @param tls_additional_port: list of additional ssl service port numbers.
1258 * @param https_port: DoH service port number
1259 * @param proxy_protocol_port: list of PROXYv2 port numbers.
1260 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1261 * set to false on exit if reuseport failed due to no kernel support.
1262 * @param transparent: set IP_TRANSPARENT socket option.
1263 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1264 * @param freebind: set IP_FREEBIND socket option.
1265 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1266 * @param use_systemd: if true, fetch sockets from systemd.
1267 * @param dnscrypt_port: dnscrypt service port number
1268 * @param dscp: DSCP to use.
1269 * @param quic_port: dns over quic port number.
1270 * @param http_notls_downstream: if no tls is used for https downstream.
1271 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
1272 * wait to discard if UDP packets have waited for long in the socket
1273 * buffer.
1274 * @return: returns false on error.
1275 */
1276 static int
ports_create_if(const char * ifname,int do_auto,int do_udp,int do_tcp,struct addrinfo * hints,int port,struct listen_port ** list,size_t rcv,size_t snd,int ssl_port,struct config_strlist * tls_additional_port,int https_port,struct config_strlist * proxy_protocol_port,int * reuseport,int transparent,int tcp_mss,int freebind,int http2_nodelay,int use_systemd,int dnscrypt_port,int dscp,int quic_port,int http_notls_downstream,int sock_queue_timeout)1277 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1278 struct addrinfo *hints, int port, struct listen_port** list,
1279 size_t rcv, size_t snd, int ssl_port,
1280 struct config_strlist* tls_additional_port, int https_port,
1281 struct config_strlist* proxy_protocol_port,
1282 int* reuseport, int transparent, int tcp_mss, int freebind,
1283 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
1284 int quic_port, int http_notls_downstream, int sock_queue_timeout)
1285 {
1286 int s, noip6=0;
1287 int is_ssl = if_is_ssl(ifname, port, ssl_port, tls_additional_port);
1288 int is_https = if_is_https(ifname, port, https_port);
1289 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port);
1290 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
1291 int is_doq = if_is_quic(ifname, port, quic_port);
1292 /* Always set TCP_NODELAY on TLS connection as it speeds up the TLS
1293 * handshake. DoH had already such option so we respect it.
1294 * Otherwise the server waits before sending more handshake data for
1295 * the client ACK (Nagle's algorithm), which is delayed because the
1296 * client waits for more data before ACKing (delayed ACK). */
1297 int nodelay = is_https?http2_nodelay:is_ssl;
1298 struct unbound_socket* ub_sock;
1299 const char* add = NULL;
1300
1301 if(!do_udp && !do_tcp)
1302 return 0;
1303
1304 if(is_pp2) {
1305 if(is_dnscrypt) {
1306 fatal_exit("PROXYv2 and DNSCrypt combination not "
1307 "supported!");
1308 } else if(is_https) {
1309 fatal_exit("PROXYv2 and DoH combination not "
1310 "supported!");
1311 } else if(is_doq) {
1312 fatal_exit("PROXYv2 and DoQ combination not "
1313 "supported!");
1314 }
1315 }
1316
1317 /* Check if both UDP and TCP ports should be open.
1318 * In the case of encrypted channels, probably an unencrypted channel
1319 * at the same port is not desired. */
1320 if((is_ssl || is_https) && !is_doq) do_udp = do_auto = 0;
1321 if((is_doq) && !(is_https || is_ssl)) do_tcp = 0;
1322
1323 if(do_auto) {
1324 ub_sock = calloc(1, sizeof(struct unbound_socket));
1325 if(!ub_sock)
1326 return 0;
1327 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1328 &noip6, rcv, snd, reuseport, transparent,
1329 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
1330 (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) {
1331 free(ub_sock->addr);
1332 free(ub_sock);
1333 if(noip6) {
1334 log_warn("IPv6 protocol not available");
1335 return 1;
1336 }
1337 return 0;
1338 }
1339 /* getting source addr packet info is highly non-portable */
1340 if(!set_recvpktinfo(s, hints->ai_family)) {
1341 sock_close(s);
1342 free(ub_sock->addr);
1343 free(ub_sock);
1344 return 0;
1345 }
1346 if (sock_queue_timeout && !set_recvtimestamp(s)) {
1347 log_warn("socket timestamping is not available");
1348 }
1349 if(!port_insert(list, s, is_dnscrypt
1350 ?listen_type_udpancil_dnscrypt:listen_type_udpancil,
1351 is_pp2, ub_sock)) {
1352 sock_close(s);
1353 free(ub_sock->addr);
1354 free(ub_sock);
1355 return 0;
1356 }
1357 } else if(do_udp) {
1358 enum listen_type udp_port_type;
1359 ub_sock = calloc(1, sizeof(struct unbound_socket));
1360 if(!ub_sock)
1361 return 0;
1362 if(is_dnscrypt) {
1363 udp_port_type = listen_type_udp_dnscrypt;
1364 add = "dnscrypt";
1365 } else if(is_doq) {
1366 udp_port_type = listen_type_doq;
1367 add = "doq";
1368 if(if_listens_on(ifname, port, 53, NULL)) {
1369 log_err("DNS over QUIC is strictly not "
1370 "allowed on port 53 as per RFC 9250. "
1371 "Port 53 is for DNS datagrams. Error "
1372 "for interface '%s'.", ifname);
1373 free(ub_sock->addr);
1374 free(ub_sock);
1375 return 0;
1376 }
1377 } else {
1378 udp_port_type = listen_type_udp;
1379 add = NULL;
1380 }
1381 /* regular udp socket */
1382 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1383 &noip6, rcv, snd, reuseport, transparent,
1384 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
1385 add)) == -1) {
1386 free(ub_sock->addr);
1387 free(ub_sock);
1388 if(noip6) {
1389 log_warn("IPv6 protocol not available");
1390 return 1;
1391 }
1392 return 0;
1393 }
1394 if(udp_port_type == listen_type_doq) {
1395 if(!set_recvpktinfo(s, hints->ai_family)) {
1396 sock_close(s);
1397 free(ub_sock->addr);
1398 free(ub_sock);
1399 return 0;
1400 }
1401 }
1402 if(udp_port_type == listen_type_udp && sock_queue_timeout)
1403 udp_port_type = listen_type_udpancil;
1404 if (sock_queue_timeout) {
1405 if(!set_recvtimestamp(s)) {
1406 log_warn("socket timestamping is not available");
1407 } else {
1408 if(udp_port_type == listen_type_udp)
1409 udp_port_type = listen_type_udpancil;
1410 }
1411 }
1412 if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) {
1413 sock_close(s);
1414 free(ub_sock->addr);
1415 free(ub_sock);
1416 return 0;
1417 }
1418 }
1419 if(do_tcp) {
1420 enum listen_type port_type;
1421 ub_sock = calloc(1, sizeof(struct unbound_socket));
1422 if(!ub_sock)
1423 return 0;
1424 if(is_ssl) {
1425 port_type = listen_type_ssl;
1426 add = "tls";
1427 } else if(is_https) {
1428 port_type = listen_type_http;
1429 add = "https";
1430 if(http_notls_downstream)
1431 add = "http";
1432 } else if(is_dnscrypt) {
1433 port_type = listen_type_tcp_dnscrypt;
1434 add = "dnscrypt";
1435 } else {
1436 port_type = listen_type_tcp;
1437 add = NULL;
1438 }
1439 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1440 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1441 freebind, use_systemd, dscp, ub_sock, add)) == -1) {
1442 free(ub_sock->addr);
1443 free(ub_sock);
1444 if(noip6) {
1445 /*log_warn("IPv6 protocol not available");*/
1446 return 1;
1447 }
1448 return 0;
1449 }
1450 if(is_ssl)
1451 verbose(VERB_ALGO, "setup TCP for SSL service");
1452 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) {
1453 sock_close(s);
1454 free(ub_sock->addr);
1455 free(ub_sock);
1456 return 0;
1457 }
1458 }
1459 return 1;
1460 }
1461
1462 /**
1463 * Add items to commpoint list in front.
1464 * @param c: commpoint to add.
1465 * @param front: listen struct.
1466 * @return: false on failure.
1467 */
1468 static int
listen_cp_insert(struct comm_point * c,struct listen_dnsport * front)1469 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1470 {
1471 struct listen_list* item = (struct listen_list*)malloc(
1472 sizeof(struct listen_list));
1473 if(!item)
1474 return 0;
1475 item->com = c;
1476 item->next = front->cps;
1477 front->cps = item;
1478 return 1;
1479 }
1480
listen_setup_locks(void)1481 void listen_setup_locks(void)
1482 {
1483 if(!stream_wait_lock_inited) {
1484 lock_basic_init(&stream_wait_count_lock);
1485 stream_wait_lock_inited = 1;
1486 }
1487 if(!http2_query_buffer_lock_inited) {
1488 lock_basic_init(&http2_query_buffer_count_lock);
1489 http2_query_buffer_lock_inited = 1;
1490 }
1491 if(!http2_response_buffer_lock_inited) {
1492 lock_basic_init(&http2_response_buffer_count_lock);
1493 http2_response_buffer_lock_inited = 1;
1494 }
1495 }
1496
listen_desetup_locks(void)1497 void listen_desetup_locks(void)
1498 {
1499 if(stream_wait_lock_inited) {
1500 stream_wait_lock_inited = 0;
1501 lock_basic_destroy(&stream_wait_count_lock);
1502 }
1503 if(http2_query_buffer_lock_inited) {
1504 http2_query_buffer_lock_inited = 0;
1505 lock_basic_destroy(&http2_query_buffer_count_lock);
1506 }
1507 if(http2_response_buffer_lock_inited) {
1508 http2_response_buffer_lock_inited = 0;
1509 lock_basic_destroy(&http2_response_buffer_count_lock);
1510 }
1511 }
1512
1513 struct listen_dnsport*
listen_create(struct comm_base * base,struct listen_port * ports,size_t bufsize,int tcp_accept_count,int tcp_idle_timeout,int harden_large_queries,uint32_t http_max_streams,char * http_endpoint,int http_notls,struct tcl_list * tcp_conn_limit,void * dot_sslctx,void * doh_sslctx,void * quic_sslctx,struct dt_env * dtenv,struct doq_table * doq_table,struct ub_randstate * rnd,struct config_file * cfg,comm_point_callback_type * cb,void * cb_arg)1514 listen_create(struct comm_base* base, struct listen_port* ports,
1515 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1516 int harden_large_queries, uint32_t http_max_streams,
1517 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1518 void* dot_sslctx, void* doh_sslctx, void* quic_sslctx,
1519 struct dt_env* dtenv,
1520 struct doq_table* doq_table,
1521 struct ub_randstate* rnd,struct config_file* cfg,
1522 comm_point_callback_type* cb, void *cb_arg)
1523 {
1524 struct listen_dnsport* front = (struct listen_dnsport*)
1525 malloc(sizeof(struct listen_dnsport));
1526 if(!front)
1527 return NULL;
1528 front->cps = NULL;
1529 front->udp_buff = sldns_buffer_new(bufsize);
1530 #ifdef USE_DNSCRYPT
1531 front->dnscrypt_udp_buff = NULL;
1532 #endif
1533 if(!front->udp_buff) {
1534 free(front);
1535 return NULL;
1536 }
1537
1538 /* create comm points as needed */
1539 while(ports) {
1540 struct comm_point* cp = NULL;
1541 if(ports->ftype == listen_type_udp ||
1542 ports->ftype == listen_type_udp_dnscrypt) {
1543 cp = comm_point_create_udp(base, ports->fd,
1544 front->udp_buff, ports->pp2_enabled, cb,
1545 cb_arg, ports->socket);
1546 } else if(ports->ftype == listen_type_doq) {
1547 #ifndef HAVE_NGTCP2
1548 log_warn("Unbound is not compiled with "
1549 "ngtcp2. This is required to use DNS "
1550 "over QUIC.");
1551 #endif
1552 cp = comm_point_create_doq(base, ports->fd,
1553 front->udp_buff, cb, cb_arg, ports->socket,
1554 doq_table, rnd, quic_sslctx, cfg);
1555 } else if(ports->ftype == listen_type_tcp ||
1556 ports->ftype == listen_type_tcp_dnscrypt) {
1557 cp = comm_point_create_tcp(base, ports->fd,
1558 tcp_accept_count, tcp_idle_timeout,
1559 harden_large_queries, 0, NULL,
1560 tcp_conn_limit, bufsize, front->udp_buff,
1561 ports->ftype, ports->pp2_enabled, cb, cb_arg,
1562 ports->socket);
1563 } else if(ports->ftype == listen_type_ssl ||
1564 ports->ftype == listen_type_http) {
1565 cp = comm_point_create_tcp(base, ports->fd,
1566 tcp_accept_count, tcp_idle_timeout,
1567 harden_large_queries,
1568 http_max_streams, http_endpoint,
1569 tcp_conn_limit, bufsize, front->udp_buff,
1570 ports->ftype, ports->pp2_enabled, cb, cb_arg,
1571 ports->socket);
1572 if(ports->ftype == listen_type_http) {
1573 if(!doh_sslctx && !http_notls) {
1574 log_warn("HTTPS port configured, but "
1575 "no TLS tls-service-key or "
1576 "tls-service-pem set");
1577 }
1578 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1579 if(!http_notls) {
1580 log_warn("Unbound is not compiled "
1581 "with an OpenSSL version "
1582 "supporting ALPN "
1583 "(OpenSSL >= 1.0.2). This "
1584 "is required to use "
1585 "DNS-over-HTTPS");
1586 }
1587 #endif
1588 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1589 log_warn("Unbound is not compiled with "
1590 "nghttp2. This is required to use "
1591 "DNS-over-HTTPS.");
1592 #endif
1593 }
1594 } else if(ports->ftype == listen_type_udpancil ||
1595 ports->ftype == listen_type_udpancil_dnscrypt) {
1596 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
1597 cp = comm_point_create_udp_ancil(base, ports->fd,
1598 front->udp_buff, ports->pp2_enabled, cb,
1599 cb_arg, ports->socket);
1600 #else
1601 log_warn("This system does not support UDP ancilliary data.");
1602 #endif
1603 }
1604 if(!cp) {
1605 log_err("can't create commpoint");
1606 listen_delete(front);
1607 return NULL;
1608 }
1609 if((http_notls && ports->ftype == listen_type_http) ||
1610 (ports->ftype == listen_type_tcp) ||
1611 (ports->ftype == listen_type_udp) ||
1612 (ports->ftype == listen_type_udpancil) ||
1613 (ports->ftype == listen_type_tcp_dnscrypt) ||
1614 (ports->ftype == listen_type_udp_dnscrypt) ||
1615 (ports->ftype == listen_type_udpancil_dnscrypt)) {
1616 cp->ssl = NULL;
1617 } else if(ports->ftype == listen_type_doq) {
1618 cp->ssl = quic_sslctx;
1619 } else if(ports->ftype == listen_type_http) {
1620 cp->ssl = doh_sslctx;
1621 } else {
1622 cp->ssl = dot_sslctx;
1623 }
1624 cp->dtenv = dtenv;
1625 cp->do_not_close = 1;
1626 #ifdef USE_DNSCRYPT
1627 if (ports->ftype == listen_type_udp_dnscrypt ||
1628 ports->ftype == listen_type_tcp_dnscrypt ||
1629 ports->ftype == listen_type_udpancil_dnscrypt) {
1630 cp->dnscrypt = 1;
1631 cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1632 if(!cp->dnscrypt_buffer) {
1633 log_err("can't alloc dnscrypt_buffer");
1634 comm_point_delete(cp);
1635 listen_delete(front);
1636 return NULL;
1637 }
1638 front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1639 }
1640 #endif
1641 if(!listen_cp_insert(cp, front)) {
1642 log_err("malloc failed");
1643 comm_point_delete(cp);
1644 listen_delete(front);
1645 return NULL;
1646 }
1647 ports = ports->next;
1648 }
1649 if(!front->cps) {
1650 log_err("Could not open sockets to accept queries.");
1651 listen_delete(front);
1652 return NULL;
1653 }
1654
1655 return front;
1656 }
1657
1658 void
listen_list_delete(struct listen_list * list)1659 listen_list_delete(struct listen_list* list)
1660 {
1661 struct listen_list *p = list, *pn;
1662 while(p) {
1663 pn = p->next;
1664 comm_point_delete(p->com);
1665 free(p);
1666 p = pn;
1667 }
1668 }
1669
1670 void
listen_delete(struct listen_dnsport * front)1671 listen_delete(struct listen_dnsport* front)
1672 {
1673 if(!front)
1674 return;
1675 listen_list_delete(front->cps);
1676 #ifdef USE_DNSCRYPT
1677 if(front->dnscrypt_udp_buff &&
1678 front->udp_buff != front->dnscrypt_udp_buff) {
1679 sldns_buffer_free(front->dnscrypt_udp_buff);
1680 }
1681 #endif
1682 sldns_buffer_free(front->udp_buff);
1683 free(front);
1684 }
1685
#ifdef HAVE_GETIFADDRS
/**
 * Append a strdup of str to the string array, growing the array by one.
 * @return false, after logging, if out of memory. The count is not
 *	increased then.
 */
static int
resolve_ifa_append(char*** ip_addresses, int* ip_addresses_size,
	const char* str)
{
	void* grown = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Resolve an interface name to the addresses that are configured on it.
 * For every entry of ifas with that name and an IPv4 (or, with INET6, an
 * IPv6) address, the address in text form is appended to the array. An
 * "@port" suffix on search_ifa (from its last '@') is copied onto every
 * address. If nothing was appended, search_ifa itself is appended
 * unchanged.
 * @param ifas: list from getifaddrs.
 * @param search_ifa: interface name, optionally followed by "@port".
 * @param ip_addresses: array of malloced strings, appended to.
 * @param ip_addresses_size: number of items in the array, updated.
 * @return false, after logging, on failure.
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *ifa;
	const int count_at_entry = *ip_addresses_size;
	/* the name to look for ends at the last '@', if there is one */
	const char* at = strrchr(search_ifa, '@');
	const char* portsuffix = at ? at : "";
	const size_t namelen = at ? (size_t)(at - search_ifa)
		: strlen(search_ifa);

	for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
		sa_family_t family;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		if(strlen(ifa->ifa_name) != namelen ||
			strncmp(ifa->ifa_name, search_ifa, namelen) != 0)
			continue;
		if(ifa->ifa_addr == NULL)
			continue;

		family = ifa->ifa_addr->sa_family;
		if(family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				ifa->ifa_addr;
			if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, portsuffix);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				ifa->ifa_addr;
			char a6[INET6_ADDRSTRLEN + 1];
			char scope_name[IF_NAMESIZE + 1];
			scope_name[0] = 0;
			if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)scope_name);
			if(scope_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, scope_name, portsuffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, portsuffix);
			}
		}
#endif
		else {
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
			addr_buf))
			return 0;
	}

	if(*ip_addresses_size == count_at_entry) {
		/* no address found for it, keep the name as it was given */
		if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1789
resolve_interface_names(char ** ifs,int num_ifs,struct config_strlist * list,char *** resif,int * num_resif)1790 int resolve_interface_names(char** ifs, int num_ifs,
1791 struct config_strlist* list, char*** resif, int* num_resif)
1792 {
1793 #ifdef HAVE_GETIFADDRS
1794 struct ifaddrs *addrs = NULL;
1795 if(num_ifs == 0 && list == NULL) {
1796 *resif = NULL;
1797 *num_resif = 0;
1798 return 1;
1799 }
1800 if(getifaddrs(&addrs) == -1) {
1801 log_err("failed to list interfaces: getifaddrs: %s",
1802 strerror(errno));
1803 freeifaddrs(addrs);
1804 return 0;
1805 }
1806 if(ifs) {
1807 int i;
1808 for(i=0; i<num_ifs; i++) {
1809 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1810 freeifaddrs(addrs);
1811 config_del_strarray(*resif, *num_resif);
1812 *resif = NULL;
1813 *num_resif = 0;
1814 return 0;
1815 }
1816 }
1817 }
1818 if(list) {
1819 struct config_strlist* p;
1820 for(p = list; p; p = p->next) {
1821 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1822 freeifaddrs(addrs);
1823 config_del_strarray(*resif, *num_resif);
1824 *resif = NULL;
1825 *num_resif = 0;
1826 return 0;
1827 }
1828 }
1829 }
1830 freeifaddrs(addrs);
1831 return 1;
1832 #else
1833 struct config_strlist* p;
1834 if(num_ifs == 0 && list == NULL) {
1835 *resif = NULL;
1836 *num_resif = 0;
1837 return 1;
1838 }
1839 *num_resif = num_ifs;
1840 for(p = list; p; p = p->next) {
1841 (*num_resif)++;
1842 }
1843 *resif = calloc(*num_resif, sizeof(**resif));
1844 if(!*resif) {
1845 log_err("out of memory");
1846 return 0;
1847 }
1848 if(ifs) {
1849 int i;
1850 for(i=0; i<num_ifs; i++) {
1851 (*resif)[i] = strdup(ifs[i]);
1852 if(!((*resif)[i])) {
1853 log_err("out of memory");
1854 config_del_strarray(*resif, *num_resif);
1855 *resif = NULL;
1856 *num_resif = 0;
1857 return 0;
1858 }
1859 }
1860 }
1861 if(list) {
1862 int idx = num_ifs;
1863 for(p = list; p; p = p->next) {
1864 (*resif)[idx] = strdup(p->str);
1865 if(!((*resif)[idx])) {
1866 log_err("out of memory");
1867 config_del_strarray(*resif, *num_resif);
1868 *resif = NULL;
1869 *num_resif = 0;
1870 return 0;
1871 }
1872 idx++;
1873 }
1874 }
1875 return 1;
1876 #endif /* HAVE_GETIFADDRS */
1877 }
1878
1879 struct listen_port*
listening_ports_open(struct config_file * cfg,char ** ifs,int num_ifs,int * reuseport)1880 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1881 int* reuseport)
1882 {
1883 struct listen_port* list = NULL;
1884 struct addrinfo hints;
1885 int i, do_ip4, do_ip6;
1886 int do_tcp, do_auto;
1887 do_ip4 = cfg->do_ip4;
1888 do_ip6 = cfg->do_ip6;
1889 do_tcp = cfg->do_tcp;
1890 do_auto = cfg->if_automatic && cfg->do_udp;
1891 if(cfg->incoming_num_tcp == 0)
1892 do_tcp = 0;
1893
1894 /* getaddrinfo */
1895 memset(&hints, 0, sizeof(hints));
1896 hints.ai_flags = AI_PASSIVE;
1897 /* no name lookups on our listening ports */
1898 if(num_ifs > 0)
1899 hints.ai_flags |= AI_NUMERICHOST;
1900 hints.ai_family = AF_UNSPEC;
1901 #ifndef INET6
1902 do_ip6 = 0;
1903 #endif
1904 if(!do_ip4 && !do_ip6) {
1905 return NULL;
1906 }
1907 /* create ip4 and ip6 ports so that return addresses are nice. */
1908 if(do_auto || num_ifs == 0) {
1909 if(do_auto && cfg->if_automatic_ports &&
1910 cfg->if_automatic_ports[0]!=0) {
1911 char* now = cfg->if_automatic_ports;
1912 while(now && *now) {
1913 char* after;
1914 int extraport;
1915 while(isspace((unsigned char)*now))
1916 now++;
1917 if(!*now)
1918 break;
1919 after = now;
1920 extraport = (int)strtol(now, &after, 10);
1921 if(extraport < 0 || extraport > 65535) {
1922 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1923 listening_ports_free(list);
1924 return NULL;
1925 }
1926 if(extraport == 0 && now == after) {
1927 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1928 listening_ports_free(list);
1929 return NULL;
1930 }
1931 now = after;
1932 if(do_ip6) {
1933 hints.ai_family = AF_INET6;
1934 if(!ports_create_if("::0",
1935 do_auto, cfg->do_udp, do_tcp,
1936 &hints, extraport, &list,
1937 cfg->so_rcvbuf, cfg->so_sndbuf,
1938 cfg->ssl_port, cfg->tls_additional_port,
1939 cfg->https_port,
1940 cfg->proxy_protocol_port,
1941 reuseport, cfg->ip_transparent,
1942 cfg->tcp_mss, cfg->ip_freebind,
1943 cfg->http_nodelay, cfg->use_systemd,
1944 cfg->dnscrypt_port, cfg->ip_dscp,
1945 cfg->quic_port, cfg->http_notls_downstream,
1946 cfg->sock_queue_timeout)) {
1947 listening_ports_free(list);
1948 return NULL;
1949 }
1950 }
1951 if(do_ip4) {
1952 hints.ai_family = AF_INET;
1953 if(!ports_create_if("0.0.0.0",
1954 do_auto, cfg->do_udp, do_tcp,
1955 &hints, extraport, &list,
1956 cfg->so_rcvbuf, cfg->so_sndbuf,
1957 cfg->ssl_port, cfg->tls_additional_port,
1958 cfg->https_port,
1959 cfg->proxy_protocol_port,
1960 reuseport, cfg->ip_transparent,
1961 cfg->tcp_mss, cfg->ip_freebind,
1962 cfg->http_nodelay, cfg->use_systemd,
1963 cfg->dnscrypt_port, cfg->ip_dscp,
1964 cfg->quic_port, cfg->http_notls_downstream,
1965 cfg->sock_queue_timeout)) {
1966 listening_ports_free(list);
1967 return NULL;
1968 }
1969 }
1970 }
1971 return list;
1972 }
1973 if(do_ip6) {
1974 hints.ai_family = AF_INET6;
1975 if(!ports_create_if(do_auto?"::0":"::1",
1976 do_auto, cfg->do_udp, do_tcp,
1977 &hints, cfg->port, &list,
1978 cfg->so_rcvbuf, cfg->so_sndbuf,
1979 cfg->ssl_port, cfg->tls_additional_port,
1980 cfg->https_port, cfg->proxy_protocol_port,
1981 reuseport, cfg->ip_transparent,
1982 cfg->tcp_mss, cfg->ip_freebind,
1983 cfg->http_nodelay, cfg->use_systemd,
1984 cfg->dnscrypt_port, cfg->ip_dscp,
1985 cfg->quic_port, cfg->http_notls_downstream,
1986 cfg->sock_queue_timeout)) {
1987 listening_ports_free(list);
1988 return NULL;
1989 }
1990 }
1991 if(do_ip4) {
1992 hints.ai_family = AF_INET;
1993 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1994 do_auto, cfg->do_udp, do_tcp,
1995 &hints, cfg->port, &list,
1996 cfg->so_rcvbuf, cfg->so_sndbuf,
1997 cfg->ssl_port, cfg->tls_additional_port,
1998 cfg->https_port, cfg->proxy_protocol_port,
1999 reuseport, cfg->ip_transparent,
2000 cfg->tcp_mss, cfg->ip_freebind,
2001 cfg->http_nodelay, cfg->use_systemd,
2002 cfg->dnscrypt_port, cfg->ip_dscp,
2003 cfg->quic_port, cfg->http_notls_downstream,
2004 cfg->sock_queue_timeout)) {
2005 listening_ports_free(list);
2006 return NULL;
2007 }
2008 }
2009 } else for(i = 0; i<num_ifs; i++) {
2010 if(str_is_ip6(ifs[i])) {
2011 if(!do_ip6)
2012 continue;
2013 hints.ai_family = AF_INET6;
2014 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
2015 do_tcp, &hints, cfg->port, &list,
2016 cfg->so_rcvbuf, cfg->so_sndbuf,
2017 cfg->ssl_port, cfg->tls_additional_port,
2018 cfg->https_port, cfg->proxy_protocol_port,
2019 reuseport, cfg->ip_transparent,
2020 cfg->tcp_mss, cfg->ip_freebind,
2021 cfg->http_nodelay, cfg->use_systemd,
2022 cfg->dnscrypt_port, cfg->ip_dscp,
2023 cfg->quic_port, cfg->http_notls_downstream,
2024 cfg->sock_queue_timeout)) {
2025 listening_ports_free(list);
2026 return NULL;
2027 }
2028 } else {
2029 if(!do_ip4)
2030 continue;
2031 hints.ai_family = AF_INET;
2032 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
2033 do_tcp, &hints, cfg->port, &list,
2034 cfg->so_rcvbuf, cfg->so_sndbuf,
2035 cfg->ssl_port, cfg->tls_additional_port,
2036 cfg->https_port, cfg->proxy_protocol_port,
2037 reuseport, cfg->ip_transparent,
2038 cfg->tcp_mss, cfg->ip_freebind,
2039 cfg->http_nodelay, cfg->use_systemd,
2040 cfg->dnscrypt_port, cfg->ip_dscp,
2041 cfg->quic_port, cfg->http_notls_downstream,
2042 cfg->sock_queue_timeout)) {
2043 listening_ports_free(list);
2044 return NULL;
2045 }
2046 }
2047 }
2048
2049 return list;
2050 }
2051
listening_ports_free(struct listen_port * list)2052 void listening_ports_free(struct listen_port* list)
2053 {
2054 struct listen_port* nx;
2055 while(list) {
2056 nx = list->next;
2057 if(list->fd != -1) {
2058 sock_close(list->fd);
2059 }
2060 /* rc_ports don't have ub_socket */
2061 if(list->socket) {
2062 free(list->socket->addr);
2063 free(list->socket);
2064 }
2065 free(list);
2066 list = nx;
2067 }
2068 }
2069
listen_get_mem(struct listen_dnsport * listen)2070 size_t listen_get_mem(struct listen_dnsport* listen)
2071 {
2072 struct listen_list* p;
2073 size_t s = sizeof(*listen) + sizeof(*listen->base) +
2074 sizeof(*listen->udp_buff) +
2075 sldns_buffer_capacity(listen->udp_buff);
2076 #ifdef USE_DNSCRYPT
2077 s += sizeof(*listen->dnscrypt_udp_buff);
2078 if(listen->udp_buff != listen->dnscrypt_udp_buff){
2079 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
2080 }
2081 #endif
2082 for(p = listen->cps; p; p = p->next) {
2083 s += sizeof(*p);
2084 s += comm_point_get_mem(p->com);
2085 }
2086 return s;
2087 }
2088
listen_stop_accept(struct listen_dnsport * listen)2089 void listen_stop_accept(struct listen_dnsport* listen)
2090 {
2091 /* do not stop the ones that have no tcp_free list
2092 * (they have already stopped listening) */
2093 struct listen_list* p;
2094 for(p=listen->cps; p; p=p->next) {
2095 if(p->com->type == comm_tcp_accept &&
2096 p->com->tcp_free != NULL) {
2097 comm_point_stop_listening(p->com);
2098 }
2099 }
2100 }
2101
listen_start_accept(struct listen_dnsport * listen)2102 void listen_start_accept(struct listen_dnsport* listen)
2103 {
2104 /* do not start the ones that have no tcp_free list, it is no
2105 * use to listen to them because they have no free tcp handlers */
2106 struct listen_list* p;
2107 for(p=listen->cps; p; p=p->next) {
2108 if(p->com->type == comm_tcp_accept &&
2109 p->com->tcp_free != NULL) {
2110 comm_point_start_listening(p->com, -1, -1);
2111 }
2112 }
2113 }
2114
2115 struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer * spoolbuf)2116 tcp_req_info_create(struct sldns_buffer* spoolbuf)
2117 {
2118 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
2119 if(!req) {
2120 log_err("malloc failure for new stream outoforder processing structure");
2121 return NULL;
2122 }
2123 memset(req, 0, sizeof(*req));
2124 req->spool_buffer = spoolbuf;
2125 return req;
2126 }
2127
/**
 * Delete a tcp_req_info: its lists are cleared and the structure is freed.
 * @param req: structure to delete, may be NULL.
 */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		/* release the open and done request lists */
		tcp_req_info_clear(req);
		/* cp is the pointer back to the commpoint that owns this
		 * struct and called delete on us, so it is left alone.
		 * spool_buffer is the shared udp buffer, not deleted here. */
		free(req);
	}
}
2138
tcp_req_info_clear(struct tcp_req_info * req)2139 void tcp_req_info_clear(struct tcp_req_info* req)
2140 {
2141 struct tcp_req_open_item* open, *nopen;
2142 struct tcp_req_done_item* item, *nitem;
2143 if(!req) return;
2144
2145 /* free outstanding request mesh reply entries */
2146 open = req->open_req_list;
2147 while(open) {
2148 nopen = open->next;
2149 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
2150 free(open);
2151 open = nopen;
2152 }
2153 req->open_req_list = NULL;
2154 req->num_open_req = 0;
2155
2156 /* free pending writable result packets */
2157 item = req->done_req_list;
2158 while(item) {
2159 nitem = item->next;
2160 lock_basic_lock(&stream_wait_count_lock);
2161 stream_wait_count -= (sizeof(struct tcp_req_done_item)
2162 +item->len);
2163 lock_basic_unlock(&stream_wait_count_lock);
2164 free(item->buf);
2165 free(item);
2166 item = nitem;
2167 }
2168 req->done_req_list = NULL;
2169 req->num_done_req = 0;
2170 req->read_is_closed = 0;
2171 }
2172
2173 void
tcp_req_info_remove_mesh_state(struct tcp_req_info * req,struct mesh_state * m)2174 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
2175 {
2176 struct tcp_req_open_item* open, *prev = NULL;
2177 if(!req || !m) return;
2178 open = req->open_req_list;
2179 while(open) {
2180 if(open->mesh_state == m) {
2181 struct tcp_req_open_item* next;
2182 if(prev) prev->next = open->next;
2183 else req->open_req_list = open->next;
2184 /* caller has to manage the mesh state reply entry */
2185 next = open->next;
2186 free(open);
2187 req->num_open_req --;
2188
2189 /* prev = prev; */
2190 open = next;
2191 continue;
2192 }
2193 prev = open;
2194 open = open->next;
2195 }
2196 }
2197
2198 /** setup listening for read or write */
2199 static void
tcp_req_info_setup_listen(struct tcp_req_info * req)2200 tcp_req_info_setup_listen(struct tcp_req_info* req)
2201 {
2202 int wr = 0;
2203 int rd = 0;
2204
2205 if(req->cp->tcp_byte_count != 0) {
2206 /* cannot change, halfway through */
2207 return;
2208 }
2209
2210 if(!req->cp->tcp_is_reading)
2211 wr = 1;
2212 if(!req->read_is_closed)
2213 rd = 1;
2214
2215 if(wr) {
2216 req->cp->tcp_is_reading = 0;
2217 comm_point_stop_listening(req->cp);
2218 comm_point_start_listening(req->cp, -1,
2219 adjusted_tcp_timeout(req->cp));
2220 } else if(rd) {
2221 req->cp->tcp_is_reading = 1;
2222 comm_point_stop_listening(req->cp);
2223 comm_point_start_listening(req->cp, -1,
2224 adjusted_tcp_timeout(req->cp));
2225 /* and also read it (from SSL stack buffers), so
2226 * no event read event is expected since the remainder of
2227 * the TLS frame is sitting in the buffers. */
2228 req->read_again = 1;
2229 } else {
2230 comm_point_stop_listening(req->cp);
2231 comm_point_start_listening(req->cp, -1,
2232 adjusted_tcp_timeout(req->cp));
2233 comm_point_listen_for_rw(req->cp, 0, 0);
2234 }
2235 }
2236
2237 /** remove first item from list of pending results */
2238 static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info * req)2239 tcp_req_info_pop_done(struct tcp_req_info* req)
2240 {
2241 struct tcp_req_done_item* item;
2242 log_assert(req->num_done_req > 0 && req->done_req_list);
2243 item = req->done_req_list;
2244 lock_basic_lock(&stream_wait_count_lock);
2245 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
2246 lock_basic_unlock(&stream_wait_count_lock);
2247 req->done_req_list = req->done_req_list->next;
2248 req->num_done_req --;
2249 return item;
2250 }
2251
2252 /** Send given buffer and setup to write */
2253 static void
tcp_req_info_start_write_buf(struct tcp_req_info * req,uint8_t * buf,size_t len)2254 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
2255 size_t len)
2256 {
2257 sldns_buffer_clear(req->cp->buffer);
2258 sldns_buffer_write(req->cp->buffer, buf, len);
2259 sldns_buffer_flip(req->cp->buffer);
2260
2261 req->cp->tcp_is_reading = 0; /* we are now writing */
2262 }
2263
2264 /** pick up the next result and start writing it to the channel */
2265 static void
tcp_req_pickup_next_result(struct tcp_req_info * req)2266 tcp_req_pickup_next_result(struct tcp_req_info* req)
2267 {
2268 if(req->num_done_req > 0) {
2269 /* unlist the done item from the list of pending results */
2270 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
2271 tcp_req_info_start_write_buf(req, item->buf, item->len);
2272 free(item->buf);
2273 free(item);
2274 }
2275 }
2276
2277 /** the read channel has closed */
2278 int
tcp_req_info_handle_read_close(struct tcp_req_info * req)2279 tcp_req_info_handle_read_close(struct tcp_req_info* req)
2280 {
2281 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2282 /* reset byte count for (potential) partial read */
2283 req->cp->tcp_byte_count = 0;
2284 /* if we still have results to write, pick up next and write it */
2285 if(req->num_done_req != 0) {
2286 tcp_req_pickup_next_result(req);
2287 tcp_req_info_setup_listen(req);
2288 return 1;
2289 }
2290 /* if nothing to do, this closes the connection */
2291 if(req->num_open_req == 0 && req->num_done_req == 0)
2292 return 0;
2293 /* otherwise, we must be waiting for dns resolve, wait with timeout */
2294 req->read_is_closed = 1;
2295 tcp_req_info_setup_listen(req);
2296 return 1;
2297 }
2298
2299 void
tcp_req_info_handle_writedone(struct tcp_req_info * req)2300 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2301 {
2302 /* back to reading state, we finished this write event */
2303 sldns_buffer_clear(req->cp->buffer);
2304 if(req->num_done_req == 0 && req->read_is_closed) {
2305 /* no more to write and nothing to read, close it */
2306 comm_point_drop_reply(&req->cp->repinfo);
2307 return;
2308 }
2309 req->cp->tcp_is_reading = 1;
2310 /* see if another result needs writing */
2311 tcp_req_pickup_next_result(req);
2312
2313 /* see if there is more to write, if not stop_listening for writing */
2314 /* see if new requests are allowed, if so, start_listening
2315 * for reading */
2316 tcp_req_info_setup_listen(req);
2317 }
2318
2319 void
tcp_req_info_handle_readdone(struct tcp_req_info * req)2320 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2321 {
2322 struct comm_point* c = req->cp;
2323
2324 /* we want to read up several requests, unless there are
2325 * pending answers */
2326
2327 req->is_drop = 0;
2328 req->is_reply = 0;
2329 req->in_worker_handle = 1;
2330 sldns_buffer_set_limit(req->spool_buffer, 0);
2331 /* handle the current request */
2332 /* this calls the worker handle request routine that could give
2333 * a cache response, or localdata response, or drop the reply,
2334 * or schedule a mesh entry for later */
2335 fptr_ok(fptr_whitelist_comm_point(c->callback));
2336 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2337 req->in_worker_handle = 0;
2338 /* there is an answer, put it up. It is already in the
2339 * c->buffer, just send it. */
2340 /* since we were just reading a query, the channel is
2341 * clear to write to */
2342 send_it:
2343 c->tcp_is_reading = 0;
2344 comm_point_stop_listening(c);
2345 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2346 return;
2347 }
2348 req->in_worker_handle = 0;
2349 /* it should be waiting in the mesh for recursion.
2350 * If mesh failed to add a new entry and called commpoint_drop_reply.
2351 * Then the mesh state has been cleared. */
2352 if(req->is_drop) {
2353 /* the reply has been dropped, stream has been closed. */
2354 return;
2355 }
2356 /* If mesh failed(mallocfail) and called commpoint_send_reply with
2357 * something like servfail then we pick up that reply below. */
2358 if(req->is_reply) {
2359 goto send_it;
2360 }
2361
2362 sldns_buffer_clear(c->buffer);
2363 /* if pending answers, pick up an answer and start sending it */
2364 tcp_req_pickup_next_result(req);
2365
2366 /* if answers pending, start sending answers */
2367 /* read more requests if we can have more requests */
2368 tcp_req_info_setup_listen(req);
2369 }
2370
2371 int
tcp_req_info_add_meshstate(struct tcp_req_info * req,struct mesh_area * mesh,struct mesh_state * m)2372 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2373 struct mesh_area* mesh, struct mesh_state* m)
2374 {
2375 struct tcp_req_open_item* item;
2376 log_assert(req && mesh && m);
2377 item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2378 if(!item) return 0;
2379 item->next = req->open_req_list;
2380 item->mesh = mesh;
2381 item->mesh_state = m;
2382 req->open_req_list = item;
2383 req->num_open_req++;
2384 return 1;
2385 }
2386
2387 /** Add a result to the result list. At the end. */
2388 static int
tcp_req_info_add_result(struct tcp_req_info * req,uint8_t * buf,size_t len)2389 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2390 {
2391 struct tcp_req_done_item* last = NULL;
2392 struct tcp_req_done_item* item;
2393 size_t space;
2394
2395 /* see if we have space */
2396 space = sizeof(struct tcp_req_done_item) + len;
2397 lock_basic_lock(&stream_wait_count_lock);
2398 if(stream_wait_count + space > stream_wait_max) {
2399 lock_basic_unlock(&stream_wait_count_lock);
2400 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2401 return 0;
2402 }
2403 stream_wait_count += space;
2404 lock_basic_unlock(&stream_wait_count_lock);
2405
2406 /* find last element */
2407 last = req->done_req_list;
2408 while(last && last->next)
2409 last = last->next;
2410
2411 /* create new element */
2412 item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2413 if(!item) {
2414 log_err("malloc failure, for stream result list");
2415 return 0;
2416 }
2417 item->next = NULL;
2418 item->len = len;
2419 item->buf = memdup(buf, len);
2420 if(!item->buf) {
2421 free(item);
2422 log_err("malloc failure, adding reply to stream result list");
2423 return 0;
2424 }
2425
2426 /* link in */
2427 if(last) last->next = item;
2428 else req->done_req_list = item;
2429 req->num_done_req++;
2430 return 1;
2431 }
2432
2433 void
tcp_req_info_send_reply(struct tcp_req_info * req)2434 tcp_req_info_send_reply(struct tcp_req_info* req)
2435 {
2436 if(req->in_worker_handle) {
2437 /* reply from mesh is in the spool_buffer */
2438 /* copy now, so that the spool buffer is free for other tasks
2439 * before the callback is done */
2440 sldns_buffer_clear(req->cp->buffer);
2441 sldns_buffer_write(req->cp->buffer,
2442 sldns_buffer_begin(req->spool_buffer),
2443 sldns_buffer_limit(req->spool_buffer));
2444 sldns_buffer_flip(req->cp->buffer);
2445 req->is_reply = 1;
2446 return;
2447 }
2448 /* now that the query has been handled, that mesh_reply entry
2449 * should be removed, from the tcp_req_info list,
2450 * the mesh state cleanup removes then with region_cleanup and
2451 * replies_sent true. */
2452 /* see if we can send it straight away (we are not doing
2453 * anything else). If so, copy to buffer and start */
2454 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2455 /* buffer is free, and was ready to read new query into,
2456 * but we are now going to use it to send this answer */
2457 tcp_req_info_start_write_buf(req,
2458 sldns_buffer_begin(req->spool_buffer),
2459 sldns_buffer_limit(req->spool_buffer));
2460 /* switch to listen to write events */
2461 comm_point_stop_listening(req->cp);
2462 comm_point_start_listening(req->cp, -1,
2463 adjusted_tcp_timeout(req->cp));
2464 return;
2465 }
2466 /* queue up the answer behind the others already pending */
2467 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2468 sldns_buffer_limit(req->spool_buffer))) {
2469 /* drop the connection, we are out of resources */
2470 comm_point_drop_reply(&req->cp->repinfo);
2471 }
2472 }
2473
tcp_req_info_get_stream_buffer_size(void)2474 size_t tcp_req_info_get_stream_buffer_size(void)
2475 {
2476 size_t s;
2477 if(!stream_wait_lock_inited)
2478 return stream_wait_count;
2479 lock_basic_lock(&stream_wait_count_lock);
2480 s = stream_wait_count;
2481 lock_basic_unlock(&stream_wait_count_lock);
2482 return s;
2483 }
2484
http2_get_query_buffer_size(void)2485 size_t http2_get_query_buffer_size(void)
2486 {
2487 size_t s;
2488 if(!http2_query_buffer_lock_inited)
2489 return http2_query_buffer_count;
2490 lock_basic_lock(&http2_query_buffer_count_lock);
2491 s = http2_query_buffer_count;
2492 lock_basic_unlock(&http2_query_buffer_count_lock);
2493 return s;
2494 }
2495
http2_get_response_buffer_size(void)2496 size_t http2_get_response_buffer_size(void)
2497 {
2498 size_t s;
2499 if(!http2_response_buffer_lock_inited)
2500 return http2_response_buffer_count;
2501 lock_basic_lock(&http2_response_buffer_count_lock);
2502 s = http2_response_buffer_count;
2503 lock_basic_unlock(&http2_response_buffer_count_lock);
2504 return s;
2505 }
2506
2507 #ifdef HAVE_NGHTTP2
2508 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
http2_submit_response_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2509 static ssize_t http2_submit_response_read_callback(
2510 nghttp2_session* ATTR_UNUSED(session),
2511 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2512 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2513 {
2514 struct http2_stream* h2_stream;
2515 struct http2_session* h2_session = source->ptr;
2516 size_t copylen = length;
2517 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2518 h2_session->session, stream_id))) {
2519 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2520 "stream");
2521 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2522 }
2523 if(!h2_stream->rbuffer ||
2524 sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2525 verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2526 "available in rbuffer");
2527 /* rbuffer will be free'd in frame close cb */
2528 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2529 }
2530
2531 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2532 copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2533 if(copylen > SSIZE_MAX)
2534 copylen = SSIZE_MAX; /* will probably never happen */
2535
2536 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2537 sldns_buffer_skip(h2_stream->rbuffer, copylen);
2538
2539 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2540 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2541 lock_basic_lock(&http2_response_buffer_count_lock);
2542 http2_response_buffer_count -=
2543 sldns_buffer_capacity(h2_stream->rbuffer);
2544 lock_basic_unlock(&http2_response_buffer_count_lock);
2545 sldns_buffer_free(h2_stream->rbuffer);
2546 h2_stream->rbuffer = NULL;
2547 }
2548
2549 return copylen;
2550 }
2551
2552 /**
2553 * Send RST_STREAM frame for stream.
2554 * @param h2_session: http2 session to submit frame to
2555 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2556 * @return 0 on error, 1 otherwise
2557 */
http2_submit_rst_stream(struct http2_session * h2_session,struct http2_stream * h2_stream)2558 static int http2_submit_rst_stream(struct http2_session* h2_session,
2559 struct http2_stream* h2_stream)
2560 {
2561 int ret = nghttp2_submit_rst_stream(h2_session->session,
2562 NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2563 NGHTTP2_INTERNAL_ERROR);
2564 if(ret) {
2565 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2566 "error: %s", nghttp2_strerror(ret));
2567 return 0;
2568 }
2569 return 1;
2570 }
2571
2572 /**
2573 * DNS response ready to be submitted to nghttp2, to be prepared for sending
2574 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2575 * might be used before this will be sent out.
2576 * @param h2_session: http2 session, containing c->buffer which contains answer
2577 * @return 0 on error, 1 otherwise
2578 */
http2_submit_dns_response(struct http2_session * h2_session)2579 int http2_submit_dns_response(struct http2_session* h2_session)
2580 {
2581 int ret;
2582 nghttp2_data_provider data_prd;
2583 char status[4];
2584 nghttp2_nv headers[3];
2585 struct http2_stream* h2_stream = h2_session->c->h2_stream;
2586 size_t rlen;
2587 char rlen_str[32];
2588
2589 if(h2_stream->rbuffer) {
2590 log_err("http2 submit response error: rbuffer already "
2591 "exists");
2592 return 0;
2593 }
2594 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2595 log_err("http2 submit response error: c->buffer not complete");
2596 return 0;
2597 }
2598
2599 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2600 verbose(VERB_QUERY, "http2: submit response error: "
2601 "invalid status");
2602 return 0;
2603 }
2604
2605 rlen = sldns_buffer_remaining(h2_session->c->buffer);
2606 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2607
2608 lock_basic_lock(&http2_response_buffer_count_lock);
2609 if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2610 lock_basic_unlock(&http2_response_buffer_count_lock);
2611 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2612 "in https-response-buffer-size");
2613 return http2_submit_rst_stream(h2_session, h2_stream);
2614 }
2615 http2_response_buffer_count += rlen;
2616 lock_basic_unlock(&http2_response_buffer_count_lock);
2617
2618 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2619 lock_basic_lock(&http2_response_buffer_count_lock);
2620 http2_response_buffer_count -= rlen;
2621 lock_basic_unlock(&http2_response_buffer_count_lock);
2622 log_err("http2 submit response error: malloc failure");
2623 return 0;
2624 }
2625
2626 headers[0].name = (uint8_t*)":status";
2627 headers[0].namelen = 7;
2628 headers[0].value = (uint8_t*)status;
2629 headers[0].valuelen = 3;
2630 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2631
2632 headers[1].name = (uint8_t*)"content-type";
2633 headers[1].namelen = 12;
2634 headers[1].value = (uint8_t*)"application/dns-message";
2635 headers[1].valuelen = 23;
2636 headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2637
2638 headers[2].name = (uint8_t*)"content-length";
2639 headers[2].namelen = 14;
2640 headers[2].value = (uint8_t*)rlen_str;
2641 headers[2].valuelen = strlen(rlen_str);
2642 headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2643
2644 sldns_buffer_write(h2_stream->rbuffer,
2645 sldns_buffer_current(h2_session->c->buffer),
2646 sldns_buffer_remaining(h2_session->c->buffer));
2647 sldns_buffer_flip(h2_stream->rbuffer);
2648
2649 data_prd.source.ptr = h2_session;
2650 data_prd.read_callback = http2_submit_response_read_callback;
2651 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2652 headers, 3, &data_prd);
2653 if(ret) {
2654 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2655 "error: %s", nghttp2_strerror(ret));
2656 return 0;
2657 }
2658 return 1;
2659 }
2660 #else
/** Variant for builds without HAVE_NGHTTP2: ignores its argument and
 * always returns 0. */
int
http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	return 0;
}
2665 #endif
2666
2667 #ifdef HAVE_NGHTTP2
2668 /** HTTP status to descriptive string */
http_status_to_str(enum http_status s)2669 static char* http_status_to_str(enum http_status s)
2670 {
2671 switch(s) {
2672 case HTTP_STATUS_OK:
2673 return "OK";
2674 case HTTP_STATUS_BAD_REQUEST:
2675 return "Bad Request";
2676 case HTTP_STATUS_NOT_FOUND:
2677 return "Not Found";
2678 case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2679 return "Payload Too Large";
2680 case HTTP_STATUS_URI_TOO_LONG:
2681 return "URI Too Long";
2682 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2683 return "Unsupported Media Type";
2684 case HTTP_STATUS_NOT_IMPLEMENTED:
2685 return "Not Implemented";
2686 }
2687 return "Status Unknown";
2688 }
2689
2690 /** nghttp2 callback. Used to copy error message to nghttp2 session */
http2_submit_error_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2691 static ssize_t http2_submit_error_read_callback(
2692 nghttp2_session* ATTR_UNUSED(session),
2693 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2694 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2695 {
2696 struct http2_stream* h2_stream;
2697 struct http2_session* h2_session = source->ptr;
2698 char* msg;
2699 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2700 h2_session->session, stream_id))) {
2701 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2702 "stream");
2703 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2704 }
2705 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2706 msg = http_status_to_str(h2_stream->status);
2707 if(length < strlen(msg))
2708 return 0; /* not worth trying over multiple frames */
2709 memcpy(buf, msg, strlen(msg));
2710 return strlen(msg);
2711
2712 }
2713
2714 /**
2715 * HTTP error response ready to be submitted to nghttp2, to be prepared for
2716 * sending out. Message body will contain descriptive string for HTTP status.
2717 * @param h2_session: http2 session to submit to
2718 * @param h2_stream: http2 stream containing HTTP status to use for error
2719 * @return 0 on error, 1 otherwise
2720 */
http2_submit_error(struct http2_session * h2_session,struct http2_stream * h2_stream)2721 static int http2_submit_error(struct http2_session* h2_session,
2722 struct http2_stream* h2_stream)
2723 {
2724 int ret;
2725 char status[4];
2726 nghttp2_data_provider data_prd;
2727 nghttp2_nv headers[1]; /* will be copied by nghttp */
2728 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2729 verbose(VERB_QUERY, "http2: submit error failed, "
2730 "invalid status");
2731 return 0;
2732 }
2733 headers[0].name = (uint8_t*)":status";
2734 headers[0].namelen = 7;
2735 headers[0].value = (uint8_t*)status;
2736 headers[0].valuelen = 3;
2737 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2738
2739 data_prd.source.ptr = h2_session;
2740 data_prd.read_callback = http2_submit_error_read_callback;
2741
2742 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2743 headers, 1, &data_prd);
2744 if(ret) {
2745 verbose(VERB_QUERY, "http2: submit error failed, "
2746 "error: %s", nghttp2_strerror(ret));
2747 return 0;
2748 }
2749 return 1;
2750 }
2751
2752 /**
2753 * Start query handling. Query is stored in the stream, and will be free'd here.
2754 * @param h2_session: http2 session, containing comm point
2755 * @param h2_stream: stream containing buffered query
2756 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2757 * reply available (yet).
2758 */
http2_query_read_done(struct http2_session * h2_session,struct http2_stream * h2_stream)2759 static int http2_query_read_done(struct http2_session* h2_session,
2760 struct http2_stream* h2_stream)
2761 {
2762 log_assert(h2_stream->qbuffer);
2763
2764 if(h2_session->c->h2_stream) {
2765 verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2766 "buffer already assigned to stream");
2767 return -1;
2768 }
2769
2770 /* the c->buffer might be used by mesh_send_reply and no be cleard
2771 * need to be cleared before use */
2772 sldns_buffer_clear(h2_session->c->buffer);
2773 if(sldns_buffer_remaining(h2_session->c->buffer) <
2774 sldns_buffer_remaining(h2_stream->qbuffer)) {
2775 /* qbuffer will be free'd in frame close cb */
2776 sldns_buffer_clear(h2_session->c->buffer);
2777 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2778 "qbuffer in c->buffer");
2779 return -1;
2780 }
2781
2782 sldns_buffer_write(h2_session->c->buffer,
2783 sldns_buffer_current(h2_stream->qbuffer),
2784 sldns_buffer_remaining(h2_stream->qbuffer));
2785
2786 lock_basic_lock(&http2_query_buffer_count_lock);
2787 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2788 lock_basic_unlock(&http2_query_buffer_count_lock);
2789 sldns_buffer_free(h2_stream->qbuffer);
2790 h2_stream->qbuffer = NULL;
2791
2792 sldns_buffer_flip(h2_session->c->buffer);
2793 h2_session->c->h2_stream = h2_stream;
2794 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2795 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2796 NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2797 return 1; /* answer in c->buffer */
2798 }
2799 sldns_buffer_clear(h2_session->c->buffer);
2800 h2_session->c->h2_stream = NULL;
2801 return 0; /* mesh state added, or dropped */
2802 }
2803
2804 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2805 * stream. Gather collected request data and start query handling. */
http2_req_frame_recv_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2806 static int http2_req_frame_recv_cb(nghttp2_session* session,
2807 const nghttp2_frame* frame, void* cb_arg)
2808 {
2809 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2810 struct http2_stream* h2_stream;
2811 int query_read_done;
2812
2813 if((frame->hd.type != NGHTTP2_DATA &&
2814 frame->hd.type != NGHTTP2_HEADERS) ||
2815 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2816 return 0;
2817 }
2818
2819 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2820 session, frame->hd.stream_id)))
2821 return 0;
2822
2823 if(h2_stream->invalid_endpoint) {
2824 h2_stream->status = HTTP_STATUS_NOT_FOUND;
2825 goto submit_http_error;
2826 }
2827
2828 if(h2_stream->invalid_content_type) {
2829 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2830 goto submit_http_error;
2831 }
2832
2833 if(h2_stream->http_method != HTTP_METHOD_GET &&
2834 h2_stream->http_method != HTTP_METHOD_POST) {
2835 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2836 goto submit_http_error;
2837 }
2838
2839 if(h2_stream->query_too_large) {
2840 if(h2_stream->http_method == HTTP_METHOD_POST)
2841 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2842 else
2843 h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2844 goto submit_http_error;
2845 }
2846
2847 if(!h2_stream->qbuffer) {
2848 h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2849 goto submit_http_error;
2850 }
2851
2852 if(h2_stream->status) {
2853 submit_http_error:
2854 verbose(VERB_QUERY, "http2 request invalid, returning :status="
2855 "%d", h2_stream->status);
2856 if(!http2_submit_error(h2_session, h2_stream)) {
2857 return NGHTTP2_ERR_CALLBACK_FAILURE;
2858 }
2859 return 0;
2860 }
2861 h2_stream->status = HTTP_STATUS_OK;
2862
2863 sldns_buffer_flip(h2_stream->qbuffer);
2864 h2_session->postpone_drop = 1;
2865 query_read_done = http2_query_read_done(h2_session, h2_stream);
2866 if(query_read_done < 0)
2867 return NGHTTP2_ERR_CALLBACK_FAILURE;
2868 else if(!query_read_done) {
2869 if(h2_session->is_drop) {
2870 /* connection needs to be closed. Return failure to make
2871 * sure no other action are taken anymore on comm point.
2872 * failure will result in reclaiming (and closing)
2873 * of comm point. */
2874 verbose(VERB_QUERY, "http2 query dropped in worker cb");
2875 h2_session->postpone_drop = 0;
2876 return NGHTTP2_ERR_CALLBACK_FAILURE;
2877 }
2878 /* nothing to submit right now, query added to mesh. */
2879 h2_session->postpone_drop = 0;
2880 return 0;
2881 }
2882 if(!http2_submit_dns_response(h2_session)) {
2883 sldns_buffer_clear(h2_session->c->buffer);
2884 h2_session->c->h2_stream = NULL;
2885 return NGHTTP2_ERR_CALLBACK_FAILURE;
2886 }
2887 verbose(VERB_QUERY, "http2 query submitted to session");
2888 sldns_buffer_clear(h2_session->c->buffer);
2889 h2_session->c->h2_stream = NULL;
2890 return 0;
2891 }
2892
2893 /** nghttp2 callback. Used to detect start of new streams. */
http2_req_begin_headers_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2894 static int http2_req_begin_headers_cb(nghttp2_session* session,
2895 const nghttp2_frame* frame, void* cb_arg)
2896 {
2897 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2898 struct http2_stream* h2_stream;
2899 int ret;
2900 if(frame->hd.type != NGHTTP2_HEADERS ||
2901 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2902 /* only interested in request headers */
2903 return 0;
2904 }
2905 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2906 log_err("malloc failure while creating http2 stream");
2907 return NGHTTP2_ERR_CALLBACK_FAILURE;
2908 }
2909 http2_session_add_stream(h2_session, h2_stream);
2910 ret = nghttp2_session_set_stream_user_data(session,
2911 frame->hd.stream_id, h2_stream);
2912 if(ret) {
2913 /* stream does not exist */
2914 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2915 "error: %s", nghttp2_strerror(ret));
2916 return NGHTTP2_ERR_CALLBACK_FAILURE;
2917 }
2918
2919 return 0;
2920 }
2921
2922 /**
2923 * base64url decode, store in qbuffer
2924 * @param h2_session: http2 session
2925 * @param h2_stream: http2 stream
2926 * @param start: start of the base64 string
2927 * @param length: length of the base64 string
2928 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2929 * buffer will be NULL is unparseble.
2930 */
http2_buffer_uri_query(struct http2_session * h2_session,struct http2_stream * h2_stream,const uint8_t * start,size_t length)2931 static int http2_buffer_uri_query(struct http2_session* h2_session,
2932 struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2933 {
2934 size_t expectb64len;
2935 int b64len;
2936 if(h2_stream->http_method == HTTP_METHOD_POST)
2937 return 1;
2938 if(length == 0)
2939 return 1;
2940 if(h2_stream->qbuffer) {
2941 verbose(VERB_ALGO, "http2_req_header fail, "
2942 "qbuffer already set");
2943 return 0;
2944 }
2945
2946 /* calculate size, might be a bit bigger than the real
2947 * decoded buffer size */
2948 expectb64len = sldns_b64_pton_calculate_size(length);
2949 log_assert(expectb64len > 0);
2950 if(expectb64len >
2951 h2_session->c->http2_stream_max_qbuffer_size) {
2952 h2_stream->query_too_large = 1;
2953 return 1;
2954 }
2955
2956 lock_basic_lock(&http2_query_buffer_count_lock);
2957 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2958 lock_basic_unlock(&http2_query_buffer_count_lock);
2959 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2960 "in http2-query-buffer-size");
2961 return http2_submit_rst_stream(h2_session, h2_stream);
2962 }
2963 http2_query_buffer_count += expectb64len;
2964 lock_basic_unlock(&http2_query_buffer_count_lock);
2965 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2966 lock_basic_lock(&http2_query_buffer_count_lock);
2967 http2_query_buffer_count -= expectb64len;
2968 lock_basic_unlock(&http2_query_buffer_count_lock);
2969 log_err("http2_req_header fail, qbuffer "
2970 "malloc failure");
2971 return 0;
2972 }
2973
2974 if(sldns_b64_contains_nonurl((char const*)start, length)) {
2975 char buf[65536+4];
2976 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2977 /* copy to the scratch buffer temporarily to terminate the
2978 * string with a zero */
2979 if(length+1 > sizeof(buf)) {
2980 /* too long */
2981 lock_basic_lock(&http2_query_buffer_count_lock);
2982 http2_query_buffer_count -= expectb64len;
2983 lock_basic_unlock(&http2_query_buffer_count_lock);
2984 sldns_buffer_free(h2_stream->qbuffer);
2985 h2_stream->qbuffer = NULL;
2986 return 1;
2987 }
2988 memmove(buf, start, length);
2989 buf[length] = 0;
2990 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2991 h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2992 lock_basic_lock(&http2_query_buffer_count_lock);
2993 http2_query_buffer_count -= expectb64len;
2994 lock_basic_unlock(&http2_query_buffer_count_lock);
2995 sldns_buffer_free(h2_stream->qbuffer);
2996 h2_stream->qbuffer = NULL;
2997 return 1;
2998 }
2999 } else {
3000 if(!(b64len = sldns_b64url_pton(
3001 (char const *)start, length,
3002 sldns_buffer_current(h2_stream->qbuffer),
3003 expectb64len)) || b64len < 0) {
3004 lock_basic_lock(&http2_query_buffer_count_lock);
3005 http2_query_buffer_count -= expectb64len;
3006 lock_basic_unlock(&http2_query_buffer_count_lock);
3007 sldns_buffer_free(h2_stream->qbuffer);
3008 h2_stream->qbuffer = NULL;
3009 /* return without error, method can be an
3010 * unknown POST */
3011 return 1;
3012 }
3013 }
3014 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
3015 return 1;
3016 }
3017
3018 /** nghttp2 callback. Used to parse headers from HEADER frames. */
http2_req_header_cb(nghttp2_session * session,const nghttp2_frame * frame,const uint8_t * name,size_t namelen,const uint8_t * value,size_t valuelen,uint8_t ATTR_UNUSED (flags),void * cb_arg)3019 static int http2_req_header_cb(nghttp2_session* session,
3020 const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
3021 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
3022 void* cb_arg)
3023 {
3024 struct http2_stream* h2_stream = NULL;
3025 struct http2_session* h2_session = (struct http2_session*)cb_arg;
3026 /* nghttp2 deals with CONTINUATION frames and provides them as part of
3027 * the HEADER */
3028 if(frame->hd.type != NGHTTP2_HEADERS ||
3029 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
3030 /* only interested in request headers */
3031 return 0;
3032 }
3033 if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
3034 frame->hd.stream_id)))
3035 return 0;
3036
3037 /* earlier checks already indicate we can stop handling this query */
3038 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
3039 h2_stream->invalid_content_type ||
3040 h2_stream->invalid_endpoint)
3041 return 0;
3042
3043
3044 /* nghttp2 performs some sanity checks in the headers, including:
3045 * name and value are guaranteed to be null terminated
3046 * name is guaranteed to be lowercase
3047 * content-length value is guaranteed to contain digits
3048 */
3049
3050 if(!h2_stream->http_method && namelen == 7 &&
3051 memcmp(":method", name, namelen) == 0) {
3052 /* Case insensitive check on :method value to be on the safe
3053 * side. I failed to find text about case sensitivity in specs.
3054 */
3055 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
3056 h2_stream->http_method = HTTP_METHOD_GET;
3057 else if(valuelen == 4 &&
3058 strcasecmp("POST", (const char*)value) == 0) {
3059 h2_stream->http_method = HTTP_METHOD_POST;
3060 if(h2_stream->qbuffer) {
3061 /* POST method uses query from DATA frames */
3062 lock_basic_lock(&http2_query_buffer_count_lock);
3063 http2_query_buffer_count -=
3064 sldns_buffer_capacity(h2_stream->qbuffer);
3065 lock_basic_unlock(&http2_query_buffer_count_lock);
3066 sldns_buffer_free(h2_stream->qbuffer);
3067 h2_stream->qbuffer = NULL;
3068 }
3069 } else
3070 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
3071 return 0;
3072 }
3073 if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
3074 /* :path may contain DNS query, depending on method. Method might
3075 * not be known yet here, so check after finishing receiving
3076 * stream. */
3077 #define HTTP_QUERY_PARAM "?dns="
3078 size_t el = strlen(h2_session->c->http_endpoint);
3079 size_t qpl = strlen(HTTP_QUERY_PARAM);
3080
3081 if(valuelen < el || memcmp(h2_session->c->http_endpoint,
3082 value, el) != 0) {
3083 h2_stream->invalid_endpoint = 1;
3084 return 0;
3085 }
3086 /* larger than endpoint only allowed if it is for the query
3087 * parameter */
3088 if(valuelen <= el+qpl ||
3089 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
3090 if(valuelen != el)
3091 h2_stream->invalid_endpoint = 1;
3092 return 0;
3093 }
3094
3095 if(!http2_buffer_uri_query(h2_session, h2_stream,
3096 value+(el+qpl), valuelen-(el+qpl))) {
3097 return NGHTTP2_ERR_CALLBACK_FAILURE;
3098 }
3099 return 0;
3100 }
3101 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
3102 * and not needed when using GET. Don't enfore.
3103 * If set only allow lowercase "application/dns-message".
3104 *
3105 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
3106 * be able to handle "application/dns-message". Since that is the only
3107 * content-type supported we can ignore the accept header.
3108 */
3109 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
3110 if(valuelen != 23 || memcmp("application/dns-message", value,
3111 valuelen) != 0) {
3112 h2_stream->invalid_content_type = 1;
3113 }
3114 }
3115
3116 /* Only interested in content-lentg for POST (on not yet known) method.
3117 */
3118 if((!h2_stream->http_method ||
3119 h2_stream->http_method == HTTP_METHOD_POST) &&
3120 !h2_stream->content_length && namelen == 14 &&
3121 memcmp("content-length", name, namelen) == 0) {
3122 if(valuelen > 5) {
3123 h2_stream->query_too_large = 1;
3124 return 0;
3125 }
3126 /* guaranteed to only contain digits and be null terminated */
3127 h2_stream->content_length = atoi((const char*)value);
3128 if(h2_stream->content_length >
3129 h2_session->c->http2_stream_max_qbuffer_size) {
3130 h2_stream->query_too_large = 1;
3131 return 0;
3132 }
3133 }
3134 return 0;
3135 }
3136
3137 /** nghttp2 callback. Used to get data from DATA frames, which can contain
3138 * queries in POST requests. */
http2_req_data_chunk_recv_cb(nghttp2_session * ATTR_UNUSED (session),uint8_t ATTR_UNUSED (flags),int32_t stream_id,const uint8_t * data,size_t len,void * cb_arg)3139 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
3140 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
3141 size_t len, void* cb_arg)
3142 {
3143 struct http2_session* h2_session = (struct http2_session*)cb_arg;
3144 struct http2_stream* h2_stream;
3145 size_t qlen = 0;
3146
3147 if(!(h2_stream = nghttp2_session_get_stream_user_data(
3148 h2_session->session, stream_id))) {
3149 return 0;
3150 }
3151
3152 if(h2_stream->query_too_large)
3153 return 0;
3154
3155 if(!h2_stream->qbuffer) {
3156 if(h2_stream->content_length) {
3157 if(h2_stream->content_length < len)
3158 /* getting more data in DATA frame than
3159 * advertised in content-length header. */
3160 return NGHTTP2_ERR_CALLBACK_FAILURE;
3161 qlen = h2_stream->content_length;
3162 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
3163 /* setting this to msg-buffer-size can result in a lot
3164 * of memory consuption. Most queries should fit in a
3165 * single DATA frame, and most POST queries will
3166 * contain content-length which does not impose this
3167 * limit. */
3168 qlen = len;
3169 }
3170 }
3171 if(!h2_stream->qbuffer && qlen) {
3172 lock_basic_lock(&http2_query_buffer_count_lock);
3173 if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
3174 lock_basic_unlock(&http2_query_buffer_count_lock);
3175 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
3176 "in http2-query-buffer-size");
3177 return http2_submit_rst_stream(h2_session, h2_stream);
3178 }
3179 http2_query_buffer_count += qlen;
3180 lock_basic_unlock(&http2_query_buffer_count_lock);
3181 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
3182 lock_basic_lock(&http2_query_buffer_count_lock);
3183 http2_query_buffer_count -= qlen;
3184 lock_basic_unlock(&http2_query_buffer_count_lock);
3185 }
3186 }
3187
3188 if(!h2_stream->qbuffer ||
3189 sldns_buffer_remaining(h2_stream->qbuffer) < len) {
3190 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
3191 "buffer space for POST query. Can happen on multi "
3192 "frame requests without content-length header");
3193 h2_stream->query_too_large = 1;
3194 return 0;
3195 }
3196
3197 sldns_buffer_write(h2_stream->qbuffer, data, len);
3198
3199 return 0;
3200 }
3201
http2_req_stream_clear(struct http2_stream * h2_stream)3202 void http2_req_stream_clear(struct http2_stream* h2_stream)
3203 {
3204 if(h2_stream->qbuffer) {
3205 lock_basic_lock(&http2_query_buffer_count_lock);
3206 http2_query_buffer_count -=
3207 sldns_buffer_capacity(h2_stream->qbuffer);
3208 lock_basic_unlock(&http2_query_buffer_count_lock);
3209 sldns_buffer_free(h2_stream->qbuffer);
3210 h2_stream->qbuffer = NULL;
3211 }
3212 if(h2_stream->rbuffer) {
3213 lock_basic_lock(&http2_response_buffer_count_lock);
3214 http2_response_buffer_count -=
3215 sldns_buffer_capacity(h2_stream->rbuffer);
3216 lock_basic_unlock(&http2_response_buffer_count_lock);
3217 sldns_buffer_free(h2_stream->rbuffer);
3218 h2_stream->rbuffer = NULL;
3219 }
3220 }
3221
http2_req_callbacks_create(void)3222 nghttp2_session_callbacks* http2_req_callbacks_create(void)
3223 {
3224 nghttp2_session_callbacks *callbacks;
3225 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
3226 log_err("failed to initialize nghttp2 callback");
3227 return NULL;
3228 }
3229 /* reception of header block started, used to create h2_stream */
3230 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
3231 http2_req_begin_headers_cb);
3232 /* complete frame received, used to get data from stream if frame
3233 * has end stream flag, and start processing query */
3234 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
3235 http2_req_frame_recv_cb);
3236 /* get request info from headers */
3237 nghttp2_session_callbacks_set_on_header_callback(callbacks,
3238 http2_req_header_cb);
3239 /* get data from DATA frames, containing POST query */
3240 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
3241 http2_req_data_chunk_recv_cb);
3242
3243 /* generic HTTP2 callbacks */
3244 nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
3245 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
3246 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
3247 http2_stream_close_cb);
3248
3249 return callbacks;
3250 }
3251 #endif /* HAVE_NGHTTP2 */
3252
3253 #ifdef HAVE_NGTCP2
3254 struct doq_table*
doq_table_create(struct config_file * cfg,struct ub_randstate * rnd)3255 doq_table_create(struct config_file* cfg, struct ub_randstate* rnd)
3256 {
3257 struct doq_table* table = calloc(1, sizeof(*table));
3258 if(!table)
3259 return NULL;
3260 table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)*
3261 NGTCP2_MILLISECONDS;
3262 table->sv_scidlen = 16;
3263 table->static_secret_len = 16;
3264 table->static_secret = malloc(table->static_secret_len);
3265 if(!table->static_secret) {
3266 free(table);
3267 return NULL;
3268 }
3269 doq_fill_rand(rnd, table->static_secret, table->static_secret_len);
3270 table->conn_tree = rbtree_create(doq_conn_cmp);
3271 if(!table->conn_tree) {
3272 free(table->static_secret);
3273 free(table);
3274 return NULL;
3275 }
3276 table->conid_tree = rbtree_create(doq_conid_cmp);
3277 if(!table->conid_tree) {
3278 free(table->static_secret);
3279 free(table->conn_tree);
3280 free(table);
3281 return NULL;
3282 }
3283 table->timer_tree = rbtree_create(doq_timer_cmp);
3284 if(!table->timer_tree) {
3285 free(table->static_secret);
3286 free(table->conn_tree);
3287 free(table->conid_tree);
3288 free(table);
3289 return NULL;
3290 }
3291 lock_rw_init(&table->lock);
3292 lock_rw_init(&table->conid_lock);
3293 lock_basic_init(&table->size_lock);
3294 lock_protect(&table->lock, &table->static_secret,
3295 sizeof(table->static_secret));
3296 lock_protect(&table->lock, &table->static_secret_len,
3297 sizeof(table->static_secret_len));
3298 lock_protect(&table->lock, table->static_secret,
3299 table->static_secret_len);
3300 lock_protect(&table->lock, &table->sv_scidlen,
3301 sizeof(table->sv_scidlen));
3302 lock_protect(&table->lock, &table->idle_timeout,
3303 sizeof(table->idle_timeout));
3304 lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree));
3305 lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree));
3306 lock_protect(&table->conid_lock, table->conid_tree,
3307 sizeof(*table->conid_tree));
3308 lock_protect(&table->lock, table->timer_tree,
3309 sizeof(*table->timer_tree));
3310 lock_protect(&table->size_lock, &table->current_size,
3311 sizeof(table->current_size));
3312 return table;
3313 }
3314
3315 /** delete elements from the connection tree */
3316 static void
conn_tree_del(rbnode_type * node,void * arg)3317 conn_tree_del(rbnode_type* node, void* arg)
3318 {
3319 struct doq_table* table = (struct doq_table*)arg;
3320 struct doq_conn* conn;
3321 if(!node)
3322 return;
3323 conn = (struct doq_conn*)node->key;
3324 if(conn->timer.timer_in_list) {
3325 /* Remove timer from list first, because finding the rbnode
3326 * element of the setlist of same timeouts needs tree lookup.
3327 * Edit the tree structure after that lookup. */
3328 doq_timer_list_remove(conn->table, &conn->timer);
3329 }
3330 if(conn->timer.timer_in_tree)
3331 doq_timer_tree_remove(conn->table, &conn->timer);
3332 doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen);
3333 doq_conn_delete(conn, table);
3334 }
3335
3336 /** delete elements from the connection id tree */
3337 static void
conid_tree_del(rbnode_type * node,void * ATTR_UNUSED (arg))3338 conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg))
3339 {
3340 if(!node)
3341 return;
3342 doq_conid_delete((struct doq_conid*)node->key);
3343 }
3344
3345 void
doq_table_delete(struct doq_table * table)3346 doq_table_delete(struct doq_table* table)
3347 {
3348 if(!table)
3349 return;
3350 lock_rw_destroy(&table->lock);
3351 free(table->static_secret);
3352 if(table->conn_tree) {
3353 traverse_postorder(table->conn_tree, conn_tree_del, table);
3354 free(table->conn_tree);
3355 }
3356 lock_rw_destroy(&table->conid_lock);
3357 if(table->conid_tree) {
3358 /* The tree should be empty, because the doq_conn_delete calls
3359 * above should have also removed their conid elements. */
3360 traverse_postorder(table->conid_tree, conid_tree_del, NULL);
3361 free(table->conid_tree);
3362 }
3363 lock_basic_destroy(&table->size_lock);
3364 if(table->timer_tree) {
3365 /* The tree should be empty, because the conn_tree_del calls
3366 * above should also have removed them. Also the doq_timer
3367 * is part of the doq_conn struct, so is already freed. */
3368 free(table->timer_tree);
3369 }
3370 table->write_list_first = NULL;
3371 table->write_list_last = NULL;
3372 free(table);
3373 }
3374
3375 struct doq_timer*
doq_timer_find_time(struct doq_table * table,struct timeval * tv)3376 doq_timer_find_time(struct doq_table* table, struct timeval* tv)
3377 {
3378 struct doq_timer key;
3379 struct rbnode_type* node;
3380 memset(&key, 0, sizeof(key));
3381 key.time.tv_sec = tv->tv_sec;
3382 key.time.tv_usec = tv->tv_usec;
3383 node = rbtree_search(table->timer_tree, &key);
3384 if(node)
3385 return (struct doq_timer*)node->key;
3386 return NULL;
3387 }
3388
3389 void
doq_timer_tree_remove(struct doq_table * table,struct doq_timer * timer)3390 doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer)
3391 {
3392 if(!timer->timer_in_tree)
3393 return;
3394 rbtree_delete(table->timer_tree, timer);
3395 timer->timer_in_tree = 0;
3396 /* This item could have more timers in the same set. */
3397 if(timer->setlist_first) {
3398 struct doq_timer* rb_timer = timer->setlist_first;
3399 /* del first element from setlist */
3400 if(rb_timer->setlist_next)
3401 rb_timer->setlist_next->setlist_prev = NULL;
3402 else
3403 timer->setlist_last = NULL;
3404 timer->setlist_first = rb_timer->setlist_next;
3405 rb_timer->setlist_prev = NULL;
3406 rb_timer->setlist_next = NULL;
3407 rb_timer->timer_in_list = 0;
3408 /* insert it into the tree as new rb element */
3409 memset(&rb_timer->node, 0, sizeof(rb_timer->node));
3410 rb_timer->node.key = rb_timer;
3411 rbtree_insert(table->timer_tree, &rb_timer->node);
3412 rb_timer->timer_in_tree = 1;
3413 /* the setlist, if any remainder, moves to the rb element */
3414 rb_timer->setlist_first = timer->setlist_first;
3415 rb_timer->setlist_last = timer->setlist_last;
3416 timer->setlist_first = NULL;
3417 timer->setlist_last = NULL;
3418 rb_timer->worker_doq_socket = timer->worker_doq_socket;
3419 }
3420 timer->worker_doq_socket = NULL;
3421 }
3422
3423 void
doq_timer_list_remove(struct doq_table * table,struct doq_timer * timer)3424 doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer)
3425 {
3426 struct doq_timer* rb_timer;
3427 if(!timer->timer_in_list)
3428 return;
3429 /* The item in the rbtree has the list start and end. */
3430 rb_timer = doq_timer_find_time(table, &timer->time);
3431 if(rb_timer) {
3432 if(timer->setlist_prev)
3433 timer->setlist_prev->setlist_next = timer->setlist_next;
3434 else
3435 rb_timer->setlist_first = timer->setlist_next;
3436 if(timer->setlist_next)
3437 timer->setlist_next->setlist_prev = timer->setlist_prev;
3438 else
3439 rb_timer->setlist_last = timer->setlist_prev;
3440 timer->setlist_prev = NULL;
3441 timer->setlist_next = NULL;
3442 }
3443 timer->timer_in_list = 0;
3444 }
3445
3446 /** doq append timer to setlist */
3447 static void
doq_timer_list_append(struct doq_timer * rb_timer,struct doq_timer * timer)3448 doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer)
3449 {
3450 log_assert(timer->timer_in_list == 0);
3451 timer->timer_in_list = 1;
3452 timer->setlist_next = NULL;
3453 timer->setlist_prev = rb_timer->setlist_last;
3454 if(rb_timer->setlist_last)
3455 rb_timer->setlist_last->setlist_next = timer;
3456 else
3457 rb_timer->setlist_first = timer;
3458 rb_timer->setlist_last = timer;
3459 }
3460
3461 void
doq_timer_unset(struct doq_table * table,struct doq_timer * timer)3462 doq_timer_unset(struct doq_table* table, struct doq_timer* timer)
3463 {
3464 if(timer->timer_in_list) {
3465 /* Remove timer from list first, because finding the rbnode
3466 * element of the setlist of same timeouts needs tree lookup.
3467 * Edit the tree structure after that lookup. */
3468 doq_timer_list_remove(table, timer);
3469 }
3470 if(timer->timer_in_tree)
3471 doq_timer_tree_remove(table, timer);
3472 timer->worker_doq_socket = NULL;
3473 }
3474
doq_timer_set(struct doq_table * table,struct doq_timer * timer,struct doq_server_socket * worker_doq_socket,struct timeval * tv)3475 void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
3476 struct doq_server_socket* worker_doq_socket, struct timeval* tv)
3477 {
3478 struct doq_timer* rb_timer;
3479 if(verbosity >= VERB_ALGO && timer->conn) {
3480 char a[256];
3481 struct timeval rel;
3482 addr_to_str((void*)&timer->conn->key.paddr.addr,
3483 timer->conn->key.paddr.addrlen, a, sizeof(a));
3484 timeval_subtract(&rel, tv, worker_doq_socket->now_tv);
3485 verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d",
3486 a, (int)tv->tv_sec, (int)tv->tv_usec,
3487 (int)rel.tv_sec, (int)rel.tv_usec);
3488 }
3489 if(timer->timer_in_tree || timer->timer_in_list) {
3490 if(timer->time.tv_sec == tv->tv_sec &&
3491 timer->time.tv_usec == tv->tv_usec)
3492 return; /* already set on that time */
3493 doq_timer_unset(table, timer);
3494 }
3495 timer->time.tv_sec = tv->tv_sec;
3496 timer->time.tv_usec = tv->tv_usec;
3497 rb_timer = doq_timer_find_time(table, tv);
3498 if(rb_timer) {
3499 /* There is a timeout already with this value. Timer is
3500 * added to the setlist. */
3501 doq_timer_list_append(rb_timer, timer);
3502 } else {
3503 /* There is no timeout with this value. Make timer a new
3504 * tree element. */
3505 memset(&timer->node, 0, sizeof(timer->node));
3506 timer->node.key = timer;
3507 rbtree_insert(table->timer_tree, &timer->node);
3508 timer->timer_in_tree = 1;
3509 timer->setlist_first = NULL;
3510 timer->setlist_last = NULL;
3511 timer->worker_doq_socket = worker_doq_socket;
3512 }
3513 }
3514
3515 struct doq_conn*
doq_conn_create(struct comm_point * c,struct doq_pkt_addr * paddr,const uint8_t * dcid,size_t dcidlen,uint32_t version)3516 doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr,
3517 const uint8_t* dcid, size_t dcidlen, uint32_t version)
3518 {
3519 struct doq_conn* conn = calloc(1, sizeof(*conn));
3520 if(!conn)
3521 return NULL;
3522 conn->node.key = conn;
3523 conn->doq_socket = c->doq_socket;
3524 conn->table = c->doq_socket->table;
3525 memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen);
3526 conn->key.paddr.addrlen = paddr->addrlen;
3527 memmove(&conn->key.paddr.localaddr, &paddr->localaddr,
3528 paddr->localaddrlen);
3529 conn->key.paddr.localaddrlen = paddr->localaddrlen;
3530 conn->key.paddr.ifindex = paddr->ifindex;
3531 conn->key.dcid = memdup((void*)dcid, dcidlen);
3532 if(!conn->key.dcid) {
3533 free(conn);
3534 return NULL;
3535 }
3536 conn->key.dcidlen = dcidlen;
3537 conn->version = version;
3538 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
3539 ngtcp2_ccerr_default(&conn->ccerr);
3540 #else
3541 ngtcp2_connection_close_error_default(&conn->last_error);
3542 #endif
3543 rbtree_init(&conn->stream_tree, &doq_stream_cmp);
3544 conn->timer.conn = conn;
3545 lock_basic_init(&conn->lock);
3546 lock_protect(&conn->lock, &conn->key, sizeof(conn->key));
3547 lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket));
3548 lock_protect(&conn->lock, &conn->table, sizeof(conn->table));
3549 lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted));
3550 lock_protect(&conn->lock, &conn->version, sizeof(conn->version));
3551 lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn));
3552 lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list));
3553 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
3554 lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr));
3555 #else
3556 lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error));
3557 #endif
3558 lock_protect(&conn->lock, &conn->tls_alert, sizeof(conn->tls_alert));
3559 lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl));
3560 lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt));
3561 lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len));
3562 lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn));
3563 lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree));
3564 lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first));
3565 lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last));
3566 lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest));
3567 lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list));
3568 lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev));
3569 lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next));
3570 return conn;
3571 }
3572
3573 /** delete stream tree node */
3574 static void
stream_tree_del(rbnode_type * node,void * arg)3575 stream_tree_del(rbnode_type* node, void* arg)
3576 {
3577 struct doq_table* table = (struct doq_table*)arg;
3578 struct doq_stream* stream;
3579 if(!node)
3580 return;
3581 stream = (struct doq_stream*)node;
3582 if(stream->in)
3583 doq_table_quic_size_subtract(table, stream->inlen);
3584 if(stream->out)
3585 doq_table_quic_size_subtract(table, stream->outlen);
3586 doq_table_quic_size_subtract(table, sizeof(*stream));
3587 doq_stream_delete(stream);
3588 }
3589
3590 void
doq_conn_delete(struct doq_conn * conn,struct doq_table * table)3591 doq_conn_delete(struct doq_conn* conn, struct doq_table* table)
3592 {
3593 if(!conn)
3594 return;
3595 lock_basic_destroy(&conn->lock);
3596 lock_rw_wrlock(&conn->table->conid_lock);
3597 doq_conn_clear_conids(conn);
3598 lock_rw_unlock(&conn->table->conid_lock);
3599 ngtcp2_conn_del(conn->conn);
3600 if(conn->stream_tree.count != 0) {
3601 traverse_postorder(&conn->stream_tree, stream_tree_del, table);
3602 }
3603 free(conn->key.dcid);
3604 SSL_free(conn->ssl);
3605 free(conn->close_pkt);
3606 free(conn);
3607 }
3608
3609 int
doq_conn_cmp(const void * key1,const void * key2)3610 doq_conn_cmp(const void* key1, const void* key2)
3611 {
3612 struct doq_conn* c = (struct doq_conn*)key1;
3613 struct doq_conn* d = (struct doq_conn*)key2;
3614 int r;
3615 /* Compared in the order destination address, then
3616 * local address, ifindex and then dcid.
3617 * So that for a search for findlessorequal for the destination
3618 * address will find connections to that address, with different
3619 * dcids.
3620 * Also a printout in sorted order prints the connections by IP
3621 * address of destination, and then a number of them depending on the
3622 * dcids. */
3623 if(c->key.paddr.addrlen != d->key.paddr.addrlen) {
3624 if(c->key.paddr.addrlen < d->key.paddr.addrlen)
3625 return -1;
3626 return 1;
3627 }
3628 if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr,
3629 c->key.paddr.addrlen))!=0)
3630 return r;
3631 if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) {
3632 if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen)
3633 return -1;
3634 return 1;
3635 }
3636 if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr,
3637 c->key.paddr.localaddrlen))!=0)
3638 return r;
3639 if(c->key.paddr.ifindex != d->key.paddr.ifindex) {
3640 if(c->key.paddr.ifindex < d->key.paddr.ifindex)
3641 return -1;
3642 return 1;
3643 }
3644 if(c->key.dcidlen != d->key.dcidlen) {
3645 if(c->key.dcidlen < d->key.dcidlen)
3646 return -1;
3647 return 1;
3648 }
3649 if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0)
3650 return r;
3651 return 0;
3652 }
3653
doq_conid_cmp(const void * key1,const void * key2)3654 int doq_conid_cmp(const void* key1, const void* key2)
3655 {
3656 struct doq_conid* c = (struct doq_conid*)key1;
3657 struct doq_conid* d = (struct doq_conid*)key2;
3658 if(c->cidlen != d->cidlen) {
3659 if(c->cidlen < d->cidlen)
3660 return -1;
3661 return 1;
3662 }
3663 return memcmp(c->cid, d->cid, c->cidlen);
3664 }
3665
doq_timer_cmp(const void * key1,const void * key2)3666 int doq_timer_cmp(const void* key1, const void* key2)
3667 {
3668 struct doq_timer* e = (struct doq_timer*)key1;
3669 struct doq_timer* f = (struct doq_timer*)key2;
3670 if(e->time.tv_sec < f->time.tv_sec)
3671 return -1;
3672 if(e->time.tv_sec > f->time.tv_sec)
3673 return 1;
3674 if(e->time.tv_usec < f->time.tv_usec)
3675 return -1;
3676 if(e->time.tv_usec > f->time.tv_usec)
3677 return 1;
3678 return 0;
3679 }
3680
doq_stream_cmp(const void * key1,const void * key2)3681 int doq_stream_cmp(const void* key1, const void* key2)
3682 {
3683 struct doq_stream* c = (struct doq_stream*)key1;
3684 struct doq_stream* d = (struct doq_stream*)key2;
3685 if(c->stream_id != d->stream_id) {
3686 if(c->stream_id < d->stream_id)
3687 return -1;
3688 return 1;
3689 }
3690 return 0;
3691 }
3692
3693 /** doq store a local address in repinfo */
3694 static void
doq_repinfo_store_localaddr(struct comm_reply * repinfo,struct doq_addr_storage * localaddr,socklen_t localaddrlen)3695 doq_repinfo_store_localaddr(struct comm_reply* repinfo,
3696 struct doq_addr_storage* localaddr, socklen_t localaddrlen)
3697 {
3698 /* use the pktinfo that we have for ancillary udp data otherwise,
3699 * this saves space for a sockaddr */
3700 memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo));
3701 if(addr_is_ip6((void*)localaddr, localaddrlen)) {
3702 #ifdef IPV6_PKTINFO
3703 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
3704 memmove(&repinfo->pktinfo.v6info.ipi6_addr,
3705 &sa6->sin6_addr, sizeof(struct in6_addr));
3706 repinfo->doq_srcport = sa6->sin6_port;
3707 #endif
3708 repinfo->srctype = 6;
3709 } else {
3710 #ifdef IP_PKTINFO
3711 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3712 memmove(&repinfo->pktinfo.v4info.ipi_addr,
3713 &sa->sin_addr, sizeof(struct in_addr));
3714 repinfo->doq_srcport = sa->sin_port;
3715 #elif defined(IP_RECVDSTADDR)
3716 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3717 memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr,
3718 sizeof(struct in_addr));
3719 repinfo->doq_srcport = sa->sin_port;
3720 #endif
3721 repinfo->srctype = 4;
3722 }
3723 }
3724
3725 /** doq retrieve localaddr from repinfo */
3726 static void
doq_repinfo_retrieve_localaddr(struct comm_reply * repinfo,struct doq_addr_storage * localaddr,socklen_t * localaddrlen)3727 doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo,
3728 struct doq_addr_storage* localaddr, socklen_t* localaddrlen)
3729 {
3730 if(repinfo->srctype == 6) {
3731 #ifdef IPV6_PKTINFO
3732 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
3733 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6);
3734 memset(sa6, 0, *localaddrlen);
3735 sa6->sin6_family = AF_INET6;
3736 memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr,
3737 *localaddrlen);
3738 sa6->sin6_port = repinfo->doq_srcport;
3739 #endif
3740 } else {
3741 #ifdef IP_PKTINFO
3742 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3743 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
3744 memset(sa, 0, *localaddrlen);
3745 sa->sin_family = AF_INET;
3746 memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr,
3747 *localaddrlen);
3748 sa->sin_port = repinfo->doq_srcport;
3749 #elif defined(IP_RECVDSTADDR)
3750 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
3751 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
3752 memset(sa, 0, *localaddrlen);
3753 sa->sin_family = AF_INET;
3754 memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr,
3755 sizeof(struct in_addr));
3756 sa->sin_port = repinfo->doq_srcport;
3757 #endif
3758 }
3759 }
3760
3761 /** doq write a connection key into repinfo, false if it does not fit */
3762 static int
doq_conn_key_store_repinfo(struct doq_conn_key * key,struct comm_reply * repinfo)3763 doq_conn_key_store_repinfo(struct doq_conn_key* key,
3764 struct comm_reply* repinfo)
3765 {
3766 repinfo->is_proxied = 0;
3767 repinfo->doq_ifindex = key->paddr.ifindex;
3768 repinfo->remote_addrlen = key->paddr.addrlen;
3769 memmove(&repinfo->remote_addr, &key->paddr.addr,
3770 repinfo->remote_addrlen);
3771 repinfo->client_addrlen = key->paddr.addrlen;
3772 memmove(&repinfo->client_addr, &key->paddr.addr,
3773 repinfo->client_addrlen);
3774 doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr,
3775 key->paddr.localaddrlen);
3776 if(key->dcidlen > sizeof(repinfo->doq_dcid))
3777 return 0;
3778 repinfo->doq_dcidlen = key->dcidlen;
3779 memmove(repinfo->doq_dcid, key->dcid, key->dcidlen);
3780 return 1;
3781 }
3782
3783 void
doq_conn_key_from_repinfo(struct doq_conn_key * key,struct comm_reply * repinfo)3784 doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo)
3785 {
3786 key->paddr.ifindex = repinfo->doq_ifindex;
3787 key->paddr.addrlen = repinfo->remote_addrlen;
3788 memmove(&key->paddr.addr, &repinfo->remote_addr,
3789 repinfo->remote_addrlen);
3790 doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr,
3791 &key->paddr.localaddrlen);
3792 key->dcidlen = repinfo->doq_dcidlen;
3793 key->dcid = repinfo->doq_dcid;
3794 }
3795
3796 /** doq add a stream to the connection */
3797 static void
doq_conn_add_stream(struct doq_conn * conn,struct doq_stream * stream)3798 doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream)
3799 {
3800 (void)rbtree_insert(&conn->stream_tree, &stream->node);
3801 }
3802
3803 /** doq delete a stream from the connection */
3804 static void
doq_conn_del_stream(struct doq_conn * conn,struct doq_stream * stream)3805 doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream)
3806 {
3807 (void)rbtree_delete(&conn->stream_tree, &stream->node);
3808 }
3809
3810 /** doq create new stream */
3811 static struct doq_stream*
doq_stream_create(int64_t stream_id)3812 doq_stream_create(int64_t stream_id)
3813 {
3814 struct doq_stream* stream = calloc(1, sizeof(*stream));
3815 if(!stream)
3816 return NULL;
3817 stream->node.key = stream;
3818 stream->stream_id = stream_id;
3819 return stream;
3820 }
3821
doq_stream_delete(struct doq_stream * stream)3822 void doq_stream_delete(struct doq_stream* stream)
3823 {
3824 if(!stream)
3825 return;
3826 free(stream->in);
3827 free(stream->out);
3828 free(stream);
3829 }
3830
3831 struct doq_stream*
doq_stream_find(struct doq_conn * conn,int64_t stream_id)3832 doq_stream_find(struct doq_conn* conn, int64_t stream_id)
3833 {
3834 rbnode_type* node;
3835 struct doq_stream key;
3836 key.node.key = &key;
3837 key.stream_id = stream_id;
3838 node = rbtree_search(&conn->stream_tree, &key);
3839 if(node)
3840 return (struct doq_stream*)node->key;
3841 return NULL;
3842 }
3843
3844 /** doq put stream on the conn write list */
3845 static void
doq_stream_on_write_list(struct doq_conn * conn,struct doq_stream * stream)3846 doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream)
3847 {
3848 if(stream->on_write_list)
3849 return;
3850 stream->write_prev = conn->stream_write_last;
3851 if(conn->stream_write_last)
3852 conn->stream_write_last->write_next = stream;
3853 else
3854 conn->stream_write_first = stream;
3855 conn->stream_write_last = stream;
3856 stream->write_next = NULL;
3857 stream->on_write_list = 1;
3858 }
3859
3860 /** doq remove stream from the conn write list */
3861 static void
doq_stream_off_write_list(struct doq_conn * conn,struct doq_stream * stream)3862 doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream)
3863 {
3864 if(!stream->on_write_list)
3865 return;
3866 if(stream->write_next)
3867 stream->write_next->write_prev = stream->write_prev;
3868 else conn->stream_write_last = stream->write_prev;
3869 if(stream->write_prev)
3870 stream->write_prev->write_next = stream->write_next;
3871 else conn->stream_write_first = stream->write_next;
3872 stream->write_prev = NULL;
3873 stream->write_next = NULL;
3874 stream->on_write_list = 0;
3875 }
3876
3877 /** doq stream remove in buffer */
3878 static void
doq_stream_remove_in_buffer(struct doq_stream * stream,struct doq_table * table)3879 doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table)
3880 {
3881 if(stream->in) {
3882 doq_table_quic_size_subtract(table, stream->inlen);
3883 free(stream->in);
3884 stream->in = NULL;
3885 stream->inlen = 0;
3886 }
3887 }
3888
3889 /** doq stream remove out buffer */
3890 static void
doq_stream_remove_out_buffer(struct doq_stream * stream,struct doq_table * table)3891 doq_stream_remove_out_buffer(struct doq_stream* stream,
3892 struct doq_table* table)
3893 {
3894 if(stream->out) {
3895 doq_table_quic_size_subtract(table, stream->outlen);
3896 free(stream->out);
3897 stream->out = NULL;
3898 stream->outlen = 0;
3899 }
3900 }
3901
3902 int
doq_stream_close(struct doq_conn * conn,struct doq_stream * stream,int send_shutdown)3903 doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
3904 int send_shutdown)
3905 {
3906 int ret;
3907 if(stream->is_closed)
3908 return 1;
3909 stream->is_closed = 1;
3910 doq_stream_off_write_list(conn, stream);
3911 if(send_shutdown) {
3912 verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d",
3913 (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE);
3914 ret = ngtcp2_conn_shutdown_stream(conn->conn,
3915 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
3916 0,
3917 #endif
3918 stream->stream_id, DOQ_APP_ERROR_CODE);
3919 if(ret != 0) {
3920 log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s",
3921 (int)stream->stream_id, ngtcp2_strerror(ret));
3922 return 0;
3923 }
3924 doq_conn_write_enable(conn);
3925 }
3926 verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1");
3927 ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1);
3928 doq_conn_write_enable(conn);
3929 doq_stream_remove_in_buffer(stream, conn->doq_socket->table);
3930 doq_stream_remove_out_buffer(stream, conn->doq_socket->table);
3931 doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream));
3932 doq_conn_del_stream(conn, stream);
3933 doq_stream_delete(stream);
3934 return 1;
3935 }
3936
3937 /** doq stream pick up answer data from buffer */
3938 static int
doq_stream_pickup_answer(struct doq_stream * stream,struct sldns_buffer * buf)3939 doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf)
3940 {
3941 stream->is_answer_available = 1;
3942 if(stream->out) {
3943 free(stream->out);
3944 stream->out = NULL;
3945 stream->outlen = 0;
3946 }
3947 stream->nwrite = 0;
3948 stream->outlen = sldns_buffer_limit(buf);
3949 /* For quic the output bytes have to stay allocated and available,
3950 * for potential resends, until the remote end has acknowledged them.
3951 * This includes the tcplen start uint16_t, in outlen_wire. */
3952 stream->outlen_wire = htons(stream->outlen);
3953 stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf));
3954 if(!stream->out) {
3955 log_err("doq could not send answer: out of memory");
3956 return 0;
3957 }
3958 return 1;
3959 }
3960
3961 int
doq_stream_send_reply(struct doq_conn * conn,struct doq_stream * stream,struct sldns_buffer * buf)3962 doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
3963 struct sldns_buffer* buf)
3964 {
3965 if(verbosity >= VERB_ALGO) {
3966 char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf),
3967 sldns_buffer_limit(buf));
3968 verbose(VERB_ALGO, "doq stream %d response\n%s",
3969 (int)stream->stream_id, (s?s:"null"));
3970 free(s);
3971 }
3972 if(stream->out)
3973 doq_table_quic_size_subtract(conn->doq_socket->table,
3974 stream->outlen);
3975 if(!doq_stream_pickup_answer(stream, buf))
3976 return 0;
3977 doq_table_quic_size_add(conn->doq_socket->table, stream->outlen);
3978 doq_stream_on_write_list(conn, stream);
3979 doq_conn_write_enable(conn);
3980 return 1;
3981 }
3982
3983 /** doq stream data length has completed, allocations can be done. False on
3984 * allocation failure. */
3985 static int
doq_stream_datalen_complete(struct doq_stream * stream,struct doq_table * table)3986 doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table)
3987 {
3988 if(stream->inlen > 1024*1024) {
3989 log_err("doq stream in length too large %d",
3990 (int)stream->inlen);
3991 return 0;
3992 }
3993 stream->in = calloc(1, stream->inlen);
3994 if(!stream->in) {
3995 log_err("doq could not read stream, calloc failed: "
3996 "out of memory");
3997 return 0;
3998 }
3999 doq_table_quic_size_add(table, stream->inlen);
4000 return 1;
4001 }
4002
4003 /** doq stream data is complete, the input data has been received. */
4004 static int
doq_stream_data_complete(struct doq_conn * conn,struct doq_stream * stream)4005 doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream)
4006 {
4007 struct comm_point* c;
4008 if(verbosity >= VERB_ALGO) {
4009 char* s = sldns_wire2str_pkt(stream->in, stream->inlen);
4010 char a[128];
4011 addr_to_str((void*)&conn->key.paddr.addr,
4012 conn->key.paddr.addrlen, a, sizeof(a));
4013 verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s",
4014 a, (int)stream->stream_id, (s?s:"null"));
4015 free(s);
4016 }
4017 stream->is_query_complete = 1;
4018 c = conn->doq_socket->cp;
4019 if(!stream->in) {
4020 verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer");
4021 return 0;
4022 }
4023 if(stream->inlen > sldns_buffer_capacity(c->buffer)) {
4024 verbose(VERB_ALGO, "doq_stream_data_complete: query too long");
4025 return 0;
4026 }
4027 sldns_buffer_clear(c->buffer);
4028 sldns_buffer_write(c->buffer, stream->in, stream->inlen);
4029 sldns_buffer_flip(c->buffer);
4030 c->repinfo.c = c;
4031 if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) {
4032 verbose(VERB_ALGO, "doq_stream_data_complete: connection "
4033 "DCID too long");
4034 return 0;
4035 }
4036 c->repinfo.doq_streamid = stream->stream_id;
4037 conn->doq_socket->current_conn = conn;
4038 fptr_ok(fptr_whitelist_comm_point(c->callback));
4039 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) {
4040 conn->doq_socket->current_conn = NULL;
4041 if(!doq_stream_send_reply(conn, stream, c->buffer)) {
4042 verbose(VERB_ALGO, "doq: failed to send_reply");
4043 return 0;
4044 }
4045 return 1;
4046 }
4047 conn->doq_socket->current_conn = NULL;
4048 return 1;
4049 }
4050
4051 /** doq receive data for a stream, more bytes of the incoming data */
4052 static int
doq_stream_recv_data(struct doq_stream * stream,const uint8_t * data,size_t datalen,int * recv_done,struct doq_table * table)4053 doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data,
4054 size_t datalen, int* recv_done, struct doq_table* table)
4055 {
4056 int got_data = 0;
4057 /* read the tcplength uint16_t at the start */
4058 if(stream->nread < 2) {
4059 uint16_t tcplen = 0;
4060 size_t todolen = 2 - stream->nread;
4061
4062 if(stream->nread > 0) {
4063 /* put in the already read byte if there is one */
4064 tcplen = stream->inlen;
4065 }
4066 if(datalen < todolen)
4067 todolen = datalen;
4068 memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen);
4069 stream->nread += todolen;
4070 data += todolen;
4071 datalen -= todolen;
4072 if(stream->nread == 2) {
4073 /* the initial length value is completed */
4074 stream->inlen = ntohs(tcplen);
4075 if(!doq_stream_datalen_complete(stream, table))
4076 return 0;
4077 } else {
4078 /* store for later */
4079 stream->inlen = tcplen;
4080 return 1;
4081 }
4082 }
4083 /* if there are more data bytes */
4084 if(datalen > 0) {
4085 size_t to_write = datalen;
4086 if(stream->nread-2 > stream->inlen) {
4087 verbose(VERB_ALGO, "doq stream buffer too small");
4088 return 0;
4089 }
4090 if(datalen > stream->inlen - (stream->nread-2))
4091 to_write = stream->inlen - (stream->nread-2);
4092 if(to_write > 0) {
4093 if(!stream->in) {
4094 verbose(VERB_ALGO, "doq: stream has "
4095 "no buffer");
4096 return 0;
4097 }
4098 memmove(stream->in+(stream->nread-2), data, to_write);
4099 stream->nread += to_write;
4100 data += to_write;
4101 datalen -= to_write;
4102 got_data = 1;
4103 }
4104 }
4105 /* Are there extra bytes received after the end? If so, log them. */
4106 if(datalen > 0) {
4107 if(verbosity >= VERB_ALGO)
4108 log_hex("doq stream has extra bytes received after end",
4109 (void*)data, datalen);
4110 }
4111 /* Is the input data complete? */
4112 if(got_data && stream->nread >= stream->inlen+2) {
4113 if(!stream->in) {
4114 verbose(VERB_ALGO, "doq: completed stream has "
4115 "no buffer");
4116 return 0;
4117 }
4118 *recv_done = 1;
4119 }
4120 return 1;
4121 }
4122
4123 /** doq receive FIN for a stream. No more bytes are going to arrive. */
4124 static int
doq_stream_recv_fin(struct doq_conn * conn,struct doq_stream * stream,int recv_done)4125 doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int
4126 recv_done)
4127 {
4128 if(!stream->is_query_complete && !recv_done) {
4129 verbose(VERB_ALGO, "doq: stream recv FIN, but is "
4130 "not complete, have %d of %d bytes",
4131 ((int)stream->nread)-2, (int)stream->inlen);
4132 if(!doq_stream_close(conn, stream, 1))
4133 return 0;
4134 }
4135 return 1;
4136 }
4137
/**
 * Fill a buffer with random bytes.
 * Every byte is the low 8 bits of a separate ub_random() call.
 * @param rnd: random state to draw from.
 * @param buf: buffer to fill.
 * @param len: number of bytes to write.
 */
void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len)
{
	size_t pos = 0;
	while(pos < len) {
		buf[pos] = ub_random(rnd)&0xff;
		pos++;
	}
}
4144
4145 /** generate new connection id, checks for duplicates.
4146 * caller must hold lock on conid tree. */
4147 static int
doq_conn_generate_new_conid(struct doq_conn * conn,uint8_t * data,size_t datalen)4148 doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data,
4149 size_t datalen)
4150 {
4151 int max_try = 100;
4152 int i;
4153 for(i=0; i<max_try; i++) {
4154 doq_fill_rand(conn->doq_socket->rnd, data, datalen);
4155 if(!doq_conid_find(conn->table, data, datalen)) {
4156 /* Found an unused connection id. */
4157 return 1;
4158 }
4159 }
4160 verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not "
4161 "generate random unused connection id value in %d attempts.",
4162 max_try);
4163 return 0;
4164 }
4165
4166 /** ngtcp2 rand callback function */
4167 static void
doq_rand_cb(uint8_t * dest,size_t destlen,const ngtcp2_rand_ctx * rand_ctx)4168 doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx)
4169 {
4170 struct ub_randstate* rnd = (struct ub_randstate*)
4171 rand_ctx->native_handle;
4172 doq_fill_rand(rnd, dest, destlen);
4173 }
4174
4175 /** ngtcp2 get_new_connection_id callback function */
4176 static int
doq_get_new_connection_id_cb(ngtcp2_conn * ATTR_UNUSED (conn),ngtcp2_cid * cid,uint8_t * token,size_t cidlen,void * user_data)4177 doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid,
4178 uint8_t* token, size_t cidlen, void* user_data)
4179 {
4180 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4181 /* Lock the conid tree, so we can check for duplicates while
4182 * generating the id, and then insert it, whilst keeping the tree
4183 * locked against other modifications, guaranteeing uniqueness. */
4184 lock_rw_wrlock(&doq_conn->table->conid_lock);
4185 if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) {
4186 lock_rw_unlock(&doq_conn->table->conid_lock);
4187 return NGTCP2_ERR_CALLBACK_FAILURE;
4188 }
4189 cid->datalen = cidlen;
4190 if(ngtcp2_crypto_generate_stateless_reset_token(token,
4191 doq_conn->doq_socket->static_secret,
4192 doq_conn->doq_socket->static_secret_len, cid) != 0) {
4193 lock_rw_unlock(&doq_conn->table->conid_lock);
4194 return NGTCP2_ERR_CALLBACK_FAILURE;
4195 }
4196 if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) {
4197 lock_rw_unlock(&doq_conn->table->conid_lock);
4198 return NGTCP2_ERR_CALLBACK_FAILURE;
4199 }
4200 lock_rw_unlock(&doq_conn->table->conid_lock);
4201 return 0;
4202 }
4203
4204 /** ngtcp2 remove_connection_id callback function */
4205 static int
doq_remove_connection_id_cb(ngtcp2_conn * ATTR_UNUSED (conn),const ngtcp2_cid * cid,void * user_data)4206 doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn),
4207 const ngtcp2_cid* cid, void* user_data)
4208 {
4209 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4210 lock_rw_wrlock(&doq_conn->table->conid_lock);
4211 doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen);
4212 lock_rw_unlock(&doq_conn->table->conid_lock);
4213 return 0;
4214 }
4215
4216 /** doq submit a new token */
4217 static int
doq_submit_new_token(struct doq_conn * conn)4218 doq_submit_new_token(struct doq_conn* conn)
4219 {
4220 uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN];
4221 ngtcp2_ssize tokenlen;
4222 int ret;
4223 const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn);
4224 ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
4225
4226 tokenlen = ngtcp2_crypto_generate_regular_token(token,
4227 conn->doq_socket->static_secret,
4228 conn->doq_socket->static_secret_len, path->remote.addr,
4229 path->remote.addrlen, ts);
4230 if(tokenlen < 0) {
4231 log_err("doq ngtcp2_crypto_generate_regular_token failed");
4232 return 1;
4233 }
4234
4235 verbose(VERB_ALGO, "doq submit new token");
4236 ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen);
4237 if(ret != 0) {
4238 log_err("doq ngtcp2_conn_submit_new_token failed: %s",
4239 ngtcp2_strerror(ret));
4240 return 0;
4241 }
4242 return 1;
4243 }
4244
4245 /** ngtcp2 handshake_completed callback function */
4246 static int
doq_handshake_completed_cb(ngtcp2_conn * ATTR_UNUSED (conn),void * user_data)4247 doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data)
4248 {
4249 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4250 verbose(VERB_ALGO, "doq handshake_completed callback");
4251 verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d",
4252 (int)ngtcp2_conn_get_max_data_left(doq_conn->conn));
4253 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
4254 verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d",
4255 (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn));
4256 #endif
4257 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d",
4258 (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn));
4259 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d",
4260 (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn));
4261 verbose(VERB_ALGO, "negotiated cipher name is %s",
4262 SSL_get_cipher_name(doq_conn->ssl));
4263 if(verbosity > VERB_ALGO) {
4264 const unsigned char* alpn = NULL;
4265 unsigned int alpnlen = 0;
4266 char alpnstr[128];
4267 SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen);
4268 if(alpnlen > sizeof(alpnstr)-1)
4269 alpnlen = sizeof(alpnstr)-1;
4270 memmove(alpnstr, alpn, alpnlen);
4271 alpnstr[alpnlen]=0;
4272 verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr);
4273 }
4274
4275 if(!doq_submit_new_token(doq_conn))
4276 return -1;
4277 return 0;
4278 }
4279
4280 /** ngtcp2 stream_open callback function */
4281 static int
doq_stream_open_cb(ngtcp2_conn * ATTR_UNUSED (conn),int64_t stream_id,void * user_data)4282 doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
4283 void* user_data)
4284 {
4285 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4286 struct doq_stream* stream;
4287 verbose(VERB_ALGO, "doq new stream %x", (int)stream_id);
4288 if(doq_stream_find(doq_conn, stream_id)) {
4289 verbose(VERB_ALGO, "doq: stream with this id already exists");
4290 return 0;
4291 }
4292 if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */
4293 !doq_table_quic_size_available(doq_conn->doq_socket->table,
4294 doq_conn->doq_socket->cfg, sizeof(*stream)
4295 + 100 /* estimated query in */
4296 + 512 /* estimated response out */
4297 )) {
4298 int rv;
4299 verbose(VERB_ALGO, "doq: no mem for new stream");
4300 rv = ngtcp2_conn_shutdown_stream(doq_conn->conn,
4301 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
4302 0,
4303 #endif
4304 stream_id, NGTCP2_CONNECTION_REFUSED);
4305 if(rv != 0) {
4306 log_err("ngtcp2_conn_shutdown_stream failed: %s",
4307 ngtcp2_strerror(rv));
4308 return NGTCP2_ERR_CALLBACK_FAILURE;
4309 }
4310 return 0;
4311 }
4312 stream = doq_stream_create(stream_id);
4313 if(!stream) {
4314 log_err("doq: could not doq_stream_create: out of memory");
4315 return NGTCP2_ERR_CALLBACK_FAILURE;
4316 }
4317 doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream));
4318 doq_conn_add_stream(doq_conn, stream);
4319 return 0;
4320 }
4321
4322 /** ngtcp2 recv_stream_data callback function */
4323 static int
doq_recv_stream_data_cb(ngtcp2_conn * ATTR_UNUSED (conn),uint32_t flags,int64_t stream_id,uint64_t offset,const uint8_t * data,size_t datalen,void * user_data,void * ATTR_UNUSED (stream_user_data))4324 doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
4325 int64_t stream_id, uint64_t offset, const uint8_t* data,
4326 size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data))
4327 {
4328 int recv_done = 0;
4329 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4330 struct doq_stream* stream;
4331 verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d "
4332 "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen,
4333 ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""),
4334 #ifdef NGTCP2_STREAM_DATA_FLAG_0RTT
4335 ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"")
4336 #else
4337 ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"")
4338 #endif
4339 );
4340 stream = doq_stream_find(doq_conn, stream_id);
4341 if(!stream) {
4342 verbose(VERB_ALGO, "doq: received stream data for "
4343 "unknown stream %d", (int)stream_id);
4344 return 0;
4345 }
4346 if(stream->is_closed) {
4347 verbose(VERB_ALGO, "doq: stream is closed, ignore recv data");
4348 return 0;
4349 }
4350 if(datalen != 0) {
4351 if(!doq_stream_recv_data(stream, data, datalen, &recv_done,
4352 doq_conn->doq_socket->table))
4353 return NGTCP2_ERR_CALLBACK_FAILURE;
4354 }
4355 if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) {
4356 if(!doq_stream_recv_fin(doq_conn, stream, recv_done))
4357 return NGTCP2_ERR_CALLBACK_FAILURE;
4358 }
4359 ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id,
4360 datalen);
4361 ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen);
4362 if(recv_done) {
4363 if(!doq_stream_data_complete(doq_conn, stream))
4364 return NGTCP2_ERR_CALLBACK_FAILURE;
4365 }
4366 return 0;
4367 }
4368
4369 /** ngtcp2 stream_close callback function */
4370 static int
doq_stream_close_cb(ngtcp2_conn * ATTR_UNUSED (conn),uint32_t flags,int64_t stream_id,uint64_t app_error_code,void * user_data,void * ATTR_UNUSED (stream_user_data))4371 doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
4372 int64_t stream_id, uint64_t app_error_code, void* user_data,
4373 void* ATTR_UNUSED(stream_user_data))
4374 {
4375 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4376 struct doq_stream* stream;
4377 if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)
4378 verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d",
4379 (int)stream_id,
4380 (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)?
4381 "APP_ERROR_CODE_SET ":""),
4382 (int)app_error_code);
4383 else
4384 verbose(VERB_ALGO, "doq stream close for stream id %d",
4385 (int)stream_id);
4386
4387 stream = doq_stream_find(doq_conn, stream_id);
4388 if(!stream) {
4389 verbose(VERB_ALGO, "doq: stream close for "
4390 "unknown stream %d", (int)stream_id);
4391 return 0;
4392 }
4393 if(!doq_stream_close(doq_conn, stream, 0))
4394 return NGTCP2_ERR_CALLBACK_FAILURE;
4395 return 0;
4396 }
4397
4398 /** ngtcp2 stream_reset callback function */
4399 static int
doq_stream_reset_cb(ngtcp2_conn * ATTR_UNUSED (conn),int64_t stream_id,uint64_t final_size,uint64_t app_error_code,void * user_data,void * ATTR_UNUSED (stream_user_data))4400 doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
4401 uint64_t final_size, uint64_t app_error_code, void* user_data,
4402 void* ATTR_UNUSED(stream_user_data))
4403 {
4404 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4405 struct doq_stream* stream;
4406 verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d "
4407 "app_error_code %d", (int)stream_id, (int)final_size,
4408 (int)app_error_code);
4409
4410 stream = doq_stream_find(doq_conn, stream_id);
4411 if(!stream) {
4412 verbose(VERB_ALGO, "doq: stream reset for "
4413 "unknown stream %d", (int)stream_id);
4414 return 0;
4415 }
4416 if(!doq_stream_close(doq_conn, stream, 0))
4417 return NGTCP2_ERR_CALLBACK_FAILURE;
4418 return 0;
4419 }
4420
4421 /** ngtcp2 acked_stream_data_offset callback function */
4422 static int
doq_acked_stream_data_offset_cb(ngtcp2_conn * ATTR_UNUSED (conn),int64_t stream_id,uint64_t offset,uint64_t datalen,void * user_data,void * ATTR_UNUSED (stream_user_data))4423 doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn),
4424 int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data,
4425 void* ATTR_UNUSED(stream_user_data))
4426 {
4427 struct doq_conn* doq_conn = (struct doq_conn*)user_data;
4428 struct doq_stream* stream;
4429 verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d "
4430 "datalen %d", (int)stream_id, (int)offset, (int)datalen);
4431
4432 stream = doq_stream_find(doq_conn, stream_id);
4433 if(!stream) {
4434 verbose(VERB_ALGO, "doq: stream acked data for "
4435 "unknown stream %d", (int)stream_id);
4436 return 0;
4437 }
4438 /* Acked the data from [offset .. offset+datalen). */
4439 if(stream->is_closed)
4440 return 0;
4441 if(offset+datalen >= stream->outlen) {
4442 doq_stream_remove_in_buffer(stream,
4443 doq_conn->doq_socket->table);
4444 doq_stream_remove_out_buffer(stream,
4445 doq_conn->doq_socket->table);
4446 }
4447 return 0;
4448 }
4449
4450 /** ngtc2p log_printf callback function */
4451 static void
doq_log_printf_cb(void * ATTR_UNUSED (user_data),const char * fmt,...)4452 doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...)
4453 {
4454 char buf[1024];
4455 va_list ap;
4456 va_start(ap, fmt);
4457 vsnprintf(buf, sizeof(buf), fmt, ap);
4458 verbose(VERB_ALGO, "libngtcp2: %s", buf);
4459 va_end(ap);
4460 }
4461
4462 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
4463 /** the doq application tx key callback, false on failure */
4464 static int
doq_application_tx_key_cb(struct doq_conn * conn)4465 doq_application_tx_key_cb(struct doq_conn* conn)
4466 {
4467 verbose(VERB_ALGO, "doq application tx key cb");
4468 /* The server does not want to open streams to the client,
4469 * the client instead initiates by opening bidi streams. */
4470 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d",
4471 (int)ngtcp2_conn_get_max_data_left(conn->conn));
4472 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
4473 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d",
4474 (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn));
4475 #endif
4476 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d",
4477 (int)ngtcp2_conn_get_streams_uni_left(conn->conn));
4478 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d",
4479 (int)ngtcp2_conn_get_streams_bidi_left(conn->conn));
4480 return 1;
4481 }
4482
4483 /** quic_method set_encryption_secrets function */
4484 static int
doq_set_encryption_secrets(SSL * ssl,OSSL_ENCRYPTION_LEVEL ossl_level,const uint8_t * read_secret,const uint8_t * write_secret,size_t secret_len)4485 doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
4486 const uint8_t *read_secret, const uint8_t *write_secret,
4487 size_t secret_len)
4488 {
4489 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
4490 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
4491 ngtcp2_encryption_level
4492 #else
4493 ngtcp2_crypto_level
4494 #endif
4495 level =
4496 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
4497 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
4498 #else
4499 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
4500 #endif
4501
4502 if(read_secret) {
4503 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level);
4504 if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn,
4505 NULL, NULL, NULL, level, read_secret, secret_len)
4506 != 0) {
4507 log_err("ngtcp2_crypto_derive_and_install_rx_key "
4508 "failed");
4509 return 0;
4510 }
4511 }
4512
4513 if(write_secret) {
4514 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level);
4515 if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn,
4516 NULL, NULL, NULL, level, write_secret, secret_len)
4517 != 0) {
4518 log_err("ngtcp2_crypto_derive_and_install_tx_key "
4519 "failed");
4520 return 0;
4521 }
4522 if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) {
4523 if(!doq_application_tx_key_cb(doq_conn))
4524 return 0;
4525 }
4526 }
4527 return 1;
4528 }
4529
4530 /** quic_method add_handshake_data function */
4531 static int
doq_add_handshake_data(SSL * ssl,OSSL_ENCRYPTION_LEVEL ossl_level,const uint8_t * data,size_t len)4532 doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
4533 const uint8_t *data, size_t len)
4534 {
4535 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
4536 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
4537 ngtcp2_encryption_level
4538 #else
4539 ngtcp2_crypto_level
4540 #endif
4541 level =
4542 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
4543 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
4544 #else
4545 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
4546 #endif
4547 int rv;
4548
4549 verbose(VERB_ALGO, "doq_add_handshake_data: "
4550 "ngtcp2_con_submit_crypto_data level %d", (int)level);
4551 rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len);
4552 if(rv != 0) {
4553 log_err("ngtcp2_conn_submit_crypto_data failed: %s",
4554 ngtcp2_strerror(rv));
4555 ngtcp2_conn_set_tls_error(doq_conn->conn, rv);
4556 return 0;
4557 }
4558 return 1;
4559 }
4560
4561 /** quic_method flush_flight function */
4562 static int
doq_flush_flight(SSL * ATTR_UNUSED (ssl))4563 doq_flush_flight(SSL* ATTR_UNUSED(ssl))
4564 {
4565 return 1;
4566 }
4567
4568 /** quic_method send_alert function */
4569 static int
doq_send_alert(SSL * ssl,enum ssl_encryption_level_t ATTR_UNUSED (level),uint8_t alert)4570 doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level),
4571 uint8_t alert)
4572 {
4573 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
4574 doq_conn->tls_alert = alert;
4575 return 1;
4576 }
4577 #endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */
4578
4579 /** ALPN select callback for the doq SSL context */
4580 static int
doq_alpn_select_cb(SSL * ATTR_UNUSED (ssl),const unsigned char ** out,unsigned char * outlen,const unsigned char * in,unsigned int inlen,void * ATTR_UNUSED (arg))4581 doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out,
4582 unsigned char* outlen, const unsigned char* in, unsigned int inlen,
4583 void* ATTR_UNUSED(arg))
4584 {
4585 /* select "doq" */
4586 int ret = SSL_select_next_proto((void*)out, outlen,
4587 (const unsigned char*)"\x03""doq", 4, in, inlen);
4588 if(ret == OPENSSL_NPN_NEGOTIATED)
4589 return SSL_TLSEXT_ERR_OK;
4590 verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does "
4591 "not have 'doq'");
4592 return SSL_TLSEXT_ERR_ALERT_FATAL;
4593 }
4594
/**
 * Create the SSL context for the DoQ server.
 * The context is restricted to TLS 1.3, gets the certificate chain and the
 * private key loaded, optionally requires client certificates, and is
 * configured for use with ngtcp2.
 * Without HAVE_NGTCP2 the function does nothing and returns NULL.
 * @param key: private key file name (PEM). Must be a nonempty string.
 * @param pem: certificate chain file name. Must be a nonempty string.
 * @param verifypem: if a nonempty string, file with CA certificates; client
 *	certificates are then requested and verified against it.
 * @return the SSL_CTX as a void pointer, or NULL on failure (the error has
 *	been logged).
 */
void* quic_sslctx_create(char* key, char* pem, char* verifypem)
{
#ifdef HAVE_NGTCP2
	char* sid_ctx = "unbound server";
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
	/* The quic_method needs to remain valid during the SSL_CTX
	 * lifetime. This routine has no doq_server_socket to store an
	 * allocation in, so the table of callbacks has static storage.
	 * NOTE(review): nothing is stored in a doq_server_socket here;
	 * confirm that its cleanup accepts a NULL quic_method. */
	static SSL_QUIC_METHOD quic_method;
#endif
	SSL_CTX* ctx = SSL_CTX_new(TLS_server_method());
	if(!ctx) {
		log_crypto_err("Could not SSL_CTX_new");
		return NULL;
	}
	if(!key || key[0] == 0) {
		log_err("doq: error, no tls-service-key file specified");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!pem || pem[0] == 0) {
		log_err("doq: error, no tls-service-pem file specified");
		SSL_CTX_free(ctx);
		return NULL;
	}
	SSL_CTX_set_options(ctx,
		(SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
		SSL_OP_SINGLE_ECDH_USE |
		SSL_OP_CIPHER_SERVER_PREFERENCE |
		SSL_OP_NO_ANTI_REPLAY);
	SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
	/* only TLS 1.3 is allowed, both as minimum and as maximum */
	SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
	SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
#ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
	SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL);
#endif
	SSL_CTX_set_default_verify_paths(ctx);
	if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
		log_err("doq: error for cert file: %s", pem);
		log_crypto_err("doq: error in "
			"SSL_CTX_use_certificate_chain_file");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
		log_err("doq: error for private key file: %s", key);
		log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!SSL_CTX_check_private_key(ctx)) {
		log_err("doq: error for key file: %s", key);
		log_crypto_err("doq: error in SSL_CTX_check_private_key");
		SSL_CTX_free(ctx);
		return NULL;
	}
	SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx));
	if(verifypem && verifypem[0]) {
		/* client certificates are required and checked */
		if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
			log_err("doq: error for verify pem file: %s",
				verifypem);
			log_crypto_err("doq: error in "
				"SSL_CTX_load_verify_locations");
			SSL_CTX_free(ctx);
			return NULL;
		}
		SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(
			verifypem));
		SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|
			SSL_VERIFY_CLIENT_ONCE|
			SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
	}

	SSL_CTX_set_max_early_data(ctx, 0xffffffff);
#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
	if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) {
		log_err("ngtcp2_crypto_quictls_configure_server_context failed");
		SSL_CTX_free(ctx);
		return NULL;
	}
#else /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */
	/* Fill in the callbacks; every call stores the same values. */
	quic_method.set_encryption_secrets = doq_set_encryption_secrets;
	quic_method.add_handshake_data = doq_add_handshake_data;
	quic_method.flush_flight = doq_flush_flight;
	quic_method.send_alert = doq_send_alert;
	SSL_CTX_set_quic_method(ctx, &quic_method);
#endif
	return ctx;
#else /* HAVE_NGTCP2 */
	(void)key; (void)pem; (void)verifypem;
	return NULL;
#endif /* HAVE_NGTCP2 */
}
4695
4696 /** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */
doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref * conn_ref)4697 static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref)
4698 {
4699 struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data;
4700 return conn->conn;
4701 }
4702
4703 /** create new SSL session for server connection */
4704 static SSL*
doq_ssl_server_setup(SSL_CTX * ctx,struct doq_conn * conn)4705 doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn)
4706 {
4707 SSL* ssl = SSL_new(ctx);
4708 if(!ssl) {
4709 log_crypto_err("doq: SSL_new failed");
4710 return NULL;
4711 }
4712 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
4713 conn->conn_ref.get_conn = &doq_conn_ref_get_conn;
4714 conn->conn_ref.user_data = conn;
4715 SSL_set_app_data(ssl, &conn->conn_ref);
4716 #else
4717 SSL_set_app_data(ssl, conn);
4718 #endif
4719 SSL_set_accept_state(ssl);
4720 SSL_set_quic_early_data_enabled(ssl, 1);
4721 return ssl;
4722 }
4723
4724 int
doq_conn_setup(struct doq_conn * conn,uint8_t * scid,size_t scidlen,uint8_t * ocid,size_t ocidlen,const uint8_t * token,size_t tokenlen)4725 doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
4726 uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen)
4727 {
4728 int rv;
4729 struct ngtcp2_cid dcid, sv_scid, scid_cid;
4730 struct ngtcp2_path path;
4731 struct ngtcp2_callbacks callbacks;
4732 struct ngtcp2_settings settings;
4733 struct ngtcp2_transport_params params;
4734 memset(&dcid, 0, sizeof(dcid));
4735 memset(&sv_scid, 0, sizeof(sv_scid));
4736 memset(&scid_cid, 0, sizeof(scid_cid));
4737 memset(&path, 0, sizeof(path));
4738 memset(&callbacks, 0, sizeof(callbacks));
4739 memset(&settings, 0, sizeof(settings));
4740 memset(¶ms, 0, sizeof(params));
4741
4742 ngtcp2_cid_init(&scid_cid, scid, scidlen);
4743 ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen);
4744
4745 path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr;
4746 path.remote.addrlen = conn->key.paddr.addrlen;
4747 path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr;
4748 path.local.addrlen = conn->key.paddr.localaddrlen;
4749
4750 callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb;
4751 callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb;
4752 callbacks.encrypt = ngtcp2_crypto_encrypt_cb;
4753 callbacks.decrypt = ngtcp2_crypto_decrypt_cb;
4754 callbacks.hp_mask = ngtcp2_crypto_hp_mask;
4755 callbacks.update_key = ngtcp2_crypto_update_key_cb;
4756 callbacks.delete_crypto_aead_ctx =
4757 ngtcp2_crypto_delete_crypto_aead_ctx_cb;
4758 callbacks.delete_crypto_cipher_ctx =
4759 ngtcp2_crypto_delete_crypto_cipher_ctx_cb;
4760 callbacks.get_path_challenge_data =
4761 ngtcp2_crypto_get_path_challenge_data_cb;
4762 callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb;
4763 callbacks.rand = doq_rand_cb;
4764 callbacks.get_new_connection_id = doq_get_new_connection_id_cb;
4765 callbacks.remove_connection_id = doq_remove_connection_id_cb;
4766 callbacks.handshake_completed = doq_handshake_completed_cb;
4767 callbacks.stream_open = doq_stream_open_cb;
4768 callbacks.stream_close = doq_stream_close_cb;
4769 callbacks.stream_reset = doq_stream_reset_cb;
4770 callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb;
4771 callbacks.recv_stream_data = doq_recv_stream_data_cb;
4772
4773 ngtcp2_settings_default(&settings);
4774 if(verbosity >= VERB_ALGO) {
4775 settings.log_printf = doq_log_printf_cb;
4776 }
4777 settings.rand_ctx.native_handle = conn->doq_socket->rnd;
4778 settings.initial_ts = doq_get_timestamp_nanosec();
4779 settings.max_stream_window = 6*1024*1024;
4780 settings.max_window = 6*1024*1024;
4781 #ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN
4782 settings.token = (void*)token;
4783 settings.tokenlen = tokenlen;
4784 #else
4785 settings.token.base = (void*)token;
4786 settings.token.len = tokenlen;
4787 #endif
4788
4789 ngtcp2_transport_params_default(¶ms);
4790 params.max_idle_timeout = conn->doq_socket->idle_timeout;
4791 params.active_connection_id_limit = 7;
4792 params.initial_max_stream_data_bidi_local = 256*1024;
4793 params.initial_max_stream_data_bidi_remote = 256*1024;
4794 params.initial_max_data = 1024*1024;
4795 /* DoQ uses bidi streams, so we allow 0 uni streams. */
4796 params.initial_max_streams_uni = 0;
4797 /* Initial max on number of bidi streams the remote end can open.
4798 * That is the number of queries it can make, at first. */
4799 params.initial_max_streams_bidi = 10;
4800 if(ocid) {
4801 ngtcp2_cid_init(¶ms.original_dcid, ocid, ocidlen);
4802 ngtcp2_cid_init(¶ms.retry_scid, conn->key.dcid,
4803 conn->key.dcidlen);
4804 params.retry_scid_present = 1;
4805 } else {
4806 ngtcp2_cid_init(¶ms.original_dcid, conn->key.dcid,
4807 conn->key.dcidlen);
4808 }
4809 #ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT
4810 params.original_dcid_present = 1;
4811 #endif
4812 doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token,
4813 sizeof(params.stateless_reset_token));
4814 sv_scid.datalen = conn->doq_socket->sv_scidlen;
4815 lock_rw_wrlock(&conn->table->conid_lock);
4816 if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) {
4817 lock_rw_unlock(&conn->table->conid_lock);
4818 return 0;
4819 }
4820
4821 rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path,
4822 conn->version, &callbacks, &settings, ¶ms, NULL, conn);
4823 if(rv != 0) {
4824 lock_rw_unlock(&conn->table->conid_lock);
4825 log_err("ngtcp2_conn_server_new failed: %s",
4826 ngtcp2_strerror(rv));
4827 return 0;
4828 }
4829 if(!doq_conn_setup_conids(conn)) {
4830 lock_rw_unlock(&conn->table->conid_lock);
4831 log_err("doq_conn_setup_conids failed: out of memory");
4832 return 0;
4833 }
4834 lock_rw_unlock(&conn->table->conid_lock);
4835 conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx,
4836 conn);
4837 if(!conn->ssl) {
4838 log_err("doq_ssl_server_setup failed");
4839 return 0;
4840 }
4841 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl);
4842 doq_conn_write_enable(conn);
4843 return 1;
4844 }
4845
4846 struct doq_conid*
doq_conid_find(struct doq_table * table,const uint8_t * data,size_t datalen)4847 doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen)
4848 {
4849 struct rbnode_type* node;
4850 struct doq_conid key;
4851 key.node.key = &key;
4852 key.cid = (void*)data;
4853 key.cidlen = datalen;
4854 node = rbtree_search(table->conid_tree, &key);
4855 if(node)
4856 return (struct doq_conid*)node->key;
4857 return NULL;
4858 }
4859
4860 /** insert conid in the conid list */
4861 static void
doq_conid_list_insert(struct doq_conn * conn,struct doq_conid * conid)4862 doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid)
4863 {
4864 conid->prev = NULL;
4865 conid->next = conn->conid_list;
4866 if(conn->conid_list)
4867 conn->conid_list->prev = conid;
4868 conn->conid_list = conid;
4869 }
4870
4871 /** remove conid from the conid list */
4872 static void
doq_conid_list_remove(struct doq_conn * conn,struct doq_conid * conid)4873 doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid)
4874 {
4875 if(conid->prev)
4876 conid->prev->next = conid->next;
4877 else conn->conid_list = conid->next;
4878 if(conid->next)
4879 conid->next->prev = conid->prev;
4880 }
4881
4882 /** create a doq_conid */
4883 static struct doq_conid*
doq_conid_create(uint8_t * data,size_t datalen,struct doq_conn_key * key)4884 doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key)
4885 {
4886 struct doq_conid* conid;
4887 conid = calloc(1, sizeof(*conid));
4888 if(!conid)
4889 return NULL;
4890 conid->cid = memdup(data, datalen);
4891 if(!conid->cid) {
4892 free(conid);
4893 return NULL;
4894 }
4895 conid->cidlen = datalen;
4896 conid->node.key = conid;
4897 conid->key = *key;
4898 conid->key.dcid = memdup(key->dcid, key->dcidlen);
4899 if(!conid->key.dcid) {
4900 free(conid->cid);
4901 free(conid);
4902 return NULL;
4903 }
4904 return conid;
4905 }
4906
4907 void
doq_conid_delete(struct doq_conid * conid)4908 doq_conid_delete(struct doq_conid* conid)
4909 {
4910 if(!conid)
4911 return;
4912 free(conid->key.dcid);
4913 free(conid->cid);
4914 free(conid);
4915 }
4916
4917 /** return true if the conid is for the conn. */
4918 static int
conid_is_for_conn(struct doq_conn * conn,struct doq_conid * conid)4919 conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid)
4920 {
4921 if(conid->key.dcidlen == conn->key.dcidlen &&
4922 memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0
4923 && conid->key.paddr.addrlen == conn->key.paddr.addrlen &&
4924 memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr,
4925 conid->key.paddr.addrlen) == 0 &&
4926 conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen &&
4927 memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr,
4928 conid->key.paddr.localaddrlen) == 0 &&
4929 conid->key.paddr.ifindex == conn->key.paddr.ifindex)
4930 return 1;
4931 return 0;
4932 }
4933
4934 int
doq_conn_associate_conid(struct doq_conn * conn,uint8_t * data,size_t datalen)4935 doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen)
4936 {
4937 struct doq_conid* conid;
4938 conid = doq_conid_find(conn->table, data, datalen);
4939 if(conid && !conid_is_for_conn(conn, conid)) {
4940 verbose(VERB_ALGO, "doq connection id already exists for "
4941 "another doq_conn. Ignoring second connection id.");
4942 /* Already exists to another conn, ignore it.
4943 * This works, in that the conid is listed in the doq_conn
4944 * conid_list element, and removed from there. So our conid
4945 * tree and list are fine, when created and removed.
4946 * The tree now does not have the lookup element pointing
4947 * to this connection. */
4948 return 1;
4949 }
4950 if(conid)
4951 return 1; /* already inserted */
4952 conid = doq_conid_create(data, datalen, &conn->key);
4953 if(!conid)
4954 return 0;
4955 doq_conid_list_insert(conn, conid);
4956 (void)rbtree_insert(conn->table->conid_tree, &conid->node);
4957 return 1;
4958 }
4959
4960 void
doq_conn_dissociate_conid(struct doq_conn * conn,const uint8_t * data,size_t datalen)4961 doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data,
4962 size_t datalen)
4963 {
4964 struct doq_conid* conid;
4965 conid = doq_conid_find(conn->table, data, datalen);
4966 if(conid && !conid_is_for_conn(conn, conid))
4967 return;
4968 if(conid) {
4969 (void)rbtree_delete(conn->table->conid_tree,
4970 conid->node.key);
4971 doq_conid_list_remove(conn, conid);
4972 doq_conid_delete(conid);
4973 }
4974 }
4975
4976 /** associate the scid array and also the dcid.
4977 * caller must hold the locks on conn and doq_table.conid_lock. */
4978 static int
doq_conn_setup_id_array_and_dcid(struct doq_conn * conn,struct ngtcp2_cid * scids,size_t num_scid)4979 doq_conn_setup_id_array_and_dcid(struct doq_conn* conn,
4980 struct ngtcp2_cid* scids, size_t num_scid)
4981 {
4982 size_t i;
4983 for(i=0; i<num_scid; i++) {
4984 if(!doq_conn_associate_conid(conn, scids[i].data,
4985 scids[i].datalen))
4986 return 0;
4987 }
4988 if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen))
4989 return 0;
4990 return 1;
4991 }
4992
4993 int
doq_conn_setup_conids(struct doq_conn * conn)4994 doq_conn_setup_conids(struct doq_conn* conn)
4995 {
4996 size_t num_scid =
4997 #ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID
4998 ngtcp2_conn_get_scid(conn->conn, NULL);
4999 #else
5000 ngtcp2_conn_get_num_scid(conn->conn);
5001 #endif
5002 if(num_scid <= 4) {
5003 struct ngtcp2_cid ids[4];
5004 /* Usually there are not that many scids when just accepted,
5005 * like only 2. */
5006 ngtcp2_conn_get_scid(conn->conn, ids);
5007 return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid);
5008 } else {
5009 struct ngtcp2_cid *scids = calloc(num_scid,
5010 sizeof(struct ngtcp2_cid));
5011 if(!scids)
5012 return 0;
5013 ngtcp2_conn_get_scid(conn->conn, scids);
5014 if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) {
5015 free(scids);
5016 return 0;
5017 }
5018 free(scids);
5019 }
5020 return 1;
5021 }
5022
5023 void
doq_conn_clear_conids(struct doq_conn * conn)5024 doq_conn_clear_conids(struct doq_conn* conn)
5025 {
5026 struct doq_conid* p, *next;
5027 if(!conn)
5028 return;
5029 p = conn->conid_list;
5030 while(p) {
5031 next = p->next;
5032 (void)rbtree_delete(conn->table->conid_tree, p->node.key);
5033 doq_conid_delete(p);
5034 p = next;
5035 }
5036 conn->conid_list = NULL;
5037 }
5038
doq_get_timestamp_nanosec(void)5039 ngtcp2_tstamp doq_get_timestamp_nanosec(void)
5040 {
5041 #ifdef CLOCK_REALTIME
5042 struct timespec tp;
5043 memset(&tp, 0, sizeof(tp));
5044 /* Get a nanosecond time, that can be compared with the event base. */
5045 if(clock_gettime(CLOCK_REALTIME, &tp) == -1) {
5046 log_err("clock_gettime failed: %s", strerror(errno));
5047 }
5048 return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) +
5049 ((uint64_t)tp.tv_nsec);
5050 #else
5051 struct timeval tv;
5052 if(gettimeofday(&tv, NULL) < 0) {
5053 log_err("gettimeofday failed: %s", strerror(errno));
5054 }
5055 return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) +
5056 ((uint64_t)tv.tv_usec)*((uint64_t)1000);
5057 #endif /* CLOCK_REALTIME */
5058 }
5059
5060 /** doq start the closing period for the connection. */
5061 static int
doq_conn_start_closing_period(struct comm_point * c,struct doq_conn * conn)5062 doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn)
5063 {
5064 struct ngtcp2_path_storage ps;
5065 struct ngtcp2_pkt_info pi;
5066 ngtcp2_ssize ret;
5067 if(!conn)
5068 return 1;
5069 if(
5070 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
5071 ngtcp2_conn_in_closing_period(conn->conn)
5072 #else
5073 ngtcp2_conn_is_in_closing_period(conn->conn)
5074 #endif
5075 )
5076 return 1;
5077 if(
5078 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
5079 ngtcp2_conn_in_draining_period(conn->conn)
5080 #else
5081 ngtcp2_conn_is_in_draining_period(conn->conn)
5082 #endif
5083 ) {
5084 doq_conn_write_disable(conn);
5085 return 1;
5086 }
5087 ngtcp2_path_storage_zero(&ps);
5088 sldns_buffer_clear(c->doq_socket->pkt_buf);
5089 /* the call to ngtcp2_conn_write_connection_close causes the
5090 * conn to be closed. It is now in the closing period. */
5091 ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path,
5092 &pi, sldns_buffer_begin(c->doq_socket->pkt_buf),
5093 sldns_buffer_remaining(c->doq_socket->pkt_buf),
5094 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5095 &conn->ccerr
5096 #else
5097 &conn->last_error
5098 #endif
5099 , doq_get_timestamp_nanosec());
5100 if(ret < 0) {
5101 log_err("doq ngtcp2_conn_write_connection_close failed: %s",
5102 ngtcp2_strerror(ret));
5103 return 0;
5104 }
5105 if(ret == 0) {
5106 return 0;
5107 }
5108 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
5109 sldns_buffer_flip(c->doq_socket->pkt_buf);
5110
5111 /* The close packet is allocated, because it may have to be repeated.
5112 * When incoming packets have this connection dcid. */
5113 conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf),
5114 sldns_buffer_limit(c->doq_socket->pkt_buf));
5115 if(!conn->close_pkt) {
5116 log_err("doq: could not allocate close packet: out of memory");
5117 return 0;
5118 }
5119 conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
5120 conn->close_ecn = pi.ecn;
5121 return 1;
5122 }
5123
5124 /** doq send the close packet for the connection, perhaps again. */
5125 int
doq_conn_send_close(struct comm_point * c,struct doq_conn * conn)5126 doq_conn_send_close(struct comm_point* c, struct doq_conn* conn)
5127 {
5128 if(!conn)
5129 return 0;
5130 if(!conn->close_pkt)
5131 return 0;
5132 if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf))
5133 return 0;
5134 sldns_buffer_clear(c->doq_socket->pkt_buf);
5135 sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len);
5136 sldns_buffer_flip(c->doq_socket->pkt_buf);
5137 verbose(VERB_ALGO, "doq send connection close");
5138 doq_send_pkt(c, &conn->key.paddr, conn->close_ecn);
5139 doq_conn_write_disable(conn);
5140 return 1;
5141 }
5142
5143 /** doq close the connection on error. If it returns a failure, it
5144 * does not wait to send a close, and the connection can be dropped. */
5145 static int
doq_conn_close_error(struct comm_point * c,struct doq_conn * conn)5146 doq_conn_close_error(struct comm_point* c, struct doq_conn* conn)
5147 {
5148 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5149 if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE)
5150 return 0;
5151 #else
5152 if(conn->last_error.type ==
5153 NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE)
5154 return 0;
5155 #endif
5156 if(!doq_conn_start_closing_period(c, conn))
5157 return 0;
5158 if(
5159 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
5160 ngtcp2_conn_in_draining_period(conn->conn)
5161 #else
5162 ngtcp2_conn_is_in_draining_period(conn->conn)
5163 #endif
5164 ) {
5165 doq_conn_write_disable(conn);
5166 return 1;
5167 }
5168 doq_conn_write_enable(conn);
5169 if(!doq_conn_send_close(c, conn))
5170 return 0;
5171 return 1;
5172 }
5173
5174 int
doq_conn_recv(struct comm_point * c,struct doq_pkt_addr * paddr,struct doq_conn * conn,struct ngtcp2_pkt_info * pi,int * err_retry,int * err_drop)5175 doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr,
5176 struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry,
5177 int* err_drop)
5178 {
5179 int ret;
5180 ngtcp2_tstamp ts;
5181 struct ngtcp2_path path;
5182 memset(&path, 0, sizeof(path));
5183 path.remote.addr = (struct sockaddr*)&paddr->addr;
5184 path.remote.addrlen = paddr->addrlen;
5185 path.local.addr = (struct sockaddr*)&paddr->localaddr;
5186 path.local.addrlen = paddr->localaddrlen;
5187 ts = doq_get_timestamp_nanosec();
5188
5189 ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi,
5190 sldns_buffer_begin(c->doq_socket->pkt_buf),
5191 sldns_buffer_limit(c->doq_socket->pkt_buf), ts);
5192 if(ret != 0) {
5193 if(err_retry)
5194 *err_retry = 0;
5195 if(err_drop)
5196 *err_drop = 0;
5197 if(ret == NGTCP2_ERR_DRAINING) {
5198 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
5199 ngtcp2_strerror(ret));
5200 doq_conn_write_disable(conn);
5201 return 0;
5202 } else if(ret == NGTCP2_ERR_DROP_CONN) {
5203 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
5204 ngtcp2_strerror(ret));
5205 if(err_drop)
5206 *err_drop = 1;
5207 return 0;
5208 } else if(ret == NGTCP2_ERR_RETRY) {
5209 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
5210 ngtcp2_strerror(ret));
5211 if(err_retry)
5212 *err_retry = 1;
5213 if(err_drop)
5214 *err_drop = 1;
5215 return 0;
5216 } else if(ret == NGTCP2_ERR_CRYPTO) {
5217 if(
5218 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5219 !conn->ccerr.error_code
5220 #else
5221 !conn->last_error.error_code
5222 #endif
5223 ) {
5224 /* in picotls the tls alert may need to be
5225 * copied, but this is with openssl. And there
5226 * is conn->tls_alert. */
5227 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5228 ngtcp2_ccerr_set_tls_alert(&conn->ccerr,
5229 conn->tls_alert, NULL, 0);
5230 #else
5231 ngtcp2_connection_close_error_set_transport_error_tls_alert(
5232 &conn->last_error, conn->tls_alert,
5233 NULL, 0);
5234 #endif
5235 }
5236 } else {
5237 if(
5238 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5239 !conn->ccerr.error_code
5240 #else
5241 !conn->last_error.error_code
5242 #endif
5243 ) {
5244 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5245 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret,
5246 NULL, 0);
5247 #else
5248 ngtcp2_connection_close_error_set_transport_error_liberr(
5249 &conn->last_error, ret, NULL, 0);
5250 #endif
5251 }
5252 }
5253 log_err("ngtcp2_conn_read_pkt failed: %s",
5254 ngtcp2_strerror(ret));
5255 if(!doq_conn_close_error(c, conn)) {
5256 if(err_drop)
5257 *err_drop = 1;
5258 }
5259 return 0;
5260 }
5261 doq_conn_write_enable(conn);
5262 return 1;
5263 }
5264
/**
 * doq stream write is done.
 * The stream is taken off the write list of the connection.
 * @param conn: the connection.
 * @param stream: the stream that has been written.
 */
static void
doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream)
{
	/* The stream buffer is kept: it cannot be deallocated here, it
	 * may be needed for resends. */
	doq_stream_off_write_list(conn, stream);
}
5272
5273 int
doq_conn_write_streams(struct comm_point * c,struct doq_conn * conn,int * err_drop)5274 doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn,
5275 int* err_drop)
5276 {
5277 struct doq_stream* stream = conn->stream_write_first;
5278 ngtcp2_path_storage ps;
5279 ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
5280 size_t num_packets = 0, max_packets = 65535;
5281 ngtcp2_path_storage_zero(&ps);
5282
5283 for(;;) {
5284 int64_t stream_id;
5285 uint32_t flags = 0;
5286 ngtcp2_pkt_info pi;
5287 ngtcp2_vec datav[2];
5288 size_t datav_count = 0;
5289 ngtcp2_ssize ret, ndatalen = 0;
5290 int fin;
5291
5292 if(stream) {
5293 /* data to send */
5294 verbose(VERB_ALGO, "doq: doq_conn write stream %d",
5295 (int)stream->stream_id);
5296 stream_id = stream->stream_id;
5297 fin = 1;
5298 if(stream->nwrite < 2) {
5299 datav[0].base = ((uint8_t*)&stream->
5300 outlen_wire) + stream->nwrite;
5301 datav[0].len = 2 - stream->nwrite;
5302 datav[1].base = stream->out;
5303 datav[1].len = stream->outlen;
5304 datav_count = 2;
5305 } else {
5306 datav[0].base = stream->out +
5307 (stream->nwrite-2);
5308 datav[0].len = stream->outlen -
5309 (stream->nwrite-2);
5310 datav_count = 1;
5311 }
5312 } else {
5313 /* no data to send */
5314 verbose(VERB_ALGO, "doq: doq_conn write stream -1");
5315 stream_id = -1;
5316 fin = 0;
5317 datav[0].base = NULL;
5318 datav[0].len = 0;
5319 datav_count = 1;
5320 }
5321
5322 /* if more streams, set it to write more */
5323 if(stream && stream->write_next)
5324 flags |= NGTCP2_WRITE_STREAM_FLAG_MORE;
5325 if(fin)
5326 flags |= NGTCP2_WRITE_STREAM_FLAG_FIN;
5327
5328 sldns_buffer_clear(c->doq_socket->pkt_buf);
5329 ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi,
5330 sldns_buffer_begin(c->doq_socket->pkt_buf),
5331 sldns_buffer_remaining(c->doq_socket->pkt_buf),
5332 &ndatalen, flags, stream_id, datav, datav_count, ts);
5333 if(ret < 0) {
5334 if(ret == NGTCP2_ERR_WRITE_MORE) {
5335 verbose(VERB_ALGO, "doq: write more, ndatalen %d", (int)ndatalen);
5336 if(stream) {
5337 if(ndatalen >= 0)
5338 stream->nwrite += ndatalen;
5339 if(stream->nwrite >= stream->outlen+2)
5340 doq_stream_write_is_done(
5341 conn, stream);
5342 stream = stream->write_next;
5343 }
5344 continue;
5345 } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) {
5346 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED");
5347 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5348 ngtcp2_ccerr_set_application_error(
5349 &conn->ccerr, -1, NULL, 0);
5350 #else
5351 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
5352 #endif
5353 if(err_drop)
5354 *err_drop = 0;
5355 if(!doq_conn_close_error(c, conn)) {
5356 if(err_drop)
5357 *err_drop = 1;
5358 }
5359 return 0;
5360 } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) {
5361 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR");
5362 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5363 ngtcp2_ccerr_set_application_error(
5364 &conn->ccerr, -1, NULL, 0);
5365 #else
5366 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
5367 #endif
5368 if(err_drop)
5369 *err_drop = 0;
5370 if(!doq_conn_close_error(c, conn)) {
5371 if(err_drop)
5372 *err_drop = 1;
5373 }
5374 return 0;
5375 }
5376
5377 log_err("doq: ngtcp2_conn_writev_stream failed: %s",
5378 ngtcp2_strerror(ret));
5379 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5380 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0);
5381 #else
5382 ngtcp2_connection_close_error_set_transport_error_liberr(
5383 &conn->last_error, ret, NULL, 0);
5384 #endif
5385 if(err_drop)
5386 *err_drop = 0;
5387 if(!doq_conn_close_error(c, conn)) {
5388 if(err_drop)
5389 *err_drop = 1;
5390 }
5391 return 0;
5392 }
5393 verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d",
5394 (int)ret, (int)ndatalen);
5395
5396 if(ndatalen >= 0 && stream) {
5397 stream->nwrite += ndatalen;
5398 if(stream->nwrite >= stream->outlen+2)
5399 doq_stream_write_is_done(conn, stream);
5400 }
5401 if(ret == 0) {
5402 /* congestion limited */
5403 doq_conn_write_disable(conn);
5404 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
5405 return 1;
5406 }
5407 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
5408 sldns_buffer_flip(c->doq_socket->pkt_buf);
5409 doq_send_pkt(c, &conn->key.paddr, pi.ecn);
5410
5411 if(c->doq_socket->have_blocked_pkt)
5412 break;
5413 if(++num_packets == max_packets)
5414 break;
5415 if(stream)
5416 stream = stream->write_next;
5417 }
5418 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
5419 return 1;
5420 }
5421
5422 void
doq_conn_write_enable(struct doq_conn * conn)5423 doq_conn_write_enable(struct doq_conn* conn)
5424 {
5425 conn->write_interest = 1;
5426 }
5427
5428 void
doq_conn_write_disable(struct doq_conn * conn)5429 doq_conn_write_disable(struct doq_conn* conn)
5430 {
5431 conn->write_interest = 0;
5432 }
5433
5434 /** doq append the connection to the write list */
5435 static void
doq_conn_write_list_append(struct doq_table * table,struct doq_conn * conn)5436 doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn)
5437 {
5438 if(conn->on_write_list)
5439 return;
5440 conn->write_prev = table->write_list_last;
5441 if(table->write_list_last)
5442 table->write_list_last->write_next = conn;
5443 else table->write_list_first = conn;
5444 conn->write_next = NULL;
5445 table->write_list_last = conn;
5446 conn->on_write_list = 1;
5447 }
5448
5449 void
doq_conn_write_list_remove(struct doq_table * table,struct doq_conn * conn)5450 doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn)
5451 {
5452 if(!conn->on_write_list)
5453 return;
5454 if(conn->write_next)
5455 conn->write_next->write_prev = conn->write_prev;
5456 else table->write_list_last = conn->write_prev;
5457 if(conn->write_prev)
5458 conn->write_prev->write_next = conn->write_next;
5459 else table->write_list_first = conn->write_next;
5460 conn->write_prev = NULL;
5461 conn->write_next = NULL;
5462 conn->on_write_list = 0;
5463 }
5464
5465 void
doq_conn_set_write_list(struct doq_table * table,struct doq_conn * conn)5466 doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn)
5467 {
5468 if(conn->write_interest && conn->on_write_list)
5469 return;
5470 if(!conn->write_interest && !conn->on_write_list)
5471 return;
5472 if(conn->write_interest)
5473 doq_conn_write_list_append(table, conn);
5474 else doq_conn_write_list_remove(table, conn);
5475 }
5476
5477 struct doq_conn*
doq_table_pop_first(struct doq_table * table)5478 doq_table_pop_first(struct doq_table* table)
5479 {
5480 struct doq_conn* conn = table->write_list_first;
5481 if(!conn)
5482 return NULL;
5483 lock_basic_lock(&conn->lock);
5484 table->write_list_first = conn->write_next;
5485 if(conn->write_next)
5486 conn->write_next->write_prev = NULL;
5487 else table->write_list_last = NULL;
5488 conn->write_next = NULL;
5489 conn->write_prev = NULL;
5490 conn->on_write_list = 0;
5491 return conn;
5492 }
5493
5494 int
doq_conn_check_timer(struct doq_conn * conn,struct timeval * tv)5495 doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv)
5496 {
5497 ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn);
5498 ngtcp2_tstamp now = doq_get_timestamp_nanosec();
5499 ngtcp2_tstamp t;
5500
5501 if(expiry <= now) {
5502 /* The timer has already expired, add with zero timeout.
5503 * This should call the callback straight away. Calling it
5504 * from the event callbacks is cleaner than calling it here,
5505 * because then it is always called with the same locks and
5506 * so on. This routine only has the conn.lock. */
5507 t = now;
5508 } else {
5509 t = expiry;
5510 }
5511
5512 /* convert to timeval */
5513 memset(tv, 0, sizeof(*tv));
5514 tv->tv_sec = t / NGTCP2_SECONDS;
5515 tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000;
5516
5517 /* If we already have a timer, is it the right value? */
5518 if(conn->timer.timer_in_tree || conn->timer.timer_in_list) {
5519 if(conn->timer.time.tv_sec == tv->tv_sec &&
5520 conn->timer.time.tv_usec == tv->tv_usec)
5521 return 0;
5522 }
5523 return 1;
5524 }
5525
5526 /* doq print connection log */
5527 static void
doq_conn_log_line(struct doq_conn * conn,char * s)5528 doq_conn_log_line(struct doq_conn* conn, char* s)
5529 {
5530 char remotestr[256], localstr[256];
5531 addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen,
5532 remotestr, sizeof(remotestr));
5533 addr_to_str((void*)&conn->key.paddr.localaddr,
5534 conn->key.paddr.localaddrlen, localstr, sizeof(localstr));
5535 log_info("doq conn %s %s %s", remotestr, localstr, s);
5536 }
5537
5538 int
doq_conn_handle_timeout(struct doq_conn * conn)5539 doq_conn_handle_timeout(struct doq_conn* conn)
5540 {
5541 ngtcp2_tstamp now = doq_get_timestamp_nanosec();
5542 int rv;
5543
5544 if(verbosity >= VERB_ALGO)
5545 doq_conn_log_line(conn, "timeout");
5546
5547 rv = ngtcp2_conn_handle_expiry(conn->conn, now);
5548 if(rv != 0) {
5549 verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s",
5550 ngtcp2_strerror(rv));
5551 #ifdef HAVE_NGTCP2_CCERR_DEFAULT
5552 ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0);
5553 #else
5554 ngtcp2_connection_close_error_set_transport_error_liberr(
5555 &conn->last_error, rv, NULL, 0);
5556 #endif
5557 if(!doq_conn_close_error(conn->doq_socket->cp, conn)) {
5558 /* failed, return for deletion */
5559 return 0;
5560 }
5561 return 1;
5562 }
5563 doq_conn_write_enable(conn);
5564 if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) {
5565 /* failed, return for deletion. */
5566 return 0;
5567 }
5568 return 1;
5569 }
5570
5571 void
doq_table_quic_size_add(struct doq_table * table,size_t add)5572 doq_table_quic_size_add(struct doq_table* table, size_t add)
5573 {
5574 lock_basic_lock(&table->size_lock);
5575 table->current_size += add;
5576 lock_basic_unlock(&table->size_lock);
5577 }
5578
5579 void
doq_table_quic_size_subtract(struct doq_table * table,size_t subtract)5580 doq_table_quic_size_subtract(struct doq_table* table, size_t subtract)
5581 {
5582 lock_basic_lock(&table->size_lock);
5583 if(table->current_size < subtract)
5584 table->current_size = 0;
5585 else table->current_size -= subtract;
5586 lock_basic_unlock(&table->size_lock);
5587 }
5588
5589 int
doq_table_quic_size_available(struct doq_table * table,struct config_file * cfg,size_t mem)5590 doq_table_quic_size_available(struct doq_table* table,
5591 struct config_file* cfg, size_t mem)
5592 {
5593 size_t cur;
5594 lock_basic_lock(&table->size_lock);
5595 cur = table->current_size;
5596 lock_basic_unlock(&table->size_lock);
5597
5598 if(cur + mem > cfg->quic_size)
5599 return 0;
5600 return 1;
5601 }
5602
doq_table_quic_size_get(struct doq_table * table)5603 size_t doq_table_quic_size_get(struct doq_table* table)
5604 {
5605 size_t sz;
5606 if(!table)
5607 return 0;
5608 lock_basic_lock(&table->size_lock);
5609 sz = table->current_size;
5610 lock_basic_unlock(&table->size_lock);
5611 return sz;
5612 }
5613 #endif /* HAVE_NGTCP2 */
5614