xref: /freebsd-11-stable/contrib/unbound/services/listen_dnsport.c (revision c1f4b179fee705bc884d5bda381efcabfef106da)
1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include "services/listen_dnsport.h"
51 #include "services/outside_network.h"
52 #include "util/netevent.h"
53 #include "util/log.h"
54 #include "util/config_file.h"
55 #include "util/net_help.h"
56 #include "sldns/sbuffer.h"
57 #include "sldns/parseutil.h"
58 #include "services/mesh.h"
59 #include "util/fptr_wlist.h"
60 #include "util/locks.h"
61 
62 #ifdef HAVE_NETDB_H
63 #include <netdb.h>
64 #endif
65 #include <fcntl.h>
66 
67 #ifdef HAVE_SYS_UN_H
68 #include <sys/un.h>
69 #endif
70 
71 #ifdef HAVE_SYSTEMD
72 #include <systemd/sd-daemon.h>
73 #endif
74 
75 #ifdef HAVE_IFADDRS_H
76 #include <ifaddrs.h>
77 #endif
78 #ifdef HAVE_NET_IF_H
79 #include <net/if.h>
80 #endif
81 
/* Backlog handed to listen() by both the TCP accept socket and the
 * local (unix) accept socket created in this file. */
82 /** number of queued TCP connections for listen() */
83 #define TCP_BACKLOG 256
84 
/* Three byte counters follow (stream wait, HTTP2 query, HTTP2 response).
 * Each has a lock and a flag recording whether that lock has been
 * initialised; the lock objects are only declared when threads are
 * enabled. */
85 #ifndef THREADS_DISABLED
86 /** lock on the counter of stream buffer memory */
87 static lock_basic_type stream_wait_count_lock;
88 /** lock on the counter of HTTP2 query buffer memory */
89 static lock_basic_type http2_query_buffer_count_lock;
90 /** lock on the counter of HTTP2 response buffer memory */
91 static lock_basic_type http2_response_buffer_count_lock;
92 #endif
93 /** size (in bytes) of stream wait buffers */
94 static size_t stream_wait_count = 0;
95 /** is the lock initialised for stream wait buffers */
96 static int stream_wait_lock_inited = 0;
97 /** size (in bytes) of HTTP2 query buffers */
98 static size_t http2_query_buffer_count = 0;
99 /** is the lock initialised for HTTP2 query buffers */
100 static int http2_query_buffer_lock_inited = 0;
101 /** size (in bytes) of HTTP2 response buffers */
102 static size_t http2_response_buffer_count = 0;
103 /** is the lock initialised for HTTP2 response buffers */
104 static int http2_response_buffer_lock_inited = 0;
105 
106 /**
107  * Debug print of the getaddrinfo returned address.
108  * @param addr: the address returned.
109  */
110 static void
verbose_print_addr(struct addrinfo * addr)111 verbose_print_addr(struct addrinfo *addr)
112 {
113 	if(verbosity >= VERB_ALGO) {
114 		char buf[100];
115 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
116 #ifdef INET6
117 		if(addr->ai_family == AF_INET6)
118 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
119 				sin6_addr;
120 #endif /* INET6 */
121 		if(inet_ntop(addr->ai_family, sinaddr, buf,
122 			(socklen_t)sizeof(buf)) == 0) {
123 			(void)strlcpy(buf, "(null)", sizeof(buf));
124 		}
125 		buf[sizeof(buf)-1] = 0;
126 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
127 			addr->ai_socktype==SOCK_DGRAM?"udp":
128 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
129 			addr->ai_family==AF_INET?"4":
130 			addr->ai_family==AF_INET6?"6":
131 			"_otherfam", buf,
132 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
133 	}
134 }
135 
136 #ifdef HAVE_SYSTEMD
137 static int
systemd_get_activated(int family,int socktype,int listen,struct sockaddr * addr,socklen_t addrlen,const char * path)138 systemd_get_activated(int family, int socktype, int listen,
139 		      struct sockaddr *addr, socklen_t addrlen,
140 		      const char *path)
141 {
142 	int i = 0;
143 	int r = 0;
144 	int s = -1;
145 	const char* listen_pid, *listen_fds;
146 
147 	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */
148 
149 	if((r = sd_booted()) < 1) {
150 		if(r == 0)
151 			log_warn("systemd is not running");
152 		else
153 			log_err("systemd sd_booted(): %s", strerror(-r));
154 		return -1;
155 	}
156 
157 	listen_pid = getenv("LISTEN_PID");
158 	listen_fds = getenv("LISTEN_FDS");
159 
160 	if (!listen_pid) {
161 		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
162 		return -1;
163 	}
164 
165 	if (!listen_fds) {
166 		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
167 		return -1;
168 	}
169 
170 	if((r = sd_listen_fds(0)) < 1) {
171 		if(r == 0)
172 			log_warn("systemd: did not return socket, check unit configuration");
173 		else
174 			log_err("systemd sd_listen_fds(): %s", strerror(-r));
175 		return -1;
176 	}
177 
178 	for(i = 0; i < r; i++) {
179 		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
180 			s = SD_LISTEN_FDS_START + i;
181 			break;
182 		}
183 	}
184 	if (s == -1) {
185 		if (addr)
186 			log_err_addr("systemd sd_listen_fds()",
187 				     "no such socket",
188 				     (struct sockaddr_storage *)addr, addrlen);
189 		else
190 			log_err("systemd sd_listen_fds(): %s", path);
191 	}
192 	return s;
193 }
194 #endif
195 
/**
 * Create a UDP socket (or, with HAVE_SYSTEMD and use_systemd, adopt one
 * passed by systemd), apply the requested socket options, bind it to
 * addr and make it nonblocking.
 * @param family: address family for socket(); AF_INET6 and AF_INET get
 *	family specific V6ONLY and MTU handling.
 * @param socktype: socket type for socket().
 * @param addr: address to bind to.
 * @param addrlen: length of addr.
 * @param v6only: for AF_INET6: 0 leaves IPV6_V6ONLY alone, 2 sets it to 0,
 *	any other value sets it to 1.
 * @param inuse: on a bind failure set to whether errno was EADDRINUSE
 *	(where that is available); set to 0 on the other failure paths.
 *	Not written on success.
 * @param noproto: set to 1 when socket() reports the family or protocol
 *	as unsupported, or when an IPv6 bind fails with EINVAL; set to 0 on
 *	the other failure paths.  Not written on success.
 * @param rcv: if nonzero, requested receive buffer size.
 * @param snd: if nonzero, requested send buffer size.
 * @param listen: if nonzero, SO_REUSEADDR, the reuseport option and the
 *	transparent option are applied; it also affects which bind errors
 *	are logged.
 * @param reuseport: if not NULL and nonzero, try to enable port reuse;
 *	reset to 0 when the option cannot be set.
 * @param transparent: if nonzero, try IP_TRANSPARENT, IP(V6)_BINDANY or
 *	SO_BINDANY, whichever is available; failure only warns.
 * @param freebind: if nonzero, try IP_FREEBIND; failure only warns.
 * @param use_systemd: if nonzero (and HAVE_SYSTEMD), first try to get the
 *	socket from systemd.
 * @param dscp: DiffServ codepoint handed to set_ip_dscp(); failure only
 *	warns.
 * @return the socket, or -1 on failure.
 */
196 int
create_udp_sock(int family,int socktype,struct sockaddr * addr,socklen_t addrlen,int v6only,int * inuse,int * noproto,int rcv,int snd,int listen,int * reuseport,int transparent,int freebind,int use_systemd,int dscp)197 create_udp_sock(int family, int socktype, struct sockaddr* addr,
198         socklen_t addrlen, int v6only, int* inuse, int* noproto,
199 	int rcv, int snd, int listen, int* reuseport, int transparent,
200 	int freebind, int use_systemd, int dscp)
201 {
202 	int s;
203 	char* err;
204 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
205 	int on=1;
206 #endif
207 #ifdef IPV6_MTU
208 	int mtu = IPV6_MIN_MTU;
209 #endif
	/* silence unused-parameter warnings for options this platform lacks */
210 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
211 	(void)rcv;
212 #endif
213 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
214 	(void)snd;
215 #endif
216 #ifndef IPV6_V6ONLY
217 	(void)v6only;
218 #endif
219 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
220 	(void)transparent;
221 #endif
222 #if !defined(IP_FREEBIND)
223 	(void)freebind;
224 #endif
225 #ifdef HAVE_SYSTEMD
226 	int got_fd_from_systemd = 0;
227 
	/* create a socket ourselves unless systemd handed one over; the
	 * matching else branch further down records the systemd case */
228 	if (!use_systemd
229 	    || (use_systemd
230 		&& (s = systemd_get_activated(family, socktype, -1, addr,
231 					      addrlen, NULL)) == -1)) {
232 #else
233 	(void)use_systemd;
234 #endif
235 	if((s = socket(family, socktype, 0)) == -1) {
236 		*inuse = 0;
237 #ifndef USE_WINSOCK
238 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
239 			*noproto = 1;
240 			return -1;
241 		}
242 #else
243 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
244 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
245 			*noproto = 1;
246 			return -1;
247 		}
248 #endif
249 		log_err("can't create socket: %s", sock_strerror(errno));
250 		*noproto = 0;
251 		return -1;
252 	}
253 #ifdef HAVE_SYSTEMD
254 	} else {
255 		got_fd_from_systemd = 1;
256 	}
257 #endif
258 	if(listen) {
259 #ifdef SO_REUSEADDR
260 		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
261 			(socklen_t)sizeof(on)) < 0) {
262 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
263 				sock_strerror(errno));
264 #ifndef USE_WINSOCK
			/* ENOSYS is tolerated here, any other error is fatal */
265 			if(errno != ENOSYS) {
266 				close(s);
267 				*noproto = 0;
268 				*inuse = 0;
269 				return -1;
270 			}
271 #else
272 			closesocket(s);
273 			*noproto = 0;
274 			*inuse = 0;
275 			return -1;
276 #endif
277 		}
278 #endif /* SO_REUSEADDR */
279 #ifdef SO_REUSEPORT
280 #  ifdef SO_REUSEPORT_LB
281 		/* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
282 		 * like SO_REUSEPORT on Linux.  This is what the users want
283 		 * with the config option in unbound.conf; if we actually
284 		 * need local address and port reuse they'll also need to
285 		 * have SO_REUSEPORT set for them, assume it was _LB they want.
286 		 */
287 		if (reuseport && *reuseport &&
288 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
289 			(socklen_t)sizeof(on)) < 0) {
290 #ifdef ENOPROTOOPT
291 			if(errno != ENOPROTOOPT || verbosity >= 3)
292 				log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
293 					strerror(errno));
294 #endif
295 			/* this option is not essential, we can continue */
296 			*reuseport = 0;
297 		}
298 #  else /* no SO_REUSEPORT_LB */
299 
300 		/* try to set SO_REUSEPORT so that incoming
301 		 * queries are distributed evenly among the receiving threads.
302 		 * Each thread must have its own socket bound to the same port,
303 		 * with SO_REUSEPORT set on each socket.
304 		 */
305 		if (reuseport && *reuseport &&
306 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
307 			(socklen_t)sizeof(on)) < 0) {
308 #ifdef ENOPROTOOPT
309 			if(errno != ENOPROTOOPT || verbosity >= 3)
310 				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
311 					strerror(errno));
312 #endif
313 			/* this option is not essential, we can continue */
314 			*reuseport = 0;
315 		}
316 #  endif /* SO_REUSEPORT_LB */
317 #else
318 		(void)reuseport;
319 #endif /* defined(SO_REUSEPORT) */
320 #ifdef IP_TRANSPARENT
321 		if (transparent &&
322 		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
323 		    (socklen_t)sizeof(on)) < 0) {
324 			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
325 			strerror(errno));
326 		}
327 #elif defined(IP_BINDANY)
328 		if (transparent &&
329 		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
330 		    (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
331 		    (void*)&on, (socklen_t)sizeof(on)) < 0) {
332 			log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
333 			(family==AF_INET6?"V6":""), strerror(errno));
334 		}
335 #elif defined(SO_BINDANY)
336 		if (transparent &&
337 		    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
338 		    (socklen_t)sizeof(on)) < 0) {
339 			log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
340 			strerror(errno));
341 		}
342 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
343 	}
344 #ifdef IP_FREEBIND
345 	if(freebind &&
346 	    setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
347 	    (socklen_t)sizeof(on)) < 0) {
348 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
349 		strerror(errno));
350 	}
351 #endif /* IP_FREEBIND */
352 	if(rcv) {
353 #ifdef SO_RCVBUF
354 		int got;
355 		socklen_t slen = (socklen_t)sizeof(got);
356 #  ifdef SO_RCVBUFFORCE
357 		/* Linux specific: try to use root permission to override
358 		 * system limits on rcvbuf. The limit is stored in
359 		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
		/* when the FORCE variant exists, plain SO_RCVBUF below is
		 * only the fallback for an EPERM failure of the FORCE call */
360 		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
361 			(socklen_t)sizeof(rcv)) < 0) {
362 			if(errno != EPERM) {
363 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
364 					"...) failed: %s", sock_strerror(errno));
365 				sock_close(s);
366 				*noproto = 0;
367 				*inuse = 0;
368 				return -1;
369 			}
370 #  endif /* SO_RCVBUFFORCE */
371 			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
372 				(socklen_t)sizeof(rcv)) < 0) {
373 				log_err("setsockopt(..., SO_RCVBUF, "
374 					"...) failed: %s", sock_strerror(errno));
375 				sock_close(s);
376 				*noproto = 0;
377 				*inuse = 0;
378 				return -1;
379 			}
380 			/* check if we got the right thing or if system
381 			 * reduced to some system max.  Warn if so */
382 			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
383 				&slen) >= 0 && got < rcv/2) {
384 				log_warn("so-rcvbuf %u was not granted. "
385 					"Got %u. To fix: start with "
386 					"root permissions(linux) or sysctl "
387 					"bigger net.core.rmem_max(linux) or "
388 					"kern.ipc.maxsockbuf(bsd) values.",
389 					(unsigned)rcv, (unsigned)got);
390 			}
391 #  ifdef SO_RCVBUFFORCE
392 		}
393 #  endif
394 #endif /* SO_RCVBUF */
395 	}
396 	/* first do RCVBUF as the receive buffer is more important */
397 	if(snd) {
398 #ifdef SO_SNDBUF
399 		int got;
400 		socklen_t slen = (socklen_t)sizeof(got);
401 #  ifdef SO_SNDBUFFORCE
402 		/* Linux specific: try to use root permission to override
403 		 * system limits on sndbuf. The limit is stored in
404 		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
		/* same shape as the receive buffer code above */
405 		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
406 			(socklen_t)sizeof(snd)) < 0) {
407 			if(errno != EPERM) {
408 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
409 					"...) failed: %s", sock_strerror(errno));
410 				sock_close(s);
411 				*noproto = 0;
412 				*inuse = 0;
413 				return -1;
414 			}
415 #  endif /* SO_SNDBUFFORCE */
416 			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
417 				(socklen_t)sizeof(snd)) < 0) {
418 				log_err("setsockopt(..., SO_SNDBUF, "
419 					"...) failed: %s", sock_strerror(errno));
420 				sock_close(s);
421 				*noproto = 0;
422 				*inuse = 0;
423 				return -1;
424 			}
425 			/* check if we got the right thing or if system
426 			 * reduced to some system max.  Warn if so */
427 			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
428 				&slen) >= 0 && got < snd/2) {
429 				log_warn("so-sndbuf %u was not granted. "
430 					"Got %u. To fix: start with "
431 					"root permissions(linux) or sysctl "
432 					"bigger net.core.wmem_max(linux) or "
433 					"kern.ipc.maxsockbuf(bsd) values.",
434 					(unsigned)snd, (unsigned)got);
435 			}
436 #  ifdef SO_SNDBUFFORCE
437 		}
438 #  endif
439 #endif /* SO_SNDBUF */
440 	}
	/* a DSCP failure is reported but does not fail socket creation */
441 	err = set_ip_dscp(s, family, dscp);
442 	if(err != NULL)
443 		log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
444 	if(family == AF_INET6) {
445 # if defined(IPV6_V6ONLY)
446 		if(v6only) {
			/* v6only == 2 explicitly switches the option off */
447 			int val=(v6only==2)?0:1;
448 			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
449 				(void*)&val, (socklen_t)sizeof(val)) < 0) {
450 				log_err("setsockopt(..., IPV6_V6ONLY"
451 					", ...) failed: %s", sock_strerror(errno));
452 				sock_close(s);
453 				*noproto = 0;
454 				*inuse = 0;
455 				return -1;
456 			}
457 		}
458 # endif
459 # if defined(IPV6_USE_MIN_MTU)
460 		/*
461 		 * There is no fragmentation of IPv6 datagrams
462 		 * during forwarding in the network. Therefore
463 		 * we do not send UDP datagrams larger than
464 		 * the minimum IPv6 MTU of 1280 octets. The
465 		 * EDNS0 message length can be larger if the
466 		 * network stack supports IPV6_USE_MIN_MTU.
467 		 */
468 		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
469 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
470 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
471 				"...) failed: %s", sock_strerror(errno));
472 			sock_close(s);
473 			*noproto = 0;
474 			*inuse = 0;
475 			return -1;
476 		}
477 # elif defined(IPV6_MTU)
478 		/*
479 		 * On Linux, to send no larger than 1280, the PMTUD is
480 		 * disabled by default for datagrams anyway, so we set
481 		 * the MTU to use.
482 		 */
483 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
484 			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
485 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
486 				sock_strerror(errno));
487 			sock_close(s);
488 			*noproto = 0;
489 			*inuse = 0;
490 			return -1;
491 		}
492 # endif /* IPv6 MTU */
493 	} else if(family == AF_INET) {
494 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
495 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
496  * PMTU information is not accepted, but fragmentation is allowed
497  * if and only if the packet size exceeds the outgoing interface MTU
498  * (and also uses the interface mtu to determine the size of the packets).
499  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
500  * FreeBSD already has same semantics without setting the option. */
501 		int omit_set = 0;
502 		int action;
503 #   if defined(IP_PMTUDISC_OMIT)
504 		action = IP_PMTUDISC_OMIT;
505 		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
506 			&action, (socklen_t)sizeof(action)) < 0) {
507 
			/* EINVAL falls through to the IP_PMTUDISC_DONT
			 * attempt below; other errors are fatal */
508 			if (errno != EINVAL) {
509 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
510 					strerror(errno));
511 				sock_close(s);
512 				*noproto = 0;
513 				*inuse = 0;
514 				return -1;
515 			}
516 		}
517 		else
518 		{
519 		    omit_set = 1;
520 		}
521 #   endif
522 		if (omit_set == 0) {
523    			action = IP_PMTUDISC_DONT;
524 			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
525 				&action, (socklen_t)sizeof(action)) < 0) {
526 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
527 					strerror(errno));
528 				sock_close(s);
529 				*noproto = 0;
530 				*inuse = 0;
531 				return -1;
532 			}
533 		}
534 #  elif defined(IP_DONTFRAG) && !defined(__APPLE__)
535 		/* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
536 		 * but does not work on that version, so we exclude it */
537 		int off = 0;
538 		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
539 			&off, (socklen_t)sizeof(off)) < 0) {
540 			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
541 				strerror(errno));
542 			sock_close(s);
543 			*noproto = 0;
544 			*inuse = 0;
545 			return -1;
546 		}
547 #  endif /* IPv4 MTU */
548 	}
	/* a socket received from systemd is not bound again here */
549 	if(
550 #ifdef HAVE_SYSTEMD
551 		!got_fd_from_systemd &&
552 #endif
553 		bind(s, (struct sockaddr*)addr, addrlen) != 0) {
554 		*noproto = 0;
555 		*inuse = 0;
556 #ifndef USE_WINSOCK
557 #ifdef EADDRINUSE
558 		*inuse = (errno == EADDRINUSE);
559 		/* detect freebsd jail with no ipv6 permission */
560 		if(family==AF_INET6 && errno==EINVAL)
561 			*noproto = 1;
		/* EADDRINUSE is never logged here; EACCES and EADDRNOTAVAIL
		 * are only logged for listening sockets or at verbosity 4+ */
562 		else if(errno != EADDRINUSE &&
563 			!(errno == EACCES && verbosity < 4 && !listen)
564 #ifdef EADDRNOTAVAIL
565 			&& !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
566 #endif
567 			) {
568 			log_err_addr("can't bind socket", strerror(errno),
569 				(struct sockaddr_storage*)addr, addrlen);
570 		}
571 #endif /* EADDRINUSE */
572 #else /* USE_WINSOCK */
573 		if(WSAGetLastError() != WSAEADDRINUSE &&
574 			WSAGetLastError() != WSAEADDRNOTAVAIL &&
575 			!(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
576 			log_err_addr("can't bind socket",
577 				wsa_strerror(WSAGetLastError()),
578 				(struct sockaddr_storage*)addr, addrlen);
579 		}
580 #endif /* USE_WINSOCK */
581 		sock_close(s);
582 		return -1;
583 	}
584 	if(!fd_set_nonblock(s)) {
585 		*noproto = 0;
586 		*inuse = 0;
587 		sock_close(s);
588 		return -1;
589 	}
590 	return s;
591 }
592 
/**
 * Create a TCP socket (or, with HAVE_SYSTEMD and use_systemd, adopt one
 * passed by systemd), apply the requested socket options, bind it to the
 * address, make it nonblocking and start listening with TCP_BACKLOG.
 * @param addr: getaddrinfo result giving family, socket type and address.
 * @param v6only: if nonzero and the family is AF_INET6, IPV6_V6ONLY is
 *	switched on (where the option exists).
 * @param noproto: set to 0 on entry; set to 1 when socket() reports the
 *	family or protocol as unsupported, or when an IPv6 bind fails with
 *	EINVAL.
 * @param reuseport: if not NULL and nonzero, try SO_REUSEPORT; reset to 0
 *	when the option cannot be set.
 * @param transparent: if nonzero, try IP_TRANSPARENT, IP(V6)_BINDANY or
 *	SO_BINDANY, whichever is available; failure only warns.
 * @param mss: if larger than 0, requested TCP_MAXSEG value.
 * @param nodelay: if nonzero, try to set TCP_NODELAY.
 * @param freebind: if nonzero, try IP_FREEBIND; failure only warns.
 * @param use_systemd: if nonzero (and HAVE_SYSTEMD), first try to get the
 *	socket from systemd.
 * @param dscp: DiffServ codepoint handed to set_ip_dscp(); failure only
 *	warns.
 * @return the listening socket, or -1 on failure.
 */
593 int
create_tcp_accept_sock(struct addrinfo * addr,int v6only,int * noproto,int * reuseport,int transparent,int mss,int nodelay,int freebind,int use_systemd,int dscp)594 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
595 	int* reuseport, int transparent, int mss, int nodelay, int freebind,
596 	int use_systemd, int dscp)
597 {
598 	int s;
599 	char* err;
600 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
601 	int on = 1;
602 #endif
603 #ifdef HAVE_SYSTEMD
604 	int got_fd_from_systemd = 0;
605 #endif
606 #ifdef USE_TCP_FASTOPEN
607 	int qlen;
608 #endif
609 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
610 	(void)transparent;
611 #endif
612 #if !defined(IP_FREEBIND)
613 	(void)freebind;
614 #endif
615 	verbose_print_addr(addr);
616 	*noproto = 0;
617 #ifdef HAVE_SYSTEMD
	/* create a socket ourselves unless systemd handed one over; with
	 * HAVE_SYSTEMD the nodelay and mss options below sit inside this
	 * branch, so they are not applied to a systemd provided socket */
618 	if (!use_systemd ||
619 	    (use_systemd
620 	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
621 					   addr->ai_addr, addr->ai_addrlen,
622 					   NULL)) == -1)) {
623 #else
624 	(void)use_systemd;
625 #endif
626 	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
627 #ifndef USE_WINSOCK
628 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
629 			*noproto = 1;
630 			return -1;
631 		}
632 #else
633 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
634 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
635 			*noproto = 1;
636 			return -1;
637 		}
638 #endif
639 		log_err("can't create socket: %s", sock_strerror(errno));
640 		return -1;
641 	}
	/* failures of TCP_NODELAY and TCP_MAXSEG are logged, not fatal */
642 	if(nodelay) {
643 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
644 		if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
645 			(socklen_t)sizeof(on)) < 0) {
646 			#ifndef USE_WINSOCK
647 			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
648 				strerror(errno));
649 			#else
650 			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
651 				wsa_strerror(WSAGetLastError()));
652 			#endif
653 		}
654 #else
655 		log_warn(" setsockopt(TCP_NODELAY) unsupported");
656 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
657 	}
658 	if (mss > 0) {
659 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
660 		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
661 			(socklen_t)sizeof(mss)) < 0) {
662 			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
663 				sock_strerror(errno));
664 		} else {
665 			verbose(VERB_ALGO,
666 				" tcp socket mss set to %d", mss);
667 		}
668 #else
669 		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
670 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
671 	}
672 #ifdef HAVE_SYSTEMD
673 	} else {
674 		got_fd_from_systemd = 1;
675     }
676 #endif
677 #ifdef SO_REUSEADDR
	/* unlike the optional settings, a SO_REUSEADDR failure is fatal */
678 	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
679 		(socklen_t)sizeof(on)) < 0) {
680 		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
681 			sock_strerror(errno));
682 		sock_close(s);
683 		return -1;
684 	}
685 #endif /* SO_REUSEADDR */
686 #ifdef IP_FREEBIND
687 	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
688 	    (socklen_t)sizeof(on)) < 0) {
689 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
690 		strerror(errno));
691 	}
692 #endif /* IP_FREEBIND */
693 #ifdef SO_REUSEPORT
694 	/* try to set SO_REUSEPORT so that incoming
695 	 * connections are distributed evenly among the receiving threads.
696 	 * Each thread must have its own socket bound to the same port,
697 	 * with SO_REUSEPORT set on each socket.
698 	 */
699 	if (reuseport && *reuseport &&
700 		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
701 		(socklen_t)sizeof(on)) < 0) {
702 #ifdef ENOPROTOOPT
703 		if(errno != ENOPROTOOPT || verbosity >= 3)
704 			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
705 				strerror(errno));
706 #endif
707 		/* this option is not essential, we can continue */
708 		*reuseport = 0;
709 	}
710 #else
711 	(void)reuseport;
712 #endif /* defined(SO_REUSEPORT) */
713 #if defined(IPV6_V6ONLY)
714 	if(addr->ai_family == AF_INET6 && v6only) {
715 		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
716 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
717 			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
718 				sock_strerror(errno));
719 			sock_close(s);
720 			return -1;
721 		}
722 	}
723 #else
724 	(void)v6only;
725 #endif /* IPV6_V6ONLY */
726 #ifdef IP_TRANSPARENT
727 	if (transparent &&
728 	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
729 	    (socklen_t)sizeof(on)) < 0) {
730 		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
731 			strerror(errno));
732 	}
733 #elif defined(IP_BINDANY)
734 	if (transparent &&
735 	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
736 	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
737 	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
738 		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
739 		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
740 	}
741 #elif defined(SO_BINDANY)
742 	if (transparent &&
743 	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
744 	    sizeof(on)) < 0) {
745 		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
746 		strerror(errno));
747 	}
748 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	/* a DSCP failure is reported but does not fail socket creation */
749 	err = set_ip_dscp(s, addr->ai_family, dscp);
750 	if(err != NULL)
751 		log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
	/* a socket received from systemd is not bound again here */
752 	if(
753 #ifdef HAVE_SYSTEMD
754 		!got_fd_from_systemd &&
755 #endif
756         bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
757 #ifndef USE_WINSOCK
758 		/* detect freebsd jail with no ipv6 permission */
759 		if(addr->ai_family==AF_INET6 && errno==EINVAL)
760 			*noproto = 1;
761 		else {
762 			log_err_addr("can't bind socket", strerror(errno),
763 				(struct sockaddr_storage*)addr->ai_addr,
764 				addr->ai_addrlen);
765 		}
766 #else
767 		log_err_addr("can't bind socket",
768 			wsa_strerror(WSAGetLastError()),
769 			(struct sockaddr_storage*)addr->ai_addr,
770 			addr->ai_addrlen);
771 #endif
772 		sock_close(s);
773 		return -1;
774 	}
775 	if(!fd_set_nonblock(s)) {
776 		sock_close(s);
777 		return -1;
778 	}
779 	if(listen(s, TCP_BACKLOG) == -1) {
780 		log_err("can't listen: %s", sock_strerror(errno));
781 		sock_close(s);
782 		return -1;
783 	}
784 #ifdef USE_TCP_FASTOPEN
785 	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
786 	   against IP spoofing attacks as suggested in RFC7413 */
787 #ifdef __APPLE__
788 	/* OS X implementation only supports qlen of 1 via this call. Actual
789 	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
790 	qlen = 1;
791 #else
792 	/* 5 is recommended on linux */
793 	qlen = 5;
794 #endif
	/* a TCP Fast Open failure is only logged; the socket is still
	 * returned */
795 	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
796 		  sizeof(qlen))) == -1 ) {
797 #ifdef ENOPROTOOPT
798 		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
799 		   disabled, except when verbosity enabled for debugging */
800 		if(errno != ENOPROTOOPT || verbosity >= 3) {
801 #endif
802 		  if(errno == EPERM) {
803 		  	log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
804 		  } else {
805 		  	log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
806 		  }
807 #ifdef ENOPROTOOPT
808 		}
809 #endif
810 	}
811 #endif
812 	return s;
813 }
814 
815 char*
set_ip_dscp(int socket,int addrfamily,int dscp)816 set_ip_dscp(int socket, int addrfamily, int dscp)
817 {
818 	int ds;
819 
820 	if(dscp == 0)
821 		return NULL;
822 	ds = dscp << 2;
823 	switch(addrfamily) {
824 	case AF_INET6:
825 		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, sizeof(ds)) < 0)
826 			return sock_strerror(errno);
827 		break;
828 	default:
829 		if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
830 			return sock_strerror(errno);
831 		break;
832 	}
833 	return NULL;
834 }
835 
/**
 * Create a listening local (unix domain) stream socket on path, or adopt
 * one passed by systemd when HAVE_SYSTEMD is defined and use_systemd is
 * set.  An existing filesystem entry at path is unlinked before binding.
 * @param path: filesystem path of the socket; it must fit, including the
 *	terminating zero, in sockaddr_un.sun_path.
 * @param noproto: set to 1 when local sockets are not supported by this
 *	build; otherwise not written.
 * @param use_systemd: if nonzero (and HAVE_SYSTEMD), first try to get the
 *	socket from systemd.
 * @return the nonblocking listening socket, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif

#ifdef HAVE_SYSTEMD
	if (use_systemd) {
		int ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1,
			NULL, 0, path);
		if (ret != -1)
			return ret;
		/* nothing usable from systemd, create the socket below */
	}
#else
	(void)use_systemd;
#endif

#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);

	/* length is 92-108, 104 on FreeBSD */
	if (strlen(path) >= sizeof(usock.sun_path)) {
		/* a truncated name would bind a different path than the one
		 * that is unlinked below and that clients connect to */
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}

	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
910 
911 
/**
 * Create socket from getaddrinfo results.
 * Resolves ifname and port with the given hints and opens a UDP socket
 * (stype SOCK_DGRAM) or a TCP accept socket (any other stype) on the
 * first result.  *noip6 is set when the failure indicates that IPv6 is
 * not available.  Returns the socket, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp)
{
	struct addrinfo* ai = NULL;
	int gai, fd, inuse, noproto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
#ifdef EAI_SYSTEM
			gai==EAI_SYSTEM?(char*)strerror(errno):""
#else
			""
#endif
		);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		/* stream socket; the address is logged by the callee */
		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp);
		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
		freeaddrinfo(ai);
		return fd;
	}

	verbose_print_addr(ai);
	fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
		(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
		v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
		reuseport, transparent, freebind, use_systemd, dscp);
	if(fd == -1) {
		/* inuse and noproto are only meaningful after a failure */
		if(inuse)
			log_err("bind: address already in use");
		else if(noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}
	freeaddrinfo(ai);
	return fd;
}
964 
/**
 * Make a socket, after checking whether ifname carries a port override
 * in the form "ifspec@port". Without an '@' the call goes to make_sock
 * unchanged; with one, the part before the '@' is used as the interface
 * and the part after it as the port.
 * On an over-long interface or port part, *noip6 is set to 0 and -1 is
 * returned.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp)
{
	char newif[128];
	char p[16];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no override, use the name and port as given */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at-ifname);
	if(iflen >= sizeof(newif)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(p)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	(void)strlcpy(newif, ifname, sizeof(newif));
	newif[iflen] = 0; /* cut off at the '@' */
	/* the port part fits (checked above), so this copy is complete
	 * and zero terminated */
	(void)strlcpy(p, at+1, sizeof(p));
	return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
		snd, reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp);
}
999 
1000 /**
1001  * Add port to open ports list.
1002  * @param list: list head. changed.
1003  * @param s: fd.
1004  * @param ftype: if fd is UDP.
1005  * @return false on failure. list in unchanged then.
1006  */
1007 static int
port_insert(struct listen_port ** list,int s,enum listen_type ftype)1008 port_insert(struct listen_port** list, int s, enum listen_type ftype)
1009 {
1010 	struct listen_port* item = (struct listen_port*)malloc(
1011 		sizeof(struct listen_port));
1012 	if(!item)
1013 		return 0;
1014 	item->next = *list;
1015 	item->fd = s;
1016 	item->ftype = ftype;
1017 	*list = item;
1018 	return 1;
1019 }
1020 
/**
 * Set fd to receive source address packet info.
 * Which socket option is used depends on what the platform defines.
 * If the platform has no usable option for the family an error is logged.
 * A family other than AF_INET and AF_INET6 is left untouched.
 * @param s: the socket.
 * @param family: address family of the socket.
 * @return false on failure, true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	if(family == AF_INET) {
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO */
	} else if(family == AF_INET6) {
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* defined IPV6_RECVPKTINFO */
	}
	return 1;
}
1075 
1076 /** see if interface is ssl, its port number == the ssl port number */
1077 static int
if_is_ssl(const char * ifname,const char * port,int ssl_port,struct config_strlist * tls_additional_port)1078 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1079 	struct config_strlist* tls_additional_port)
1080 {
1081 	struct config_strlist* s;
1082 	char* p = strchr(ifname, '@');
1083 	if(!p && atoi(port) == ssl_port)
1084 		return 1;
1085 	if(p && atoi(p+1) == ssl_port)
1086 		return 1;
1087 	for(s = tls_additional_port; s; s = s->next) {
1088 		if(p && atoi(p+1) == atoi(s->str))
1089 			return 1;
1090 		if(!p && atoi(port) == atoi(s->str))
1091 			return 1;
1092 	}
1093 	return 0;
1094 }
1095 
/**
 * See if interface is https: its port number equals the https port number.
 * The port of the interface is the number after the '@' in ifname if
 * there is one, and the given port string otherwise.
 */
static int
if_is_https(const char* ifname, const char* port, int https_port)
{
	const char* at = strchr(ifname, '@');
	const char* portstr = at ? at+1 : port;
	return atoi(portstr) == https_port;
}
1107 
1108 /**
1109  * Helper for ports_open. Creates one interface (or NULL for default).
1110  * @param ifname: The interface ip address.
1111  * @param do_auto: use automatic interface detection.
1112  * 	If enabled, then ifname must be the wildcard name.
1113  * @param do_udp: if udp should be used.
1114  * @param do_tcp: if udp should be used.
1115  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1116  * @param port: Port number to use (as string).
1117  * @param list: list of open ports, appended to, changed to point to list head.
1118  * @param rcv: receive buffer size for UDP
1119  * @param snd: send buffer size for UDP
1120  * @param ssl_port: ssl service port number
1121  * @param tls_additional_port: list of additional ssl service port numbers.
1122  * @param https_port: DoH service port number
1123  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1124  * 	set to false on exit if reuseport failed due to no kernel support.
1125  * @param transparent: set IP_TRANSPARENT socket option.
1126  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1127  * @param freebind: set IP_FREEBIND socket option.
1128  * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1129  * @param use_systemd: if true, fetch sockets from systemd.
1130  * @param dnscrypt_port: dnscrypt service port number
1131  * @param dscp: DSCP to use.
1132  * @return: returns false on error.
1133  */
1134 static int
ports_create_if(const char * ifname,int do_auto,int do_udp,int do_tcp,struct addrinfo * hints,const char * port,struct listen_port ** list,size_t rcv,size_t snd,int ssl_port,struct config_strlist * tls_additional_port,int https_port,int * reuseport,int transparent,int tcp_mss,int freebind,int http2_nodelay,int use_systemd,int dnscrypt_port,int dscp)1135 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1136 	struct addrinfo *hints, const char* port, struct listen_port** list,
1137 	size_t rcv, size_t snd, int ssl_port,
1138 	struct config_strlist* tls_additional_port, int https_port,
1139 	int* reuseport, int transparent, int tcp_mss, int freebind,
1140 	int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp)
1141 {
1142 	int s, noip6=0;
1143 	int is_https = if_is_https(ifname, port, https_port);
1144 	int nodelay = is_https && http2_nodelay;
1145 #ifdef USE_DNSCRYPT
1146 	int is_dnscrypt = ((strchr(ifname, '@') &&
1147 			atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1148 			(!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1149 #else
1150 	int is_dnscrypt = 0;
1151 	(void)dnscrypt_port;
1152 #endif
1153 
1154 	if(!do_udp && !do_tcp)
1155 		return 0;
1156 	if(do_auto) {
1157 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1158 			&noip6, rcv, snd, reuseport, transparent,
1159 			tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) {
1160 			if(noip6) {
1161 				log_warn("IPv6 protocol not available");
1162 				return 1;
1163 			}
1164 			return 0;
1165 		}
1166 		/* getting source addr packet info is highly non-portable */
1167 		if(!set_recvpktinfo(s, hints->ai_family)) {
1168 			sock_close(s);
1169 			return 0;
1170 		}
1171 		if(!port_insert(list, s,
1172 		   is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) {
1173 			sock_close(s);
1174 			return 0;
1175 		}
1176 	} else if(do_udp) {
1177 		/* regular udp socket */
1178 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1179 			&noip6, rcv, snd, reuseport, transparent,
1180 			tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) {
1181 			if(noip6) {
1182 				log_warn("IPv6 protocol not available");
1183 				return 1;
1184 			}
1185 			return 0;
1186 		}
1187 		if(!port_insert(list, s,
1188 		   is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) {
1189 			sock_close(s);
1190 			return 0;
1191 		}
1192 	}
1193 	if(do_tcp) {
1194 		int is_ssl = if_is_ssl(ifname, port, ssl_port,
1195 			tls_additional_port);
1196 		enum listen_type port_type;
1197 		if(is_ssl)
1198 			port_type = listen_type_ssl;
1199 		else if(is_https)
1200 			port_type = listen_type_http;
1201 		else if(is_dnscrypt)
1202 			port_type = listen_type_tcp_dnscrypt;
1203 		else
1204 			port_type = listen_type_tcp;
1205 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1206 			&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1207 			freebind, use_systemd, dscp)) == -1) {
1208 			if(noip6) {
1209 				/*log_warn("IPv6 protocol not available");*/
1210 				return 1;
1211 			}
1212 			return 0;
1213 		}
1214 		if(is_ssl)
1215 			verbose(VERB_ALGO, "setup TCP for SSL service");
1216 		if(!port_insert(list, s, port_type)) {
1217 			sock_close(s);
1218 			return 0;
1219 		}
1220 	}
1221 	return 1;
1222 }
1223 
1224 /**
1225  * Add items to commpoint list in front.
1226  * @param c: commpoint to add.
1227  * @param front: listen struct.
1228  * @return: false on failure.
1229  */
1230 static int
listen_cp_insert(struct comm_point * c,struct listen_dnsport * front)1231 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1232 {
1233 	struct listen_list* item = (struct listen_list*)malloc(
1234 		sizeof(struct listen_list));
1235 	if(!item)
1236 		return 0;
1237 	item->com = c;
1238 	item->next = front->cps;
1239 	front->cps = item;
1240 	return 1;
1241 }
1242 
1243 struct listen_dnsport*
listen_create(struct comm_base * base,struct listen_port * ports,size_t bufsize,int tcp_accept_count,int tcp_idle_timeout,int harden_large_queries,uint32_t http_max_streams,char * http_endpoint,int http_notls,struct tcl_list * tcp_conn_limit,void * sslctx,struct dt_env * dtenv,comm_point_callback_type * cb,void * cb_arg)1244 listen_create(struct comm_base* base, struct listen_port* ports,
1245 	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1246 	int harden_large_queries, uint32_t http_max_streams,
1247 	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1248 	void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
1249 	void *cb_arg)
1250 {
1251 	struct listen_dnsport* front = (struct listen_dnsport*)
1252 		malloc(sizeof(struct listen_dnsport));
1253 	if(!front)
1254 		return NULL;
1255 	front->cps = NULL;
1256 	front->udp_buff = sldns_buffer_new(bufsize);
1257 #ifdef USE_DNSCRYPT
1258 	front->dnscrypt_udp_buff = NULL;
1259 #endif
1260 	if(!front->udp_buff) {
1261 		free(front);
1262 		return NULL;
1263 	}
1264 	if(!stream_wait_lock_inited) {
1265 		lock_basic_init(&stream_wait_count_lock);
1266 		stream_wait_lock_inited = 1;
1267 	}
1268 	if(!http2_query_buffer_lock_inited) {
1269 		lock_basic_init(&http2_query_buffer_count_lock);
1270 		http2_query_buffer_lock_inited = 1;
1271 	}
1272 	if(!http2_response_buffer_lock_inited) {
1273 		lock_basic_init(&http2_response_buffer_count_lock);
1274 		http2_response_buffer_lock_inited = 1;
1275 	}
1276 
1277 	/* create comm points as needed */
1278 	while(ports) {
1279 		struct comm_point* cp = NULL;
1280 		if(ports->ftype == listen_type_udp ||
1281 		   ports->ftype == listen_type_udp_dnscrypt)
1282 			cp = comm_point_create_udp(base, ports->fd,
1283 				front->udp_buff, cb, cb_arg);
1284 		else if(ports->ftype == listen_type_tcp ||
1285 				ports->ftype == listen_type_tcp_dnscrypt)
1286 			cp = comm_point_create_tcp(base, ports->fd,
1287 				tcp_accept_count, tcp_idle_timeout,
1288 				harden_large_queries, 0, NULL,
1289 				tcp_conn_limit, bufsize, front->udp_buff,
1290 				ports->ftype, cb, cb_arg);
1291 		else if(ports->ftype == listen_type_ssl ||
1292 			ports->ftype == listen_type_http) {
1293 			cp = comm_point_create_tcp(base, ports->fd,
1294 				tcp_accept_count, tcp_idle_timeout,
1295 				harden_large_queries,
1296 				http_max_streams, http_endpoint,
1297 				tcp_conn_limit, bufsize, front->udp_buff,
1298 				ports->ftype, cb, cb_arg);
1299 			if(http_notls && ports->ftype == listen_type_http)
1300 				cp->ssl = NULL;
1301 			else
1302 				cp->ssl = sslctx;
1303 			if(ports->ftype == listen_type_http) {
1304 				if(!sslctx && !http_notls) {
1305 				  log_warn("HTTPS port configured, but no TLS "
1306 					"tls-service-key or tls-service-pem "
1307 					"set");
1308 				}
1309 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1310 				if(!http_notls)
1311 				  log_warn("Unbound is not compiled with an "
1312 					"OpenSSL version supporting ALPN "
1313 					" (OpenSSL >= 1.0.2). This is required "
1314 					"to use DNS-over-HTTPS");
1315 #endif
1316 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1317 				log_warn("Unbound is not compiled with "
1318 					"nghttp2. This is required to use "
1319 					"DNS-over-HTTPS.");
1320 #endif
1321 			}
1322 		} else if(ports->ftype == listen_type_udpancil ||
1323 				  ports->ftype == listen_type_udpancil_dnscrypt)
1324 			cp = comm_point_create_udp_ancil(base, ports->fd,
1325 				front->udp_buff, cb, cb_arg);
1326 		if(!cp) {
1327 			log_err("can't create commpoint");
1328 			listen_delete(front);
1329 			return NULL;
1330 		}
1331 		cp->dtenv = dtenv;
1332 		cp->do_not_close = 1;
1333 #ifdef USE_DNSCRYPT
1334 		if (ports->ftype == listen_type_udp_dnscrypt ||
1335 			ports->ftype == listen_type_tcp_dnscrypt ||
1336 			ports->ftype == listen_type_udpancil_dnscrypt) {
1337 			cp->dnscrypt = 1;
1338 			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1339 			if(!cp->dnscrypt_buffer) {
1340 				log_err("can't alloc dnscrypt_buffer");
1341 				comm_point_delete(cp);
1342 				listen_delete(front);
1343 				return NULL;
1344 			}
1345 			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1346 		}
1347 #endif
1348 		if(!listen_cp_insert(cp, front)) {
1349 			log_err("malloc failed");
1350 			comm_point_delete(cp);
1351 			listen_delete(front);
1352 			return NULL;
1353 		}
1354 		ports = ports->next;
1355 	}
1356 	if(!front->cps) {
1357 		log_err("Could not open sockets to accept queries.");
1358 		listen_delete(front);
1359 		return NULL;
1360 	}
1361 
1362 	return front;
1363 }
1364 
1365 void
listen_list_delete(struct listen_list * list)1366 listen_list_delete(struct listen_list* list)
1367 {
1368 	struct listen_list *p = list, *pn;
1369 	while(p) {
1370 		pn = p->next;
1371 		comm_point_delete(p->com);
1372 		free(p);
1373 		p = pn;
1374 	}
1375 }
1376 
1377 void
listen_delete(struct listen_dnsport * front)1378 listen_delete(struct listen_dnsport* front)
1379 {
1380 	if(!front)
1381 		return;
1382 	listen_list_delete(front->cps);
1383 #ifdef USE_DNSCRYPT
1384 	if(front->dnscrypt_udp_buff &&
1385 		front->udp_buff != front->dnscrypt_udp_buff) {
1386 		sldns_buffer_free(front->dnscrypt_udp_buff);
1387 	}
1388 #endif
1389 	sldns_buffer_free(front->udp_buff);
1390 	free(front);
1391 	if(stream_wait_lock_inited) {
1392 		stream_wait_lock_inited = 0;
1393 		lock_basic_destroy(&stream_wait_count_lock);
1394 	}
1395 	if(http2_query_buffer_lock_inited) {
1396 		http2_query_buffer_lock_inited = 0;
1397 		lock_basic_destroy(&http2_query_buffer_count_lock);
1398 	}
1399 	if(http2_response_buffer_lock_inited) {
1400 		http2_response_buffer_lock_inited = 0;
1401 		lock_basic_destroy(&http2_response_buffer_count_lock);
1402 	}
1403 }
1404 
#ifdef HAVE_GETIFADDRS
/**
 * Append a copy of str to the malloced string array.
 * @return false on allocation failure (logged). The count is only
 * 	increased on success.
 */
static int
ifa_list_append(char ***ip_addresses, int *ip_addresses_size, const char *str)
{
	void *grown = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Resolve one configured interface string against the interface list.
 * Every IPv4 (and with INET6, IPv6) address of an interface whose name
 * equals search_ifa (without its "@port" part, if any) is appended to the
 * array as text, with the "@port" part put back on. If no address was
 * appended, search_ifa itself is appended unchanged.
 * @param ifas: list from getifaddrs.
 * @param search_ifa: interface string from config, "name" or "name@port".
 * @param ip_addresses: malloced string array that is appended to.
 * @param ip_addresses_size: number of strings in the array, updated.
 * @return false on failure.
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *cur;
	int start_count = *ip_addresses_size;
	/* the last '@' separates the interface name from the port */
	const char *at = strrchr(search_ifa, '@');
	const char *suffix = at ? at : "";
	size_t namelen = at ? (size_t)(at-search_ifa) : 0;

	for(cur = ifas; cur != NULL; cur = cur->ifa_next) {
		sa_family_t family;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		/* the interface name has to match exactly */
		if(at) {
			if(strlen(cur->ifa_name) != namelen
			   || strncmp(cur->ifa_name, search_ifa, namelen) != 0)
				continue;
		} else if(strcmp(cur->ifa_name, search_ifa) != 0) {
			continue;
		}

		if(cur->ifa_addr == NULL)
			continue;

		family = cur->ifa_addr->sa_family;
		if(family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				cur->ifa_addr;
			if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, suffix);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			char a6[INET6_ADDRSTRLEN + 1];
			char if_index_name[IF_NAMESIZE + 1];
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				cur->ifa_addr;
			if_index_name[0] = 0;
			if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			/* stays empty if the scope id has no name */
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)if_index_name);
			if(if_index_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, if_index_name, suffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, suffix);
			}
		}
#endif
		else {
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!ifa_list_append(ip_addresses, ip_addresses_size, addr_buf))
			return 0;
	}

	/* nothing found for it, keep the configured string as it is */
	if(*ip_addresses_size == start_count)
		return ifa_list_append(ip_addresses, ip_addresses_size,
			search_ifa);
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1508 
resolve_interface_names(struct config_file * cfg,char *** resif,int * num_resif)1509 int resolve_interface_names(struct config_file* cfg, char*** resif,
1510 	int* num_resif)
1511 {
1512 #ifdef HAVE_GETIFADDRS
1513 	int i;
1514 	struct ifaddrs *addrs = NULL;
1515 	if(cfg->num_ifs == 0) {
1516 		*resif = NULL;
1517 		*num_resif = 0;
1518 		return 1;
1519 	}
1520 	if(getifaddrs(&addrs) == -1) {
1521 		log_err("failed to list interfaces: getifaddrs: %s",
1522 			strerror(errno));
1523 		freeifaddrs(addrs);
1524 		return 0;
1525 	}
1526 	for(i=0; i<cfg->num_ifs; i++) {
1527 		if(!resolve_ifa_name(addrs, cfg->ifs[i], resif, num_resif)) {
1528 			freeifaddrs(addrs);
1529 			config_del_strarray(*resif, *num_resif);
1530 			*resif = NULL;
1531 			*num_resif = 0;
1532 			return 0;
1533 		}
1534 	}
1535 	freeifaddrs(addrs);
1536 	return 1;
1537 #else
1538 	int i;
1539 	if(cfg->num_ifs == 0) {
1540 		*resif = NULL;
1541 		*num_resif = 0;
1542 		return 1;
1543 	}
1544 	*num_resif = cfg->num_ifs;
1545 	*resif = calloc(*num_resif, sizeof(**resif));
1546 	if(!*resif) {
1547 		log_err("out of memory");
1548 		return 0;
1549 	}
1550 	for(i=0; i<*num_resif; i++) {
1551 		(*resif)[i] = strdup(cfg->ifs[i]);
1552 		if(!((*resif)[i])) {
1553 			log_err("out of memory");
1554 			config_del_strarray(*resif, *num_resif);
1555 			*resif = NULL;
1556 			*num_resif = 0;
1557 			return 0;
1558 		}
1559 	}
1560 	return 1;
1561 #endif /* HAVE_GETIFADDRS */
1562 }
1563 
1564 struct listen_port*
listening_ports_open(struct config_file * cfg,char ** ifs,int num_ifs,int * reuseport)1565 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1566 	int* reuseport)
1567 {
1568 	struct listen_port* list = NULL;
1569 	struct addrinfo hints;
1570 	int i, do_ip4, do_ip6;
1571 	int do_tcp, do_auto;
1572 	char portbuf[32];
1573 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1574 	do_ip4 = cfg->do_ip4;
1575 	do_ip6 = cfg->do_ip6;
1576 	do_tcp = cfg->do_tcp;
1577 	do_auto = cfg->if_automatic && cfg->do_udp;
1578 	if(cfg->incoming_num_tcp == 0)
1579 		do_tcp = 0;
1580 
1581 	/* getaddrinfo */
1582 	memset(&hints, 0, sizeof(hints));
1583 	hints.ai_flags = AI_PASSIVE;
1584 	/* no name lookups on our listening ports */
1585 	if(num_ifs > 0)
1586 		hints.ai_flags |= AI_NUMERICHOST;
1587 	hints.ai_family = AF_UNSPEC;
1588 #ifndef INET6
1589 	do_ip6 = 0;
1590 #endif
1591 	if(!do_ip4 && !do_ip6) {
1592 		return NULL;
1593 	}
1594 	/* create ip4 and ip6 ports so that return addresses are nice. */
1595 	if(do_auto || num_ifs == 0) {
1596 		if(do_ip6) {
1597 			hints.ai_family = AF_INET6;
1598 			if(!ports_create_if(do_auto?"::0":"::1",
1599 				do_auto, cfg->do_udp, do_tcp,
1600 				&hints, portbuf, &list,
1601 				cfg->so_rcvbuf, cfg->so_sndbuf,
1602 				cfg->ssl_port, cfg->tls_additional_port,
1603 				cfg->https_port, reuseport, cfg->ip_transparent,
1604 				cfg->tcp_mss, cfg->ip_freebind,
1605 				cfg->http_nodelay, cfg->use_systemd,
1606 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1607 				listening_ports_free(list);
1608 				return NULL;
1609 			}
1610 		}
1611 		if(do_ip4) {
1612 			hints.ai_family = AF_INET;
1613 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1614 				do_auto, cfg->do_udp, do_tcp,
1615 				&hints, portbuf, &list,
1616 				cfg->so_rcvbuf, cfg->so_sndbuf,
1617 				cfg->ssl_port, cfg->tls_additional_port,
1618 				cfg->https_port, reuseport, cfg->ip_transparent,
1619 				cfg->tcp_mss, cfg->ip_freebind,
1620 				cfg->http_nodelay, cfg->use_systemd,
1621 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1622 				listening_ports_free(list);
1623 				return NULL;
1624 			}
1625 		}
1626 	} else for(i = 0; i<num_ifs; i++) {
1627 		if(str_is_ip6(ifs[i])) {
1628 			if(!do_ip6)
1629 				continue;
1630 			hints.ai_family = AF_INET6;
1631 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1632 				do_tcp, &hints, portbuf, &list,
1633 				cfg->so_rcvbuf, cfg->so_sndbuf,
1634 				cfg->ssl_port, cfg->tls_additional_port,
1635 				cfg->https_port, reuseport, cfg->ip_transparent,
1636 				cfg->tcp_mss, cfg->ip_freebind,
1637 				cfg->http_nodelay, cfg->use_systemd,
1638 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1639 				listening_ports_free(list);
1640 				return NULL;
1641 			}
1642 		} else {
1643 			if(!do_ip4)
1644 				continue;
1645 			hints.ai_family = AF_INET;
1646 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1647 				do_tcp, &hints, portbuf, &list,
1648 				cfg->so_rcvbuf, cfg->so_sndbuf,
1649 				cfg->ssl_port, cfg->tls_additional_port,
1650 				cfg->https_port, reuseport, cfg->ip_transparent,
1651 				cfg->tcp_mss, cfg->ip_freebind,
1652 				cfg->http_nodelay, cfg->use_systemd,
1653 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1654 				listening_ports_free(list);
1655 				return NULL;
1656 			}
1657 		}
1658 	}
1659 	return list;
1660 }
1661 
listening_ports_free(struct listen_port * list)1662 void listening_ports_free(struct listen_port* list)
1663 {
1664 	struct listen_port* nx;
1665 	while(list) {
1666 		nx = list->next;
1667 		if(list->fd != -1) {
1668 			sock_close(list->fd);
1669 		}
1670 		free(list);
1671 		list = nx;
1672 	}
1673 }
1674 
listen_get_mem(struct listen_dnsport * listen)1675 size_t listen_get_mem(struct listen_dnsport* listen)
1676 {
1677 	struct listen_list* p;
1678 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1679 		sizeof(*listen->udp_buff) +
1680 		sldns_buffer_capacity(listen->udp_buff);
1681 #ifdef USE_DNSCRYPT
1682 	s += sizeof(*listen->dnscrypt_udp_buff);
1683 	if(listen->udp_buff != listen->dnscrypt_udp_buff){
1684 		s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1685 	}
1686 #endif
1687 	for(p = listen->cps; p; p = p->next) {
1688 		s += sizeof(*p);
1689 		s += comm_point_get_mem(p->com);
1690 	}
1691 	return s;
1692 }
1693 
listen_stop_accept(struct listen_dnsport * listen)1694 void listen_stop_accept(struct listen_dnsport* listen)
1695 {
1696 	/* do not stop the ones that have no tcp_free list
1697 	 * (they have already stopped listening) */
1698 	struct listen_list* p;
1699 	for(p=listen->cps; p; p=p->next) {
1700 		if(p->com->type == comm_tcp_accept &&
1701 			p->com->tcp_free != NULL) {
1702 			comm_point_stop_listening(p->com);
1703 		}
1704 	}
1705 }
1706 
listen_start_accept(struct listen_dnsport * listen)1707 void listen_start_accept(struct listen_dnsport* listen)
1708 {
1709 	/* do not start the ones that have no tcp_free list, it is no
1710 	 * use to listen to them because they have no free tcp handlers */
1711 	struct listen_list* p;
1712 	for(p=listen->cps; p; p=p->next) {
1713 		if(p->com->type == comm_tcp_accept &&
1714 			p->com->tcp_free != NULL) {
1715 			comm_point_start_listening(p->com, -1, -1);
1716 		}
1717 	}
1718 }
1719 
1720 struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer * spoolbuf)1721 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1722 {
1723 	struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1724 	if(!req) {
1725 		log_err("malloc failure for new stream outoforder processing structure");
1726 		return NULL;
1727 	}
1728 	memset(req, 0, sizeof(*req));
1729 	req->spool_buffer = spoolbuf;
1730 	return req;
1731 }
1732 
/** clear and free the tcp request info structure, NULL is ignored */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		tcp_req_info_clear(req);
		/* cp is pointer back to commpoint that owns this struct
		 * and called delete on us */
		/* spool_buffer is shared udp buffer, not deleted here */
		free(req);
	}
}
1743 
tcp_req_info_clear(struct tcp_req_info * req)1744 void tcp_req_info_clear(struct tcp_req_info* req)
1745 {
1746 	struct tcp_req_open_item* open, *nopen;
1747 	struct tcp_req_done_item* item, *nitem;
1748 	if(!req) return;
1749 
1750 	/* free outstanding request mesh reply entries */
1751 	open = req->open_req_list;
1752 	while(open) {
1753 		nopen = open->next;
1754 		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1755 		free(open);
1756 		open = nopen;
1757 	}
1758 	req->open_req_list = NULL;
1759 	req->num_open_req = 0;
1760 
1761 	/* free pending writable result packets */
1762 	item = req->done_req_list;
1763 	while(item) {
1764 		nitem = item->next;
1765 		lock_basic_lock(&stream_wait_count_lock);
1766 		stream_wait_count -= (sizeof(struct tcp_req_done_item)
1767 			+item->len);
1768 		lock_basic_unlock(&stream_wait_count_lock);
1769 		free(item->buf);
1770 		free(item);
1771 		item = nitem;
1772 	}
1773 	req->done_req_list = NULL;
1774 	req->num_done_req = 0;
1775 	req->read_is_closed = 0;
1776 }
1777 
1778 void
tcp_req_info_remove_mesh_state(struct tcp_req_info * req,struct mesh_state * m)1779 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1780 {
1781 	struct tcp_req_open_item* open, *prev = NULL;
1782 	if(!req || !m) return;
1783 	open = req->open_req_list;
1784 	while(open) {
1785 		if(open->mesh_state == m) {
1786 			struct tcp_req_open_item* next;
1787 			if(prev) prev->next = open->next;
1788 			else req->open_req_list = open->next;
1789 			/* caller has to manage the mesh state reply entry */
1790 			next = open->next;
1791 			free(open);
1792 			req->num_open_req --;
1793 
1794 			/* prev = prev; */
1795 			open = next;
1796 			continue;
1797 		}
1798 		prev = open;
1799 		open = open->next;
1800 	}
1801 }
1802 
1803 /** setup listening for read or write */
1804 static void
tcp_req_info_setup_listen(struct tcp_req_info * req)1805 tcp_req_info_setup_listen(struct tcp_req_info* req)
1806 {
1807 	int wr = 0;
1808 	int rd = 0;
1809 
1810 	if(req->cp->tcp_byte_count != 0) {
1811 		/* cannot change, halfway through */
1812 		return;
1813 	}
1814 
1815 	if(!req->cp->tcp_is_reading)
1816 		wr = 1;
1817 	if(!req->read_is_closed)
1818 		rd = 1;
1819 
1820 	if(wr) {
1821 		req->cp->tcp_is_reading = 0;
1822 		comm_point_stop_listening(req->cp);
1823 		comm_point_start_listening(req->cp, -1,
1824 			adjusted_tcp_timeout(req->cp));
1825 	} else if(rd) {
1826 		req->cp->tcp_is_reading = 1;
1827 		comm_point_stop_listening(req->cp);
1828 		comm_point_start_listening(req->cp, -1,
1829 			adjusted_tcp_timeout(req->cp));
1830 		/* and also read it (from SSL stack buffers), so
1831 		 * no event read event is expected since the remainder of
1832 		 * the TLS frame is sitting in the buffers. */
1833 		req->read_again = 1;
1834 	} else {
1835 		comm_point_stop_listening(req->cp);
1836 		comm_point_start_listening(req->cp, -1,
1837 			adjusted_tcp_timeout(req->cp));
1838 		comm_point_listen_for_rw(req->cp, 0, 0);
1839 	}
1840 }
1841 
1842 /** remove first item from list of pending results */
1843 static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info * req)1844 tcp_req_info_pop_done(struct tcp_req_info* req)
1845 {
1846 	struct tcp_req_done_item* item;
1847 	log_assert(req->num_done_req > 0 && req->done_req_list);
1848 	item = req->done_req_list;
1849 	lock_basic_lock(&stream_wait_count_lock);
1850 	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1851 	lock_basic_unlock(&stream_wait_count_lock);
1852 	req->done_req_list = req->done_req_list->next;
1853 	req->num_done_req --;
1854 	return item;
1855 }
1856 
1857 /** Send given buffer and setup to write */
1858 static void
tcp_req_info_start_write_buf(struct tcp_req_info * req,uint8_t * buf,size_t len)1859 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1860 	size_t len)
1861 {
1862 	sldns_buffer_clear(req->cp->buffer);
1863 	sldns_buffer_write(req->cp->buffer, buf, len);
1864 	sldns_buffer_flip(req->cp->buffer);
1865 
1866 	req->cp->tcp_is_reading = 0; /* we are now writing */
1867 }
1868 
1869 /** pick up the next result and start writing it to the channel */
1870 static void
tcp_req_pickup_next_result(struct tcp_req_info * req)1871 tcp_req_pickup_next_result(struct tcp_req_info* req)
1872 {
1873 	if(req->num_done_req > 0) {
1874 		/* unlist the done item from the list of pending results */
1875 		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1876 		tcp_req_info_start_write_buf(req, item->buf, item->len);
1877 		free(item->buf);
1878 		free(item);
1879 	}
1880 }
1881 
1882 /** the read channel has closed */
1883 int
tcp_req_info_handle_read_close(struct tcp_req_info * req)1884 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1885 {
1886 	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1887 	/* reset byte count for (potential) partial read */
1888 	req->cp->tcp_byte_count = 0;
1889 	/* if we still have results to write, pick up next and write it */
1890 	if(req->num_done_req != 0) {
1891 		tcp_req_pickup_next_result(req);
1892 		tcp_req_info_setup_listen(req);
1893 		return 1;
1894 	}
1895 	/* if nothing to do, this closes the connection */
1896 	if(req->num_open_req == 0 && req->num_done_req == 0)
1897 		return 0;
1898 	/* otherwise, we must be waiting for dns resolve, wait with timeout */
1899 	req->read_is_closed = 1;
1900 	tcp_req_info_setup_listen(req);
1901 	return 1;
1902 }
1903 
1904 void
tcp_req_info_handle_writedone(struct tcp_req_info * req)1905 tcp_req_info_handle_writedone(struct tcp_req_info* req)
1906 {
1907 	/* back to reading state, we finished this write event */
1908 	sldns_buffer_clear(req->cp->buffer);
1909 	if(req->num_done_req == 0 && req->read_is_closed) {
1910 		/* no more to write and nothing to read, close it */
1911 		comm_point_drop_reply(&req->cp->repinfo);
1912 		return;
1913 	}
1914 	req->cp->tcp_is_reading = 1;
1915 	/* see if another result needs writing */
1916 	tcp_req_pickup_next_result(req);
1917 
1918 	/* see if there is more to write, if not stop_listening for writing */
1919 	/* see if new requests are allowed, if so, start_listening
1920 	 * for reading */
1921 	tcp_req_info_setup_listen(req);
1922 }
1923 
1924 void
tcp_req_info_handle_readdone(struct tcp_req_info * req)1925 tcp_req_info_handle_readdone(struct tcp_req_info* req)
1926 {
1927 	struct comm_point* c = req->cp;
1928 
1929 	/* we want to read up several requests, unless there are
1930 	 * pending answers */
1931 
1932 	req->is_drop = 0;
1933 	req->is_reply = 0;
1934 	req->in_worker_handle = 1;
1935 	sldns_buffer_set_limit(req->spool_buffer, 0);
1936 	/* handle the current request */
1937 	/* this calls the worker handle request routine that could give
1938 	 * a cache response, or localdata response, or drop the reply,
1939 	 * or schedule a mesh entry for later */
1940 	fptr_ok(fptr_whitelist_comm_point(c->callback));
1941 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1942 		req->in_worker_handle = 0;
1943 		/* there is an answer, put it up.  It is already in the
1944 		 * c->buffer, just send it. */
1945 		/* since we were just reading a query, the channel is
1946 		 * clear to write to */
1947 	send_it:
1948 		c->tcp_is_reading = 0;
1949 		comm_point_stop_listening(c);
1950 		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
1951 		return;
1952 	}
1953 	req->in_worker_handle = 0;
1954 	/* it should be waiting in the mesh for recursion.
1955 	 * If mesh failed to add a new entry and called commpoint_drop_reply.
1956 	 * Then the mesh state has been cleared. */
1957 	if(req->is_drop) {
1958 		/* the reply has been dropped, stream has been closed. */
1959 		return;
1960 	}
1961 	/* If mesh failed(mallocfail) and called commpoint_send_reply with
1962 	 * something like servfail then we pick up that reply below. */
1963 	if(req->is_reply) {
1964 		goto send_it;
1965 	}
1966 
1967 	sldns_buffer_clear(c->buffer);
1968 	/* if pending answers, pick up an answer and start sending it */
1969 	tcp_req_pickup_next_result(req);
1970 
1971 	/* if answers pending, start sending answers */
1972 	/* read more requests if we can have more requests */
1973 	tcp_req_info_setup_listen(req);
1974 }
1975 
1976 int
tcp_req_info_add_meshstate(struct tcp_req_info * req,struct mesh_area * mesh,struct mesh_state * m)1977 tcp_req_info_add_meshstate(struct tcp_req_info* req,
1978 	struct mesh_area* mesh, struct mesh_state* m)
1979 {
1980 	struct tcp_req_open_item* item;
1981 	log_assert(req && mesh && m);
1982 	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
1983 	if(!item) return 0;
1984 	item->next = req->open_req_list;
1985 	item->mesh = mesh;
1986 	item->mesh_state = m;
1987 	req->open_req_list = item;
1988 	req->num_open_req++;
1989 	return 1;
1990 }
1991 
1992 /** Add a result to the result list.  At the end. */
1993 static int
tcp_req_info_add_result(struct tcp_req_info * req,uint8_t * buf,size_t len)1994 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
1995 {
1996 	struct tcp_req_done_item* last = NULL;
1997 	struct tcp_req_done_item* item;
1998 	size_t space;
1999 
2000 	/* see if we have space */
2001 	space = sizeof(struct tcp_req_done_item) + len;
2002 	lock_basic_lock(&stream_wait_count_lock);
2003 	if(stream_wait_count + space > stream_wait_max) {
2004 		lock_basic_unlock(&stream_wait_count_lock);
2005 		verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2006 		return 0;
2007 	}
2008 	stream_wait_count += space;
2009 	lock_basic_unlock(&stream_wait_count_lock);
2010 
2011 	/* find last element */
2012 	last = req->done_req_list;
2013 	while(last && last->next)
2014 		last = last->next;
2015 
2016 	/* create new element */
2017 	item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2018 	if(!item) {
2019 		log_err("malloc failure, for stream result list");
2020 		return 0;
2021 	}
2022 	item->next = NULL;
2023 	item->len = len;
2024 	item->buf = memdup(buf, len);
2025 	if(!item->buf) {
2026 		free(item);
2027 		log_err("malloc failure, adding reply to stream result list");
2028 		return 0;
2029 	}
2030 
2031 	/* link in */
2032 	if(last) last->next = item;
2033 	else req->done_req_list = item;
2034 	req->num_done_req++;
2035 	return 1;
2036 }
2037 
2038 void
tcp_req_info_send_reply(struct tcp_req_info * req)2039 tcp_req_info_send_reply(struct tcp_req_info* req)
2040 {
2041 	if(req->in_worker_handle) {
2042 		/* reply from mesh is in the spool_buffer */
2043 		/* copy now, so that the spool buffer is free for other tasks
2044 		 * before the callback is done */
2045 		sldns_buffer_clear(req->cp->buffer);
2046 		sldns_buffer_write(req->cp->buffer,
2047 			sldns_buffer_begin(req->spool_buffer),
2048 			sldns_buffer_limit(req->spool_buffer));
2049 		sldns_buffer_flip(req->cp->buffer);
2050 		req->is_reply = 1;
2051 		return;
2052 	}
2053 	/* now that the query has been handled, that mesh_reply entry
2054 	 * should be removed, from the tcp_req_info list,
2055 	 * the mesh state cleanup removes then with region_cleanup and
2056 	 * replies_sent true. */
2057 	/* see if we can send it straight away (we are not doing
2058 	 * anything else).  If so, copy to buffer and start */
2059 	if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2060 		/* buffer is free, and was ready to read new query into,
2061 		 * but we are now going to use it to send this answer */
2062 		tcp_req_info_start_write_buf(req,
2063 			sldns_buffer_begin(req->spool_buffer),
2064 			sldns_buffer_limit(req->spool_buffer));
2065 		/* switch to listen to write events */
2066 		comm_point_stop_listening(req->cp);
2067 		comm_point_start_listening(req->cp, -1,
2068 			adjusted_tcp_timeout(req->cp));
2069 		return;
2070 	}
2071 	/* queue up the answer behind the others already pending */
2072 	if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2073 		sldns_buffer_limit(req->spool_buffer))) {
2074 		/* drop the connection, we are out of resources */
2075 		comm_point_drop_reply(&req->cp->repinfo);
2076 	}
2077 }
2078 
tcp_req_info_get_stream_buffer_size(void)2079 size_t tcp_req_info_get_stream_buffer_size(void)
2080 {
2081 	size_t s;
2082 	if(!stream_wait_lock_inited)
2083 		return stream_wait_count;
2084 	lock_basic_lock(&stream_wait_count_lock);
2085 	s = stream_wait_count;
2086 	lock_basic_unlock(&stream_wait_count_lock);
2087 	return s;
2088 }
2089 
http2_get_query_buffer_size(void)2090 size_t http2_get_query_buffer_size(void)
2091 {
2092 	size_t s;
2093 	if(!http2_query_buffer_lock_inited)
2094 		return http2_query_buffer_count;
2095 	lock_basic_lock(&http2_query_buffer_count_lock);
2096 	s = http2_query_buffer_count;
2097 	lock_basic_unlock(&http2_query_buffer_count_lock);
2098 	return s;
2099 }
2100 
http2_get_response_buffer_size(void)2101 size_t http2_get_response_buffer_size(void)
2102 {
2103 	size_t s;
2104 	if(!http2_response_buffer_lock_inited)
2105 		return http2_response_buffer_count;
2106 	lock_basic_lock(&http2_response_buffer_count_lock);
2107 	s = http2_response_buffer_count;
2108 	lock_basic_unlock(&http2_response_buffer_count_lock);
2109 	return s;
2110 }
2111 
2112 #ifdef HAVE_NGHTTP2
2113 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
http2_submit_response_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2114 static ssize_t http2_submit_response_read_callback(
2115 	nghttp2_session* ATTR_UNUSED(session),
2116 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2117 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2118 {
2119 	struct http2_stream* h2_stream;
2120 	struct http2_session* h2_session = source->ptr;
2121 	size_t copylen = length;
2122 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2123 		h2_session->session, stream_id))) {
2124 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2125 			"stream");
2126 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2127 	}
2128 	if(!h2_stream->rbuffer ||
2129 		sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2130 		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2131 			"available in rbuffer");
2132 		/* rbuffer will be free'd in frame close cb */
2133 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2134 	}
2135 
2136 	if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2137 		copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2138 	if(copylen > SSIZE_MAX)
2139 		copylen = SSIZE_MAX; /* will probably never happen */
2140 
2141 	memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2142 	sldns_buffer_skip(h2_stream->rbuffer, copylen);
2143 
2144 	if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2145 		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2146 		lock_basic_lock(&http2_response_buffer_count_lock);
2147 		http2_response_buffer_count -=
2148 			sldns_buffer_capacity(h2_stream->rbuffer);
2149 		lock_basic_unlock(&http2_response_buffer_count_lock);
2150 		sldns_buffer_free(h2_stream->rbuffer);
2151 		h2_stream->rbuffer = NULL;
2152 	}
2153 
2154 	return copylen;
2155 }
2156 
2157 /**
2158  * Send RST_STREAM frame for stream.
2159  * @param h2_session: http2 session to submit frame to
2160  * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2161  * @return 0 on error, 1 otherwise
2162  */
http2_submit_rst_stream(struct http2_session * h2_session,struct http2_stream * h2_stream)2163 static int http2_submit_rst_stream(struct http2_session* h2_session,
2164 		struct http2_stream* h2_stream)
2165 {
2166 	int ret = nghttp2_submit_rst_stream(h2_session->session,
2167 		NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2168 		NGHTTP2_INTERNAL_ERROR);
2169 	if(ret) {
2170 		verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2171 			"error: %s", nghttp2_strerror(ret));
2172 		return 0;
2173 	}
2174 	return 1;
2175 }
2176 
2177 /**
2178  * DNS response ready to be submitted to nghttp2, to be prepared for sending
2179  * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2180  * might be used before this will be sent out.
2181  * @param h2_session: http2 session, containing c->buffer which contains answer
2182  * @return 0 on error, 1 otherwise
2183  */
http2_submit_dns_response(struct http2_session * h2_session)2184 int http2_submit_dns_response(struct http2_session* h2_session)
2185 {
2186 	int ret;
2187 	nghttp2_data_provider data_prd;
2188 	char status[4];
2189 	nghttp2_nv headers[3];
2190 	struct http2_stream* h2_stream = h2_session->c->h2_stream;
2191 	size_t rlen;
2192 	char rlen_str[32];
2193 
2194 	if(h2_stream->rbuffer) {
2195 		log_err("http2 submit response error: rbuffer already "
2196 			"exists");
2197 		return 0;
2198 	}
2199 	if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2200 		log_err("http2 submit response error: c->buffer not complete");
2201 		return 0;
2202 	}
2203 
2204 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2205 		verbose(VERB_QUERY, "http2: submit response error: "
2206 			"invalid status");
2207 		return 0;
2208 	}
2209 
2210 	rlen = sldns_buffer_remaining(h2_session->c->buffer);
2211 	snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2212 
2213 	lock_basic_lock(&http2_response_buffer_count_lock);
2214 	if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2215 		lock_basic_unlock(&http2_response_buffer_count_lock);
2216 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2217 			"in https-response-buffer-size");
2218 		return http2_submit_rst_stream(h2_session, h2_stream);
2219 	}
2220 	http2_response_buffer_count += rlen;
2221 	lock_basic_unlock(&http2_response_buffer_count_lock);
2222 
2223 	if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2224 		lock_basic_lock(&http2_response_buffer_count_lock);
2225 		http2_response_buffer_count -= rlen;
2226 		lock_basic_unlock(&http2_response_buffer_count_lock);
2227 		log_err("http2 submit response error: malloc failure");
2228 		return 0;
2229 	}
2230 
2231 	headers[0].name = (uint8_t*)":status";
2232 	headers[0].namelen = 7;
2233 	headers[0].value = (uint8_t*)status;
2234 	headers[0].valuelen = 3;
2235 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2236 
2237 	headers[1].name = (uint8_t*)"content-type";
2238 	headers[1].namelen = 12;
2239 	headers[1].value = (uint8_t*)"application/dns-message";
2240 	headers[1].valuelen = 23;
2241 	headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2242 
2243 	headers[2].name = (uint8_t*)"content-length";
2244 	headers[2].namelen = 14;
2245 	headers[2].value = (uint8_t*)rlen_str;
2246 	headers[2].valuelen = strlen(rlen_str);
2247 	headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2248 
2249 	sldns_buffer_write(h2_stream->rbuffer,
2250 		sldns_buffer_current(h2_session->c->buffer),
2251 		sldns_buffer_remaining(h2_session->c->buffer));
2252 	sldns_buffer_flip(h2_stream->rbuffer);
2253 
2254 	data_prd.source.ptr = h2_session;
2255 	data_prd.read_callback = http2_submit_response_read_callback;
2256 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2257 		headers, 3, &data_prd);
2258 	if(ret) {
2259 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2260 			"error: %s", nghttp2_strerror(ret));
2261 		return 0;
2262 	}
2263 	return 1;
2264 }
2265 #else
/** Variant for builds without HAVE_NGHTTP2: the argument is not used and
 * the result is always 0 (error). */
int
http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	return 0;
}
2270 #endif
2271 
2272 #ifdef HAVE_NGHTTP2
2273 /** HTTP status to descriptive string */
http_status_to_str(enum http_status s)2274 static char* http_status_to_str(enum http_status s)
2275 {
2276 	switch(s) {
2277 		case HTTP_STATUS_OK:
2278 			return "OK";
2279 		case HTTP_STATUS_BAD_REQUEST:
2280 			return "Bad Request";
2281 		case HTTP_STATUS_NOT_FOUND:
2282 			return "Not Found";
2283 		case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2284 			return "Payload Too Large";
2285 		case HTTP_STATUS_URI_TOO_LONG:
2286 			return "URI Too Long";
2287 		case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2288 			return "Unsupported Media Type";
2289 		case HTTP_STATUS_NOT_IMPLEMENTED:
2290 			return "Not Implemented";
2291 	}
2292 	return "Status Unknown";
2293 }
2294 
2295 /** nghttp2 callback. Used to copy error message to nghttp2 session */
http2_submit_error_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2296 static ssize_t http2_submit_error_read_callback(
2297 	nghttp2_session* ATTR_UNUSED(session),
2298 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2299 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2300 {
2301 	struct http2_stream* h2_stream;
2302 	struct http2_session* h2_session = source->ptr;
2303 	char* msg;
2304 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2305 		h2_session->session, stream_id))) {
2306 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2307 			"stream");
2308 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2309 	}
2310 	*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2311 	msg = http_status_to_str(h2_stream->status);
2312 	if(length < strlen(msg))
2313 		return 0; /* not worth trying over multiple frames */
2314 	memcpy(buf, msg, strlen(msg));
2315 	return strlen(msg);
2316 
2317 }
2318 
2319 /**
2320  * HTTP error response ready to be submitted to nghttp2, to be prepared for
2321  * sending out. Message body will contain descriptive string for HTTP status.
2322  * @param h2_session: http2 session to submit to
2323  * @param h2_stream: http2 stream containing HTTP status to use for error
2324  * @return 0 on error, 1 otherwise
2325  */
http2_submit_error(struct http2_session * h2_session,struct http2_stream * h2_stream)2326 static int http2_submit_error(struct http2_session* h2_session,
2327 	struct http2_stream* h2_stream)
2328 {
2329 	int ret;
2330 	char status[4];
2331 	nghttp2_data_provider data_prd;
2332 	nghttp2_nv headers[1]; /* will be copied by nghttp */
2333 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2334 		verbose(VERB_QUERY, "http2: submit error failed, "
2335 			"invalid status");
2336 		return 0;
2337 	}
2338 	headers[0].name = (uint8_t*)":status";
2339 	headers[0].namelen = 7;
2340 	headers[0].value = (uint8_t*)status;
2341 	headers[0].valuelen = 3;
2342 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2343 
2344 	data_prd.source.ptr = h2_session;
2345 	data_prd.read_callback = http2_submit_error_read_callback;
2346 
2347 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2348 		headers, 1, &data_prd);
2349 	if(ret) {
2350 		verbose(VERB_QUERY, "http2: submit error failed, "
2351 			"error: %s", nghttp2_strerror(ret));
2352 		return 0;
2353 	}
2354 	return 1;
2355 }
2356 
2357 /**
2358  * Start query handling. Query is stored in the stream, and will be free'd here.
2359  * @param h2_session: http2 session, containing comm point
2360  * @param h2_stream: stream containing buffered query
2361  * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2362  * reply available (yet).
2363  */
http2_query_read_done(struct http2_session * h2_session,struct http2_stream * h2_stream)2364 static int http2_query_read_done(struct http2_session* h2_session,
2365 	struct http2_stream* h2_stream)
2366 {
2367 	log_assert(h2_stream->qbuffer);
2368 
2369 	if(h2_session->c->h2_stream) {
2370 		verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2371 			"buffer already assigned to stream");
2372 		return -1;
2373 	}
2374 	if(sldns_buffer_remaining(h2_session->c->buffer) <
2375 		sldns_buffer_remaining(h2_stream->qbuffer)) {
2376 		/* qbuffer will be free'd in frame close cb */
2377 		sldns_buffer_clear(h2_session->c->buffer);
2378 		verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2379 			"qbuffer in c->buffer");
2380 		return -1;
2381 	}
2382 
2383 	sldns_buffer_write(h2_session->c->buffer,
2384 		sldns_buffer_current(h2_stream->qbuffer),
2385 		sldns_buffer_remaining(h2_stream->qbuffer));
2386 
2387 	lock_basic_lock(&http2_query_buffer_count_lock);
2388 	http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2389 	lock_basic_unlock(&http2_query_buffer_count_lock);
2390 	sldns_buffer_free(h2_stream->qbuffer);
2391 	h2_stream->qbuffer = NULL;
2392 
2393 	sldns_buffer_flip(h2_session->c->buffer);
2394 	h2_session->c->h2_stream = h2_stream;
2395 	fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2396 	if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2397 		NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2398 		return 1; /* answer in c->buffer */
2399 	}
2400 	sldns_buffer_clear(h2_session->c->buffer);
2401 	h2_session->c->h2_stream = NULL;
2402 	return 0; /* mesh state added, or dropped */
2403 }
2404 
2405 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2406  * stream. Gather collected request data and start query handling. */
http2_req_frame_recv_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2407 static int http2_req_frame_recv_cb(nghttp2_session* session,
2408 	const nghttp2_frame* frame, void* cb_arg)
2409 {
2410 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2411 	struct http2_stream* h2_stream;
2412 	int query_read_done;
2413 
2414 	if((frame->hd.type != NGHTTP2_DATA &&
2415 		frame->hd.type != NGHTTP2_HEADERS) ||
2416 		!(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2417 			return 0;
2418 	}
2419 
2420 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2421 		session, frame->hd.stream_id)))
2422 		return 0;
2423 
2424 	if(h2_stream->invalid_endpoint) {
2425 		h2_stream->status = HTTP_STATUS_NOT_FOUND;
2426 		goto submit_http_error;
2427 	}
2428 
2429 	if(h2_stream->invalid_content_type) {
2430 		h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2431 		goto submit_http_error;
2432 	}
2433 
2434 	if(h2_stream->http_method != HTTP_METHOD_GET &&
2435 		h2_stream->http_method != HTTP_METHOD_POST) {
2436 		h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2437 		goto submit_http_error;
2438 	}
2439 
2440 	if(h2_stream->query_too_large) {
2441 		if(h2_stream->http_method == HTTP_METHOD_POST)
2442 			h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2443 		else
2444 			h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2445 		goto submit_http_error;
2446 	}
2447 
2448 	if(!h2_stream->qbuffer) {
2449 		h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2450 		goto submit_http_error;
2451 	}
2452 
2453 	if(h2_stream->status) {
2454 submit_http_error:
2455 		verbose(VERB_QUERY, "http2 request invalid, returning :status="
2456 			"%d", h2_stream->status);
2457 		if(!http2_submit_error(h2_session, h2_stream)) {
2458 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2459 		}
2460 		return 0;
2461 	}
2462 	h2_stream->status = HTTP_STATUS_OK;
2463 
2464 	sldns_buffer_flip(h2_stream->qbuffer);
2465 	h2_session->postpone_drop = 1;
2466 	query_read_done = http2_query_read_done(h2_session, h2_stream);
2467 	if(query_read_done < 0)
2468 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2469 	else if(!query_read_done) {
2470 		if(h2_session->is_drop) {
2471 			/* connection needs to be closed. Return failure to make
2472 			 * sure no other action are taken anymore on comm point.
2473 			 * failure will result in reclaiming (and closing)
2474 			 * of comm point. */
2475 			verbose(VERB_QUERY, "http2 query dropped in worker cb");
2476 			h2_session->postpone_drop = 0;
2477 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2478 		}
2479 		/* nothing to submit right now, query added to mesh. */
2480 		h2_session->postpone_drop = 0;
2481 		return 0;
2482 	}
2483 	if(!http2_submit_dns_response(h2_session)) {
2484 		sldns_buffer_clear(h2_session->c->buffer);
2485 		h2_session->c->h2_stream = NULL;
2486 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2487 	}
2488 	verbose(VERB_QUERY, "http2 query submitted to session");
2489 	sldns_buffer_clear(h2_session->c->buffer);
2490 	h2_session->c->h2_stream = NULL;
2491 	return 0;
2492 }
2493 
2494 /** nghttp2 callback. Used to detect start of new streams. */
http2_req_begin_headers_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2495 static int http2_req_begin_headers_cb(nghttp2_session* session,
2496 	const nghttp2_frame* frame, void* cb_arg)
2497 {
2498 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2499 	struct http2_stream* h2_stream;
2500 	int ret;
2501 	if(frame->hd.type != NGHTTP2_HEADERS ||
2502 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2503 		/* only interrested in request headers */
2504 		return 0;
2505 	}
2506 	if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2507 		log_err("malloc failure while creating http2 stream");
2508 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2509 	}
2510 	http2_session_add_stream(h2_session, h2_stream);
2511 	ret = nghttp2_session_set_stream_user_data(session,
2512 		frame->hd.stream_id, h2_stream);
2513 	if(ret) {
2514 		/* stream does not exist */
2515 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2516 			"error: %s", nghttp2_strerror(ret));
2517 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2518 	}
2519 
2520 	return 0;
2521 }
2522 
2523 /**
2524  * base64url decode, store in qbuffer
2525  * @param h2_session: http2 session
2526  * @param h2_stream: http2 stream
2527  * @param start: start of the base64 string
2528  * @param length: length of the base64 string
2529  * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2530  * buffer will be NULL is unparseble.
2531  */
http2_buffer_uri_query(struct http2_session * h2_session,struct http2_stream * h2_stream,const uint8_t * start,size_t length)2532 static int http2_buffer_uri_query(struct http2_session* h2_session,
2533 	struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2534 {
2535 	size_t expectb64len;
2536 	int b64len;
2537 	if(h2_stream->http_method == HTTP_METHOD_POST)
2538 		return 1;
2539 	if(length == 0)
2540 		return 1;
2541 	if(h2_stream->qbuffer) {
2542 		verbose(VERB_ALGO, "http2_req_header fail, "
2543 			"qbuffer already set");
2544 		return 0;
2545 	}
2546 
2547 	/* calculate size, might be a bit bigger than the real
2548 	 * decoded buffer size */
2549 	expectb64len = sldns_b64_pton_calculate_size(length);
2550 	log_assert(expectb64len > 0);
2551 	if(expectb64len >
2552 		h2_session->c->http2_stream_max_qbuffer_size) {
2553 		h2_stream->query_too_large = 1;
2554 		return 1;
2555 	}
2556 
2557 	lock_basic_lock(&http2_query_buffer_count_lock);
2558 	if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2559 		lock_basic_unlock(&http2_query_buffer_count_lock);
2560 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2561 			"in http2-query-buffer-size");
2562 		return http2_submit_rst_stream(h2_session, h2_stream);
2563 	}
2564 	http2_query_buffer_count += expectb64len;
2565 	lock_basic_unlock(&http2_query_buffer_count_lock);
2566 	if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2567 		lock_basic_lock(&http2_query_buffer_count_lock);
2568 		http2_query_buffer_count -= expectb64len;
2569 		lock_basic_unlock(&http2_query_buffer_count_lock);
2570 		log_err("http2_req_header fail, qbuffer "
2571 			"malloc failure");
2572 		return 0;
2573 	}
2574 
2575 	if(!(b64len = sldns_b64url_pton(
2576 		(char const *)start, length,
2577 		sldns_buffer_current(h2_stream->qbuffer),
2578 		expectb64len)) || b64len < 0) {
2579 		lock_basic_lock(&http2_query_buffer_count_lock);
2580 		http2_query_buffer_count -= expectb64len;
2581 		lock_basic_unlock(&http2_query_buffer_count_lock);
2582 		sldns_buffer_free(h2_stream->qbuffer);
2583 		h2_stream->qbuffer = NULL;
2584 		/* return without error, method can be an
2585 		 * unknown POST */
2586 		return 1;
2587 	}
2588 	sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2589 	return 1;
2590 }
2591 
2592 /** nghttp2 callback. Used to parse headers from HEADER frames. */
http2_req_header_cb(nghttp2_session * session,const nghttp2_frame * frame,const uint8_t * name,size_t namelen,const uint8_t * value,size_t valuelen,uint8_t ATTR_UNUSED (flags),void * cb_arg)2593 static int http2_req_header_cb(nghttp2_session* session,
2594 	const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2595 	const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2596 	void* cb_arg)
2597 {
2598 	struct http2_stream* h2_stream = NULL;
2599 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2600 	/* nghttp2 deals with CONTINUATION frames and provides them as part of
2601 	 * the HEADER */
2602 	if(frame->hd.type != NGHTTP2_HEADERS ||
2603 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2604 		/* only interrested in request headers */
2605 		return 0;
2606 	}
2607 	if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2608 		frame->hd.stream_id)))
2609 		return 0;
2610 
2611 	/* earlier checks already indicate we can stop handling this query */
2612 	if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2613 		h2_stream->invalid_content_type ||
2614 		h2_stream->invalid_endpoint)
2615 		return 0;
2616 
2617 
2618 	/* nghttp2 performs some sanity checks in the headers, including:
2619 	 * name and value are guaranteed to be null terminated
2620 	 * name is guaranteed to be lowercase
2621 	 * content-length value is guaranteed to contain digits
2622 	 */
2623 
2624 	if(!h2_stream->http_method && namelen == 7 &&
2625 		memcmp(":method", name, namelen) == 0) {
2626 		/* Case insensitive check on :method value to be on the safe
2627 		 * side. I failed to find text about case sensitivity in specs.
2628 		 */
2629 		if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2630 			h2_stream->http_method = HTTP_METHOD_GET;
2631 		else if(valuelen == 4 &&
2632 			strcasecmp("POST", (const char*)value) == 0) {
2633 			h2_stream->http_method = HTTP_METHOD_POST;
2634 			if(h2_stream->qbuffer) {
2635 				/* POST method uses query from DATA frames */
2636 				lock_basic_lock(&http2_query_buffer_count_lock);
2637 				http2_query_buffer_count -=
2638 					sldns_buffer_capacity(h2_stream->qbuffer);
2639 				lock_basic_unlock(&http2_query_buffer_count_lock);
2640 				sldns_buffer_free(h2_stream->qbuffer);
2641 				h2_stream->qbuffer = NULL;
2642 			}
2643 		} else
2644 			h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2645 		return 0;
2646 	}
2647 	if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2648 		/* :path may contain DNS query, depending on method. Method might
2649 		 * not be known yet here, so check after finishing receiving
2650 		 * stream. */
2651 #define	HTTP_QUERY_PARAM "?dns="
2652 		size_t el = strlen(h2_session->c->http_endpoint);
2653 		size_t qpl = strlen(HTTP_QUERY_PARAM);
2654 
2655 		if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2656 			value, el) != 0) {
2657 			h2_stream->invalid_endpoint = 1;
2658 			return 0;
2659 		}
2660 		/* larger than endpoint only allowed if it is for the query
2661 		 * parameter */
2662 		if(valuelen <= el+qpl ||
2663 			memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2664 			if(valuelen != el)
2665 				h2_stream->invalid_endpoint = 1;
2666 			return 0;
2667 		}
2668 
2669 		if(!http2_buffer_uri_query(h2_session, h2_stream,
2670 			value+(el+qpl), valuelen-(el+qpl))) {
2671 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2672 		}
2673 		return 0;
2674 	}
2675 	/* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
2676 	 * and not needed when using GET. Don't enfore.
2677 	 * If set only allow lowercase "application/dns-message".
2678 	 *
2679 	 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
2680 	 * be able to handle "application/dns-message". Since that is the only
2681 	 * content-type supported we can ignore the accept header.
2682 	 */
2683 	if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
2684 		if(valuelen != 23 || memcmp("application/dns-message", value,
2685 			valuelen) != 0) {
2686 			h2_stream->invalid_content_type = 1;
2687 		}
2688 	}
2689 
2690 	/* Only interested in content-lentg for POST (on not yet known) method.
2691 	 */
2692 	if((!h2_stream->http_method ||
2693 		h2_stream->http_method == HTTP_METHOD_POST) &&
2694 		!h2_stream->content_length && namelen  == 14 &&
2695 		memcmp("content-length", name, namelen) == 0) {
2696 		if(valuelen > 5) {
2697 			h2_stream->query_too_large = 1;
2698 			return 0;
2699 		}
2700 		/* guaranteed to only contian digits and be null terminated */
2701 		h2_stream->content_length = atoi((const char*)value);
2702 		if(h2_stream->content_length >
2703 			h2_session->c->http2_stream_max_qbuffer_size) {
2704 			h2_stream->query_too_large = 1;
2705 			return 0;
2706 		}
2707 	}
2708 	return 0;
2709 }
2710 
2711 /** nghttp2 callback. Used to get data from DATA frames, which can contain
2712  * queries in POST requests. */
http2_req_data_chunk_recv_cb(nghttp2_session * ATTR_UNUSED (session),uint8_t ATTR_UNUSED (flags),int32_t stream_id,const uint8_t * data,size_t len,void * cb_arg)2713 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
2714 	uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
2715 	size_t len, void* cb_arg)
2716 {
2717 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2718 	struct http2_stream* h2_stream;
2719 	size_t qlen = 0;
2720 
2721 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2722 		h2_session->session, stream_id))) {
2723 		return 0;
2724 	}
2725 
2726 	if(h2_stream->query_too_large)
2727 		return 0;
2728 
2729 	if(!h2_stream->qbuffer) {
2730 		if(h2_stream->content_length) {
2731 			if(h2_stream->content_length < len)
2732 				/* getting more data in DATA frame than
2733 				 * advertised in content-length header. */
2734 				return NGHTTP2_ERR_CALLBACK_FAILURE;
2735 			qlen = h2_stream->content_length;
2736 		} else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
2737 			/* setting this to msg-buffer-size can result in a lot
2738 			 * of memory consuption. Most queries should fit in a
2739 			 * single DATA frame, and most POST queries will
2740 			 * containt content-length which does not impose this
2741 			 * limit. */
2742 			qlen = len;
2743 		}
2744 	}
2745 	if(!h2_stream->qbuffer && qlen) {
2746 		lock_basic_lock(&http2_query_buffer_count_lock);
2747 		if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
2748 			lock_basic_unlock(&http2_query_buffer_count_lock);
2749 			verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2750 				"in http2-query-buffer-size");
2751 			return http2_submit_rst_stream(h2_session, h2_stream);
2752 		}
2753 		http2_query_buffer_count += qlen;
2754 		lock_basic_unlock(&http2_query_buffer_count_lock);
2755 		if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
2756 			lock_basic_lock(&http2_query_buffer_count_lock);
2757 			http2_query_buffer_count -= qlen;
2758 			lock_basic_unlock(&http2_query_buffer_count_lock);
2759 		}
2760 	}
2761 
2762 	if(!h2_stream->qbuffer ||
2763 		sldns_buffer_remaining(h2_stream->qbuffer) < len) {
2764 		verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
2765 			"buffer space for POST query. Can happen on multi "
2766 			"frame requests without content-length header");
2767 		h2_stream->query_too_large = 1;
2768 		return 0;
2769 	}
2770 
2771 	sldns_buffer_write(h2_stream->qbuffer, data, len);
2772 
2773 	return 0;
2774 }
2775 
http2_req_stream_clear(struct http2_stream * h2_stream)2776 void http2_req_stream_clear(struct http2_stream* h2_stream)
2777 {
2778 	if(h2_stream->qbuffer) {
2779 		lock_basic_lock(&http2_query_buffer_count_lock);
2780 		http2_query_buffer_count -=
2781 			sldns_buffer_capacity(h2_stream->qbuffer);
2782 		lock_basic_unlock(&http2_query_buffer_count_lock);
2783 		sldns_buffer_free(h2_stream->qbuffer);
2784 		h2_stream->qbuffer = NULL;
2785 	}
2786 	if(h2_stream->rbuffer) {
2787 		lock_basic_lock(&http2_response_buffer_count_lock);
2788 		http2_response_buffer_count -=
2789 			sldns_buffer_capacity(h2_stream->rbuffer);
2790 		lock_basic_unlock(&http2_response_buffer_count_lock);
2791 		sldns_buffer_free(h2_stream->rbuffer);
2792 		h2_stream->rbuffer = NULL;
2793 	}
2794 }
2795 
http2_req_callbacks_create(void)2796 nghttp2_session_callbacks* http2_req_callbacks_create(void)
2797 {
2798 	nghttp2_session_callbacks *callbacks;
2799 	if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
2800 		log_err("failed to initialize nghttp2 callback");
2801 		return NULL;
2802 	}
2803 	/* reception of header block started, used to create h2_stream */
2804 	nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
2805 		http2_req_begin_headers_cb);
2806 	/* complete frame received, used to get data from stream if frame
2807 	 * has end stream flag, and start processing query */
2808 	nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
2809 		http2_req_frame_recv_cb);
2810 	/* get request info from headers */
2811 	nghttp2_session_callbacks_set_on_header_callback(callbacks,
2812 		http2_req_header_cb);
2813 	/* get data from DATA frames, containing POST query */
2814 	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
2815 		http2_req_data_chunk_recv_cb);
2816 
2817 	/* generic HTTP2 callbacks */
2818 	nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
2819 	nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
2820 	nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
2821 		http2_stream_close_cb);
2822 
2823 	return callbacks;
2824 }
2825 #endif /* HAVE_NGHTTP2 */
2826