1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * Copyright (c) 2010-2011 Juniper Networks, Inc.
6 * All rights reserved.
7 *
8 * Portions of this software were developed by Robert N. M. Watson under
9 * contract to Juniper Networks, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the project nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $
36 */
37
38 /*-
39 * Copyright (c) 1982, 1986, 1991, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
67 */
68
69 #include <sys/cdefs.h>
70 #include "opt_inet.h"
71 #include "opt_inet6.h"
72 #include "opt_ipsec.h"
73 #include "opt_pcbgroup.h"
74 #include "opt_route.h"
75 #include "opt_rss.h"
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/malloc.h>
80 #include <sys/mbuf.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/sockio.h>
86 #include <sys/errno.h>
87 #include <sys/time.h>
88 #include <sys/priv.h>
89 #include <sys/proc.h>
90 #include <sys/jail.h>
91
92 #include <vm/uma.h>
93
94 #include <net/if.h>
95 #include <net/if_var.h>
96 #include <net/if_llatbl.h>
97 #include <net/if_types.h>
98 #include <net/route.h>
99 #include <net/route/nhop.h>
100
101 #include <netinet/in.h>
102 #include <netinet/in_var.h>
103 #include <netinet/in_systm.h>
104 #include <netinet/tcp_var.h>
105 #include <netinet/ip6.h>
106 #include <netinet/ip_var.h>
107
108 #include <netinet6/ip6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet/in_pcb.h>
111 #include <netinet6/in6_pcb.h>
112 #include <netinet6/in6_fib.h>
113 #include <netinet6/scope6_var.h>
114
/*
 * Bind the local address and/or port of an IPv6 PCB.
 *
 * inp:  PCB to bind.  It must not already have a local port or a specified
 *       local address, otherwise EINVAL is returned.
 * nam:  requested local address as a sockaddr_in6, or NULL.  With NULL,
 *       prison_local_ip6() is applied to the PCB's current local address
 *       and a port is obtained from in6_pcbsetport().
 * cred: credential used for the prison_local_ip6(), reserved-port and
 *       conflict-lookup checks.
 *
 * The caller must hold the inp write lock and the pcbinfo hash write lock
 * (both are asserted).  'nam' is modified: sa6_embedscope() is applied to
 * it and, for a specified non-multicast address, sin6_port is zeroed before
 * the interface-address lookup.
 *
 * Returns 0 on success.  Failures are EINVAL, EADDRNOTAVAIL, EACCES,
 * EADDRINUSE, EAGAIN (in_pcbinshash() failed) or an error propagated from
 * prison_local_ip6(), sa6_embedscope() or in6_pcbsetport().  When port
 * selection or hash insertion fails, in6p_laddr is reset to in6addr_any
 * (and inp_lport to 0 in the hash-insertion case).
 */
int
in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
    struct ucred *cred)
{
	struct socket *so = inp->inp_socket;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0;
	int error, lookupflags = 0;
	int reuseport = (so->so_options & SO_REUSEPORT);

	/*
	 * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
	 * so that we don't have to add to the (already messy) code below.
	 */
	int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	/* Refuse to re-bind a PCB that already has a port or address. */
	if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		return (EINVAL);
	/*
	 * Without any of the reuse options, the final conflict lookups
	 * below are done with INPLOOKUP_WILDCARD.
	 */
	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
		lookupflags = INPLOOKUP_WILDCARD;
	if (nam == NULL) {
		if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
			return (error);
	} else {
		sin6 = (struct sockaddr_in6 *)nam;
		KASSERT(sin6->sin6_family == AF_INET6,
		    ("%s: invalid address family for %p", __func__, sin6));
		KASSERT(sin6->sin6_len == sizeof(*sin6),
		    ("%s: invalid address length for %p", __func__, sin6));

		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
			return(error);

		if ((error = prison_local_ip6(cred, &sin6->sin6_addr,
		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
			return (error);

		lport = sin6->sin6_port;
		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
			/*
			 * XXX: How to deal with SO_REUSEPORT_LB here?
			 * Treat same as SO_REUSEPORT for now.
			 */
			if ((so->so_options &
			    (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
				reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			struct epoch_tracker et;
			struct ifaddr *ifa;

			/*
			 * The port was saved in lport above; it is cleared in
			 * the caller's sockaddr before the address lookup.
			 */
			sin6->sin6_port = 0;		/* yech... */
			NET_EPOCH_ENTER(et);
			/*
			 * A unicast address must be configured locally unless
			 * the PCB has INP_BINDANY set.
			 */
			if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) ==
			    NULL &&
			    (inp->inp_flags & INP_BINDANY) == 0) {
				NET_EPOCH_EXIT(et);
				return (EADDRNOTAVAIL);
			}

			/*
			 * XXX: bind to an anycast address might accidentally
			 * cause sending a packet with anycast source address.
			 * We should allow to bind to a deprecated address, since
			 * the application dares to use it.
			 */
			if (ifa != NULL &&
			    ((struct in6_ifaddr *)ifa)->ia6_flags &
			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
				NET_EPOCH_EXIT(et);
				return (EADDRNOTAVAIL);
			}
			NET_EPOCH_EXIT(et);
		}
		if (lport) {
			struct inpcb *t;
			struct tcptw *tw;

			/* GROSS */
			/* Ports in the reserved range need a privilege. */
			if (ntohs(lport) <= V_ipport_reservedhigh &&
			    ntohs(lport) >= V_ipport_reservedlow &&
			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT))
				return (EACCES);
			/*
			 * For non-multicast binds by a credential lacking
			 * PRIV_NETINET_REUSEPORT, reject a port that is held
			 * by a PCB owned by a different uid.
			 */
			if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
			    priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) {
				t = in6_pcblookup_local(pcbinfo,
				    &sin6->sin6_addr, lport,
				    INPLOOKUP_WILDCARD, cred);
				if (t &&
				    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
				    (so->so_type != SOCK_STREAM ||
				     IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
				    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
				     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
				     (t->inp_flags2 & INP_REUSEPORT) ||
				     (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
				    (inp->inp_cred->cr_uid !=
				     t->inp_cred->cr_uid))
					return (EADDRINUSE);

				/*
				 * If the socket is a BINDMULTI socket, then
				 * the credentials need to match and the
				 * original socket also has to have been bound
				 * with BINDMULTI.
				 */
				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
					return (EADDRINUSE);

#ifdef INET
				/*
				 * A wildcard bind on a socket without
				 * IN6P_IPV6_V6ONLY is also checked against
				 * the IPv4 PCBs on the same port.
				 */
				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
				    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
					struct sockaddr_in sin;

					in6_sin6_2_sin(&sin, sin6);
					t = in_pcblookup_local(pcbinfo,
					    sin.sin_addr, lport,
					    INPLOOKUP_WILDCARD, cred);
					if (t &&
					    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
					    ((t->inp_flags &
					      INP_TIMEWAIT) == 0) &&
					    (so->so_type != SOCK_STREAM ||
					     ntohl(t->inp_faddr.s_addr) ==
					      INADDR_ANY) &&
					    (inp->inp_cred->cr_uid !=
					     t->inp_cred->cr_uid))
						return (EADDRINUSE);

					if (t && (! in_pcbbind_check_bindmulti(inp, t)))
						return (EADDRINUSE);
				}
#endif
			}
			/*
			 * General conflict check: an existing PCB on this
			 * address/port is tolerated only if it shares one of
			 * the reuse options computed above.
			 */
			t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
			    lport, lookupflags, cred);
			if (t && (t->inp_flags & INP_TIMEWAIT)) {
				/*
				 * XXXRW: If an inpcb has had its timewait
				 * state recycled, we treat the address as
				 * being in use (for now).  This is better
				 * than a panic, but not desirable.
				 */
				tw = intotw(t);
				if (tw == NULL ||
				    ((reuseport & tw->tw_so_options) == 0 &&
					 (reuseport_lb & tw->tw_so_options) == 0))
					return (EADDRINUSE);
			} else if (t && (reuseport & inp_so_options(t)) == 0 &&
					   (reuseport_lb & inp_so_options(t)) == 0) {
				return (EADDRINUSE);
			}
#ifdef INET
			/* Same check against IPv4 PCBs for a dual-stack wildcard bind. */
			if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
			    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
				struct sockaddr_in sin;

				in6_sin6_2_sin(&sin, sin6);
				t = in_pcblookup_local(pcbinfo, sin.sin_addr,
				   lport, lookupflags, cred);
				if (t && t->inp_flags & INP_TIMEWAIT) {
					tw = intotw(t);
					if (tw == NULL)
						return (EADDRINUSE);
					if ((reuseport & tw->tw_so_options) == 0
					    && (reuseport_lb & tw->tw_so_options) == 0
					    && (ntohl(t->inp_laddr.s_addr) !=
					        INADDR_ANY || ((inp->inp_vflag &
					                INP_IPV6PROTO) ==
					            (t->inp_vflag & INP_IPV6PROTO))))
						return (EADDRINUSE);
				} else if (t &&
				    (reuseport & inp_so_options(t)) == 0 &&
				    (reuseport_lb & inp_so_options(t)) == 0 &&
				    (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
				        (t->inp_vflag & INP_IPV6PROTO) != 0)) {
					return (EADDRINUSE);
				}
			}
#endif
		}
		/* All checks passed: record the requested local address. */
		inp->in6p_laddr = sin6->sin6_addr;
	}
	if (lport == 0) {
		/* No explicit port requested: let in6_pcbsetport() pick one. */
		if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) {
			/* Undo an address bind that may have occurred. */
			inp->in6p_laddr = in6addr_any;
			return (error);
		}
	} else {
		inp->inp_lport = lport;
		if (in_pcbinshash(inp) != 0) {
			/* Hash insertion failed: roll back address and port. */
			inp->in6p_laddr = in6addr_any;
			inp->inp_lport = 0;
			return (EAGAIN);
		}
	}
	return (0);
}
328
329 /*
330 * Transform old in6_pcbconnect() into an inner subroutine for new
331 * in6_pcbconnect(): Do some validity-checking on the remote
332 * address (in mbuf 'nam') and then determine local host address
333 * (i.e., which interface) to use to access that remote host.
334 *
335 * This preserves definition of in6_pcbconnect(), while supporting a
336 * slightly different version for T/TCP. (This is more than
337 * a bit of a kludge, but cleaning up the internal interfaces would
338 * have forced minor changes in every protocol).
339 */
340 static int
in6_pcbladdr(struct inpcb * inp,struct sockaddr_in6 * sin6,struct in6_addr * plocal_addr6)341 in6_pcbladdr(struct inpcb *inp, struct sockaddr_in6 *sin6,
342 struct in6_addr *plocal_addr6)
343 {
344 int error = 0;
345 int scope_ambiguous = 0;
346 struct in6_addr in6a;
347 struct epoch_tracker et;
348
349 INP_WLOCK_ASSERT(inp);
350 INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */
351
352 if (sin6->sin6_port == 0)
353 return (EADDRNOTAVAIL);
354
355 if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
356 scope_ambiguous = 1;
357 if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
358 return(error);
359
360 if (!CK_STAILQ_EMPTY(&V_in6_ifaddrhead)) {
361 /*
362 * If the destination address is UNSPECIFIED addr,
363 * use the loopback addr, e.g ::1.
364 */
365 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
366 sin6->sin6_addr = in6addr_loopback;
367 }
368 if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
369 return (error);
370
371 NET_EPOCH_ENTER(et);
372 error = in6_selectsrc_socket(sin6, inp->in6p_outputopts,
373 inp, inp->inp_cred, scope_ambiguous, &in6a, NULL);
374 NET_EPOCH_EXIT(et);
375 if (error)
376 return (error);
377 if (IN6_IS_ADDR_UNSPECIFIED(&in6a))
378 return (EHOSTUNREACH);
379
380 /*
381 * Do not update this earlier, in case we return with an error.
382 *
383 * XXX: this in6_selectsrc_socket result might replace the bound local
384 * address with the address specified by setsockopt(IPV6_PKTINFO).
385 * Is it the intended behavior?
386 */
387 *plocal_addr6 = in6a;
388
389 /*
390 * Don't do pcblookup call here; return interface in
391 * plocal_addr6
392 * and exit to caller, that will do the lookup.
393 */
394
395 return (0);
396 }
397
398 /*
399 * Outer subroutine:
400 * Connect from a socket to a specified address.
401 * Both address and port must be specified in argument sin.
402 * If don't have a local address for this socket yet,
403 * then pick one.
404 */
405 int
in6_pcbconnect_mbuf(struct inpcb * inp,struct sockaddr * nam,struct ucred * cred,struct mbuf * m,bool rehash)406 in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
407 struct ucred *cred, struct mbuf *m, bool rehash)
408 {
409 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
410 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
411 struct sockaddr_in6 laddr6;
412 int error;
413
414 KASSERT(sin6->sin6_family == AF_INET6,
415 ("%s: invalid address family for %p", __func__, sin6));
416 KASSERT(sin6->sin6_len == sizeof(*sin6),
417 ("%s: invalid address length for %p", __func__, sin6));
418
419 bzero(&laddr6, sizeof(laddr6));
420 laddr6.sin6_family = AF_INET6;
421
422 INP_WLOCK_ASSERT(inp);
423 INP_HASH_WLOCK_ASSERT(pcbinfo);
424
425 #ifdef ROUTE_MPATH
426 if (CALC_FLOWID_OUTBOUND) {
427 uint32_t hash_type, hash_val;
428
429 hash_val = fib6_calc_software_hash(&inp->in6p_laddr,
430 &sin6->sin6_addr, 0, sin6->sin6_port,
431 inp->inp_socket->so_proto->pr_protocol, &hash_type);
432 inp->inp_flowid = hash_val;
433 inp->inp_flowtype = hash_type;
434 }
435 #endif
436 /*
437 * Call inner routine, to assign local interface address.
438 * in6_pcbladdr() may automatically fill in sin6_scope_id.
439 */
440 if ((error = in6_pcbladdr(inp, sin6, &laddr6.sin6_addr)) != 0)
441 return (error);
442
443 if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
444 sin6->sin6_port,
445 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
446 ? &laddr6.sin6_addr : &inp->in6p_laddr,
447 inp->inp_lport, 0, NULL, M_NODOM) != NULL) {
448 return (EADDRINUSE);
449 }
450 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
451 if (inp->inp_lport == 0) {
452 /*
453 * rehash was required to be true in the past for
454 * this case; retain that convention. However,
455 * we now call in_pcb_lport_dest rather than
456 * in6_pcbbind; the former does not insert into
457 * the hash table, the latter does. Change rehash
458 * to false to do the in_pcbinshash below.
459 */
460 KASSERT(rehash == true,
461 ("Rehashing required for unbound inps"));
462 rehash = false;
463 error = in_pcb_lport_dest(inp,
464 (struct sockaddr *) &laddr6, &inp->inp_lport,
465 (struct sockaddr *) sin6, sin6->sin6_port, cred,
466 INPLOOKUP_WILDCARD);
467 if (error)
468 return (error);
469 }
470 inp->in6p_laddr = laddr6.sin6_addr;
471 }
472 inp->in6p_faddr = sin6->sin6_addr;
473 inp->inp_fport = sin6->sin6_port;
474 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
475 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
476 if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
477 inp->inp_flow |=
478 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
479
480 if (rehash) {
481 in_pcbrehash_mbuf(inp, m);
482 } else {
483 in_pcbinshash_mbuf(inp, m);
484 }
485
486 return (0);
487 }
488
489 int
in6_pcbconnect(struct inpcb * inp,struct sockaddr * nam,struct ucred * cred)490 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
491 {
492
493 return (in6_pcbconnect_mbuf(inp, nam, cred, NULL, true));
494 }
495
496 void
in6_pcbdisconnect(struct inpcb * inp)497 in6_pcbdisconnect(struct inpcb *inp)
498 {
499
500 INP_WLOCK_ASSERT(inp);
501 INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
502
503 memset(&inp->in6p_laddr, 0, sizeof(inp->in6p_laddr));
504 memset(&inp->in6p_faddr, 0, sizeof(inp->in6p_faddr));
505 inp->inp_fport = 0;
506 /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
507 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
508 in_pcbrehash(inp);
509 }
510
511 struct sockaddr *
in6_sockaddr(in_port_t port,struct in6_addr * addr_p)512 in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
513 {
514 struct sockaddr_in6 *sin6;
515
516 sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK);
517 bzero(sin6, sizeof *sin6);
518 sin6->sin6_family = AF_INET6;
519 sin6->sin6_len = sizeof(*sin6);
520 sin6->sin6_port = port;
521 sin6->sin6_addr = *addr_p;
522 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */
523
524 return (struct sockaddr *)sin6;
525 }
526
527 struct sockaddr *
in6_v4mapsin6_sockaddr(in_port_t port,struct in_addr * addr_p)528 in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
529 {
530 struct sockaddr_in sin;
531 struct sockaddr_in6 *sin6_p;
532
533 bzero(&sin, sizeof sin);
534 sin.sin_family = AF_INET;
535 sin.sin_len = sizeof(sin);
536 sin.sin_port = port;
537 sin.sin_addr = *addr_p;
538
539 sin6_p = malloc(sizeof *sin6_p, M_SONAME,
540 M_WAITOK);
541 in6_sin_2_v4mapsin6(&sin, sin6_p);
542
543 return (struct sockaddr *)sin6_p;
544 }
545
546 int
in6_getsockaddr(struct socket * so,struct sockaddr ** nam)547 in6_getsockaddr(struct socket *so, struct sockaddr **nam)
548 {
549 struct inpcb *inp;
550 struct in6_addr addr;
551 in_port_t port;
552
553 inp = sotoinpcb(so);
554 KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
555
556 INP_RLOCK(inp);
557 port = inp->inp_lport;
558 addr = inp->in6p_laddr;
559 INP_RUNLOCK(inp);
560
561 *nam = in6_sockaddr(port, &addr);
562 return 0;
563 }
564
565 int
in6_getpeeraddr(struct socket * so,struct sockaddr ** nam)566 in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
567 {
568 struct inpcb *inp;
569 struct in6_addr addr;
570 in_port_t port;
571
572 inp = sotoinpcb(so);
573 KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
574
575 INP_RLOCK(inp);
576 port = inp->inp_fport;
577 addr = inp->in6p_faddr;
578 INP_RUNLOCK(inp);
579
580 *nam = in6_sockaddr(port, &addr);
581 return 0;
582 }
583
584 int
in6_mapped_sockaddr(struct socket * so,struct sockaddr ** nam)585 in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
586 {
587 struct inpcb *inp;
588 int error;
589
590 inp = sotoinpcb(so);
591 KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
592
593 #ifdef INET
594 if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
595 error = in_getsockaddr(so, nam);
596 if (error == 0)
597 in6_sin_2_v4mapsin6_in_sock(nam);
598 } else
599 #endif
600 {
601 /* scope issues will be handled in in6_getsockaddr(). */
602 error = in6_getsockaddr(so, nam);
603 }
604
605 return error;
606 }
607
608 int
in6_mapped_peeraddr(struct socket * so,struct sockaddr ** nam)609 in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
610 {
611 struct inpcb *inp;
612 int error;
613
614 inp = sotoinpcb(so);
615 KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
616
617 #ifdef INET
618 if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
619 error = in_getpeeraddr(so, nam);
620 if (error == 0)
621 in6_sin_2_v4mapsin6_in_sock(nam);
622 } else
623 #endif
624 /* scope issues will be handled in in6_getpeeraddr(). */
625 error = in6_getpeeraddr(so, nam);
626
627 return error;
628 }
629
/*
 * Pass some notification to all connections of a protocol
 * associated with address dst.  The local address and/or port numbers
 * may be specified to limit the search.  The "usual action" will be
 * taken, depending on the ctlinput cmd.  The caller must filter any
 * cmds that are uninteresting (e.g., no error in the map).
 * Call the protocol specific routine (if any) to report
 * any errors for each matching socket.
 *
 * pcbinfo:   PCB database to walk; its info write lock is taken for the
 *            duration of the walk.
 * dst:       destination address; anything other than an AF_INET6,
 *            specified address makes the function return immediately.
 * fport_arg: foreign port filter, truncated to u_short; 0 matches any.
 * src:       local address filter, or NULL (sa6_any is used instead).
 * lport_arg: local port filter, truncated to u_short; 0 matches any.
 * cmd:       PRC_* command; out-of-range values are ignored.  It indexes
 *            inet6ctlerrmap[] to obtain the error passed to 'notify'.
 * cmdarg:    for PRC_MSGSIZE, if non-NULL, read as a u_int32_t and handed
 *            to ip6_notify_pmtu() for every IPv6 PCB.
 * notify:    callback run on each matching PCB with its write lock held;
 *            may be NULL.  Replaced by in6_rtchange for redirects.
 */
void
in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
    u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
    int cmd, void *cmdarg,
    struct inpcb *(*notify)(struct inpcb *, int))
{
	struct inpcb *inp, *inp_temp;
	struct sockaddr_in6 sa6_src, *sa6_dst;
	u_short	fport = fport_arg, lport = lport_arg;
	u_int32_t flowinfo;
	int errno;

	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
		return;

	sa6_dst = (struct sockaddr_in6 *)dst;
	if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
		return;

	/*
	 * note that src can be NULL when we get notify by local fragmentation.
	 */
	sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
	flowinfo = sa6_src.sin6_flowinfo;

	/*
	 * Redirects go to all references to the destination,
	 * and use in6_rtchange to invalidate the route cache.
	 * Dead host indications: also use in6_rtchange to invalidate
	 * the cache, and deliver the error to all the sockets.
	 * Otherwise, if we have knowledge of the local port and address,
	 * deliver only to that socket.
	 */
	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
		/* Drop the port and source-address filters for these. */
		fport = 0;
		lport = 0;
		bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));

		if (cmd != PRC_HOSTDEAD)
			notify = in6_rtchange;
	}
	errno = inet6ctlerrmap[cmd];
	INP_INFO_WLOCK(pcbinfo);
	CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
		INP_WLOCK(inp);
		/* Only PCBs flagged INP_IPV6 are of interest. */
		if ((inp->inp_vflag & INP_IPV6) == 0) {
			INP_WUNLOCK(inp);
			continue;
		}

		/*
		 * If the error designates a new path MTU for a destination
		 * and the application (associated with this socket) wanted to
		 * know the value, notify.
		 * XXX: should we avoid to notify the value to TCP sockets?
		 */
		if (cmd == PRC_MSGSIZE && cmdarg != NULL)
			ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
					*(u_int32_t *)cmdarg);

		/*
		 * Detect if we should notify the error. If no source and
		 * destination ports are specified, but non-zero flowinfo and
		 * local address match, notify the error. This is the case
		 * when the error is delivered with an encrypted buffer
		 * by ESP. Otherwise, just compare addresses and ports
		 * as usual.
		 */
		if (lport == 0 && fport == 0 && flowinfo &&
		    inp->inp_socket != NULL &&
		    flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) &&
		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
			goto do_notify;
		else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
					     &sa6_dst->sin6_addr) ||
			 inp->inp_socket == 0 ||
			 (lport && inp->inp_lport != lport) ||
			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
			  !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
					      &sa6_src.sin6_addr)) ||
			 (fport && inp->inp_fport != fport)) {
			/* Not a match: release this PCB and move on. */
			INP_WUNLOCK(inp);
			continue;
		}

	  do_notify:
		/*
		 * The inp is unlocked here only when there is no callback or
		 * the callback returns non-NULL.
		 * NOTE(review): a NULL return presumably means the callback
		 * already released or destroyed the inp -- confirm against
		 * the notify implementations.
		 */
		if (notify) {
			if ((*notify)(inp, errno))
				INP_WUNLOCK(inp);
		} else
			INP_WUNLOCK(inp);
	}
	INP_INFO_WUNLOCK(pcbinfo);
}
733
734 /*
735 * Lookup a PCB based on the local address and port. Caller must hold the
736 * hash lock. No inpcb locks or references are acquired.
737 */
738 struct inpcb *
in6_pcblookup_local(struct inpcbinfo * pcbinfo,struct in6_addr * laddr,u_short lport,int lookupflags,struct ucred * cred)739 in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
740 u_short lport, int lookupflags, struct ucred *cred)
741 {
742 struct inpcb *inp;
743 int matchwild = 3, wildcard;
744
745 KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
746 ("%s: invalid lookup flags %d", __func__, lookupflags));
747
748 INP_HASH_LOCK_ASSERT(pcbinfo);
749
750 if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
751 struct inpcbhead *head;
752 /*
753 * Look for an unconnected (wildcard foreign addr) PCB that
754 * matches the local address and port we're looking for.
755 */
756 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
757 INP6_PCBHASHKEY(&in6addr_any), lport, 0,
758 pcbinfo->ipi_hashmask)];
759 CK_LIST_FOREACH(inp, head, inp_hash) {
760 /* XXX inp locking */
761 if ((inp->inp_vflag & INP_IPV6) == 0)
762 continue;
763 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
764 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
765 inp->inp_lport == lport) {
766 /* Found. */
767 if (cred == NULL ||
768 prison_equal_ip6(cred->cr_prison,
769 inp->inp_cred->cr_prison))
770 return (inp);
771 }
772 }
773 /*
774 * Not found.
775 */
776 return (NULL);
777 } else {
778 struct inpcbporthead *porthash;
779 struct inpcbport *phd;
780 struct inpcb *match = NULL;
781 /*
782 * Best fit PCB lookup.
783 *
784 * First see if this local port is in use by looking on the
785 * port hash list.
786 */
787 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
788 pcbinfo->ipi_porthashmask)];
789 CK_LIST_FOREACH(phd, porthash, phd_hash) {
790 if (phd->phd_port == lport)
791 break;
792 }
793 if (phd != NULL) {
794 /*
795 * Port is in use by one or more PCBs. Look for best
796 * fit.
797 */
798 CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
799 wildcard = 0;
800 if (cred != NULL &&
801 !prison_equal_ip6(cred->cr_prison,
802 inp->inp_cred->cr_prison))
803 continue;
804 /* XXX inp locking */
805 if ((inp->inp_vflag & INP_IPV6) == 0)
806 continue;
807 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
808 wildcard++;
809 if (!IN6_IS_ADDR_UNSPECIFIED(
810 &inp->in6p_laddr)) {
811 if (IN6_IS_ADDR_UNSPECIFIED(laddr))
812 wildcard++;
813 else if (!IN6_ARE_ADDR_EQUAL(
814 &inp->in6p_laddr, laddr))
815 continue;
816 } else {
817 if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
818 wildcard++;
819 }
820 if (wildcard < matchwild) {
821 match = inp;
822 matchwild = wildcard;
823 if (matchwild == 0)
824 break;
825 }
826 }
827 }
828 return (match);
829 }
830 }
831
/*
 * Remove references to interface 'ifp' from the IPv6 multicast options of
 * every PCB in 'pcbinfo'.
 *
 * The walk is done with the pcbinfo info write lock held and each PCB
 * write-locked in turn; PCBs flagged INP_FREED are skipped.  For each IPv6
 * PCB with multicast options, the outgoing multicast interface is cleared
 * if it is 'ifp', and every multicast filter whose group was joined
 * through 'ifp' is removed, left and freed.
 *
 * NOTE(review): IN6_MULTI_LOCK_ASSERT() below suggests the caller is
 * expected to hold the IPv6 multicast lock -- confirm against callers.
 */
void
in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
	struct inpcb *inp;
	struct in6_multi *inm;
	struct in6_mfilter *imf;
	struct ip6_moptions *im6o;

	INP_INFO_WLOCK(pcbinfo);
	CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		INP_WLOCK(inp);
		if (__predict_false(inp->inp_flags2 & INP_FREED)) {
			INP_WUNLOCK(inp);
			continue;
		}
		im6o = inp->in6p_moptions;
		if ((inp->inp_vflag & INP_IPV6) && im6o != NULL) {
			/*
			 * Unselect the outgoing ifp for multicast if it
			 * is being detached.
			 */
			if (im6o->im6o_multicast_ifp == ifp)
				im6o->im6o_multicast_ifp = NULL;
			/*
			 * Drop multicast group membership if we joined
			 * through the interface being detached.
			 */
restart:
			IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) {
				if ((inm = imf->im6f_in6m) == NULL)
					continue;
				if (inm->in6m_ifp != ifp)
					continue;
				ip6_mfilter_remove(&im6o->im6o_head, imf);
				IN6_MULTI_LOCK_ASSERT();
				in6_leavegroup_locked(inm, NULL);
				ip6_mfilter_free(imf);
				/*
				 * The current element was just removed and
				 * freed, so the iteration is restarted from
				 * the head of the filter list.
				 */
				goto restart;
			}
		}
		INP_WUNLOCK(inp);
	}
	INP_INFO_WUNLOCK(pcbinfo);
}
876
877 /*
878 * Check for alternatives when higher level complains
879 * about service problems. For now, invalidate cached
880 * routing information. If the route was created dynamically
881 * (by a redirect), time to try a default gateway again.
882 */
883 void
in6_losing(struct inpcb * inp)884 in6_losing(struct inpcb *inp)
885 {
886
887 RO_INVALIDATE_CACHE(&inp->inp_route6);
888 }
889
890 /*
891 * After a routing change, flush old routing
892 * and allocate a (hopefully) better one.
893 */
894 struct inpcb *
in6_rtchange(struct inpcb * inp,int errno __unused)895 in6_rtchange(struct inpcb *inp, int errno __unused)
896 {
897
898 RO_INVALIDATE_CACHE(&inp->inp_route6);
899 return inp;
900 }
901
902 static bool
in6_pcblookup_lb_numa_match(const struct inpcblbgroup * grp,int domain)903 in6_pcblookup_lb_numa_match(const struct inpcblbgroup *grp, int domain)
904 {
905 return (domain == M_NODOM || domain == grp->il_numa_domain);
906 }
907
/*
 * Select a PCB from the load-balancing groups bound to 'lport'.
 *
 * pcbinfo:     PCB database whose LB group hash is searched; the hash lock
 *              must be held (asserted).
 * laddr:       local (destination) address of the packet; matched against
 *              each group's il6_laddr.
 * lport:       local port; selects the hash chain and filters groups.
 * faddr/fport: foreign address and port; together with lport they feed the
 *              hash that picks a member inside the chosen group.
 * lookupflags: groups bound to the unspecified address are considered only
 *              when INPLOOKUP_WILDCARD is set.
 * domain:      preferred NUMA domain, or M_NODOM for no preference.
 *
 * Returns the selected member PCB, or NULL if no group matches.
 */
static struct inpcb *
in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
    const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
    uint16_t fport, int lookupflags, uint8_t domain)
{
	const struct inpcblbgrouphead *hdr;
	struct inpcblbgroup *grp;
	struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild;

	INP_HASH_LOCK_ASSERT(pcbinfo);

	hdr = &pcbinfo->ipi_lbgrouphashbase[
	    INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];

	/*
	 * Search for an LB group match based on the following criteria:
	 * - prefer jailed groups to non-jailed groups
	 * - prefer exact source address matches to wildcard matches
	 * - prefer groups bound to the specified NUMA domain
	 */
	jail_exact = jail_wild = local_exact = local_wild = NULL;
	CK_LIST_FOREACH(grp, hdr, il_list) {
		bool injail;

#ifdef INET
		/* Skip groups that are not flagged as IPv6. */
		if (!(grp->il_vflag & INP_IPV6))
			continue;
#endif
		if (grp->il_lport != lport)
			continue;

		/*
		 * A group whose credential has PR_IP6 set is only eligible
		 * if prison_check_ip6_locked() accepts laddr for its prison.
		 */
		injail = prison_flag(grp->il_cred, PR_IP6) != 0;
		if (injail && prison_check_ip6_locked(grp->il_cred->cr_prison,
		    laddr) != 0)
			continue;

		if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) {
			if (injail) {
				/*
				 * Unlike the other three candidates, this one
				 * is overwritten by every jailed exact match,
				 * whether or not the NUMA domain matches.
				 */
				jail_exact = grp;
				if (in6_pcblookup_lb_numa_match(grp, domain))
					/* This is a perfect match. */
					goto out;
			} else if (local_exact == NULL ||
			    in6_pcblookup_lb_numa_match(grp, domain)) {
				/* Keep the first hit unless a NUMA match shows up. */
				local_exact = grp;
			}
		} else if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
		    (lookupflags & INPLOOKUP_WILDCARD) != 0) {
			if (injail) {
				if (jail_wild == NULL ||
				    in6_pcblookup_lb_numa_match(grp, domain))
					jail_wild = grp;
			} else if (local_wild == NULL ||
			    in6_pcblookup_lb_numa_match(grp, domain)) {
				local_wild = grp;
			}
		}
	}

	/* Pick the best candidate in decreasing order of preference. */
	if (jail_exact != NULL)
		grp = jail_exact;
	else if (jail_wild != NULL)
		grp = jail_wild;
	else if (local_exact != NULL)
		grp = local_exact;
	else
		grp = local_wild;
	if (grp == NULL)
		return (NULL);
out:
	/*
	 * Choose a member of the group by hashing the foreign address and
	 * the two ports, reduced modulo the member count.
	 * NOTE(review): assumes il_inpcnt is non-zero for any listed group
	 * -- confirm against the group maintenance code.
	 */
	return (grp->il_inp[INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport, fport) %
	    grp->il_inpcnt]);
}
981
982 #ifdef PCBGROUP
983 /*
984 * Lookup PCB in hash list, using pcbgroup tables.
985 */
986 static struct inpcb *
in6_pcblookup_group(struct inpcbinfo * pcbinfo,struct inpcbgroup * pcbgroup,struct in6_addr * faddr,u_int fport_arg,struct in6_addr * laddr,u_int lport_arg,int lookupflags,struct ifnet * ifp)987 in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
988 struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
989 u_int lport_arg, int lookupflags, struct ifnet *ifp)
990 {
991 struct inpcbhead *head;
992 struct inpcb *inp, *tmpinp;
993 u_short fport = fport_arg, lport = lport_arg;
994 bool locked;
995
996 /*
997 * First look for an exact match.
998 */
999 tmpinp = NULL;
1000 INP_GROUP_LOCK(pcbgroup);
1001 head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
1002 INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
1003 CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
1004 /* XXX inp locking */
1005 if ((inp->inp_vflag & INP_IPV6) == 0)
1006 continue;
1007 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
1008 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
1009 inp->inp_fport == fport &&
1010 inp->inp_lport == lport) {
1011 /*
1012 * XXX We should be able to directly return
1013 * the inp here, without any checks.
1014 * Well unless both bound with SO_REUSEPORT?
1015 */
1016 if (prison_flag(inp->inp_cred, PR_IP6))
1017 goto found;
1018 if (tmpinp == NULL)
1019 tmpinp = inp;
1020 }
1021 }
1022 if (tmpinp != NULL) {
1023 inp = tmpinp;
1024 goto found;
1025 }
1026
1027 /*
1028 * Then look for a wildcard match in the pcbgroup.
1029 */
1030 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
1031 struct inpcb *local_wild = NULL, *local_exact = NULL;
1032 struct inpcb *jail_wild = NULL;
1033 int injail;
1034
1035 /*
1036 * Order of socket selection - we always prefer jails.
1037 * 1. jailed, non-wild.
1038 * 2. jailed, wild.
1039 * 3. non-jailed, non-wild.
1040 * 4. non-jailed, wild.
1041 */
1042 head = &pcbgroup->ipg_hashbase[
1043 INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)];
1044 CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
1045 /* XXX inp locking */
1046 if ((inp->inp_vflag & INP_IPV6) == 0)
1047 continue;
1048
1049 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
1050 inp->inp_lport != lport) {
1051 continue;
1052 }
1053
1054 injail = prison_flag(inp->inp_cred, PR_IP6);
1055 if (injail) {
1056 if (prison_check_ip6(inp->inp_cred,
1057 laddr) != 0)
1058 continue;
1059 } else {
1060 if (local_exact != NULL)
1061 continue;
1062 }
1063
1064 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
1065 if (injail)
1066 goto found;
1067 else
1068 local_exact = inp;
1069 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1070 if (injail)
1071 jail_wild = inp;
1072 else
1073 local_wild = inp;
1074 }
1075 } /* LIST_FOREACH */
1076
1077 inp = jail_wild;
1078 if (inp == NULL)
1079 inp = jail_wild;
1080 if (inp == NULL)
1081 inp = local_exact;
1082 if (inp == NULL)
1083 inp = local_wild;
1084 if (inp != NULL)
1085 goto found;
1086 }
1087
1088 /*
1089 * Then look for a wildcard match, if requested.
1090 */
1091 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
1092 struct inpcb *local_wild = NULL, *local_exact = NULL;
1093 struct inpcb *jail_wild = NULL;
1094 int injail;
1095
1096 /*
1097 * Order of socket selection - we always prefer jails.
1098 * 1. jailed, non-wild.
1099 * 2. jailed, wild.
1100 * 3. non-jailed, non-wild.
1101 * 4. non-jailed, wild.
1102 */
1103 head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
1104 INP6_PCBHASHKEY(&in6addr_any), lport, 0,
1105 pcbinfo->ipi_wildmask)];
1106 CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
1107 /* XXX inp locking */
1108 if ((inp->inp_vflag & INP_IPV6) == 0)
1109 continue;
1110
1111 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
1112 inp->inp_lport != lport) {
1113 continue;
1114 }
1115
1116 injail = prison_flag(inp->inp_cred, PR_IP6);
1117 if (injail) {
1118 if (prison_check_ip6(inp->inp_cred,
1119 laddr) != 0)
1120 continue;
1121 } else {
1122 if (local_exact != NULL)
1123 continue;
1124 }
1125
1126 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
1127 if (injail)
1128 goto found;
1129 else
1130 local_exact = inp;
1131 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1132 if (injail)
1133 jail_wild = inp;
1134 else
1135 local_wild = inp;
1136 }
1137 } /* LIST_FOREACH */
1138
1139 inp = jail_wild;
1140 if (inp == NULL)
1141 inp = jail_wild;
1142 if (inp == NULL)
1143 inp = local_exact;
1144 if (inp == NULL)
1145 inp = local_wild;
1146 if (inp != NULL)
1147 goto found;
1148 } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
1149 INP_GROUP_UNLOCK(pcbgroup);
1150 return (NULL);
1151
1152 found:
1153 if (lookupflags & INPLOOKUP_WLOCKPCB)
1154 locked = INP_TRY_WLOCK(inp);
1155 else if (lookupflags & INPLOOKUP_RLOCKPCB)
1156 locked = INP_TRY_RLOCK(inp);
1157 else
1158 panic("%s: locking buf", __func__);
1159 if (!locked)
1160 in_pcbref(inp);
1161 INP_GROUP_UNLOCK(pcbgroup);
1162 if (!locked) {
1163 if (lookupflags & INPLOOKUP_WLOCKPCB) {
1164 INP_WLOCK(inp);
1165 if (in_pcbrele_wlocked(inp))
1166 return (NULL);
1167 } else {
1168 INP_RLOCK(inp);
1169 if (in_pcbrele_rlocked(inp))
1170 return (NULL);
1171 }
1172 }
1173 #ifdef INVARIANTS
1174 if (lookupflags & INPLOOKUP_WLOCKPCB)
1175 INP_WLOCK_ASSERT(inp);
1176 else
1177 INP_RLOCK_ASSERT(inp);
1178 #endif
1179 return (inp);
1180 }
1181 #endif /* PCBGROUP */
1182
1183 /*
1184 * Lookup PCB in hash list. Used in in_pcb.c as well as here.
1185 */
1186 struct inpcb *
in6_pcblookup_hash_locked(struct inpcbinfo * pcbinfo,struct in6_addr * faddr,u_int fport_arg,struct in6_addr * laddr,u_int lport_arg,int lookupflags,struct ifnet * ifp,uint8_t numa_domain)1187 in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1188 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
1189 int lookupflags, struct ifnet *ifp, uint8_t numa_domain)
1190 {
1191 struct inpcbhead *head;
1192 struct inpcb *inp, *tmpinp;
1193 u_short fport = fport_arg, lport = lport_arg;
1194
1195 KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
1196 ("%s: invalid lookup flags %d", __func__, lookupflags));
1197
1198 INP_HASH_LOCK_ASSERT(pcbinfo);
1199
1200 /*
1201 * First look for an exact match.
1202 */
1203 tmpinp = NULL;
1204 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
1205 INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
1206 CK_LIST_FOREACH(inp, head, inp_hash) {
1207 /* XXX inp locking */
1208 if ((inp->inp_vflag & INP_IPV6) == 0)
1209 continue;
1210 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
1211 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
1212 inp->inp_fport == fport &&
1213 inp->inp_lport == lport) {
1214 /*
1215 * XXX We should be able to directly return
1216 * the inp here, without any checks.
1217 * Well unless both bound with SO_REUSEPORT?
1218 */
1219 if (prison_flag(inp->inp_cred, PR_IP6))
1220 return (inp);
1221 if (tmpinp == NULL)
1222 tmpinp = inp;
1223 }
1224 }
1225 if (tmpinp != NULL)
1226 return (tmpinp);
1227
1228 /*
1229 * Then look for a wildcard match, if requested.
1230 */
1231 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
1232 struct inpcb *local_wild = NULL, *local_exact = NULL;
1233 struct inpcb *jail_wild = NULL;
1234 int injail;
1235
1236 /*
1237 * First see if an LB group matches the request before scanning
1238 * all sockets on this port.
1239 */
1240 inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
1241 fport, lookupflags, numa_domain);
1242 if (inp != NULL)
1243 return (inp);
1244
1245 /*
1246 * Order of socket selection - we always prefer jails.
1247 * 1. jailed, non-wild.
1248 * 2. jailed, wild.
1249 * 3. non-jailed, non-wild.
1250 * 4. non-jailed, wild.
1251 */
1252 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
1253 INP6_PCBHASHKEY(&in6addr_any), lport, 0,
1254 pcbinfo->ipi_hashmask)];
1255 CK_LIST_FOREACH(inp, head, inp_hash) {
1256 /* XXX inp locking */
1257 if ((inp->inp_vflag & INP_IPV6) == 0)
1258 continue;
1259
1260 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
1261 inp->inp_lport != lport) {
1262 continue;
1263 }
1264
1265 injail = prison_flag(inp->inp_cred, PR_IP6);
1266 if (injail) {
1267 if (prison_check_ip6(inp->inp_cred,
1268 laddr) != 0)
1269 continue;
1270 } else {
1271 if (local_exact != NULL)
1272 continue;
1273 }
1274
1275 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
1276 if (injail)
1277 return (inp);
1278 else
1279 local_exact = inp;
1280 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1281 if (injail)
1282 jail_wild = inp;
1283 else
1284 local_wild = inp;
1285 }
1286 } /* LIST_FOREACH */
1287
1288 if (jail_wild != NULL)
1289 return (jail_wild);
1290 if (local_exact != NULL)
1291 return (local_exact);
1292 if (local_wild != NULL)
1293 return (local_wild);
1294 } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
1295
1296 /*
1297 * Not found.
1298 */
1299 return (NULL);
1300 }
1301
1302 /*
1303 * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
1304 * hash list lock, and will return the inpcb locked (i.e., requires
1305 * INPLOOKUP_LOCKPCB).
1306 */
1307 static struct inpcb *
in6_pcblookup_hash(struct inpcbinfo * pcbinfo,struct in6_addr * faddr,u_int fport,struct in6_addr * laddr,u_int lport,int lookupflags,struct ifnet * ifp,uint8_t numa_domain)1308 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1309 u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
1310 struct ifnet *ifp, uint8_t numa_domain)
1311 {
1312 struct inpcb *inp;
1313
1314 inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
1315 (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp,
1316 numa_domain);
1317 if (inp != NULL) {
1318 if (lookupflags & INPLOOKUP_WLOCKPCB) {
1319 INP_WLOCK(inp);
1320 if (__predict_false(inp->inp_flags2 & INP_FREED)) {
1321 INP_WUNLOCK(inp);
1322 inp = NULL;
1323 }
1324 } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
1325 INP_RLOCK(inp);
1326 if (__predict_false(inp->inp_flags2 & INP_FREED)) {
1327 INP_RUNLOCK(inp);
1328 inp = NULL;
1329 }
1330 } else
1331 panic("%s: locking bug", __func__);
1332 #ifdef INVARIANTS
1333 if (inp != NULL) {
1334 if (lookupflags & INPLOOKUP_WLOCKPCB)
1335 INP_WLOCK_ASSERT(inp);
1336 else
1337 INP_RLOCK_ASSERT(inp);
1338 }
1339 #endif
1340 }
1341 return (inp);
1342 }
1343
1344 /*
1345 * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
1346 * from which a pre-calculated hash value may be extracted.
1347 *
1348 * Possibly more of this logic should be in in6_pcbgroup.c.
1349 */
1350 struct inpcb *
in6_pcblookup(struct inpcbinfo * pcbinfo,struct in6_addr * faddr,u_int fport,struct in6_addr * laddr,u_int lport,int lookupflags,struct ifnet * ifp)1351 in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
1352 struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
1353 {
1354 #if defined(PCBGROUP) && !defined(RSS)
1355 struct inpcbgroup *pcbgroup;
1356 #endif
1357
1358 KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
1359 ("%s: invalid lookup flags %d", __func__, lookupflags));
1360 KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
1361 ("%s: LOCKPCB not set", __func__));
1362
1363 /*
1364 * When not using RSS, use connection groups in preference to the
1365 * reservation table when looking up 4-tuples. When using RSS, just
1366 * use the reservation table, due to the cost of the Toeplitz hash
1367 * in software.
1368 *
1369 * XXXRW: This policy belongs in the pcbgroup code, as in principle
1370 * we could be doing RSS with a non-Toeplitz hash that is affordable
1371 * in software.
1372 */
1373 #if defined(PCBGROUP) && !defined(RSS)
1374 if (in_pcbgroup_enabled(pcbinfo)) {
1375 pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
1376 fport);
1377 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
1378 laddr, lport, lookupflags, ifp));
1379 }
1380 #endif
1381 return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
1382 lookupflags, ifp, M_NODOM));
1383 }
1384
1385 struct inpcb *
in6_pcblookup_mbuf(struct inpcbinfo * pcbinfo,struct in6_addr * faddr,u_int fport,struct in6_addr * laddr,u_int lport,int lookupflags,struct ifnet * ifp,struct mbuf * m)1386 in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1387 u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
1388 struct ifnet *ifp, struct mbuf *m)
1389 {
1390 #ifdef PCBGROUP
1391 struct inpcbgroup *pcbgroup;
1392 #endif
1393
1394 KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
1395 ("%s: invalid lookup flags %d", __func__, lookupflags));
1396 KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
1397 ("%s: LOCKPCB not set", __func__));
1398
1399 #ifdef PCBGROUP
1400 /*
1401 * If we can use a hardware-generated hash to look up the connection
1402 * group, use that connection group to find the inpcb. Otherwise
1403 * fall back on a software hash -- or the reservation table if we're
1404 * using RSS.
1405 *
1406 * XXXRW: As above, that policy belongs in the pcbgroup code.
1407 */
1408 if (in_pcbgroup_enabled(pcbinfo) &&
1409 M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) {
1410 pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
1411 m->m_pkthdr.flowid);
1412 if (pcbgroup != NULL)
1413 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
1414 fport, laddr, lport, lookupflags, ifp));
1415 #ifndef RSS
1416 pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
1417 fport);
1418 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
1419 laddr, lport, lookupflags, ifp));
1420 #endif
1421 }
1422 #endif
1423 return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
1424 lookupflags, ifp, m->m_pkthdr.numa_domain));
1425 }
1426
1427 void
init_sin6(struct sockaddr_in6 * sin6,struct mbuf * m,int srcordst)1428 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int srcordst)
1429 {
1430 struct ip6_hdr *ip;
1431
1432 ip = mtod(m, struct ip6_hdr *);
1433 bzero(sin6, sizeof(*sin6));
1434 sin6->sin6_len = sizeof(*sin6);
1435 sin6->sin6_family = AF_INET6;
1436 sin6->sin6_addr = srcordst ? ip->ip6_dst : ip->ip6_src;
1437
1438 (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */
1439
1440 return;
1441 }
1442