1 /* $OpenBSD: in6_pcb.c,v 1.146 2024/12/21 00:10:04 mvs Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
34 *
35 * NRL grants permission for redistribution and use in source and binary
36 * forms, with or without modification, of the software and documentation
37 * created at NRL provided that the following conditions are met:
38 *
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgements:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * This product includes software developed at the Information
49 * Technology Division, US Naval Research Laboratory.
50 * 4. Neither the name of the NRL nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
55 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
58 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
59 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
61 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
62 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
63 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 *
66 * The views and conclusions contained in the software and documentation
67 * are those of the authors and should not be interpreted as representing
68 * official policies, either expressed or implied, of the US Naval
69 * Research Laboratory (NRL).
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1990, 1993, 1995
74 * Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. Neither the name of the University nor the names of its contributors
85 * may be used to endorse or promote products derived from this software
86 * without specific prior written permission.
87 *
88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
98 * SUCH DAMAGE.
99 *
100 */
101
102 #include "pf.h"
103 #include "stoeplitz.h"
104
105 #include <sys/param.h>
106 #include <sys/systm.h>
107 #include <sys/mbuf.h>
108 #include <sys/protosw.h>
109 #include <sys/socket.h>
110 #include <sys/socketvar.h>
111
112 #include <net/if.h>
113 #include <net/if_var.h>
114 #include <net/pfvar.h>
115
116 #include <netinet/in.h>
117 #include <netinet6/in6_var.h>
118 #include <netinet/ip.h>
119 #include <netinet/ip_var.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/in_pcb.h>
122
123 #if NSTOEPLITZ > 0
124 #include <net/toeplitz.h>
125 #endif
126
127 const struct in6_addr zeroin6_addr;
128
129 struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int,
130 const struct in6_addr *, u_short, const struct in6_addr *, u_short);
131
132 struct inpcb * in6_pcblookup_lock(struct inpcbtable *, const struct in6_addr *,
133 u_int, const struct in6_addr *, u_int, u_int, int);
134
135 uint64_t
in6_pcbhash(struct inpcbtable * table,u_int rdomain,const struct in6_addr * faddr,u_short fport,const struct in6_addr * laddr,u_short lport)136 in6_pcbhash(struct inpcbtable *table, u_int rdomain,
137 const struct in6_addr *faddr, u_short fport,
138 const struct in6_addr *laddr, u_short lport)
139 {
140 SIPHASH_CTX ctx;
141 u_int32_t nrdom = htonl(rdomain);
142
143 SipHash24_Init(&ctx, &table->inpt_key);
144 SipHash24_Update(&ctx, &nrdom, sizeof(nrdom));
145 SipHash24_Update(&ctx, faddr, sizeof(*faddr));
146 SipHash24_Update(&ctx, &fport, sizeof(fport));
147 SipHash24_Update(&ctx, laddr, sizeof(*laddr));
148 SipHash24_Update(&ctx, &lport, sizeof(lport));
149 return SipHash24_End(&ctx);
150 }
151
152 int
in6_pcbaddrisavail_lock(const struct inpcb * inp,struct sockaddr_in6 * sin6,int wild,struct proc * p,int lock)153 in6_pcbaddrisavail_lock(const struct inpcb *inp, struct sockaddr_in6 *sin6,
154 int wild, struct proc *p, int lock)
155 {
156 struct socket *so = inp->inp_socket;
157 struct inpcbtable *table = inp->inp_table;
158 u_short lport = sin6->sin6_port;
159 int reuseport = (so->so_options & SO_REUSEPORT);
160
161 wild |= INPLOOKUP_IPV6;
162 /* KAME hack: embed scopeid */
163 if (in6_embedscope(&sin6->sin6_addr, sin6,
164 inp->inp_outputopts6, inp->inp_moptions6) != 0)
165 return (EINVAL);
166 /* this must be cleared for ifa_ifwithaddr() */
167 sin6->sin6_scope_id = 0;
168 /* reject IPv4 mapped address, we have no support for it */
169 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
170 return (EADDRNOTAVAIL);
171
172 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
173 /*
174 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
175 * allow complete duplication of binding if
176 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
177 * and a multicast address is bound on both
178 * new and duplicated sockets.
179 */
180 if (so->so_options & (SO_REUSEADDR|SO_REUSEPORT))
181 reuseport = SO_REUSEADDR | SO_REUSEPORT;
182 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
183 struct ifaddr *ifa = NULL;
184
185 sin6->sin6_port = 0; /*
186 * Yechhhh, because of upcoming
187 * call to ifa_ifwithaddr(), which
188 * does bcmp's over the PORTS as
189 * well. (What about flow?)
190 */
191 sin6->sin6_flowinfo = 0;
192 if (!(so->so_options & SO_BINDANY) &&
193 (ifa = ifa_ifwithaddr(sin6tosa(sin6),
194 inp->inp_rtableid)) == NULL)
195 return (EADDRNOTAVAIL);
196 sin6->sin6_port = lport;
197
198 /*
199 * bind to an anycast address might accidentally
200 * cause sending a packet with an anycast source
201 * address, so we forbid it.
202 *
203 * We should allow to bind to a deprecated address,
204 * since the application dare to use it.
205 * But, can we assume that they are careful enough
206 * to check if the address is deprecated or not?
207 * Maybe, as a safeguard, we should have a setsockopt
208 * flag to control the bind(2) behavior against
209 * deprecated addresses (default: forbid bind(2)).
210 */
211 if (ifa && ifatoia6(ifa)->ia6_flags & (IN6_IFF_ANYCAST|
212 IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED|IN6_IFF_DETACHED))
213 return (EADDRNOTAVAIL);
214 }
215 if (lport) {
216 struct inpcb *t;
217 int error = 0;
218
219 if (so->so_euid && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
220 t = in_pcblookup_local_lock(table, &sin6->sin6_addr,
221 lport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6,
222 inp->inp_rtableid, lock);
223 if (t && (so->so_euid != t->inp_socket->so_euid))
224 error = EADDRINUSE;
225 if (lock == IN_PCBLOCK_GRAB)
226 in_pcbunref(t);
227 if (error)
228 return (error);
229 }
230 t = in_pcblookup_local_lock(table, &sin6->sin6_addr, lport,
231 wild, inp->inp_rtableid, lock);
232 if (t && (reuseport & t->inp_socket->so_options) == 0)
233 error = EADDRINUSE;
234 if (lock == IN_PCBLOCK_GRAB)
235 in_pcbunref(t);
236 if (error)
237 return (error);
238 }
239 return (0);
240 }
241
242 int
in6_pcbaddrisavail(const struct inpcb * inp,struct sockaddr_in6 * sin6,int wild,struct proc * p)243 in6_pcbaddrisavail(const struct inpcb *inp, struct sockaddr_in6 *sin6,
244 int wild, struct proc *p)
245 {
246 return in6_pcbaddrisavail_lock(inp, sin6, wild, p, IN_PCBLOCK_GRAB);
247 }
248
249 /*
250 * Connect from a socket to a specified address.
251 * Both address and port must be specified in argument sin6.
252 * Eventually, flow labels will have to be dealt with here, as well.
253 *
254 * If don't have a local address for this socket yet,
255 * then pick one.
256 */
257 int
in6_pcbconnect(struct inpcb * inp,struct mbuf * nam)258 in6_pcbconnect(struct inpcb *inp, struct mbuf *nam)
259 {
260 struct inpcbtable *table = inp->inp_table;
261 const struct in6_addr *in6a;
262 struct sockaddr_in6 *sin6;
263 struct inpcb *t;
264 int error;
265 struct sockaddr_in6 tmp;
266
267 KASSERT(ISSET(inp->inp_flags, INP_IPV6));
268
269 if ((error = in6_nam2sin6(nam, &sin6)))
270 return (error);
271 if (sin6->sin6_port == 0)
272 return (EADDRNOTAVAIL);
273 /* reject IPv4 mapped address, we have no support for it */
274 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
275 return (EADDRNOTAVAIL);
276
277 /* protect *sin6 from overwrites */
278 tmp = *sin6;
279 sin6 = &tmp;
280
281 /* KAME hack: embed scopeid */
282 if (in6_embedscope(&sin6->sin6_addr, sin6,
283 inp->inp_outputopts6, inp->inp_moptions6) != 0)
284 return (EINVAL);
285 /* this must be cleared for ifa_ifwithaddr() */
286 sin6->sin6_scope_id = 0;
287
288 /* Source address selection. */
289 /*
290 * XXX: in6_selectsrc might replace the bound local address
291 * with the address specified by setsockopt(IPV6_PKTINFO).
292 * Is it the intended behavior?
293 */
294 error = in6_pcbselsrc(&in6a, sin6, inp, inp->inp_outputopts6);
295 if (error)
296 return (error);
297
298 inp->inp_ipv6.ip6_hlim = (u_int8_t)in6_selecthlim(inp);
299
300 /* keep lookup, modification, and rehash in sync */
301 mtx_enter(&table->inpt_mtx);
302
303 t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
304 sin6->sin6_port,
305 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) ? in6a : &inp->inp_laddr6,
306 inp->inp_lport, inp->inp_rtableid, IN_PCBLOCK_HOLD);
307 if (t != NULL) {
308 mtx_leave(&table->inpt_mtx);
309 return (EADDRINUSE);
310 }
311
312 KASSERT(IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) || inp->inp_lport);
313
314 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
315 if (inp->inp_lport == 0) {
316 error = in_pcbbind_locked(inp, NULL, in6a, curproc);
317 if (error) {
318 mtx_leave(&table->inpt_mtx);
319 return (error);
320 }
321 t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
322 sin6->sin6_port, in6a, inp->inp_lport,
323 inp->inp_rtableid, IN_PCBLOCK_HOLD);
324 if (t != NULL) {
325 inp->inp_lport = 0;
326 mtx_leave(&table->inpt_mtx);
327 return (EADDRINUSE);
328 }
329 }
330 inp->inp_laddr6 = *in6a;
331 }
332 inp->inp_faddr6 = sin6->sin6_addr;
333 inp->inp_fport = sin6->sin6_port;
334 in_pcbrehash(inp);
335
336 mtx_leave(&table->inpt_mtx);
337
338 inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
339 if (ip6_auto_flowlabel)
340 inp->inp_flowinfo |=
341 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
342 #if NSTOEPLITZ > 0
343 inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
344 &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
345 #endif
346 return (0);
347 }
348
349 /*
350 * Get the local address/port, and put it in a sockaddr_in6.
351 * This services the getsockname(2) call.
352 */
353 void
in6_setsockaddr(struct inpcb * inp,struct mbuf * nam)354 in6_setsockaddr(struct inpcb *inp, struct mbuf *nam)
355 {
356 struct sockaddr_in6 *sin6;
357
358 nam->m_len = sizeof(struct sockaddr_in6);
359 sin6 = mtod(nam,struct sockaddr_in6 *);
360
361 bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6));
362 sin6->sin6_family = AF_INET6;
363 sin6->sin6_len = sizeof(struct sockaddr_in6);
364 sin6->sin6_port = inp->inp_lport;
365 sin6->sin6_addr = inp->inp_laddr6;
366 /* KAME hack: recover scopeid */
367 in6_recoverscope(sin6, &inp->inp_laddr6);
368 }
369
370 /*
371 * Get the foreign address/port, and put it in a sockaddr_in6.
372 * This services the getpeername(2) call.
373 */
374 void
in6_setpeeraddr(struct inpcb * inp,struct mbuf * nam)375 in6_setpeeraddr(struct inpcb *inp, struct mbuf *nam)
376 {
377 struct sockaddr_in6 *sin6;
378
379 nam->m_len = sizeof(struct sockaddr_in6);
380 sin6 = mtod(nam,struct sockaddr_in6 *);
381
382 bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6));
383 sin6->sin6_family = AF_INET6;
384 sin6->sin6_len = sizeof(struct sockaddr_in6);
385 sin6->sin6_port = inp->inp_fport;
386 sin6->sin6_addr = inp->inp_faddr6;
387 /* KAME hack: recover scopeid */
388 in6_recoverscope(sin6, &inp->inp_faddr6);
389 }
390
/*
 * Socket-level entry point: fill nam with the local address of the
 * PCB attached to so.  Always returns 0.
 */
int
in6_sockaddr(struct socket *so, struct mbuf *nam)
{
	in6_setsockaddr(sotoinpcb(so), nam);
	return (0);
}
401
/*
 * Socket-level entry point: fill nam with the foreign address of the
 * PCB attached to so.  Always returns 0.
 */
int
in6_peeraddr(struct socket *so, struct mbuf *nam)
{
	in6_setpeeraddr(sotoinpcb(so), nam);
	return (0);
}
412
413 /*
414 * Pass some notification to all connections of a protocol
415 * associated with address dst. The local address and/or port numbers
416 * may be specified to limit the search. The "usual action" will be
417 * taken, depending on the ctlinput cmd. The caller must filter any
418 * cmds that are uninteresting (e.g., no error in the map).
419 * Call the protocol specific routine (if any) to report
420 * any errors for each matching socket.
421 *
422 * Also perform input-side security policy check
423 * once PCB to be notified has been located.
424 */
425 void
in6_pcbnotify(struct inpcbtable * table,const struct sockaddr_in6 * dst,uint fport_arg,const struct sockaddr_in6 * src,uint lport_arg,u_int rtable,int cmd,void * cmdarg,void (* notify)(struct inpcb *,int))426 in6_pcbnotify(struct inpcbtable *table, const struct sockaddr_in6 *dst,
427 uint fport_arg, const struct sockaddr_in6 *src, uint lport_arg,
428 u_int rtable, int cmd, void *cmdarg, void (*notify)(struct inpcb *, int))
429 {
430 struct inpcb_iterator iter = { .inp_table = NULL };
431 struct inpcb *inp = NULL;
432 u_short fport = fport_arg, lport = lport_arg;
433 struct sockaddr_in6 sa6_src;
434 int errno;
435 u_int32_t flowinfo;
436 u_int rdomain;
437
438 if ((unsigned)cmd >= PRC_NCMDS)
439 return;
440
441 if (IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr))
442 return;
443 if (IN6_IS_ADDR_V4MAPPED(&dst->sin6_addr)) {
444 #ifdef DIAGNOSTIC
445 printf("%s: Huh? Thought we never got "
446 "called with mapped!\n", __func__);
447 #endif
448 return;
449 }
450
451 /*
452 * note that src can be NULL when we get notify by local fragmentation.
453 */
454 sa6_src = (src == NULL) ? sa6_any : *src;
455 flowinfo = sa6_src.sin6_flowinfo;
456
457 /*
458 * Redirects go to all references to the destination,
459 * and use in_rtchange to invalidate the route cache.
460 * Dead host indications: also use in_rtchange to invalidate
461 * the cache, and deliver the error to all the sockets.
462 * Otherwise, if we have knowledge of the local port and address,
463 * deliver only to that socket.
464 */
465 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
466 fport = 0;
467 lport = 0;
468 sa6_src.sin6_addr = in6addr_any;
469
470 if (cmd != PRC_HOSTDEAD)
471 notify = in_rtchange;
472 }
473 errno = inet6ctlerrmap[cmd];
474 if (notify == NULL)
475 return;
476
477 rdomain = rtable_l2(rtable);
478 mtx_enter(&table->inpt_mtx);
479 while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) {
480 KASSERT(ISSET(inp->inp_flags, INP_IPV6));
481
482 /*
483 * Under the following condition, notify of redirects
484 * to the pcb, without making address matches against inpcb.
485 * - redirect notification is arrived.
486 * - the inpcb is unconnected.
487 * - the inpcb is caching !RTF_HOST routing entry.
488 * - the ICMPv6 notification is from the gateway cached in the
489 * inpcb. i.e. ICMPv6 notification is from nexthop gateway
490 * the inpcb used very recently.
491 *
492 * This is to improve interaction between netbsd/openbsd
493 * redirect handling code, and inpcb route cache code.
494 * without the clause, !RTF_HOST routing entry (which carries
495 * gateway used by inpcb right before the ICMPv6 redirect)
496 * will be cached forever in unconnected inpcb.
497 *
498 * There still is a question regarding to what is TRT:
499 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be
500 * generated on packet output. inpcb will always cache
501 * RTF_HOST routing entry so there's no need for the clause
502 * (ICMPv6 redirect will update RTF_HOST routing entry,
503 * and inpcb is caching it already).
504 * However, bsdi/freebsd are vulnerable to local DoS attacks
505 * due to the cloned routing entries.
506 * - Specwise, "destination cache" is mentioned in RFC2461.
507 * Jinmei says that it implies bsdi/freebsd behavior, itojun
508 * is not really convinced.
509 * - Having hiwat/lowat on # of cloned host route (redirect/
510 * pmtud) may be a good idea. netbsd/openbsd has it. see
511 * icmp6_mtudisc_update().
512 */
513 if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) &&
514 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) &&
515 inp->inp_route.ro_rt &&
516 !(inp->inp_route.ro_rt->rt_flags & RTF_HOST) &&
517 IN6_ARE_ADDR_EQUAL(&inp->inp_route.ro_dstsin6.sin6_addr,
518 &dst->sin6_addr)) {
519 goto do_notify;
520 }
521
522 /*
523 * Detect if we should notify the error. If no source and
524 * destination ports are specified, but non-zero flowinfo and
525 * local address match, notify the error. This is the case
526 * when the error is delivered with an encrypted buffer
527 * by ESP. Otherwise, just compare addresses and ports
528 * as usual.
529 */
530 if (lport == 0 && fport == 0 && flowinfo &&
531 flowinfo == (inp->inp_flowinfo & IPV6_FLOWLABEL_MASK) &&
532 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, &sa6_src.sin6_addr))
533 goto do_notify;
534 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
535 &dst->sin6_addr) ||
536 rtable_l2(inp->inp_rtableid) != rdomain ||
537 (lport && inp->inp_lport != lport) ||
538 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
539 !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6,
540 &sa6_src.sin6_addr)) ||
541 (fport && inp->inp_fport != fport)) {
542 continue;
543 }
544 do_notify:
545 mtx_leave(&table->inpt_mtx);
546 (*notify)(inp, errno);
547 mtx_enter(&table->inpt_mtx);
548 }
549 mtx_leave(&table->inpt_mtx);
550 }
551
552 struct rtentry *
in6_pcbrtentry(struct inpcb * inp)553 in6_pcbrtentry(struct inpcb *inp)
554 {
555 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
556 return (NULL);
557 return (route6_mpath(&inp->inp_route, &inp->inp_faddr6,
558 &inp->inp_laddr6, inp->inp_rtableid));
559 }
560
561 struct inpcb *
in6_pcbhash_lookup(struct inpcbtable * table,uint64_t hash,u_int rdomain,const struct in6_addr * faddr,u_short fport,const struct in6_addr * laddr,u_short lport)562 in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain,
563 const struct in6_addr *faddr, u_short fport,
564 const struct in6_addr *laddr, u_short lport)
565 {
566 struct inpcbhead *head;
567 struct inpcb *inp;
568
569 NET_ASSERT_LOCKED();
570 MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
571
572 head = &table->inpt_hashtbl[hash & table->inpt_mask];
573 LIST_FOREACH(inp, head, inp_hash) {
574 KASSERT(ISSET(inp->inp_flags, INP_IPV6));
575
576 if (inp->inp_fport == fport && inp->inp_lport == lport &&
577 IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
578 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&
579 rtable_l2(inp->inp_rtableid) == rdomain) {
580 break;
581 }
582 }
583 if (inp != NULL) {
584 /*
585 * Move this PCB to the head of hash chain so that
586 * repeated accesses are quicker. This is analogous to
587 * the historic single-entry PCB cache.
588 */
589 if (inp != LIST_FIRST(head)) {
590 LIST_REMOVE(inp, inp_hash);
591 LIST_INSERT_HEAD(head, inp, inp_hash);
592 }
593 }
594 return (inp);
595 }
596
597 struct inpcb *
in6_pcblookup_lock(struct inpcbtable * table,const struct in6_addr * faddr,u_int fport,const struct in6_addr * laddr,u_int lport,u_int rtable,int lock)598 in6_pcblookup_lock(struct inpcbtable *table, const struct in6_addr *faddr,
599 u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable,
600 int lock)
601 {
602 struct inpcb *inp;
603 uint64_t hash;
604 u_int rdomain;
605
606 rdomain = rtable_l2(rtable);
607 hash = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport);
608
609 if (lock == IN_PCBLOCK_GRAB) {
610 mtx_enter(&table->inpt_mtx);
611 } else {
612 KASSERT(lock == IN_PCBLOCK_HOLD);
613 MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
614 }
615 inp = in6_pcbhash_lookup(table, hash, rdomain,
616 faddr, fport, laddr, lport);
617 if (lock == IN_PCBLOCK_GRAB) {
618 in_pcbref(inp);
619 mtx_leave(&table->inpt_mtx);
620 }
621
622 #ifdef DIAGNOSTIC
623 if (inp == NULL && in_pcbnotifymiss) {
624 printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n",
625 __func__, ntohs(fport), ntohs(lport), rdomain);
626 }
627 #endif
628 return (inp);
629 }
630
631 struct inpcb *
in6_pcblookup(struct inpcbtable * table,const struct in6_addr * faddr,u_int fport,const struct in6_addr * laddr,u_int lport,u_int rtable)632 in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr,
633 u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable)
634 {
635 return in6_pcblookup_lock(table, faddr, fport, laddr, lport, rtable,
636 IN_PCBLOCK_GRAB);
637 }
638
639 struct inpcb *
in6_pcblookup_listen(struct inpcbtable * table,struct in6_addr * laddr,u_int lport,struct mbuf * m,u_int rtable)640 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
641 u_int lport, struct mbuf *m, u_int rtable)
642 {
643 const struct in6_addr *key1, *key2;
644 struct inpcb *inp;
645 uint64_t hash;
646 u_int rdomain;
647
648 key1 = laddr;
649 key2 = &zeroin6_addr;
650 #if NPF > 0
651 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
652 struct pf_divert *divert;
653
654 divert = pf_find_divert(m);
655 KASSERT(divert != NULL);
656 switch (divert->type) {
657 case PF_DIVERT_TO:
658 key1 = key2 = &divert->addr.v6;
659 lport = divert->port;
660 break;
661 case PF_DIVERT_REPLY:
662 return (NULL);
663 default:
664 panic("%s: unknown divert type %d, mbuf %p, divert %p",
665 __func__, divert->type, m, divert);
666 }
667 } else if (m && m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST) {
668 /*
669 * Redirected connections should not be treated the same
670 * as connections directed to ::1 since localhost
671 * can only be accessed from the host itself.
672 */
673 key1 = &zeroin6_addr;
674 key2 = laddr;
675 }
676 #endif
677
678 rdomain = rtable_l2(rtable);
679 hash = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport);
680
681 mtx_enter(&table->inpt_mtx);
682 inp = in6_pcbhash_lookup(table, hash, rdomain,
683 &zeroin6_addr, 0, key1, lport);
684 if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) {
685 hash = in6_pcbhash(table, rdomain,
686 &zeroin6_addr, 0, key2, lport);
687 inp = in6_pcbhash_lookup(table, hash, rdomain,
688 &zeroin6_addr, 0, key2, lport);
689 }
690 in_pcbref(inp);
691 mtx_leave(&table->inpt_mtx);
692
693 #ifdef DIAGNOSTIC
694 if (inp == NULL && in_pcbnotifymiss) {
695 printf("%s: laddr= lport=%d rdom=%u\n",
696 __func__, ntohs(lport), rdomain);
697 }
698 #endif
699 return (inp);
700 }
701