1 /* $OpenBSD: in_pcb.c,v 1.75 2004/03/12 17:49:23 claudio Exp $ */
2 /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33 *
34 * NRL grants permission for redistribution and use in source and binary
35 * forms, with or without modification, of the software and documentation
36 * created at NRL provided that the following conditions are met:
37 *
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgements:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * This product includes software developed at the Information
48 * Technology Division, US Naval Research Laboratory.
49 * 4. Neither the name of the NRL nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 *
65 * The views and conclusions contained in the software and documentation
66 * are those of the authors and should not be interpreted as representing
67 * official policies, either expressed or implied, of the US Naval
68 * Research Laboratory (NRL).
69 */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/protosw.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/proc.h>
78 #include <sys/domain.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_var.h>
89 #include <dev/rndvar.h>
90
91 #ifdef INET6
92 #include <netinet6/ip6_var.h>
93 #endif /* INET6 */
94
95 struct in_addr zeroin_addr;
96
97 extern int ipsec_auth_default_level;
98 extern int ipsec_esp_trans_default_level;
99 extern int ipsec_esp_network_default_level;
100 extern int ipsec_ipcomp_default_level;
101
102 /*
103 * These configure the range of local port addresses assigned to
104 * "unspecified" outgoing connections/packets/whatever.
105 */
106 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */
107 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */
108 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 40000 */
109 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 44999 */
110
111 struct pool inpcb_pool;
112 int inpcb_pool_initalized = 0;
113
/*
 * Bucket selection for the PCB hash tables.  Every macro evaluates to a
 * pointer to the list head the key maps to; inpt_hash and inpt_lhash are
 * applied as bit masks to the computed value.
 *
 * INPCBHASH / IN6PCBHASH mix the foreign address with the foreign and
 * local ports.  The laddr argument is accepted for symmetry but does not
 * take part in the hash.  INPCBLHASH hashes on the local port alone.
 *
 * All arguments and the whole expansion are parenthesized so that
 * compound expressions (e.g. "&tbl", "a | b") can be passed safely and
 * the result can be followed directly by "->".
 */
#define	INPCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \
	    ntohs((fport)) + ntohs((lport))) & ((table)->inpt_hash)])

#define	IN6PCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \
	    (faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \
	    ((table)->inpt_hash)])

#define	INPCBLHASH(table, lport) \
	(&(table)->inpt_lhashtbl[(lport) & ((table)->inpt_lhash)])
125
126 void
in_pcbinit(table,hashsize)127 in_pcbinit(table, hashsize)
128 struct inpcbtable *table;
129 int hashsize;
130 {
131
132 CIRCLEQ_INIT(&table->inpt_queue);
133 table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
134 &table->inpt_hash);
135 if (table->inpt_hashtbl == NULL)
136 panic("in_pcbinit: hashinit failed");
137 table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
138 &table->inpt_lhash);
139 if (table->inpt_lhashtbl == NULL)
140 panic("in_pcbinit: hashinit failed for lport");
141 table->inpt_lastport = 0;
142 }
143
144 struct baddynamicports baddynamicports;
145
146 /*
147 * Check if the specified port is invalid for dynamic allocation.
148 */
149 int
in_baddynamic(port,proto)150 in_baddynamic(port, proto)
151 u_int16_t port;
152 u_int16_t proto;
153 {
154
155 if (port < IPPORT_RESERVED/2 || port >= IPPORT_RESERVED)
156 return (0);
157
158 switch (proto) {
159 case IPPROTO_TCP:
160 return (DP_ISSET(baddynamicports.tcp, port));
161 case IPPROTO_UDP:
162 return (DP_ISSET(baddynamicports.udp, port));
163 default:
164 return (0);
165 }
166 }
167
168 int
in_pcballoc(so,v)169 in_pcballoc(so, v)
170 struct socket *so;
171 void *v;
172 {
173 struct inpcbtable *table = v;
174 struct inpcb *inp;
175 int s;
176
177 if (inpcb_pool_initalized == 0) {
178 pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0,
179 "inpcbpl", NULL);
180 inpcb_pool_initalized = 1;
181 }
182 inp = pool_get(&inpcb_pool, PR_NOWAIT);
183 if (inp == NULL)
184 return (ENOBUFS);
185 bzero((caddr_t)inp, sizeof(*inp));
186 inp->inp_table = table;
187 inp->inp_socket = so;
188 inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level;
189 inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level;
190 inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level;
191 inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level;
192 s = splnet();
193 CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
194 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport), inp, inp_lhash);
195 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
196 &inp->inp_laddr, inp->inp_lport), inp, inp_hash);
197 splx(s);
198 so->so_pcb = inp;
199 inp->inp_hops = -1;
200
201 #ifdef INET6
202 /*
203 * Small change in this function to set the INP_IPV6 flag so routines
204 * outside pcb-specific routines don't need to use sotopf(), and all
205 * of it's pointer chasing, later.
206 */
207 if (sotopf(so) == PF_INET6)
208 inp->inp_flags = INP_IPV6;
209 inp->in6p_cksum = -1;
210 #endif /* INET6 */
211 return (0);
212 }
213
214 int
in_pcbbind(v,nam)215 in_pcbbind(v, nam)
216 void *v;
217 struct mbuf *nam;
218 {
219 struct inpcb *inp = v;
220 struct socket *so = inp->inp_socket;
221 struct inpcbtable *table = inp->inp_table;
222 u_int16_t *lastport = &inp->inp_table->inpt_lastport;
223 struct sockaddr_in *sin;
224 struct proc *p = curproc; /* XXX */
225 u_int16_t lport = 0;
226 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
227 int error;
228
229 #ifdef INET6
230 if (sotopf(so) == PF_INET6)
231 return in6_pcbbind(inp, nam);
232 #endif /* INET6 */
233
234 if (in_ifaddr.tqh_first == 0)
235 return (EADDRNOTAVAIL);
236 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
237 return (EINVAL);
238 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
239 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
240 (so->so_options & SO_ACCEPTCONN) == 0))
241 wild = INPLOOKUP_WILDCARD;
242 if (nam) {
243 sin = mtod(nam, struct sockaddr_in *);
244 if (nam->m_len != sizeof (*sin))
245 return (EINVAL);
246 #ifdef notdef
247 /*
248 * We should check the family, but old programs
249 * incorrectly fail to initialize it.
250 */
251 if (sin->sin_family != AF_INET)
252 return (EAFNOSUPPORT);
253 #endif
254 lport = sin->sin_port;
255 if (IN_MULTICAST(sin->sin_addr.s_addr)) {
256 /*
257 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
258 * allow complete duplication of binding if
259 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
260 * and a multicast address is bound on both
261 * new and duplicated sockets.
262 */
263 if (so->so_options & SO_REUSEADDR)
264 reuseport = SO_REUSEADDR|SO_REUSEPORT;
265 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
266 sin->sin_port = 0; /* yech... */
267 if (in_iawithaddr(sin->sin_addr, NULL) == 0)
268 return (EADDRNOTAVAIL);
269 }
270 if (lport) {
271 struct inpcb *t;
272
273 /* GROSS */
274 if (ntohs(lport) < IPPORT_RESERVED &&
275 (error = suser(p, 0)))
276 return (EACCES);
277 if (so->so_euid) {
278 t = in_pcblookup(table, &zeroin_addr, 0,
279 &sin->sin_addr, lport, INPLOOKUP_WILDCARD);
280 if (t && (so->so_euid != t->inp_socket->so_euid))
281 return (EADDRINUSE);
282 }
283 t = in_pcblookup(table, &zeroin_addr, 0,
284 &sin->sin_addr, lport, wild);
285 if (t && (reuseport & t->inp_socket->so_options) == 0)
286 return (EADDRINUSE);
287 }
288 inp->inp_laddr = sin->sin_addr;
289 }
290 if (lport == 0) {
291 u_int16_t first, last, old = 0;
292 int count;
293 int loopcount = 0;
294
295 if (inp->inp_flags & INP_HIGHPORT) {
296 first = ipport_hifirstauto; /* sysctl */
297 last = ipport_hilastauto;
298 } else if (inp->inp_flags & INP_LOWPORT) {
299 if ((error = suser(p, 0)))
300 return (EACCES);
301 first = IPPORT_RESERVED-1; /* 1023 */
302 last = 600; /* not IPPORT_RESERVED/2 */
303 } else {
304 first = ipport_firstauto; /* sysctl */
305 last = ipport_lastauto;
306 }
307
308 /*
309 * Simple check to ensure all ports are not used up causing
310 * a deadlock here.
311 *
312 * We split the two cases (up and down) so that the direction
313 * is not being tested on each round of the loop.
314 */
315
316 portloop:
317 if (first > last) {
318 /*
319 * counting down
320 */
321 if (loopcount == 0) { /* only do this once. */
322 old = first;
323 first -= arc4random_uniform(first - last);
324 }
325 count = first - last;
326 *lastport = first; /* restart each time */
327
328 do {
329 if (count-- <= 0) { /* completely used? */
330 if (loopcount == 0) {
331 last = old;
332 loopcount++;
333 goto portloop;
334 }
335 return (EADDRNOTAVAIL);
336 }
337 --*lastport;
338 if (*lastport > first || *lastport < last)
339 *lastport = first;
340 lport = htons(*lastport);
341 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
342 in_pcblookup(table, &zeroin_addr, 0,
343 &inp->inp_laddr, lport, wild));
344 } else {
345 /*
346 * counting up
347 */
348 if (loopcount == 0) { /* only do this once. */
349 old = first;
350 first += arc4random_uniform(last - first);
351 }
352 count = last - first;
353 *lastport = first; /* restart each time */
354
355 do {
356 if (count-- <= 0) { /* completely used? */
357 if (loopcount == 0) {
358 first = old;
359 loopcount++;
360 goto portloop;
361 }
362 return (EADDRNOTAVAIL);
363 }
364 ++*lastport;
365 if (*lastport < first || *lastport > last)
366 *lastport = first;
367 lport = htons(*lastport);
368 } while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
369 in_pcblookup(table, &zeroin_addr, 0,
370 &inp->inp_laddr, lport, wild));
371 }
372 }
373 inp->inp_lport = lport;
374 in_pcbrehash(inp);
375 return (0);
376 }
377
378 /*
379 * Connect from a socket to a specified address.
380 * Both address and port must be specified in argument sin.
381 * If don't have a local address for this socket yet,
382 * then pick one.
383 */
384 int
in_pcbconnect(v,nam)385 in_pcbconnect(v, nam)
386 void *v;
387 struct mbuf *nam;
388 {
389 struct inpcb *inp = v;
390 struct sockaddr_in *ifaddr = NULL;
391 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
392
393 #ifdef INET6
394 if (sotopf(inp->inp_socket) == PF_INET6)
395 return (in6_pcbconnect(inp, nam));
396 if ((inp->inp_flags & INP_IPV6) != 0)
397 panic("IPv6 pcb passed into in_pcbconnect");
398 #endif /* INET6 */
399
400 if (nam->m_len != sizeof (*sin))
401 return (EINVAL);
402 if (sin->sin_family != AF_INET)
403 return (EAFNOSUPPORT);
404 if (sin->sin_port == 0)
405 return (EADDRNOTAVAIL);
406 if (in_ifaddr.tqh_first != 0) {
407 /*
408 * If the destination address is INADDR_ANY,
409 * use the primary local address.
410 * If the supplied address is INADDR_BROADCAST,
411 * and the primary interface supports broadcast,
412 * choose the broadcast address for that interface.
413 */
414 if (sin->sin_addr.s_addr == INADDR_ANY)
415 sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr;
416 else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
417 (in_ifaddr.tqh_first->ia_ifp->if_flags & IFF_BROADCAST))
418 sin->sin_addr = in_ifaddr.tqh_first->ia_broadaddr.sin_addr;
419 }
420 if (inp->inp_laddr.s_addr == INADDR_ANY) {
421 int error;
422 ifaddr = in_selectsrc(sin, &inp->inp_route,
423 inp->inp_socket->so_options, inp->inp_moptions, &error);
424 if (ifaddr == NULL) {
425 if (error == 0)
426 error = EADDRNOTAVAIL;
427 return error;
428 }
429 }
430 if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
431 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
432 inp->inp_lport) != 0)
433 return (EADDRINUSE);
434 if (inp->inp_laddr.s_addr == INADDR_ANY) {
435 if (inp->inp_lport == 0 &&
436 in_pcbbind(inp, (struct mbuf *)0) == EADDRNOTAVAIL)
437 return (EADDRNOTAVAIL);
438 inp->inp_laddr = ifaddr->sin_addr;
439 }
440 inp->inp_faddr = sin->sin_addr;
441 inp->inp_fport = sin->sin_port;
442 in_pcbrehash(inp);
443 #ifdef IPSEC
444 {
445 int error; /* This is just ignored */
446
447 /* Cause an IPsec SA to be established. */
448 ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT,
449 NULL, inp, NULL);
450 }
451 #endif
452 return (0);
453 }
454
455 void
in_pcbdisconnect(v)456 in_pcbdisconnect(v)
457 void *v;
458 {
459 struct inpcb *inp = v;
460
461 switch (sotopf(inp->inp_socket)) {
462 #ifdef INET6
463 case PF_INET6:
464 inp->inp_faddr6 = in6addr_any;
465 break;
466 #endif
467 case PF_INET:
468 inp->inp_faddr.s_addr = INADDR_ANY;
469 break;
470 }
471
472 inp->inp_fport = 0;
473 in_pcbrehash(inp);
474 if (inp->inp_socket->so_state & SS_NOFDREF)
475 in_pcbdetach(inp);
476 }
477
478 void
in_pcbdetach(v)479 in_pcbdetach(v)
480 void *v;
481 {
482 struct inpcb *inp = v;
483 struct socket *so = inp->inp_socket;
484 int s;
485
486 so->so_pcb = 0;
487 sofree(so);
488 if (inp->inp_options)
489 (void)m_freem(inp->inp_options);
490 if (inp->inp_route.ro_rt)
491 rtfree(inp->inp_route.ro_rt);
492 #ifdef INET6
493 if (inp->inp_flags & INP_IPV6)
494 ip6_freemoptions(inp->inp_moptions6);
495 else
496 #endif
497 ip_freemoptions(inp->inp_moptions);
498 #ifdef IPSEC
499 /* IPsec cleanup here */
500 s = spltdb();
501 if (inp->inp_tdb_in)
502 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
503 inp, inp_tdb_in_next);
504 if (inp->inp_tdb_out)
505 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp,
506 inp_tdb_out_next);
507 if (inp->inp_ipsec_remotecred)
508 ipsp_reffree(inp->inp_ipsec_remotecred);
509 if (inp->inp_ipsec_remoteauth)
510 ipsp_reffree(inp->inp_ipsec_remoteauth);
511 if (inp->inp_ipo)
512 ipsec_delete_policy(inp->inp_ipo);
513 splx(s);
514 #endif
515 s = splnet();
516 LIST_REMOVE(inp, inp_lhash);
517 LIST_REMOVE(inp, inp_hash);
518 CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
519 splx(s);
520 pool_put(&inpcb_pool, inp);
521 }
522
523 void
in_setsockaddr(inp,nam)524 in_setsockaddr(inp, nam)
525 struct inpcb *inp;
526 struct mbuf *nam;
527 {
528 struct sockaddr_in *sin;
529
530 nam->m_len = sizeof (*sin);
531 sin = mtod(nam, struct sockaddr_in *);
532 bzero((caddr_t)sin, sizeof (*sin));
533 sin->sin_family = AF_INET;
534 sin->sin_len = sizeof(*sin);
535 sin->sin_port = inp->inp_lport;
536 sin->sin_addr = inp->inp_laddr;
537 }
538
539 void
in_setpeeraddr(inp,nam)540 in_setpeeraddr(inp, nam)
541 struct inpcb *inp;
542 struct mbuf *nam;
543 {
544 struct sockaddr_in *sin;
545
546 #ifdef INET6
547 if (sotopf(inp->inp_socket) == PF_INET6) {
548 in6_setpeeraddr(inp, nam);
549 return;
550 }
551 #endif /* INET6 */
552
553 nam->m_len = sizeof (*sin);
554 sin = mtod(nam, struct sockaddr_in *);
555 bzero((caddr_t)sin, sizeof (*sin));
556 sin->sin_family = AF_INET;
557 sin->sin_len = sizeof(*sin);
558 sin->sin_port = inp->inp_fport;
559 sin->sin_addr = inp->inp_faddr;
560 }
561
562 /*
563 * Pass some notification to all connections of a protocol
564 * associated with address dst. The local address and/or port numbers
565 * may be specified to limit the search. The "usual action" will be
566 * taken, depending on the ctlinput cmd. The caller must filter any
567 * cmds that are uninteresting (e.g., no error in the map).
568 * Call the protocol specific routine (if any) to report
569 * any errors for each matching socket.
570 *
571 * Must be called at splsoftnet.
572 */
573 int
in_pcbnotify(table,dst,fport_arg,laddr,lport_arg,errno,notify)574 in_pcbnotify(table, dst, fport_arg, laddr, lport_arg, errno, notify)
575 struct inpcbtable *table;
576 struct sockaddr *dst;
577 u_int fport_arg, lport_arg;
578 struct in_addr laddr;
579 int errno;
580 void (*notify)(struct inpcb *, int);
581 {
582 struct inpcb *inp, *oinp;
583 struct in_addr faddr;
584 u_int16_t fport = fport_arg, lport = lport_arg;
585 int nmatch = 0;
586
587 splassert(IPL_SOFTNET);
588
589 #ifdef INET6
590 /*
591 * See in6_pcbnotify() for IPv6 codepath. By the time this
592 * gets called, the addresses passed are either definitely IPv4 or
593 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
594 */
595 #endif /* INET6 */
596
597 if (dst->sa_family != AF_INET)
598 return (0);
599 faddr = satosin(dst)->sin_addr;
600 if (faddr.s_addr == INADDR_ANY)
601 return (0);
602
603 for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
604 inp != CIRCLEQ_END(&table->inpt_queue);) {
605 #ifdef INET6
606 if (inp->inp_flags & INP_IPV6) {
607 inp = CIRCLEQ_NEXT(inp, inp_queue);
608 continue;
609 }
610 #endif
611 if (inp->inp_faddr.s_addr != faddr.s_addr ||
612 inp->inp_socket == 0 ||
613 inp->inp_fport != fport ||
614 inp->inp_lport != lport ||
615 inp->inp_laddr.s_addr != laddr.s_addr) {
616 inp = CIRCLEQ_NEXT(inp, inp_queue);
617 continue;
618 }
619 oinp = inp;
620 inp = CIRCLEQ_NEXT(inp, inp_queue);
621 nmatch++;
622 if (notify)
623 (*notify)(oinp, errno);
624 }
625 return (nmatch);
626 }
627
628 void
in_pcbnotifyall(table,dst,errno,notify)629 in_pcbnotifyall(table, dst, errno, notify)
630 struct inpcbtable *table;
631 struct sockaddr *dst;
632 int errno;
633 void (*notify)(struct inpcb *, int);
634 {
635 struct inpcb *inp, *oinp;
636 struct in_addr faddr;
637
638 #ifdef INET6
639 /*
640 * See in6_pcbnotify() for IPv6 codepath. By the time this
641 * gets called, the addresses passed are either definitely IPv4 or
642 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
643 */
644 #endif /* INET6 */
645
646 if (dst->sa_family != AF_INET)
647 return;
648 faddr = satosin(dst)->sin_addr;
649 if (faddr.s_addr == INADDR_ANY)
650 return;
651
652 for (inp = table->inpt_queue.cqh_first;
653 inp != (struct inpcb *)&table->inpt_queue;) {
654 #ifdef INET6
655 if (inp->inp_flags & INP_IPV6) {
656 inp = CIRCLEQ_NEXT(inp, inp_queue);
657 continue;
658 }
659 #endif
660 if (inp->inp_faddr.s_addr != faddr.s_addr ||
661 inp->inp_socket == 0) {
662 inp = CIRCLEQ_NEXT(inp, inp_queue);
663 continue;
664 }
665 oinp = inp;
666 inp = inp->inp_queue.cqe_next;
667 if (notify)
668 (*notify)(oinp, errno);
669 }
670 }
671
672 /*
673 * Check for alternatives when higher level complains
674 * about service problems. For now, invalidate cached
675 * routing information. If the route was created dynamically
676 * (by a redirect), time to try a default gateway again.
677 */
678 void
in_losing(inp)679 in_losing(inp)
680 struct inpcb *inp;
681 {
682 struct rtentry *rt;
683 struct rt_addrinfo info;
684
685 if ((rt = inp->inp_route.ro_rt)) {
686 inp->inp_route.ro_rt = 0;
687 bzero((caddr_t)&info, sizeof(info));
688 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
689 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
690 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
691 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
692 if (rt->rt_flags & RTF_DYNAMIC)
693 (void) rtrequest(RTM_DELETE, rt_key(rt),
694 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
695 (struct rtentry **)0);
696 /*
697 * A new route can be allocated
698 * the next time output is attempted.
699 * rtfree() needs to be called in anycase because the inp
700 * is still holding a reference to rt.
701 */
702 rtfree(rt);
703 }
704 }
705
706 /*
707 * After a routing change, flush old routing
708 * and allocate a (hopefully) better one.
709 */
710 void
in_rtchange(inp,errno)711 in_rtchange(inp, errno)
712 struct inpcb *inp;
713 int errno;
714 {
715 if (inp->inp_route.ro_rt) {
716 rtfree(inp->inp_route.ro_rt);
717 inp->inp_route.ro_rt = 0;
718 /*
719 * A new route can be allocated the next time
720 * output is attempted.
721 */
722 }
723 }
724
725 struct inpcb *
in_pcblookup(table,faddrp,fport_arg,laddrp,lport_arg,flags)726 in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags)
727 struct inpcbtable *table;
728 void *faddrp, *laddrp;
729 u_int fport_arg, lport_arg;
730 int flags;
731 {
732 struct inpcb *inp, *match = 0;
733 int matchwild = 3, wildcard;
734 u_int16_t fport = fport_arg, lport = lport_arg;
735 struct in_addr faddr = *(struct in_addr *)faddrp;
736 struct in_addr laddr = *(struct in_addr *)laddrp;
737
738 for (inp = LIST_FIRST(INPCBLHASH(table, lport)); inp;
739 inp = LIST_NEXT(inp, inp_lhash)) {
740 if (inp->inp_lport != lport)
741 continue;
742 wildcard = 0;
743 #ifdef INET6
744 if (flags & INPLOOKUP_IPV6) {
745 struct in6_addr *laddr6 = (struct in6_addr *)laddrp;
746 struct in6_addr *faddr6 = (struct in6_addr *)faddrp;
747
748 if (!(inp->inp_flags & INP_IPV6))
749 continue;
750
751 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
752 if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
753 wildcard++;
754 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6))
755 continue;
756 } else {
757 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
758 wildcard++;
759 }
760
761 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
762 if (IN6_IS_ADDR_UNSPECIFIED(faddr6))
763 wildcard++;
764 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
765 faddr6) || inp->inp_fport != fport)
766 continue;
767 } else {
768 if (!IN6_IS_ADDR_UNSPECIFIED(faddr6))
769 wildcard++;
770 }
771 } else
772 #endif /* INET6 */
773 {
774 #ifdef INET6
775 if (inp->inp_flags & INP_IPV6)
776 continue;
777 #endif /* INET6 */
778
779 if (inp->inp_faddr.s_addr != INADDR_ANY) {
780 if (faddr.s_addr == INADDR_ANY)
781 wildcard++;
782 else if (inp->inp_faddr.s_addr != faddr.s_addr ||
783 inp->inp_fport != fport)
784 continue;
785 } else {
786 if (faddr.s_addr != INADDR_ANY)
787 wildcard++;
788 }
789 if (inp->inp_laddr.s_addr != INADDR_ANY) {
790 if (laddr.s_addr == INADDR_ANY)
791 wildcard++;
792 else if (inp->inp_laddr.s_addr != laddr.s_addr)
793 continue;
794 } else {
795 if (laddr.s_addr != INADDR_ANY)
796 wildcard++;
797 }
798 }
799 if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) &&
800 wildcard < matchwild) {
801 match = inp;
802 if ((matchwild = wildcard) == 0)
803 break;
804 }
805 }
806 return (match);
807 }
808
809 struct rtentry *
in_pcbrtentry(inp)810 in_pcbrtentry(inp)
811 struct inpcb *inp;
812 {
813 struct route *ro;
814
815 ro = &inp->inp_route;
816
817 /*
818 * No route yet, so try to acquire one.
819 */
820 if (ro->ro_rt == NULL) {
821 #ifdef INET6
822 bzero(ro, sizeof(struct route_in6));
823 #else
824 bzero(ro, sizeof(struct route));
825 #endif
826
827 switch(sotopf(inp->inp_socket)) {
828 #ifdef INET6
829 case PF_INET6:
830 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
831 break;
832 ro->ro_dst.sa_family = AF_INET6;
833 ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
834 ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
835 inp->inp_faddr6;
836 rtalloc(ro);
837 break;
838 #endif /* INET6 */
839 case PF_INET:
840 if (inp->inp_faddr.s_addr == INADDR_ANY)
841 break;
842 ro->ro_dst.sa_family = AF_INET;
843 ro->ro_dst.sa_len = sizeof(ro->ro_dst);
844 satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
845 rtalloc(ro);
846 break;
847 }
848 }
849 return (ro->ro_rt);
850 }
851
852 struct sockaddr_in *
in_selectsrc(sin,ro,soopts,mopts,errorp)853 in_selectsrc(sin, ro, soopts, mopts, errorp)
854 struct sockaddr_in *sin;
855 struct route *ro;
856 int soopts;
857 struct ip_moptions *mopts;
858 int *errorp;
859 {
860 struct sockaddr_in *sin2;
861 struct in_ifaddr *ia;
862
863 ia = (struct in_ifaddr *)0;
864 /*
865 * If route is known or can be allocated now,
866 * our src addr is taken from the i/f, else punt.
867 */
868 if (ro->ro_rt &&
869 (satosin(&ro->ro_dst)->sin_addr.s_addr !=
870 sin->sin_addr.s_addr ||
871 soopts & SO_DONTROUTE)) {
872 RTFREE(ro->ro_rt);
873 ro->ro_rt = (struct rtentry *)0;
874 }
875 if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
876 (ro->ro_rt == (struct rtentry *)0 ||
877 ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
878 /* No route yet, so try to acquire one */
879 ro->ro_dst.sa_family = AF_INET;
880 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
881 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
882 rtalloc(ro);
883
884 /*
885 * It is important to bzero out the rest of the
886 * struct sockaddr_in when mixing v6 & v4!
887 */
888 sin2 = (struct sockaddr_in *)&ro->ro_dst;
889 bzero(sin2->sin_zero, sizeof(sin2->sin_zero));
890 }
891 /*
892 * If we found a route, use the address
893 * corresponding to the outgoing interface
894 * unless it is the loopback (in case a route
895 * to our address on another net goes to loopback).
896 */
897 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
898 ia = ifatoia(ro->ro_rt->rt_ifa);
899 if (ia == 0) {
900 u_int16_t fport = sin->sin_port;
901
902 sin->sin_port = 0;
903 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
904 if (ia == 0)
905 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
906 sin->sin_port = fport;
907 if (ia == 0)
908 ia = in_ifaddr.tqh_first;
909 if (ia == 0) {
910 *errorp = EADDRNOTAVAIL;
911 return NULL;
912 }
913 }
914 /*
915 * If the destination address is multicast and an outgoing
916 * interface has been set as a multicast option, use the
917 * address of that interface as our source address.
918 */
919 if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
920 struct ip_moptions *imo;
921 struct ifnet *ifp;
922
923 imo = mopts;
924 if (imo->imo_multicast_ifp != NULL) {
925 ifp = imo->imo_multicast_ifp;
926 for (ia = in_ifaddr.tqh_first; ia != 0;
927 ia = ia->ia_list.tqe_next)
928 if (ia->ia_ifp == ifp)
929 break;
930 if (ia == 0) {
931 *errorp = EADDRNOTAVAIL;
932 return NULL;
933 }
934 }
935 }
936 return satosin(&ia->ia_addr);
937 }
938
939 void
in_pcbrehash(inp)940 in_pcbrehash(inp)
941 struct inpcb *inp;
942 {
943 struct inpcbtable *table = inp->inp_table;
944 int s;
945
946 s = splnet();
947 LIST_REMOVE(inp, inp_lhash);
948 LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport), inp, inp_lhash);
949 LIST_REMOVE(inp, inp_hash);
950 #ifdef INET6
951 if (inp->inp_flags & INP_IPV6) {
952 LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6,
953 inp->inp_fport, &inp->inp_laddr6, inp->inp_lport),
954 inp, inp_hash);
955 } else {
956 #endif /* INET6 */
957 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr,
958 inp->inp_fport, &inp->inp_laddr, inp->inp_lport),
959 inp, inp_hash);
960 #ifdef INET6
961 }
962 #endif /* INET6 */
963 splx(s);
964 }
965
966 #ifdef DIAGNOSTIC
967 int in_pcbnotifymiss = 0;
968 #endif
969
970 /*
971 * The in(6)_pcbhashlookup functions are used to locate connected sockets
972 * quickly:
973 * faddr.fport <-> laddr.lport
974 * No wildcard matching is done so that listening sockets are not found.
975 * If the functions return NULL in(6)_pcblookup_listen can be used to
976 * find a listening/bound socket that may accept the connection.
977 * After those two lookups no other are necessary.
978 */
979 struct inpcb *
in_pcbhashlookup(table,faddr,fport_arg,laddr,lport_arg)980 in_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg)
981 struct inpcbtable *table;
982 struct in_addr faddr, laddr;
983 u_int fport_arg, lport_arg;
984 {
985 struct inpcbhead *head;
986 struct inpcb *inp;
987 u_int16_t fport = fport_arg, lport = lport_arg;
988
989 head = INPCBHASH(table, &faddr, fport, &laddr, lport);
990 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
991 #ifdef INET6
992 if (inp->inp_flags & INP_IPV6)
993 continue; /*XXX*/
994 #endif
995 if (inp->inp_faddr.s_addr == faddr.s_addr &&
996 inp->inp_fport == fport &&
997 inp->inp_lport == lport &&
998 inp->inp_laddr.s_addr == laddr.s_addr) {
999 /*
1000 * Move this PCB to the head of hash chain so that
1001 * repeated accesses are quicker. This is analogous to
1002 * the historic single-entry PCB cache.
1003 */
1004 if (inp != head->lh_first) {
1005 LIST_REMOVE(inp, inp_hash);
1006 LIST_INSERT_HEAD(head, inp, inp_hash);
1007 }
1008 break;
1009 }
1010 }
1011 #ifdef DIAGNOSTIC
1012 if (inp == NULL && in_pcbnotifymiss) {
1013 printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d\n",
1014 ntohl(faddr.s_addr), ntohs(fport),
1015 ntohl(laddr.s_addr), ntohs(lport));
1016 }
1017 #endif
1018 return (inp);
1019 }
1020
#ifdef INET6
/*
 * IPv6 counterpart of in_pcbhashlookup(): find the IPv6 PCB whose
 * foreign address/port and local address/port all match exactly.
 * A PCB that is found is moved to the head of its hash chain.
 * Returns the PCB, or NULL if there is none.
 */
struct inpcb *
in6_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg)
	struct inpcbtable *table;
	struct in6_addr *faddr, *laddr;
	u_int fport_arg, lport_arg;
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_int16_t fport = fport_arg, lport = lport_arg;

	head = IN6PCBHASH(table, faddr, fport, laddr, lport);
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_flags & INP_IPV6))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) ||
		    inp->inp_fport != fport || inp->inp_lport != lport ||
		    !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr))
			continue;

		/*
		 * Promote the PCB to the front of the chain so that
		 * repeated accesses are quicker, much like the historic
		 * single-entry PCB cache.
		 */
		if (inp != LIST_FIRST(head)) {
			LIST_REMOVE(inp, inp_hash);
			LIST_INSERT_HEAD(head, inp, inp_hash);
		}
		break;
	}
#ifdef DIAGNOSTIC
	if (inp == NULL && in_pcbnotifymiss) {
		printf("in6_pcbhashlookup: faddr=");
		printf(" fport=%d laddr=", ntohs(fport));
		printf(" lport=%d\n", ntohs(lport));
	}
#endif
	return (inp);
}
#endif /* INET6 */
1061
1062 /*
1063 * The in(6)_pcblookup_listen functions are used to locate listening
1064 * sockets quickly. This are sockets with unspecified foreign address
1065 * and port:
1066 * *.* <-> laddr.lport
1067 * *.* <-> *.lport
1068 */
1069 struct inpcb *
in_pcblookup_listen(table,laddr,lport_arg,reverse)1070 in_pcblookup_listen(table, laddr, lport_arg, reverse)
1071 struct inpcbtable *table;
1072 struct in_addr laddr;
1073 u_int lport_arg;
1074 int reverse;
1075 {
1076 struct inpcbhead *head;
1077 struct in_addr *key1, *key2;
1078 struct inpcb *inp;
1079 u_int16_t lport = lport_arg;
1080
1081 if (reverse) {
1082 key1 = &zeroin_addr;
1083 key2 = &laddr;
1084 } else {
1085 key1 = &laddr;
1086 key2 = &zeroin_addr;
1087 }
1088
1089 head = INPCBHASH(table, &zeroin_addr, 0, key1, lport);
1090 LIST_FOREACH(inp, head, inp_hash) {
1091 #ifdef INET6
1092 if (inp->inp_flags & INP_IPV6)
1093 continue; /*XXX*/
1094 #endif
1095 if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1096 inp->inp_laddr.s_addr == key1->s_addr &&
1097 inp->inp_faddr.s_addr == INADDR_ANY)
1098 break;
1099 }
1100 if (inp == NULL && key1->s_addr != key2->s_addr) {
1101 head = INPCBHASH(table, &zeroin_addr, 0, key2, lport);
1102 LIST_FOREACH(inp, head, inp_hash) {
1103 #ifdef INET6
1104 if (inp->inp_flags & INP_IPV6)
1105 continue; /*XXX*/
1106 #endif
1107 if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1108 inp->inp_laddr.s_addr == key2->s_addr &&
1109 inp->inp_faddr.s_addr == INADDR_ANY)
1110 break;
1111 }
1112 }
1113 #ifdef DIAGNOSTIC
1114 if (inp == NULL && in_pcbnotifymiss) {
1115 printf("in_pcblookup_listen: laddr=%08x lport=%d\n",
1116 ntohl(laddr.s_addr), ntohs(lport));
1117 }
1118 #endif
1119 /*
1120 * Move this PCB to the head of hash chain so that
1121 * repeated accesses are quicker. This is analogous to
1122 * the historic single-entry PCB cache.
1123 */
1124 if (inp != NULL && inp != head->lh_first) {
1125 LIST_REMOVE(inp, inp_hash);
1126 LIST_INSERT_HEAD(head, inp, inp_hash);
1127 }
1128 return (inp);
1129 }
1130
#ifdef INET6
/*
 * IPv6 counterpart of in_pcblookup_listen(): find an IPv6 PCB with
 * unspecified foreign address and port 0 that is bound to lport and to
 * either laddr or the unspecified address.  reverse selects which of
 * the two local-address keys is tried first (the unspecified address
 * when reverse is non-zero); the second key is only tried when the
 * first one found nothing and the keys differ.
 * A PCB that is found is moved to the head of its hash chain.
 * Returns the PCB, or NULL if there is none.
 */
struct inpcb *
in6_pcblookup_listen(table, laddr, lport_arg, reverse)
	struct inpcbtable *table;
	struct in6_addr *laddr;
	u_int lport_arg;
	int reverse;
{
	struct inpcbhead *head;
	struct in6_addr *keys[2];
	struct inpcb *inp;
	u_int16_t lport = lport_arg;
	int i;

	keys[reverse ? 1 : 0] = laddr;
	keys[reverse ? 0 : 1] = &zeroin6_addr;

	for (i = 0; ; i++) {
		head = IN6PCBHASH(table, &zeroin6_addr, 0, keys[i], lport);
		LIST_FOREACH(inp, head, inp_hash) {
			if (!(inp->inp_flags & INP_IPV6))
				continue;
			if (inp->inp_lport != lport || inp->inp_fport != 0 ||
			    !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, keys[i]) ||
			    !IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
				continue;
			break;
		}
		/* Stop on a hit, after the second key, or if both keys agree. */
		if (inp != NULL || i == 1 ||
		    IN6_ARE_ADDR_EQUAL(keys[0], keys[1]))
			break;
	}
#ifdef DIAGNOSTIC
	if (inp == NULL && in_pcbnotifymiss) {
		printf("in6_pcblookup_listen: laddr= lport=%d\n",
		    ntohs(lport));
	}
#endif
	/*
	 * Promote the PCB to the front of the chain it was found on so
	 * that repeated accesses are quicker, much like the historic
	 * single-entry PCB cache.
	 */
	if (inp != NULL && inp != LIST_FIRST(head)) {
		LIST_REMOVE(inp, inp_hash);
		LIST_INSERT_HEAD(head, inp, inp_hash);
	}
	return (inp);
}
#endif /* INET6 */
1190