1 /* $OpenBSD: ip_output.c,v 1.167.2.1 2005/06/14 01:47:20 brad Exp $ */
2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1988, 1990, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
33 */
34
35 #include "pf.h"
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/mbuf.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45
46 #include <net/if.h>
47 #include <net/if_enc.h>
48 #include <net/route.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/in_pcb.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_var.h>
56 #include <netinet/ip_icmp.h>
57 #include <netinet/tcp.h>
58 #include <netinet/udp.h>
59 #include <netinet/tcp_timer.h>
60 #include <netinet/tcp_var.h>
61 #include <netinet/udp_var.h>
62
63 #if NPF > 0
64 #include <net/pfvar.h>
65 #endif
66
67 #ifdef IPSEC
68 #ifdef ENCDEBUG
69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0)
70 #else
71 #define DPRINTF(x)
72 #endif
73
74 extern u_int8_t get_sa_require(struct inpcb *);
75
76 extern int ipsec_auth_default_level;
77 extern int ipsec_esp_trans_default_level;
78 extern int ipsec_esp_network_default_level;
79 extern int ipsec_ipcomp_default_level;
80 #endif /* IPSEC */
81
82 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
83 static void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);
84
85 /*
86 * IP output. The packet in mbuf chain m contains a skeletal IP
87 * header (with len, off, ttl, proto, tos, src, dst).
88 * The mbuf chain containing the packet will be freed.
89 * The mbuf opt, if present, will not be freed.
90 */
91 int
ip_output(struct mbuf * m0,...)92 ip_output(struct mbuf *m0, ...)
93 {
94 struct ip *ip;
95 struct ifnet *ifp = NULL;
96 struct mbuf *m = m0;
97 int hlen = sizeof (struct ip);
98 int len, error = 0;
99 struct route iproute;
100 struct sockaddr_in *dst = NULL;
101 struct in_ifaddr *ia ;
102 struct mbuf *opt;
103 struct route *ro;
104 int flags;
105 struct ip_moptions *imo;
106 va_list ap;
107 u_int8_t sproto = 0, donerouting = 0;
108 u_long mtu = 0;
109 #ifdef IPSEC
110 u_int32_t icmp_mtu = 0;
111 union sockaddr_union sdst;
112 u_int32_t sspi = 0;
113 struct m_tag *mtag;
114 struct tdb_ident *tdbi;
115
116 struct inpcb *inp;
117 struct tdb *tdb;
118 int s;
119 #endif /* IPSEC */
120
121 va_start(ap, m0);
122 opt = va_arg(ap, struct mbuf *);
123 ro = va_arg(ap, struct route *);
124 flags = va_arg(ap, int);
125 imo = va_arg(ap, struct ip_moptions *);
126 #ifdef IPSEC
127 inp = va_arg(ap, struct inpcb *);
128 if (inp && (inp->inp_flags & INP_IPV6) != 0)
129 panic("ip_output: IPv6 pcb is passed");
130 #endif /* IPSEC */
131 va_end(ap);
132
133 #ifdef DIAGNOSTIC
134 if ((m->m_flags & M_PKTHDR) == 0)
135 panic("ip_output no HDR");
136 #endif
137 if (opt) {
138 m = ip_insertoptions(m, opt, &len);
139 hlen = len;
140 }
141
142 ip = mtod(m, struct ip *);
143
144 /*
145 * Fill in IP header.
146 */
147 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
148 ip->ip_v = IPVERSION;
149 ip->ip_off &= htons(IP_DF);
150 ip->ip_id = htons(ip_randomid());
151 ip->ip_hl = hlen >> 2;
152 ipstat.ips_localout++;
153 } else {
154 hlen = ip->ip_hl << 2;
155 }
156
157 /*
158 * If we're missing the IP source address, do a route lookup. We'll
159 * remember this result, in case we don't need to do any IPsec
160 * processing on the packet. We need the source address so we can
161 * do an SPD lookup in IPsec; for most packets, the source address
162 * is set at a higher level protocol. ICMPs and other packets
163 * though (e.g., traceroute) have a source address of zeroes.
164 */
165 if (ip->ip_src.s_addr == INADDR_ANY) {
166 if (flags & IP_ROUTETOETHER) {
167 error = EINVAL;
168 goto bad;
169 }
170 donerouting = 1;
171
172 if (ro == 0) {
173 ro = &iproute;
174 bzero((caddr_t)ro, sizeof (*ro));
175 }
176
177 dst = satosin(&ro->ro_dst);
178
179 /*
180 * If there is a cached route, check that it is to the same
181 * destination and is still up. If not, free it and try again.
182 */
183 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
184 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
185 RTFREE(ro->ro_rt);
186 ro->ro_rt = (struct rtentry *)0;
187 }
188
189 if (ro->ro_rt == 0) {
190 dst->sin_family = AF_INET;
191 dst->sin_len = sizeof(*dst);
192 dst->sin_addr = ip->ip_dst;
193 }
194
195 /*
196 * If routing to interface only, short-circuit routing lookup.
197 */
198 if (flags & IP_ROUTETOIF) {
199 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
200 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
201 ipstat.ips_noroute++;
202 error = ENETUNREACH;
203 goto bad;
204 }
205
206 ifp = ia->ia_ifp;
207 mtu = ifp->if_mtu;
208 ip->ip_ttl = 1;
209 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
210 (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
211 imo != NULL && imo->imo_multicast_ifp != NULL) {
212 ifp = imo->imo_multicast_ifp;
213 mtu = ifp->if_mtu;
214 IFP_TO_IA(ifp, ia);
215 } else {
216 if (ro->ro_rt == 0)
217 rtalloc(ro);
218
219 if (ro->ro_rt == 0) {
220 ipstat.ips_noroute++;
221 error = EHOSTUNREACH;
222 goto bad;
223 }
224
225 ia = ifatoia(ro->ro_rt->rt_ifa);
226 ifp = ro->ro_rt->rt_ifp;
227 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
228 mtu = ifp->if_mtu;
229 ro->ro_rt->rt_use++;
230
231 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
232 dst = satosin(ro->ro_rt->rt_gateway);
233 }
234
235 /* Set the source IP address */
236 if (!IN_MULTICAST(ip->ip_dst.s_addr))
237 ip->ip_src = ia->ia_addr.sin_addr;
238 }
239
240 #ifdef IPSEC
241 /*
242 * splnet is chosen over spltdb because we are not allowed to
243 * lower the level, and udp_output calls us in splnet().
244 */
245 s = splnet();
246
247 /* Do we have any pending SAs to apply ? */
248 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
249 if (mtag != NULL) {
250 #ifdef DIAGNOSTIC
251 if (mtag->m_tag_len != sizeof (struct tdb_ident))
252 panic("ip_output: tag of length %d (should be %ld",
253 mtag->m_tag_len, sizeof (struct tdb_ident));
254 #endif
255 tdbi = (struct tdb_ident *)(mtag + 1);
256 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
257 if (tdb == NULL)
258 error = -EINVAL;
259 m_tag_delete(m, mtag);
260 }
261 else
262 tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error,
263 IPSP_DIRECTION_OUT, NULL, inp);
264
265 if (tdb == NULL) {
266 splx(s);
267
268 if (error == 0) {
269 /*
270 * No IPsec processing required, we'll just send the
271 * packet out.
272 */
273 sproto = 0;
274
275 /* Fall through to routing/multicast handling */
276 } else {
277 /*
278 * -EINVAL is used to indicate that the packet should
279 * be silently dropped, typically because we've asked
280 * key management for an SA.
281 */
282 if (error == -EINVAL) /* Should silently drop packet */
283 error = 0;
284
285 m_freem(m);
286 goto done;
287 }
288 } else {
289 /* Loop detection */
290 for (mtag = m_tag_first(m); mtag != NULL;
291 mtag = m_tag_next(m, mtag)) {
292 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
293 mtag->m_tag_id !=
294 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
295 continue;
296 tdbi = (struct tdb_ident *)(mtag + 1);
297 if (tdbi->spi == tdb->tdb_spi &&
298 tdbi->proto == tdb->tdb_sproto &&
299 !bcmp(&tdbi->dst, &tdb->tdb_dst,
300 sizeof(union sockaddr_union))) {
301 splx(s);
302 sproto = 0; /* mark as no-IPsec-needed */
303 goto done_spd;
304 }
305 }
306
307 /* We need to do IPsec */
308 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
309 sspi = tdb->tdb_spi;
310 sproto = tdb->tdb_sproto;
311 splx(s);
312
313 /*
314 * If it needs TCP/UDP hardware-checksumming, do the
315 * computation now.
316 */
317 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
318 in_delayed_cksum(m);
319 m->m_pkthdr.csum &=
320 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
321 }
322
323 /* If it's not a multicast packet, try to fast-path */
324 if (!IN_MULTICAST(ip->ip_dst.s_addr)) {
325 goto sendit;
326 }
327 }
328
329 /* Fall through to the routing/multicast handling code */
330 done_spd:
331 #endif /* IPSEC */
332
333 if (flags & IP_ROUTETOETHER) {
334 dst = satosin(&ro->ro_dst);
335 ifp = ro->ro_rt->rt_ifp;
336 mtu = ifp->if_mtu;
337 ro->ro_rt = NULL;
338 } else if (donerouting == 0) {
339 if (ro == 0) {
340 ro = &iproute;
341 bzero((caddr_t)ro, sizeof (*ro));
342 }
343
344 dst = satosin(&ro->ro_dst);
345
346 /*
347 * If there is a cached route, check that it is to the same
348 * destination and is still up. If not, free it and try again.
349 */
350 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
351 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
352 RTFREE(ro->ro_rt);
353 ro->ro_rt = (struct rtentry *)0;
354 }
355
356 if (ro->ro_rt == 0) {
357 dst->sin_family = AF_INET;
358 dst->sin_len = sizeof(*dst);
359 dst->sin_addr = ip->ip_dst;
360 }
361
362 /*
363 * If routing to interface only, short-circuit routing lookup.
364 */
365 if (flags & IP_ROUTETOIF) {
366 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
367 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
368 ipstat.ips_noroute++;
369 error = ENETUNREACH;
370 goto bad;
371 }
372
373 ifp = ia->ia_ifp;
374 mtu = ifp->if_mtu;
375 ip->ip_ttl = 1;
376 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
377 (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
378 imo != NULL && imo->imo_multicast_ifp != NULL) {
379 ifp = imo->imo_multicast_ifp;
380 mtu = ifp->if_mtu;
381 IFP_TO_IA(ifp, ia);
382 } else {
383 if (ro->ro_rt == 0)
384 rtalloc(ro);
385
386 if (ro->ro_rt == 0) {
387 ipstat.ips_noroute++;
388 error = EHOSTUNREACH;
389 goto bad;
390 }
391
392 ia = ifatoia(ro->ro_rt->rt_ifa);
393 ifp = ro->ro_rt->rt_ifp;
394 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
395 mtu = ifp->if_mtu;
396 ro->ro_rt->rt_use++;
397
398 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
399 dst = satosin(ro->ro_rt->rt_gateway);
400 }
401
402 /* Set the source IP address */
403 if (ip->ip_src.s_addr == INADDR_ANY)
404 ip->ip_src = ia->ia_addr.sin_addr;
405 }
406
407 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
408 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
409 struct in_multi *inm;
410
411 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
412 M_BCAST : M_MCAST;
413
414 /*
415 * IP destination address is multicast. Make sure "dst"
416 * still points to the address in "ro". (It may have been
417 * changed to point to a gateway address, above.)
418 */
419 dst = satosin(&ro->ro_dst);
420
421 /*
422 * See if the caller provided any multicast options
423 */
424 if (imo != NULL)
425 ip->ip_ttl = imo->imo_multicast_ttl;
426 else
427 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
428
429 /*
430 * if we don't know the outgoing ifp yet, we can't generate
431 * output
432 */
433 if (!ifp) {
434 ipstat.ips_noroute++;
435 error = EHOSTUNREACH;
436 goto bad;
437 }
438
439 /*
440 * Confirm that the outgoing interface supports multicast,
441 * but only if the packet actually is going out on that
442 * interface (i.e., no IPsec is applied).
443 */
444 if ((((m->m_flags & M_MCAST) &&
445 (ifp->if_flags & IFF_MULTICAST) == 0) ||
446 ((m->m_flags & M_BCAST) &&
447 (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) {
448 ipstat.ips_noroute++;
449 error = ENETUNREACH;
450 goto bad;
451 }
452
453 /*
454 * If source address not specified yet, use address
455 * of outgoing interface.
456 */
457 if (ip->ip_src.s_addr == INADDR_ANY) {
458 struct in_ifaddr *ia;
459
460 for (ia = in_ifaddr.tqh_first;
461 ia;
462 ia = ia->ia_list.tqe_next)
463 if (ia->ia_ifp == ifp) {
464 ip->ip_src = ia->ia_addr.sin_addr;
465 break;
466 }
467 }
468
469 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
470 if (inm != NULL &&
471 (imo == NULL || imo->imo_multicast_loop)) {
472 /*
473 * If we belong to the destination multicast group
474 * on the outgoing interface, and the caller did not
475 * forbid loopback, loop back a copy.
476 * Can't defer TCP/UDP checksumming, do the
477 * computation now.
478 */
479 if (m->m_pkthdr.csum &
480 (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
481 in_delayed_cksum(m);
482 m->m_pkthdr.csum &=
483 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
484 }
485 ip_mloopback(ifp, m, dst);
486 }
487 #ifdef MROUTING
488 else {
489 /*
490 * If we are acting as a multicast router, perform
491 * multicast forwarding as if the packet had just
492 * arrived on the interface to which we are about
493 * to send. The multicast forwarding function
494 * recursively calls this function, using the
495 * IP_FORWARDING flag to prevent infinite recursion.
496 *
497 * Multicasts that are looped back by ip_mloopback(),
498 * above, will be forwarded by the ip_input() routine,
499 * if necessary.
500 */
501 extern struct socket *ip_mrouter;
502
503 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
504 if (ip_mforward(m, ifp) != 0) {
505 m_freem(m);
506 goto done;
507 }
508 }
509 }
510 #endif
511 /*
512 * Multicasts with a time-to-live of zero may be looped-
513 * back, above, but must not be transmitted on a network.
514 * Also, multicasts addressed to the loopback interface
515 * are not sent -- the above call to ip_mloopback() will
516 * loop back a copy if this host actually belongs to the
517 * destination group on the loopback interface.
518 */
519 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
520 m_freem(m);
521 goto done;
522 }
523
524 goto sendit;
525 }
526
527 /*
528 * Look for broadcast address and and verify user is allowed to send
529 * such a packet; if the packet is going in an IPsec tunnel, skip
530 * this check.
531 */
532 if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) {
533 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
534 error = EADDRNOTAVAIL;
535 goto bad;
536 }
537 if ((flags & IP_ALLOWBROADCAST) == 0) {
538 error = EACCES;
539 goto bad;
540 }
541
542 /* Don't allow broadcast messages to be fragmented */
543 if (ntohs(ip->ip_len) > ifp->if_mtu) {
544 error = EMSGSIZE;
545 goto bad;
546 }
547 m->m_flags |= M_BCAST;
548 } else
549 m->m_flags &= ~M_BCAST;
550
551 sendit:
552 /*
553 * If we're doing Path MTU discovery, we need to set DF unless
554 * the route's MTU is locked.
555 */
556 if ((flags & IP_MTUDISC) && ro && ro->ro_rt &&
557 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
558 ip->ip_off |= htons(IP_DF);
559
560 #ifdef IPSEC
561 /*
562 * Check if the packet needs encapsulation.
563 */
564 if (sproto != 0) {
565 s = splnet();
566
567 /*
568 * Packet filter
569 */
570 #if NPF > 0
571
572 if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) {
573 error = EHOSTUNREACH;
574 splx(s);
575 m_freem(m);
576 goto done;
577 }
578 if (m == NULL) {
579 splx(s);
580 goto done;
581 }
582 ip = mtod(m, struct ip *);
583 hlen = ip->ip_hl << 2;
584 #endif
585
586 tdb = gettdb(sspi, &sdst, sproto);
587 if (tdb == NULL) {
588 DPRINTF(("ip_output: unknown TDB"));
589 error = EHOSTUNREACH;
590 splx(s);
591 m_freem(m);
592 goto done;
593 }
594
595 /* Check if we are allowed to fragment */
596 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
597 ntohs(ip->ip_len) > tdb->tdb_mtu &&
598 tdb->tdb_mtutimeout > time.tv_sec) {
599 struct rtentry *rt = NULL;
600
601 icmp_mtu = tdb->tdb_mtu;
602 splx(s);
603
604 /* Find a host route to store the mtu in */
605 if (ro != NULL)
606 rt = ro->ro_rt;
607 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
608 struct sockaddr_in dst = {
609 sizeof(struct sockaddr_in), AF_INET};
610 dst.sin_addr = ip->ip_dst;
611 rt = icmp_mtudisc_clone((struct sockaddr *)&dst);
612 }
613 if (rt != NULL) {
614 rt->rt_rmx.rmx_mtu = icmp_mtu;
615 if (ro && ro->ro_rt != NULL) {
616 RTFREE(ro->ro_rt);
617 ro->ro_rt = (struct rtentry *) 0;
618 rtalloc(ro);
619 }
620 }
621 error = EMSGSIZE;
622 goto bad;
623 }
624
625 /*
626 * Clear these -- they'll be set in the recursive invocation
627 * as needed.
628 */
629 m->m_flags &= ~(M_MCAST | M_BCAST);
630
631 /* Callee frees mbuf */
632 error = ipsp_process_packet(m, tdb, AF_INET, 0);
633 splx(s);
634 return error; /* Nothing more to be done */
635 }
636
637 /*
638 * If deferred crypto processing is needed, check that the
639 * interface supports it.
640 */
641 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
642 != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
643 /* Notify IPsec to do its own crypto. */
644 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
645 m_freem(m);
646 error = EHOSTUNREACH;
647 goto done;
648 }
649 #endif /* IPSEC */
650
651 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
652 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
653 if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
654 ifp->if_bridge != NULL) {
655 in_delayed_cksum(m);
656 m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
657 }
658 } else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
659 if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
660 ifp->if_bridge != NULL) {
661 in_delayed_cksum(m);
662 m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
663 }
664 }
665
666 /*
667 * Packet filter
668 */
669 #if NPF > 0
670 if (pf_test(PF_OUT, ifp, &m) != PF_PASS) {
671 error = EHOSTUNREACH;
672 m_freem(m);
673 goto done;
674 }
675 if (m == NULL)
676 goto done;
677
678 ip = mtod(m, struct ip *);
679 hlen = ip->ip_hl << 2;
680 #endif
681
682 /*
683 * If small enough for interface, can just send directly.
684 */
685 if (ntohs(ip->ip_len) <= mtu) {
686 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
687 ifp->if_bridge == NULL) {
688 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
689 ipstat.ips_outhwcsum++;
690 } else {
691 ip->ip_sum = 0;
692 ip->ip_sum = in_cksum(m, hlen);
693 }
694 /* Update relevant hardware checksum stats for TCP/UDP */
695 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
696 tcpstat.tcps_outhwcsum++;
697 else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
698 udpstat.udps_outhwcsum++;
699 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
700 goto done;
701 }
702
703 /*
704 * Too large for interface; fragment if possible.
705 * Must be able to put at least 8 bytes per fragment.
706 */
707 if (ip->ip_off & htons(IP_DF)) {
708 #ifdef IPSEC
709 icmp_mtu = ifp->if_mtu;
710 #endif
711 error = EMSGSIZE;
712 /*
713 * This case can happen if the user changed the MTU
714 * of an interface after enabling IP on it. Because
715 * most netifs don't keep track of routes pointing to
716 * them, there is no way for one to update all its
717 * routes when the MTU is changed.
718 */
719 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
720 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
721 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
722 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
723 }
724 ipstat.ips_cantfrag++;
725 goto bad;
726 }
727
728 error = ip_fragment(m, ifp, mtu);
729 if (error) {
730 m = m0 = NULL;
731 goto bad;
732 }
733
734 for (; m; m = m0) {
735 m0 = m->m_nextpkt;
736 m->m_nextpkt = 0;
737 if (error == 0)
738 error = (*ifp->if_output)(ifp, m, sintosa(dst),
739 ro->ro_rt);
740 else
741 m_freem(m);
742 }
743
744 if (error == 0)
745 ipstat.ips_fragmented++;
746
747 done:
748 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
749 RTFREE(ro->ro_rt);
750 return (error);
751 bad:
752 #ifdef IPSEC
753 if (error == EMSGSIZE && ip_mtudisc && icmp_mtu != 0 && m != NULL)
754 ipsec_adjust_mtu(m, icmp_mtu);
755 #endif
756 m_freem(m0);
757 goto done;
758 }
759
760 int
ip_fragment(struct mbuf * m,struct ifnet * ifp,u_long mtu)761 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
762 {
763 struct ip *ip, *mhip;
764 struct mbuf *m0;
765 int len, hlen, off;
766 int mhlen, firstlen;
767 struct mbuf **mnext;
768 int fragments = 0;
769 int s;
770 int error = 0;
771
772 ip = mtod(m, struct ip *);
773 hlen = ip->ip_hl << 2;
774
775 len = (mtu - hlen) &~ 7;
776 if (len < 8) {
777 m_freem(m);
778 return (EMSGSIZE);
779 }
780
781 /*
782 * If we are doing fragmentation, we can't defer TCP/UDP
783 * checksumming; compute the checksum and clear the flag.
784 */
785 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
786 in_delayed_cksum(m);
787 m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
788 }
789
790 firstlen = len;
791 mnext = &m->m_nextpkt;
792
793 /*
794 * Loop through length of segment after first fragment,
795 * make new header and copy data of each part and link onto chain.
796 */
797 m0 = m;
798 mhlen = sizeof (struct ip);
799 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
800 MGETHDR(m, M_DONTWAIT, MT_HEADER);
801 if (m == 0) {
802 ipstat.ips_odropped++;
803 error = ENOBUFS;
804 goto sendorfree;
805 }
806 *mnext = m;
807 mnext = &m->m_nextpkt;
808 m->m_data += max_linkhdr;
809 mhip = mtod(m, struct ip *);
810 *mhip = *ip;
811 /* we must inherit MCAST and BCAST flags */
812 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
813 if (hlen > sizeof (struct ip)) {
814 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
815 mhip->ip_hl = mhlen >> 2;
816 }
817 m->m_len = mhlen;
818 mhip->ip_off = ((off - hlen) >> 3) +
819 (ntohs(ip->ip_off) & ~IP_MF);
820 if (ip->ip_off & htons(IP_MF))
821 mhip->ip_off |= IP_MF;
822 if (off + len >= ntohs(ip->ip_len))
823 len = ntohs(ip->ip_len) - off;
824 else
825 mhip->ip_off |= IP_MF;
826 mhip->ip_len = htons((u_int16_t)(len + mhlen));
827 m->m_next = m_copy(m0, off, len);
828 if (m->m_next == 0) {
829 ipstat.ips_odropped++;
830 error = ENOBUFS;
831 goto sendorfree;
832 }
833 m->m_pkthdr.len = mhlen + len;
834 m->m_pkthdr.rcvif = (struct ifnet *)0;
835 mhip->ip_off = htons((u_int16_t)mhip->ip_off);
836 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
837 ifp->if_bridge == NULL) {
838 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
839 ipstat.ips_outhwcsum++;
840 } else {
841 mhip->ip_sum = 0;
842 mhip->ip_sum = in_cksum(m, mhlen);
843 }
844 ipstat.ips_ofragments++;
845 fragments++;
846 }
847 /*
848 * Update first fragment by trimming what's been copied out
849 * and updating header, then send each fragment (in order).
850 */
851 m = m0;
852 m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
853 m->m_pkthdr.len = hlen + firstlen;
854 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
855 ip->ip_off |= htons(IP_MF);
856 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
857 ifp->if_bridge == NULL) {
858 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
859 ipstat.ips_outhwcsum++;
860 } else {
861 ip->ip_sum = 0;
862 ip->ip_sum = in_cksum(m, hlen);
863 }
864 sendorfree:
865 /*
866 * If there is no room for all the fragments, don't queue
867 * any of them.
868 */
869 s = splnet();
870 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
871 error == 0) {
872 error = ENOBUFS;
873 ipstat.ips_odropped++;
874 IFQ_INC_DROPS(&ifp->if_snd);
875 }
876 splx(s);
877 if (error) {
878 for (m = m0; m; m = m0) {
879 m0 = m->m_nextpkt;
880 m->m_nextpkt = NULL;
881 m_freem(m);
882 }
883 }
884
885 return (error);
886 }
887
888 /*
889 * Insert IP options into preformed packet.
890 * Adjust IP destination as required for IP source routing,
891 * as indicated by a non-zero in_addr at the start of the options.
892 */
893 static struct mbuf *
ip_insertoptions(m,opt,phlen)894 ip_insertoptions(m, opt, phlen)
895 struct mbuf *m;
896 struct mbuf *opt;
897 int *phlen;
898 {
899 struct ipoption *p = mtod(opt, struct ipoption *);
900 struct mbuf *n;
901 struct ip *ip = mtod(m, struct ip *);
902 unsigned optlen;
903
904 optlen = opt->m_len - sizeof(p->ipopt_dst);
905 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
906 return (m); /* XXX should fail */
907 if (p->ipopt_dst.s_addr)
908 ip->ip_dst = p->ipopt_dst;
909 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
910 MGETHDR(n, M_DONTWAIT, MT_HEADER);
911 if (n == 0)
912 return (m);
913 M_MOVE_HDR(n, m);
914 n->m_pkthdr.len += optlen;
915 m->m_len -= sizeof(struct ip);
916 m->m_data += sizeof(struct ip);
917 n->m_next = m;
918 m = n;
919 m->m_len = optlen + sizeof(struct ip);
920 m->m_data += max_linkhdr;
921 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
922 } else {
923 m->m_data -= optlen;
924 m->m_len += optlen;
925 m->m_pkthdr.len += optlen;
926 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
927 }
928 ip = mtod(m, struct ip *);
929 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
930 *phlen = sizeof(struct ip) + optlen;
931 ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
932 return (m);
933 }
934
935 /*
936 * Copy options from ip to jp,
937 * omitting those not copied during fragmentation.
938 */
939 int
ip_optcopy(ip,jp)940 ip_optcopy(ip, jp)
941 struct ip *ip, *jp;
942 {
943 u_char *cp, *dp;
944 int opt, optlen, cnt;
945
946 cp = (u_char *)(ip + 1);
947 dp = (u_char *)(jp + 1);
948 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
949 for (; cnt > 0; cnt -= optlen, cp += optlen) {
950 opt = cp[0];
951 if (opt == IPOPT_EOL)
952 break;
953 if (opt == IPOPT_NOP) {
954 /* Preserve for IP mcast tunnel's LSRR alignment. */
955 *dp++ = IPOPT_NOP;
956 optlen = 1;
957 continue;
958 }
959 #ifdef DIAGNOSTIC
960 if (cnt < IPOPT_OLEN + sizeof(*cp))
961 panic("malformed IPv4 option passed to ip_optcopy");
962 #endif
963 optlen = cp[IPOPT_OLEN];
964 #ifdef DIAGNOSTIC
965 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
966 panic("malformed IPv4 option passed to ip_optcopy");
967 #endif
968 /* bogus lengths should have been caught by ip_dooptions */
969 if (optlen > cnt)
970 optlen = cnt;
971 if (IPOPT_COPIED(opt)) {
972 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
973 dp += optlen;
974 }
975 }
976 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
977 *dp++ = IPOPT_EOL;
978 return (optlen);
979 }
980
981 /*
982 * IP socket option processing.
983 */
984 int
ip_ctloutput(op,so,level,optname,mp)985 ip_ctloutput(op, so, level, optname, mp)
986 int op;
987 struct socket *so;
988 int level, optname;
989 struct mbuf **mp;
990 {
991 struct inpcb *inp = sotoinpcb(so);
992 struct mbuf *m = *mp;
993 int optval = 0;
994 #ifdef IPSEC
995 struct proc *p = curproc; /* XXX */
996 struct ipsec_ref *ipr;
997 u_int16_t opt16val;
998 #endif
999 int error = 0;
1000
1001 if (level != IPPROTO_IP) {
1002 error = EINVAL;
1003 if (op == PRCO_SETOPT && *mp)
1004 (void) m_free(*mp);
1005 } else switch (op) {
1006 case PRCO_SETOPT:
1007 switch (optname) {
1008 case IP_OPTIONS:
1009 #ifdef notyet
1010 case IP_RETOPTS:
1011 return (ip_pcbopts(optname, &inp->inp_options, m));
1012 #else
1013 return (ip_pcbopts(&inp->inp_options, m));
1014 #endif
1015
1016 case IP_TOS:
1017 case IP_TTL:
1018 case IP_RECVOPTS:
1019 case IP_RECVRETOPTS:
1020 case IP_RECVDSTADDR:
1021 if (m == NULL || m->m_len != sizeof(int))
1022 error = EINVAL;
1023 else {
1024 optval = *mtod(m, int *);
1025 switch (optname) {
1026
1027 case IP_TOS:
1028 inp->inp_ip.ip_tos = optval;
1029 break;
1030
1031 case IP_TTL:
1032 inp->inp_ip.ip_ttl = optval;
1033 break;
1034 #define OPTSET(bit) \
1035 if (optval) \
1036 inp->inp_flags |= bit; \
1037 else \
1038 inp->inp_flags &= ~bit;
1039
1040 case IP_RECVOPTS:
1041 OPTSET(INP_RECVOPTS);
1042 break;
1043
1044 case IP_RECVRETOPTS:
1045 OPTSET(INP_RECVRETOPTS);
1046 break;
1047
1048 case IP_RECVDSTADDR:
1049 OPTSET(INP_RECVDSTADDR);
1050 break;
1051 }
1052 }
1053 break;
1054 #undef OPTSET
1055
1056 case IP_MULTICAST_IF:
1057 case IP_MULTICAST_TTL:
1058 case IP_MULTICAST_LOOP:
1059 case IP_ADD_MEMBERSHIP:
1060 case IP_DROP_MEMBERSHIP:
1061 error = ip_setmoptions(optname, &inp->inp_moptions, m);
1062 break;
1063
1064 case IP_PORTRANGE:
1065 if (m == 0 || m->m_len != sizeof(int))
1066 error = EINVAL;
1067 else {
1068 optval = *mtod(m, int *);
1069
1070 switch (optval) {
1071
1072 case IP_PORTRANGE_DEFAULT:
1073 inp->inp_flags &= ~(INP_LOWPORT);
1074 inp->inp_flags &= ~(INP_HIGHPORT);
1075 break;
1076
1077 case IP_PORTRANGE_HIGH:
1078 inp->inp_flags &= ~(INP_LOWPORT);
1079 inp->inp_flags |= INP_HIGHPORT;
1080 break;
1081
1082 case IP_PORTRANGE_LOW:
1083 inp->inp_flags &= ~(INP_HIGHPORT);
1084 inp->inp_flags |= INP_LOWPORT;
1085 break;
1086
1087 default:
1088
1089 error = EINVAL;
1090 break;
1091 }
1092 }
1093 break;
1094 case IP_AUTH_LEVEL:
1095 case IP_ESP_TRANS_LEVEL:
1096 case IP_ESP_NETWORK_LEVEL:
1097 case IP_IPCOMP_LEVEL:
1098 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1099 #ifndef IPSEC
1100 error = EOPNOTSUPP;
1101 #else
1102 if (m == 0 || m->m_len != sizeof(int)) {
1103 error = EINVAL;
1104 break;
1105 }
1106 optval = *mtod(m, int *);
1107
1108 if (optval < IPSEC_LEVEL_BYPASS ||
1109 optval > IPSEC_LEVEL_UNIQUE) {
1110 error = EINVAL;
1111 break;
1112 }
1113
1114 /* Unlink cached output TDB to force a re-search */
1115 if (inp->inp_tdb_out) {
1116 int s = spltdb();
1117 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1118 inp, inp_tdb_out_next);
1119 splx(s);
1120 }
1121
1122 if (inp->inp_tdb_in) {
1123 int s = spltdb();
1124 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1125 inp, inp_tdb_in_next);
1126 splx(s);
1127 }
1128
1129 switch (optname) {
1130 case IP_AUTH_LEVEL:
1131 if (optval < ipsec_auth_default_level &&
1132 suser(p, 0)) {
1133 error = EACCES;
1134 break;
1135 }
1136 inp->inp_seclevel[SL_AUTH] = optval;
1137 break;
1138
1139 case IP_ESP_TRANS_LEVEL:
1140 if (optval < ipsec_esp_trans_default_level &&
1141 suser(p, 0)) {
1142 error = EACCES;
1143 break;
1144 }
1145 inp->inp_seclevel[SL_ESP_TRANS] = optval;
1146 break;
1147
1148 case IP_ESP_NETWORK_LEVEL:
1149 if (optval < ipsec_esp_network_default_level &&
1150 suser(p, 0)) {
1151 error = EACCES;
1152 break;
1153 }
1154 inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1155 break;
1156 case IP_IPCOMP_LEVEL:
1157 if (optval < ipsec_ipcomp_default_level &&
1158 suser(p, 0)) {
1159 error = EACCES;
1160 break;
1161 }
1162 inp->inp_seclevel[SL_IPCOMP] = optval;
1163 break;
1164 }
1165 if (!error)
1166 inp->inp_secrequire = get_sa_require(inp);
1167 #endif
1168 break;
1169
1170 case IP_IPSEC_REMOTE_CRED:
1171 case IP_IPSEC_REMOTE_AUTH:
1172 /* Can't set the remote credential or key */
1173 error = EOPNOTSUPP;
1174 break;
1175
1176 case IP_IPSEC_LOCAL_ID:
1177 case IP_IPSEC_REMOTE_ID:
1178 case IP_IPSEC_LOCAL_CRED:
1179 case IP_IPSEC_LOCAL_AUTH:
1180 #ifndef IPSEC
1181 error = EOPNOTSUPP;
1182 #else
1183 if (m->m_len < 2) {
1184 error = EINVAL;
1185 break;
1186 }
1187
1188 m_copydata(m, 0, 2, (caddr_t) &opt16val);
1189
1190 /* If the type is 0, then we cleanup and return */
1191 if (opt16val == 0) {
1192 switch (optname) {
1193 case IP_IPSEC_LOCAL_ID:
1194 if (inp->inp_ipo != NULL &&
1195 inp->inp_ipo->ipo_srcid != NULL) {
1196 ipsp_reffree(inp->inp_ipo->ipo_srcid);
1197 inp->inp_ipo->ipo_srcid = NULL;
1198 }
1199 break;
1200
1201 case IP_IPSEC_REMOTE_ID:
1202 if (inp->inp_ipo != NULL &&
1203 inp->inp_ipo->ipo_dstid != NULL) {
1204 ipsp_reffree(inp->inp_ipo->ipo_dstid);
1205 inp->inp_ipo->ipo_dstid = NULL;
1206 }
1207 break;
1208
1209 case IP_IPSEC_LOCAL_CRED:
1210 if (inp->inp_ipo != NULL &&
1211 inp->inp_ipo->ipo_local_cred != NULL) {
1212 ipsp_reffree(inp->inp_ipo->ipo_local_cred);
1213 inp->inp_ipo->ipo_local_cred = NULL;
1214 }
1215 break;
1216
1217 case IP_IPSEC_LOCAL_AUTH:
1218 if (inp->inp_ipo != NULL &&
1219 inp->inp_ipo->ipo_local_auth != NULL) {
1220 ipsp_reffree(inp->inp_ipo->ipo_local_auth);
1221 inp->inp_ipo->ipo_local_auth = NULL;
1222 }
1223 break;
1224 }
1225
1226 error = 0;
1227 break;
1228 }
1229
1230 /* Can't have an empty payload */
1231 if (m->m_len == 2) {
1232 error = EINVAL;
1233 break;
1234 }
1235
1236 /* Allocate if needed */
1237 if (inp->inp_ipo == NULL) {
1238 inp->inp_ipo = ipsec_add_policy(inp,
1239 AF_INET, IPSP_DIRECTION_OUT);
1240 if (inp->inp_ipo == NULL) {
1241 error = ENOBUFS;
1242 break;
1243 }
1244 }
1245
1246 MALLOC(ipr, struct ipsec_ref *,
1247 sizeof(struct ipsec_ref) + m->m_len - 2,
1248 M_CREDENTIALS, M_NOWAIT);
1249 if (ipr == NULL) {
1250 error = ENOBUFS;
1251 break;
1252 }
1253
1254 ipr->ref_count = 1;
1255 ipr->ref_malloctype = M_CREDENTIALS;
1256 ipr->ref_len = m->m_len - 2;
1257 ipr->ref_type = opt16val;
1258 m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1));
1259
1260 switch (optname) {
1261 case IP_IPSEC_LOCAL_ID:
1262 /* Check valid types and NUL-termination */
1263 if (ipr->ref_type < IPSP_IDENTITY_PREFIX ||
1264 ipr->ref_type > IPSP_IDENTITY_CONNECTION ||
1265 ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1266 FREE(ipr, M_CREDENTIALS);
1267 error = EINVAL;
1268 } else {
1269 if (inp->inp_ipo->ipo_srcid != NULL)
1270 ipsp_reffree(inp->inp_ipo->ipo_srcid);
1271 inp->inp_ipo->ipo_srcid = ipr;
1272 }
1273 break;
1274 case IP_IPSEC_REMOTE_ID:
1275 /* Check valid types and NUL-termination */
1276 if (ipr->ref_type < IPSP_IDENTITY_PREFIX ||
1277 ipr->ref_type > IPSP_IDENTITY_CONNECTION ||
1278 ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1279 FREE(ipr, M_CREDENTIALS);
1280 error = EINVAL;
1281 } else {
1282 if (inp->inp_ipo->ipo_dstid != NULL)
1283 ipsp_reffree(inp->inp_ipo->ipo_dstid);
1284 inp->inp_ipo->ipo_dstid = ipr;
1285 }
1286 break;
1287 case IP_IPSEC_LOCAL_CRED:
1288 if (ipr->ref_type < IPSP_CRED_KEYNOTE ||
1289 ipr->ref_type > IPSP_CRED_X509) {
1290 FREE(ipr, M_CREDENTIALS);
1291 error = EINVAL;
1292 } else {
1293 if (inp->inp_ipo->ipo_local_cred != NULL)
1294 ipsp_reffree(inp->inp_ipo->ipo_local_cred);
1295 inp->inp_ipo->ipo_local_cred = ipr;
1296 }
1297 break;
1298 case IP_IPSEC_LOCAL_AUTH:
1299 if (ipr->ref_type < IPSP_AUTH_PASSPHRASE ||
1300 ipr->ref_type > IPSP_AUTH_RSA) {
1301 FREE(ipr, M_CREDENTIALS);
1302 error = EINVAL;
1303 } else {
1304 if (inp->inp_ipo->ipo_local_auth != NULL)
1305 ipsp_reffree(inp->inp_ipo->ipo_local_auth);
1306 inp->inp_ipo->ipo_local_auth = ipr;
1307 }
1308 break;
1309 }
1310
1311 /* Unlink cached output TDB to force a re-search */
1312 if (inp->inp_tdb_out) {
1313 int s = spltdb();
1314 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1315 inp, inp_tdb_out_next);
1316 splx(s);
1317 }
1318
1319 if (inp->inp_tdb_in) {
1320 int s = spltdb();
1321 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1322 inp, inp_tdb_in_next);
1323 splx(s);
1324 }
1325 #endif
1326 break;
1327 default:
1328 error = ENOPROTOOPT;
1329 break;
1330 }
1331 if (m)
1332 (void)m_free(m);
1333 break;
1334
1335 case PRCO_GETOPT:
1336 switch (optname) {
1337 case IP_OPTIONS:
1338 case IP_RETOPTS:
1339 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1340 if (inp->inp_options) {
1341 m->m_len = inp->inp_options->m_len;
1342 bcopy(mtod(inp->inp_options, caddr_t),
1343 mtod(m, caddr_t), (unsigned)m->m_len);
1344 } else
1345 m->m_len = 0;
1346 break;
1347
1348 case IP_TOS:
1349 case IP_TTL:
1350 case IP_RECVOPTS:
1351 case IP_RECVRETOPTS:
1352 case IP_RECVDSTADDR:
1353 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1354 m->m_len = sizeof(int);
1355 switch (optname) {
1356
1357 case IP_TOS:
1358 optval = inp->inp_ip.ip_tos;
1359 break;
1360
1361 case IP_TTL:
1362 optval = inp->inp_ip.ip_ttl;
1363 break;
1364
1365 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1366
1367 case IP_RECVOPTS:
1368 optval = OPTBIT(INP_RECVOPTS);
1369 break;
1370
1371 case IP_RECVRETOPTS:
1372 optval = OPTBIT(INP_RECVRETOPTS);
1373 break;
1374
1375 case IP_RECVDSTADDR:
1376 optval = OPTBIT(INP_RECVDSTADDR);
1377 break;
1378 }
1379 *mtod(m, int *) = optval;
1380 break;
1381
1382 case IP_MULTICAST_IF:
1383 case IP_MULTICAST_TTL:
1384 case IP_MULTICAST_LOOP:
1385 case IP_ADD_MEMBERSHIP:
1386 case IP_DROP_MEMBERSHIP:
1387 error = ip_getmoptions(optname, inp->inp_moptions, mp);
1388 break;
1389
1390 case IP_PORTRANGE:
1391 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1392 m->m_len = sizeof(int);
1393
1394 if (inp->inp_flags & INP_HIGHPORT)
1395 optval = IP_PORTRANGE_HIGH;
1396 else if (inp->inp_flags & INP_LOWPORT)
1397 optval = IP_PORTRANGE_LOW;
1398 else
1399 optval = 0;
1400
1401 *mtod(m, int *) = optval;
1402 break;
1403
1404 case IP_AUTH_LEVEL:
1405 case IP_ESP_TRANS_LEVEL:
1406 case IP_ESP_NETWORK_LEVEL:
1407 case IP_IPCOMP_LEVEL:
1408 #ifndef IPSEC
1409 m->m_len = sizeof(int);
1410 *mtod(m, int *) = IPSEC_LEVEL_NONE;
1411 #else
1412 m->m_len = sizeof(int);
1413 switch (optname) {
1414 case IP_AUTH_LEVEL:
1415 optval = inp->inp_seclevel[SL_AUTH];
1416 break;
1417
1418 case IP_ESP_TRANS_LEVEL:
1419 optval = inp->inp_seclevel[SL_ESP_TRANS];
1420 break;
1421
1422 case IP_ESP_NETWORK_LEVEL:
1423 optval = inp->inp_seclevel[SL_ESP_NETWORK];
1424 break;
1425 case IP_IPCOMP_LEVEL:
1426 optval = inp->inp_seclevel[SL_IPCOMP];
1427 break;
1428 }
1429 *mtod(m, int *) = optval;
1430 #endif
1431 break;
1432 case IP_IPSEC_LOCAL_ID:
1433 case IP_IPSEC_REMOTE_ID:
1434 case IP_IPSEC_LOCAL_CRED:
1435 case IP_IPSEC_REMOTE_CRED:
1436 case IP_IPSEC_LOCAL_AUTH:
1437 case IP_IPSEC_REMOTE_AUTH:
1438 #ifndef IPSEC
1439 error = EOPNOTSUPP;
1440 #else
1441 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1442 m->m_len = sizeof(u_int16_t);
1443 ipr = NULL;
1444 switch (optname) {
1445 case IP_IPSEC_LOCAL_ID:
1446 if (inp->inp_ipo != NULL)
1447 ipr = inp->inp_ipo->ipo_srcid;
1448 opt16val = IPSP_IDENTITY_NONE;
1449 break;
1450 case IP_IPSEC_REMOTE_ID:
1451 if (inp->inp_ipo != NULL)
1452 ipr = inp->inp_ipo->ipo_dstid;
1453 opt16val = IPSP_IDENTITY_NONE;
1454 break;
1455 case IP_IPSEC_LOCAL_CRED:
1456 if (inp->inp_ipo != NULL)
1457 ipr = inp->inp_ipo->ipo_local_cred;
1458 opt16val = IPSP_CRED_NONE;
1459 break;
1460 case IP_IPSEC_REMOTE_CRED:
1461 ipr = inp->inp_ipsec_remotecred;
1462 opt16val = IPSP_CRED_NONE;
1463 break;
1464 case IP_IPSEC_LOCAL_AUTH:
1465 if (inp->inp_ipo != NULL)
1466 ipr = inp->inp_ipo->ipo_local_auth;
1467 break;
1468 case IP_IPSEC_REMOTE_AUTH:
1469 ipr = inp->inp_ipsec_remoteauth;
1470 break;
1471 }
1472 if (ipr == NULL)
1473 *mtod(m, u_int16_t *) = opt16val;
1474 else {
1475 size_t len;
1476
1477 len = m->m_len + ipr->ref_len;
1478 if (len > MCLBYTES) {
1479 m_free(m);
1480 error = EINVAL;
1481 break;
1482 }
1483 /* allocate mbuf cluster for larger option */
1484 if (len > MLEN) {
1485 MCLGET(m, M_WAITOK);
1486 if ((m->m_flags & M_EXT) == 0) {
1487 m_free(m);
1488 error = ENOBUFS;
1489 break;
1490 }
1491
1492 }
1493 m->m_len = len;
1494 *mtod(m, u_int16_t *) = ipr->ref_type;
1495 m_copyback(m, sizeof(u_int16_t), ipr->ref_len,
1496 ipr + 1);
1497 }
1498 #endif
1499 break;
1500 default:
1501 error = ENOPROTOOPT;
1502 break;
1503 }
1504 break;
1505 }
1506 return (error);
1507 }
1508
1509 /*
1510 * Set up IP options in pcb for insertion in output packets.
1511 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1512 * with destination address if source routed.
1513 */
1514 int
1515 #ifdef notyet
ip_pcbopts(optname,pcbopt,m)1516 ip_pcbopts(optname, pcbopt, m)
1517 int optname;
1518 #else
1519 ip_pcbopts(pcbopt, m)
1520 #endif
1521 struct mbuf **pcbopt;
1522 struct mbuf *m;
1523 {
1524 int cnt, optlen;
1525 u_char *cp;
1526 u_char opt;
1527
1528 /* turn off any old options */
1529 if (*pcbopt)
1530 (void)m_free(*pcbopt);
1531 *pcbopt = 0;
1532 if (m == (struct mbuf *)0 || m->m_len == 0) {
1533 /*
1534 * Only turning off any previous options.
1535 */
1536 if (m)
1537 (void)m_free(m);
1538 return (0);
1539 }
1540
1541 if (m->m_len % sizeof(int32_t))
1542 goto bad;
1543
1544 /*
1545 * IP first-hop destination address will be stored before
1546 * actual options; move other options back
1547 * and clear it when none present.
1548 */
1549 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1550 goto bad;
1551 cnt = m->m_len;
1552 m->m_len += sizeof(struct in_addr);
1553 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1554 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1555 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1556
1557 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1558 opt = cp[IPOPT_OPTVAL];
1559 if (opt == IPOPT_EOL)
1560 break;
1561 if (opt == IPOPT_NOP)
1562 optlen = 1;
1563 else {
1564 if (cnt < IPOPT_OLEN + sizeof(*cp))
1565 goto bad;
1566 optlen = cp[IPOPT_OLEN];
1567 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1568 goto bad;
1569 }
1570 switch (opt) {
1571
1572 default:
1573 break;
1574
1575 case IPOPT_LSRR:
1576 case IPOPT_SSRR:
1577 /*
1578 * user process specifies route as:
1579 * ->A->B->C->D
1580 * D must be our final destination (but we can't
1581 * check that since we may not have connected yet).
1582 * A is first hop destination, which doesn't appear in
1583 * actual IP option, but is stored before the options.
1584 */
1585 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1586 goto bad;
1587 m->m_len -= sizeof(struct in_addr);
1588 cnt -= sizeof(struct in_addr);
1589 optlen -= sizeof(struct in_addr);
1590 cp[IPOPT_OLEN] = optlen;
1591 /*
1592 * Move first hop before start of options.
1593 */
1594 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1595 sizeof(struct in_addr));
1596 /*
1597 * Then copy rest of options back
1598 * to close up the deleted entry.
1599 */
1600 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1601 sizeof(struct in_addr)),
1602 (caddr_t)&cp[IPOPT_OFFSET+1],
1603 (unsigned)cnt + sizeof(struct in_addr));
1604 break;
1605 }
1606 }
1607 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1608 goto bad;
1609 *pcbopt = m;
1610 return (0);
1611
1612 bad:
1613 (void)m_free(m);
1614 return (EINVAL);
1615 }
1616
1617 /*
1618 * Set the IP multicast options in response to user setsockopt().
1619 */
1620 int
ip_setmoptions(optname,imop,m)1621 ip_setmoptions(optname, imop, m)
1622 int optname;
1623 struct ip_moptions **imop;
1624 struct mbuf *m;
1625 {
1626 int error = 0;
1627 u_char loop;
1628 int i;
1629 struct in_addr addr;
1630 struct ip_mreq *mreq;
1631 struct ifnet *ifp;
1632 struct ip_moptions *imo = *imop;
1633 struct route ro;
1634 struct sockaddr_in *dst;
1635
1636 if (imo == NULL) {
1637 /*
1638 * No multicast option buffer attached to the pcb;
1639 * allocate one and initialize to default values.
1640 */
1641 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1642 M_WAITOK);
1643
1644 *imop = imo;
1645 imo->imo_multicast_ifp = NULL;
1646 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1647 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1648 imo->imo_num_memberships = 0;
1649 }
1650
1651 switch (optname) {
1652
1653 case IP_MULTICAST_IF:
1654 /*
1655 * Select the interface for outgoing multicast packets.
1656 */
1657 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1658 error = EINVAL;
1659 break;
1660 }
1661 addr = *(mtod(m, struct in_addr *));
1662 /*
1663 * INADDR_ANY is used to remove a previous selection.
1664 * When no interface is selected, a default one is
1665 * chosen every time a multicast packet is sent.
1666 */
1667 if (addr.s_addr == INADDR_ANY) {
1668 imo->imo_multicast_ifp = NULL;
1669 break;
1670 }
1671 /*
1672 * The selected interface is identified by its local
1673 * IP address. Find the interface and confirm that
1674 * it supports multicasting.
1675 */
1676 INADDR_TO_IFP(addr, ifp);
1677 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1678 error = EADDRNOTAVAIL;
1679 break;
1680 }
1681 imo->imo_multicast_ifp = ifp;
1682 break;
1683
1684 case IP_MULTICAST_TTL:
1685 /*
1686 * Set the IP time-to-live for outgoing multicast packets.
1687 */
1688 if (m == NULL || m->m_len != 1) {
1689 error = EINVAL;
1690 break;
1691 }
1692 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1693 break;
1694
1695 case IP_MULTICAST_LOOP:
1696 /*
1697 * Set the loopback flag for outgoing multicast packets.
1698 * Must be zero or one.
1699 */
1700 if (m == NULL || m->m_len != 1 ||
1701 (loop = *(mtod(m, u_char *))) > 1) {
1702 error = EINVAL;
1703 break;
1704 }
1705 imo->imo_multicast_loop = loop;
1706 break;
1707
1708 case IP_ADD_MEMBERSHIP:
1709 /*
1710 * Add a multicast group membership.
1711 * Group must be a valid IP multicast address.
1712 */
1713 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1714 error = EINVAL;
1715 break;
1716 }
1717 mreq = mtod(m, struct ip_mreq *);
1718 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1719 error = EINVAL;
1720 break;
1721 }
1722 /*
1723 * If no interface address was provided, use the interface of
1724 * the route to the given multicast address.
1725 */
1726 if (mreq->imr_interface.s_addr == INADDR_ANY) {
1727 ro.ro_rt = NULL;
1728 dst = satosin(&ro.ro_dst);
1729 dst->sin_len = sizeof(*dst);
1730 dst->sin_family = AF_INET;
1731 dst->sin_addr = mreq->imr_multiaddr;
1732 rtalloc(&ro);
1733 if (ro.ro_rt == NULL) {
1734 error = EADDRNOTAVAIL;
1735 break;
1736 }
1737 ifp = ro.ro_rt->rt_ifp;
1738 rtfree(ro.ro_rt);
1739 } else {
1740 INADDR_TO_IFP(mreq->imr_interface, ifp);
1741 }
1742 /*
1743 * See if we found an interface, and confirm that it
1744 * supports multicast.
1745 */
1746 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1747 error = EADDRNOTAVAIL;
1748 break;
1749 }
1750 /*
1751 * See if the membership already exists or if all the
1752 * membership slots are full.
1753 */
1754 for (i = 0; i < imo->imo_num_memberships; ++i) {
1755 if (imo->imo_membership[i]->inm_ifp == ifp &&
1756 imo->imo_membership[i]->inm_addr.s_addr
1757 == mreq->imr_multiaddr.s_addr)
1758 break;
1759 }
1760 if (i < imo->imo_num_memberships) {
1761 error = EADDRINUSE;
1762 break;
1763 }
1764 if (i == IP_MAX_MEMBERSHIPS) {
1765 error = ETOOMANYREFS;
1766 break;
1767 }
1768 /*
1769 * Everything looks good; add a new record to the multicast
1770 * address list for the given interface.
1771 */
1772 if ((imo->imo_membership[i] =
1773 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1774 error = ENOBUFS;
1775 break;
1776 }
1777 ++imo->imo_num_memberships;
1778 break;
1779
1780 case IP_DROP_MEMBERSHIP:
1781 /*
1782 * Drop a multicast group membership.
1783 * Group must be a valid IP multicast address.
1784 */
1785 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1786 error = EINVAL;
1787 break;
1788 }
1789 mreq = mtod(m, struct ip_mreq *);
1790 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1791 error = EINVAL;
1792 break;
1793 }
1794 /*
1795 * If an interface address was specified, get a pointer
1796 * to its ifnet structure.
1797 */
1798 if (mreq->imr_interface.s_addr == INADDR_ANY)
1799 ifp = NULL;
1800 else {
1801 INADDR_TO_IFP(mreq->imr_interface, ifp);
1802 if (ifp == NULL) {
1803 error = EADDRNOTAVAIL;
1804 break;
1805 }
1806 }
1807 /*
1808 * Find the membership in the membership array.
1809 */
1810 for (i = 0; i < imo->imo_num_memberships; ++i) {
1811 if ((ifp == NULL ||
1812 imo->imo_membership[i]->inm_ifp == ifp) &&
1813 imo->imo_membership[i]->inm_addr.s_addr ==
1814 mreq->imr_multiaddr.s_addr)
1815 break;
1816 }
1817 if (i == imo->imo_num_memberships) {
1818 error = EADDRNOTAVAIL;
1819 break;
1820 }
1821 /*
1822 * Give up the multicast address record to which the
1823 * membership points.
1824 */
1825 in_delmulti(imo->imo_membership[i]);
1826 /*
1827 * Remove the gap in the membership array.
1828 */
1829 for (++i; i < imo->imo_num_memberships; ++i)
1830 imo->imo_membership[i-1] = imo->imo_membership[i];
1831 --imo->imo_num_memberships;
1832 break;
1833
1834 default:
1835 error = EOPNOTSUPP;
1836 break;
1837 }
1838
1839 /*
1840 * If all options have default values, no need to keep the mbuf.
1841 */
1842 if (imo->imo_multicast_ifp == NULL &&
1843 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1844 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1845 imo->imo_num_memberships == 0) {
1846 free(*imop, M_IPMOPTS);
1847 *imop = NULL;
1848 }
1849
1850 return (error);
1851 }
1852
1853 /*
1854 * Return the IP multicast options in response to user getsockopt().
1855 */
1856 int
ip_getmoptions(optname,imo,mp)1857 ip_getmoptions(optname, imo, mp)
1858 int optname;
1859 struct ip_moptions *imo;
1860 struct mbuf **mp;
1861 {
1862 u_char *ttl;
1863 u_char *loop;
1864 struct in_addr *addr;
1865 struct in_ifaddr *ia;
1866
1867 *mp = m_get(M_WAIT, MT_SOOPTS);
1868
1869 switch (optname) {
1870
1871 case IP_MULTICAST_IF:
1872 addr = mtod(*mp, struct in_addr *);
1873 (*mp)->m_len = sizeof(struct in_addr);
1874 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1875 addr->s_addr = INADDR_ANY;
1876 else {
1877 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1878 addr->s_addr = (ia == NULL) ? INADDR_ANY
1879 : ia->ia_addr.sin_addr.s_addr;
1880 }
1881 return (0);
1882
1883 case IP_MULTICAST_TTL:
1884 ttl = mtod(*mp, u_char *);
1885 (*mp)->m_len = 1;
1886 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1887 : imo->imo_multicast_ttl;
1888 return (0);
1889
1890 case IP_MULTICAST_LOOP:
1891 loop = mtod(*mp, u_char *);
1892 (*mp)->m_len = 1;
1893 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1894 : imo->imo_multicast_loop;
1895 return (0);
1896
1897 default:
1898 return (EOPNOTSUPP);
1899 }
1900 }
1901
1902 /*
1903 * Discard the IP multicast options.
1904 */
1905 void
ip_freemoptions(imo)1906 ip_freemoptions(imo)
1907 struct ip_moptions *imo;
1908 {
1909 int i;
1910
1911 if (imo != NULL) {
1912 for (i = 0; i < imo->imo_num_memberships; ++i)
1913 in_delmulti(imo->imo_membership[i]);
1914 free(imo, M_IPMOPTS);
1915 }
1916 }
1917
1918 /*
1919 * Routine called from ip_output() to loop back a copy of an IP multicast
1920 * packet to the input queue of a specified interface. Note that this
1921 * calls the output routine of the loopback "driver", but with an interface
1922 * pointer that might NOT be &loif -- easier than replicating that code here.
1923 */
1924 static void
ip_mloopback(ifp,m,dst)1925 ip_mloopback(ifp, m, dst)
1926 struct ifnet *ifp;
1927 struct mbuf *m;
1928 struct sockaddr_in *dst;
1929 {
1930 struct ip *ip;
1931 struct mbuf *copym;
1932
1933 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1934 if (copym != NULL) {
1935 /*
1936 * We don't bother to fragment if the IP length is greater
1937 * than the interface's MTU. Can this possibly matter?
1938 */
1939 ip = mtod(copym, struct ip *);
1940 ip->ip_sum = 0;
1941 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1942 (void) looutput(ifp, copym, sintosa(dst), NULL);
1943 }
1944 }
1945
1946 /*
1947 * Process a delayed payload checksum calculation.
1948 */
1949 void
in_delayed_cksum(struct mbuf * m)1950 in_delayed_cksum(struct mbuf *m)
1951 {
1952 struct ip *ip;
1953 u_int16_t csum, offset;
1954
1955 ip = mtod(m, struct ip *);
1956 offset = ip->ip_hl << 2;
1957 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1958 if (csum == 0 && ip->ip_p == IPPROTO_UDP)
1959 csum = 0xffff;
1960
1961 switch (ip->ip_p) {
1962 case IPPROTO_TCP:
1963 offset += offsetof(struct tcphdr, th_sum);
1964 break;
1965
1966 case IPPROTO_UDP:
1967 offset += offsetof(struct udphdr, uh_sum);
1968 break;
1969
1970 default:
1971 return;
1972 }
1973
1974 if ((offset + sizeof(u_int16_t)) > m->m_len)
1975 m_copyback(m, offset, sizeof(csum), &csum);
1976 else
1977 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
1978 }
1979