1 /*        $NetBSD: ipsecif.c,v 1.22 2023/09/01 11:23:39 andvar Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.22 2023/09/01 11:23:39 andvar Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #include "opt_ipsec.h"
35 #endif
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/mbuf.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/syslog.h>
45 #include <sys/kernel.h>
46 
47 #include <net/if.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_encap.h>
56 #include <netinet/ip_ecn.h>
57 #include <netinet/ip_private.h>
58 #include <netinet/udp.h>
59 
60 #ifdef INET6
61 #include <netinet/ip6.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip6_private.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
66 #include <netinet/ip_ecn.h>
67 #endif
68 
69 #include <netipsec/key.h>
70 #include <netipsec/ipsecif.h>
71 
72 #include <net/if_ipsec.h>
73 
/*
 * Forward declarations for the file-local helpers that are referenced
 * before their definitions.
 */
74 static int ipsecif_set_natt_ports(struct ipsec_variant *, struct mbuf *);
75 static void ipsecif4_input(struct mbuf *, int, int, void *);
76 static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
77 static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
78           struct ifnet *);
79 
80 #ifdef INET6
81 static int ipsecif6_input(struct mbuf **, int *, int, void *);
82 static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
83 static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
84           struct ifnet *);
85 #endif
86 
/*
 * Outer-header tunables.
 *  ip_ipsec_ttl       - stored in ip_ttl by ipsecif4_prepend_hdr().
 *  ip_ipsec_copy_tos  - when non-zero, ipsecif4_prepend_hdr() seeds the
 *                       outer ip_tos with the inner TOS instead of 0.
 *  ip6_ipsec_hlim     - stored in ip6_hlim by ipsecif6_output().
 *  ip6_ipsec_pmtu     - consulted by ipsecif6_output() when the softc's
 *                       ipsec_pmtu is IPSEC_PMTU_SYSDEFAULT.
 *  ip6_ipsec_copy_tos - IPv6 counterpart of ip_ipsec_copy_tos, read by
 *                       ipsecif6_output().
 * The two non-static variables are presumably set from outside this file
 * (e.g. sysctl) -- not visible here, confirm.
 */
87 static int ip_ipsec_ttl = IPSEC_TTL;
88 static int ip_ipsec_copy_tos = 0;
89 #ifdef INET6
90 int ip6_ipsec_hlim = IPSEC_HLIM;
91 int ip6_ipsec_pmtu = 0;
92 static int ip6_ipsec_copy_tos = 0;
93 #endif
94 
/*
 * Encapsulation switch for an IPv4 outer header: input goes to
 * ipsecif4_input(); no control-input handler is installed.
 */
95 static const struct encapsw ipsecif4_encapsw = {
96           .encapsw4 = {
97                     .pr_input = ipsecif4_input,
98                     .pr_ctlinput = NULL,
99           }
100 };
101 
102 #ifdef INET6
/* Declared here only; its initializer is not in this part of the file. */
103 static const struct encapsw ipsecif6_encapsw;
104 #endif
105 
106 static int
ipsecif_set_natt_ports(struct ipsec_variant * var,struct mbuf * m)107 ipsecif_set_natt_ports(struct ipsec_variant *var, struct mbuf *m)
108 {
109 
110           KASSERT(if_ipsec_heldref_variant(var));
111 
112           if (var->iv_sport || var->iv_dport) {
113                     struct m_tag *mtag;
114 
115                     mtag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
116                         sizeof(uint16_t) + sizeof(uint16_t), M_DONTWAIT);
117                     if (mtag) {
118                               uint16_t *natt_port;
119 
120                               natt_port = (uint16_t *)(mtag + 1);
121                               natt_port[0] = var->iv_dport;
122                               natt_port[1] = var->iv_sport;
123                               m_tag_prepend(m, mtag);
124                     } else {
125                               return ENOBUFS;
126                     }
127           }
128 
129           return 0;
130 }
131 
132 static struct mbuf *
ipsecif4_prepend_hdr(struct ipsec_variant * var,struct mbuf * m,uint8_t proto,uint8_t tos)133 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
134     uint8_t proto, uint8_t tos)
135 {
136           struct ip *ip;
137           struct sockaddr_in *src, *dst;
138 
139           src = satosin(var->iv_psrc);
140           dst = satosin(var->iv_pdst);
141 
142           if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
143               src->sin_addr.s_addr == INADDR_BROADCAST ||
144               dst->sin_addr.s_addr == INADDR_BROADCAST) {
145                     m_freem(m);
146                     return NULL;
147           }
148           m->m_flags &= ~M_BCAST;
149 
150           if (IN_MULTICAST(src->sin_addr.s_addr) ||
151               IN_MULTICAST(dst->sin_addr.s_addr)) {
152                     m_freem(m);
153                     return NULL;
154           }
155 
156           M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
157           if (m && M_UNWRITABLE(m, sizeof(struct ip)))
158                     m = m_pullup(m, sizeof(struct ip));
159           if (m == NULL)
160                     return NULL;
161 
162           ip = mtod(m, struct ip *);
163           ip->ip_v = IPVERSION;
164           ip->ip_off = htons(0);
165           if (m->m_pkthdr.len < IP_MINFRAGSIZE)
166                     ip->ip_id = 0;
167           else
168                     ip->ip_id = ip_newid(NULL);
169           ip->ip_hl = sizeof(*ip) >> 2;
170           if (ip_ipsec_copy_tos)
171                     ip->ip_tos = tos;
172           else
173                     ip->ip_tos = 0;
174           ip->ip_sum = 0;
175           ip->ip_src = src->sin_addr;
176           ip->ip_dst = dst->sin_addr;
177           ip->ip_p = proto;
178           ip->ip_ttl = ip_ipsec_ttl;
179           ip->ip_len = htons(m->m_pkthdr.len);
180 #ifndef IPSEC_TX_TOS_CLEAR
181           struct ifnet *ifp = &var->iv_softc->ipsec_if;
182           if (ifp->if_flags & IFF_ECN)
183                     ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
184           else
185                     ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
186 #endif
187 
188           return m;
189 }
190 
191 static int
ipsecif4_needfrag(struct mbuf * m,struct ipsecrequest * isr)192 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
193 {
194           struct ip ip0;
195           struct ip *ip;
196           int mtu;
197           struct secasvar *sav;
198 
199           sav = key_lookup_sa_bysaidx(&isr->saidx);
200           if (sav == NULL)
201                     return 0;
202 
203           if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
204                     mtu = 0;
205                     goto out;
206           }
207 
208           if (m->m_len < sizeof(struct ip)) {
209                     m_copydata(m, 0, sizeof(ip0), &ip0);
210                     ip = &ip0;
211           } else {
212                     ip = mtod(m, struct ip *);
213           }
214           mtu = sav->esp_frag;
215           if (ntohs(ip->ip_len) <= mtu)
216                     mtu = 0;
217 
218 out:
219           KEY_SA_UNREF(&sav);
220           return mtu;
221 }
222 
223 static struct mbuf *
ipsecif4_flowinfo(struct mbuf * m,int family,int * proto0,u_int8_t * tos0)224 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
225 {
226           const struct ip *ip;
227           int proto;
228           int tos;
229 
230           KASSERT(proto0 != NULL);
231           KASSERT(tos0 != NULL);
232 
233           switch (family) {
234           case AF_INET:
235                     proto = IPPROTO_IPV4;
236                     if (m->m_len < sizeof(*ip)) {
237                               m = m_pullup(m, sizeof(*ip));
238                               if (m == NULL) {
239                                         *tos0 = 0;
240                                         *proto0 = 0;
241                                         return NULL;
242                               }
243                     }
244                     ip = mtod(m, const struct ip *);
245                     tos = ip->ip_tos;
246                     /* TODO: support ALTQ for inner packet */
247                     break;
248 #ifdef INET6
249           case AF_INET6: {
250                     const struct ip6_hdr *ip6;
251                     proto = IPPROTO_IPV6;
252                     if (m->m_len < sizeof(*ip6)) {
253                               m = m_pullup(m, sizeof(*ip6));
254                               if (m == NULL) {
255                                         *tos0 = 0;
256                                         *proto0 = 0;
257                                         return NULL;
258                               }
259                     }
260                     ip6 = mtod(m, const struct ip6_hdr *);
261                     tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
262                     /* TODO: support ALTQ for inner packet */
263                     break;
264           }
265 #endif /* INET6 */
266           default:
267                     *tos0 = 0;
268                     *proto0 = 0;
269                     return NULL;
270           }
271 
272           *proto0 = proto;
273           *tos0 = tos;
274           return m;
275 }
276 
277 static int
ipsecif4_fragout(struct ipsec_variant * var,int family,struct mbuf * m,int mtu)278 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
279 {
280           struct ifnet *ifp = &var->iv_softc->ipsec_if;
281           struct mbuf *next;
282           struct m_tag *mtag;
283           int error;
284 
285           KASSERT(if_ipsec_heldref_variant(var));
286 
287           mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
288           if (mtag)
289                     m_tag_delete(m, mtag);
290 
291           /* consider new IP header prepended in ipsecif4_output() */
292           if (mtu <= sizeof(struct ip)) {
293                     m_freem(m);
294                     return ENETUNREACH;
295           }
296           m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
297           error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
298           if (error)
299                     return error;
300 
301           for (error = 0; m; m = next) {
302                     next = m->m_nextpkt;
303                     m->m_nextpkt = NULL;
304                     if (error) {
305                               m_freem(m);
306                               continue;
307                     }
308 
309                     error = ipsecif4_output(var, family, m);
310           }
311           if (error == 0)
312                     IP_STATINC(IP_STAT_FRAGMENTED);
313 
314           return error;
315 }
316 
317 int
ipsecif4_encap_func(struct mbuf * m,struct ip * ip,struct ipsec_variant * var)318 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
319 {
320           struct m_tag *mtag;
321           struct sockaddr_in *src, *dst;
322           u_int16_t src_port = 0;
323           u_int16_t dst_port = 0;
324 
325           KASSERT(var != NULL);
326 
327           src = satosin(var->iv_psrc);
328           dst = satosin(var->iv_pdst);
329           mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
330           if (mtag) {
331                     u_int16_t *ports;
332 
333                     ports = (u_int16_t *)(mtag + 1);
334                     src_port = ports[0];
335                     dst_port = ports[1];
336           }
337 
338           /* address match */
339           if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
340               dst->sin_addr.s_addr != ip->ip_src.s_addr)
341                     return 0;
342 
343           /* UDP encap? */
344           if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
345                     goto match;
346 
347           /* port match */
348           if (src_port != var->iv_dport ||
349               dst_port != var->iv_sport) {
350 #ifdef DEBUG
351                     printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
352                         __func__, ntohs(src_port), ntohs(dst_port),
353                         ntohs(var->iv_sport), ntohs(var->iv_dport));
354 #endif
355                     return 0;
356           }
357 
358 match:
359           /*
360            * hide NAT-T information from encapsulated traffics.
361            * they don't know about IPsec.
362            */
363           if (mtag)
364                     m_tag_delete(m, mtag);
365           return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
366 }
367 
368 static int
ipsecif4_output(struct ipsec_variant * var,int family,struct mbuf * m)369 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
370 {
371           struct secpolicy *sp = NULL;
372           u_int8_t tos;
373           int proto;
374           int error;
375           int mtu;
376           u_long sa_mtu = 0;
377 
378           KASSERT(if_ipsec_heldref_variant(var));
379           KASSERT(if_ipsec_variant_is_configured(var));
380           KASSERT(var->iv_psrc->sa_family == AF_INET);
381           KASSERT(var->iv_pdst->sa_family == AF_INET);
382 
383           switch (family) {
384           case AF_INET:
385                     sp = IV_SP_OUT(var);
386                     break;
387           case AF_INET6:
388                     sp = IV_SP_OUT6(var);
389                     break;
390           default:
391                     m_freem(m);
392                     return EAFNOSUPPORT;
393           }
394           KASSERT(sp != NULL);
395           /*
396            * The SPs in ipsec_variant are prevented from freed by
397            * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
398            *
399            * However, lastused should be updated.
400            */
401           key_sp_touch(sp);
402 
403           KASSERT(sp->policy != IPSEC_POLICY_NONE);
404           KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
405           KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
406           if (sp->policy != IPSEC_POLICY_IPSEC) {
407                     m_freem(m);
408                     error = ENETUNREACH;
409                     goto done;
410           }
411 
412           /* get flowinfo */
413           m = ipsecif4_flowinfo(m, family, &proto, &tos);
414           if (m == NULL) {
415                     error = ENETUNREACH;
416                     goto done;
417           }
418 
419           /* prepend new IP header */
420           m = ipsecif4_prepend_hdr(var, m, proto, tos);
421           if (m == NULL) {
422                     error = ENETUNREACH;
423                     goto done;
424           }
425 
426           /*
427            * Normal netipsec's NAT-T fragmentation is done in ip_output().
428            * See "natt_frag" processing.
429            * However, ipsec(4) interface's one is not done in the same way,
430            * so we must do NAT-T fragmentation by own code.
431            */
432           /* NAT-T ESP fragmentation */
433           mtu = ipsecif4_needfrag(m, sp->req);
434           if (mtu > 0)
435                     return ipsecif4_fragout(var, family, m, mtu);
436 
437           /* set NAT-T ports */
438           error = ipsecif_set_natt_ports(var, m);
439           if (error) {
440                     m_freem(m);
441                     goto done;
442           }
443 
444           /* IPsec output */
445           IP_STATINC(IP_STAT_LOCALOUT);
446           error = ipsec4_process_packet(m, sp->req, &sa_mtu);
447           if (error == ENOENT)
448                     error = 0;
449           /*
450            * frangmentation is already done in ipsecif4_fragout(),
451            * so ipsec4_process_packet() must not do fragmentation here.
452            */
453           KASSERT(sa_mtu == 0);
454 
455 done:
456           return error;
457 }
458 
459 #ifdef INET6
460 int
ipsecif6_encap_func(struct mbuf * m,struct ip6_hdr * ip6,struct ipsec_variant * var)461 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
462 {
463           struct m_tag *mtag;
464           struct sockaddr_in6 *src, *dst;
465           u_int16_t src_port = 0;
466           u_int16_t dst_port = 0;
467 
468           KASSERT(var != NULL);
469 
470           src = satosin6(var->iv_psrc);
471           dst = satosin6(var->iv_pdst);
472           mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
473           if (mtag) {
474                     u_int16_t *ports;
475 
476                     ports = (u_int16_t *)(mtag + 1);
477                     src_port = ports[0];
478                     dst_port = ports[1];
479           }
480 
481           /* address match */
482           if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
483               !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
484                     return 0;
485 
486           /* UDP encap? */
487           if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
488                     goto match;
489 
490           /* port match */
491           if (src_port != var->iv_dport ||
492               dst_port != var->iv_sport) {
493 #ifdef DEBUG
494                     printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
495                         __func__, ntohs(src_port), ntohs(dst_port),
496                         ntohs(var->iv_sport), ntohs(var->iv_dport));
497 #endif
498                     return 0;
499           }
500 
501 match:
502           /*
503            * hide NAT-T information from encapsulated traffics.
504            * they don't know about IPsec.
505            */
506           if (mtag)
507                     m_tag_delete(m, mtag);
508           return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
509 }
510 
511 static int
ipsecif6_output(struct ipsec_variant * var,int family,struct mbuf * m)512 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
513 {
514           struct ifnet *ifp = &var->iv_softc->ipsec_if;
515           struct ipsec_softc *sc = ifp->if_softc;
516           struct route *ro_pc;
517           kmutex_t *lock_pc;
518           struct rtentry *rt;
519           struct sockaddr_in6 *sin6_src;
520           struct sockaddr_in6 *sin6_dst;
521           struct ip6_hdr *ip6;
522           int proto, error, flags;
523           u_int8_t itos, otos;
524           union {
525                     struct sockaddr               dst;
526                     struct sockaddr_in6 dst6;
527           } u;
528 
529           KASSERT(if_ipsec_heldref_variant(var));
530           KASSERT(if_ipsec_variant_is_configured(var));
531 
532           sin6_src = satosin6(var->iv_psrc);
533           sin6_dst = satosin6(var->iv_pdst);
534 
535           KASSERT(sin6_src->sin6_family == AF_INET6);
536           KASSERT(sin6_dst->sin6_family == AF_INET6);
537 
538           switch (family) {
539 #ifdef INET
540           case AF_INET:
541               {
542                     struct ip *ip;
543 
544                     proto = IPPROTO_IPV4;
545                     if (m->m_len < sizeof(*ip)) {
546                               m = m_pullup(m, sizeof(*ip));
547                               if (m == NULL)
548                                         return ENOBUFS;
549                     }
550                     ip = mtod(m, struct ip *);
551                     itos = ip->ip_tos;
552                     /* TODO: support ALTQ for inner packet */
553                     break;
554               }
555 #endif /* INET */
556           case AF_INET6:
557               {
558                     struct ip6_hdr *xip6;
559                     proto = IPPROTO_IPV6;
560                     if (m->m_len < sizeof(*xip6)) {
561                               m = m_pullup(m, sizeof(*xip6));
562                               if (m == NULL)
563                                         return ENOBUFS;
564                     }
565                     xip6 = mtod(m, struct ip6_hdr *);
566                     itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
567                     /* TODO: support ALTQ for inner packet */
568                     break;
569               }
570           default:
571                     m_freem(m);
572                     return EAFNOSUPPORT;
573           }
574 
575           /* prepend new IP header */
576           M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
577           if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
578                     m = m_pullup(m, sizeof(struct ip6_hdr));
579           if (m == NULL)
580                     return ENOBUFS;
581 
582           ip6 = mtod(m, struct ip6_hdr *);
583           ip6->ip6_flow       = 0;
584           ip6->ip6_vfc        &= ~IPV6_VERSION_MASK;
585           ip6->ip6_vfc        |= IPV6_VERSION;
586 #if 0     /* ip6->ip6_plen will be filled by ip6_output */
587           ip6->ip6_plen       = htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
588 #endif
589           ip6->ip6_nxt        = proto;
590           ip6->ip6_hlim       = ip6_ipsec_hlim;
591           ip6->ip6_src        = sin6_src->sin6_addr;
592           /* bidirectional configured tunnel mode */
593           if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
594                     ip6->ip6_dst = sin6_dst->sin6_addr;
595           } else  {
596                     m_freem(m);
597                     return ENETUNREACH;
598           }
599 #ifndef IPSEC_TX_TOS_CLEAR
600           if (!ip6_ipsec_copy_tos)
601                     otos = 0;
602 
603           if (ifp->if_flags & IFF_ECN)
604                     ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
605           else
606                     ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
607 #else
608           if (ip6_ipsec_copy_tos)
609                     otos = itos;
610           else
611                     otos = 0;
612 #endif
613           ip6->ip6_flow &= ~ntohl(0xff00000);
614           ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
615 
616           sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
617 
618           if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
619           if ((rt = rtcache_lookup(ro_pc, &u.dst)) == NULL) {
620                     if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
621                     m_freem(m);
622                     return ENETUNREACH;
623           }
624 
625           if (rt->rt_ifp == ifp) {
626                     rtcache_unref(rt, ro_pc);
627                     rtcache_free(ro_pc);
628                     if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
629                     m_freem(m);
630                     return ENETUNREACH;
631           }
632           rtcache_unref(rt, ro_pc);
633 
634           /* set NAT-T ports */
635           error = ipsecif_set_natt_ports(var, m);
636           if (error) {
637                     m_freem(m);
638                     goto out;
639           }
640 
641           /*
642            * - IPSEC_PMTU_MINMTU
643            *   Force fragmentation to minimum MTU to avoid path MTU discovery
644            * - IPSEC_PMTU_OUTERMTU
645            *   Trust outer MTU is large enough to send all packets
646            *
647            * It is too painful to ask for resend of inner packet, to achieve
648            * path MTU discovery for encapsulated packets.
649            *
650            * See RFC4459.
651            */
652           if (sc->ipsec_pmtu == IPSEC_PMTU_SYSDEFAULT) {
653                     switch (ip6_ipsec_pmtu) {
654                     case IPSEC_PMTU_MINMTU:
655                               flags = IPV6_MINMTU;
656                               break;
657                     case IPSEC_PMTU_OUTERMTU:
658                               flags = 0;
659                               break;
660                     default:
661 #ifdef DEBUG
662                               log(LOG_DEBUG, "%s: ignore unexpected ip6_ipsec_pmtu %d\n",
663                                   __func__, ip6_ipsec_pmtu);
664 #endif
665                               flags = IPV6_MINMTU;
666                               break;
667                     }
668           } else {
669                     switch (sc->ipsec_pmtu) {
670                     case IPSEC_PMTU_MINMTU:
671                               flags = IPV6_MINMTU;
672                               break;
673                     case IPSEC_PMTU_OUTERMTU:
674                               flags = 0;
675                               break;
676                     default:
677 #ifdef DEBUG
678                               log(LOG_DEBUG, "%s: ignore unexpected ipsec_pmtu of %s %d\n",
679                                   __func__, ifp->if_xname, sc->ipsec_pmtu);
680 #endif
681                               flags = IPV6_MINMTU;
682                               break;
683                     }
684           }
685           error = ip6_output(m, 0, ro_pc, flags, 0, NULL, NULL);
686 
687 out:
688           if (error)
689                     rtcache_free(ro_pc);
690           if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
691 
692           return error;
693 }
694 #endif /* INET6 */
695 
696 static void
ipsecif4_input(struct mbuf * m,int off,int proto,void * eparg)697 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
698 {
699           struct ifnet *ipsecp;
700           struct ipsec_softc *sc = eparg;
701           struct ipsec_variant *var;
702           const struct ip *ip;
703           int af;
704 #ifndef IPSEC_TX_TOS_CLEAR
705           u_int8_t otos;
706 #endif
707           struct psref psref_rcvif;
708           struct psref psref_var;
709           struct ifnet *rcvif;
710 
711           KASSERT(sc != NULL);
712 
713           ipsecp = &sc->ipsec_if;
714           if ((ipsecp->if_flags & IFF_UP) == 0) {
715                     m_freem(m);
716                     ip_statinc(IP_STAT_NOIPSEC);
717                     return;
718           }
719 
720           var = if_ipsec_getref_variant(sc, &psref_var);
721           if (if_ipsec_variant_is_unconfigured(var)) {
722                     if_ipsec_putref_variant(var, &psref_var);
723                     m_freem(m);
724                     ip_statinc(IP_STAT_NOIPSEC);
725                     return;
726           }
727 
728           ip = mtod(m, const struct ip *);
729 
730           rcvif = m_get_rcvif_psref(m, &psref_rcvif);
731           if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
732                     m_put_rcvif_psref(rcvif, &psref_rcvif);
733                     if_ipsec_putref_variant(var, &psref_var);
734                     m_freem(m);
735                     ip_statinc(IP_STAT_NOIPSEC);
736                     return;
737           }
738           m_put_rcvif_psref(rcvif, &psref_rcvif);
739           if_ipsec_putref_variant(var, &psref_var);
740 #ifndef IPSEC_TX_TOS_CLEAR
741           otos = ip->ip_tos;
742 #endif
743           m_adj(m, off);
744 
745           switch (proto) {
746           case IPPROTO_IPV4:
747               {
748                     struct ip *xip;
749                     af = AF_INET;
750                     if (M_UNWRITABLE(m, sizeof(*xip))) {
751                               m = m_pullup(m, sizeof(*xip));
752                               if (m == NULL)
753                                         return;
754                     }
755                     xip = mtod(m, struct ip *);
756 #ifndef IPSEC_TX_TOS_CLEAR
757                     if (ipsecp->if_flags & IFF_ECN)
758                               ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
759                     else
760                               ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
761 #endif
762                     break;
763               }
764 #ifdef INET6
765           case IPPROTO_IPV6:
766               {
767                     struct ip6_hdr *ip6;
768                     u_int8_t itos;
769                     af = AF_INET6;
770                     if (M_UNWRITABLE(m, sizeof(*ip6))) {
771                               m = m_pullup(m, sizeof(*ip6));
772                               if (m == NULL)
773                                         return;
774                     }
775                     ip6 = mtod(m, struct ip6_hdr *);
776                     itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
777 #ifndef IPSEC_TX_TOS_CLEAR
778                     if (ipsecp->if_flags & IFF_ECN)
779                               ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
780                     else
781                               ip_ecn_egress(ECN_NOCARE, &otos, &itos);
782 #endif
783                     ip6->ip6_flow &= ~htonl(0xff << 20);
784                     ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
785                     break;
786               }
787 #endif /* INET6 */
788           default:
789                     ip_statinc(IP_STAT_NOIPSEC);
790                     m_freem(m);
791                     return;
792           }
793           if_ipsec_input(m, af, ipsecp);
794 
795           return;
796 }
797 
798 /*
799  * validate and filter the packet
800  */
801 static int
ipsecif4_filter4(const struct ip * ip,struct ipsec_variant * var,struct ifnet * ifp)802 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
803     struct ifnet *ifp)
804 {
805           struct sockaddr_in *src, *dst;
806 
807           src = satosin(var->iv_psrc);
808           dst = satosin(var->iv_pdst);
809 
810           return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
811 }
812 
813 #ifdef INET6
814 static int
ipsecif6_input(struct mbuf ** mp,int * offp,int proto,void * eparg)815 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
816 {
817           struct mbuf *m = *mp;
818           struct ifnet *ipsecp;
819           struct ipsec_softc *sc = eparg;
820           struct ipsec_variant *var;
821           struct ip6_hdr *ip6;
822           int af = 0;
823 #ifndef IPSEC_TX_TOS_CLEAR
824           u_int32_t otos;
825 #endif
826           struct psref psref_rcvif;
827           struct psref psref_var;
828           struct ifnet *rcvif;
829 
830           KASSERT(eparg != NULL);
831 
832           ipsecp = &sc->ipsec_if;
833           if ((ipsecp->if_flags & IFF_UP) == 0) {
834                     m_freem(m);
835                     IP6_STATINC(IP6_STAT_NOIPSEC);
836                     return IPPROTO_DONE;
837           }
838 
839           var = if_ipsec_getref_variant(sc, &psref_var);
840           if (if_ipsec_variant_is_unconfigured(var)) {
841                     if_ipsec_putref_variant(var, &psref_var);
842                     m_freem(m);
843                     IP6_STATINC(IP6_STAT_NOIPSEC);
844                     return IPPROTO_DONE;
845           }
846 
847           ip6 = mtod(m, struct ip6_hdr *);
848 
849           rcvif = m_get_rcvif_psref(m, &psref_rcvif);
850           if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
851                     m_put_rcvif_psref(rcvif, &psref_rcvif);
852                     if_ipsec_putref_variant(var, &psref_var);
853                     m_freem(m);
854                     IP6_STATINC(IP6_STAT_NOIPSEC);
855                     return IPPROTO_DONE;
856           }
857           m_put_rcvif_psref(rcvif, &psref_rcvif);
858           if_ipsec_putref_variant(var, &psref_var);
859 
860 #ifndef IPSEC_TX_TOS_CLEAR
861           otos = ip6->ip6_flow;
862 #endif
863           m_adj(m, *offp);
864 
865           switch (proto) {
866 #ifdef INET
867           case IPPROTO_IPV4:
868               {
869                     af = AF_INET;
870 #ifndef IPSEC_TX_TOS_CLEAR
871                     struct ip *ip;
872                     u_int8_t otos8;
873                     otos8 = (ntohl(otos) >> 20) & 0xff;
874 
875                     if (M_UNWRITABLE(m, sizeof(*ip))) {
876                               m = m_pullup(m, sizeof(*ip));
877                               if (m == NULL)
878                                         return IPPROTO_DONE;
879                     }
880                     ip = mtod(m, struct ip *);
881                     if (ipsecp->if_flags & IFF_ECN)
882                               ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
883                     else
884                               ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
885 #endif
886                     break;
887               }
888 #endif /* INET */
889           case IPPROTO_IPV6:
890               {
891                     af = AF_INET6;
892 #ifndef IPSEC_TX_TOS_CLEAR
893                     struct ip6_hdr *xip6;
894 
895                     if (M_UNWRITABLE(m, sizeof(*xip6))) {
896                               m = m_pullup(m, sizeof(*xip6));
897                               if (m == NULL)
898                                         return IPPROTO_DONE;
899                     }
900                     xip6 = mtod(m, struct ip6_hdr *);
901                     if (ipsecp->if_flags & IFF_ECN)
902                               ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
903                     else
904                               ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
905                     break;
906 #endif
907               }
908           default:
909                     IP6_STATINC(IP6_STAT_NOIPSEC);
910                     m_freem(m);
911                     return IPPROTO_DONE;
912           }
913 
914           if_ipsec_input(m, af, ipsecp);
915           return IPPROTO_DONE;
916 }
917 
918 /*
919  * validate and filter the packet.
920  */
921 static int
ipsecif6_filter6(const struct ip6_hdr * ip6,struct ipsec_variant * var,struct ifnet * ifp)922 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
923     struct ifnet *ifp)
924 {
925           struct sockaddr_in6 *src, *dst;
926 
927           src = satosin6(var->iv_psrc);
928           dst = satosin6(var->iv_pdst);
929 
930           return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
931 }
932 #endif /* INET6 */
933 
934 int
ipsecif4_attach(struct ipsec_variant * var)935 ipsecif4_attach(struct ipsec_variant *var)
936 {
937           struct ipsec_softc *sc = var->iv_softc;
938 
939           KASSERT(if_ipsec_variant_is_configured(var));
940 
941           if (var->iv_encap_cookie4 != NULL)
942                     return EALREADY;
943 
944           var->iv_encap_cookie4 = encap_attach_addr(AF_INET, -1,
945               var->iv_psrc, var->iv_pdst, if_ipsec_encap_func, &ipsecif4_encapsw,
946               sc);
947           if (var->iv_encap_cookie4 == NULL)
948                     return EEXIST;
949 
950           var->iv_output = ipsecif4_output;
951           return 0;
952 }
953 
954 int
ipsecif4_detach(struct ipsec_variant * var)955 ipsecif4_detach(struct ipsec_variant *var)
956 {
957           int error;
958 
959           if (var->iv_encap_cookie4 == NULL)
960                     return 0;
961 
962           var->iv_output = NULL;
963           error = encap_detach(var->iv_encap_cookie4);
964           if (error == 0)
965                     var->iv_encap_cookie4 = NULL;
966 
967           return error;
968 }
969 
970 #ifdef INET6
971 int
ipsecif6_attach(struct ipsec_variant * var)972 ipsecif6_attach(struct ipsec_variant *var)
973 {
974           struct ipsec_softc *sc = var->iv_softc;
975 
976           KASSERT(if_ipsec_variant_is_configured(var));
977           KASSERT(var->iv_encap_cookie6 == NULL);
978 
979           var->iv_encap_cookie6 = encap_attach_addr(AF_INET6, -1,
980               var->iv_psrc, var->iv_pdst, if_ipsec_encap_func, &ipsecif6_encapsw,
981               sc);
982           if (var->iv_encap_cookie6 == NULL)
983                     return EEXIST;
984 
985           var->iv_output = ipsecif6_output;
986           return 0;
987 }
988 
989 int
ipsecif6_detach(struct ipsec_variant * var)990 ipsecif6_detach(struct ipsec_variant *var)
991 {
992           struct ipsec_softc *sc = var->iv_softc;
993           int error;
994 
995           KASSERT(var->iv_encap_cookie6 != NULL);
996 
997           if_tunnel_ro_percpu_rtcache_free(sc->ipsec_ro_percpu);
998 
999           var->iv_output = NULL;
1000           error = encap_detach(var->iv_encap_cookie6);
1001           if (error == 0)
1002                     var->iv_encap_cookie6 = NULL;
1003           return error;
1004 }
1005 
1006 void *
ipsecif6_ctlinput(int cmd,const struct sockaddr * sa,void * d,void * eparg)1007 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
1008 {
1009           struct ipsec_softc *sc = eparg;
1010           struct ip6ctlparam *ip6cp = NULL;
1011           struct ip6_hdr *ip6;
1012           const struct sockaddr_in6 *dst6;
1013           struct route *ro_pc;
1014           kmutex_t *lock_pc;
1015 
1016           if (sa->sa_family != AF_INET6 ||
1017               sa->sa_len != sizeof(struct sockaddr_in6))
1018                     return NULL;
1019 
1020           if ((unsigned)cmd >= PRC_NCMDS)
1021                     return NULL;
1022           if (cmd == PRC_HOSTDEAD)
1023                     d = NULL;
1024           else if (inet6ctlerrmap[cmd] == 0)
1025                     return NULL;
1026 
1027           /* if the parameter is from icmp6, decode it. */
1028           if (d != NULL) {
1029                     ip6cp = (struct ip6ctlparam *)d;
1030                     ip6 = ip6cp->ip6c_ip6;
1031           } else {
1032                     ip6 = NULL;
1033           }
1034 
1035           if (!ip6)
1036                     return NULL;
1037 
1038           if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
1039           dst6 = satocsin6(rtcache_getdst(ro_pc));
1040           /* XXX scope */
1041           if (dst6 == NULL)
1042                     ;
1043           else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
1044                     /* flush route cache */
1045                     rtcache_free(ro_pc);
1046 
1047           if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
1048 
1049           return NULL;
1050 }
1051 
1052 ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
1053 #define   ipsecif6_ctlinput   ipsecif6_ctlinput_wrapper
1054 
1055 static const struct encapsw ipsecif6_encapsw = {
1056           .encapsw6 = {
1057                     .pr_input = ipsecif6_input,
1058                     .pr_ctlinput = ipsecif6_ctlinput,
1059           }
1060 };
1061 #endif /* INET6 */
1062