1 /* $FreeBSD: stable/9/sys/contrib/ipfilter/netinet/ip_nat.c 296924 2016-03-16 02:01:17Z cy $ */
2
3 /*
4 * Copyright (C) 1995-2003 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define KERNEL 1
12 # define _KERNEL 1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/time.h>
18 #include <sys/file.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20 (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
22 #endif
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24 defined(_KERNEL)
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 # include "opt_ipfilter_log.h"
27 # else
28 # include "opt_ipfilter.h"
29 # endif
30 #endif
31 #if !defined(_KERNEL)
32 # include <stdio.h>
33 # include <string.h>
34 # include <stdlib.h>
35 # define _KERNEL
36 # ifdef __OpenBSD__
37 struct file;
38 # endif
39 # include <sys/uio.h>
40 # undef _KERNEL
41 #endif
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
45 #else
46 # include <sys/ioctl.h>
47 #endif
48 #if !defined(AIX)
49 # include <sys/fcntl.h>
50 #endif
51 #if !defined(linux)
52 # include <sys/protosw.h>
53 #endif
54 #include <sys/socket.h>
55 #if defined(_KERNEL)
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 # include <sys/mbuf.h>
59 # endif
60 #endif
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
64 # ifdef _KERNEL
65 # include <sys/dditypes.h>
66 # endif
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
69 #endif
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
72 #endif
73 #include <net/if.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 # include "opt_ipfilter.h"
78 # endif
79 #endif
80 #ifdef sun
81 # include <net/af.h>
82 #endif
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87
88 #ifdef RFC1825
89 # include <vpn/md5.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
92 #endif
93
94 #if !defined(linux)
95 # include <netinet/ip_var.h>
96 #endif
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
107 #ifdef IPFILTER_SYNC
108 #include "netinet/ip_sync.h"
109 #endif
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
112 #endif
113 /* END OF INCLUDES */
114
115 #undef SOCKADDR_IN
116 #define SOCKADDR_IN struct sockaddr_in
117
118 #if !defined(lint)
119 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD: stable/9/sys/contrib/ipfilter/netinet/ip_nat.c 296924 2016-03-16 02:01:17Z cy $";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122 #endif
123
124
125 /* ======================================================================== */
126 /* How the NAT is organised and works. */
127 /* */
128 /* Inside (interface y) NAT Outside (interface x) */
129 /* -------------------- -+- ------------------------------------- */
130 /* Packet going | out, processed by fr_checknatout() for x */
131 /* ------------> | ------------> */
132 /* src=10.1.1.1 | src=192.1.1.1 */
133 /* | */
134 /* | in, processed by fr_checknatin() for x */
135 /* <------------ | <------------ */
136 /* dst=10.1.1.1 | dst=192.1.1.1 */
137 /* -------------------- -+- ------------------------------------- */
138 /* fr_checknatout() - changes ip_src and if required, sport */
139 /* - creates a new mapping, if required. */
140 /* fr_checknatin() - changes ip_dst and if required, dport */
141 /* */
142 /* In the NAT table, internal source is recorded as "in" and externally */
143 /* seen as "out". */
144 /* ======================================================================== */
145
146
/*
 * File-scope state for the NAT code.
 *
 * nat_table[0] and nat_table[1] are each allocated by fr_natinit() with
 * ipf_nattable_sz slots; nat_rules, rdr_rules and ipf_hm_maptable are
 * allocated there with ipf_natrules_sz, ipf_rdrrules_sz and ipf_hostmap_sz
 * slots respectively.  nat_list is the list fr_nat_ioctl() searches for an
 * existing rule, nat_masks/rdr_masks are the bitmasks updated by
 * nat_addnat()/nat_addrdr(), and nat_tqb, nat_udptq, nat_icmptq and
 * nat_iptq are the timeout queues chained together by fr_natinit().
 * fr_nat_init is set to 1 once fr_natinit() has completed.
 */
147 nat_t **nat_table[2] = { NULL, NULL },
148 *nat_instances = NULL;
149 ipnat_t *nat_list = NULL;
150 u_int ipf_nattable_max = NAT_TABLE_MAX;
151 u_int ipf_nattable_sz = NAT_TABLE_SZ;
152 u_int ipf_natrules_sz = NAT_SIZE;
153 u_int ipf_rdrrules_sz = RDR_SIZE;
154 u_int ipf_hostmap_sz = HOSTMAP_SIZE;
155 u_int fr_nat_maxbucket = 0,
156 fr_nat_maxbucket_reset = 1;
157 u_32_t nat_masks = 0;
158 u_32_t rdr_masks = 0;
159 u_long nat_last_force_flush = 0;
160 ipnat_t **nat_rules = NULL;
161 ipnat_t **rdr_rules = NULL;
162 hostmap_t **ipf_hm_maptable = NULL;
163 hostmap_t *ipf_hm_maplist = NULL;
164 ipftq_t nat_tqb[IPF_TCP_NSTATES];
165 ipftq_t nat_udptq;
166 ipftq_t nat_icmptq;
167 ipftq_t nat_iptq;
168 ipftq_t *nat_utqe = NULL;
169 int fr_nat_doflush = 0;
/* NAT logging defaults to on only when the kernel is built with logging. */
170 #ifdef IPFILTER_LOG
171 int nat_logging = 1;
172 #else
173 int nat_logging = 0;
174 #endif
175
/* Default timeout values; fr_natinit() copies them into the queue TTLs. */
176 u_long fr_defnatage = DEF_NAT_AGE,
177 fr_defnatipage = 120, /* 60 seconds */
178 fr_defnaticmpage = 6; /* 3 seconds */
179 natstat_t nat_stats;
180 int fr_nat_lock = 0;
181 int fr_nat_init = 0;
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern int pfil_delayed_copy;
184 #endif
185
/*
 * Prototypes for the functions private to this file.  __P(()) wraps the
 * argument lists, matching the old-style (K&R) definitions used below.
 */
186 static int nat_flush_entry __P((void *));
187 static int nat_flushtable __P((void));
188 static int nat_clearlist __P((void));
189 static void nat_addnat __P((struct ipnat *));
190 static void nat_addrdr __P((struct ipnat *));
191 static void nat_delrdr __P((struct ipnat *));
192 static void nat_delnat __P((struct ipnat *));
193 static int fr_natgetent __P((caddr_t, int));
194 static int fr_natgetsz __P((caddr_t, int));
195 static int fr_natputent __P((caddr_t, int));
196 static int nat_extraflush __P((int));
197 static int nat_gettable __P((char *));
198 static void nat_tabmove __P((nat_t *));
199 static int nat_match __P((fr_info_t *, ipnat_t *));
200 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203 struct in_addr, struct in_addr, u_32_t));
204 static int nat_icmpquerytype4 __P((int));
205 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208 tcphdr_t *, nat_t **, int));
209 static int nat_resolverule __P((ipnat_t *));
210 static nat_t *fr_natclone __P((fr_info_t *, nat_t *));
211 static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static int nat_wildok __P((nat_t *, int, int, int, int));
213 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215
216
217 /* ------------------------------------------------------------------------ */
218 /* Function: fr_natinit */
219 /* Returns: int - 0 == success, -1 == failure */
220 /* Parameters: Nil */
221 /* */
222 /* Initialise all of the NAT locks, tables and other structures. */
223 /* ------------------------------------------------------------------------ */
fr_natinit()224 int fr_natinit()
225 {
226 int i;
227
228 KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229 if (nat_table[0] != NULL)
230 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231 else
232 return -1;
233
234 KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235 if (nat_table[1] != NULL)
236 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237 else
238 return -2;
239
240 KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241 if (nat_rules != NULL)
242 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243 else
244 return -3;
245
246 KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247 if (rdr_rules != NULL)
248 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249 else
250 return -4;
251
252 KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253 sizeof(hostmap_t *) * ipf_hostmap_sz);
254 if (ipf_hm_maptable != NULL)
255 bzero((char *)ipf_hm_maptable,
256 sizeof(hostmap_t *) * ipf_hostmap_sz);
257 else
258 return -5;
259 ipf_hm_maplist = NULL;
260
261 KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262 ipf_nattable_sz * sizeof(u_long));
263 if (nat_stats.ns_bucketlen[0] == NULL)
264 return -6;
265 bzero((char *)nat_stats.ns_bucketlen[0],
266 ipf_nattable_sz * sizeof(u_long));
267
268 KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269 ipf_nattable_sz * sizeof(u_long));
270 if (nat_stats.ns_bucketlen[1] == NULL)
271 return -7;
272
273 bzero((char *)nat_stats.ns_bucketlen[1],
274 ipf_nattable_sz * sizeof(u_long));
275
276 if (fr_nat_maxbucket == 0) {
277 for (i = ipf_nattable_sz; i > 0; i >>= 1)
278 fr_nat_maxbucket++;
279 fr_nat_maxbucket *= 2;
280 }
281
282 fr_sttab_init(nat_tqb);
283 /*
284 * Increase this because we may have "keep state" following this too
285 * and packet storms can occur if this is removed too quickly.
286 */
287 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288 nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289 nat_udptq.ifq_ttl = fr_defnatage;
290 nat_udptq.ifq_ref = 1;
291 nat_udptq.ifq_head = NULL;
292 nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293 MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294 nat_udptq.ifq_next = &nat_icmptq;
295 nat_icmptq.ifq_ttl = fr_defnaticmpage;
296 nat_icmptq.ifq_ref = 1;
297 nat_icmptq.ifq_head = NULL;
298 nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299 MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300 nat_icmptq.ifq_next = &nat_iptq;
301 nat_iptq.ifq_ttl = fr_defnatipage;
302 nat_iptq.ifq_ref = 1;
303 nat_iptq.ifq_head = NULL;
304 nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305 MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306 nat_iptq.ifq_next = NULL;
307
308 for (i = 0; i < IPF_TCP_NSTATES; i++) {
309 if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310 nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311 #ifdef LARGE_NAT
312 else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313 nat_tqb[i].ifq_ttl = fr_defnatage;
314 #endif
315 }
316
317 /*
318 * Increase this because we may have "keep state" following
319 * this too and packet storms can occur if this is removed
320 * too quickly.
321 */
322 nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323
324 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326 MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327 MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328
329 fr_nat_init = 1;
330
331 return 0;
332 }
333
334
335 /* ------------------------------------------------------------------------ */
336 /* Function: nat_addrdr */
337 /* Returns: Nil */
338 /* Parameters: n(I) - pointer to NAT rule to add */
339 /* */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
342 /* use by redirect rules. */
343 /* ------------------------------------------------------------------------ */
nat_addrdr(n)344 static void nat_addrdr(n)
345 ipnat_t *n;
346 {
347 ipnat_t **np;
348 u_32_t j;
349 u_int hv;
350 int k;
351
352 k = count4bits(n->in_outmsk);
353 if ((k >= 0) && (k != 32))
354 rdr_masks |= 1 << k;
355 j = (n->in_outip & n->in_outmsk);
356 hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357 np = rdr_rules + hv;
358 while (*np != NULL)
359 np = &(*np)->in_rnext;
360 n->in_rnext = NULL;
361 n->in_prnext = np;
362 n->in_hv = hv;
363 *np = n;
364 }
365
366
367 /* ------------------------------------------------------------------------ */
368 /* Function: nat_addnat */
369 /* Returns: Nil */
370 /* Parameters: n(I) - pointer to NAT rule to add */
371 /* */
372 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
373 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
374 /* redirect rules. */
375 /* ------------------------------------------------------------------------ */
nat_addnat(n)376 static void nat_addnat(n)
377 ipnat_t *n;
378 {
379 ipnat_t **np;
380 u_32_t j;
381 u_int hv;
382 int k;
383
384 k = count4bits(n->in_inmsk);
385 if ((k >= 0) && (k != 32))
386 nat_masks |= 1 << k;
387 j = (n->in_inip & n->in_inmsk);
388 hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389 np = nat_rules + hv;
390 while (*np != NULL)
391 np = &(*np)->in_mnext;
392 n->in_mnext = NULL;
393 n->in_pmnext = np;
394 n->in_hv = hv;
395 *np = n;
396 }
397
398
399 /* ------------------------------------------------------------------------ */
400 /* Function: nat_delrdr */
401 /* Returns: Nil */
402 /* Parameters: n(I) - pointer to NAT rule to delete */
403 /* */
404 /* Removes a redirect rule from the hash table of redirect rules. */
405 /* ------------------------------------------------------------------------ */
nat_delrdr(n)406 static void nat_delrdr(n)
407 ipnat_t *n;
408 {
409 if (n->in_rnext)
410 n->in_rnext->in_prnext = n->in_prnext;
411 *n->in_prnext = n->in_rnext;
412 }
413
414
415 /* ------------------------------------------------------------------------ */
416 /* Function: nat_delnat */
417 /* Returns: Nil */
418 /* Parameters: n(I) - pointer to NAT rule to delete */
419 /* */
420 /* Removes a NAT map rule from the hash table of NAT map rules. */
421 /* ------------------------------------------------------------------------ */
nat_delnat(n)422 static void nat_delnat(n)
423 ipnat_t *n;
424 {
425 if (n->in_mnext != NULL)
426 n->in_mnext->in_pmnext = n->in_pmnext;
427 *n->in_pmnext = n->in_mnext;
428 }
429
430
431 /* ------------------------------------------------------------------------ */
432 /* Function: nat_hostmap */
433 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
434 /* else a pointer to the hostmapping to use */
435 /* Parameters: np(I) - pointer to NAT rule */
436 /* real(I) - real IP address */
437 /* map(I) - mapped IP address */
438 /* port(I) - destination port number */
439 /* Write Locks: ipf_nat */
440 /* */
441 /* Check if an ip address has already been allocated for a given mapping */
442 /* that is not doing port based translation. If is not yet allocated, then */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
444 /* ------------------------------------------------------------------------ */
nat_hostmap(np,src,dst,map,port)445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
446 ipnat_t *np;
447 struct in_addr src;
448 struct in_addr dst;
449 struct in_addr map;
450 u_32_t port;
451 {
452 hostmap_t *hm;
453 u_int hv;
454
455 hv = (src.s_addr ^ dst.s_addr);
456 hv += src.s_addr;
457 hv += dst.s_addr;
458 hv %= HOSTMAP_SIZE;
459 for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460 if ((hm->hm_srcip.s_addr == src.s_addr) &&
461 (hm->hm_dstip.s_addr == dst.s_addr) &&
462 ((np == NULL) || (np == hm->hm_ipnat)) &&
463 ((port == 0) || (port == hm->hm_port))) {
464 hm->hm_ref++;
465 return hm;
466 }
467
468 if (np == NULL)
469 return NULL;
470
471 KMALLOC(hm, hostmap_t *);
472 if (hm) {
473 hm->hm_next = ipf_hm_maplist;
474 hm->hm_pnext = &ipf_hm_maplist;
475 if (ipf_hm_maplist != NULL)
476 ipf_hm_maplist->hm_pnext = &hm->hm_next;
477 ipf_hm_maplist = hm;
478 hm->hm_hnext = ipf_hm_maptable[hv];
479 hm->hm_phnext = ipf_hm_maptable + hv;
480 if (ipf_hm_maptable[hv] != NULL)
481 ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482 ipf_hm_maptable[hv] = hm;
483 hm->hm_ipnat = np;
484 hm->hm_srcip = src;
485 hm->hm_dstip = dst;
486 hm->hm_mapip = map;
487 hm->hm_ref = 1;
488 hm->hm_port = port;
489 }
490 return hm;
491 }
492
493
494 /* ------------------------------------------------------------------------ */
495 /* Function: fr_hostmapdel */
496 /* Returns: Nil */
497 /* Parameters: hmp(I) - pointer to hostmap structure pointer */
498 /* Write Locks: ipf_nat */
499 /* */
500 /* Decrement the references to this hostmap structure by one. If this */
501 /* reaches zero then remove it and free it. */
502 /* ------------------------------------------------------------------------ */
fr_hostmapdel(hmp)503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
505 {
506 struct hostmap *hm;
507
508 hm = *hmp;
509 *hmp = NULL;
510
511 hm->hm_ref--;
512 if (hm->hm_ref == 0) {
513 if (hm->hm_hnext)
514 hm->hm_hnext->hm_phnext = hm->hm_phnext;
515 *hm->hm_phnext = hm->hm_hnext;
516 if (hm->hm_next)
517 hm->hm_next->hm_pnext = hm->hm_pnext;
518 *hm->hm_pnext = hm->hm_next;
519 KFREE(hm);
520 }
521 }
522
523
524 /* ------------------------------------------------------------------------ */
525 /* Function: fix_outcksum */
526 /* Returns: Nil */
527 /* Parameters: fin(I) - pointer to packet information */
528 /* sp(I) - location of 16bit checksum to update */
529 /* n((I) - amount to adjust checksum by */
530 /* */
531 /* Adjusts the 16bit checksum by "n" for packets going out. */
532 /* ------------------------------------------------------------------------ */
fix_outcksum(fin,sp,n)533 void fix_outcksum(fin, sp, n)
534 fr_info_t *fin;
535 u_short *sp;
536 u_32_t n;
537 {
538 u_short sumshort;
539 u_32_t sum1;
540
541 if (n == 0)
542 return;
543
544 if (n & NAT_HW_CKSUM) {
545 n &= 0xffff;
546 n += fin->fin_dlen;
547 n = (n & 0xffff) + (n >> 16);
548 *sp = n & 0xffff;
549 return;
550 }
551 sum1 = (~ntohs(*sp)) & 0xffff;
552 sum1 += (n);
553 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554 /* Again */
555 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556 sumshort = ~(u_short)sum1;
557 *(sp) = htons(sumshort);
558 }
559
560
561 /* ------------------------------------------------------------------------ */
562 /* Function: fix_incksum */
563 /* Returns: Nil */
564 /* Parameters: fin(I) - pointer to packet information */
565 /* sp(I) - location of 16bit checksum to update */
566 /* n((I) - amount to adjust checksum by */
567 /* */
568 /* Adjusts the 16bit checksum by "n" for packets going in. */
569 /* ------------------------------------------------------------------------ */
fix_incksum(fin,sp,n)570 void fix_incksum(fin, sp, n)
571 fr_info_t *fin;
572 u_short *sp;
573 u_32_t n;
574 {
575 u_short sumshort;
576 u_32_t sum1;
577
578 if (n == 0)
579 return;
580
581 if (n & NAT_HW_CKSUM) {
582 n &= 0xffff;
583 n += fin->fin_dlen;
584 n = (n & 0xffff) + (n >> 16);
585 *sp = n & 0xffff;
586 return;
587 }
588 sum1 = (~ntohs(*sp)) & 0xffff;
589 sum1 += ~(n) & 0xffff;
590 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591 /* Again */
592 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593 sumshort = ~(u_short)sum1;
594 *(sp) = htons(sumshort);
595 }
596
597
598 /* ------------------------------------------------------------------------ */
599 /* Function: fix_datacksum */
600 /* Returns: Nil */
601 /* Parameters: sp(I) - location of 16bit checksum to update */
602 /* n((I) - amount to adjust checksum by */
603 /* */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
605 /* data section of an IP packet. */
606 /* */
607 /* The only situation in which you need to do this is when NAT'ing an */
608 /* ICMP error message. Such a message, contains in its body the IP header */
609 /* of the original IP packet, that causes the error. */
610 /* */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
612 /* kernel the data section of the ICMP error is just data, and no special */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section. */
615 /* ------------------------------------------------------------------------ */
fix_datacksum(sp,n)616 void fix_datacksum(sp, n)
617 u_short *sp;
618 u_32_t n;
619 {
620 u_short sumshort;
621 u_32_t sum1;
622
623 if (n == 0)
624 return;
625
626 sum1 = (~ntohs(*sp)) & 0xffff;
627 sum1 += (n);
628 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629 /* Again */
630 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631 sumshort = ~(u_short)sum1;
632 *(sp) = htons(sumshort);
633 }
634
635
636 /* ------------------------------------------------------------------------ */
637 /* Function: fr_nat_ioctl */
638 /* Returns: int - 0 == success, != 0 == failure */
639 /* Parameters: data(I) - pointer to ioctl data */
640 /* cmd(I) - ioctl command integer */
641 /* mode(I) - file mode bits used with open */
642 /* */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
644 /* ------------------------------------------------------------------------ */
fr_nat_ioctl(data,cmd,mode,uid,ctx)645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646 ioctlcmd_t cmd;
647 caddr_t data;
648 int mode, uid;
649 void *ctx;
650 {
651 ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652 int error = 0, ret, arg, getlock;
653 ipnat_t natd;
654 SPL_INT(s);
655
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658 if ((mode & FWRITE) &&
659 kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660 KAUTH_REQ_NETWORK_FIREWALL_FW,
661 NULL, NULL, NULL)) {
662 return EPERM;
663 }
664 # else
665 # if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
666 if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
667 # else
668 if ((securelevel >= 3) && (mode & FWRITE)) {
669 # endif
670 return EPERM;
671 }
672 # endif
673 #endif
674
675 #if defined(__osf__) && defined(_KERNEL)
676 getlock = 0;
677 #else
678 getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
679 #endif
680
681 nat = NULL; /* XXX gcc -Wuninitialized */
682 if (cmd == (ioctlcmd_t)SIOCADNAT) {
683 KMALLOC(nt, ipnat_t *);
684 } else {
685 nt = NULL;
686 }
687
688 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
689 if (mode & NAT_SYSSPACE) {
690 bcopy(data, (char *)&natd, sizeof(natd));
691 error = 0;
692 } else {
693 error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
694 }
695 }
696
697 if (error != 0)
698 goto done;
699
700 /*
701 * For add/delete, look to see if the NAT entry is already present
702 */
703 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
704 nat = &natd;
705 if (nat->in_v == 0) /* For backward compat. */
706 nat->in_v = 4;
707 nat->in_flags &= IPN_USERFLAGS;
708 if ((nat->in_redir & NAT_MAPBLK) == 0) {
709 if ((nat->in_flags & IPN_SPLIT) == 0)
710 nat->in_inip &= nat->in_inmsk;
711 if ((nat->in_flags & IPN_IPRANGE) == 0)
712 nat->in_outip &= nat->in_outmsk;
713 }
714 MUTEX_ENTER(&ipf_natio);
715 for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
716 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
717 IPN_CMPSIZ) == 0) {
718 if (nat->in_redir == NAT_REDIRECT &&
719 nat->in_pnext != n->in_pnext)
720 continue;
721 break;
722 }
723 }
724
725 switch (cmd)
726 {
727 #ifdef IPFILTER_LOG
728 case SIOCIPFFB :
729 {
730 int tmp;
731
732 if (!(mode & FWRITE))
733 error = EPERM;
734 else {
735 tmp = ipflog_clear(IPL_LOGNAT);
736 error = BCOPYOUT((char *)&tmp, (char *)data,
737 sizeof(tmp));
738 if (error != 0)
739 error = EFAULT;
740 }
741 break;
742 }
743
744 case SIOCSETLG :
745 if (!(mode & FWRITE))
746 error = EPERM;
747 else {
748 error = BCOPYIN((char *)data, (char *)&nat_logging,
749 sizeof(nat_logging));
750 if (error != 0)
751 error = EFAULT;
752 }
753 break;
754
755 case SIOCGETLG :
756 error = BCOPYOUT((char *)&nat_logging, (char *)data,
757 sizeof(nat_logging));
758 if (error != 0)
759 error = EFAULT;
760 break;
761
762 case FIONREAD :
763 arg = iplused[IPL_LOGNAT];
764 error = BCOPYOUT(&arg, data, sizeof(arg));
765 if (error != 0)
766 error = EFAULT;
767 break;
768 #endif
769 case SIOCADNAT :
770 if (!(mode & FWRITE)) {
771 error = EPERM;
772 } else if (n != NULL) {
773 error = EEXIST;
774 } else if (nt == NULL) {
775 error = ENOMEM;
776 }
777 if (error != 0) {
778 MUTEX_EXIT(&ipf_natio);
779 break;
780 }
781 bcopy((char *)nat, (char *)nt, sizeof(*n));
782 error = nat_siocaddnat(nt, np, getlock);
783 MUTEX_EXIT(&ipf_natio);
784 if (error == 0)
785 nt = NULL;
786 break;
787
788 case SIOCRMNAT :
789 if (!(mode & FWRITE)) {
790 error = EPERM;
791 n = NULL;
792 } else if (n == NULL) {
793 error = ESRCH;
794 }
795
796 if (error != 0) {
797 MUTEX_EXIT(&ipf_natio);
798 break;
799 }
800 nat_siocdelnat(n, np, getlock);
801
802 MUTEX_EXIT(&ipf_natio);
803 n = NULL;
804 break;
805
806 case SIOCGNATS :
807 nat_stats.ns_table[0] = nat_table[0];
808 nat_stats.ns_table[1] = nat_table[1];
809 nat_stats.ns_list = nat_list;
810 nat_stats.ns_maptable = ipf_hm_maptable;
811 nat_stats.ns_maplist = ipf_hm_maplist;
812 nat_stats.ns_nattab_sz = ipf_nattable_sz;
813 nat_stats.ns_nattab_max = ipf_nattable_max;
814 nat_stats.ns_rultab_sz = ipf_natrules_sz;
815 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
816 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
817 nat_stats.ns_instances = nat_instances;
818 nat_stats.ns_apslist = ap_sess_list;
819 nat_stats.ns_ticks = fr_ticks;
820 error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
821 break;
822
823 case SIOCGNATL :
824 {
825 natlookup_t nl;
826
827 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
828 if (error == 0) {
829 void *ptr;
830
831 if (getlock) {
832 READ_ENTER(&ipf_nat);
833 }
834 ptr = nat_lookupredir(&nl);
835 if (getlock) {
836 RWLOCK_EXIT(&ipf_nat);
837 }
838 if (ptr != NULL) {
839 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
840 } else {
841 error = ESRCH;
842 }
843 }
844 break;
845 }
846
847 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
848 if (!(mode & FWRITE)) {
849 error = EPERM;
850 break;
851 }
852 if (getlock) {
853 WRITE_ENTER(&ipf_nat);
854 }
855
856 error = BCOPYIN(data, &arg, sizeof(arg));
857 if (error != 0)
858 error = EFAULT;
859 else {
860 if (arg == 0)
861 ret = nat_flushtable();
862 else if (arg == 1)
863 ret = nat_clearlist();
864 else
865 ret = nat_extraflush(arg);
866 }
867
868 if (getlock) {
869 RWLOCK_EXIT(&ipf_nat);
870 }
871 if (error == 0) {
872 error = BCOPYOUT(&ret, data, sizeof(ret));
873 }
874 break;
875
876 case SIOCPROXY :
877 error = appr_ioctl(data, cmd, mode, ctx);
878 break;
879
880 case SIOCSTLCK :
881 if (!(mode & FWRITE)) {
882 error = EPERM;
883 } else {
884 error = fr_lock(data, &fr_nat_lock);
885 }
886 break;
887
888 case SIOCSTPUT :
889 if ((mode & FWRITE) != 0) {
890 error = fr_natputent(data, getlock);
891 } else {
892 error = EACCES;
893 }
894 break;
895
896 case SIOCSTGSZ :
897 if (fr_nat_lock) {
898 error = fr_natgetsz(data, getlock);
899 } else
900 error = EACCES;
901 break;
902
903 case SIOCSTGET :
904 if (fr_nat_lock) {
905 error = fr_natgetent(data, getlock);
906 } else
907 error = EACCES;
908 break;
909
910 case SIOCGENITER :
911 {
912 ipfgeniter_t iter;
913 ipftoken_t *token;
914
915 SPL_SCHED(s);
916 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
917 if (error == 0) {
918 token = ipf_findtoken(iter.igi_type, uid, ctx);
919 if (token != NULL) {
920 error = nat_iterator(token, &iter);
921 }
922 RWLOCK_EXIT(&ipf_tokens);
923 }
924 SPL_X(s);
925 break;
926 }
927
928 case SIOCIPFDELTOK :
929 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
930 if (error == 0) {
931 SPL_SCHED(s);
932 error = ipf_deltoken(arg, uid, ctx);
933 SPL_X(s);
934 } else {
935 error = EFAULT;
936 }
937 break;
938
939 case SIOCGTQTAB :
940 error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
941 break;
942
943 case SIOCGTABL :
944 error = nat_gettable(data);
945 break;
946
947 default :
948 error = EINVAL;
949 break;
950 }
951 done:
952 if (nt != NULL)
953 KFREE(nt);
954 return error;
955 }
956
957
958 /* ------------------------------------------------------------------------ */
959 /* Function: nat_siocaddnat */
960 /* Returns: int - 0 == success, != 0 == failure */
961 /* Parameters: n(I) - pointer to new NAT rule */
962 /* np(I) - pointer to where to insert new NAT rule */
963 /* getlock(I) - flag indicating if lock on ipf_nat is held */
964 /* Mutex Locks: ipf_natio */
965 /* */
966 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
967 /* from information passed to the kernel, then add it to the appropriate */
968 /* NAT rule table(s). */
969 /* ------------------------------------------------------------------------ */
nat_siocaddnat(n,np,getlock)970 static int nat_siocaddnat(n, np, getlock)
971 ipnat_t *n, **np;
972 int getlock;
973 {
974 int error = 0, i, j;
975
976 if (nat_resolverule(n) != 0)
977 return ENOENT;
978
979 if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
980 return EINVAL;
981
982 n->in_use = 0;
983 if (n->in_redir & NAT_MAPBLK)
984 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
985 else if (n->in_flags & IPN_AUTOPORTMAP)
986 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
987 else if (n->in_flags & IPN_IPRANGE)
988 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
989 else if (n->in_flags & IPN_SPLIT)
990 n->in_space = 2;
991 else if (n->in_outmsk != 0)
992 n->in_space = ~ntohl(n->in_outmsk);
993 else
994 n->in_space = 1;
995
996 /*
997 * Calculate the number of valid IP addresses in the output
998 * mapping range. In all cases, the range is inclusive of
999 * the start and ending IP addresses.
1000 * If to a CIDR address, lose 2: broadcast + network address
1001 * (so subtract 1)
1002 * If to a range, add one.
1003 * If to a single IP address, set to 1.
1004 */
1005 if (n->in_space) {
1006 if ((n->in_flags & IPN_IPRANGE) != 0)
1007 n->in_space += 1;
1008 else
1009 n->in_space -= 1;
1010 } else
1011 n->in_space = 1;
1012
1013 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1014 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1015 n->in_nip = ntohl(n->in_outip) + 1;
1016 else if ((n->in_flags & IPN_SPLIT) &&
1017 (n->in_redir & NAT_REDIRECT))
1018 n->in_nip = ntohl(n->in_inip);
1019 else
1020 n->in_nip = ntohl(n->in_outip);
1021 if (n->in_redir & NAT_MAP) {
1022 n->in_pnext = ntohs(n->in_pmin);
1023 /*
1024 * Multiply by the number of ports made available.
1025 */
1026 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1027 n->in_space *= (ntohs(n->in_pmax) -
1028 ntohs(n->in_pmin) + 1);
1029 /*
1030 * Because two different sources can map to
1031 * different destinations but use the same
1032 * local IP#/port #.
1033 * If the result is smaller than in_space, then
1034 * we may have wrapped around 32bits.
1035 */
1036 i = n->in_inmsk;
1037 if ((i != 0) && (i != 0xffffffff)) {
1038 j = n->in_space * (~ntohl(i) + 1);
1039 if (j >= n->in_space)
1040 n->in_space = j;
1041 else
1042 n->in_space = 0xffffffff;
1043 }
1044 }
1045 /*
1046 * If no protocol is specified, multiple by 256 to allow for
1047 * at least one IP:IP mapping per protocol.
1048 */
1049 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1050 j = n->in_space * 256;
1051 if (j >= n->in_space)
1052 n->in_space = j;
1053 else
1054 n->in_space = 0xffffffff;
1055 }
1056 }
1057
1058 /* Otherwise, these fields are preset */
1059
1060 if (getlock) {
1061 WRITE_ENTER(&ipf_nat);
1062 }
1063 n->in_next = NULL;
1064 *np = n;
1065
1066 if (n->in_age[0] != 0)
1067 n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1068
1069 if (n->in_age[1] != 0)
1070 n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1071
1072 if (n->in_redir & NAT_REDIRECT) {
1073 n->in_flags &= ~IPN_NOTDST;
1074 nat_addrdr(n);
1075 }
1076 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1077 n->in_flags &= ~IPN_NOTSRC;
1078 nat_addnat(n);
1079 }
1080 MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1081
1082 n = NULL;
1083 nat_stats.ns_rules++;
1084 #if SOLARIS && !defined(_INET_IP_STACK_H)
1085 pfil_delayed_copy = 0;
1086 #endif
1087 if (getlock) {
1088 RWLOCK_EXIT(&ipf_nat); /* WRITE */
1089 }
1090
1091 return error;
1092 }
1093
1094
1095 /* ------------------------------------------------------------------------ */
1096 /* Function: nat_resolvrule */
1097 /* Returns: Nil */
1098 /* Parameters: n(I) - pointer to NAT rule */
1099 /* */
1100 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
1101 /* from information passed to the kernel, then add it to the appropriate */
1102 /* NAT rule table(s). */
1103 /* ------------------------------------------------------------------------ */
nat_resolverule(n)1104 static int nat_resolverule(n)
1105 ipnat_t *n;
1106 {
1107 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1108 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1109
1110 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1111 if (n->in_ifnames[1][0] == '\0') {
1112 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1113 n->in_ifps[1] = n->in_ifps[0];
1114 } else {
1115 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1116 }
1117
1118 if (n->in_plabel[0] != '\0') {
1119 n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1120 if (n->in_apr == NULL)
1121 return -1;
1122 }
1123 return 0;
1124 }
1125
1126
1127 /* ------------------------------------------------------------------------ */
1128 /* Function: nat_siocdelnat */
1129 /* Returns: int - 0 == success, != 0 == failure */
1130 /* Parameters: n(I) - pointer to new NAT rule */
1131 /* np(I) - pointer to where to insert new NAT rule */
1132 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1133 /* Mutex Locks: ipf_natio */
1134 /* */
1135 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
1136 /* from information passed to the kernel, then add it to the appropriate */
1137 /* NAT rule table(s). */
1138 /* ------------------------------------------------------------------------ */
nat_siocdelnat(n,np,getlock)1139 static void nat_siocdelnat(n, np, getlock)
1140 ipnat_t *n, **np;
1141 int getlock;
1142 {
1143 if (getlock) {
1144 WRITE_ENTER(&ipf_nat);
1145 }
1146 if (n->in_redir & NAT_REDIRECT)
1147 nat_delrdr(n);
1148 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1149 nat_delnat(n);
1150 if (nat_list == NULL) {
1151 nat_masks = 0;
1152 rdr_masks = 0;
1153 }
1154
1155 if (n->in_tqehead[0] != NULL) {
1156 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1157 fr_freetimeoutqueue(n->in_tqehead[1]);
1158 }
1159 }
1160
1161 if (n->in_tqehead[1] != NULL) {
1162 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1163 fr_freetimeoutqueue(n->in_tqehead[1]);
1164 }
1165 }
1166
1167 *np = n->in_next;
1168
1169 if (n->in_use == 0) {
1170 if (n->in_apr)
1171 appr_free(n->in_apr);
1172 MUTEX_DESTROY(&n->in_lock);
1173 KFREE(n);
1174 nat_stats.ns_rules--;
1175 #if SOLARIS && !defined(_INET_IP_STACK_H)
1176 if (nat_stats.ns_rules == 0)
1177 pfil_delayed_copy = 1;
1178 #endif
1179 } else {
1180 n->in_flags |= IPN_DELETE;
1181 n->in_next = NULL;
1182 }
1183 if (getlock) {
1184 RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */
1185 }
1186 }
1187
1188
1189 /* ------------------------------------------------------------------------ */
1190 /* Function: fr_natgetsz */
1191 /* Returns: int - 0 == success, != 0 is the error value. */
1192 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1193 /* get the size of. */
1194 /* */
1195 /* Handle SIOCSTGSZ. */
1196 /* Return the size of the nat list entry to be copied back to user space. */
1197 /* The size of the entry is stored in the ng_sz field and the enture natget */
1198 /* structure is copied back to the user. */
1199 /* ------------------------------------------------------------------------ */
fr_natgetsz(data,getlock)1200 static int fr_natgetsz(data, getlock)
1201 caddr_t data;
1202 int getlock;
1203 {
1204 ap_session_t *aps;
1205 nat_t *nat, *n;
1206 natget_t ng;
1207
1208 if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1209 return EFAULT;
1210
1211 if (getlock) {
1212 READ_ENTER(&ipf_nat);
1213 }
1214
1215 nat = ng.ng_ptr;
1216 if (!nat) {
1217 nat = nat_instances;
1218 ng.ng_sz = 0;
1219 /*
1220 * Empty list so the size returned is 0. Simple.
1221 */
1222 if (nat == NULL) {
1223 if (getlock) {
1224 RWLOCK_EXIT(&ipf_nat);
1225 }
1226 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1227 return EFAULT;
1228 return 0;
1229 }
1230 } else {
1231 /*
1232 * Make sure the pointer we're copying from exists in the
1233 * current list of entries. Security precaution to prevent
1234 * copying of random kernel data.
1235 */
1236 for (n = nat_instances; n; n = n->nat_next)
1237 if (n == nat)
1238 break;
1239 if (n == NULL) {
1240 if (getlock) {
1241 RWLOCK_EXIT(&ipf_nat);
1242 }
1243 return ESRCH;
1244 }
1245 }
1246
1247 /*
1248 * Incluse any space required for proxy data structures.
1249 */
1250 ng.ng_sz = sizeof(nat_save_t);
1251 aps = nat->nat_aps;
1252 if (aps != NULL) {
1253 ng.ng_sz += sizeof(ap_session_t) - 4;
1254 if (aps->aps_data != 0)
1255 ng.ng_sz += aps->aps_psiz;
1256 }
1257 if (getlock) {
1258 RWLOCK_EXIT(&ipf_nat);
1259 }
1260
1261 if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1262 return EFAULT;
1263 return 0;
1264 }
1265
1266
1267 /* ------------------------------------------------------------------------ */
1268 /* Function: fr_natgetent */
1269 /* Returns: int - 0 == success, != 0 is the error value. */
1270 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1271 /* to NAT structure to copy out. */
1272 /* */
1273 /* Handle SIOCSTGET. */
1274 /* Copies out NAT entry to user space. Any additional data held for a */
1275 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1276 /* ------------------------------------------------------------------------ */
fr_natgetent(data,getlock)1277 static int fr_natgetent(data, getlock)
1278 caddr_t data;
1279 int getlock;
1280 {
1281 int error, outsize;
1282 ap_session_t *aps;
1283 nat_save_t *ipn, ipns;
1284 nat_t *n, *nat;
1285
1286 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1287 if (error != 0)
1288 return error;
1289
1290 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1291 return EINVAL;
1292
1293 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1294 if (ipn == NULL)
1295 return ENOMEM;
1296
1297 if (getlock) {
1298 READ_ENTER(&ipf_nat);
1299 }
1300
1301 ipn->ipn_dsize = ipns.ipn_dsize;
1302 nat = ipns.ipn_next;
1303 if (nat == NULL) {
1304 nat = nat_instances;
1305 if (nat == NULL) {
1306 if (nat_instances == NULL)
1307 error = ENOENT;
1308 goto finished;
1309 }
1310 } else {
1311 /*
1312 * Make sure the pointer we're copying from exists in the
1313 * current list of entries. Security precaution to prevent
1314 * copying of random kernel data.
1315 */
1316 for (n = nat_instances; n; n = n->nat_next)
1317 if (n == nat)
1318 break;
1319 if (n == NULL) {
1320 error = ESRCH;
1321 goto finished;
1322 }
1323 }
1324 ipn->ipn_next = nat->nat_next;
1325
1326 /*
1327 * Copy the NAT structure.
1328 */
1329 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1330
1331 /*
1332 * If we have a pointer to the NAT rule it belongs to, save that too.
1333 */
1334 if (nat->nat_ptr != NULL)
1335 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1336 sizeof(ipn->ipn_ipnat));
1337
1338 /*
1339 * If we also know the NAT entry has an associated filter rule,
1340 * save that too.
1341 */
1342 if (nat->nat_fr != NULL)
1343 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1344 sizeof(ipn->ipn_fr));
1345
1346 /*
1347 * Last but not least, if there is an application proxy session set
1348 * up for this NAT entry, then copy that out too, including any
1349 * private data saved along side it by the proxy.
1350 */
1351 aps = nat->nat_aps;
1352 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1353 if (aps != NULL) {
1354 char *s;
1355
1356 if (outsize < sizeof(*aps)) {
1357 error = ENOBUFS;
1358 goto finished;
1359 }
1360
1361 s = ipn->ipn_data;
1362 bcopy((char *)aps, s, sizeof(*aps));
1363 s += sizeof(*aps);
1364 outsize -= sizeof(*aps);
1365 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1366 bcopy(aps->aps_data, s, aps->aps_psiz);
1367 else
1368 error = ENOBUFS;
1369 }
1370 if (error == 0) {
1371 if (getlock) {
1372 RWLOCK_EXIT(&ipf_nat);
1373 getlock = 0;
1374 }
1375 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1376 }
1377
1378 finished:
1379 if (getlock) {
1380 RWLOCK_EXIT(&ipf_nat);
1381 }
1382 if (ipn != NULL) {
1383 KFREES(ipn, ipns.ipn_dsize);
1384 }
1385 return error;
1386 }
1387
1388
1389 /* ------------------------------------------------------------------------ */
1390 /* Function: fr_natputent */
1391 /* Returns: int - 0 == success, != 0 is the error value. */
1392 /* Parameters: data(I) - pointer to natget structure with NAT */
1393 /* structure information to load into the kernel */
1394 /* getlock(I) - flag indicating whether or not a write lock */
1395 /* on ipf_nat is already held. */
1396 /* */
1397 /* Handle SIOCSTPUT. */
1398 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1399 /* firewall rule data structures, if pointers to them indicate so. */
1400 /* ------------------------------------------------------------------------ */
fr_natputent(data,getlock)1401 static int fr_natputent(data, getlock)
1402 caddr_t data;
1403 int getlock;
1404 {
1405 nat_save_t ipn, *ipnn;
1406 ap_session_t *aps;
1407 nat_t *n, *nat;
1408 frentry_t *fr;
1409 fr_info_t fin;
1410 ipnat_t *in;
1411 int error;
1412
1413 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1414 if (error != 0)
1415 return error;
1416
1417 /*
1418 * Initialise early because of code at junkput label.
1419 */
1420 in = NULL;
1421 aps = NULL;
1422 nat = NULL;
1423 ipnn = NULL;
1424 fr = NULL;
1425
1426 /*
1427 * New entry, copy in the rest of the NAT entry if it's size is more
1428 * than just the nat_t structure.
1429 */
1430 if (ipn.ipn_dsize > sizeof(ipn)) {
1431 if (ipn.ipn_dsize > 81920) {
1432 error = ENOMEM;
1433 goto junkput;
1434 }
1435
1436 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1437 if (ipnn == NULL)
1438 return ENOMEM;
1439
1440 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1441 if (error != 0) {
1442 error = EFAULT;
1443 goto junkput;
1444 }
1445 } else
1446 ipnn = &ipn;
1447
1448 KMALLOC(nat, nat_t *);
1449 if (nat == NULL) {
1450 error = ENOMEM;
1451 goto junkput;
1452 }
1453
1454 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1455 /*
1456 * Initialize all these so that nat_delete() doesn't cause a crash.
1457 */
1458 bzero((char *)nat, offsetof(struct nat, nat_tqe));
1459 nat->nat_tqe.tqe_pnext = NULL;
1460 nat->nat_tqe.tqe_next = NULL;
1461 nat->nat_tqe.tqe_ifq = NULL;
1462 nat->nat_tqe.tqe_parent = nat;
1463
1464 /*
1465 * Restore the rule associated with this nat session
1466 */
1467 in = ipnn->ipn_nat.nat_ptr;
1468 if (in != NULL) {
1469 KMALLOC(in, ipnat_t *);
1470 nat->nat_ptr = in;
1471 if (in == NULL) {
1472 error = ENOMEM;
1473 goto junkput;
1474 }
1475 bzero((char *)in, offsetof(struct ipnat, in_next6));
1476 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1477 in->in_use = 1;
1478 in->in_flags |= IPN_DELETE;
1479
1480 ATOMIC_INC(nat_stats.ns_rules);
1481
1482 if (nat_resolverule(in) != 0) {
1483 error = ESRCH;
1484 goto junkput;
1485 }
1486 }
1487
1488 /*
1489 * Check that the NAT entry doesn't already exist in the kernel.
1490 *
1491 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry. To do
1492 * this, we check to see if the inbound combination of addresses and
1493 * ports is already known. Similar logic is applied for NAT_INBOUND.
1494 *
1495 */
1496 bzero((char *)&fin, sizeof(fin));
1497 fin.fin_p = nat->nat_p;
1498 if (nat->nat_dir == NAT_OUTBOUND) {
1499 fin.fin_ifp = nat->nat_ifps[0];
1500 fin.fin_data[0] = ntohs(nat->nat_oport);
1501 fin.fin_data[1] = ntohs(nat->nat_outport);
1502 if (getlock) {
1503 READ_ENTER(&ipf_nat);
1504 }
1505 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1506 nat->nat_oip, nat->nat_inip);
1507 if (getlock) {
1508 RWLOCK_EXIT(&ipf_nat);
1509 }
1510 if (n != NULL) {
1511 error = EEXIST;
1512 goto junkput;
1513 }
1514 } else if (nat->nat_dir == NAT_INBOUND) {
1515 fin.fin_ifp = nat->nat_ifps[0];
1516 fin.fin_data[0] = ntohs(nat->nat_outport);
1517 fin.fin_data[1] = ntohs(nat->nat_oport);
1518 if (getlock) {
1519 READ_ENTER(&ipf_nat);
1520 }
1521 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1522 nat->nat_outip, nat->nat_oip);
1523 if (getlock) {
1524 RWLOCK_EXIT(&ipf_nat);
1525 }
1526 if (n != NULL) {
1527 error = EEXIST;
1528 goto junkput;
1529 }
1530 } else {
1531 error = EINVAL;
1532 goto junkput;
1533 }
1534
1535 /*
1536 * Restore ap_session_t structure. Include the private data allocated
1537 * if it was there.
1538 */
1539 aps = nat->nat_aps;
1540 if (aps != NULL) {
1541 KMALLOC(aps, ap_session_t *);
1542 nat->nat_aps = aps;
1543 if (aps == NULL) {
1544 error = ENOMEM;
1545 goto junkput;
1546 }
1547 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1548 if (in != NULL)
1549 aps->aps_apr = in->in_apr;
1550 else
1551 aps->aps_apr = NULL;
1552 if (aps->aps_psiz != 0) {
1553 if (aps->aps_psiz > 81920) {
1554 error = ENOMEM;
1555 goto junkput;
1556 }
1557 KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1558 if (aps->aps_data == NULL) {
1559 error = ENOMEM;
1560 goto junkput;
1561 }
1562 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1563 aps->aps_psiz);
1564 } else {
1565 aps->aps_psiz = 0;
1566 aps->aps_data = NULL;
1567 }
1568 }
1569
1570 /*
1571 * If there was a filtering rule associated with this entry then
1572 * build up a new one.
1573 */
1574 fr = nat->nat_fr;
1575 if (fr != NULL) {
1576 if ((nat->nat_flags & SI_NEWFR) != 0) {
1577 KMALLOC(fr, frentry_t *);
1578 nat->nat_fr = fr;
1579 if (fr == NULL) {
1580 error = ENOMEM;
1581 goto junkput;
1582 }
1583 ipnn->ipn_nat.nat_fr = fr;
1584 fr->fr_ref = 1;
1585 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1586 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1587
1588 fr->fr_ref = 1;
1589 fr->fr_dsize = 0;
1590 fr->fr_data = NULL;
1591 fr->fr_type = FR_T_NONE;
1592
1593 MUTEX_NUKE(&fr->fr_lock);
1594 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1595 } else {
1596 if (getlock) {
1597 READ_ENTER(&ipf_nat);
1598 }
1599 for (n = nat_instances; n; n = n->nat_next)
1600 if (n->nat_fr == fr)
1601 break;
1602
1603 if (n != NULL) {
1604 MUTEX_ENTER(&fr->fr_lock);
1605 fr->fr_ref++;
1606 MUTEX_EXIT(&fr->fr_lock);
1607 }
1608 if (getlock) {
1609 RWLOCK_EXIT(&ipf_nat);
1610 }
1611
1612 if (!n) {
1613 error = ESRCH;
1614 goto junkput;
1615 }
1616 }
1617 }
1618
1619 if (ipnn != &ipn) {
1620 KFREES(ipnn, ipn.ipn_dsize);
1621 ipnn = NULL;
1622 }
1623
1624 if (getlock) {
1625 WRITE_ENTER(&ipf_nat);
1626 }
1627 error = nat_insert(nat, nat->nat_rev);
1628 if ((error == 0) && (aps != NULL)) {
1629 aps->aps_next = ap_sess_list;
1630 ap_sess_list = aps;
1631 }
1632 if (getlock) {
1633 RWLOCK_EXIT(&ipf_nat);
1634 }
1635
1636 if (error == 0)
1637 return 0;
1638
1639 error = ENOMEM;
1640
1641 junkput:
1642 if (fr != NULL)
1643 (void) fr_derefrule(&fr);
1644
1645 if ((ipnn != NULL) && (ipnn != &ipn)) {
1646 KFREES(ipnn, ipn.ipn_dsize);
1647 }
1648 if (nat != NULL) {
1649 if (aps != NULL) {
1650 if (aps->aps_data != NULL) {
1651 KFREES(aps->aps_data, aps->aps_psiz);
1652 }
1653 KFREE(aps);
1654 }
1655 if (in != NULL) {
1656 if (in->in_apr)
1657 appr_free(in->in_apr);
1658 KFREE(in);
1659 }
1660 KFREE(nat);
1661 }
1662 return error;
1663 }
1664
1665
1666 /* ------------------------------------------------------------------------ */
1667 /* Function: nat_delete */
1668 /* Returns: Nil */
1669 /* Parameters: natd(I) - pointer to NAT structure to delete */
1670 /* logtype(I) - type of LOG record to create before deleting */
1671 /* Write Lock: ipf_nat */
1672 /* */
1673 /* Delete a nat entry from the various lists and table. If NAT logging is */
1674 /* enabled then generate a NAT log record for this event. */
1675 /* ------------------------------------------------------------------------ */
nat_delete(nat,logtype)1676 void nat_delete(nat, logtype)
1677 struct nat *nat;
1678 int logtype;
1679 {
1680 struct ipnat *ipn;
1681 int removed = 0;
1682
1683 if (logtype != 0 && nat_logging != 0)
1684 nat_log(nat, logtype);
1685 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1686 ipf_rand_push(nat, sizeof(*nat));
1687 #endif
1688
1689 /*
1690 * Take it as a general indication that all the pointers are set if
1691 * nat_pnext is set.
1692 */
1693 if (nat->nat_pnext != NULL) {
1694 removed = 1;
1695
1696 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1697 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1698
1699 *nat->nat_pnext = nat->nat_next;
1700 if (nat->nat_next != NULL) {
1701 nat->nat_next->nat_pnext = nat->nat_pnext;
1702 nat->nat_next = NULL;
1703 }
1704 nat->nat_pnext = NULL;
1705
1706 *nat->nat_phnext[0] = nat->nat_hnext[0];
1707 if (nat->nat_hnext[0] != NULL) {
1708 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1709 nat->nat_hnext[0] = NULL;
1710 }
1711 nat->nat_phnext[0] = NULL;
1712
1713 *nat->nat_phnext[1] = nat->nat_hnext[1];
1714 if (nat->nat_hnext[1] != NULL) {
1715 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1716 nat->nat_hnext[1] = NULL;
1717 }
1718 nat->nat_phnext[1] = NULL;
1719
1720 if ((nat->nat_flags & SI_WILDP) != 0)
1721 nat_stats.ns_wilds--;
1722 }
1723
1724 if (nat->nat_me != NULL) {
1725 *nat->nat_me = NULL;
1726 nat->nat_me = NULL;
1727 }
1728
1729 if (nat->nat_tqe.tqe_ifq != NULL)
1730 fr_deletequeueentry(&nat->nat_tqe);
1731
1732 if (logtype == NL_EXPIRE)
1733 nat_stats.ns_expire++;
1734
1735 MUTEX_ENTER(&nat->nat_lock);
1736 /*
1737 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1738 * This happens when a nat'd packet is blocked and we want to throw
1739 * away the NAT session.
1740 */
1741 if (logtype == NL_DESTROY) {
1742 if (nat->nat_ref > 2) {
1743 nat->nat_ref -= 2;
1744 MUTEX_EXIT(&nat->nat_lock);
1745 if (removed)
1746 nat_stats.ns_orphans++;
1747 return;
1748 }
1749 } else if (nat->nat_ref > 1) {
1750 nat->nat_ref--;
1751 MUTEX_EXIT(&nat->nat_lock);
1752 if (removed)
1753 nat_stats.ns_orphans++;
1754 return;
1755 }
1756 MUTEX_EXIT(&nat->nat_lock);
1757
1758 /*
1759 * At this point, nat_ref is 1, doing "--" would make it 0..
1760 */
1761 nat->nat_ref = 0;
1762 if (!removed)
1763 nat_stats.ns_orphans--;
1764
1765 #ifdef IPFILTER_SYNC
1766 if (nat->nat_sync)
1767 ipfsync_del(nat->nat_sync);
1768 #endif
1769
1770 if (nat->nat_fr != NULL)
1771 (void) fr_derefrule(&nat->nat_fr);
1772
1773 if (nat->nat_hm != NULL)
1774 fr_hostmapdel(&nat->nat_hm);
1775
1776 /*
1777 * If there is an active reference from the nat entry to its parent
1778 * rule, decrement the rule's reference count and free it too if no
1779 * longer being used.
1780 */
1781 ipn = nat->nat_ptr;
1782 if (ipn != NULL) {
1783 fr_ipnatderef(&ipn);
1784 }
1785
1786 MUTEX_DESTROY(&nat->nat_lock);
1787
1788 aps_free(nat->nat_aps);
1789 nat_stats.ns_inuse--;
1790
1791 /*
1792 * If there's a fragment table entry too for this nat entry, then
1793 * dereference that as well. This is after nat_lock is released
1794 * because of Tru64.
1795 */
1796 fr_forgetnat((void *)nat);
1797
1798 KFREE(nat);
1799 }
1800
1801
1802 /* ------------------------------------------------------------------------ */
1803 /* Function: nat_flushtable */
1804 /* Returns: int - number of NAT rules deleted */
1805 /* Parameters: Nil */
1806 /* */
1807 /* Deletes all currently active NAT sessions. In deleting each NAT entry a */
1808 /* log record should be emitted in nat_delete() if NAT logging is enabled. */
1809 /* ------------------------------------------------------------------------ */
1810 /*
1811 * nat_flushtable - clear the NAT table of all mapping entries.
1812 */
nat_flushtable()1813 static int nat_flushtable()
1814 {
1815 nat_t *nat;
1816 int j = 0;
1817
1818 /*
1819 * ALL NAT mappings deleted, so lets just make the deletions
1820 * quicker.
1821 */
1822 if (nat_table[0] != NULL)
1823 bzero((char *)nat_table[0],
1824 sizeof(nat_table[0]) * ipf_nattable_sz);
1825 if (nat_table[1] != NULL)
1826 bzero((char *)nat_table[1],
1827 sizeof(nat_table[1]) * ipf_nattable_sz);
1828
1829 while ((nat = nat_instances) != NULL) {
1830 nat_delete(nat, NL_FLUSH);
1831 j++;
1832 }
1833
1834 nat_stats.ns_inuse = 0;
1835 return j;
1836 }
1837
1838
1839 /* ------------------------------------------------------------------------ */
1840 /* Function: nat_clearlist */
1841 /* Returns: int - number of NAT/RDR rules deleted */
1842 /* Parameters: Nil */
1843 /* */
1844 /* Delete all rules in the current list of rules. There is nothing elegant */
1845 /* about this cleanup: simply free all entries on the list of rules and */
1846 /* clear out the tables used for hashed NAT rule lookups. */
1847 /* ------------------------------------------------------------------------ */
nat_clearlist()1848 static int nat_clearlist()
1849 {
1850 ipnat_t *n, **np = &nat_list;
1851 int i = 0;
1852
1853 if (nat_rules != NULL)
1854 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1855 if (rdr_rules != NULL)
1856 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1857
1858 while ((n = *np) != NULL) {
1859 *np = n->in_next;
1860 if (n->in_use == 0) {
1861 if (n->in_apr != NULL)
1862 appr_free(n->in_apr);
1863 MUTEX_DESTROY(&n->in_lock);
1864 KFREE(n);
1865 nat_stats.ns_rules--;
1866 } else {
1867 n->in_flags |= IPN_DELETE;
1868 n->in_next = NULL;
1869 }
1870 i++;
1871 }
1872 #if SOLARIS && !defined(_INET_IP_STACK_H)
1873 pfil_delayed_copy = 1;
1874 #endif
1875 nat_masks = 0;
1876 rdr_masks = 0;
1877 return i;
1878 }
1879
1880
1881 /* ------------------------------------------------------------------------ */
1882 /* Function: nat_newmap */
1883 /* Returns: int - -1 == error, 0 == success */
1884 /* Parameters: fin(I) - pointer to packet information */
1885 /* nat(I) - pointer to NAT entry */
1886 /* ni(I) - pointer to structure with misc. information needed */
1887 /* to create new NAT entry. */
1888 /* */
1889 /* Given an empty NAT structure, populate it with new information about a */
1890 /* new NAT session, as defined by the matching NAT rule. */
1891 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1892 /* to the new IP address for the translation. */
1893 /* ------------------------------------------------------------------------ */
nat_newmap(fin,nat,ni)1894 static INLINE int nat_newmap(fin, nat, ni)
1895 fr_info_t *fin;
1896 nat_t *nat;
1897 natinfo_t *ni;
1898 {
1899 u_short st_port, dport, sport, port, sp, dp;
1900 struct in_addr in, inb;
1901 hostmap_t *hm;
1902 u_32_t flags;
1903 u_32_t st_ip;
1904 ipnat_t *np;
1905 nat_t *natl;
1906 int l;
1907
1908 /*
1909 * If it's an outbound packet which doesn't match any existing
1910 * record, then create a new port
1911 */
1912 l = 0;
1913 hm = NULL;
1914 np = ni->nai_np;
1915 st_ip = np->in_nip;
1916 st_port = np->in_pnext;
1917 flags = ni->nai_flags;
1918 sport = ni->nai_sport;
1919 dport = ni->nai_dport;
1920
1921 /*
1922 * Do a loop until we either run out of entries to try or we find
1923 * a NAT mapping that isn't currently being used. This is done
1924 * because the change to the source is not (usually) being fixed.
1925 */
1926 do {
1927 port = 0;
1928 in.s_addr = htonl(np->in_nip);
1929 if (l == 0) {
1930 /*
1931 * Check to see if there is an existing NAT
1932 * setup for this IP address pair.
1933 */
1934 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1935 in, 0);
1936 if (hm != NULL)
1937 in.s_addr = hm->hm_mapip.s_addr;
1938 } else if ((l == 1) && (hm != NULL)) {
1939 fr_hostmapdel(&hm);
1940 }
1941 in.s_addr = ntohl(in.s_addr);
1942
1943 nat->nat_hm = hm;
1944
1945 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1946 if (l > 0)
1947 return -1;
1948 }
1949
1950 if (np->in_redir == NAT_BIMAP &&
1951 np->in_inmsk == np->in_outmsk) {
1952 /*
1953 * map the address block in a 1:1 fashion
1954 */
1955 in.s_addr = np->in_outip;
1956 in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1957 in.s_addr = ntohl(in.s_addr);
1958
1959 } else if (np->in_redir & NAT_MAPBLK) {
1960 if ((l >= np->in_ppip) || ((l > 0) &&
1961 !(flags & IPN_TCPUDP)))
1962 return -1;
1963 /*
1964 * map-block - Calculate destination address.
1965 */
1966 in.s_addr = ntohl(fin->fin_saddr);
1967 in.s_addr &= ntohl(~np->in_inmsk);
1968 inb.s_addr = in.s_addr;
1969 in.s_addr /= np->in_ippip;
1970 in.s_addr &= ntohl(~np->in_outmsk);
1971 in.s_addr += ntohl(np->in_outip);
1972 /*
1973 * Calculate destination port.
1974 */
1975 if ((flags & IPN_TCPUDP) &&
1976 (np->in_ppip != 0)) {
1977 port = ntohs(sport) + l;
1978 port %= np->in_ppip;
1979 port += np->in_ppip *
1980 (inb.s_addr % np->in_ippip);
1981 port += MAPBLK_MINPORT;
1982 port = htons(port);
1983 }
1984
1985 } else if ((np->in_outip == 0) &&
1986 (np->in_outmsk == 0xffffffff)) {
1987 /*
1988 * 0/32 - use the interface's IP address.
1989 */
1990 if ((l > 0) ||
1991 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1992 &in, NULL) == -1)
1993 return -1;
1994 in.s_addr = ntohl(in.s_addr);
1995
1996 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1997 /*
1998 * 0/0 - use the original source address/port.
1999 */
2000 if (l > 0)
2001 return -1;
2002 in.s_addr = ntohl(fin->fin_saddr);
2003
2004 } else if ((np->in_outmsk != 0xffffffff) &&
2005 (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2006 np->in_nip++;
2007
2008 natl = NULL;
2009
2010 if ((flags & IPN_TCPUDP) &&
2011 ((np->in_redir & NAT_MAPBLK) == 0) &&
2012 (np->in_flags & IPN_AUTOPORTMAP)) {
2013 /*
2014 * "ports auto" (without map-block)
2015 */
2016 if ((l > 0) && (l % np->in_ppip == 0)) {
2017 if (l > np->in_space) {
2018 return -1;
2019 } else if ((l > np->in_ppip) &&
2020 np->in_outmsk != 0xffffffff)
2021 np->in_nip++;
2022 }
2023 if (np->in_ppip != 0) {
2024 port = ntohs(sport);
2025 port += (l % np->in_ppip);
2026 port %= np->in_ppip;
2027 port += np->in_ppip *
2028 (ntohl(fin->fin_saddr) %
2029 np->in_ippip);
2030 port += MAPBLK_MINPORT;
2031 port = htons(port);
2032 }
2033
2034 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2035 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2036 /*
2037 * Standard port translation. Select next port.
2038 */
2039 if (np->in_flags & IPN_SEQUENTIAL) {
2040 port = np->in_pnext;
2041 } else {
2042 port = ipf_random() % (ntohs(np->in_pmax) -
2043 ntohs(np->in_pmin));
2044 port += ntohs(np->in_pmin);
2045 }
2046 port = htons(port);
2047 np->in_pnext++;
2048
2049 if (np->in_pnext > ntohs(np->in_pmax)) {
2050 np->in_pnext = ntohs(np->in_pmin);
2051 if (np->in_outmsk != 0xffffffff)
2052 np->in_nip++;
2053 }
2054 }
2055
2056 if (np->in_flags & IPN_IPRANGE) {
2057 if (np->in_nip > ntohl(np->in_outmsk))
2058 np->in_nip = ntohl(np->in_outip);
2059 } else {
2060 if ((np->in_outmsk != 0xffffffff) &&
2061 ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2062 ntohl(np->in_outip))
2063 np->in_nip = ntohl(np->in_outip) + 1;
2064 }
2065
2066 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2067 port = sport;
2068
2069 /*
2070 * Here we do a lookup of the connection as seen from
2071 * the outside. If an IP# pair already exists, try
2072 * again. So if you have A->B becomes C->B, you can
2073 * also have D->E become C->E but not D->B causing
2074 * another C->B. Also take protocol and ports into
2075 * account when determining whether a pre-existing
2076 * NAT setup will cause an external conflict where
2077 * this is appropriate.
2078 */
2079 inb.s_addr = htonl(in.s_addr);
2080 sp = fin->fin_data[0];
2081 dp = fin->fin_data[1];
2082 fin->fin_data[0] = fin->fin_data[1];
2083 fin->fin_data[1] = htons(port);
2084 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2085 (u_int)fin->fin_p, fin->fin_dst, inb);
2086 fin->fin_data[0] = sp;
2087 fin->fin_data[1] = dp;
2088
2089 /*
2090 * Has the search wrapped around and come back to the
2091 * start ?
2092 */
2093 if ((natl != NULL) &&
2094 (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2095 (np->in_nip != 0) && (st_ip == np->in_nip))
2096 return -1;
2097 l++;
2098 } while (natl != NULL);
2099
2100 if (np->in_space > 0)
2101 np->in_space--;
2102
2103 /* Setup the NAT table */
2104 nat->nat_inip = fin->fin_src;
2105 nat->nat_outip.s_addr = htonl(in.s_addr);
2106 nat->nat_oip = fin->fin_dst;
2107 if (nat->nat_hm == NULL)
2108 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2109 nat->nat_outip, 0);
2110
2111 /*
2112 * The ICMP checksum does not have a pseudo header containing
2113 * the IP addresses
2114 */
2115 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2116 ni->nai_sum2 = LONG_SUM(in.s_addr);
2117 if ((flags & IPN_TCPUDP)) {
2118 ni->nai_sum1 += ntohs(sport);
2119 ni->nai_sum2 += ntohs(port);
2120 }
2121
2122 if (flags & IPN_TCPUDP) {
2123 nat->nat_inport = sport;
2124 nat->nat_outport = port; /* sport */
2125 nat->nat_oport = dport;
2126 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2127 } else if (flags & IPN_ICMPQUERY) {
2128 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2129 nat->nat_inport = port;
2130 nat->nat_outport = port;
2131 } else if (fin->fin_p == IPPROTO_GRE) {
2132 #if 0
2133 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2134 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2135 nat->nat_oport = 0;/*fin->fin_data[1];*/
2136 nat->nat_inport = 0;/*fin->fin_data[0];*/
2137 nat->nat_outport = 0;/*fin->fin_data[0];*/
2138 nat->nat_call[0] = fin->fin_data[0];
2139 nat->nat_call[1] = fin->fin_data[0];
2140 }
2141 #endif
2142 }
2143 ni->nai_ip.s_addr = in.s_addr;
2144 ni->nai_port = port;
2145 ni->nai_nport = dport;
2146 return 0;
2147 }
2148
2149
2150 /* ------------------------------------------------------------------------ */
2151 /* Function: nat_newrdr */
2152 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2153 /* allow rule to be moved if IPN_ROUNDR is set. */
2154 /* Parameters: fin(I) - pointer to packet information */
2155 /* nat(I) - pointer to NAT entry */
2156 /* ni(I) - pointer to structure with misc. information needed */
2157 /* to create new NAT entry. */
2158 /* */
2159 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2160 /* to the new IP address for the translation. */
2161 /* ------------------------------------------------------------------------ */
nat_newrdr(fin,nat,ni)2162 static INLINE int nat_newrdr(fin, nat, ni)
2163 fr_info_t *fin;
2164 nat_t *nat;
2165 natinfo_t *ni;
2166 {
2167 u_short nport, dport, sport;
2168 struct in_addr in, inb;
2169 u_short sp, dp;
2170 hostmap_t *hm;
2171 u_32_t flags;
2172 ipnat_t *np;
2173 nat_t *natl;
2174 int move;
2175
2176 move = 1;
2177 hm = NULL;
2178 in.s_addr = 0;
2179 np = ni->nai_np;
2180 flags = ni->nai_flags;
2181 sport = ni->nai_sport;
2182 dport = ni->nai_dport;
2183
2184 /*
2185 * If the matching rule has IPN_STICKY set, then we want to have the
2186 * same rule kick in as before. Why would this happen? If you have
2187 * a collection of rdr rules with "round-robin sticky", the current
2188 * packet might match a different one to the previous connection but
2189 * we want the same destination to be used.
2190 */
2191 if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2192 ((np->in_flags & IPN_STICKY) != 0)) {
2193 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2194 (u_32_t)dport);
2195 if (hm != NULL) {
2196 in.s_addr = ntohl(hm->hm_mapip.s_addr);
2197 np = hm->hm_ipnat;
2198 ni->nai_np = np;
2199 move = 0;
2200 }
2201 }
2202
2203 /*
2204 * Otherwise, it's an inbound packet. Most likely, we don't
2205 * want to rewrite source ports and source addresses. Instead,
2206 * we want to rewrite to a fixed internal address and fixed
2207 * internal port.
2208 */
2209 if (np->in_flags & IPN_SPLIT) {
2210 in.s_addr = np->in_nip;
2211
2212 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2213 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2214 in, (u_32_t)dport);
2215 if (hm != NULL) {
2216 in.s_addr = hm->hm_mapip.s_addr;
2217 move = 0;
2218 }
2219 }
2220
2221 if (hm == NULL || hm->hm_ref == 1) {
2222 if (np->in_inip == htonl(in.s_addr)) {
2223 np->in_nip = ntohl(np->in_inmsk);
2224 move = 0;
2225 } else {
2226 np->in_nip = ntohl(np->in_inip);
2227 }
2228 }
2229
2230 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2231 /*
2232 * 0/32 - use the interface's IP address.
2233 */
2234 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2235 return -1;
2236 in.s_addr = ntohl(in.s_addr);
2237
2238 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2239 /*
2240 * 0/0 - use the original destination address/port.
2241 */
2242 in.s_addr = ntohl(fin->fin_daddr);
2243
2244 } else if (np->in_redir == NAT_BIMAP &&
2245 np->in_inmsk == np->in_outmsk) {
2246 /*
2247 * map the address block in a 1:1 fashion
2248 */
2249 in.s_addr = np->in_inip;
2250 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2251 in.s_addr = ntohl(in.s_addr);
2252 } else {
2253 in.s_addr = ntohl(np->in_inip);
2254 }
2255
2256 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2257 nport = dport;
2258 else {
2259 /*
2260 * Whilst not optimized for the case where
2261 * pmin == pmax, the gain is not significant.
2262 */
2263 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2264 (np->in_pmin != np->in_pmax)) {
2265 nport = ntohs(dport) - ntohs(np->in_pmin) +
2266 ntohs(np->in_pnext);
2267 nport = htons(nport);
2268 } else
2269 nport = np->in_pnext;
2270 }
2271
2272 /*
2273 * When the redirect-to address is set to 0.0.0.0, just
2274 * assume a blank `forwarding' of the packet. We don't
2275 * setup any translation for this either.
2276 */
2277 if (in.s_addr == 0) {
2278 if (nport == dport)
2279 return -1;
2280 in.s_addr = ntohl(fin->fin_daddr);
2281 }
2282
2283 /*
2284 * Check to see if this redirect mapping already exists and if
2285 * it does, return "failure" (allowing it to be created will just
2286 * cause one or both of these "connections" to stop working.)
2287 */
2288 inb.s_addr = htonl(in.s_addr);
2289 sp = fin->fin_data[0];
2290 dp = fin->fin_data[1];
2291 fin->fin_data[1] = fin->fin_data[0];
2292 fin->fin_data[0] = ntohs(nport);
2293 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2294 (u_int)fin->fin_p, inb, fin->fin_src);
2295 fin->fin_data[0] = sp;
2296 fin->fin_data[1] = dp;
2297 if (natl != NULL)
2298 return -1;
2299
2300 nat->nat_inip.s_addr = htonl(in.s_addr);
2301 nat->nat_outip = fin->fin_dst;
2302 nat->nat_oip = fin->fin_src;
2303 if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2304 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2305 (u_32_t)dport);
2306
2307 ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2308 ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2309
2310 ni->nai_ip.s_addr = in.s_addr;
2311 ni->nai_nport = nport;
2312 ni->nai_port = sport;
2313
2314 if (flags & IPN_TCPUDP) {
2315 nat->nat_inport = nport;
2316 nat->nat_outport = dport;
2317 nat->nat_oport = sport;
2318 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2319 } else if (flags & IPN_ICMPQUERY) {
2320 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2321 nat->nat_inport = nport;
2322 nat->nat_outport = nport;
2323 } else if (fin->fin_p == IPPROTO_GRE) {
2324 #if 0
2325 nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2326 if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2327 nat->nat_call[0] = fin->fin_data[0];
2328 nat->nat_call[1] = fin->fin_data[1];
2329 nat->nat_oport = 0; /*fin->fin_data[0];*/
2330 nat->nat_inport = 0; /*fin->fin_data[1];*/
2331 nat->nat_outport = 0; /*fin->fin_data[1];*/
2332 }
2333 #endif
2334 }
2335
2336 return move;
2337 }
2338
2339 /* ------------------------------------------------------------------------ */
2340 /* Function: nat_new */
2341 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2342 /* else pointer to new NAT structure */
2343 /* Parameters: fin(I) - pointer to packet information */
2344 /* np(I) - pointer to NAT rule */
2345 /* natsave(I) - pointer to where to store NAT struct pointer */
2346 /* flags(I) - flags describing the current packet */
2347 /* direction(I) - direction of packet (in/out) */
2348 /* Write Lock: ipf_nat */
2349 /* */
2350 /* Attempts to create a new NAT entry. Does not actually change the packet */
2351 /* in any way. */
2352 /* */
2353 /* This fucntion is in three main parts: (1) deal with creating a new NAT */
2354 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2355 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2356 /* and (3) building that structure and putting it into the NAT table(s). */
2357 /* */
2358 /* NOTE: natsave should NOT be used top point back to an ipstate_t struct */
2359 /* as it can result in memory being corrupted. */
2360 /* ------------------------------------------------------------------------ */
nat_new(fin,np,natsave,flags,direction)2361 nat_t *nat_new(fin, np, natsave, flags, direction)
2362 fr_info_t *fin;
2363 ipnat_t *np;
2364 nat_t **natsave;
2365 u_int flags;
2366 int direction;
2367 {
2368 u_short port = 0, sport = 0, dport = 0, nport = 0;
2369 tcphdr_t *tcp = NULL;
2370 hostmap_t *hm = NULL;
2371 struct in_addr in;
2372 nat_t *nat, *natl;
2373 u_int nflags;
2374 natinfo_t ni;
2375 u_32_t sumd;
2376 int move;
2377 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2378 qpktinfo_t *qpi = fin->fin_qpi;
2379 #endif
2380
2381 if (nat_stats.ns_inuse >= ipf_nattable_max) {
2382 nat_stats.ns_memfail++;
2383 fr_nat_doflush = 1;
2384 return NULL;
2385 }
2386
2387 move = 1;
2388 nflags = np->in_flags & flags;
2389 nflags &= NAT_FROMRULE;
2390
2391 ni.nai_np = np;
2392 ni.nai_nflags = nflags;
2393 ni.nai_flags = flags;
2394 ni.nai_dport = 0;
2395 ni.nai_sport = 0;
2396
2397 /* Give me a new nat */
2398 KMALLOC(nat, nat_t *);
2399 if (nat == NULL) {
2400 nat_stats.ns_memfail++;
2401 /*
2402 * Try to automatically tune the max # of entries in the
2403 * table allowed to be less than what will cause kmem_alloc()
2404 * to fail and try to eliminate panics due to out of memory
2405 * conditions arising.
2406 */
2407 if (ipf_nattable_max > ipf_nattable_sz) {
2408 ipf_nattable_max = nat_stats.ns_inuse - 100;
2409 printf("ipf_nattable_max reduced to %d\n",
2410 ipf_nattable_max);
2411 }
2412 return NULL;
2413 }
2414
2415 if (flags & IPN_TCPUDP) {
2416 tcp = fin->fin_dp;
2417 ni.nai_sport = htons(fin->fin_sport);
2418 ni.nai_dport = htons(fin->fin_dport);
2419 } else if (flags & IPN_ICMPQUERY) {
2420 /*
2421 * In the ICMP query NAT code, we translate the ICMP id fields
2422 * to make them unique. This is indepedent of the ICMP type
2423 * (e.g. in the unlikely event that a host sends an echo and
2424 * an tstamp request with the same id, both packets will have
2425 * their ip address/id field changed in the same way).
2426 */
2427 /* The icmp_id field is used by the sender to identify the
2428 * process making the icmp request. (the receiver justs
2429 * copies it back in its response). So, it closely matches
2430 * the concept of source port. We overlay sport, so we can
2431 * maximally reuse the existing code.
2432 */
2433 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2434 ni.nai_dport = ni.nai_sport;
2435 }
2436
2437 bzero((char *)nat, sizeof(*nat));
2438 nat->nat_flags = flags;
2439 nat->nat_redir = np->in_redir;
2440
2441 if ((flags & NAT_SLAVE) == 0) {
2442 MUTEX_ENTER(&ipf_nat_new);
2443 }
2444
2445 /*
2446 * Search the current table for a match.
2447 */
2448 if (direction == NAT_OUTBOUND) {
2449 /*
2450 * We can now arrange to call this for the same connection
2451 * because ipf_nat_new doesn't protect the code path into
2452 * this function.
2453 */
2454 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2455 fin->fin_src, fin->fin_dst);
2456 if (natl != NULL) {
2457 KFREE(nat);
2458 nat = natl;
2459 goto done;
2460 }
2461
2462 move = nat_newmap(fin, nat, &ni);
2463 if (move == -1)
2464 goto badnat;
2465
2466 np = ni.nai_np;
2467 in = ni.nai_ip;
2468 } else {
2469 /*
2470 * NAT_INBOUND is used only for redirects rules
2471 */
2472 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2473 fin->fin_src, fin->fin_dst);
2474 if (natl != NULL) {
2475 KFREE(nat);
2476 nat = natl;
2477 goto done;
2478 }
2479
2480 move = nat_newrdr(fin, nat, &ni);
2481 if (move == -1)
2482 goto badnat;
2483
2484 np = ni.nai_np;
2485 in = ni.nai_ip;
2486 }
2487 port = ni.nai_port;
2488 nport = ni.nai_nport;
2489
2490 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2491 if (np->in_redir == NAT_REDIRECT) {
2492 nat_delrdr(np);
2493 nat_addrdr(np);
2494 } else if (np->in_redir == NAT_MAP) {
2495 nat_delnat(np);
2496 nat_addnat(np);
2497 }
2498 }
2499
2500 if (flags & IPN_TCPUDP) {
2501 sport = ni.nai_sport;
2502 dport = ni.nai_dport;
2503 } else if (flags & IPN_ICMPQUERY) {
2504 sport = ni.nai_sport;
2505 dport = 0;
2506 }
2507
2508 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2509 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2510 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2511 if ((flags & IPN_TCP) && dohwcksum &&
2512 (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2513 if (direction == NAT_OUTBOUND)
2514 ni.nai_sum1 = LONG_SUM(in.s_addr);
2515 else
2516 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2517 ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2518 ni.nai_sum1 += 30;
2519 ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2520 nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2521 } else
2522 #endif
2523 nat->nat_sumd[1] = nat->nat_sumd[0];
2524
2525 if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2526 if (direction == NAT_OUTBOUND)
2527 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2528 else
2529 ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2530
2531 ni.nai_sum2 = LONG_SUM(in.s_addr);
2532
2533 CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2534 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2535 } else {
2536 nat->nat_ipsumd = nat->nat_sumd[0];
2537 if (!(flags & IPN_TCPUDPICMP)) {
2538 nat->nat_sumd[0] = 0;
2539 nat->nat_sumd[1] = 0;
2540 }
2541 }
2542
2543 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2544 fr_nat_doflush = 1;
2545 goto badnat;
2546 }
2547 if (flags & SI_WILDP)
2548 nat_stats.ns_wilds++;
2549 fin->fin_flx |= FI_NEWNAT;
2550 goto done;
2551 badnat:
2552 nat_stats.ns_badnat++;
2553 if ((hm = nat->nat_hm) != NULL)
2554 fr_hostmapdel(&hm);
2555 KFREE(nat);
2556 nat = NULL;
2557 done:
2558 if ((flags & NAT_SLAVE) == 0) {
2559 MUTEX_EXIT(&ipf_nat_new);
2560 }
2561 return nat;
2562 }
2563
2564
2565 /* ------------------------------------------------------------------------ */
2566 /* Function: nat_finalise */
2567 /* Returns: int - 0 == sucess, -1 == failure */
2568 /* Parameters: fin(I) - pointer to packet information */
2569 /* nat(I) - pointer to NAT entry */
2570 /* ni(I) - pointer to structure with misc. information needed */
2571 /* to create new NAT entry. */
2572 /* Write Lock: ipf_nat */
2573 /* */
2574 /* This is the tail end of constructing a new NAT entry and is the same */
2575 /* for both IPv4 and IPv6. */
2576 /* ------------------------------------------------------------------------ */
2577 /*ARGSUSED*/
nat_finalise(fin,nat,ni,tcp,natsave,direction)2578 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2579 fr_info_t *fin;
2580 nat_t *nat;
2581 natinfo_t *ni;
2582 tcphdr_t *tcp;
2583 nat_t **natsave;
2584 int direction;
2585 {
2586 frentry_t *fr;
2587 ipnat_t *np;
2588
2589 np = ni->nai_np;
2590
2591 if (np->in_ifps[0] != NULL) {
2592 COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2593 }
2594 if (np->in_ifps[1] != NULL) {
2595 COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2596 }
2597 #ifdef IPFILTER_SYNC
2598 if ((nat->nat_flags & SI_CLONE) == 0)
2599 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2600 #endif
2601
2602 nat->nat_me = natsave;
2603 nat->nat_dir = direction;
2604 nat->nat_ifps[0] = np->in_ifps[0];
2605 nat->nat_ifps[1] = np->in_ifps[1];
2606 nat->nat_ptr = np;
2607 nat->nat_p = fin->fin_p;
2608 nat->nat_mssclamp = np->in_mssclamp;
2609 if (nat->nat_p == IPPROTO_TCP)
2610 nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2611
2612 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2613 if (appr_new(fin, nat) == -1)
2614 return -1;
2615
2616 if (nat_insert(nat, fin->fin_rev) == 0) {
2617 if (nat_logging)
2618 nat_log(nat, (u_int)np->in_redir);
2619 np->in_use++;
2620 fr = fin->fin_fr;
2621 nat->nat_fr = fr;
2622 if (fr != NULL) {
2623 MUTEX_ENTER(&fr->fr_lock);
2624 fr->fr_ref++;
2625 MUTEX_EXIT(&fr->fr_lock);
2626 }
2627 return 0;
2628 }
2629
2630 /*
2631 * nat_insert failed, so cleanup time...
2632 */
2633 return -1;
2634 }
2635
2636
2637 /* ------------------------------------------------------------------------ */
2638 /* Function: nat_insert */
2639 /* Returns: int - 0 == sucess, -1 == failure */
2640 /* Parameters: nat(I) - pointer to NAT structure */
2641 /* rev(I) - flag indicating forward/reverse direction of packet */
2642 /* Write Lock: ipf_nat */
2643 /* */
2644 /* Insert a NAT entry into the hash tables for searching and add it to the */
2645 /* list of active NAT entries. Adjust global counters when complete. */
2646 /* ------------------------------------------------------------------------ */
nat_insert(nat,rev)2647 int nat_insert(nat, rev)
2648 nat_t *nat;
2649 int rev;
2650 {
2651 u_int hv1, hv2;
2652 nat_t **natp;
2653
2654 /*
2655 * Try and return an error as early as possible, so calculate the hash
2656 * entry numbers first and then proceed.
2657 */
2658 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2659 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2660 0xffffffff);
2661 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2662 ipf_nattable_sz);
2663 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2664 0xffffffff);
2665 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2666 ipf_nattable_sz);
2667 } else {
2668 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2669 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2670 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2671 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2672 }
2673
2674 if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2675 nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2676 return -1;
2677 }
2678
2679 nat->nat_hv[0] = hv1;
2680 nat->nat_hv[1] = hv2;
2681
2682 MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2683
2684 nat->nat_rev = rev;
2685 nat->nat_ref = 1;
2686 nat->nat_bytes[0] = 0;
2687 nat->nat_pkts[0] = 0;
2688 nat->nat_bytes[1] = 0;
2689 nat->nat_pkts[1] = 0;
2690
2691 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2692 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2693
2694 if (nat->nat_ifnames[1][0] != '\0') {
2695 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2696 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2697 } else {
2698 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2699 LIFNAMSIZ);
2700 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2701 nat->nat_ifps[1] = nat->nat_ifps[0];
2702 }
2703
2704 nat->nat_next = nat_instances;
2705 nat->nat_pnext = &nat_instances;
2706 if (nat_instances)
2707 nat_instances->nat_pnext = &nat->nat_next;
2708 nat_instances = nat;
2709
2710 natp = &nat_table[0][hv1];
2711 if (*natp)
2712 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2713 nat->nat_phnext[0] = natp;
2714 nat->nat_hnext[0] = *natp;
2715 *natp = nat;
2716 nat_stats.ns_bucketlen[0][hv1]++;
2717
2718 natp = &nat_table[1][hv2];
2719 if (*natp)
2720 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2721 nat->nat_phnext[1] = natp;
2722 nat->nat_hnext[1] = *natp;
2723 *natp = nat;
2724 nat_stats.ns_bucketlen[1][hv2]++;
2725
2726 fr_setnatqueue(nat, rev);
2727
2728 nat_stats.ns_added++;
2729 nat_stats.ns_inuse++;
2730 return 0;
2731 }
2732
2733
2734 /* ------------------------------------------------------------------------ */
2735 /* Function: nat_icmperrorlookup */
2736 /* Returns: nat_t* - point to matching NAT structure */
2737 /* Parameters: fin(I) - pointer to packet information */
2738 /* dir(I) - direction of packet (in/out) */
2739 /* */
2740 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2741 /* ICMP query nat entry. It is assumed that the packet is already of the */
2742 /* the required length. */
2743 /* ------------------------------------------------------------------------ */
nat_icmperrorlookup(fin,dir)2744 nat_t *nat_icmperrorlookup(fin, dir)
2745 fr_info_t *fin;
2746 int dir;
2747 {
2748 int flags = 0, type, minlen;
2749 icmphdr_t *icmp, *orgicmp;
2750 tcphdr_t *tcp = NULL;
2751 u_short data[2];
2752 nat_t *nat;
2753 ip_t *oip;
2754 u_int p;
2755
2756 icmp = fin->fin_dp;
2757 type = icmp->icmp_type;
2758 /*
2759 * Does it at least have the return (basic) IP header ?
2760 * Only a basic IP header (no options) should be with an ICMP error
2761 * header. Also, if it's not an error type, then return.
2762 */
2763 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2764 return NULL;
2765
2766 /*
2767 * Check packet size
2768 */
2769 oip = (ip_t *)((char *)fin->fin_dp + 8);
2770 minlen = IP_HL(oip) << 2;
2771 if ((minlen < sizeof(ip_t)) ||
2772 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2773 return NULL;
2774 /*
2775 * Is the buffer big enough for all of it ? It's the size of the IP
2776 * header claimed in the encapsulated part which is of concern. It
2777 * may be too big to be in this buffer but not so big that it's
2778 * outside the ICMP packet, leading to TCP deref's causing problems.
2779 * This is possible because we don't know how big oip_hl is when we
2780 * do the pullup early in fr_check() and thus can't gaurantee it is
2781 * all here now.
2782 */
2783 #ifdef _KERNEL
2784 {
2785 mb_t *m;
2786
2787 m = fin->fin_m;
2788 # if defined(MENTAT)
2789 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2790 return NULL;
2791 # else
2792 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2793 (char *)fin->fin_ip + M_LEN(m))
2794 return NULL;
2795 # endif
2796 }
2797 #endif
2798
2799 if (fin->fin_daddr != oip->ip_src.s_addr)
2800 return NULL;
2801
2802 p = oip->ip_p;
2803 if (p == IPPROTO_TCP)
2804 flags = IPN_TCP;
2805 else if (p == IPPROTO_UDP)
2806 flags = IPN_UDP;
2807 else if (p == IPPROTO_ICMP) {
2808 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2809
2810 /* see if this is related to an ICMP query */
2811 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2812 data[0] = fin->fin_data[0];
2813 data[1] = fin->fin_data[1];
2814 fin->fin_data[0] = 0;
2815 fin->fin_data[1] = orgicmp->icmp_id;
2816
2817 flags = IPN_ICMPERR|IPN_ICMPQUERY;
2818 /*
2819 * NOTE : dir refers to the direction of the original
2820 * ip packet. By definition the icmp error
2821 * message flows in the opposite direction.
2822 */
2823 if (dir == NAT_INBOUND)
2824 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2825 oip->ip_src);
2826 else
2827 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2828 oip->ip_src);
2829 fin->fin_data[0] = data[0];
2830 fin->fin_data[1] = data[1];
2831 return nat;
2832 }
2833 }
2834
2835 if (flags & IPN_TCPUDP) {
2836 minlen += 8; /* + 64bits of data to get ports */
2837 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2838 return NULL;
2839
2840 data[0] = fin->fin_data[0];
2841 data[1] = fin->fin_data[1];
2842 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2843 fin->fin_data[0] = ntohs(tcp->th_dport);
2844 fin->fin_data[1] = ntohs(tcp->th_sport);
2845
2846 if (dir == NAT_INBOUND) {
2847 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2848 oip->ip_src);
2849 } else {
2850 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2851 oip->ip_src);
2852 }
2853 fin->fin_data[0] = data[0];
2854 fin->fin_data[1] = data[1];
2855 return nat;
2856 }
2857 if (dir == NAT_INBOUND)
2858 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2859 else
2860 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2861 }
2862
2863
2864 /* ------------------------------------------------------------------------ */
2865 /* Function: nat_icmperror */
2866 /* Returns: nat_t* - point to matching NAT structure */
2867 /* Parameters: fin(I) - pointer to packet information */
2868 /* nflags(I) - NAT flags for this packet */
2869 /* dir(I) - direction of packet (in/out) */
2870 /* */
2871 /* Fix up an ICMP packet which is an error message for an existing NAT */
2872 /* session. This will correct both packet header data and checksums. */
2873 /* */
2874 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2875 /* a NAT'd ICMP packet gets correctly recognised. */
2876 /* ------------------------------------------------------------------------ */
nat_icmperror(fin,nflags,dir)2877 nat_t *nat_icmperror(fin, nflags, dir)
2878 fr_info_t *fin;
2879 u_int *nflags;
2880 int dir;
2881 {
2882 u_32_t sum1, sum2, sumd, sumd2;
2883 struct in_addr a1, a2;
2884 int flags, dlen, odst;
2885 icmphdr_t *icmp;
2886 u_short *csump;
2887 tcphdr_t *tcp;
2888 nat_t *nat;
2889 ip_t *oip;
2890 void *dp;
2891
2892 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2893 return NULL;
2894 /*
2895 * nat_icmperrorlookup() will return NULL for `defective' packets.
2896 */
2897 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2898 return NULL;
2899
2900 tcp = NULL;
2901 csump = NULL;
2902 flags = 0;
2903 sumd2 = 0;
2904 *nflags = IPN_ICMPERR;
2905 icmp = fin->fin_dp;
2906 oip = (ip_t *)&icmp->icmp_ip;
2907 dp = (((char *)oip) + (IP_HL(oip) << 2));
2908 if (oip->ip_p == IPPROTO_TCP) {
2909 tcp = (tcphdr_t *)dp;
2910 csump = (u_short *)&tcp->th_sum;
2911 flags = IPN_TCP;
2912 } else if (oip->ip_p == IPPROTO_UDP) {
2913 udphdr_t *udp;
2914
2915 udp = (udphdr_t *)dp;
2916 tcp = (tcphdr_t *)dp;
2917 csump = (u_short *)&udp->uh_sum;
2918 flags = IPN_UDP;
2919 } else if (oip->ip_p == IPPROTO_ICMP)
2920 flags = IPN_ICMPQUERY;
2921 dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);
2922
2923 /*
2924 * Need to adjust ICMP header to include the real IP#'s and
2925 * port #'s. Only apply a checksum change relative to the
2926 * IP address change as it will be modified again in fr_checknatout
2927 * for both address and port. Two checksum changes are
2928 * necessary for the two header address changes. Be careful
2929 * to only modify the checksum once for the port # and twice
2930 * for the IP#.
2931 */
2932
2933 /*
2934 * Step 1
2935 * Fix the IP addresses in the offending IP packet. You also need
2936 * to adjust the IP header checksum of that offending IP packet.
2937 *
2938 * Normally, you would expect that the ICMP checksum of the
2939 * ICMP error message needs to be adjusted as well for the
2940 * IP address change in oip.
2941 * However, this is a NOP, because the ICMP checksum is
2942 * calculated over the complete ICMP packet, which includes the
2943 * changed oip IP addresses and oip->ip_sum. However, these
2944 * two changes cancel each other out (if the delta for
2945 * the IP address is x, then the delta for ip_sum is minus x),
2946 * so no change in the icmp_cksum is necessary.
2947 *
2948 * Inbound ICMP
2949 * ------------
2950 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2951 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
2952 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
2953 *
2954 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2955 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2956 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
2957 *
2958 * Outbound ICMP
2959 * -------------
2960 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
2961 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
2962 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2963 *
2964 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
2965 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
2966 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
2967 *
2968 */
2969 odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
2970 if (odst == 1) {
2971 a1.s_addr = ntohl(nat->nat_inip.s_addr);
2972 a2.s_addr = ntohl(oip->ip_src.s_addr);
2973 oip->ip_src.s_addr = htonl(a1.s_addr);
2974 } else {
2975 a1.s_addr = ntohl(nat->nat_outip.s_addr);
2976 a2.s_addr = ntohl(oip->ip_dst.s_addr);
2977 oip->ip_dst.s_addr = htonl(a1.s_addr);
2978 }
2979
2980 sumd = a2.s_addr - a1.s_addr;
2981 if (sumd != 0) {
2982 if (a1.s_addr > a2.s_addr)
2983 sumd--;
2984 sumd = ~sumd;
2985
2986 fix_datacksum(&oip->ip_sum, sumd);
2987 }
2988
2989 sumd2 = sumd;
2990 sum1 = 0;
2991 sum2 = 0;
2992
2993 /*
2994 * Fix UDP pseudo header checksum to compensate for the
2995 * IP address change.
2996 */
2997 if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
2998 /*
2999 * Step 2 :
3000 * For offending TCP/UDP IP packets, translate the ports as
3001 * well, based on the NAT specification. Of course such
3002 * a change may be reflected in the ICMP checksum as well.
3003 *
3004 * Since the port fields are part of the TCP/UDP checksum
3005 * of the offending IP packet, you need to adjust that checksum
3006 * as well... except that the change in the port numbers should
3007 * be offset by the checksum change. However, the TCP/UDP
3008 * checksum will also need to change if there has been an
3009 * IP address change.
3010 */
3011 if (odst == 1) {
3012 sum1 = ntohs(nat->nat_inport);
3013 sum2 = ntohs(tcp->th_sport);
3014
3015 tcp->th_sport = htons(sum1);
3016 } else {
3017 sum1 = ntohs(nat->nat_outport);
3018 sum2 = ntohs(tcp->th_dport);
3019
3020 tcp->th_dport = htons(sum1);
3021 }
3022
3023 sumd += sum1 - sum2;
3024 if (sumd != 0 || sumd2 != 0) {
3025 /*
3026 * At this point, sumd is the delta to apply to the
3027 * TCP/UDP header, given the changes in both the IP
3028 * address and the ports and sumd2 is the delta to
3029 * apply to the ICMP header, given the IP address
3030 * change delta that may need to be applied to the
3031 * TCP/UDP checksum instead.
3032 *
3033 * If we will both the IP and TCP/UDP checksums
3034 * then the ICMP checksum changes by the address
3035 * delta applied to the TCP/UDP checksum. If we
3036 * do not change the TCP/UDP checksum them we
3037 * apply the delta in ports to the ICMP checksum.
3038 */
3039 if (oip->ip_p == IPPROTO_UDP) {
3040 if ((dlen >= 8) && (*csump != 0)) {
3041 fix_datacksum(csump, sumd);
3042 } else {
3043 sumd2 = sum1 - sum2;
3044 if (sum2 > sum1)
3045 sumd2--;
3046 }
3047 } else if (oip->ip_p == IPPROTO_TCP) {
3048 if (dlen >= 18) {
3049 fix_datacksum(csump, sumd);
3050 } else {
3051 sumd2 = sum2 - sum1;
3052 if (sum1 > sum2)
3053 sumd2--;
3054 }
3055 }
3056
3057 if (sumd2 != 0) {
3058 ipnat_t *np;
3059
3060 np = nat->nat_ptr;
3061 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3062 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3063 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3064
3065 if ((odst == 0) && (dir == NAT_OUTBOUND) &&
3066 (fin->fin_rev == 0) && (np != NULL) &&
3067 (np->in_redir & NAT_REDIRECT)) {
3068 fix_outcksum(fin, &icmp->icmp_cksum,
3069 sumd2);
3070 } else {
3071 fix_incksum(fin, &icmp->icmp_cksum,
3072 sumd2);
3073 }
3074 }
3075 }
3076 } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
3077 icmphdr_t *orgicmp;
3078
3079 /*
3080 * XXX - what if this is bogus hl and we go off the end ?
3081 * In this case, nat_icmperrorlookup() will have returned NULL.
3082 */
3083 orgicmp = (icmphdr_t *)dp;
3084
3085 if (odst == 1) {
3086 if (orgicmp->icmp_id != nat->nat_inport) {
3087
3088 /*
3089 * Fix ICMP checksum (of the offening ICMP
3090 * query packet) to compensate the change
3091 * in the ICMP id of the offending ICMP
3092 * packet.
3093 *
3094 * Since you modify orgicmp->icmp_id with
3095 * a delta (say x) and you compensate that
3096 * in origicmp->icmp_cksum with a delta
3097 * minus x, you don't have to adjust the
3098 * overall icmp->icmp_cksum
3099 */
3100 sum1 = ntohs(orgicmp->icmp_id);
3101 sum2 = ntohs(nat->nat_inport);
3102 CALC_SUMD(sum1, sum2, sumd);
3103 orgicmp->icmp_id = nat->nat_inport;
3104 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3105 }
3106 } /* nat_dir == NAT_INBOUND is impossible for icmp queries */
3107 }
3108 return nat;
3109 }
3110
3111
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 *     has already been done!
 */
3116
3117 /* ------------------------------------------------------------------------ */
3118 /* Function: nat_inlookup */
3119 /* Returns: nat_t* - NULL == no match, */
3120 /* else pointer to matching NAT entry */
3121 /* Parameters: fin(I) - pointer to packet information */
3122 /* flags(I) - NAT flags for this packet */
3123 /* p(I) - protocol for this packet */
3124 /* src(I) - source IP address */
3125 /* mapdst(I) - destination IP address */
3126 /* */
3127 /* Lookup a nat entry based on the mapped destination ip address/port and */
3128 /* real source address/port. We use this lookup when receiving a packet, */
3129 /* we're looking for a table entry, based on the destination address. */
3130 /* */
3131 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3132 /* */
3133 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3134 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3135 /* */
3136 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3137 /* the packet is of said protocol */
3138 /* ------------------------------------------------------------------------ */
nat_inlookup(fin,flags,p,src,mapdst)3139 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3140 fr_info_t *fin;
3141 u_int flags, p;
3142 struct in_addr src , mapdst;
3143 {
3144 u_short sport, dport;
3145 grehdr_t *gre;
3146 ipnat_t *ipn;
3147 u_int sflags;
3148 nat_t *nat;
3149 int nflags;
3150 u_32_t dst;
3151 void *ifp;
3152 u_int hv;
3153
3154 ifp = fin->fin_ifp;
3155 sport = 0;
3156 dport = 0;
3157 gre = NULL;
3158 dst = mapdst.s_addr;
3159 sflags = flags & NAT_TCPUDPICMP;
3160
3161 switch (p)
3162 {
3163 case IPPROTO_TCP :
3164 case IPPROTO_UDP :
3165 sport = htons(fin->fin_data[0]);
3166 dport = htons(fin->fin_data[1]);
3167 break;
3168 case IPPROTO_ICMP :
3169 if (flags & IPN_ICMPERR)
3170 sport = fin->fin_data[1];
3171 else
3172 dport = fin->fin_data[1];
3173 break;
3174 default :
3175 break;
3176 }
3177
3178
3179 if ((flags & SI_WILDP) != 0)
3180 goto find_in_wild_ports;
3181
3182 hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3183 hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3184 nat = nat_table[1][hv];
3185 for (; nat; nat = nat->nat_hnext[1]) {
3186 if (nat->nat_ifps[0] != NULL) {
3187 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3188 continue;
3189 } else if (ifp != NULL)
3190 nat->nat_ifps[0] = ifp;
3191
3192 nflags = nat->nat_flags;
3193
3194 if (nat->nat_oip.s_addr == src.s_addr &&
3195 nat->nat_outip.s_addr == dst &&
3196 (((p == 0) &&
3197 (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3198 || (p == nat->nat_p))) {
3199 switch (p)
3200 {
3201 #if 0
3202 case IPPROTO_GRE :
3203 if (nat->nat_call[1] != fin->fin_data[0])
3204 continue;
3205 break;
3206 #endif
3207 case IPPROTO_ICMP :
3208 if ((flags & IPN_ICMPERR) != 0) {
3209 if (nat->nat_outport != sport)
3210 continue;
3211 } else {
3212 if (nat->nat_outport != dport)
3213 continue;
3214 }
3215 break;
3216 case IPPROTO_TCP :
3217 case IPPROTO_UDP :
3218 if (nat->nat_oport != sport)
3219 continue;
3220 if (nat->nat_outport != dport)
3221 continue;
3222 break;
3223 default :
3224 break;
3225 }
3226
3227 ipn = nat->nat_ptr;
3228 if ((ipn != NULL) && (nat->nat_aps != NULL))
3229 if (appr_match(fin, nat) != 0)
3230 continue;
3231 return nat;
3232 }
3233 }
3234
3235 /*
3236 * So if we didn't find it but there are wildcard members in the hash
3237 * table, go back and look for them. We do this search and update here
3238 * because it is modifying the NAT table and we want to do this only
3239 * for the first packet that matches. The exception, of course, is
3240 * for "dummy" (FI_IGNORE) lookups.
3241 */
3242 find_in_wild_ports:
3243 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3244 return NULL;
3245 if (nat_stats.ns_wilds == 0)
3246 return NULL;
3247
3248 RWLOCK_EXIT(&ipf_nat);
3249
3250 hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3251 hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3252
3253 WRITE_ENTER(&ipf_nat);
3254
3255 nat = nat_table[1][hv];
3256 for (; nat; nat = nat->nat_hnext[1]) {
3257 if (nat->nat_ifps[0] != NULL) {
3258 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3259 continue;
3260 } else if (ifp != NULL)
3261 nat->nat_ifps[0] = ifp;
3262
3263 if (nat->nat_p != fin->fin_p)
3264 continue;
3265 if (nat->nat_oip.s_addr != src.s_addr ||
3266 nat->nat_outip.s_addr != dst)
3267 continue;
3268
3269 nflags = nat->nat_flags;
3270 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3271 continue;
3272
3273 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3274 NAT_INBOUND) == 1) {
3275 if ((fin->fin_flx & FI_IGNORE) != 0)
3276 break;
3277 if ((nflags & SI_CLONE) != 0) {
3278 nat = fr_natclone(fin, nat);
3279 if (nat == NULL)
3280 break;
3281 } else {
3282 MUTEX_ENTER(&ipf_nat_new);
3283 nat_stats.ns_wilds--;
3284 MUTEX_EXIT(&ipf_nat_new);
3285 }
3286 nat->nat_oport = sport;
3287 nat->nat_outport = dport;
3288 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3289 nat_tabmove(nat);
3290 break;
3291 }
3292 }
3293
3294 MUTEX_DOWNGRADE(&ipf_nat);
3295
3296 return nat;
3297 }
3298
3299
3300 /* ------------------------------------------------------------------------ */
3301 /* Function: nat_tabmove */
3302 /* Returns: Nil */
3303 /* Parameters: nat(I) - pointer to NAT structure */
3304 /* Write Lock: ipf_nat */
3305 /* */
3306 /* This function is only called for TCP/UDP NAT table entries where the */
3307 /* original was placed in the table without hashing on the ports and we now */
3308 /* want to include hashing on port numbers. */
3309 /* ------------------------------------------------------------------------ */
nat_tabmove(nat)3310 static void nat_tabmove(nat)
3311 nat_t *nat;
3312 {
3313 nat_t **natp;
3314 u_int hv;
3315
3316 if (nat->nat_flags & SI_CLONE)
3317 return;
3318
3319 /*
3320 * Remove the NAT entry from the old location
3321 */
3322 if (nat->nat_hnext[0])
3323 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3324 *nat->nat_phnext[0] = nat->nat_hnext[0];
3325 nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3326
3327 if (nat->nat_hnext[1])
3328 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3329 *nat->nat_phnext[1] = nat->nat_hnext[1];
3330 nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3331
3332 /*
3333 * Add into the NAT table in the new position
3334 */
3335 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3336 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3337 ipf_nattable_sz);
3338 nat->nat_hv[0] = hv;
3339 natp = &nat_table[0][hv];
3340 if (*natp)
3341 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3342 nat->nat_phnext[0] = natp;
3343 nat->nat_hnext[0] = *natp;
3344 *natp = nat;
3345 nat_stats.ns_bucketlen[0][hv]++;
3346
3347 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3348 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3349 ipf_nattable_sz);
3350 nat->nat_hv[1] = hv;
3351 natp = &nat_table[1][hv];
3352 if (*natp)
3353 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3354 nat->nat_phnext[1] = natp;
3355 nat->nat_hnext[1] = *natp;
3356 *natp = nat;
3357 nat_stats.ns_bucketlen[1][hv]++;
3358 }
3359
3360
3361 /* ------------------------------------------------------------------------ */
3362 /* Function: nat_outlookup */
3363 /* Returns: nat_t* - NULL == no match, */
3364 /* else pointer to matching NAT entry */
3365 /* Parameters: fin(I) - pointer to packet information */
3366 /* flags(I) - NAT flags for this packet */
3367 /* p(I) - protocol for this packet */
3368 /* src(I) - source IP address */
3369 /* dst(I) - destination IP address */
3370 /* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */
3371 /* */
3372 /* Lookup a nat entry based on the source 'real' ip address/port and */
3373 /* destination address/port. We use this lookup when sending a packet out, */
3374 /* we're looking for a table entry, based on the source address. */
3375 /* */
3376 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3377 /* */
3378 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3379 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3380 /* */
3381 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3382 /* the packet is of said protocol */
3383 /* ------------------------------------------------------------------------ */
nat_outlookup(fin,flags,p,src,dst)3384 nat_t *nat_outlookup(fin, flags, p, src, dst)
3385 fr_info_t *fin;
3386 u_int flags, p;
3387 struct in_addr src , dst;
3388 {
3389 u_short sport, dport;
3390 u_int sflags;
3391 ipnat_t *ipn;
3392 u_32_t srcip;
3393 nat_t *nat;
3394 int nflags;
3395 void *ifp;
3396 u_int hv;
3397
3398 ifp = fin->fin_ifp;
3399 srcip = src.s_addr;
3400 sflags = flags & IPN_TCPUDPICMP;
3401 sport = 0;
3402 dport = 0;
3403
3404 switch (p)
3405 {
3406 case IPPROTO_TCP :
3407 case IPPROTO_UDP :
3408 sport = htons(fin->fin_data[0]);
3409 dport = htons(fin->fin_data[1]);
3410 break;
3411 case IPPROTO_ICMP :
3412 if (flags & IPN_ICMPERR)
3413 sport = fin->fin_data[1];
3414 else
3415 dport = fin->fin_data[1];
3416 break;
3417 default :
3418 break;
3419 }
3420
3421 if ((flags & SI_WILDP) != 0)
3422 goto find_out_wild_ports;
3423
3424 hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3425 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3426 nat = nat_table[0][hv];
3427 for (; nat; nat = nat->nat_hnext[0]) {
3428 if (nat->nat_ifps[1] != NULL) {
3429 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3430 continue;
3431 } else if (ifp != NULL)
3432 nat->nat_ifps[1] = ifp;
3433
3434 nflags = nat->nat_flags;
3435
3436 if (nat->nat_inip.s_addr == srcip &&
3437 nat->nat_oip.s_addr == dst.s_addr &&
3438 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3439 || (p == nat->nat_p))) {
3440 switch (p)
3441 {
3442 #if 0
3443 case IPPROTO_GRE :
3444 if (nat->nat_call[1] != fin->fin_data[0])
3445 continue;
3446 break;
3447 #endif
3448 case IPPROTO_TCP :
3449 case IPPROTO_UDP :
3450 if (nat->nat_oport != dport)
3451 continue;
3452 if (nat->nat_inport != sport)
3453 continue;
3454 break;
3455 default :
3456 break;
3457 }
3458
3459 ipn = nat->nat_ptr;
3460 if ((ipn != NULL) && (nat->nat_aps != NULL))
3461 if (appr_match(fin, nat) != 0)
3462 continue;
3463 return nat;
3464 }
3465 }
3466
3467 /*
3468 * So if we didn't find it but there are wildcard members in the hash
3469 * table, go back and look for them. We do this search and update here
3470 * because it is modifying the NAT table and we want to do this only
3471 * for the first packet that matches. The exception, of course, is
3472 * for "dummy" (FI_IGNORE) lookups.
3473 */
3474 find_out_wild_ports:
3475 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3476 return NULL;
3477 if (nat_stats.ns_wilds == 0)
3478 return NULL;
3479
3480 RWLOCK_EXIT(&ipf_nat);
3481
3482 hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3483 hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3484
3485 WRITE_ENTER(&ipf_nat);
3486
3487 nat = nat_table[0][hv];
3488 for (; nat; nat = nat->nat_hnext[0]) {
3489 if (nat->nat_ifps[1] != NULL) {
3490 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3491 continue;
3492 } else if (ifp != NULL)
3493 nat->nat_ifps[1] = ifp;
3494
3495 if (nat->nat_p != fin->fin_p)
3496 continue;
3497 if ((nat->nat_inip.s_addr != srcip) ||
3498 (nat->nat_oip.s_addr != dst.s_addr))
3499 continue;
3500
3501 nflags = nat->nat_flags;
3502 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3503 continue;
3504
3505 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3506 NAT_OUTBOUND) == 1) {
3507 if ((fin->fin_flx & FI_IGNORE) != 0)
3508 break;
3509 if ((nflags & SI_CLONE) != 0) {
3510 nat = fr_natclone(fin, nat);
3511 if (nat == NULL)
3512 break;
3513 } else {
3514 MUTEX_ENTER(&ipf_nat_new);
3515 nat_stats.ns_wilds--;
3516 MUTEX_EXIT(&ipf_nat_new);
3517 }
3518 nat->nat_inport = sport;
3519 nat->nat_oport = dport;
3520 if (nat->nat_outport == 0)
3521 nat->nat_outport = sport;
3522 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3523 nat_tabmove(nat);
3524 break;
3525 }
3526 }
3527
3528 MUTEX_DOWNGRADE(&ipf_nat);
3529
3530 return nat;
3531 }
3532
3533
3534 /* ------------------------------------------------------------------------ */
3535 /* Function: nat_lookupredir */
3536 /* Returns: nat_t* - NULL == no match, */
3537 /* else pointer to matching NAT entry */
3538 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3539 /* entry for. */
3540 /* */
3541 /* Lookup the NAT tables to search for a matching redirect */
3542 /* The contents of natlookup_t should imitate those found in a packet that */
3543 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3544 /* We can do the lookup in one of two ways, imitating an inbound or */
3545 /* outbound packet. By default we assume outbound, unless IPN_IN is set. */
3546 /* For IN, the fields are set as follows: */
3547 /* nl_real* = source information */
3548 /* nl_out* = destination information (translated) */
3549 /* For an out packet, the fields are set like this: */
3550 /* nl_in* = source information (untranslated) */
3551 /* nl_out* = destination information (translated) */
3552 /* ------------------------------------------------------------------------ */
nat_lookupredir(np)3553 nat_t *nat_lookupredir(np)
3554 natlookup_t *np;
3555 {
3556 fr_info_t fi;
3557 nat_t *nat;
3558
3559 bzero((char *)&fi, sizeof(fi));
3560 if (np->nl_flags & IPN_IN) {
3561 fi.fin_data[0] = ntohs(np->nl_realport);
3562 fi.fin_data[1] = ntohs(np->nl_outport);
3563 } else {
3564 fi.fin_data[0] = ntohs(np->nl_inport);
3565 fi.fin_data[1] = ntohs(np->nl_outport);
3566 }
3567 if (np->nl_flags & IPN_TCP)
3568 fi.fin_p = IPPROTO_TCP;
3569 else if (np->nl_flags & IPN_UDP)
3570 fi.fin_p = IPPROTO_UDP;
3571 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3572 fi.fin_p = IPPROTO_ICMP;
3573
3574 /*
3575 * We can do two sorts of lookups:
3576 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3577 * - default: we have the `in' and `out' address, look for `real'.
3578 */
3579 if (np->nl_flags & IPN_IN) {
3580 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3581 np->nl_realip, np->nl_outip))) {
3582 np->nl_inip = nat->nat_inip;
3583 np->nl_inport = nat->nat_inport;
3584 }
3585 } else {
3586 /*
3587 * If nl_inip is non null, this is a lookup based on the real
3588 * ip address. Else, we use the fake.
3589 */
3590 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3591 np->nl_inip, np->nl_outip))) {
3592
3593 if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3594 fr_info_t fin;
3595 bzero((char *)&fin, sizeof(fin));
3596 fin.fin_p = nat->nat_p;
3597 fin.fin_data[0] = ntohs(nat->nat_outport);
3598 fin.fin_data[1] = ntohs(nat->nat_oport);
3599 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3600 nat->nat_outip,
3601 nat->nat_oip) != NULL) {
3602 np->nl_flags &= ~IPN_FINDFORWARD;
3603 }
3604 }
3605
3606 np->nl_realip = nat->nat_outip;
3607 np->nl_realport = nat->nat_outport;
3608 }
3609 }
3610
3611 return nat;
3612 }
3613
3614
3615 /* ------------------------------------------------------------------------ */
3616 /* Function: nat_match */
3617 /* Returns: int - 0 == no match, 1 == match */
3618 /* Parameters: fin(I) - pointer to packet information */
3619 /* np(I) - pointer to NAT rule */
3620 /* */
3621 /* Pull the matching of a packet against a NAT rule out of that complex */
3622 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3623 /* ------------------------------------------------------------------------ */
nat_match(fin,np)3624 static int nat_match(fin, np)
3625 fr_info_t *fin;
3626 ipnat_t *np;
3627 {
3628 frtuc_t *ft;
3629
3630 if (fin->fin_v != 4)
3631 return 0;
3632
3633 if (np->in_p && fin->fin_p != np->in_p)
3634 return 0;
3635
3636 if (fin->fin_out) {
3637 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3638 return 0;
3639 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3640 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3641 return 0;
3642 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3643 ^ ((np->in_flags & IPN_NOTDST) != 0))
3644 return 0;
3645 } else {
3646 if (!(np->in_redir & NAT_REDIRECT))
3647 return 0;
3648 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3649 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3650 return 0;
3651 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3652 ^ ((np->in_flags & IPN_NOTDST) != 0))
3653 return 0;
3654 }
3655
3656 ft = &np->in_tuc;
3657 if (!(fin->fin_flx & FI_TCPUDP) ||
3658 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3659 if (ft->ftu_scmp || ft->ftu_dcmp)
3660 return 0;
3661 return 1;
3662 }
3663
3664 return fr_tcpudpchk(fin, ft);
3665 }
3666
3667
3668 /* ------------------------------------------------------------------------ */
3669 /* Function: nat_update */
3670 /* Returns: Nil */
3671 /* Parameters: nat(I) - pointer to NAT structure */
3672 /* np(I) - pointer to NAT rule */
3673 /* */
3674 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3675 /* called with fin_rev updated - i.e. after calling nat_proto(). */
3676 /* ------------------------------------------------------------------------ */
nat_update(fin,nat,np)3677 void nat_update(fin, nat, np)
3678 fr_info_t *fin;
3679 nat_t *nat;
3680 ipnat_t *np;
3681 {
3682 ipftq_t *ifq, *ifq2;
3683 ipftqent_t *tqe;
3684
3685 MUTEX_ENTER(&nat->nat_lock);
3686 tqe = &nat->nat_tqe;
3687 ifq = tqe->tqe_ifq;
3688
3689 /*
3690 * We allow over-riding of NAT timeouts from NAT rules, even for
3691 * TCP, however, if it is TCP and there is no rule timeout set,
3692 * then do not update the timeout here.
3693 */
3694 if (np != NULL)
3695 ifq2 = np->in_tqehead[fin->fin_rev];
3696 else
3697 ifq2 = NULL;
3698
3699 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3700 u_32_t end, ack;
3701 u_char tcpflags;
3702 tcphdr_t *tcp;
3703 int dsize;
3704
3705 tcp = fin->fin_dp;
3706 tcpflags = tcp->th_flags;
3707 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3708 ((tcpflags & TH_SYN) ? 1 : 0) +
3709 ((tcpflags & TH_FIN) ? 1 : 0);
3710
3711 ack = ntohl(tcp->th_ack);
3712 end = ntohl(tcp->th_seq) + dsize;
3713
3714 if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3715 nat->nat_seqnext[1 - fin->fin_rev] = ack;
3716
3717 if (nat->nat_seqnext[fin->fin_rev] == 0)
3718 nat->nat_seqnext[fin->fin_rev] = end;
3719
3720 (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3721 } else {
3722 if (ifq2 == NULL) {
3723 if (nat->nat_p == IPPROTO_UDP)
3724 ifq2 = &nat_udptq;
3725 else if (nat->nat_p == IPPROTO_ICMP)
3726 ifq2 = &nat_icmptq;
3727 else
3728 ifq2 = &nat_iptq;
3729 }
3730
3731 fr_movequeue(tqe, ifq, ifq2);
3732 }
3733 MUTEX_EXIT(&nat->nat_lock);
3734 }
3735
3736
3737 /* ------------------------------------------------------------------------ */
3738 /* Function: fr_checknatout */
3739 /* Returns: int - -1 == packet failed NAT checks so block it, */
3740 /* 0 == no packet translation occurred, */
3741 /* 1 == packet was successfully translated. */
3742 /* Parameters: fin(I) - pointer to packet information */
3743 /* passp(I) - pointer to filtering result flags */
3744 /* */
3745 /* Check to see if an outcoming packet should be changed. ICMP packets are */
3746 /* first checked to see if they match an existing entry (if an error), */
3747 /* otherwise a search of the current NAT table is made. If neither results */
3748 /* in a match then a search for a matching NAT rule is made. Create a new */
3749 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */
3750 /* packet header(s) as required. */
3751 /* ------------------------------------------------------------------------ */
fr_checknatout(fin,passp)3752 int fr_checknatout(fin, passp)
3753 fr_info_t *fin;
3754 u_32_t *passp;
3755 {
3756 struct ifnet *ifp, *sifp;
3757 icmphdr_t *icmp = NULL;
3758 tcphdr_t *tcp = NULL;
3759 int rval, natfailed;
3760 ipnat_t *np = NULL;
3761 u_int nflags = 0;
3762 u_32_t ipa, iph;
3763 int natadd = 1;
3764 frentry_t *fr;
3765 nat_t *nat;
3766
3767 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3768 return 0;
3769
3770 natfailed = 0;
3771 fr = fin->fin_fr;
3772 sifp = fin->fin_ifp;
3773 if (fr != NULL) {
3774 ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3775 if ((ifp != NULL) && (ifp != (void *)-1))
3776 fin->fin_ifp = ifp;
3777 }
3778 ifp = fin->fin_ifp;
3779
3780 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3781 switch (fin->fin_p)
3782 {
3783 case IPPROTO_TCP :
3784 nflags = IPN_TCP;
3785 break;
3786 case IPPROTO_UDP :
3787 nflags = IPN_UDP;
3788 break;
3789 case IPPROTO_ICMP :
3790 icmp = fin->fin_dp;
3791
3792 /*
3793 * This is an incoming packet, so the destination is
3794 * the icmp_id and the source port equals 0
3795 */
3796 if (nat_icmpquerytype4(icmp->icmp_type))
3797 nflags = IPN_ICMPQUERY;
3798 break;
3799 default :
3800 break;
3801 }
3802
3803 if ((nflags & IPN_TCPUDP))
3804 tcp = fin->fin_dp;
3805 }
3806
3807 ipa = fin->fin_saddr;
3808
3809 READ_ENTER(&ipf_nat);
3810
3811 if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3812 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3813 /*EMPTY*/;
3814 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3815 natadd = 0;
3816 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3817 fin->fin_src, fin->fin_dst))) {
3818 nflags = nat->nat_flags;
3819 } else {
3820 u_32_t hv, msk, nmsk;
3821
3822 /*
3823 * If there is no current entry in the nat table for this IP#,
3824 * create one for it (if there is a matching rule).
3825 */
3826 RWLOCK_EXIT(&ipf_nat);
3827 msk = 0xffffffff;
3828 nmsk = nat_masks;
3829 WRITE_ENTER(&ipf_nat);
3830 maskloop:
3831 iph = ipa & htonl(msk);
3832 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3833 for (np = nat_rules[hv]; np; np = np->in_mnext)
3834 {
3835 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3836 continue;
3837 if (np->in_v != fin->fin_v)
3838 continue;
3839 if (np->in_p && (np->in_p != fin->fin_p))
3840 continue;
3841 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3842 continue;
3843 if (np->in_flags & IPN_FILTER) {
3844 if (!nat_match(fin, np))
3845 continue;
3846 } else if ((ipa & np->in_inmsk) != np->in_inip)
3847 continue;
3848
3849 if ((fr != NULL) &&
3850 !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3851 continue;
3852
3853 if (*np->in_plabel != '\0') {
3854 if (((np->in_flags & IPN_FILTER) == 0) &&
3855 (np->in_dport != tcp->th_dport))
3856 continue;
3857 if (appr_ok(fin, tcp, np) == 0)
3858 continue;
3859 }
3860
3861 if ((nat = nat_new(fin, np, NULL, nflags,
3862 NAT_OUTBOUND))) {
3863 np->in_hits++;
3864 break;
3865 } else
3866 natfailed = -1;
3867 }
3868 if ((np == NULL) && (nmsk != 0)) {
3869 while (nmsk) {
3870 msk <<= 1;
3871 if (nmsk & 0x80000000)
3872 break;
3873 nmsk <<= 1;
3874 }
3875 if (nmsk != 0) {
3876 nmsk <<= 1;
3877 goto maskloop;
3878 }
3879 }
3880 MUTEX_DOWNGRADE(&ipf_nat);
3881 }
3882
3883 if (nat != NULL) {
3884 rval = fr_natout(fin, nat, natadd, nflags);
3885 if (rval == 1) {
3886 MUTEX_ENTER(&nat->nat_lock);
3887 nat->nat_ref++;
3888 MUTEX_EXIT(&nat->nat_lock);
3889 nat->nat_touched = fr_ticks;
3890 fin->fin_nat = nat;
3891 }
3892 } else
3893 rval = natfailed;
3894 RWLOCK_EXIT(&ipf_nat);
3895
3896 if (rval == -1) {
3897 if (passp != NULL)
3898 *passp = FR_BLOCK;
3899 fin->fin_flx |= FI_BADNAT;
3900 }
3901 fin->fin_ifp = sifp;
3902 return rval;
3903 }
3904
3905 /* ------------------------------------------------------------------------ */
3906 /* Function: fr_natout */
3907 /* Returns: int - -1 == packet failed NAT checks so block it, */
3908 /* 1 == packet was successfully translated. */
3909 /* Parameters: fin(I) - pointer to packet information */
3910 /* nat(I) - pointer to NAT structure */
3911 /* natadd(I) - flag indicating if it is safe to add frag cache */
3912 /* nflags(I) - NAT flags set for this packet */
3913 /* */
3914 /* Translate a packet coming "out" on an interface. */
3915 /* ------------------------------------------------------------------------ */
fr_natout(fin,nat,natadd,nflags)3916 int fr_natout(fin, nat, natadd, nflags)
3917 fr_info_t *fin;
3918 nat_t *nat;
3919 int natadd;
3920 u_32_t nflags;
3921 {
3922 icmphdr_t *icmp;
3923 u_short *csump;
3924 tcphdr_t *tcp;
3925 ipnat_t *np;
3926 int i;
3927
3928 tcp = NULL;
3929 icmp = NULL;
3930 csump = NULL;
3931 np = nat->nat_ptr;
3932
3933 if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3934 (void) fr_nat_newfrag(fin, 0, nat);
3935
3936 MUTEX_ENTER(&nat->nat_lock);
3937 nat->nat_bytes[1] += fin->fin_plen;
3938 nat->nat_pkts[1]++;
3939 MUTEX_EXIT(&nat->nat_lock);
3940
3941 /*
3942 * Fix up checksums, not by recalculating them, but
3943 * simply computing adjustments.
3944 * This is only done for STREAMS based IP implementations where the
3945 * checksum has already been calculated by IP. In all other cases,
3946 * IPFilter is called before the checksum needs calculating so there
3947 * is no call to modify whatever is in the header now.
3948 */
3949 if (fin->fin_v == 4) {
3950 if (nflags == IPN_ICMPERR) {
3951 u_32_t s1, s2, sumd;
3952
3953 s1 = LONG_SUM(ntohl(fin->fin_saddr));
3954 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3955 CALC_SUMD(s1, s2, sumd);
3956 fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3957 }
3958 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3959 defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__)
3960 else {
3961 /*
3962 * Strictly speaking, this isn't necessary on BSD
3963 * kernels because they do checksum calculation after
3964 * this code has run BUT if ipfilter is being used
3965 * to do NAT as a bridge, that code doesn't exist.
3966 */
3967 if (nat->nat_dir == NAT_OUTBOUND)
3968 fix_outcksum(fin, &fin->fin_ip->ip_sum,
3969 nat->nat_ipsumd);
3970 else
3971 fix_incksum(fin, &fin->fin_ip->ip_sum,
3972 nat->nat_ipsumd);
3973 }
3974 #endif
3975 }
3976
3977 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3978 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3979 tcp = fin->fin_dp;
3980
3981 tcp->th_sport = nat->nat_outport;
3982 fin->fin_data[0] = ntohs(nat->nat_outport);
3983 }
3984
3985 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3986 icmp = fin->fin_dp;
3987 icmp->icmp_id = nat->nat_outport;
3988 }
3989
3990 csump = nat_proto(fin, nat, nflags);
3991 }
3992
3993 fin->fin_ip->ip_src = nat->nat_outip;
3994
3995 nat_update(fin, nat, np);
3996
3997 /*
3998 * The above comments do not hold for layer 4 (or higher) checksums...
3999 */
4000 if (csump != NULL) {
4001 if (nat->nat_dir == NAT_OUTBOUND)
4002 fix_outcksum(fin, csump, nat->nat_sumd[1]);
4003 else
4004 fix_incksum(fin, csump, nat->nat_sumd[1]);
4005 }
4006 #ifdef IPFILTER_SYNC
4007 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4008 #endif
4009 /* ------------------------------------------------------------- */
4010 /* A few quick notes: */
4011 /* Following are test conditions prior to calling the */
4012 /* appr_check routine. */
4013 /* */
4014 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4015 /* with a redirect rule, we attempt to match the packet's */
4016 /* source port against in_dport, otherwise we'd compare the */
4017 /* packet's destination. */
4018 /* ------------------------------------------------------------- */
4019 if ((np != NULL) && (np->in_apr != NULL)) {
4020 i = appr_check(fin, nat);
4021 if (i == 0)
4022 i = 1;
4023 } else
4024 i = 1;
4025 ATOMIC_INCL(nat_stats.ns_mapped[1]);
4026 fin->fin_flx |= FI_NATED;
4027 return i;
4028 }
4029
4030
4031 /* ------------------------------------------------------------------------ */
4032 /* Function: fr_checknatin */
4033 /* Returns: int - -1 == packet failed NAT checks so block it, */
4034 /* 0 == no packet translation occurred, */
4035 /* 1 == packet was successfully translated. */
4036 /* Parameters: fin(I) - pointer to packet information */
4037 /* passp(I) - pointer to filtering result flags */
4038 /* */
4039 /* Check to see if an incoming packet should be changed. ICMP packets are */
4040 /* first checked to see if they match an existing entry (if an error), */
4041 /* otherwise a search of the current NAT table is made. If neither results */
4042 /* in a match then a search for a matching NAT rule is made. Create a new */
4043 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */
4044 /* packet header(s) as required. */
4045 /* ------------------------------------------------------------------------ */
fr_checknatin(fin,passp)4046 int fr_checknatin(fin, passp)
4047 fr_info_t *fin;
4048 u_32_t *passp;
4049 {
4050 u_int nflags, natadd;
4051 int rval, natfailed;
4052 struct ifnet *ifp;
4053 struct in_addr in;
4054 icmphdr_t *icmp;
4055 tcphdr_t *tcp;
4056 u_short dport;
4057 ipnat_t *np;
4058 nat_t *nat;
4059 u_32_t iph;
4060
4061 if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4062 return 0;
4063
4064 tcp = NULL;
4065 icmp = NULL;
4066 dport = 0;
4067 natadd = 1;
4068 nflags = 0;
4069 natfailed = 0;
4070 ifp = fin->fin_ifp;
4071
4072 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4073 switch (fin->fin_p)
4074 {
4075 case IPPROTO_TCP :
4076 nflags = IPN_TCP;
4077 break;
4078 case IPPROTO_UDP :
4079 nflags = IPN_UDP;
4080 break;
4081 case IPPROTO_ICMP :
4082 icmp = fin->fin_dp;
4083
4084 /*
4085 * This is an incoming packet, so the destination is
4086 * the icmp_id and the source port equals 0
4087 */
4088 if (nat_icmpquerytype4(icmp->icmp_type)) {
4089 nflags = IPN_ICMPQUERY;
4090 dport = icmp->icmp_id;
4091 } break;
4092 default :
4093 break;
4094 }
4095
4096 if ((nflags & IPN_TCPUDP)) {
4097 tcp = fin->fin_dp;
4098 dport = tcp->th_dport;
4099 }
4100 }
4101
4102 in = fin->fin_dst;
4103
4104 READ_ENTER(&ipf_nat);
4105
4106 if (((fin->fin_flx & FI_ICMPERR) != 0) &&
4107 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4108 /*EMPTY*/;
4109 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4110 natadd = 0;
4111 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4112 fin->fin_src, in))) {
4113 nflags = nat->nat_flags;
4114 } else {
4115 u_32_t hv, msk, rmsk;
4116
4117 RWLOCK_EXIT(&ipf_nat);
4118 rmsk = rdr_masks;
4119 msk = 0xffffffff;
4120 WRITE_ENTER(&ipf_nat);
4121 /*
4122 * If there is no current entry in the nat table for this IP#,
4123 * create one for it (if there is a matching rule).
4124 */
4125 maskloop:
4126 iph = in.s_addr & htonl(msk);
4127 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4128 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4129 if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4130 continue;
4131 if (np->in_v != fin->fin_v)
4132 continue;
4133 if (np->in_p && (np->in_p != fin->fin_p))
4134 continue;
4135 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4136 continue;
4137 if (np->in_flags & IPN_FILTER) {
4138 if (!nat_match(fin, np))
4139 continue;
4140 } else {
4141 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4142 continue;
4143 if (np->in_pmin &&
4144 ((ntohs(np->in_pmax) < ntohs(dport)) ||
4145 (ntohs(dport) < ntohs(np->in_pmin))))
4146 continue;
4147 }
4148
4149 if (*np->in_plabel != '\0') {
4150 if (!appr_ok(fin, tcp, np)) {
4151 continue;
4152 }
4153 }
4154
4155 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4156 if (nat != NULL) {
4157 np->in_hits++;
4158 break;
4159 } else
4160 natfailed = -1;
4161 }
4162
4163 if ((np == NULL) && (rmsk != 0)) {
4164 while (rmsk) {
4165 msk <<= 1;
4166 if (rmsk & 0x80000000)
4167 break;
4168 rmsk <<= 1;
4169 }
4170 if (rmsk != 0) {
4171 rmsk <<= 1;
4172 goto maskloop;
4173 }
4174 }
4175 MUTEX_DOWNGRADE(&ipf_nat);
4176 }
4177 if (nat != NULL) {
4178 rval = fr_natin(fin, nat, natadd, nflags);
4179 if (rval == 1) {
4180 MUTEX_ENTER(&nat->nat_lock);
4181 nat->nat_ref++;
4182 MUTEX_EXIT(&nat->nat_lock);
4183 nat->nat_touched = fr_ticks;
4184 fin->fin_nat = nat;
4185 }
4186 } else
4187 rval = natfailed;
4188 RWLOCK_EXIT(&ipf_nat);
4189
4190 if (rval == -1) {
4191 if (passp != NULL)
4192 *passp = FR_BLOCK;
4193 fin->fin_flx |= FI_BADNAT;
4194 }
4195 return rval;
4196 }
4197
4198
4199 /* ------------------------------------------------------------------------ */
4200 /* Function: fr_natin */
4201 /* Returns: int - -1 == packet failed NAT checks so block it, */
4202 /* 1 == packet was successfully translated. */
4203 /* Parameters: fin(I) - pointer to packet information */
4204 /* nat(I) - pointer to NAT structure */
4205 /* natadd(I) - flag indicating if it is safe to add frag cache */
4206 /* nflags(I) - NAT flags set for this packet */
4207 /* Locks Held: ipf_nat (READ) */
4208 /* */
4209 /* Translate a packet coming "in" on an interface. */
4210 /* ------------------------------------------------------------------------ */
fr_natin(fin,nat,natadd,nflags)4211 int fr_natin(fin, nat, natadd, nflags)
4212 fr_info_t *fin;
4213 nat_t *nat;
4214 int natadd;
4215 u_32_t nflags;
4216 {
4217 icmphdr_t *icmp;
4218 u_short *csump;
4219 tcphdr_t *tcp;
4220 ipnat_t *np;
4221 int i;
4222
4223 tcp = NULL;
4224 csump = NULL;
4225 np = nat->nat_ptr;
4226 fin->fin_fr = nat->nat_fr;
4227
4228 if (np != NULL) {
4229 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4230 (void) fr_nat_newfrag(fin, 0, nat);
4231
4232 /* ------------------------------------------------------------- */
4233 /* A few quick notes: */
4234 /* Following are test conditions prior to calling the */
4235 /* appr_check routine. */
4236 /* */
4237 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4238 /* with a map rule, we attempt to match the packet's */
4239 /* source port against in_dport, otherwise we'd compare the */
4240 /* packet's destination. */
4241 /* ------------------------------------------------------------- */
4242 if (np->in_apr != NULL) {
4243 i = appr_check(fin, nat);
4244 if (i == -1) {
4245 return -1;
4246 }
4247 }
4248 }
4249
4250 #ifdef IPFILTER_SYNC
4251 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4252 #endif
4253
4254 MUTEX_ENTER(&nat->nat_lock);
4255 nat->nat_bytes[0] += fin->fin_plen;
4256 nat->nat_pkts[0]++;
4257 MUTEX_EXIT(&nat->nat_lock);
4258
4259 fin->fin_ip->ip_dst = nat->nat_inip;
4260 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4261 if (nflags & IPN_TCPUDP)
4262 tcp = fin->fin_dp;
4263
4264 /*
4265 * Fix up checksums, not by recalculating them, but
4266 * simply computing adjustments.
4267 * Why only do this for some platforms on inbound packets ?
4268 * Because for those that it is done, IP processing is yet to happen
4269 * and so the IPv4 header checksum has not yet been evaluated.
4270 * Perhaps it should always be done for the benefit of things like
4271 * fast forwarding (so that it doesn't need to be recomputed) but with
4272 * header checksum offloading, perhaps it is a moot point.
4273 */
4274 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4275 defined(__osf__) || defined(linux)
4276 if (nat->nat_dir == NAT_OUTBOUND)
4277 fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4278 else
4279 fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4280 #endif
4281
4282 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4283 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4284 tcp->th_dport = nat->nat_inport;
4285 fin->fin_data[1] = ntohs(nat->nat_inport);
4286 }
4287
4288
4289 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4290 icmp = fin->fin_dp;
4291
4292 icmp->icmp_id = nat->nat_inport;
4293 }
4294
4295 csump = nat_proto(fin, nat, nflags);
4296 }
4297
4298 nat_update(fin, nat, np);
4299
4300 /*
4301 * The above comments do not hold for layer 4 (or higher) checksums...
4302 */
4303 if (csump != NULL) {
4304 if (nat->nat_dir == NAT_OUTBOUND)
4305 fix_incksum(fin, csump, nat->nat_sumd[0]);
4306 else
4307 fix_outcksum(fin, csump, nat->nat_sumd[0]);
4308 }
4309 ATOMIC_INCL(nat_stats.ns_mapped[0]);
4310 fin->fin_flx |= FI_NATED;
4311 if (np != NULL && np->in_tag.ipt_num[0] != 0)
4312 fin->fin_nattag = &np->in_tag;
4313 return 1;
4314 }
4315
4316
4317 /* ------------------------------------------------------------------------ */
4318 /* Function: nat_proto */
4319 /* Returns: u_short* - pointer to transport header checksum to update, */
4320 /* NULL if the transport protocol is not recognised */
4321 /* as needing a checksum update. */
4322 /* Parameters: fin(I) - pointer to packet information */
4323 /* nat(I) - pointer to NAT structure */
4324 /* nflags(I) - NAT flags set for this packet */
4325 /* */
4326 /* Return the pointer to the checksum field for each protocol so understood.*/
4327 /* If support for making other changes to a protocol header is required, */
4328 /* that is not strictly 'address' translation, such as clamping the MSS in */
4329 /* TCP down to a specific value, then do it from here. */
4330 /* ------------------------------------------------------------------------ */
nat_proto(fin,nat,nflags)4331 u_short *nat_proto(fin, nat, nflags)
4332 fr_info_t *fin;
4333 nat_t *nat;
4334 u_int nflags;
4335 {
4336 icmphdr_t *icmp;
4337 u_short *csump;
4338 tcphdr_t *tcp;
4339 udphdr_t *udp;
4340
4341 csump = NULL;
4342 if (fin->fin_out == 0) {
4343 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4344 } else {
4345 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4346 }
4347
4348 switch (fin->fin_p)
4349 {
4350 case IPPROTO_TCP :
4351 tcp = fin->fin_dp;
4352
4353 csump = &tcp->th_sum;
4354
4355 /*
4356 * Do a MSS CLAMPING on a SYN packet,
4357 * only deal IPv4 for now.
4358 */
4359 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4360 nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4361
4362 break;
4363
4364 case IPPROTO_UDP :
4365 udp = fin->fin_dp;
4366
4367 if (udp->uh_sum)
4368 csump = &udp->uh_sum;
4369 break;
4370
4371 case IPPROTO_ICMP :
4372 icmp = fin->fin_dp;
4373
4374 if ((nflags & IPN_ICMPQUERY) != 0) {
4375 if (icmp->icmp_cksum != 0)
4376 csump = &icmp->icmp_cksum;
4377 }
4378 break;
4379 }
4380 return csump;
4381 }
4382
4383
4384 /* ------------------------------------------------------------------------ */
4385 /* Function: fr_natunload */
4386 /* Returns: Nil */
4387 /* Parameters: Nil */
4388 /* */
4389 /* Free all memory used by NAT structures allocated at runtime. */
4390 /* ------------------------------------------------------------------------ */
fr_natunload()4391 void fr_natunload()
4392 {
4393 ipftq_t *ifq, *ifqnext;
4394
4395 (void) nat_clearlist();
4396 (void) nat_flushtable();
4397
4398 /*
4399 * Proxy timeout queues are not cleaned here because although they
4400 * exist on the NAT list, appr_unload is called after fr_natunload
4401 * and the proxies actually are responsible for them being created.
4402 * Should the proxy timeouts have their own list? There's no real
4403 * justification as this is the only complication.
4404 */
4405 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4406 ifqnext = ifq->ifq_next;
4407 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4408 (fr_deletetimeoutqueue(ifq) == 0))
4409 fr_freetimeoutqueue(ifq);
4410 }
4411
4412 if (nat_table[0] != NULL) {
4413 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4414 nat_table[0] = NULL;
4415 }
4416 if (nat_table[1] != NULL) {
4417 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4418 nat_table[1] = NULL;
4419 }
4420 if (nat_rules != NULL) {
4421 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4422 nat_rules = NULL;
4423 }
4424 if (rdr_rules != NULL) {
4425 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4426 rdr_rules = NULL;
4427 }
4428 if (ipf_hm_maptable != NULL) {
4429 KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4430 ipf_hm_maptable = NULL;
4431 }
4432 if (nat_stats.ns_bucketlen[0] != NULL) {
4433 KFREES(nat_stats.ns_bucketlen[0],
4434 sizeof(u_long *) * ipf_nattable_sz);
4435 nat_stats.ns_bucketlen[0] = NULL;
4436 }
4437 if (nat_stats.ns_bucketlen[1] != NULL) {
4438 KFREES(nat_stats.ns_bucketlen[1],
4439 sizeof(u_long *) * ipf_nattable_sz);
4440 nat_stats.ns_bucketlen[1] = NULL;
4441 }
4442
4443 if (fr_nat_maxbucket_reset == 1)
4444 fr_nat_maxbucket = 0;
4445
4446 if (fr_nat_init == 1) {
4447 fr_nat_init = 0;
4448 fr_sttab_destroy(nat_tqb);
4449
4450 RW_DESTROY(&ipf_natfrag);
4451 RW_DESTROY(&ipf_nat);
4452
4453 MUTEX_DESTROY(&ipf_nat_new);
4454 MUTEX_DESTROY(&ipf_natio);
4455
4456 MUTEX_DESTROY(&nat_udptq.ifq_lock);
4457 MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4458 MUTEX_DESTROY(&nat_iptq.ifq_lock);
4459 }
4460 }
4461
4462
4463 /* ------------------------------------------------------------------------ */
4464 /* Function: fr_natexpire */
4465 /* Returns: Nil */
4466 /* Parameters: Nil */
4467 /* */
4468 /* Check all of the timeout queues for entries at the top which need to be */
4469 /* expired. */
4470 /* ------------------------------------------------------------------------ */
fr_natexpire()4471 void fr_natexpire()
4472 {
4473 ipftq_t *ifq, *ifqnext;
4474 ipftqent_t *tqe, *tqn;
4475 int i;
4476 SPL_INT(s);
4477
4478 SPL_NET(s);
4479 WRITE_ENTER(&ipf_nat);
4480 for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4481 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4482 if (tqe->tqe_die > fr_ticks)
4483 break;
4484 tqn = tqe->tqe_next;
4485 nat_delete(tqe->tqe_parent, NL_EXPIRE);
4486 }
4487 }
4488
4489 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4490 ifqnext = ifq->ifq_next;
4491
4492 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4493 if (tqe->tqe_die > fr_ticks)
4494 break;
4495 tqn = tqe->tqe_next;
4496 nat_delete(tqe->tqe_parent, NL_EXPIRE);
4497 }
4498 }
4499
4500 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4501 ifqnext = ifq->ifq_next;
4502
4503 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4504 (ifq->ifq_ref == 0)) {
4505 fr_freetimeoutqueue(ifq);
4506 }
4507 }
4508
4509 if (fr_nat_doflush != 0) {
4510 nat_extraflush(2);
4511 fr_nat_doflush = 0;
4512 }
4513
4514 RWLOCK_EXIT(&ipf_nat);
4515 SPL_X(s);
4516 }
4517
4518
4519 /* ------------------------------------------------------------------------ */
4520 /* Function: fr_natsync */
4521 /* Returns: Nil */
4522 /* Parameters: ifp(I) - pointer to network interface */
4523 /* */
4524 /* Walk through all of the currently active NAT sessions, looking for those */
4525 /* which need to have their translated address updated. */
4526 /* ------------------------------------------------------------------------ */
fr_natsync(ifp)4527 void fr_natsync(ifp)
4528 void *ifp;
4529 {
4530 u_32_t sum1, sum2, sumd;
4531 struct in_addr in;
4532 ipnat_t *n;
4533 nat_t *nat;
4534 void *ifp2;
4535 SPL_INT(s);
4536
4537 if (fr_running <= 0)
4538 return;
4539
4540 /*
4541 * Change IP addresses for NAT sessions for any protocol except TCP
4542 * since it will break the TCP connection anyway. The only rules
4543 * which will get changed are those which are "map ... -> 0/32",
4544 * where the rule specifies the address is taken from the interface.
4545 */
4546 SPL_NET(s);
4547 WRITE_ENTER(&ipf_nat);
4548
4549 if (fr_running <= 0) {
4550 RWLOCK_EXIT(&ipf_nat);
4551 return;
4552 }
4553
4554 for (nat = nat_instances; nat; nat = nat->nat_next) {
4555 if ((nat->nat_flags & IPN_TCP) != 0)
4556 continue;
4557 n = nat->nat_ptr;
4558 if ((n == NULL) ||
4559 (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4560 continue;
4561 if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4562 (ifp == nat->nat_ifps[1]))) {
4563 nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4564 if (nat->nat_ifnames[1][0] != '\0') {
4565 nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4566 4);
4567 } else
4568 nat->nat_ifps[1] = nat->nat_ifps[0];
4569 ifp2 = nat->nat_ifps[0];
4570 if (ifp2 == NULL)
4571 continue;
4572
4573 /*
4574 * Change the map-to address to be the same as the
4575 * new one.
4576 */
4577 sum1 = nat->nat_outip.s_addr;
4578 if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4579 nat->nat_outip = in;
4580 sum2 = nat->nat_outip.s_addr;
4581
4582 if (sum1 == sum2)
4583 continue;
4584 /*
4585 * Readjust the checksum adjustment to take into
4586 * account the new IP#.
4587 */
4588 CALC_SUMD(sum1, sum2, sumd);
4589 /* XXX - dont change for TCP when solaris does
4590 * hardware checksumming.
4591 */
4592 sumd += nat->nat_sumd[0];
4593 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4594 nat->nat_sumd[1] = nat->nat_sumd[0];
4595 }
4596 }
4597
4598 for (n = nat_list; (n != NULL); n = n->in_next) {
4599 if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4600 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4601 if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4602 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4603 }
4604 RWLOCK_EXIT(&ipf_nat);
4605 SPL_X(s);
4606 }
4607
4608
4609 /* ------------------------------------------------------------------------ */
4610 /* Function: nat_icmpquerytype4 */
4611 /* Returns: int - 1 == success, 0 == failure */
4612 /* Parameters: icmptype(I) - ICMP type number */
4613 /* */
4614 /* Tests to see if the ICMP type number passed is a query/response type or */
4615 /* not. */
4616 /* ------------------------------------------------------------------------ */
nat_icmpquerytype4(icmptype)4617 static int nat_icmpquerytype4(icmptype)
4618 int icmptype;
4619 {
4620
4621 /*
4622 * For the ICMP query NAT code, it is essential that both the query
4623 * and the reply match on the NAT rule. Because the NAT structure
4624 * does not keep track of the icmptype, and a single NAT structure
4625 * is used for all icmp types with the same src, dest and id, we
4626 * simply define the replies as queries as well. The funny thing is,
4627 * altough it seems silly to call a reply a query, this is exactly
4628 * as it is defined in the IPv4 specification
4629 */
4630
4631 switch (icmptype)
4632 {
4633
4634 case ICMP_ECHOREPLY:
4635 case ICMP_ECHO:
4636 /* route aedvertisement/solliciation is currently unsupported: */
4637 /* it would require rewriting the ICMP data section */
4638 case ICMP_TSTAMP:
4639 case ICMP_TSTAMPREPLY:
4640 case ICMP_IREQ:
4641 case ICMP_IREQREPLY:
4642 case ICMP_MASKREQ:
4643 case ICMP_MASKREPLY:
4644 return 1;
4645 default:
4646 return 0;
4647 }
4648 }
4649
4650
4651 /* ------------------------------------------------------------------------ */
4652 /* Function: nat_log */
4653 /* Returns: Nil */
4654 /* Parameters: nat(I) - pointer to NAT structure */
4655 /* type(I) - type of log entry to create */
4656 /* */
4657 /* Creates a NAT log entry. */
4658 /* ------------------------------------------------------------------------ */
nat_log(nat,type)4659 void nat_log(nat, type)
4660 struct nat *nat;
4661 u_int type;
4662 {
4663 #ifdef IPFILTER_LOG
4664 # ifndef LARGE_NAT
4665 struct ipnat *np;
4666 int rulen;
4667 # endif
4668 struct natlog natl;
4669 void *items[1];
4670 size_t sizes[1];
4671 int types[1];
4672
4673 natl.nl_inip = nat->nat_inip;
4674 natl.nl_outip = nat->nat_outip;
4675 natl.nl_origip = nat->nat_oip;
4676 natl.nl_bytes[0] = nat->nat_bytes[0];
4677 natl.nl_bytes[1] = nat->nat_bytes[1];
4678 natl.nl_pkts[0] = nat->nat_pkts[0];
4679 natl.nl_pkts[1] = nat->nat_pkts[1];
4680 natl.nl_origport = nat->nat_oport;
4681 natl.nl_inport = nat->nat_inport;
4682 natl.nl_outport = nat->nat_outport;
4683 natl.nl_p = nat->nat_p;
4684 natl.nl_type = type;
4685 natl.nl_rule = -1;
4686 # ifndef LARGE_NAT
4687 if (nat->nat_ptr != NULL) {
4688 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4689 if (np == nat->nat_ptr) {
4690 natl.nl_rule = rulen;
4691 break;
4692 }
4693 }
4694 # endif
4695 items[0] = &natl;
4696 sizes[0] = sizeof(natl);
4697 types[0] = 0;
4698
4699 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4700 #endif
4701 }
4702
4703
4704 #if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	/* All of the work is done by frsync(). */
	frsync(ifp);
}
4719 #endif
4720
4721
4722 /* ------------------------------------------------------------------------ */
4723 /* Function: fr_ipnatderef */
4724 /* Returns: Nil */
4725 /* Parameters: isp(I) - pointer to pointer to NAT rule */
4726 /* Write Locks: ipf_nat */
4727 /* */
4728 /* ------------------------------------------------------------------------ */
fr_ipnatderef(inp)4729 void fr_ipnatderef(inp)
4730 ipnat_t **inp;
4731 {
4732 ipnat_t *in;
4733
4734 in = *inp;
4735 *inp = NULL;
4736 in->in_space++;
4737 in->in_use--;
4738 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4739 if (in->in_apr)
4740 appr_free(in->in_apr);
4741 MUTEX_DESTROY(&in->in_lock);
4742 KFREE(in);
4743 nat_stats.ns_rules--;
4744 #if SOLARIS && !defined(_INET_IP_STACK_H)
4745 if (nat_stats.ns_rules == 0)
4746 pfil_delayed_copy = 1;
4747 #endif
4748 }
4749 }
4750
4751
4752 /* ------------------------------------------------------------------------ */
4753 /* Function: fr_natderef */
4754 /* Returns: Nil */
4755 /* Parameters: isp(I) - pointer to pointer to NAT table entry */
4756 /* */
4757 /* Decrement the reference counter for this NAT table entry and free it if */
4758 /* there are no more things using it. */
4759 /* */
4760 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4761 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4762 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
4763 /* because nat_delete() will do that and send nat_ref to -1. */
4764 /* */
4765 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4766 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4767 /* ------------------------------------------------------------------------ */
fr_natderef(natp)4768 void fr_natderef(natp)
4769 nat_t **natp;
4770 {
4771 nat_t *nat;
4772
4773 nat = *natp;
4774 *natp = NULL;
4775
4776 MUTEX_ENTER(&nat->nat_lock);
4777 if (nat->nat_ref > 1) {
4778 nat->nat_ref--;
4779 MUTEX_EXIT(&nat->nat_lock);
4780 return;
4781 }
4782 MUTEX_EXIT(&nat->nat_lock);
4783
4784 WRITE_ENTER(&ipf_nat);
4785 nat_delete(nat, NL_EXPIRE);
4786 RWLOCK_EXIT(&ipf_nat);
4787 }
4788
4789
4790 /* ------------------------------------------------------------------------ */
4791 /* Function: fr_natclone */
4792 /* Returns: ipstate_t* - NULL == cloning failed, */
4793 /* else pointer to new state structure */
4794 /* Parameters: fin(I) - pointer to packet information */
4795 /* is(I) - pointer to master state structure */
4796 /* Write Lock: ipf_nat */
4797 /* */
4798 /* Create a "duplcate" state table entry from the master. */
4799 /* ------------------------------------------------------------------------ */
fr_natclone(fin,nat)4800 static nat_t *fr_natclone(fin, nat)
4801 fr_info_t *fin;
4802 nat_t *nat;
4803 {
4804 frentry_t *fr;
4805 nat_t *clone;
4806 ipnat_t *np;
4807
4808 KMALLOC(clone, nat_t *);
4809 if (clone == NULL)
4810 return NULL;
4811 bcopy((char *)nat, (char *)clone, sizeof(*clone));
4812
4813 MUTEX_NUKE(&clone->nat_lock);
4814
4815 clone->nat_aps = NULL;
4816 /*
4817 * Initialize all these so that nat_delete() doesn't cause a crash.
4818 */
4819 clone->nat_tqe.tqe_pnext = NULL;
4820 clone->nat_tqe.tqe_next = NULL;
4821 clone->nat_tqe.tqe_ifq = NULL;
4822 clone->nat_tqe.tqe_parent = clone;
4823
4824 clone->nat_flags &= ~SI_CLONE;
4825 clone->nat_flags |= SI_CLONED;
4826
4827 if (clone->nat_hm)
4828 clone->nat_hm->hm_ref++;
4829
4830 if (nat_insert(clone, fin->fin_rev) == -1) {
4831 KFREE(clone);
4832 return NULL;
4833 }
4834 np = clone->nat_ptr;
4835 if (np != NULL) {
4836 if (nat_logging)
4837 nat_log(clone, (u_int)np->in_redir);
4838 np->in_use++;
4839 }
4840 fr = clone->nat_fr;
4841 if (fr != NULL) {
4842 MUTEX_ENTER(&fr->fr_lock);
4843 fr->fr_ref++;
4844 MUTEX_EXIT(&fr->fr_lock);
4845 }
4846
4847 /*
4848 * Because the clone is created outside the normal loop of things and
4849 * TCP has special needs in terms of state, initialise the timeout
4850 * state of the new NAT from here.
4851 */
4852 if (clone->nat_p == IPPROTO_TCP) {
4853 (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4854 clone->nat_flags);
4855 }
4856 #ifdef IPFILTER_SYNC
4857 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4858 #endif
4859 if (nat_logging)
4860 nat_log(clone, NL_CLONE);
4861 return clone;
4862 }
4863
4864
4865 /* ------------------------------------------------------------------------ */
4866 /* Function: nat_wildok */
4867 /* Returns: int - 1 == packet's ports match wildcards */
4868 /* 0 == packet's ports don't match wildcards */
4869 /* Parameters: nat(I) - NAT entry */
4870 /* sport(I) - source port */
4871 /* dport(I) - destination port */
4872 /* flags(I) - wildcard flags */
4873 /* dir(I) - packet direction */
4874 /* */
4875 /* Use NAT entry and packet direction to determine which combination of */
4876 /* wildcard flags should be used. */
4877 /* ------------------------------------------------------------------------ */
nat_wildok(nat,sport,dport,flags,dir)4878 static int nat_wildok(nat, sport, dport, flags, dir)
4879 nat_t *nat;
4880 int sport;
4881 int dport;
4882 int flags;
4883 int dir;
4884 {
4885 /*
4886 * When called by dir is set to
4887 * nat_inlookup NAT_INBOUND (0)
4888 * nat_outlookup NAT_OUTBOUND (1)
4889 *
4890 * We simply combine the packet's direction in dir with the original
4891 * "intended" direction of that NAT entry in nat->nat_dir to decide
4892 * which combination of wildcard flags to allow.
4893 */
4894
4895 switch ((dir << 1) | nat->nat_dir)
4896 {
4897 case 3: /* outbound packet / outbound entry */
4898 if (((nat->nat_inport == sport) ||
4899 (flags & SI_W_SPORT)) &&
4900 ((nat->nat_oport == dport) ||
4901 (flags & SI_W_DPORT)))
4902 return 1;
4903 break;
4904 case 2: /* outbound packet / inbound entry */
4905 if (((nat->nat_outport == sport) ||
4906 (flags & SI_W_DPORT)) &&
4907 ((nat->nat_oport == dport) ||
4908 (flags & SI_W_SPORT)))
4909 return 1;
4910 break;
4911 case 1: /* inbound packet / outbound entry */
4912 if (((nat->nat_oport == sport) ||
4913 (flags & SI_W_DPORT)) &&
4914 ((nat->nat_outport == dport) ||
4915 (flags & SI_W_SPORT)))
4916 return 1;
4917 break;
4918 case 0: /* inbound packet / inbound entry */
4919 if (((nat->nat_oport == sport) ||
4920 (flags & SI_W_SPORT)) &&
4921 ((nat->nat_outport == dport) ||
4922 (flags & SI_W_DPORT)))
4923 return 1;
4924 break;
4925 default:
4926 break;
4927 }
4928
4929 return(0);
4930 }
4931
4932
4933 /* ------------------------------------------------------------------------ */
4934 /* Function: nat_mssclamp */
4935 /* Returns: Nil */
4936 /* Parameters: tcp(I) - pointer to TCP header */
4937 /* maxmss(I) - value to clamp the TCP MSS to */
4938 /* fin(I) - pointer to packet information */
4939 /* csump(I) - pointer to TCP checksum */
4940 /* */
4941 /* Check for MSS option and clamp it if necessary. If found and changed, */
4942 /* then the TCP header checksum will be updated to reflect the change in */
4943 /* the MSS. */
4944 /* ------------------------------------------------------------------------ */
nat_mssclamp(tcp,maxmss,fin,csump)4945 static void nat_mssclamp(tcp, maxmss, fin, csump)
4946 tcphdr_t *tcp;
4947 u_32_t maxmss;
4948 fr_info_t *fin;
4949 u_short *csump;
4950 {
4951 u_char *cp, *ep, opt;
4952 int hlen, advance;
4953 u_32_t mss, sumd;
4954
4955 hlen = TCP_OFF(tcp) << 2;
4956 if (hlen > sizeof(*tcp)) {
4957 cp = (u_char *)tcp + sizeof(*tcp);
4958 ep = (u_char *)tcp + hlen;
4959
4960 while (cp < ep) {
4961 opt = cp[0];
4962 if (opt == TCPOPT_EOL)
4963 break;
4964 else if (opt == TCPOPT_NOP) {
4965 cp++;
4966 continue;
4967 }
4968
4969 if (cp + 1 >= ep)
4970 break;
4971 advance = cp[1];
4972 if ((cp + advance > ep) || (advance <= 0))
4973 break;
4974 switch (opt)
4975 {
4976 case TCPOPT_MAXSEG:
4977 if (advance != 4)
4978 break;
4979 mss = cp[2] * 256 + cp[3];
4980 if (mss > maxmss) {
4981 cp[2] = maxmss / 256;
4982 cp[3] = maxmss & 0xff;
4983 CALC_SUMD(mss, maxmss, sumd);
4984 fix_outcksum(fin, csump, sumd);
4985 }
4986 break;
4987 default:
4988 /* ignore unknown options */
4989 break;
4990 }
4991
4992 cp += advance;
4993 }
4994 }
4995 }
4996
4997
4998 /* ------------------------------------------------------------------------ */
4999 /* Function: fr_setnatqueue */
5000 /* Returns: Nil */
5001 /* Parameters: nat(I)- pointer to NAT structure */
5002 /* rev(I) - forward(0) or reverse(1) direction */
5003 /* Locks: ipf_nat (read or write) */
5004 /* */
5005 /* Put the NAT entry on its default queue entry, using rev as a helped in */
5006 /* determining which queue it should be placed on. */
5007 /* ------------------------------------------------------------------------ */
fr_setnatqueue(nat,rev)5008 void fr_setnatqueue(nat, rev)
5009 nat_t *nat;
5010 int rev;
5011 {
5012 ipftq_t *oifq, *nifq;
5013
5014 if (nat->nat_ptr != NULL)
5015 nifq = nat->nat_ptr->in_tqehead[rev];
5016 else
5017 nifq = NULL;
5018
5019 if (nifq == NULL) {
5020 switch (nat->nat_p)
5021 {
5022 case IPPROTO_UDP :
5023 nifq = &nat_udptq;
5024 break;
5025 case IPPROTO_ICMP :
5026 nifq = &nat_icmptq;
5027 break;
5028 case IPPROTO_TCP :
5029 nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5030 break;
5031 default :
5032 nifq = &nat_iptq;
5033 break;
5034 }
5035 }
5036
5037 oifq = nat->nat_tqe.tqe_ifq;
5038 /*
5039 * If it's currently on a timeout queue, move it from one queue to
5040 * another, else put it on the end of the newly determined queue.
5041 */
5042 if (oifq != NULL)
5043 fr_movequeue(&nat->nat_tqe, oifq, nifq);
5044 else
5045 fr_queueappend(&nat->nat_tqe, nifq, nat);
5046 return;
5047 }
5048
5049
5050 /* ------------------------------------------------------------------------ */
5051 /* Function: nat_getnext */
5052 /* Returns: int - 0 == ok, else error */
5053 /* Parameters: t(I) - pointer to ipftoken structure */
5054 /* itp(I) - pointer to ipfgeniter_t structure */
5055 /* */
5056 /* Fetch the next nat/ipnat structure pointer from the linked list and */
5057 /* copy it out to the storage space pointed to by itp_data. The next item */
5058 /* in the list to look at is put back in the ipftoken struture. */
5059 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5060 /* ipf_freetoken will call a deref function for us and we dont want to call */
5061 /* that twice (second time would be in the second switch statement below. */
5062 /* ------------------------------------------------------------------------ */
nat_getnext(t,itp)5063 static int nat_getnext(t, itp)
5064 ipftoken_t *t;
5065 ipfgeniter_t *itp;
5066 {
5067 hostmap_t *hm, *nexthm = NULL, zerohm;
5068 ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5069 nat_t *nat, *nextnat = NULL, zeronat;
5070 int error = 0, count;
5071 char *dst;
5072
5073 count = itp->igi_nitems;
5074 if (count < 1)
5075 return ENOSPC;
5076
5077 READ_ENTER(&ipf_nat);
5078
5079 switch (itp->igi_type)
5080 {
5081 case IPFGENITER_HOSTMAP :
5082 hm = t->ipt_data;
5083 if (hm == NULL) {
5084 nexthm = ipf_hm_maplist;
5085 } else {
5086 nexthm = hm->hm_next;
5087 }
5088 break;
5089
5090 case IPFGENITER_IPNAT :
5091 ipn = t->ipt_data;
5092 if (ipn == NULL) {
5093 nextipnat = nat_list;
5094 } else {
5095 nextipnat = ipn->in_next;
5096 }
5097 break;
5098
5099 case IPFGENITER_NAT :
5100 nat = t->ipt_data;
5101 if (nat == NULL) {
5102 nextnat = nat_instances;
5103 } else {
5104 nextnat = nat->nat_next;
5105 }
5106 break;
5107 default :
5108 RWLOCK_EXIT(&ipf_nat);
5109 return EINVAL;
5110 }
5111
5112 dst = itp->igi_data;
5113 for (;;) {
5114 switch (itp->igi_type)
5115 {
5116 case IPFGENITER_HOSTMAP :
5117 if (nexthm != NULL) {
5118 if (count == 1) {
5119 ATOMIC_INC32(nexthm->hm_ref);
5120 t->ipt_data = nexthm;
5121 }
5122 } else {
5123 bzero(&zerohm, sizeof(zerohm));
5124 nexthm = &zerohm;
5125 count = 1;
5126 t->ipt_data = NULL;
5127 }
5128 break;
5129
5130 case IPFGENITER_IPNAT :
5131 if (nextipnat != NULL) {
5132 if (count == 1) {
5133 MUTEX_ENTER(&nextipnat->in_lock);
5134 nextipnat->in_use++;
5135 MUTEX_EXIT(&nextipnat->in_lock);
5136 t->ipt_data = nextipnat;
5137 }
5138 } else {
5139 bzero(&zeroipn, sizeof(zeroipn));
5140 nextipnat = &zeroipn;
5141 count = 1;
5142 t->ipt_data = NULL;
5143 }
5144 break;
5145
5146 case IPFGENITER_NAT :
5147 if (nextnat != NULL) {
5148 if (count == 1) {
5149 MUTEX_ENTER(&nextnat->nat_lock);
5150 nextnat->nat_ref++;
5151 MUTEX_EXIT(&nextnat->nat_lock);
5152 t->ipt_data = nextnat;
5153 }
5154 } else {
5155 bzero(&zeronat, sizeof(zeronat));
5156 nextnat = &zeronat;
5157 count = 1;
5158 t->ipt_data = NULL;
5159 }
5160 break;
5161 default :
5162 break;
5163 }
5164 RWLOCK_EXIT(&ipf_nat);
5165
5166 /*
5167 * Copying out to user space needs to be done without the lock.
5168 */
5169 switch (itp->igi_type)
5170 {
5171 case IPFGENITER_HOSTMAP :
5172 error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5173 if (error != 0)
5174 error = EFAULT;
5175 else
5176 dst += sizeof(*nexthm);
5177 break;
5178
5179 case IPFGENITER_IPNAT :
5180 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5181 if (error != 0)
5182 error = EFAULT;
5183 else
5184 dst += sizeof(*nextipnat);
5185 break;
5186
5187 case IPFGENITER_NAT :
5188 error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5189 if (error != 0)
5190 error = EFAULT;
5191 else
5192 dst += sizeof(*nextnat);
5193 break;
5194 }
5195
5196 if ((count == 1) || (error != 0))
5197 break;
5198
5199 count--;
5200
5201 READ_ENTER(&ipf_nat);
5202
5203 /*
5204 * We need to have the lock again here to make sure that
5205 * using _next is consistent.
5206 */
5207 switch (itp->igi_type)
5208 {
5209 case IPFGENITER_HOSTMAP :
5210 nexthm = nexthm->hm_next;
5211 break;
5212 case IPFGENITER_IPNAT :
5213 nextipnat = nextipnat->in_next;
5214 break;
5215 case IPFGENITER_NAT :
5216 nextnat = nextnat->nat_next;
5217 break;
5218 }
5219 }
5220
5221
5222 switch (itp->igi_type)
5223 {
5224 case IPFGENITER_HOSTMAP :
5225 if (hm != NULL) {
5226 WRITE_ENTER(&ipf_nat);
5227 fr_hostmapdel(&hm);
5228 RWLOCK_EXIT(&ipf_nat);
5229 }
5230 break;
5231 case IPFGENITER_IPNAT :
5232 if (ipn != NULL) {
5233 fr_ipnatderef(&ipn);
5234 }
5235 break;
5236 case IPFGENITER_NAT :
5237 if (nat != NULL) {
5238 fr_natderef(&nat);
5239 }
5240 break;
5241 default :
5242 break;
5243 }
5244
5245 return error;
5246 }
5247
5248
5249 /* ------------------------------------------------------------------------ */
5250 /* Function: nat_iterator */
5251 /* Returns: int - 0 == ok, else error */
5252 /* Parameters: token(I) - pointer to ipftoken structure */
5253 /* itp(I) - pointer to ipfgeniter_t structure */
5254 /* */
5255 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5256 /* generic structure to iterate through a list. There are three different */
5257 /* linked lists of NAT related information to go through: NAT rules, active */
5258 /* NAT mappings and the NAT fragment cache. */
5259 /* ------------------------------------------------------------------------ */
nat_iterator(token,itp)5260 static int nat_iterator(token, itp)
5261 ipftoken_t *token;
5262 ipfgeniter_t *itp;
5263 {
5264 int error;
5265
5266 if (itp->igi_data == NULL)
5267 return EFAULT;
5268
5269 token->ipt_subtype = itp->igi_type;
5270
5271 switch (itp->igi_type)
5272 {
5273 case IPFGENITER_HOSTMAP :
5274 case IPFGENITER_IPNAT :
5275 case IPFGENITER_NAT :
5276 error = nat_getnext(token, itp);
5277 break;
5278
5279 case IPFGENITER_NATFRAG :
5280 #ifdef USE_MUTEXES
5281 error = fr_nextfrag(token, itp, &ipfr_natlist,
5282 &ipfr_nattail, &ipf_natfrag);
5283 #else
5284 error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5285 #endif
5286 break;
5287 default :
5288 error = EINVAL;
5289 break;
5290 }
5291
5292 return error;
5293 }
5294
5295
5296 /* ------------------------------------------------------------------------ */
5297 /* Function: nat_extraflush */
5298 /* Returns: int - 0 == success, -1 == failure */
5299 /* Parameters: which(I) - how to flush the active NAT table */
5300 /* Write Locks: ipf_nat */
5301 /* */
5302 /* Flush nat tables. Three actions currently defined: */
5303 /* which == 0 : flush all nat table entries */
5304 /* which == 1 : flush TCP connections which have started to close but are */
5305 /* stuck for some reason. */
5306 /* which == 2 : flush TCP connections which have been idle for a long time, */
5307 /* starting at > 4 days idle and working back in successive half-*/
5308 /* days to at most 12 hours old. If this fails to free enough */
5309 /* slots then work backwards in half hour slots to 30 minutes. */
5310 /* If that too fails, then work backwards in 30 second intervals */
5311 /* for the last 30 minutes to at worst 30 seconds idle. */
5312 /* ------------------------------------------------------------------------ */
nat_extraflush(which)5313 static int nat_extraflush(which)
5314 int which;
5315 {
5316 ipftq_t *ifq, *ifqnext;
5317 nat_t *nat, **natp;
5318 ipftqent_t *tqn;
5319 int removed;
5320 SPL_INT(s);
5321
5322 removed = 0;
5323
5324 SPL_NET(s);
5325
5326 switch (which)
5327 {
5328 case 0 :
5329 /*
5330 * Style 0 flush removes everything...
5331 */
5332 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5333 nat_delete(nat, NL_FLUSH);
5334 removed++;
5335 }
5336 break;
5337
5338 case 1 :
5339 /*
5340 * Since we're only interested in things that are closing,
5341 * we can start with the appropriate timeout queue.
5342 */
5343 for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5344 ifq = ifq->ifq_next) {
5345
5346 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5347 nat = tqn->tqe_parent;
5348 tqn = tqn->tqe_next;
5349 if (nat->nat_p != IPPROTO_TCP)
5350 break;
5351 nat_delete(nat, NL_EXPIRE);
5352 removed++;
5353 }
5354 }
5355
5356 /*
5357 * Also need to look through the user defined queues.
5358 */
5359 for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5360 ifqnext = ifq->ifq_next;
5361 for (tqn = ifq->ifq_head; tqn != NULL; ) {
5362 nat = tqn->tqe_parent;
5363 tqn = tqn->tqe_next;
5364 if (nat->nat_p != IPPROTO_TCP)
5365 continue;
5366
5367 if ((nat->nat_tcpstate[0] >
5368 IPF_TCPS_ESTABLISHED) &&
5369 (nat->nat_tcpstate[1] >
5370 IPF_TCPS_ESTABLISHED)) {
5371 nat_delete(nat, NL_EXPIRE);
5372 removed++;
5373 }
5374 }
5375 }
5376 break;
5377
5378 /*
5379 * Args 5-11 correspond to flushing those particular states
5380 * for TCP connections.
5381 */
5382 case IPF_TCPS_CLOSE_WAIT :
5383 case IPF_TCPS_FIN_WAIT_1 :
5384 case IPF_TCPS_CLOSING :
5385 case IPF_TCPS_LAST_ACK :
5386 case IPF_TCPS_FIN_WAIT_2 :
5387 case IPF_TCPS_TIME_WAIT :
5388 case IPF_TCPS_CLOSED :
5389 tqn = nat_tqb[which].ifq_head;
5390 while (tqn != NULL) {
5391 nat = tqn->tqe_parent;
5392 tqn = tqn->tqe_next;
5393 nat_delete(nat, NL_FLUSH);
5394 removed++;
5395 }
5396 break;
5397
5398 default :
5399 if (which < 30)
5400 break;
5401
5402 /*
5403 * Take a large arbitrary number to mean the number of seconds
5404 * for which which consider to be the maximum value we'll allow
5405 * the expiration to be.
5406 */
5407 which = IPF_TTLVAL(which);
5408 for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5409 if (fr_ticks - nat->nat_touched > which) {
5410 nat_delete(nat, NL_FLUSH);
5411 removed++;
5412 } else
5413 natp = &nat->nat_next;
5414 }
5415 break;
5416 }
5417
5418 if (which != 2) {
5419 SPL_X(s);
5420 return removed;
5421 }
5422
5423 /*
5424 * Asked to remove inactive entries because the table is full.
5425 */
5426 if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5427 nat_last_force_flush = fr_ticks;
5428 removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5429 }
5430
5431 SPL_X(s);
5432 return removed;
5433 }
5434
5435
5436 /* ------------------------------------------------------------------------ */
5437 /* Function: nat_flush_entry */
5438 /* Returns: 0 - always succeeds */
5439 /* Parameters: entry(I) - pointer to NAT entry */
5440 /* Write Locks: ipf_nat */
5441 /* */
5442 /* This function is a stepping stone between ipf_queueflush() and */
5443 /* nat_dlete(). It is used so we can provide a uniform interface via the */
5444 /* ipf_queueflush() function. Since the nat_delete() function returns void */
5445 /* we translate that to mean it always succeeds in deleting something. */
5446 /* ------------------------------------------------------------------------ */
nat_flush_entry(entry)5447 static int nat_flush_entry(entry)
5448 void *entry;
5449 {
5450 nat_delete(entry, NL_FLUSH);
5451 return 0;
5452 }
5453
5454
5455 /* ------------------------------------------------------------------------ */
5456 /* Function: nat_gettable */
5457 /* Returns: int - 0 = success, else error */
5458 /* Parameters: data(I) - pointer to ioctl data */
5459 /* */
5460 /* This function handles ioctl requests for tables of nat information. */
5461 /* At present the only table it deals with is the hash bucket statistics. */
5462 /* ------------------------------------------------------------------------ */
nat_gettable(data)5463 static int nat_gettable(data)
5464 char *data;
5465 {
5466 ipftable_t table;
5467 int error;
5468
5469 error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5470 if (error != 0)
5471 return error;
5472
5473 switch (table.ita_type)
5474 {
5475 case IPFTABLE_BUCKETS_NATIN :
5476 error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5477 ipf_nattable_sz * sizeof(u_long));
5478 break;
5479
5480 case IPFTABLE_BUCKETS_NATOUT :
5481 error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5482 ipf_nattable_sz * sizeof(u_long));
5483 break;
5484
5485 default :
5486 return EINVAL;
5487 }
5488
5489 if (error != 0) {
5490 error = EFAULT;
5491 }
5492 return error;
5493 }
5494