1 /*	$FreeBSD: stable/9/sys/contrib/ipfilter/netinet/ip_nat.c 296924 2016-03-16 02:01:17Z cy $	*/
2 
3 /*
4  * Copyright (C) 1995-2003 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL	1
12 # define        _KERNEL	1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/time.h>
18 #include <sys/file.h>
19 #if defined(_KERNEL) && defined(__NetBSD_Version__) && \
20     (__NetBSD_Version__ >= 399002000)
21 # include <sys/kauth.h>
22 #endif
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 #if defined(__NetBSD_Version__) && (__NetBSD_Version__ < 399001400)
26 #  include "opt_ipfilter_log.h"
27 # else
28 #  include "opt_ipfilter.h"
29 # endif
30 #endif
31 #if !defined(_KERNEL)
32 # include <stdio.h>
33 # include <string.h>
34 # include <stdlib.h>
35 # define _KERNEL
36 # ifdef __OpenBSD__
37 struct file;
38 # endif
39 # include <sys/uio.h>
40 # undef _KERNEL
41 #endif
42 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
43 # include <sys/filio.h>
44 # include <sys/fcntl.h>
45 #else
46 # include <sys/ioctl.h>
47 #endif
48 #if !defined(AIX)
49 # include <sys/fcntl.h>
50 #endif
51 #if !defined(linux)
52 # include <sys/protosw.h>
53 #endif
54 #include <sys/socket.h>
55 #if defined(_KERNEL)
56 # include <sys/systm.h>
57 # if !defined(__SVR4) && !defined(__svr4__)
58 #  include <sys/mbuf.h>
59 # endif
60 #endif
61 #if defined(__SVR4) || defined(__svr4__)
62 # include <sys/filio.h>
63 # include <sys/byteorder.h>
64 # ifdef _KERNEL
65 #  include <sys/dditypes.h>
66 # endif
67 # include <sys/stream.h>
68 # include <sys/kmem.h>
69 #endif
70 #if __FreeBSD_version >= 300000
71 # include <sys/queue.h>
72 #endif
73 #include <net/if.h>
74 #if __FreeBSD_version >= 300000
75 # include <net/if_var.h>
76 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
77 #  include "opt_ipfilter.h"
78 # endif
79 #endif
80 #ifdef sun
81 # include <net/af.h>
82 #endif
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87 
88 #ifdef RFC1825
89 # include <vpn/md5.h>
90 # include <vpn/ipsec.h>
91 extern struct ifnet vpnif;
92 #endif
93 
94 #if !defined(linux)
95 # include <netinet/ip_var.h>
96 #endif
97 #include <netinet/tcp.h>
98 #include <netinet/udp.h>
99 #include <netinet/ip_icmp.h>
100 #include "netinet/ip_compat.h"
101 #include <netinet/tcpip.h>
102 #include "netinet/ip_fil.h"
103 #include "netinet/ip_nat.h"
104 #include "netinet/ip_frag.h"
105 #include "netinet/ip_state.h"
106 #include "netinet/ip_proxy.h"
107 #ifdef	IPFILTER_SYNC
108 #include "netinet/ip_sync.h"
109 #endif
110 #if (__FreeBSD_version >= 300000)
111 # include <sys/malloc.h>
112 #endif
113 /* END OF INCLUDES */
114 
115 #undef	SOCKADDR_IN
116 #define	SOCKADDR_IN	struct sockaddr_in
117 
118 #if !defined(lint)
119 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
120 static const char rcsid[] = "@(#)$FreeBSD: stable/9/sys/contrib/ipfilter/netinet/ip_nat.c 296924 2016-03-16 02:01:17Z cy $";
121 /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */
122 #endif
123 
124 
125 /* ======================================================================== */
126 /* How the NAT is organised and works.                                      */
127 /*                                                                          */
128 /* Inside (interface y) NAT       Outside (interface x)                     */
129 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
131 /* ------------>         |   ------------>                                  */
132 /* src=10.1.1.1          |   src=192.1.1.1                                  */
133 /*                       |                                                  */
134 /*                       |   in, processed by fr_checknatin() for x         */
135 /* <------------         |   <------------                                  */
136 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
137 /* -------------------- -+- -------------------------------------           */
138 /* fr_checknatout() - changes ip_src and if required, sport                 */
139 /*             - creates a new mapping, if required.                        */
140 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
141 /*                                                                          */
142 /* In the NAT table, internal source is recorded as "in" and externally     */
143 /* seen as "out".                                                           */
144 /* ======================================================================== */
145 
146 
147 nat_t	**nat_table[2] = { NULL, NULL },
148 	*nat_instances = NULL;
149 ipnat_t	*nat_list = NULL;
150 u_int	ipf_nattable_max = NAT_TABLE_MAX;
151 u_int	ipf_nattable_sz = NAT_TABLE_SZ;
152 u_int	ipf_natrules_sz = NAT_SIZE;
153 u_int	ipf_rdrrules_sz = RDR_SIZE;
154 u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
155 u_int	fr_nat_maxbucket = 0,
156 	fr_nat_maxbucket_reset = 1;
157 u_32_t	nat_masks = 0;
158 u_32_t	rdr_masks = 0;
159 u_long	nat_last_force_flush = 0;
160 ipnat_t	**nat_rules = NULL;
161 ipnat_t	**rdr_rules = NULL;
162 hostmap_t	**ipf_hm_maptable  = NULL;
163 hostmap_t	*ipf_hm_maplist  = NULL;
164 ipftq_t	nat_tqb[IPF_TCP_NSTATES];
165 ipftq_t	nat_udptq;
166 ipftq_t	nat_icmptq;
167 ipftq_t	nat_iptq;
168 ipftq_t	*nat_utqe = NULL;
169 int	fr_nat_doflush = 0;
170 #ifdef  IPFILTER_LOG
171 int	nat_logging = 1;
172 #else
173 int	nat_logging = 0;
174 #endif
175 
176 u_long	fr_defnatage = DEF_NAT_AGE,
177 	fr_defnatipage = 120,		/* 60 seconds */
178 	fr_defnaticmpage = 6;		/* 3 seconds */
179 natstat_t nat_stats;
180 int	fr_nat_lock = 0;
181 int	fr_nat_init = 0;
182 #if SOLARIS && !defined(_INET_IP_STACK_H)
183 extern	int		pfil_delayed_copy;
184 #endif
185 
186 static	int	nat_flush_entry __P((void *));
187 static	int	nat_flushtable __P((void));
188 static	int	nat_clearlist __P((void));
189 static	void	nat_addnat __P((struct ipnat *));
190 static	void	nat_addrdr __P((struct ipnat *));
191 static	void	nat_delrdr __P((struct ipnat *));
192 static	void	nat_delnat __P((struct ipnat *));
193 static	int	fr_natgetent __P((caddr_t, int));
194 static	int	fr_natgetsz __P((caddr_t, int));
195 static	int	fr_natputent __P((caddr_t, int));
196 static	int	nat_extraflush __P((int));
197 static	int	nat_gettable __P((char *));
198 static	void	nat_tabmove __P((nat_t *));
199 static	int	nat_match __P((fr_info_t *, ipnat_t *));
200 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
201 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
202 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
203 				    struct in_addr, struct in_addr, u_32_t));
204 static	int	nat_icmpquerytype4 __P((int));
205 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int));
206 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int));
207 static	int	nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
208 				      tcphdr_t *, nat_t **, int));
209 static	int	nat_resolverule __P((ipnat_t *));
210 static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
211 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
212 static	int	nat_wildok __P((nat_t *, int, int, int, int));
213 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *));
214 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *));
215 
216 
217 /* ------------------------------------------------------------------------ */
218 /* Function:    fr_natinit                                                  */
219 /* Returns:     int - 0 == success, -1 == failure                           */
220 /* Parameters:  Nil                                                         */
221 /*                                                                          */
222 /* Initialise all of the NAT locks, tables and other structures.            */
223 /* ------------------------------------------------------------------------ */
fr_natinit()224 int fr_natinit()
225 {
226 	int i;
227 
228 	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
229 	if (nat_table[0] != NULL)
230 		bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
231 	else
232 		return -1;
233 
234 	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
235 	if (nat_table[1] != NULL)
236 		bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
237 	else
238 		return -2;
239 
240 	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
241 	if (nat_rules != NULL)
242 		bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
243 	else
244 		return -3;
245 
246 	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
247 	if (rdr_rules != NULL)
248 		bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
249 	else
250 		return -4;
251 
252 	KMALLOCS(ipf_hm_maptable, hostmap_t **, \
253 		 sizeof(hostmap_t *) * ipf_hostmap_sz);
254 	if (ipf_hm_maptable != NULL)
255 		bzero((char *)ipf_hm_maptable,
256 		      sizeof(hostmap_t *) * ipf_hostmap_sz);
257 	else
258 		return -5;
259 	ipf_hm_maplist = NULL;
260 
261 	KMALLOCS(nat_stats.ns_bucketlen[0], u_long *,
262 		 ipf_nattable_sz * sizeof(u_long));
263 	if (nat_stats.ns_bucketlen[0] == NULL)
264 		return -6;
265 	bzero((char *)nat_stats.ns_bucketlen[0],
266 	      ipf_nattable_sz * sizeof(u_long));
267 
268 	KMALLOCS(nat_stats.ns_bucketlen[1], u_long *,
269 		 ipf_nattable_sz * sizeof(u_long));
270 	if (nat_stats.ns_bucketlen[1] == NULL)
271 		return -7;
272 
273 	bzero((char *)nat_stats.ns_bucketlen[1],
274 	      ipf_nattable_sz * sizeof(u_long));
275 
276 	if (fr_nat_maxbucket == 0) {
277 		for (i = ipf_nattable_sz; i > 0; i >>= 1)
278 			fr_nat_maxbucket++;
279 		fr_nat_maxbucket *= 2;
280 	}
281 
282 	fr_sttab_init(nat_tqb);
283 	/*
284 	 * Increase this because we may have "keep state" following this too
285 	 * and packet storms can occur if this is removed too quickly.
286 	 */
287 	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack;
288 	nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq;
289 	nat_udptq.ifq_ttl = fr_defnatage;
290 	nat_udptq.ifq_ref = 1;
291 	nat_udptq.ifq_head = NULL;
292 	nat_udptq.ifq_tail = &nat_udptq.ifq_head;
293 	MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab");
294 	nat_udptq.ifq_next = &nat_icmptq;
295 	nat_icmptq.ifq_ttl = fr_defnaticmpage;
296 	nat_icmptq.ifq_ref = 1;
297 	nat_icmptq.ifq_head = NULL;
298 	nat_icmptq.ifq_tail = &nat_icmptq.ifq_head;
299 	MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab");
300 	nat_icmptq.ifq_next = &nat_iptq;
301 	nat_iptq.ifq_ttl = fr_defnatipage;
302 	nat_iptq.ifq_ref = 1;
303 	nat_iptq.ifq_head = NULL;
304 	nat_iptq.ifq_tail = &nat_iptq.ifq_head;
305 	MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab");
306 	nat_iptq.ifq_next = NULL;
307 
308 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
309 		if (nat_tqb[i].ifq_ttl < fr_defnaticmpage)
310 			nat_tqb[i].ifq_ttl = fr_defnaticmpage;
311 #ifdef LARGE_NAT
312 		else if (nat_tqb[i].ifq_ttl > fr_defnatage)
313 			nat_tqb[i].ifq_ttl = fr_defnatage;
314 #endif
315 	}
316 
317 	/*
318 	 * Increase this because we may have "keep state" following
319 	 * this too and packet storms can occur if this is removed
320 	 * too quickly.
321 	 */
322 	nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
323 
324 	RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock");
325 	RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock");
326 	MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex");
327 	MUTEX_INIT(&ipf_natio, "ipf nat io mutex");
328 
329 	fr_nat_init = 1;
330 
331 	return 0;
332 }
333 
334 
335 /* ------------------------------------------------------------------------ */
336 /* Function:    nat_addrdr                                                  */
337 /* Returns:     Nil                                                         */
338 /* Parameters:  n(I) - pointer to NAT rule to add                           */
339 /*                                                                          */
340 /* Adds a redirect rule to the hash table of redirect rules and the list of */
341 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
342 /* use by redirect rules.                                                   */
343 /* ------------------------------------------------------------------------ */
nat_addrdr(n)344 static void nat_addrdr(n)
345 ipnat_t *n;
346 {
347 	ipnat_t **np;
348 	u_32_t j;
349 	u_int hv;
350 	int k;
351 
352 	k = count4bits(n->in_outmsk);
353 	if ((k >= 0) && (k != 32))
354 		rdr_masks |= 1 << k;
355 	j = (n->in_outip & n->in_outmsk);
356 	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
357 	np = rdr_rules + hv;
358 	while (*np != NULL)
359 		np = &(*np)->in_rnext;
360 	n->in_rnext = NULL;
361 	n->in_prnext = np;
362 	n->in_hv = hv;
363 	*np = n;
364 }
365 
366 
367 /* ------------------------------------------------------------------------ */
368 /* Function:    nat_addnat                                                  */
369 /* Returns:     Nil                                                         */
370 /* Parameters:  n(I) - pointer to NAT rule to add                           */
371 /*                                                                          */
372 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
373 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
374 /* redirect rules.                                                          */
375 /* ------------------------------------------------------------------------ */
nat_addnat(n)376 static void nat_addnat(n)
377 ipnat_t *n;
378 {
379 	ipnat_t **np;
380 	u_32_t j;
381 	u_int hv;
382 	int k;
383 
384 	k = count4bits(n->in_inmsk);
385 	if ((k >= 0) && (k != 32))
386 		nat_masks |= 1 << k;
387 	j = (n->in_inip & n->in_inmsk);
388 	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
389 	np = nat_rules + hv;
390 	while (*np != NULL)
391 		np = &(*np)->in_mnext;
392 	n->in_mnext = NULL;
393 	n->in_pmnext = np;
394 	n->in_hv = hv;
395 	*np = n;
396 }
397 
398 
399 /* ------------------------------------------------------------------------ */
400 /* Function:    nat_delrdr                                                  */
401 /* Returns:     Nil                                                         */
402 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
403 /*                                                                          */
404 /* Removes a redirect rule from the hash table of redirect rules.           */
405 /* ------------------------------------------------------------------------ */
nat_delrdr(n)406 static void nat_delrdr(n)
407 ipnat_t *n;
408 {
409 	if (n->in_rnext)
410 		n->in_rnext->in_prnext = n->in_prnext;
411 	*n->in_prnext = n->in_rnext;
412 }
413 
414 
415 /* ------------------------------------------------------------------------ */
416 /* Function:    nat_delnat                                                  */
417 /* Returns:     Nil                                                         */
418 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
419 /*                                                                          */
420 /* Removes a NAT map rule from the hash table of NAT map rules.             */
421 /* ------------------------------------------------------------------------ */
nat_delnat(n)422 static void nat_delnat(n)
423 ipnat_t *n;
424 {
425 	if (n->in_mnext != NULL)
426 		n->in_mnext->in_pmnext = n->in_pmnext;
427 	*n->in_pmnext = n->in_mnext;
428 }
429 
430 
431 /* ------------------------------------------------------------------------ */
432 /* Function:    nat_hostmap                                                 */
433 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
434 /*                                else a pointer to the hostmapping to use  */
435 /* Parameters:  np(I)   - pointer to NAT rule                               */
436 /*              real(I) - real IP address                                   */
437 /*              map(I)  - mapped IP address                                 */
438 /*              port(I) - destination port number                           */
439 /* Write Locks: ipf_nat                                                     */
440 /*                                                                          */
441 /* Check if an ip address has already been allocated for a given mapping    */
442 /* that is not doing port based translation.  If is not yet allocated, then */
443 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
444 /* ------------------------------------------------------------------------ */
nat_hostmap(np,src,dst,map,port)445 static struct hostmap *nat_hostmap(np, src, dst, map, port)
446 ipnat_t *np;
447 struct in_addr src;
448 struct in_addr dst;
449 struct in_addr map;
450 u_32_t port;
451 {
452 	hostmap_t *hm;
453 	u_int hv;
454 
455 	hv = (src.s_addr ^ dst.s_addr);
456 	hv += src.s_addr;
457 	hv += dst.s_addr;
458 	hv %= HOSTMAP_SIZE;
459 	for (hm = ipf_hm_maptable[hv]; hm; hm = hm->hm_next)
460 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
461 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
462 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
463 		    ((port == 0) || (port == hm->hm_port))) {
464 			hm->hm_ref++;
465 			return hm;
466 		}
467 
468 	if (np == NULL)
469 		return NULL;
470 
471 	KMALLOC(hm, hostmap_t *);
472 	if (hm) {
473 		hm->hm_next = ipf_hm_maplist;
474 		hm->hm_pnext = &ipf_hm_maplist;
475 		if (ipf_hm_maplist != NULL)
476 			ipf_hm_maplist->hm_pnext = &hm->hm_next;
477 		ipf_hm_maplist = hm;
478 		hm->hm_hnext = ipf_hm_maptable[hv];
479 		hm->hm_phnext = ipf_hm_maptable + hv;
480 		if (ipf_hm_maptable[hv] != NULL)
481 			ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext;
482 		ipf_hm_maptable[hv] = hm;
483 		hm->hm_ipnat = np;
484 		hm->hm_srcip = src;
485 		hm->hm_dstip = dst;
486 		hm->hm_mapip = map;
487 		hm->hm_ref = 1;
488 		hm->hm_port = port;
489 	}
490 	return hm;
491 }
492 
493 
494 /* ------------------------------------------------------------------------ */
495 /* Function:    fr_hostmapdel                                               */
496 /* Returns:     Nil                                                         */
497 /* Parameters:  hmp(I) - pointer to hostmap structure pointer               */
498 /* Write Locks: ipf_nat                                                     */
499 /*                                                                          */
500 /* Decrement the references to this hostmap structure by one.  If this      */
501 /* reaches zero then remove it and free it.                                 */
502 /* ------------------------------------------------------------------------ */
fr_hostmapdel(hmp)503 void fr_hostmapdel(hmp)
504 struct hostmap **hmp;
505 {
506 	struct hostmap *hm;
507 
508 	hm = *hmp;
509 	*hmp = NULL;
510 
511 	hm->hm_ref--;
512 	if (hm->hm_ref == 0) {
513 		if (hm->hm_hnext)
514 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
515 		*hm->hm_phnext = hm->hm_hnext;
516 		if (hm->hm_next)
517 			hm->hm_next->hm_pnext = hm->hm_pnext;
518 		*hm->hm_pnext = hm->hm_next;
519 		KFREE(hm);
520 	}
521 }
522 
523 
524 /* ------------------------------------------------------------------------ */
525 /* Function:    fix_outcksum                                                */
526 /* Returns:     Nil                                                         */
527 /* Parameters:  fin(I) - pointer to packet information                      */
528 /*              sp(I)  - location of 16bit checksum to update               */
529 /*              n((I)  - amount to adjust checksum by                       */
530 /*                                                                          */
531 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
532 /* ------------------------------------------------------------------------ */
fix_outcksum(fin,sp,n)533 void fix_outcksum(fin, sp, n)
534 fr_info_t *fin;
535 u_short *sp;
536 u_32_t n;
537 {
538 	u_short sumshort;
539 	u_32_t sum1;
540 
541 	if (n == 0)
542 		return;
543 
544 	if (n & NAT_HW_CKSUM) {
545 		n &= 0xffff;
546 		n += fin->fin_dlen;
547 		n = (n & 0xffff) + (n >> 16);
548 		*sp = n & 0xffff;
549 		return;
550 	}
551 	sum1 = (~ntohs(*sp)) & 0xffff;
552 	sum1 += (n);
553 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
554 	/* Again */
555 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
556 	sumshort = ~(u_short)sum1;
557 	*(sp) = htons(sumshort);
558 }
559 
560 
561 /* ------------------------------------------------------------------------ */
562 /* Function:    fix_incksum                                                 */
563 /* Returns:     Nil                                                         */
564 /* Parameters:  fin(I) - pointer to packet information                      */
565 /*              sp(I)  - location of 16bit checksum to update               */
566 /*              n((I)  - amount to adjust checksum by                       */
567 /*                                                                          */
568 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
569 /* ------------------------------------------------------------------------ */
fix_incksum(fin,sp,n)570 void fix_incksum(fin, sp, n)
571 fr_info_t *fin;
572 u_short *sp;
573 u_32_t n;
574 {
575 	u_short sumshort;
576 	u_32_t sum1;
577 
578 	if (n == 0)
579 		return;
580 
581 	if (n & NAT_HW_CKSUM) {
582 		n &= 0xffff;
583 		n += fin->fin_dlen;
584 		n = (n & 0xffff) + (n >> 16);
585 		*sp = n & 0xffff;
586 		return;
587 	}
588 	sum1 = (~ntohs(*sp)) & 0xffff;
589 	sum1 += ~(n) & 0xffff;
590 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
591 	/* Again */
592 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
593 	sumshort = ~(u_short)sum1;
594 	*(sp) = htons(sumshort);
595 }
596 
597 
598 /* ------------------------------------------------------------------------ */
599 /* Function:    fix_datacksum                                               */
600 /* Returns:     Nil                                                         */
601 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
602 /*              n((I)  - amount to adjust checksum by                       */
603 /*                                                                          */
604 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
605 /* data section of an IP packet.                                            */
606 /*                                                                          */
607 /* The only situation in which you need to do this is when NAT'ing an       */
608 /* ICMP error message. Such a message, contains in its body the IP header   */
609 /* of the original IP packet, that causes the error.                        */
610 /*                                                                          */
611 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
612 /* kernel the data section of the ICMP error is just data, and no special   */
613 /* processing like hardware cksum or ntohs processing have been done by the */
614 /* kernel on the data section.                                              */
615 /* ------------------------------------------------------------------------ */
fix_datacksum(sp,n)616 void fix_datacksum(sp, n)
617 u_short *sp;
618 u_32_t n;
619 {
620 	u_short sumshort;
621 	u_32_t sum1;
622 
623 	if (n == 0)
624 		return;
625 
626 	sum1 = (~ntohs(*sp)) & 0xffff;
627 	sum1 += (n);
628 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
629 	/* Again */
630 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
631 	sumshort = ~(u_short)sum1;
632 	*(sp) = htons(sumshort);
633 }
634 
635 
636 /* ------------------------------------------------------------------------ */
637 /* Function:    fr_nat_ioctl                                                */
638 /* Returns:     int - 0 == success, != 0 == failure                         */
639 /* Parameters:  data(I) - pointer to ioctl data                             */
640 /*              cmd(I)  - ioctl command integer                             */
641 /*              mode(I) - file mode bits used with open                     */
642 /*                                                                          */
643 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
644 /* ------------------------------------------------------------------------ */
fr_nat_ioctl(data,cmd,mode,uid,ctx)645 int fr_nat_ioctl(data, cmd, mode, uid, ctx)
646 ioctlcmd_t cmd;
647 caddr_t data;
648 int mode, uid;
649 void *ctx;
650 {
651 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
652 	int error = 0, ret, arg, getlock;
653 	ipnat_t natd;
654 	SPL_INT(s);
655 
656 #if (BSD >= 199306) && defined(_KERNEL)
657 # if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399002000)
658 	if ((mode & FWRITE) &&
659 	     kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL,
660 				     KAUTH_REQ_NETWORK_FIREWALL_FW,
661 				     NULL, NULL, NULL)) {
662 		return EPERM;
663 	}
664 # else
665 #  if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
666 	if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
667 #  else
668 	if ((securelevel >= 3) && (mode & FWRITE)) {
669 #  endif
670 		return EPERM;
671 	}
672 # endif
673 #endif
674 
675 #if defined(__osf__) && defined(_KERNEL)
676 	getlock = 0;
677 #else
678 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
679 #endif
680 
681 	nat = NULL;     /* XXX gcc -Wuninitialized */
682 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
683 		KMALLOC(nt, ipnat_t *);
684 	} else {
685 		nt = NULL;
686 	}
687 
688 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
689 		if (mode & NAT_SYSSPACE) {
690 			bcopy(data, (char *)&natd, sizeof(natd));
691 			error = 0;
692 		} else {
693 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
694 		}
695 	}
696 
697 	if (error != 0)
698 		goto done;
699 
700 	/*
701 	 * For add/delete, look to see if the NAT entry is already present
702 	 */
703 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
704 		nat = &natd;
705 		if (nat->in_v == 0)	/* For backward compat. */
706 			nat->in_v = 4;
707 		nat->in_flags &= IPN_USERFLAGS;
708 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
709 			if ((nat->in_flags & IPN_SPLIT) == 0)
710 				nat->in_inip &= nat->in_inmsk;
711 			if ((nat->in_flags & IPN_IPRANGE) == 0)
712 				nat->in_outip &= nat->in_outmsk;
713 		}
714 		MUTEX_ENTER(&ipf_natio);
715 		for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next)
716 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
717 					IPN_CMPSIZ) == 0) {
718 				if (nat->in_redir == NAT_REDIRECT &&
719 				    nat->in_pnext != n->in_pnext)
720 					continue;
721 				break;
722 			}
723 	}
724 
725 	switch (cmd)
726 	{
727 #ifdef  IPFILTER_LOG
728 	case SIOCIPFFB :
729 	{
730 		int tmp;
731 
732 		if (!(mode & FWRITE))
733 			error = EPERM;
734 		else {
735 			tmp = ipflog_clear(IPL_LOGNAT);
736 			error = BCOPYOUT((char *)&tmp, (char *)data,
737 					 sizeof(tmp));
738 			if (error != 0)
739 				error = EFAULT;
740 		}
741 		break;
742 	}
743 
744 	case SIOCSETLG :
745 		if (!(mode & FWRITE))
746 			error = EPERM;
747 		else {
748 			error = BCOPYIN((char *)data, (char *)&nat_logging,
749 					sizeof(nat_logging));
750 			if (error != 0)
751 				error = EFAULT;
752 		}
753 		break;
754 
755 	case SIOCGETLG :
756 		error = BCOPYOUT((char *)&nat_logging, (char *)data,
757 				 sizeof(nat_logging));
758 		if (error != 0)
759 			error = EFAULT;
760 		break;
761 
762 	case FIONREAD :
763 		arg = iplused[IPL_LOGNAT];
764 		error = BCOPYOUT(&arg, data, sizeof(arg));
765 		if (error != 0)
766 			error = EFAULT;
767 		break;
768 #endif
769 	case SIOCADNAT :
770 		if (!(mode & FWRITE)) {
771 			error = EPERM;
772 		} else if (n != NULL) {
773 			error = EEXIST;
774 		} else if (nt == NULL) {
775 			error = ENOMEM;
776 		}
777 		if (error != 0) {
778 			MUTEX_EXIT(&ipf_natio);
779 			break;
780 		}
781 		bcopy((char *)nat, (char *)nt, sizeof(*n));
782 		error = nat_siocaddnat(nt, np, getlock);
783 		MUTEX_EXIT(&ipf_natio);
784 		if (error == 0)
785 			nt = NULL;
786 		break;
787 
788 	case SIOCRMNAT :
789 		if (!(mode & FWRITE)) {
790 			error = EPERM;
791 			n = NULL;
792 		} else if (n == NULL) {
793 			error = ESRCH;
794 		}
795 
796 		if (error != 0) {
797 			MUTEX_EXIT(&ipf_natio);
798 			break;
799 		}
800 		nat_siocdelnat(n, np, getlock);
801 
802 		MUTEX_EXIT(&ipf_natio);
803 		n = NULL;
804 		break;
805 
806 	case SIOCGNATS :
807 		nat_stats.ns_table[0] = nat_table[0];
808 		nat_stats.ns_table[1] = nat_table[1];
809 		nat_stats.ns_list = nat_list;
810 		nat_stats.ns_maptable = ipf_hm_maptable;
811 		nat_stats.ns_maplist = ipf_hm_maplist;
812 		nat_stats.ns_nattab_sz = ipf_nattable_sz;
813 		nat_stats.ns_nattab_max = ipf_nattable_max;
814 		nat_stats.ns_rultab_sz = ipf_natrules_sz;
815 		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
816 		nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
817 		nat_stats.ns_instances = nat_instances;
818 		nat_stats.ns_apslist = ap_sess_list;
819 		nat_stats.ns_ticks = fr_ticks;
820 		error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT);
821 		break;
822 
823 	case SIOCGNATL :
824 	    {
825 		natlookup_t nl;
826 
827 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
828 		if (error == 0) {
829 			void *ptr;
830 
831 			if (getlock) {
832 				READ_ENTER(&ipf_nat);
833 			}
834 			ptr = nat_lookupredir(&nl);
835 			if (getlock) {
836 				RWLOCK_EXIT(&ipf_nat);
837 			}
838 			if (ptr != NULL) {
839 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
840 			} else {
841 				error = ESRCH;
842 			}
843 		}
844 		break;
845 	    }
846 
847 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
848 		if (!(mode & FWRITE)) {
849 			error = EPERM;
850 			break;
851 		}
852 		if (getlock) {
853 			WRITE_ENTER(&ipf_nat);
854 		}
855 
856 		error = BCOPYIN(data, &arg, sizeof(arg));
857 		if (error != 0)
858 			error = EFAULT;
859 		else {
860 			if (arg == 0)
861 				ret = nat_flushtable();
862 			else if (arg == 1)
863 				ret = nat_clearlist();
864 			else
865 				ret = nat_extraflush(arg);
866 		}
867 
868 		if (getlock) {
869 			RWLOCK_EXIT(&ipf_nat);
870 		}
871 		if (error == 0) {
872 			error = BCOPYOUT(&ret, data, sizeof(ret));
873 		}
874 		break;
875 
876 	case SIOCPROXY :
877 		error = appr_ioctl(data, cmd, mode, ctx);
878 		break;
879 
880 	case SIOCSTLCK :
881 		if (!(mode & FWRITE)) {
882 			error = EPERM;
883 		} else {
884 			error = fr_lock(data, &fr_nat_lock);
885 		}
886 		break;
887 
888 	case SIOCSTPUT :
889 		if ((mode & FWRITE) != 0) {
890 			error = fr_natputent(data, getlock);
891 		} else {
892 			error = EACCES;
893 		}
894 		break;
895 
896 	case SIOCSTGSZ :
897 		if (fr_nat_lock) {
898 			error = fr_natgetsz(data, getlock);
899 		} else
900 			error = EACCES;
901 		break;
902 
903 	case SIOCSTGET :
904 		if (fr_nat_lock) {
905 			error = fr_natgetent(data, getlock);
906 		} else
907 			error = EACCES;
908 		break;
909 
910 	case SIOCGENITER :
911 	    {
912 		ipfgeniter_t iter;
913 		ipftoken_t *token;
914 
915 		SPL_SCHED(s);
916 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
917 		if (error == 0) {
918 			token = ipf_findtoken(iter.igi_type, uid, ctx);
919 			if (token != NULL) {
920 				error  = nat_iterator(token, &iter);
921 			}
922 			RWLOCK_EXIT(&ipf_tokens);
923 		}
924 		SPL_X(s);
925 		break;
926 	    }
927 
928 	case SIOCIPFDELTOK :
929 		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
930 		if (error == 0) {
931 			SPL_SCHED(s);
932 			error = ipf_deltoken(arg, uid, ctx);
933 			SPL_X(s);
934 		} else {
935 			error = EFAULT;
936 		}
937 		break;
938 
939 	case SIOCGTQTAB :
940 		error = fr_outobj(data, nat_tqb, IPFOBJ_STATETQTAB);
941 		break;
942 
943 	case SIOCGTABL :
944 		error = nat_gettable(data);
945 		break;
946 
947 	default :
948 		error = EINVAL;
949 		break;
950 	}
951 done:
952 	if (nt != NULL)
953 		KFREE(nt);
954 	return error;
955 }
956 
957 
958 /* ------------------------------------------------------------------------ */
959 /* Function:    nat_siocaddnat                                              */
960 /* Returns:     int - 0 == success, != 0 == failure                         */
961 /* Parameters:  n(I)       - pointer to new NAT rule                        */
962 /*              np(I)      - pointer to where to insert new NAT rule        */
963 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
964 /* Mutex Locks: ipf_natio                                                   */
965 /*                                                                          */
966 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
967 /* from information passed to the kernel, then add it  to the appropriate   */
968 /* NAT rule table(s).                                                       */
969 /* ------------------------------------------------------------------------ */
nat_siocaddnat(n,np,getlock)970 static int nat_siocaddnat(n, np, getlock)
971 ipnat_t *n, **np;
972 int getlock;
973 {
974 	int error = 0, i, j;
975 
976 	if (nat_resolverule(n) != 0)
977 		return ENOENT;
978 
979 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
980 		return EINVAL;
981 
982 	n->in_use = 0;
983 	if (n->in_redir & NAT_MAPBLK)
984 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
985 	else if (n->in_flags & IPN_AUTOPORTMAP)
986 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
987 	else if (n->in_flags & IPN_IPRANGE)
988 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
989 	else if (n->in_flags & IPN_SPLIT)
990 		n->in_space = 2;
991 	else if (n->in_outmsk != 0)
992 		n->in_space = ~ntohl(n->in_outmsk);
993 	else
994 		n->in_space = 1;
995 
996 	/*
997 	 * Calculate the number of valid IP addresses in the output
998 	 * mapping range.  In all cases, the range is inclusive of
999 	 * the start and ending IP addresses.
1000 	 * If to a CIDR address, lose 2: broadcast + network address
1001 	 *                               (so subtract 1)
1002 	 * If to a range, add one.
1003 	 * If to a single IP address, set to 1.
1004 	 */
1005 	if (n->in_space) {
1006 		if ((n->in_flags & IPN_IPRANGE) != 0)
1007 			n->in_space += 1;
1008 		else
1009 			n->in_space -= 1;
1010 	} else
1011 		n->in_space = 1;
1012 
1013 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
1014 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
1015 		n->in_nip = ntohl(n->in_outip) + 1;
1016 	else if ((n->in_flags & IPN_SPLIT) &&
1017 		 (n->in_redir & NAT_REDIRECT))
1018 		n->in_nip = ntohl(n->in_inip);
1019 	else
1020 		n->in_nip = ntohl(n->in_outip);
1021 	if (n->in_redir & NAT_MAP) {
1022 		n->in_pnext = ntohs(n->in_pmin);
1023 		/*
1024 		 * Multiply by the number of ports made available.
1025 		 */
1026 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
1027 			n->in_space *= (ntohs(n->in_pmax) -
1028 					ntohs(n->in_pmin) + 1);
1029 			/*
1030 			 * Because two different sources can map to
1031 			 * different destinations but use the same
1032 			 * local IP#/port #.
1033 			 * If the result is smaller than in_space, then
1034 			 * we may have wrapped around 32bits.
1035 			 */
1036 			i = n->in_inmsk;
1037 			if ((i != 0) && (i != 0xffffffff)) {
1038 				j = n->in_space * (~ntohl(i) + 1);
1039 				if (j >= n->in_space)
1040 					n->in_space = j;
1041 				else
1042 					n->in_space = 0xffffffff;
1043 			}
1044 		}
1045 		/*
1046 		 * If no protocol is specified, multiple by 256 to allow for
1047 		 * at least one IP:IP mapping per protocol.
1048 		 */
1049 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1050 				j = n->in_space * 256;
1051 				if (j >= n->in_space)
1052 					n->in_space = j;
1053 				else
1054 					n->in_space = 0xffffffff;
1055 		}
1056 	}
1057 
1058 	/* Otherwise, these fields are preset */
1059 
1060 	if (getlock) {
1061 		WRITE_ENTER(&ipf_nat);
1062 	}
1063 	n->in_next = NULL;
1064 	*np = n;
1065 
1066 	if (n->in_age[0] != 0)
1067 		n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]);
1068 
1069 	if (n->in_age[1] != 0)
1070 		n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]);
1071 
1072 	if (n->in_redir & NAT_REDIRECT) {
1073 		n->in_flags &= ~IPN_NOTDST;
1074 		nat_addrdr(n);
1075 	}
1076 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1077 		n->in_flags &= ~IPN_NOTSRC;
1078 		nat_addnat(n);
1079 	}
1080 	MUTEX_INIT(&n->in_lock, "ipnat rule lock");
1081 
1082 	n = NULL;
1083 	nat_stats.ns_rules++;
1084 #if SOLARIS && !defined(_INET_IP_STACK_H)
1085 	pfil_delayed_copy = 0;
1086 #endif
1087 	if (getlock) {
1088 		RWLOCK_EXIT(&ipf_nat);			/* WRITE */
1089 	}
1090 
1091 	return error;
1092 }
1093 
1094 
1095 /* ------------------------------------------------------------------------ */
1096 /* Function:    nat_resolvrule                                              */
1097 /* Returns:     Nil                                                         */
1098 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1099 /*                                                                          */
1100 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1101 /* from information passed to the kernel, then add it  to the appropriate   */
1102 /* NAT rule table(s).                                                       */
1103 /* ------------------------------------------------------------------------ */
nat_resolverule(n)1104 static int nat_resolverule(n)
1105 ipnat_t *n;
1106 {
1107 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1108 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
1109 
1110 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1111 	if (n->in_ifnames[1][0] == '\0') {
1112 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1113 		n->in_ifps[1] = n->in_ifps[0];
1114 	} else {
1115 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
1116 	}
1117 
1118 	if (n->in_plabel[0] != '\0') {
1119 		n->in_apr = appr_lookup(n->in_p, n->in_plabel);
1120 		if (n->in_apr == NULL)
1121 			return -1;
1122 	}
1123 	return 0;
1124 }
1125 
1126 
1127 /* ------------------------------------------------------------------------ */
1128 /* Function:    nat_siocdelnat                                              */
1129 /* Returns:     int - 0 == success, != 0 == failure                         */
1130 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1131 /*              np(I)      - pointer to where to insert new NAT rule        */
1132 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1133 /* Mutex Locks: ipf_natio                                                   */
1134 /*                                                                          */
1135 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1136 /* from information passed to the kernel, then add it  to the appropriate   */
1137 /* NAT rule table(s).                                                       */
1138 /* ------------------------------------------------------------------------ */
nat_siocdelnat(n,np,getlock)1139 static void nat_siocdelnat(n, np, getlock)
1140 ipnat_t *n, **np;
1141 int getlock;
1142 {
1143 	if (getlock) {
1144 		WRITE_ENTER(&ipf_nat);
1145 	}
1146 	if (n->in_redir & NAT_REDIRECT)
1147 		nat_delrdr(n);
1148 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1149 		nat_delnat(n);
1150 	if (nat_list == NULL) {
1151 		nat_masks = 0;
1152 		rdr_masks = 0;
1153 	}
1154 
1155 	if (n->in_tqehead[0] != NULL) {
1156 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1157 			fr_freetimeoutqueue(n->in_tqehead[1]);
1158 		}
1159 	}
1160 
1161 	if (n->in_tqehead[1] != NULL) {
1162 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1163 			fr_freetimeoutqueue(n->in_tqehead[1]);
1164 		}
1165 	}
1166 
1167 	*np = n->in_next;
1168 
1169 	if (n->in_use == 0) {
1170 		if (n->in_apr)
1171 			appr_free(n->in_apr);
1172 		MUTEX_DESTROY(&n->in_lock);
1173 		KFREE(n);
1174 		nat_stats.ns_rules--;
1175 #if SOLARIS && !defined(_INET_IP_STACK_H)
1176 		if (nat_stats.ns_rules == 0)
1177 			pfil_delayed_copy = 1;
1178 #endif
1179 	} else {
1180 		n->in_flags |= IPN_DELETE;
1181 		n->in_next = NULL;
1182 	}
1183 	if (getlock) {
1184 		RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
1185 	}
1186 }
1187 
1188 
1189 /* ------------------------------------------------------------------------ */
1190 /* Function:    fr_natgetsz                                                 */
1191 /* Returns:     int - 0 == success, != 0 is the error value.                */
1192 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1193 /*                        get the size of.                                  */
1194 /*                                                                          */
1195 /* Handle SIOCSTGSZ.                                                        */
1196 /* Return the size of the nat list entry to be copied back to user space.   */
1197 /* The size of the entry is stored in the ng_sz field and the enture natget */
1198 /* structure is copied back to the user.                                    */
1199 /* ------------------------------------------------------------------------ */
fr_natgetsz(data,getlock)1200 static int fr_natgetsz(data, getlock)
1201 caddr_t data;
1202 int getlock;
1203 {
1204 	ap_session_t *aps;
1205 	nat_t *nat, *n;
1206 	natget_t ng;
1207 
1208 	if (BCOPYIN(data, &ng, sizeof(ng)) != 0)
1209 		return EFAULT;
1210 
1211 	if (getlock) {
1212 		READ_ENTER(&ipf_nat);
1213 	}
1214 
1215 	nat = ng.ng_ptr;
1216 	if (!nat) {
1217 		nat = nat_instances;
1218 		ng.ng_sz = 0;
1219 		/*
1220 		 * Empty list so the size returned is 0.  Simple.
1221 		 */
1222 		if (nat == NULL) {
1223 			if (getlock) {
1224 				RWLOCK_EXIT(&ipf_nat);
1225 			}
1226 			if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1227 				return EFAULT;
1228 			return 0;
1229 		}
1230 	} else {
1231 		/*
1232 		 * Make sure the pointer we're copying from exists in the
1233 		 * current list of entries.  Security precaution to prevent
1234 		 * copying of random kernel data.
1235 		 */
1236 		for (n = nat_instances; n; n = n->nat_next)
1237 			if (n == nat)
1238 				break;
1239 		if (n == NULL) {
1240 			if (getlock) {
1241 				RWLOCK_EXIT(&ipf_nat);
1242 			}
1243 			return ESRCH;
1244 		}
1245 	}
1246 
1247 	/*
1248 	 * Incluse any space required for proxy data structures.
1249 	 */
1250 	ng.ng_sz = sizeof(nat_save_t);
1251 	aps = nat->nat_aps;
1252 	if (aps != NULL) {
1253 		ng.ng_sz += sizeof(ap_session_t) - 4;
1254 		if (aps->aps_data != 0)
1255 			ng.ng_sz += aps->aps_psiz;
1256 	}
1257 	if (getlock) {
1258 		RWLOCK_EXIT(&ipf_nat);
1259 	}
1260 
1261 	if (BCOPYOUT(&ng, data, sizeof(ng)) != 0)
1262 		return EFAULT;
1263 	return 0;
1264 }
1265 
1266 
1267 /* ------------------------------------------------------------------------ */
1268 /* Function:    fr_natgetent                                                */
1269 /* Returns:     int - 0 == success, != 0 is the error value.                */
1270 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1271 /*                        to NAT structure to copy out.                     */
1272 /*                                                                          */
1273 /* Handle SIOCSTGET.                                                        */
1274 /* Copies out NAT entry to user space.  Any additional data held for a      */
1275 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1276 /* ------------------------------------------------------------------------ */
fr_natgetent(data,getlock)1277 static int fr_natgetent(data, getlock)
1278 caddr_t data;
1279 int getlock;
1280 {
1281 	int error, outsize;
1282 	ap_session_t *aps;
1283 	nat_save_t *ipn, ipns;
1284 	nat_t *n, *nat;
1285 
1286 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1287 	if (error != 0)
1288 		return error;
1289 
1290 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1291 		return EINVAL;
1292 
1293 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1294 	if (ipn == NULL)
1295 		return ENOMEM;
1296 
1297 	if (getlock) {
1298 		READ_ENTER(&ipf_nat);
1299 	}
1300 
1301 	ipn->ipn_dsize = ipns.ipn_dsize;
1302 	nat = ipns.ipn_next;
1303 	if (nat == NULL) {
1304 		nat = nat_instances;
1305 		if (nat == NULL) {
1306 			if (nat_instances == NULL)
1307 				error = ENOENT;
1308 			goto finished;
1309 		}
1310 	} else {
1311 		/*
1312 		 * Make sure the pointer we're copying from exists in the
1313 		 * current list of entries.  Security precaution to prevent
1314 		 * copying of random kernel data.
1315 		 */
1316 		for (n = nat_instances; n; n = n->nat_next)
1317 			if (n == nat)
1318 				break;
1319 		if (n == NULL) {
1320 			error = ESRCH;
1321 			goto finished;
1322 		}
1323 	}
1324 	ipn->ipn_next = nat->nat_next;
1325 
1326 	/*
1327 	 * Copy the NAT structure.
1328 	 */
1329 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1330 
1331 	/*
1332 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1333 	 */
1334 	if (nat->nat_ptr != NULL)
1335 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1336 		      sizeof(ipn->ipn_ipnat));
1337 
1338 	/*
1339 	 * If we also know the NAT entry has an associated filter rule,
1340 	 * save that too.
1341 	 */
1342 	if (nat->nat_fr != NULL)
1343 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1344 		      sizeof(ipn->ipn_fr));
1345 
1346 	/*
1347 	 * Last but not least, if there is an application proxy session set
1348 	 * up for this NAT entry, then copy that out too, including any
1349 	 * private data saved along side it by the proxy.
1350 	 */
1351 	aps = nat->nat_aps;
1352 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1353 	if (aps != NULL) {
1354 		char *s;
1355 
1356 		if (outsize < sizeof(*aps)) {
1357 			error = ENOBUFS;
1358 			goto finished;
1359 		}
1360 
1361 		s = ipn->ipn_data;
1362 		bcopy((char *)aps, s, sizeof(*aps));
1363 		s += sizeof(*aps);
1364 		outsize -= sizeof(*aps);
1365 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1366 			bcopy(aps->aps_data, s, aps->aps_psiz);
1367 		else
1368 			error = ENOBUFS;
1369 	}
1370 	if (error == 0) {
1371 		if (getlock) {
1372 			RWLOCK_EXIT(&ipf_nat);
1373 			getlock = 0;
1374 		}
1375 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1376 	}
1377 
1378 finished:
1379 	if (getlock) {
1380 		RWLOCK_EXIT(&ipf_nat);
1381 	}
1382 	if (ipn != NULL) {
1383 		KFREES(ipn, ipns.ipn_dsize);
1384 	}
1385 	return error;
1386 }
1387 
1388 
1389 /* ------------------------------------------------------------------------ */
1390 /* Function:    fr_natputent                                                */
1391 /* Returns:     int - 0 == success, != 0 is the error value.                */
1392 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1393 /*                            structure information to load into the kernel */
1394 /*              getlock(I) - flag indicating whether or not a write lock    */
1395 /*                           on ipf_nat is already held.                    */
1396 /*                                                                          */
1397 /* Handle SIOCSTPUT.                                                        */
1398 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1399 /* firewall rule data structures, if pointers to them indicate so.          */
1400 /* ------------------------------------------------------------------------ */
fr_natputent(data,getlock)1401 static int fr_natputent(data, getlock)
1402 caddr_t data;
1403 int getlock;
1404 {
1405 	nat_save_t ipn, *ipnn;
1406 	ap_session_t *aps;
1407 	nat_t *n, *nat;
1408 	frentry_t *fr;
1409 	fr_info_t fin;
1410 	ipnat_t *in;
1411 	int error;
1412 
1413 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1414 	if (error != 0)
1415 		return error;
1416 
1417 	/*
1418 	 * Initialise early because of code at junkput label.
1419 	 */
1420 	in = NULL;
1421 	aps = NULL;
1422 	nat = NULL;
1423 	ipnn = NULL;
1424 	fr = NULL;
1425 
1426 	/*
1427 	 * New entry, copy in the rest of the NAT entry if it's size is more
1428 	 * than just the nat_t structure.
1429 	 */
1430 	if (ipn.ipn_dsize > sizeof(ipn)) {
1431 		if (ipn.ipn_dsize > 81920) {
1432 			error = ENOMEM;
1433 			goto junkput;
1434 		}
1435 
1436 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1437 		if (ipnn == NULL)
1438 			return ENOMEM;
1439 
1440 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1441 		if (error != 0) {
1442 			error = EFAULT;
1443 			goto junkput;
1444 		}
1445 	} else
1446 		ipnn = &ipn;
1447 
1448 	KMALLOC(nat, nat_t *);
1449 	if (nat == NULL) {
1450 		error = ENOMEM;
1451 		goto junkput;
1452 	}
1453 
1454 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1455 	/*
1456 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1457 	 */
1458 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1459 	nat->nat_tqe.tqe_pnext = NULL;
1460 	nat->nat_tqe.tqe_next = NULL;
1461 	nat->nat_tqe.tqe_ifq = NULL;
1462 	nat->nat_tqe.tqe_parent = nat;
1463 
1464 	/*
1465 	 * Restore the rule associated with this nat session
1466 	 */
1467 	in = ipnn->ipn_nat.nat_ptr;
1468 	if (in != NULL) {
1469 		KMALLOC(in, ipnat_t *);
1470 		nat->nat_ptr = in;
1471 		if (in == NULL) {
1472 			error = ENOMEM;
1473 			goto junkput;
1474 		}
1475 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1476 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1477 		in->in_use = 1;
1478 		in->in_flags |= IPN_DELETE;
1479 
1480 		ATOMIC_INC(nat_stats.ns_rules);
1481 
1482 		if (nat_resolverule(in) != 0) {
1483 			error = ESRCH;
1484 			goto junkput;
1485 		}
1486 	}
1487 
1488 	/*
1489 	 * Check that the NAT entry doesn't already exist in the kernel.
1490 	 *
1491 	 * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry.  To do
1492 	 * this, we check to see if the inbound combination of addresses and
1493 	 * ports is already known.  Similar logic is applied for NAT_INBOUND.
1494 	 *
1495 	 */
1496 	bzero((char *)&fin, sizeof(fin));
1497 	fin.fin_p = nat->nat_p;
1498 	if (nat->nat_dir == NAT_OUTBOUND) {
1499 		fin.fin_ifp = nat->nat_ifps[0];
1500 		fin.fin_data[0] = ntohs(nat->nat_oport);
1501 		fin.fin_data[1] = ntohs(nat->nat_outport);
1502 		if (getlock) {
1503 			READ_ENTER(&ipf_nat);
1504 		}
1505 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1506 				 nat->nat_oip, nat->nat_inip);
1507 		if (getlock) {
1508 			RWLOCK_EXIT(&ipf_nat);
1509 		}
1510 		if (n != NULL) {
1511 			error = EEXIST;
1512 			goto junkput;
1513 		}
1514 	} else if (nat->nat_dir == NAT_INBOUND) {
1515 		fin.fin_ifp = nat->nat_ifps[0];
1516 		fin.fin_data[0] = ntohs(nat->nat_outport);
1517 		fin.fin_data[1] = ntohs(nat->nat_oport);
1518 		if (getlock) {
1519 			READ_ENTER(&ipf_nat);
1520 		}
1521 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1522 				  nat->nat_outip, nat->nat_oip);
1523 		if (getlock) {
1524 			RWLOCK_EXIT(&ipf_nat);
1525 		}
1526 		if (n != NULL) {
1527 			error = EEXIST;
1528 			goto junkput;
1529 		}
1530 	} else {
1531 		error = EINVAL;
1532 		goto junkput;
1533 	}
1534 
1535 	/*
1536 	 * Restore ap_session_t structure.  Include the private data allocated
1537 	 * if it was there.
1538 	 */
1539 	aps = nat->nat_aps;
1540 	if (aps != NULL) {
1541 		KMALLOC(aps, ap_session_t *);
1542 		nat->nat_aps = aps;
1543 		if (aps == NULL) {
1544 			error = ENOMEM;
1545 			goto junkput;
1546 		}
1547 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1548 		if (in != NULL)
1549 			aps->aps_apr = in->in_apr;
1550 		else
1551 			aps->aps_apr = NULL;
1552 		if (aps->aps_psiz != 0) {
1553 			if (aps->aps_psiz > 81920) {
1554 				error = ENOMEM;
1555 				goto junkput;
1556 			}
1557 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1558 			if (aps->aps_data == NULL) {
1559 				error = ENOMEM;
1560 				goto junkput;
1561 			}
1562 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1563 			      aps->aps_psiz);
1564 		} else {
1565 			aps->aps_psiz = 0;
1566 			aps->aps_data = NULL;
1567 		}
1568 	}
1569 
1570 	/*
1571 	 * If there was a filtering rule associated with this entry then
1572 	 * build up a new one.
1573 	 */
1574 	fr = nat->nat_fr;
1575 	if (fr != NULL) {
1576 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1577 			KMALLOC(fr, frentry_t *);
1578 			nat->nat_fr = fr;
1579 			if (fr == NULL) {
1580 				error = ENOMEM;
1581 				goto junkput;
1582 			}
1583 			ipnn->ipn_nat.nat_fr = fr;
1584 			fr->fr_ref = 1;
1585 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1586 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1587 
1588 			fr->fr_ref = 1;
1589 			fr->fr_dsize = 0;
1590 			fr->fr_data = NULL;
1591 			fr->fr_type = FR_T_NONE;
1592 
1593 			MUTEX_NUKE(&fr->fr_lock);
1594 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1595 		} else {
1596 			if (getlock) {
1597 				READ_ENTER(&ipf_nat);
1598 			}
1599 			for (n = nat_instances; n; n = n->nat_next)
1600 				if (n->nat_fr == fr)
1601 					break;
1602 
1603 			if (n != NULL) {
1604 				MUTEX_ENTER(&fr->fr_lock);
1605 				fr->fr_ref++;
1606 				MUTEX_EXIT(&fr->fr_lock);
1607 			}
1608 			if (getlock) {
1609 				RWLOCK_EXIT(&ipf_nat);
1610 			}
1611 
1612 			if (!n) {
1613 				error = ESRCH;
1614 				goto junkput;
1615 			}
1616 		}
1617 	}
1618 
1619 	if (ipnn != &ipn) {
1620 		KFREES(ipnn, ipn.ipn_dsize);
1621 		ipnn = NULL;
1622 	}
1623 
1624 	if (getlock) {
1625 		WRITE_ENTER(&ipf_nat);
1626 	}
1627 	error = nat_insert(nat, nat->nat_rev);
1628 	if ((error == 0) && (aps != NULL)) {
1629 		aps->aps_next = ap_sess_list;
1630 		ap_sess_list = aps;
1631 	}
1632 	if (getlock) {
1633 		RWLOCK_EXIT(&ipf_nat);
1634 	}
1635 
1636 	if (error == 0)
1637 		return 0;
1638 
1639 	error = ENOMEM;
1640 
1641 junkput:
1642 	if (fr != NULL)
1643 		(void) fr_derefrule(&fr);
1644 
1645 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1646 		KFREES(ipnn, ipn.ipn_dsize);
1647 	}
1648 	if (nat != NULL) {
1649 		if (aps != NULL) {
1650 			if (aps->aps_data != NULL) {
1651 				KFREES(aps->aps_data, aps->aps_psiz);
1652 			}
1653 			KFREE(aps);
1654 		}
1655 		if (in != NULL) {
1656 			if (in->in_apr)
1657 				appr_free(in->in_apr);
1658 			KFREE(in);
1659 		}
1660 		KFREE(nat);
1661 	}
1662 	return error;
1663 }
1664 
1665 
1666 /* ------------------------------------------------------------------------ */
1667 /* Function:    nat_delete                                                  */
1668 /* Returns:     Nil                                                         */
1669 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1670 /*              logtype(I) - type of LOG record to create before deleting   */
1671 /* Write Lock:  ipf_nat                                                     */
1672 /*                                                                          */
1673 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1674 /* enabled then generate a NAT log record for this event.                   */
1675 /* ------------------------------------------------------------------------ */
nat_delete(nat,logtype)1676 void nat_delete(nat, logtype)
1677 struct nat *nat;
1678 int logtype;
1679 {
1680 	struct ipnat *ipn;
1681 	int removed = 0;
1682 
1683 	if (logtype != 0 && nat_logging != 0)
1684 		nat_log(nat, logtype);
1685 #if defined(NEED_LOCAL_RAND) && defined(_KERNEL)
1686 	ipf_rand_push(nat, sizeof(*nat));
1687 #endif
1688 
1689 	/*
1690 	 * Take it as a general indication that all the pointers are set if
1691 	 * nat_pnext is set.
1692 	 */
1693 	if (nat->nat_pnext != NULL) {
1694 		removed = 1;
1695 
1696 		nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1697 		nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1698 
1699 		*nat->nat_pnext = nat->nat_next;
1700 		if (nat->nat_next != NULL) {
1701 			nat->nat_next->nat_pnext = nat->nat_pnext;
1702 			nat->nat_next = NULL;
1703 		}
1704 		nat->nat_pnext = NULL;
1705 
1706 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1707 		if (nat->nat_hnext[0] != NULL) {
1708 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1709 			nat->nat_hnext[0] = NULL;
1710 		}
1711 		nat->nat_phnext[0] = NULL;
1712 
1713 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1714 		if (nat->nat_hnext[1] != NULL) {
1715 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1716 			nat->nat_hnext[1] = NULL;
1717 		}
1718 		nat->nat_phnext[1] = NULL;
1719 
1720 		if ((nat->nat_flags & SI_WILDP) != 0)
1721 			nat_stats.ns_wilds--;
1722 	}
1723 
1724 	if (nat->nat_me != NULL) {
1725 		*nat->nat_me = NULL;
1726 		nat->nat_me = NULL;
1727 	}
1728 
1729 	if (nat->nat_tqe.tqe_ifq != NULL)
1730 		fr_deletequeueentry(&nat->nat_tqe);
1731 
1732 	if (logtype == NL_EXPIRE)
1733 		nat_stats.ns_expire++;
1734 
1735 	MUTEX_ENTER(&nat->nat_lock);
1736 	/*
1737 	 * NL_DESTROY should only be passed in when we've got nat_ref >= 2.
1738 	 * This happens when a nat'd packet is blocked and we want to throw
1739 	 * away the NAT session.
1740 	 */
1741 	if (logtype == NL_DESTROY) {
1742 		if (nat->nat_ref > 2) {
1743 			nat->nat_ref -= 2;
1744 			MUTEX_EXIT(&nat->nat_lock);
1745 			if (removed)
1746 				nat_stats.ns_orphans++;
1747 			return;
1748 		}
1749 	} else if (nat->nat_ref > 1) {
1750 		nat->nat_ref--;
1751 		MUTEX_EXIT(&nat->nat_lock);
1752 		if (removed)
1753 			nat_stats.ns_orphans++;
1754 		return;
1755 	}
1756 	MUTEX_EXIT(&nat->nat_lock);
1757 
1758 	/*
1759 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1760 	 */
1761 	nat->nat_ref = 0;
1762 	if (!removed)
1763 		nat_stats.ns_orphans--;
1764 
1765 #ifdef	IPFILTER_SYNC
1766 	if (nat->nat_sync)
1767 		ipfsync_del(nat->nat_sync);
1768 #endif
1769 
1770 	if (nat->nat_fr != NULL)
1771 		(void) fr_derefrule(&nat->nat_fr);
1772 
1773 	if (nat->nat_hm != NULL)
1774 		fr_hostmapdel(&nat->nat_hm);
1775 
1776 	/*
1777 	 * If there is an active reference from the nat entry to its parent
1778 	 * rule, decrement the rule's reference count and free it too if no
1779 	 * longer being used.
1780 	 */
1781 	ipn = nat->nat_ptr;
1782 	if (ipn != NULL) {
1783 		fr_ipnatderef(&ipn);
1784 	}
1785 
1786 	MUTEX_DESTROY(&nat->nat_lock);
1787 
1788 	aps_free(nat->nat_aps);
1789 	nat_stats.ns_inuse--;
1790 
1791 	/*
1792 	 * If there's a fragment table entry too for this nat entry, then
1793 	 * dereference that as well.  This is after nat_lock is released
1794 	 * because of Tru64.
1795 	 */
1796 	fr_forgetnat((void *)nat);
1797 
1798 	KFREE(nat);
1799 }
1800 
1801 
1802 /* ------------------------------------------------------------------------ */
1803 /* Function:    nat_flushtable                                              */
1804 /* Returns:     int - number of NAT rules deleted                           */
1805 /* Parameters:  Nil                                                         */
1806 /*                                                                          */
1807 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1808 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1809 /* ------------------------------------------------------------------------ */
1810 /*
1811  * nat_flushtable - clear the NAT table of all mapping entries.
1812  */
nat_flushtable()1813 static int nat_flushtable()
1814 {
1815 	nat_t *nat;
1816 	int j = 0;
1817 
1818 	/*
1819 	 * ALL NAT mappings deleted, so lets just make the deletions
1820 	 * quicker.
1821 	 */
1822 	if (nat_table[0] != NULL)
1823 		bzero((char *)nat_table[0],
1824 		      sizeof(nat_table[0]) * ipf_nattable_sz);
1825 	if (nat_table[1] != NULL)
1826 		bzero((char *)nat_table[1],
1827 		      sizeof(nat_table[1]) * ipf_nattable_sz);
1828 
1829 	while ((nat = nat_instances) != NULL) {
1830 		nat_delete(nat, NL_FLUSH);
1831 		j++;
1832 	}
1833 
1834 	nat_stats.ns_inuse = 0;
1835 	return j;
1836 }
1837 
1838 
1839 /* ------------------------------------------------------------------------ */
1840 /* Function:    nat_clearlist                                               */
1841 /* Returns:     int - number of NAT/RDR rules deleted                       */
1842 /* Parameters:  Nil                                                         */
1843 /*                                                                          */
1844 /* Delete all rules in the current list of rules.  There is nothing elegant */
1845 /* about this cleanup: simply free all entries on the list of rules and     */
1846 /* clear out the tables used for hashed NAT rule lookups.                   */
1847 /* ------------------------------------------------------------------------ */
nat_clearlist()1848 static int nat_clearlist()
1849 {
1850 	ipnat_t *n, **np = &nat_list;
1851 	int i = 0;
1852 
1853 	if (nat_rules != NULL)
1854 		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1855 	if (rdr_rules != NULL)
1856 		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1857 
1858 	while ((n = *np) != NULL) {
1859 		*np = n->in_next;
1860 		if (n->in_use == 0) {
1861 			if (n->in_apr != NULL)
1862 				appr_free(n->in_apr);
1863 			MUTEX_DESTROY(&n->in_lock);
1864 			KFREE(n);
1865 			nat_stats.ns_rules--;
1866 		} else {
1867 			n->in_flags |= IPN_DELETE;
1868 			n->in_next = NULL;
1869 		}
1870 		i++;
1871 	}
1872 #if SOLARIS && !defined(_INET_IP_STACK_H)
1873 	pfil_delayed_copy = 1;
1874 #endif
1875 	nat_masks = 0;
1876 	rdr_masks = 0;
1877 	return i;
1878 }
1879 
1880 
1881 /* ------------------------------------------------------------------------ */
1882 /* Function:    nat_newmap                                                  */
1883 /* Returns:     int - -1 == error, 0 == success                             */
1884 /* Parameters:  fin(I) - pointer to packet information                      */
1885 /*              nat(I) - pointer to NAT entry                               */
1886 /*              ni(I)  - pointer to structure with misc. information needed */
1887 /*                       to create new NAT entry.                           */
1888 /*                                                                          */
1889 /* Given an empty NAT structure, populate it with new information about a   */
1890 /* new NAT session, as defined by the matching NAT rule.                    */
1891 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1892 /* to the new IP address for the translation.                               */
1893 /* ------------------------------------------------------------------------ */
nat_newmap(fin,nat,ni)1894 static INLINE int nat_newmap(fin, nat, ni)
1895 fr_info_t *fin;
1896 nat_t *nat;
1897 natinfo_t *ni;
1898 {
1899 	u_short st_port, dport, sport, port, sp, dp;
1900 	struct in_addr in, inb;
1901 	hostmap_t *hm;
1902 	u_32_t flags;
1903 	u_32_t st_ip;
1904 	ipnat_t *np;
1905 	nat_t *natl;
1906 	int l;
1907 
1908 	/*
1909 	 * If it's an outbound packet which doesn't match any existing
1910 	 * record, then create a new port
1911 	 */
1912 	l = 0;
1913 	hm = NULL;
1914 	np = ni->nai_np;
1915 	st_ip = np->in_nip;
1916 	st_port = np->in_pnext;
1917 	flags = ni->nai_flags;
1918 	sport = ni->nai_sport;
1919 	dport = ni->nai_dport;
1920 
1921 	/*
1922 	 * Do a loop until we either run out of entries to try or we find
1923 	 * a NAT mapping that isn't currently being used.  This is done
1924 	 * because the change to the source is not (usually) being fixed.
1925 	 */
1926 	do {
1927 		port = 0;
1928 		in.s_addr = htonl(np->in_nip);
1929 		if (l == 0) {
1930 			/*
1931 			 * Check to see if there is an existing NAT
1932 			 * setup for this IP address pair.
1933 			 */
1934 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1935 					 in, 0);
1936 			if (hm != NULL)
1937 				in.s_addr = hm->hm_mapip.s_addr;
1938 		} else if ((l == 1) && (hm != NULL)) {
1939 			fr_hostmapdel(&hm);
1940 		}
1941 		in.s_addr = ntohl(in.s_addr);
1942 
1943 		nat->nat_hm = hm;
1944 
1945 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1946 			if (l > 0)
1947 				return -1;
1948 		}
1949 
1950 		if (np->in_redir == NAT_BIMAP &&
1951 		    np->in_inmsk == np->in_outmsk) {
1952 			/*
1953 			 * map the address block in a 1:1 fashion
1954 			 */
1955 			in.s_addr = np->in_outip;
1956 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1957 			in.s_addr = ntohl(in.s_addr);
1958 
1959 		} else if (np->in_redir & NAT_MAPBLK) {
1960 			if ((l >= np->in_ppip) || ((l > 0) &&
1961 			     !(flags & IPN_TCPUDP)))
1962 				return -1;
1963 			/*
1964 			 * map-block - Calculate destination address.
1965 			 */
1966 			in.s_addr = ntohl(fin->fin_saddr);
1967 			in.s_addr &= ntohl(~np->in_inmsk);
1968 			inb.s_addr = in.s_addr;
1969 			in.s_addr /= np->in_ippip;
1970 			in.s_addr &= ntohl(~np->in_outmsk);
1971 			in.s_addr += ntohl(np->in_outip);
1972 			/*
1973 			 * Calculate destination port.
1974 			 */
1975 			if ((flags & IPN_TCPUDP) &&
1976 			    (np->in_ppip != 0)) {
1977 				port = ntohs(sport) + l;
1978 				port %= np->in_ppip;
1979 				port += np->in_ppip *
1980 					(inb.s_addr % np->in_ippip);
1981 				port += MAPBLK_MINPORT;
1982 				port = htons(port);
1983 			}
1984 
1985 		} else if ((np->in_outip == 0) &&
1986 			   (np->in_outmsk == 0xffffffff)) {
1987 			/*
1988 			 * 0/32 - use the interface's IP address.
1989 			 */
1990 			if ((l > 0) ||
1991 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1992 				       &in, NULL) == -1)
1993 				return -1;
1994 			in.s_addr = ntohl(in.s_addr);
1995 
1996 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1997 			/*
1998 			 * 0/0 - use the original source address/port.
1999 			 */
2000 			if (l > 0)
2001 				return -1;
2002 			in.s_addr = ntohl(fin->fin_saddr);
2003 
2004 		} else if ((np->in_outmsk != 0xffffffff) &&
2005 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2006 			np->in_nip++;
2007 
2008 		natl = NULL;
2009 
2010 		if ((flags & IPN_TCPUDP) &&
2011 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2012 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2013 			/*
2014 			 * "ports auto" (without map-block)
2015 			 */
2016 			if ((l > 0) && (l % np->in_ppip == 0)) {
2017 				if (l > np->in_space) {
2018 					return -1;
2019 				} else if ((l > np->in_ppip) &&
2020 					   np->in_outmsk != 0xffffffff)
2021 					np->in_nip++;
2022 			}
2023 			if (np->in_ppip != 0) {
2024 				port = ntohs(sport);
2025 				port += (l % np->in_ppip);
2026 				port %= np->in_ppip;
2027 				port += np->in_ppip *
2028 					(ntohl(fin->fin_saddr) %
2029 					 np->in_ippip);
2030 				port += MAPBLK_MINPORT;
2031 				port = htons(port);
2032 			}
2033 
2034 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2035 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2036 			/*
2037 			 * Standard port translation.  Select next port.
2038 			 */
2039 			if (np->in_flags & IPN_SEQUENTIAL) {
2040 				port = np->in_pnext;
2041 			} else {
2042 				port = ipf_random() % (ntohs(np->in_pmax) -
2043 						       ntohs(np->in_pmin));
2044 				port += ntohs(np->in_pmin);
2045 			}
2046 			port = htons(port);
2047 			np->in_pnext++;
2048 
2049 			if (np->in_pnext > ntohs(np->in_pmax)) {
2050 				np->in_pnext = ntohs(np->in_pmin);
2051 				if (np->in_outmsk != 0xffffffff)
2052 					np->in_nip++;
2053 			}
2054 		}
2055 
2056 		if (np->in_flags & IPN_IPRANGE) {
2057 			if (np->in_nip > ntohl(np->in_outmsk))
2058 				np->in_nip = ntohl(np->in_outip);
2059 		} else {
2060 			if ((np->in_outmsk != 0xffffffff) &&
2061 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2062 			    ntohl(np->in_outip))
2063 				np->in_nip = ntohl(np->in_outip) + 1;
2064 		}
2065 
2066 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2067 			port = sport;
2068 
2069 		/*
2070 		 * Here we do a lookup of the connection as seen from
2071 		 * the outside.  If an IP# pair already exists, try
2072 		 * again.  So if you have A->B becomes C->B, you can
2073 		 * also have D->E become C->E but not D->B causing
2074 		 * another C->B.  Also take protocol and ports into
2075 		 * account when determining whether a pre-existing
2076 		 * NAT setup will cause an external conflict where
2077 		 * this is appropriate.
2078 		 */
2079 		inb.s_addr = htonl(in.s_addr);
2080 		sp = fin->fin_data[0];
2081 		dp = fin->fin_data[1];
2082 		fin->fin_data[0] = fin->fin_data[1];
2083 		fin->fin_data[1] = htons(port);
2084 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2085 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2086 		fin->fin_data[0] = sp;
2087 		fin->fin_data[1] = dp;
2088 
2089 		/*
2090 		 * Has the search wrapped around and come back to the
2091 		 * start ?
2092 		 */
2093 		if ((natl != NULL) &&
2094 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2095 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2096 			return -1;
2097 		l++;
2098 	} while (natl != NULL);
2099 
2100 	if (np->in_space > 0)
2101 		np->in_space--;
2102 
2103 	/* Setup the NAT table */
2104 	nat->nat_inip = fin->fin_src;
2105 	nat->nat_outip.s_addr = htonl(in.s_addr);
2106 	nat->nat_oip = fin->fin_dst;
2107 	if (nat->nat_hm == NULL)
2108 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2109 					  nat->nat_outip, 0);
2110 
2111 	/*
2112 	 * The ICMP checksum does not have a pseudo header containing
2113 	 * the IP addresses
2114 	 */
2115 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2116 	ni->nai_sum2 = LONG_SUM(in.s_addr);
2117 	if ((flags & IPN_TCPUDP)) {
2118 		ni->nai_sum1 += ntohs(sport);
2119 		ni->nai_sum2 += ntohs(port);
2120 	}
2121 
2122 	if (flags & IPN_TCPUDP) {
2123 		nat->nat_inport = sport;
2124 		nat->nat_outport = port;	/* sport */
2125 		nat->nat_oport = dport;
2126 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2127 	} else if (flags & IPN_ICMPQUERY) {
2128 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2129 		nat->nat_inport = port;
2130 		nat->nat_outport = port;
2131 	} else if (fin->fin_p == IPPROTO_GRE) {
2132 #if 0
2133 		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2134 		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2135 			nat->nat_oport = 0;/*fin->fin_data[1];*/
2136 			nat->nat_inport = 0;/*fin->fin_data[0];*/
2137 			nat->nat_outport = 0;/*fin->fin_data[0];*/
2138 			nat->nat_call[0] = fin->fin_data[0];
2139 			nat->nat_call[1] = fin->fin_data[0];
2140 		}
2141 #endif
2142 	}
2143 	ni->nai_ip.s_addr = in.s_addr;
2144 	ni->nai_port = port;
2145 	ni->nai_nport = dport;
2146 	return 0;
2147 }
2148 
2149 
2150 /* ------------------------------------------------------------------------ */
2151 /* Function:    nat_newrdr                                                  */
2152 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2153 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2154 /* Parameters:  fin(I) - pointer to packet information                      */
2155 /*              nat(I) - pointer to NAT entry                               */
2156 /*              ni(I)  - pointer to structure with misc. information needed */
2157 /*                       to create new NAT entry.                           */
2158 /*                                                                          */
2159 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2160 /* to the new IP address for the translation.                               */
2161 /* ------------------------------------------------------------------------ */
nat_newrdr(fin,nat,ni)2162 static INLINE int nat_newrdr(fin, nat, ni)
2163 fr_info_t *fin;
2164 nat_t *nat;
2165 natinfo_t *ni;
2166 {
2167 	u_short nport, dport, sport;
2168 	struct in_addr in, inb;
2169 	u_short sp, dp;
2170 	hostmap_t *hm;
2171 	u_32_t flags;
2172 	ipnat_t *np;
2173 	nat_t *natl;
2174 	int move;
2175 
2176 	move = 1;
2177 	hm = NULL;
2178 	in.s_addr = 0;
2179 	np = ni->nai_np;
2180 	flags = ni->nai_flags;
2181 	sport = ni->nai_sport;
2182 	dport = ni->nai_dport;
2183 
2184 	/*
2185 	 * If the matching rule has IPN_STICKY set, then we want to have the
2186 	 * same rule kick in as before.  Why would this happen?  If you have
2187 	 * a collection of rdr rules with "round-robin sticky", the current
2188 	 * packet might match a different one to the previous connection but
2189 	 * we want the same destination to be used.
2190 	 */
2191 	if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) &&
2192 	    ((np->in_flags & IPN_STICKY) != 0)) {
2193 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2194 				 (u_32_t)dport);
2195 		if (hm != NULL) {
2196 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2197 			np = hm->hm_ipnat;
2198 			ni->nai_np = np;
2199 			move = 0;
2200 		}
2201 	}
2202 
2203 	/*
2204 	 * Otherwise, it's an inbound packet. Most likely, we don't
2205 	 * want to rewrite source ports and source addresses. Instead,
2206 	 * we want to rewrite to a fixed internal address and fixed
2207 	 * internal port.
2208 	 */
2209 	if (np->in_flags & IPN_SPLIT) {
2210 		in.s_addr = np->in_nip;
2211 
2212 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2213 			hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst,
2214 					 in, (u_32_t)dport);
2215 			if (hm != NULL) {
2216 				in.s_addr = hm->hm_mapip.s_addr;
2217 				move = 0;
2218 			}
2219 		}
2220 
2221 		if (hm == NULL || hm->hm_ref == 1) {
2222 			if (np->in_inip == htonl(in.s_addr)) {
2223 				np->in_nip = ntohl(np->in_inmsk);
2224 				move = 0;
2225 			} else {
2226 				np->in_nip = ntohl(np->in_inip);
2227 			}
2228 		}
2229 
2230 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2231 		/*
2232 		 * 0/32 - use the interface's IP address.
2233 		 */
2234 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1)
2235 			return -1;
2236 		in.s_addr = ntohl(in.s_addr);
2237 
2238 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2239 		/*
2240 		 * 0/0 - use the original destination address/port.
2241 		 */
2242 		in.s_addr = ntohl(fin->fin_daddr);
2243 
2244 	} else if (np->in_redir == NAT_BIMAP &&
2245 		   np->in_inmsk == np->in_outmsk) {
2246 		/*
2247 		 * map the address block in a 1:1 fashion
2248 		 */
2249 		in.s_addr = np->in_inip;
2250 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2251 		in.s_addr = ntohl(in.s_addr);
2252 	} else {
2253 		in.s_addr = ntohl(np->in_inip);
2254 	}
2255 
2256 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2257 		nport = dport;
2258 	else {
2259 		/*
2260 		 * Whilst not optimized for the case where
2261 		 * pmin == pmax, the gain is not significant.
2262 		 */
2263 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2264 		    (np->in_pmin != np->in_pmax)) {
2265 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2266 				ntohs(np->in_pnext);
2267 			nport = htons(nport);
2268 		} else
2269 			nport = np->in_pnext;
2270 	}
2271 
2272 	/*
2273 	 * When the redirect-to address is set to 0.0.0.0, just
2274 	 * assume a blank `forwarding' of the packet.  We don't
2275 	 * setup any translation for this either.
2276 	 */
2277 	if (in.s_addr == 0) {
2278 		if (nport == dport)
2279 			return -1;
2280 		in.s_addr = ntohl(fin->fin_daddr);
2281 	}
2282 
2283 	/*
2284 	 * Check to see if this redirect mapping already exists and if
2285 	 * it does, return "failure" (allowing it to be created will just
2286 	 * cause one or both of these "connections" to stop working.)
2287 	 */
2288 	inb.s_addr = htonl(in.s_addr);
2289 	sp = fin->fin_data[0];
2290 	dp = fin->fin_data[1];
2291 	fin->fin_data[1] = fin->fin_data[0];
2292 	fin->fin_data[0] = ntohs(nport);
2293 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2294 			     (u_int)fin->fin_p, inb, fin->fin_src);
2295 	fin->fin_data[0] = sp;
2296 	fin->fin_data[1] = dp;
2297 	if (natl != NULL)
2298 		return -1;
2299 
2300 	nat->nat_inip.s_addr = htonl(in.s_addr);
2301 	nat->nat_outip = fin->fin_dst;
2302 	nat->nat_oip = fin->fin_src;
2303 	if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0))
2304 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, in,
2305 					  (u_32_t)dport);
2306 
2307 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2308 	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2309 
2310 	ni->nai_ip.s_addr = in.s_addr;
2311 	ni->nai_nport = nport;
2312 	ni->nai_port = sport;
2313 
2314 	if (flags & IPN_TCPUDP) {
2315 		nat->nat_inport = nport;
2316 		nat->nat_outport = dport;
2317 		nat->nat_oport = sport;
2318 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2319 	} else if (flags & IPN_ICMPQUERY) {
2320 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2321 		nat->nat_inport = nport;
2322 		nat->nat_outport = nport;
2323 	} else if (fin->fin_p == IPPROTO_GRE) {
2324 #if 0
2325 		nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags;
2326 		if (GRE_REV(nat->nat_gre.gs_flags) == 1) {
2327 			nat->nat_call[0] = fin->fin_data[0];
2328 			nat->nat_call[1] = fin->fin_data[1];
2329 			nat->nat_oport = 0; /*fin->fin_data[0];*/
2330 			nat->nat_inport = 0; /*fin->fin_data[1];*/
2331 			nat->nat_outport = 0; /*fin->fin_data[1];*/
2332 		}
2333 #endif
2334 	}
2335 
2336 	return move;
2337 }
2338 
2339 /* ------------------------------------------------------------------------ */
2340 /* Function:    nat_new                                                     */
2341 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2342 /*                       else pointer to new NAT structure                  */
2343 /* Parameters:  fin(I)       - pointer to packet information                */
2344 /*              np(I)        - pointer to NAT rule                          */
2345 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2346 /*              flags(I)     - flags describing the current packet          */
2347 /*              direction(I) - direction of packet (in/out)                 */
2348 /* Write Lock:  ipf_nat                                                     */
2349 /*                                                                          */
2350 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2351 /* in any way.                                                              */
2352 /*                                                                          */
2353 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2354 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2355 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2356 /* and (3) building that structure and putting it into the NAT table(s).    */
2357 /*                                                                          */
2358 /* NOTE: natsave should NOT be used top point back to an ipstate_t struct   */
2359 /*       as it can result in memory being corrupted.                        */
2360 /* ------------------------------------------------------------------------ */
nat_new(fin,np,natsave,flags,direction)2361 nat_t *nat_new(fin, np, natsave, flags, direction)
2362 fr_info_t *fin;
2363 ipnat_t *np;
2364 nat_t **natsave;
2365 u_int flags;
2366 int direction;
2367 {
2368 	u_short port = 0, sport = 0, dport = 0, nport = 0;
2369 	tcphdr_t *tcp = NULL;
2370 	hostmap_t *hm = NULL;
2371 	struct in_addr in;
2372 	nat_t *nat, *natl;
2373 	u_int nflags;
2374 	natinfo_t ni;
2375 	u_32_t sumd;
2376 	int move;
2377 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2378 	qpktinfo_t *qpi = fin->fin_qpi;
2379 #endif
2380 
2381 	if (nat_stats.ns_inuse >= ipf_nattable_max) {
2382 		nat_stats.ns_memfail++;
2383 		fr_nat_doflush = 1;
2384 		return NULL;
2385 	}
2386 
2387 	move = 1;
2388 	nflags = np->in_flags & flags;
2389 	nflags &= NAT_FROMRULE;
2390 
2391 	ni.nai_np = np;
2392 	ni.nai_nflags = nflags;
2393 	ni.nai_flags = flags;
2394 	ni.nai_dport = 0;
2395 	ni.nai_sport = 0;
2396 
2397 	/* Give me a new nat */
2398 	KMALLOC(nat, nat_t *);
2399 	if (nat == NULL) {
2400 		nat_stats.ns_memfail++;
2401 		/*
2402 		 * Try to automatically tune the max # of entries in the
2403 		 * table allowed to be less than what will cause kmem_alloc()
2404 		 * to fail and try to eliminate panics due to out of memory
2405 		 * conditions arising.
2406 		 */
2407 		if (ipf_nattable_max > ipf_nattable_sz) {
2408 			ipf_nattable_max = nat_stats.ns_inuse - 100;
2409 			printf("ipf_nattable_max reduced to %d\n",
2410 				ipf_nattable_max);
2411 		}
2412 		return NULL;
2413 	}
2414 
2415 	if (flags & IPN_TCPUDP) {
2416 		tcp = fin->fin_dp;
2417 		ni.nai_sport = htons(fin->fin_sport);
2418 		ni.nai_dport = htons(fin->fin_dport);
2419 	} else if (flags & IPN_ICMPQUERY) {
2420 		/*
2421 		 * In the ICMP query NAT code, we translate the ICMP id fields
2422 		 * to make them unique. This is indepedent of the ICMP type
2423 		 * (e.g. in the unlikely event that a host sends an echo and
2424 		 * an tstamp request with the same id, both packets will have
2425 		 * their ip address/id field changed in the same way).
2426 		 */
2427 		/* The icmp_id field is used by the sender to identify the
2428 		 * process making the icmp request. (the receiver justs
2429 		 * copies it back in its response). So, it closely matches
2430 		 * the concept of source port. We overlay sport, so we can
2431 		 * maximally reuse the existing code.
2432 		 */
2433 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2434 		ni.nai_dport = ni.nai_sport;
2435 	}
2436 
2437 	bzero((char *)nat, sizeof(*nat));
2438 	nat->nat_flags = flags;
2439 	nat->nat_redir = np->in_redir;
2440 
2441 	if ((flags & NAT_SLAVE) == 0) {
2442 		MUTEX_ENTER(&ipf_nat_new);
2443 	}
2444 
2445 	/*
2446 	 * Search the current table for a match.
2447 	 */
2448 	if (direction == NAT_OUTBOUND) {
2449 		/*
2450 		 * We can now arrange to call this for the same connection
2451 		 * because ipf_nat_new doesn't protect the code path into
2452 		 * this function.
2453 		 */
2454 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2455 				     fin->fin_src, fin->fin_dst);
2456 		if (natl != NULL) {
2457 			KFREE(nat);
2458 			nat = natl;
2459 			goto done;
2460 		}
2461 
2462 		move = nat_newmap(fin, nat, &ni);
2463 		if (move == -1)
2464 			goto badnat;
2465 
2466 		np = ni.nai_np;
2467 		in = ni.nai_ip;
2468 	} else {
2469 		/*
2470 		 * NAT_INBOUND is used only for redirects rules
2471 		 */
2472 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2473 				    fin->fin_src, fin->fin_dst);
2474 		if (natl != NULL) {
2475 			KFREE(nat);
2476 			nat = natl;
2477 			goto done;
2478 		}
2479 
2480 		move = nat_newrdr(fin, nat, &ni);
2481 		if (move == -1)
2482 			goto badnat;
2483 
2484 		np = ni.nai_np;
2485 		in = ni.nai_ip;
2486 	}
2487 	port = ni.nai_port;
2488 	nport = ni.nai_nport;
2489 
2490 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2491 		if (np->in_redir == NAT_REDIRECT) {
2492 			nat_delrdr(np);
2493 			nat_addrdr(np);
2494 		} else if (np->in_redir == NAT_MAP) {
2495 			nat_delnat(np);
2496 			nat_addnat(np);
2497 		}
2498 	}
2499 
2500 	if (flags & IPN_TCPUDP) {
2501 		sport = ni.nai_sport;
2502 		dport = ni.nai_dport;
2503 	} else if (flags & IPN_ICMPQUERY) {
2504 		sport = ni.nai_sport;
2505 		dport = 0;
2506 	}
2507 
2508 	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2509 	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2510 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC)
2511 	if ((flags & IPN_TCP) && dohwcksum &&
2512 	    (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
2513 		if (direction == NAT_OUTBOUND)
2514 			ni.nai_sum1 = LONG_SUM(in.s_addr);
2515 		else
2516 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2517 		ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr));
2518 		ni.nai_sum1 += 30;
2519 		ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16);
2520 		nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff);
2521 	} else
2522 #endif
2523 		nat->nat_sumd[1] = nat->nat_sumd[0];
2524 
2525 	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2526 		if (direction == NAT_OUTBOUND)
2527 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2528 		else
2529 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2530 
2531 		ni.nai_sum2 = LONG_SUM(in.s_addr);
2532 
2533 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2534 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2535 	} else {
2536 		nat->nat_ipsumd = nat->nat_sumd[0];
2537 		if (!(flags & IPN_TCPUDPICMP)) {
2538 			nat->nat_sumd[0] = 0;
2539 			nat->nat_sumd[1] = 0;
2540 		}
2541 	}
2542 
2543 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2544 		fr_nat_doflush = 1;
2545 		goto badnat;
2546 	}
2547 	if (flags & SI_WILDP)
2548 		nat_stats.ns_wilds++;
2549 	fin->fin_flx |= FI_NEWNAT;
2550 	goto done;
2551 badnat:
2552 	nat_stats.ns_badnat++;
2553 	if ((hm = nat->nat_hm) != NULL)
2554 		fr_hostmapdel(&hm);
2555 	KFREE(nat);
2556 	nat = NULL;
2557 done:
2558 	if ((flags & NAT_SLAVE) == 0) {
2559 		MUTEX_EXIT(&ipf_nat_new);
2560 	}
2561 	return nat;
2562 }
2563 
2564 
2565 /* ------------------------------------------------------------------------ */
2566 /* Function:    nat_finalise                                                */
2567 /* Returns:     int - 0 == sucess, -1 == failure                            */
2568 /* Parameters:  fin(I) - pointer to packet information                      */
2569 /*              nat(I) - pointer to NAT entry                               */
2570 /*              ni(I)  - pointer to structure with misc. information needed */
2571 /*                       to create new NAT entry.                           */
2572 /* Write Lock:  ipf_nat                                                     */
2573 /*                                                                          */
2574 /* This is the tail end of constructing a new NAT entry and is the same     */
2575 /* for both IPv4 and IPv6.                                                  */
2576 /* ------------------------------------------------------------------------ */
2577 /*ARGSUSED*/
nat_finalise(fin,nat,ni,tcp,natsave,direction)2578 static int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2579 fr_info_t *fin;
2580 nat_t *nat;
2581 natinfo_t *ni;
2582 tcphdr_t *tcp;
2583 nat_t **natsave;
2584 int direction;
2585 {
2586 	frentry_t *fr;
2587 	ipnat_t *np;
2588 
2589 	np = ni->nai_np;
2590 
2591 	if (np->in_ifps[0] != NULL) {
2592 		COPYIFNAME(4, np->in_ifps[0], nat->nat_ifnames[0]);
2593 	}
2594 	if (np->in_ifps[1] != NULL) {
2595 		COPYIFNAME(4, np->in_ifps[1], nat->nat_ifnames[1]);
2596 	}
2597 #ifdef	IPFILTER_SYNC
2598 	if ((nat->nat_flags & SI_CLONE) == 0)
2599 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2600 #endif
2601 
2602 	nat->nat_me = natsave;
2603 	nat->nat_dir = direction;
2604 	nat->nat_ifps[0] = np->in_ifps[0];
2605 	nat->nat_ifps[1] = np->in_ifps[1];
2606 	nat->nat_ptr = np;
2607 	nat->nat_p = fin->fin_p;
2608 	nat->nat_mssclamp = np->in_mssclamp;
2609 	if (nat->nat_p == IPPROTO_TCP)
2610 		nat->nat_seqnext[0] = ntohl(tcp->th_seq);
2611 
2612 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2613 		if (appr_new(fin, nat) == -1)
2614 			return -1;
2615 
2616 	if (nat_insert(nat, fin->fin_rev) == 0) {
2617 		if (nat_logging)
2618 			nat_log(nat, (u_int)np->in_redir);
2619 		np->in_use++;
2620 		fr = fin->fin_fr;
2621 		nat->nat_fr = fr;
2622 		if (fr != NULL) {
2623 			MUTEX_ENTER(&fr->fr_lock);
2624 			fr->fr_ref++;
2625 			MUTEX_EXIT(&fr->fr_lock);
2626 		}
2627 		return 0;
2628 	}
2629 
2630 	/*
2631 	 * nat_insert failed, so cleanup time...
2632 	 */
2633 	return -1;
2634 }
2635 
2636 
2637 /* ------------------------------------------------------------------------ */
2638 /* Function:   nat_insert                                                   */
2639 /* Returns:    int - 0 == sucess, -1 == failure                             */
2640 /* Parameters: nat(I) - pointer to NAT structure                            */
2641 /*             rev(I) - flag indicating forward/reverse direction of packet */
2642 /* Write Lock: ipf_nat                                                      */
2643 /*                                                                          */
2644 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2645 /* list of active NAT entries.  Adjust global counters when complete.       */
2646 /* ------------------------------------------------------------------------ */
nat_insert(nat,rev)2647 int	nat_insert(nat, rev)
2648 nat_t	*nat;
2649 int	rev;
2650 {
2651 	u_int hv1, hv2;
2652 	nat_t **natp;
2653 
2654 	/*
2655 	 * Try and return an error as early as possible, so calculate the hash
2656 	 * entry numbers first and then proceed.
2657 	 */
2658 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2659 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2660 				  0xffffffff);
2661 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2662 				  ipf_nattable_sz);
2663 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2664 				  0xffffffff);
2665 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2666 				  ipf_nattable_sz);
2667 	} else {
2668 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2669 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz);
2670 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2671 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz);
2672 	}
2673 
2674 	if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket ||
2675 	    nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) {
2676 		return -1;
2677 	}
2678 
2679 	nat->nat_hv[0] = hv1;
2680 	nat->nat_hv[1] = hv2;
2681 
2682 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2683 
2684 	nat->nat_rev = rev;
2685 	nat->nat_ref = 1;
2686 	nat->nat_bytes[0] = 0;
2687 	nat->nat_pkts[0] = 0;
2688 	nat->nat_bytes[1] = 0;
2689 	nat->nat_pkts[1] = 0;
2690 
2691 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2692 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4);
2693 
2694 	if (nat->nat_ifnames[1][0] != '\0') {
2695 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2696 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4);
2697 	} else {
2698 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2699 			       LIFNAMSIZ);
2700 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2701 		nat->nat_ifps[1] = nat->nat_ifps[0];
2702 	}
2703 
2704 	nat->nat_next = nat_instances;
2705 	nat->nat_pnext = &nat_instances;
2706 	if (nat_instances)
2707 		nat_instances->nat_pnext = &nat->nat_next;
2708 	nat_instances = nat;
2709 
2710 	natp = &nat_table[0][hv1];
2711 	if (*natp)
2712 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2713 	nat->nat_phnext[0] = natp;
2714 	nat->nat_hnext[0] = *natp;
2715 	*natp = nat;
2716 	nat_stats.ns_bucketlen[0][hv1]++;
2717 
2718 	natp = &nat_table[1][hv2];
2719 	if (*natp)
2720 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2721 	nat->nat_phnext[1] = natp;
2722 	nat->nat_hnext[1] = *natp;
2723 	*natp = nat;
2724 	nat_stats.ns_bucketlen[1][hv2]++;
2725 
2726 	fr_setnatqueue(nat, rev);
2727 
2728 	nat_stats.ns_added++;
2729 	nat_stats.ns_inuse++;
2730 	return 0;
2731 }
2732 
2733 
2734 /* ------------------------------------------------------------------------ */
2735 /* Function:    nat_icmperrorlookup                                         */
2736 /* Returns:     nat_t* - point to matching NAT structure                    */
2737 /* Parameters:  fin(I) - pointer to packet information                      */
2738 /*              dir(I) - direction of packet (in/out)                       */
2739 /*                                                                          */
2740 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2741 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2742 /* the required length.                                                     */
2743 /* ------------------------------------------------------------------------ */
nat_icmperrorlookup(fin,dir)2744 nat_t *nat_icmperrorlookup(fin, dir)
2745 fr_info_t *fin;
2746 int dir;
2747 {
2748 	int flags = 0, type, minlen;
2749 	icmphdr_t *icmp, *orgicmp;
2750 	tcphdr_t *tcp = NULL;
2751 	u_short data[2];
2752 	nat_t *nat;
2753 	ip_t *oip;
2754 	u_int p;
2755 
2756 	icmp = fin->fin_dp;
2757 	type = icmp->icmp_type;
2758 	/*
2759 	 * Does it at least have the return (basic) IP header ?
2760 	 * Only a basic IP header (no options) should be with an ICMP error
2761 	 * header.  Also, if it's not an error type, then return.
2762 	 */
2763 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2764 		return NULL;
2765 
2766 	/*
2767 	 * Check packet size
2768 	 */
2769 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2770 	minlen = IP_HL(oip) << 2;
2771 	if ((minlen < sizeof(ip_t)) ||
2772 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2773 		return NULL;
2774 	/*
2775 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2776 	 * header claimed in the encapsulated part which is of concern.  It
2777 	 * may be too big to be in this buffer but not so big that it's
2778 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2779 	 * This is possible because we don't know how big oip_hl is when we
2780 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2781 	 * all here now.
2782 	 */
2783 #ifdef  _KERNEL
2784 	{
2785 	mb_t *m;
2786 
2787 	m = fin->fin_m;
2788 # if defined(MENTAT)
2789 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2790 		return NULL;
2791 # else
2792 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2793 	    (char *)fin->fin_ip + M_LEN(m))
2794 		return NULL;
2795 # endif
2796 	}
2797 #endif
2798 
2799 	if (fin->fin_daddr != oip->ip_src.s_addr)
2800 		return NULL;
2801 
2802 	p = oip->ip_p;
2803 	if (p == IPPROTO_TCP)
2804 		flags = IPN_TCP;
2805 	else if (p == IPPROTO_UDP)
2806 		flags = IPN_UDP;
2807 	else if (p == IPPROTO_ICMP) {
2808 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2809 
2810 		/* see if this is related to an ICMP query */
2811 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2812 			data[0] = fin->fin_data[0];
2813 			data[1] = fin->fin_data[1];
2814 			fin->fin_data[0] = 0;
2815 			fin->fin_data[1] = orgicmp->icmp_id;
2816 
2817 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2818 			/*
2819 			 * NOTE : dir refers to the direction of the original
2820 			 *        ip packet. By definition the icmp error
2821 			 *        message flows in the opposite direction.
2822 			 */
2823 			if (dir == NAT_INBOUND)
2824 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2825 						   oip->ip_src);
2826 			else
2827 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2828 						    oip->ip_src);
2829 			fin->fin_data[0] = data[0];
2830 			fin->fin_data[1] = data[1];
2831 			return nat;
2832 		}
2833 	}
2834 
2835 	if (flags & IPN_TCPUDP) {
2836 		minlen += 8;		/* + 64bits of data to get ports */
2837 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2838 			return NULL;
2839 
2840 		data[0] = fin->fin_data[0];
2841 		data[1] = fin->fin_data[1];
2842 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2843 		fin->fin_data[0] = ntohs(tcp->th_dport);
2844 		fin->fin_data[1] = ntohs(tcp->th_sport);
2845 
2846 		if (dir == NAT_INBOUND) {
2847 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2848 					   oip->ip_src);
2849 		} else {
2850 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2851 					    oip->ip_src);
2852 		}
2853 		fin->fin_data[0] = data[0];
2854 		fin->fin_data[1] = data[1];
2855 		return nat;
2856 	}
2857 	if (dir == NAT_INBOUND)
2858 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2859 	else
2860 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2861 }
2862 
2863 
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmperror                                               */
/* Returns:     nat_t* - NULL == no fix-up done (unsuitable packet or no    */
/*                       matching NAT entry), else pointer to the matching  */
/*                       NAT structure                                      */
/* Parameters:  fin(I)    - pointer to packet information                   */
/*              nflags(O) - set to IPN_ICMPERR once a NAT entry is matched  */
/*              dir(I)    - direction of packet (in/out)                    */
/*                                                                          */
/* Fix up an ICMP packet which is an error message for an existing NAT      */
/* session.  This will correct both packet header data and checksums.       */
/* The IP header embedded in the ICMP error (and, when enough of it is       */
/* present, the embedded TCP/UDP/ICMP header) is rewritten in place.        */
/*                                                                          */
/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
/* a NAT'd ICMP packet gets correctly recognised.                           */
/* ------------------------------------------------------------------------ */
nat_t *nat_icmperror(fin, nflags, dir)
fr_info_t *fin;
u_int *nflags;
int dir;
{
	u_32_t sum1, sum2, sumd, sumd2;
	struct in_addr a1, a2;
	int flags, dlen, odst;
	icmphdr_t *icmp;
	u_short *csump;
	tcphdr_t *tcp;
	nat_t *nat;
	ip_t *oip;
	void *dp;

	/* Packets flagged FI_SHORT or FI_FRAGBODY are never fixed up. */
	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
		return NULL;
	/*
	 * nat_icmperrorlookup() will return NULL for `defective' packets.
	 */
	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
		return NULL;

	tcp = NULL;
	csump = NULL;
	flags = 0;
	sumd2 = 0;
	*nflags = IPN_ICMPERR;
	icmp = fin->fin_dp;
	/* oip is the IP header of the offending packet carried in the error. */
	oip = (ip_t *)&icmp->icmp_ip;
	/* dp is whatever follows that embedded IP header (per its own ip_hl). */
	dp = (((char *)oip) + (IP_HL(oip) << 2));
	if (oip->ip_p == IPPROTO_TCP) {
		tcp = (tcphdr_t *)dp;
		csump = (u_short *)&tcp->th_sum;
		flags = IPN_TCP;
	} else if (oip->ip_p == IPPROTO_UDP) {
		udphdr_t *udp;

		udp = (udphdr_t *)dp;
		/*
		 * tcp is pointed at the UDP header too, because the port
		 * rewrite below goes through tcp->th_sport/th_dport for
		 * both protocols.
		 */
		tcp = (tcphdr_t *)dp;
		csump = (u_short *)&udp->uh_sum;
		flags = IPN_UDP;
	} else if (oip->ip_p == IPPROTO_ICMP)
		flags = IPN_ICMPQUERY;
	/* Bytes of the packet (by fin_plen) from dp onwards. */
	dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip);

	/*
	 * Need to adjust ICMP header to include the real IP#'s and
	 * port #'s.  Only apply a checksum change relative to the
	 * IP address change as it will be modified again in fr_checknatout
	 * for both address and port.  Two checksum changes are
	 * necessary for the two header address changes.  Be careful
	 * to only modify the checksum once for the port # and twice
	 * for the IP#.
	 */

	/*
	 * Step 1
	 * Fix the IP addresses in the offending IP packet. You also need
	 * to adjust the IP header checksum of that offending IP packet.
	 *
	 * Normally, you would expect that the ICMP checksum of the
	 * ICMP error message needs to be adjusted as well for the
	 * IP address change in oip.
	 * However, this is a NOP, because the ICMP checksum is
	 * calculated over the complete ICMP packet, which includes the
	 * changed oip IP addresses and oip->ip_sum. However, these
	 * two changes cancel each other out (if the delta for
	 * the IP address is x, then the delta for ip_sum is minus x),
	 * so no change in the icmp_cksum is necessary.
	 *
	 * Inbound ICMP
	 * ------------
	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
	 * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b)
	 * - OIP_SRC(c)=nat_outip, OIP_DST(b)=nat_oip
	 *
	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
	 * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a)
	 * - OIP_SRC(b)=nat_outip, OIP_DST(a)=nat_oip
	 *
	 * Outbound ICMP
	 * -------------
	 * MAP rule, SRC=a,DST=b -> SRC=c,DST=b
	 * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a)
	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
	 *
	 * RDR rule, SRC=a,DST=b -> SRC=a,DST=c
	 * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c)
	 * - OIP_SRC(a)=nat_oip, OIP_DST(c)=nat_inip
	 *
	 */
	/*
	 * odst == 1: the embedded destination is nat_oip, so the embedded
	 * source is the address to rewrite (to nat_inip).  Otherwise the
	 * embedded destination is rewritten (to nat_outip).
	 * a1 is the new address and a2 the old one, both in host byte order.
	 */
	odst = (oip->ip_dst.s_addr == nat->nat_oip.s_addr) ? 1 : 0;
	if (odst == 1) {
		a1.s_addr = ntohl(nat->nat_inip.s_addr);
		a2.s_addr = ntohl(oip->ip_src.s_addr);
		oip->ip_src.s_addr = htonl(a1.s_addr);
	} else {
		a1.s_addr = ntohl(nat->nat_outip.s_addr);
		a2.s_addr = ntohl(oip->ip_dst.s_addr);
		oip->ip_dst.s_addr = htonl(a1.s_addr);
	}

	/* Address delta, applied to the embedded IP header checksum. */
	sumd = a2.s_addr - a1.s_addr;
	if (sumd != 0) {
		if (a1.s_addr > a2.s_addr)
			sumd--;
		sumd = ~sumd;

		fix_datacksum(&oip->ip_sum, sumd);
	}

	/* Remember the address-only delta; sumd goes on to include ports. */
	sumd2 = sumd;
	sum1 = 0;
	sum2 = 0;

	/*
	 * Fix UDP pseudo header checksum to compensate for the
	 * IP address change.
	 */
	if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
		/*
		 * Step 2 :
		 * For offending TCP/UDP IP packets, translate the ports as
		 * well, based on the NAT specification. Of course such
		 * a change may be reflected in the ICMP checksum as well.
		 *
		 * Since the port fields are part of the TCP/UDP checksum
		 * of the offending IP packet, you need to adjust that checksum
		 * as well... except that the change in the port numbers should
		 * be offset by the checksum change.  However, the TCP/UDP
		 * checksum will also need to change if there has been an
		 * IP address change.
		 */
		/* sum1 is the new port, sum2 the old one (host byte order). */
		if (odst == 1) {
			sum1 = ntohs(nat->nat_inport);
			sum2 = ntohs(tcp->th_sport);

			tcp->th_sport = htons(sum1);
		} else {
			sum1 = ntohs(nat->nat_outport);
			sum2 = ntohs(tcp->th_dport);

			tcp->th_dport = htons(sum1);
		}

		sumd += sum1 - sum2;
		if (sumd != 0 || sumd2 != 0) {
			/*
			 * At this point, sumd is the delta to apply to the
			 * TCP/UDP header, given the changes in both the IP
			 * address and the ports and sumd2 is the delta to
			 * apply to the ICMP header, given the IP address
			 * change delta that may need to be applied to the
			 * TCP/UDP checksum instead.
			 *
			 * If we change both the IP and TCP/UDP checksums
			 * then the ICMP checksum changes by the address
			 * delta applied to the TCP/UDP checksum.  If we
			 * do not change the TCP/UDP checksum then we
			 * apply the delta in ports to the ICMP checksum.
			 */
			if (oip->ip_p == IPPROTO_UDP) {
				/*
				 * The UDP checksum is only adjusted when it
				 * is inside the data present (dlen >= 8)
				 * and is non-zero.
				 */
				if ((dlen >= 8) && (*csump != 0)) {
					fix_datacksum(csump, sumd);
				} else {
					sumd2 = sum1 - sum2;
					if (sum2 > sum1)
						sumd2--;
				}
			} else if (oip->ip_p == IPPROTO_TCP) {
				/*
				 * The TCP checksum is only adjusted when it
				 * is inside the data present (dlen >= 18).
				 */
				if (dlen >= 18) {
					fix_datacksum(csump, sumd);
				} else {
					sumd2 = sum2 - sum1;
					if (sum1 > sum2)
						sumd2--;
				}
			}

			if (sumd2 != 0) {
				ipnat_t *np;

				np = nat->nat_ptr;
				/* Fold sumd2 down towards 16 bits. */
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
				sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);

				if ((odst == 0) && (dir == NAT_OUTBOUND) &&
				    (fin->fin_rev == 0) && (np != NULL) &&
				    (np->in_redir & NAT_REDIRECT)) {
					fix_outcksum(fin, &icmp->icmp_cksum,
						     sumd2);
				} else {
					fix_incksum(fin, &icmp->icmp_cksum,
						    sumd2);
				}
			}
		}
	} else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) {
		icmphdr_t *orgicmp;

		/*
		 * XXX - what if this is bogus hl and we go off the end ?
		 * In this case, nat_icmperrorlookup() will have returned NULL.
		 */
		orgicmp = (icmphdr_t *)dp;

		if (odst == 1) {
			if (orgicmp->icmp_id != nat->nat_inport) {

				/*
				 * Fix ICMP checksum (of the offending ICMP
				 * query packet) to compensate the change
				 * in the ICMP id of the offending ICMP
				 * packet.
				 *
				 * Since you modify orgicmp->icmp_id with
				 * a delta (say x) and you compensate that
				 * in orgicmp->icmp_cksum with a delta
				 * minus x, you don't have to adjust the
				 * overall icmp->icmp_cksum
				 */
				sum1 = ntohs(orgicmp->icmp_id);
				sum2 = ntohs(nat->nat_inport);
				CALC_SUMD(sum1, sum2, sumd);
				orgicmp->icmp_id = nat->nat_inport;
				fix_datacksum(&orgicmp->icmp_cksum, sumd);
			}
		} /* nat_dir == NAT_INBOUND is impossible for icmp queries */
	}
	return nat;
}
3110 
3111 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3116 
3117 /* ------------------------------------------------------------------------ */
3118 /* Function:    nat_inlookup                                                */
3119 /* Returns:     nat_t* - NULL == no match,                                  */
3120 /*                       else pointer to matching NAT entry                 */
3121 /* Parameters:  fin(I)    - pointer to packet information                   */
3122 /*              flags(I)  - NAT flags for this packet                       */
3123 /*              p(I)      - protocol for this packet                        */
3124 /*              src(I)    - source IP address                               */
3125 /*              mapdst(I) - destination IP address                          */
3126 /*                                                                          */
3127 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3128 /* real source address/port.  We use this lookup when receiving a packet,   */
3129 /* we're looking for a table entry, based on the destination address.       */
3130 /*                                                                          */
3131 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3132 /*                                                                          */
3133 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3134 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3135 /*                                                                          */
3136 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3137 /*            the packet is of said protocol                                */
3138 /* ------------------------------------------------------------------------ */
nat_inlookup(fin,flags,p,src,mapdst)3139 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3140 fr_info_t *fin;
3141 u_int flags, p;
3142 struct in_addr src , mapdst;
3143 {
3144 	u_short sport, dport;
3145 	grehdr_t *gre;
3146 	ipnat_t *ipn;
3147 	u_int sflags;
3148 	nat_t *nat;
3149 	int nflags;
3150 	u_32_t dst;
3151 	void *ifp;
3152 	u_int hv;
3153 
3154 	ifp = fin->fin_ifp;
3155 	sport = 0;
3156 	dport = 0;
3157 	gre = NULL;
3158 	dst = mapdst.s_addr;
3159 	sflags = flags & NAT_TCPUDPICMP;
3160 
3161 	switch (p)
3162 	{
3163 	case IPPROTO_TCP :
3164 	case IPPROTO_UDP :
3165 		sport = htons(fin->fin_data[0]);
3166 		dport = htons(fin->fin_data[1]);
3167 		break;
3168 	case IPPROTO_ICMP :
3169 		if (flags & IPN_ICMPERR)
3170 			sport = fin->fin_data[1];
3171 		else
3172 			dport = fin->fin_data[1];
3173 		break;
3174 	default :
3175 		break;
3176 	}
3177 
3178 
3179 	if ((flags & SI_WILDP) != 0)
3180 		goto find_in_wild_ports;
3181 
3182 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3183 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
3184 	nat = nat_table[1][hv];
3185 	for (; nat; nat = nat->nat_hnext[1]) {
3186 		if (nat->nat_ifps[0] != NULL) {
3187 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3188 				continue;
3189 		} else if (ifp != NULL)
3190 			nat->nat_ifps[0] = ifp;
3191 
3192 		nflags = nat->nat_flags;
3193 
3194 		if (nat->nat_oip.s_addr == src.s_addr &&
3195 		    nat->nat_outip.s_addr == dst &&
3196 		    (((p == 0) &&
3197 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3198 		     || (p == nat->nat_p))) {
3199 			switch (p)
3200 			{
3201 #if 0
3202 			case IPPROTO_GRE :
3203 				if (nat->nat_call[1] != fin->fin_data[0])
3204 					continue;
3205 				break;
3206 #endif
3207 			case IPPROTO_ICMP :
3208 				if ((flags & IPN_ICMPERR) != 0) {
3209 					if (nat->nat_outport != sport)
3210 						continue;
3211 				} else {
3212 					if (nat->nat_outport != dport)
3213 						continue;
3214 				}
3215 				break;
3216 			case IPPROTO_TCP :
3217 			case IPPROTO_UDP :
3218 				if (nat->nat_oport != sport)
3219 					continue;
3220 				if (nat->nat_outport != dport)
3221 					continue;
3222 				break;
3223 			default :
3224 				break;
3225 			}
3226 
3227 			ipn = nat->nat_ptr;
3228 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3229 				if (appr_match(fin, nat) != 0)
3230 					continue;
3231 			return nat;
3232 		}
3233 	}
3234 
3235 	/*
3236 	 * So if we didn't find it but there are wildcard members in the hash
3237 	 * table, go back and look for them.  We do this search and update here
3238 	 * because it is modifying the NAT table and we want to do this only
3239 	 * for the first packet that matches.  The exception, of course, is
3240 	 * for "dummy" (FI_IGNORE) lookups.
3241 	 */
3242 find_in_wild_ports:
3243 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3244 		return NULL;
3245 	if (nat_stats.ns_wilds == 0)
3246 		return NULL;
3247 
3248 	RWLOCK_EXIT(&ipf_nat);
3249 
3250 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3251 	hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);
3252 
3253 	WRITE_ENTER(&ipf_nat);
3254 
3255 	nat = nat_table[1][hv];
3256 	for (; nat; nat = nat->nat_hnext[1]) {
3257 		if (nat->nat_ifps[0] != NULL) {
3258 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3259 				continue;
3260 		} else if (ifp != NULL)
3261 			nat->nat_ifps[0] = ifp;
3262 
3263 		if (nat->nat_p != fin->fin_p)
3264 			continue;
3265 		if (nat->nat_oip.s_addr != src.s_addr ||
3266 		    nat->nat_outip.s_addr != dst)
3267 			continue;
3268 
3269 		nflags = nat->nat_flags;
3270 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3271 			continue;
3272 
3273 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3274 			       NAT_INBOUND) == 1) {
3275 			if ((fin->fin_flx & FI_IGNORE) != 0)
3276 				break;
3277 			if ((nflags & SI_CLONE) != 0) {
3278 				nat = fr_natclone(fin, nat);
3279 				if (nat == NULL)
3280 					break;
3281 			} else {
3282 				MUTEX_ENTER(&ipf_nat_new);
3283 				nat_stats.ns_wilds--;
3284 				MUTEX_EXIT(&ipf_nat_new);
3285 			}
3286 			nat->nat_oport = sport;
3287 			nat->nat_outport = dport;
3288 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3289 			nat_tabmove(nat);
3290 			break;
3291 		}
3292 	}
3293 
3294 	MUTEX_DOWNGRADE(&ipf_nat);
3295 
3296 	return nat;
3297 }
3298 
3299 
3300 /* ------------------------------------------------------------------------ */
3301 /* Function:    nat_tabmove                                                 */
3302 /* Returns:     Nil                                                         */
3303 /* Parameters:  nat(I) - pointer to NAT structure                           */
3304 /* Write Lock:  ipf_nat                                                     */
3305 /*                                                                          */
3306 /* This function is only called for TCP/UDP NAT table entries where the     */
3307 /* original was placed in the table without hashing on the ports and we now */
3308 /* want to include hashing on port numbers.                                 */
3309 /* ------------------------------------------------------------------------ */
nat_tabmove(nat)3310 static void nat_tabmove(nat)
3311 nat_t *nat;
3312 {
3313 	nat_t **natp;
3314 	u_int hv;
3315 
3316 	if (nat->nat_flags & SI_CLONE)
3317 		return;
3318 
3319 	/*
3320 	 * Remove the NAT entry from the old location
3321 	 */
3322 	if (nat->nat_hnext[0])
3323 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3324 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3325 	nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3326 
3327 	if (nat->nat_hnext[1])
3328 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3329 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3330 	nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3331 
3332 	/*
3333 	 * Add into the NAT table in the new position
3334 	 */
3335 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3336 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3337 			 ipf_nattable_sz);
3338 	nat->nat_hv[0] = hv;
3339 	natp = &nat_table[0][hv];
3340 	if (*natp)
3341 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3342 	nat->nat_phnext[0] = natp;
3343 	nat->nat_hnext[0] = *natp;
3344 	*natp = nat;
3345 	nat_stats.ns_bucketlen[0][hv]++;
3346 
3347 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3348 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3349 			 ipf_nattable_sz);
3350 	nat->nat_hv[1] = hv;
3351 	natp = &nat_table[1][hv];
3352 	if (*natp)
3353 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3354 	nat->nat_phnext[1] = natp;
3355 	nat->nat_hnext[1] = *natp;
3356 	*natp = nat;
3357 	nat_stats.ns_bucketlen[1][hv]++;
3358 }
3359 
3360 
3361 /* ------------------------------------------------------------------------ */
3362 /* Function:    nat_outlookup                                               */
3363 /* Returns:     nat_t* - NULL == no match,                                  */
3364 /*                       else pointer to matching NAT entry                 */
3365 /* Parameters:  fin(I)   - pointer to packet information                    */
3366 /*              flags(I) - NAT flags for this packet                        */
3367 /*              p(I)     - protocol for this packet                         */
3368 /*              src(I)   - source IP address                                */
3369 /*              dst(I)   - destination IP address                           */
3370 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3371 /*                                                                          */
3372 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3373 /* destination address/port.  We use this lookup when sending a packet out, */
3374 /* we're looking for a table entry, based on the source address.            */
3375 /*                                                                          */
3376 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3377 /*                                                                          */
3378 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3379 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3380 /*                                                                          */
3381 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3382 /*            the packet is of said protocol                                */
3383 /* ------------------------------------------------------------------------ */
nat_outlookup(fin,flags,p,src,dst)3384 nat_t *nat_outlookup(fin, flags, p, src, dst)
3385 fr_info_t *fin;
3386 u_int flags, p;
3387 struct in_addr src , dst;
3388 {
3389 	u_short sport, dport;
3390 	u_int sflags;
3391 	ipnat_t *ipn;
3392 	u_32_t srcip;
3393 	nat_t *nat;
3394 	int nflags;
3395 	void *ifp;
3396 	u_int hv;
3397 
3398 	ifp = fin->fin_ifp;
3399 	srcip = src.s_addr;
3400 	sflags = flags & IPN_TCPUDPICMP;
3401 	sport = 0;
3402 	dport = 0;
3403 
3404 	switch (p)
3405 	{
3406 	case IPPROTO_TCP :
3407 	case IPPROTO_UDP :
3408 		sport = htons(fin->fin_data[0]);
3409 		dport = htons(fin->fin_data[1]);
3410 		break;
3411 	case IPPROTO_ICMP :
3412 		if (flags & IPN_ICMPERR)
3413 			sport = fin->fin_data[1];
3414 		else
3415 			dport = fin->fin_data[1];
3416 		break;
3417 	default :
3418 		break;
3419 	}
3420 
3421 	if ((flags & SI_WILDP) != 0)
3422 		goto find_out_wild_ports;
3423 
3424 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3425 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
3426 	nat = nat_table[0][hv];
3427 	for (; nat; nat = nat->nat_hnext[0]) {
3428 		if (nat->nat_ifps[1] != NULL) {
3429 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3430 				continue;
3431 		} else if (ifp != NULL)
3432 			nat->nat_ifps[1] = ifp;
3433 
3434 		nflags = nat->nat_flags;
3435 
3436 		if (nat->nat_inip.s_addr == srcip &&
3437 		    nat->nat_oip.s_addr == dst.s_addr &&
3438 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3439 		     || (p == nat->nat_p))) {
3440 			switch (p)
3441 			{
3442 #if 0
3443 			case IPPROTO_GRE :
3444 				if (nat->nat_call[1] != fin->fin_data[0])
3445 					continue;
3446 				break;
3447 #endif
3448 			case IPPROTO_TCP :
3449 			case IPPROTO_UDP :
3450 				if (nat->nat_oport != dport)
3451 					continue;
3452 				if (nat->nat_inport != sport)
3453 					continue;
3454 				break;
3455 			default :
3456 				break;
3457 			}
3458 
3459 			ipn = nat->nat_ptr;
3460 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3461 				if (appr_match(fin, nat) != 0)
3462 					continue;
3463 			return nat;
3464 		}
3465 	}
3466 
3467 	/*
3468 	 * So if we didn't find it but there are wildcard members in the hash
3469 	 * table, go back and look for them.  We do this search and update here
3470 	 * because it is modifying the NAT table and we want to do this only
3471 	 * for the first packet that matches.  The exception, of course, is
3472 	 * for "dummy" (FI_IGNORE) lookups.
3473 	 */
3474 find_out_wild_ports:
3475 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3476 		return NULL;
3477 	if (nat_stats.ns_wilds == 0)
3478 		return NULL;
3479 
3480 	RWLOCK_EXIT(&ipf_nat);
3481 
3482 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3483 	hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);
3484 
3485 	WRITE_ENTER(&ipf_nat);
3486 
3487 	nat = nat_table[0][hv];
3488 	for (; nat; nat = nat->nat_hnext[0]) {
3489 		if (nat->nat_ifps[1] != NULL) {
3490 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3491 				continue;
3492 		} else if (ifp != NULL)
3493 			nat->nat_ifps[1] = ifp;
3494 
3495 		if (nat->nat_p != fin->fin_p)
3496 			continue;
3497 		if ((nat->nat_inip.s_addr != srcip) ||
3498 		    (nat->nat_oip.s_addr != dst.s_addr))
3499 			continue;
3500 
3501 		nflags = nat->nat_flags;
3502 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3503 			continue;
3504 
3505 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3506 			       NAT_OUTBOUND) == 1) {
3507 			if ((fin->fin_flx & FI_IGNORE) != 0)
3508 				break;
3509 			if ((nflags & SI_CLONE) != 0) {
3510 				nat = fr_natclone(fin, nat);
3511 				if (nat == NULL)
3512 					break;
3513 			} else {
3514 				MUTEX_ENTER(&ipf_nat_new);
3515 				nat_stats.ns_wilds--;
3516 				MUTEX_EXIT(&ipf_nat_new);
3517 			}
3518 			nat->nat_inport = sport;
3519 			nat->nat_oport = dport;
3520 			if (nat->nat_outport == 0)
3521 				nat->nat_outport = sport;
3522 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3523 			nat_tabmove(nat);
3524 			break;
3525 		}
3526 	}
3527 
3528 	MUTEX_DOWNGRADE(&ipf_nat);
3529 
3530 	return nat;
3531 }
3532 
3533 
3534 /* ------------------------------------------------------------------------ */
3535 /* Function:    nat_lookupredir                                             */
3536 /* Returns:     nat_t* - NULL == no match,                                  */
3537 /*                       else pointer to matching NAT entry                 */
3538 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3539 /*                      entry for.                                          */
3540 /*                                                                          */
3541 /* Lookup the NAT tables to search for a matching redirect                  */
3542 /* The contents of natlookup_t should imitate those found in a packet that  */
3543 /* would be translated - ie a packet coming in for RDR or going out for MAP.*/
3544 /* We can do the lookup in one of two ways, imitating an inbound or         */
3545 /* outbound  packet.  By default we assume outbound, unless IPN_IN is set.  */
3546 /* For IN, the fields are set as follows:                                   */
3547 /*     nl_real* = source information                                        */
3548 /*     nl_out* = destination information (translated)                       */
3549 /* For an out packet, the fields are set like this:                         */
3550 /*     nl_in* = source information (untranslated)                           */
3551 /*     nl_out* = destination information (translated)                       */
3552 /* ------------------------------------------------------------------------ */
nat_lookupredir(np)3553 nat_t *nat_lookupredir(np)
3554 natlookup_t *np;
3555 {
3556 	fr_info_t fi;
3557 	nat_t *nat;
3558 
3559 	bzero((char *)&fi, sizeof(fi));
3560 	if (np->nl_flags & IPN_IN) {
3561 		fi.fin_data[0] = ntohs(np->nl_realport);
3562 		fi.fin_data[1] = ntohs(np->nl_outport);
3563 	} else {
3564 		fi.fin_data[0] = ntohs(np->nl_inport);
3565 		fi.fin_data[1] = ntohs(np->nl_outport);
3566 	}
3567 	if (np->nl_flags & IPN_TCP)
3568 		fi.fin_p = IPPROTO_TCP;
3569 	else if (np->nl_flags & IPN_UDP)
3570 		fi.fin_p = IPPROTO_UDP;
3571 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3572 		fi.fin_p = IPPROTO_ICMP;
3573 
3574 	/*
3575 	 * We can do two sorts of lookups:
3576 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3577 	 * - default: we have the `in' and `out' address, look for `real'.
3578 	 */
3579 	if (np->nl_flags & IPN_IN) {
3580 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3581 					np->nl_realip, np->nl_outip))) {
3582 			np->nl_inip = nat->nat_inip;
3583 			np->nl_inport = nat->nat_inport;
3584 		}
3585 	} else {
3586 		/*
3587 		 * If nl_inip is non null, this is a lookup based on the real
3588 		 * ip address. Else, we use the fake.
3589 		 */
3590 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3591 					 np->nl_inip, np->nl_outip))) {
3592 
3593 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3594 				fr_info_t fin;
3595 				bzero((char *)&fin, sizeof(fin));
3596 				fin.fin_p = nat->nat_p;
3597 				fin.fin_data[0] = ntohs(nat->nat_outport);
3598 				fin.fin_data[1] = ntohs(nat->nat_oport);
3599 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3600 						 nat->nat_outip,
3601 						 nat->nat_oip) != NULL) {
3602 					np->nl_flags &= ~IPN_FINDFORWARD;
3603 				}
3604 			}
3605 
3606 			np->nl_realip = nat->nat_outip;
3607 			np->nl_realport = nat->nat_outport;
3608 		}
3609  	}
3610 
3611 	return nat;
3612 }
3613 
3614 
3615 /* ------------------------------------------------------------------------ */
3616 /* Function:    nat_match                                                   */
3617 /* Returns:     int - 0 == no match, 1 == match                             */
3618 /* Parameters:  fin(I)   - pointer to packet information                    */
3619 /*              np(I)    - pointer to NAT rule                              */
3620 /*                                                                          */
3621 /* Pull the matching of a packet against a NAT rule out of that complex     */
3622 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3623 /* ------------------------------------------------------------------------ */
nat_match(fin,np)3624 static int nat_match(fin, np)
3625 fr_info_t *fin;
3626 ipnat_t *np;
3627 {
3628 	frtuc_t *ft;
3629 
3630 	if (fin->fin_v != 4)
3631 		return 0;
3632 
3633 	if (np->in_p && fin->fin_p != np->in_p)
3634 		return 0;
3635 
3636 	if (fin->fin_out) {
3637 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3638 			return 0;
3639 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3640 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3641 			return 0;
3642 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3643 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3644 			return 0;
3645 	} else {
3646 		if (!(np->in_redir & NAT_REDIRECT))
3647 			return 0;
3648 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3649 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3650 			return 0;
3651 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3652 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3653 			return 0;
3654 	}
3655 
3656 	ft = &np->in_tuc;
3657 	if (!(fin->fin_flx & FI_TCPUDP) ||
3658 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3659 		if (ft->ftu_scmp || ft->ftu_dcmp)
3660 			return 0;
3661 		return 1;
3662 	}
3663 
3664 	return fr_tcpudpchk(fin, ft);
3665 }
3666 
3667 
3668 /* ------------------------------------------------------------------------ */
3669 /* Function:    nat_update                                                  */
3670 /* Returns:     Nil                                                         */
3671 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3672 /*              np(I)     - pointer to NAT rule                             */
3673 /*                                                                          */
3674 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3675 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3676 /* ------------------------------------------------------------------------ */
nat_update(fin,nat,np)3677 void nat_update(fin, nat, np)
3678 fr_info_t *fin;
3679 nat_t *nat;
3680 ipnat_t *np;
3681 {
3682 	ipftq_t *ifq, *ifq2;
3683 	ipftqent_t *tqe;
3684 
3685 	MUTEX_ENTER(&nat->nat_lock);
3686 	tqe = &nat->nat_tqe;
3687 	ifq = tqe->tqe_ifq;
3688 
3689 	/*
3690 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3691 	 * TCP, however, if it is TCP and there is no rule timeout set,
3692 	 * then do not update the timeout here.
3693 	 */
3694 	if (np != NULL)
3695 		ifq2 = np->in_tqehead[fin->fin_rev];
3696 	else
3697 		ifq2 = NULL;
3698 
3699 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3700 		u_32_t end, ack;
3701 		u_char tcpflags;
3702 		tcphdr_t *tcp;
3703 		int dsize;
3704 
3705 		tcp = fin->fin_dp;
3706 		tcpflags = tcp->th_flags;
3707 		dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
3708 			((tcpflags & TH_SYN) ? 1 : 0) +
3709 			((tcpflags & TH_FIN) ? 1 : 0);
3710 
3711 		ack = ntohl(tcp->th_ack);
3712 		end = ntohl(tcp->th_seq) + dsize;
3713 
3714 		if (SEQ_GT(ack, nat->nat_seqnext[1 - fin->fin_rev]))
3715 			nat->nat_seqnext[1 - fin->fin_rev] = ack;
3716 
3717 		if (nat->nat_seqnext[fin->fin_rev] == 0)
3718 			nat->nat_seqnext[fin->fin_rev] = end;
3719 
3720 		(void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0);
3721 	} else {
3722 		if (ifq2 == NULL) {
3723 			if (nat->nat_p == IPPROTO_UDP)
3724 				ifq2 = &nat_udptq;
3725 			else if (nat->nat_p == IPPROTO_ICMP)
3726 				ifq2 = &nat_icmptq;
3727 			else
3728 				ifq2 = &nat_iptq;
3729 		}
3730 
3731 		fr_movequeue(tqe, ifq, ifq2);
3732 	}
3733 	MUTEX_EXIT(&nat->nat_lock);
3734 }
3735 
3736 
3737 /* ------------------------------------------------------------------------ */
3738 /* Function:    fr_checknatout                                              */
3739 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3740 /*                     0 == no packet translation occurred,                 */
3741 /*                     1 == packet was successfully translated.             */
3742 /* Parameters:  fin(I)   - pointer to packet information                    */
3743 /*              passp(I) - pointer to filtering result flags                */
3744 /*                                                                          */
/* Check to see if an outgoing packet should be changed.  ICMP packets are  */
3746 /* first checked to see if they match an existing entry (if an error),      */
3747 /* otherwise a search of the current NAT table is made.  If neither results */
3748 /* in a match then a search for a matching NAT rule is made.  Create a new  */
/* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
3750 /* packet header(s) as required.                                            */
3751 /* ------------------------------------------------------------------------ */
fr_checknatout(fin,passp)3752 int fr_checknatout(fin, passp)
3753 fr_info_t *fin;
3754 u_32_t *passp;
3755 {
3756 	struct ifnet *ifp, *sifp;
3757 	icmphdr_t *icmp = NULL;
3758 	tcphdr_t *tcp = NULL;
3759 	int rval, natfailed;
3760 	ipnat_t *np = NULL;
3761 	u_int nflags = 0;
3762 	u_32_t ipa, iph;
3763 	int natadd = 1;
3764 	frentry_t *fr;
3765 	nat_t *nat;
3766 
3767 	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
3768 		return 0;
3769 
3770 	natfailed = 0;
3771 	fr = fin->fin_fr;
3772 	sifp = fin->fin_ifp;
3773 	if (fr != NULL) {
3774 		ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3775 		if ((ifp != NULL) && (ifp != (void *)-1))
3776 			fin->fin_ifp = ifp;
3777 	}
3778 	ifp = fin->fin_ifp;
3779 
3780 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3781 		switch (fin->fin_p)
3782 		{
3783 		case IPPROTO_TCP :
3784 			nflags = IPN_TCP;
3785 			break;
3786 		case IPPROTO_UDP :
3787 			nflags = IPN_UDP;
3788 			break;
3789 		case IPPROTO_ICMP :
3790 			icmp = fin->fin_dp;
3791 
3792 			/*
3793 			 * This is an incoming packet, so the destination is
3794 			 * the icmp_id and the source port equals 0
3795 			 */
3796 			if (nat_icmpquerytype4(icmp->icmp_type))
3797 				nflags = IPN_ICMPQUERY;
3798 			break;
3799 		default :
3800 			break;
3801 		}
3802 
3803 		if ((nflags & IPN_TCPUDP))
3804 			tcp = fin->fin_dp;
3805 	}
3806 
3807 	ipa = fin->fin_saddr;
3808 
3809 	READ_ENTER(&ipf_nat);
3810 
3811 	if (((fin->fin_flx & FI_ICMPERR) != 0) &&
3812 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3813 		/*EMPTY*/;
3814 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3815 		natadd = 0;
3816 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3817 				      fin->fin_src, fin->fin_dst))) {
3818 		nflags = nat->nat_flags;
3819 	} else {
3820 		u_32_t hv, msk, nmsk;
3821 
3822 		/*
3823 		 * If there is no current entry in the nat table for this IP#,
3824 		 * create one for it (if there is a matching rule).
3825 		 */
3826 		RWLOCK_EXIT(&ipf_nat);
3827 		msk = 0xffffffff;
3828 		nmsk = nat_masks;
3829 		WRITE_ENTER(&ipf_nat);
3830 maskloop:
3831 		iph = ipa & htonl(msk);
3832 		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
3833 		for (np = nat_rules[hv]; np; np = np->in_mnext)
3834 		{
3835 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3836 				continue;
3837 			if (np->in_v != fin->fin_v)
3838 				continue;
3839 			if (np->in_p && (np->in_p != fin->fin_p))
3840 				continue;
3841 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3842 				continue;
3843 			if (np->in_flags & IPN_FILTER) {
3844 				if (!nat_match(fin, np))
3845 					continue;
3846 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3847 				continue;
3848 
3849 			if ((fr != NULL) &&
3850 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3851 				continue;
3852 
3853 			if (*np->in_plabel != '\0') {
3854 				if (((np->in_flags & IPN_FILTER) == 0) &&
3855 				    (np->in_dport != tcp->th_dport))
3856 					continue;
3857 				if (appr_ok(fin, tcp, np) == 0)
3858 					continue;
3859 			}
3860 
3861 			if ((nat = nat_new(fin, np, NULL, nflags,
3862 					   NAT_OUTBOUND))) {
3863 				np->in_hits++;
3864 				break;
3865 			} else
3866 				natfailed = -1;
3867 		}
3868 		if ((np == NULL) && (nmsk != 0)) {
3869 			while (nmsk) {
3870 				msk <<= 1;
3871 				if (nmsk & 0x80000000)
3872 					break;
3873 				nmsk <<= 1;
3874 			}
3875 			if (nmsk != 0) {
3876 				nmsk <<= 1;
3877 				goto maskloop;
3878 			}
3879 		}
3880 		MUTEX_DOWNGRADE(&ipf_nat);
3881 	}
3882 
3883 	if (nat != NULL) {
3884 		rval = fr_natout(fin, nat, natadd, nflags);
3885 		if (rval == 1) {
3886 			MUTEX_ENTER(&nat->nat_lock);
3887 			nat->nat_ref++;
3888 			MUTEX_EXIT(&nat->nat_lock);
3889 			nat->nat_touched = fr_ticks;
3890 			fin->fin_nat = nat;
3891 		}
3892 	} else
3893 		rval = natfailed;
3894 	RWLOCK_EXIT(&ipf_nat);
3895 
3896 	if (rval == -1) {
3897 		if (passp != NULL)
3898 			*passp = FR_BLOCK;
3899 		fin->fin_flx |= FI_BADNAT;
3900 	}
3901 	fin->fin_ifp = sifp;
3902 	return rval;
3903 }
3904 
3905 /* ------------------------------------------------------------------------ */
3906 /* Function:    fr_natout                                                   */
3907 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3908 /*                     1 == packet was successfully translated.             */
3909 /* Parameters:  fin(I)    - pointer to packet information                   */
3910 /*              nat(I)    - pointer to NAT structure                        */
3911 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3912 /*              nflags(I) - NAT flags set for this packet                   */
3913 /*                                                                          */
3914 /* Translate a packet coming "out" on an interface.                         */
3915 /* ------------------------------------------------------------------------ */
fr_natout(fin,nat,natadd,nflags)3916 int fr_natout(fin, nat, natadd, nflags)
3917 fr_info_t *fin;
3918 nat_t *nat;
3919 int natadd;
3920 u_32_t nflags;
3921 {
3922 	icmphdr_t *icmp;
3923 	u_short *csump;
3924 	tcphdr_t *tcp;
3925 	ipnat_t *np;
3926 	int i;
3927 
3928 	tcp = NULL;
3929 	icmp = NULL;
3930 	csump = NULL;
3931 	np = nat->nat_ptr;
3932 
3933 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL))
3934 		(void) fr_nat_newfrag(fin, 0, nat);
3935 
3936 	MUTEX_ENTER(&nat->nat_lock);
3937 	nat->nat_bytes[1] += fin->fin_plen;
3938 	nat->nat_pkts[1]++;
3939 	MUTEX_EXIT(&nat->nat_lock);
3940 
3941 	/*
3942 	 * Fix up checksums, not by recalculating them, but
3943 	 * simply computing adjustments.
3944 	 * This is only done for STREAMS based IP implementations where the
3945 	 * checksum has already been calculated by IP.  In all other cases,
3946 	 * IPFilter is called before the checksum needs calculating so there
3947 	 * is no call to modify whatever is in the header now.
3948 	 */
3949 	if (fin->fin_v == 4) {
3950 		if (nflags == IPN_ICMPERR) {
3951 			u_32_t s1, s2, sumd;
3952 
3953 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3954 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3955 			CALC_SUMD(s1, s2, sumd);
3956 			fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd);
3957 		}
3958 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3959     defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__)
3960 		else {
3961 			/*
3962 			 * Strictly speaking, this isn't necessary on BSD
3963 			 * kernels because they do checksum calculation after
3964 			 * this code has run BUT if ipfilter is being used
3965 			 * to do NAT as a bridge, that code doesn't exist.
3966 			 */
3967 			if (nat->nat_dir == NAT_OUTBOUND)
3968 				fix_outcksum(fin, &fin->fin_ip->ip_sum,
3969 					     nat->nat_ipsumd);
3970 			else
3971 				fix_incksum(fin, &fin->fin_ip->ip_sum,
3972 					    nat->nat_ipsumd);
3973 		}
3974 #endif
3975 	}
3976 
3977 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3978 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3979 			tcp = fin->fin_dp;
3980 
3981 			tcp->th_sport = nat->nat_outport;
3982 			fin->fin_data[0] = ntohs(nat->nat_outport);
3983 		}
3984 
3985 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3986 			icmp = fin->fin_dp;
3987 			icmp->icmp_id = nat->nat_outport;
3988 		}
3989 
3990 		csump = nat_proto(fin, nat, nflags);
3991 	}
3992 
3993 	fin->fin_ip->ip_src = nat->nat_outip;
3994 
3995 	nat_update(fin, nat, np);
3996 
3997 	/*
3998 	 * The above comments do not hold for layer 4 (or higher) checksums...
3999 	 */
4000 	if (csump != NULL) {
4001 		if (nat->nat_dir == NAT_OUTBOUND)
4002 			fix_outcksum(fin, csump, nat->nat_sumd[1]);
4003 		else
4004 			fix_incksum(fin, csump, nat->nat_sumd[1]);
4005 	}
4006 #ifdef	IPFILTER_SYNC
4007 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4008 #endif
4009 	/* ------------------------------------------------------------- */
4010 	/* A few quick notes:						 */
4011 	/*	Following are test conditions prior to calling the 	 */
4012 	/*	appr_check routine.					 */
4013 	/*								 */
4014 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4015 	/*	with a redirect rule, we attempt to match the packet's	 */
4016 	/*	source port against in_dport, otherwise	we'd compare the */
4017 	/*	packet's destination.			 		 */
4018 	/* ------------------------------------------------------------- */
4019 	if ((np != NULL) && (np->in_apr != NULL)) {
4020 		i = appr_check(fin, nat);
4021 		if (i == 0)
4022 			i = 1;
4023 	} else
4024 		i = 1;
4025 	ATOMIC_INCL(nat_stats.ns_mapped[1]);
4026 	fin->fin_flx |= FI_NATED;
4027 	return i;
4028 }
4029 
4030 
4031 /* ------------------------------------------------------------------------ */
4032 /* Function:    fr_checknatin                                               */
4033 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4034 /*                     0 == no packet translation occurred,                 */
4035 /*                     1 == packet was successfully translated.             */
4036 /* Parameters:  fin(I)   - pointer to packet information                    */
4037 /*              passp(I) - pointer to filtering result flags                */
4038 /*                                                                          */
4039 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
4040 /* first checked to see if they match an existing entry (if an error),      */
4041 /* otherwise a search of the current NAT table is made.  If neither results */
4042 /* in a match then a search for a matching NAT rule is made.  Create a new  */
/* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
4044 /* packet header(s) as required.                                            */
4045 /* ------------------------------------------------------------------------ */
fr_checknatin(fin,passp)4046 int fr_checknatin(fin, passp)
4047 fr_info_t *fin;
4048 u_32_t *passp;
4049 {
4050 	u_int nflags, natadd;
4051 	int rval, natfailed;
4052 	struct ifnet *ifp;
4053 	struct in_addr in;
4054 	icmphdr_t *icmp;
4055 	tcphdr_t *tcp;
4056 	u_short dport;
4057 	ipnat_t *np;
4058 	nat_t *nat;
4059 	u_32_t iph;
4060 
4061 	if (nat_stats.ns_rules == 0 || fr_nat_lock != 0)
4062 		return 0;
4063 
4064 	tcp = NULL;
4065 	icmp = NULL;
4066 	dport = 0;
4067 	natadd = 1;
4068 	nflags = 0;
4069 	natfailed = 0;
4070 	ifp = fin->fin_ifp;
4071 
4072 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4073 		switch (fin->fin_p)
4074 		{
4075 		case IPPROTO_TCP :
4076 			nflags = IPN_TCP;
4077 			break;
4078 		case IPPROTO_UDP :
4079 			nflags = IPN_UDP;
4080 			break;
4081 		case IPPROTO_ICMP :
4082 			icmp = fin->fin_dp;
4083 
4084 			/*
4085 			 * This is an incoming packet, so the destination is
4086 			 * the icmp_id and the source port equals 0
4087 			 */
4088 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4089 				nflags = IPN_ICMPQUERY;
4090 				dport = icmp->icmp_id;
4091 			} break;
4092 		default :
4093 			break;
4094 		}
4095 
4096 		if ((nflags & IPN_TCPUDP)) {
4097 			tcp = fin->fin_dp;
4098 			dport = tcp->th_dport;
4099 		}
4100 	}
4101 
4102 	in = fin->fin_dst;
4103 
4104 	READ_ENTER(&ipf_nat);
4105 
4106 	if (((fin->fin_flx & FI_ICMPERR) != 0) &&
4107 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4108 		/*EMPTY*/;
4109 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4110 		natadd = 0;
4111 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4112 				     fin->fin_src, in))) {
4113 		nflags = nat->nat_flags;
4114 	} else {
4115 		u_32_t hv, msk, rmsk;
4116 
4117 		RWLOCK_EXIT(&ipf_nat);
4118 		rmsk = rdr_masks;
4119 		msk = 0xffffffff;
4120 		WRITE_ENTER(&ipf_nat);
4121 		/*
4122 		 * If there is no current entry in the nat table for this IP#,
4123 		 * create one for it (if there is a matching rule).
4124 		 */
4125 maskloop:
4126 		iph = in.s_addr & htonl(msk);
4127 		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
4128 		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
4129 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4130 				continue;
4131 			if (np->in_v != fin->fin_v)
4132 				continue;
4133 			if (np->in_p && (np->in_p != fin->fin_p))
4134 				continue;
4135 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4136 				continue;
4137 			if (np->in_flags & IPN_FILTER) {
4138 				if (!nat_match(fin, np))
4139 					continue;
4140 			} else {
4141 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4142 					continue;
4143 				if (np->in_pmin &&
4144 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4145 				     (ntohs(dport) < ntohs(np->in_pmin))))
4146 					continue;
4147 			}
4148 
4149 			if (*np->in_plabel != '\0') {
4150 				if (!appr_ok(fin, tcp, np)) {
4151 					continue;
4152 				}
4153 			}
4154 
4155 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4156 			if (nat != NULL) {
4157 				np->in_hits++;
4158 				break;
4159 			} else
4160 				natfailed = -1;
4161 		}
4162 
4163 		if ((np == NULL) && (rmsk != 0)) {
4164 			while (rmsk) {
4165 				msk <<= 1;
4166 				if (rmsk & 0x80000000)
4167 					break;
4168 				rmsk <<= 1;
4169 			}
4170 			if (rmsk != 0) {
4171 				rmsk <<= 1;
4172 				goto maskloop;
4173 			}
4174 		}
4175 		MUTEX_DOWNGRADE(&ipf_nat);
4176 	}
4177 	if (nat != NULL) {
4178 		rval = fr_natin(fin, nat, natadd, nflags);
4179 		if (rval == 1) {
4180 			MUTEX_ENTER(&nat->nat_lock);
4181 			nat->nat_ref++;
4182 			MUTEX_EXIT(&nat->nat_lock);
4183 			nat->nat_touched = fr_ticks;
4184 			fin->fin_nat = nat;
4185 		}
4186 	} else
4187 		rval = natfailed;
4188 	RWLOCK_EXIT(&ipf_nat);
4189 
4190 	if (rval == -1) {
4191 		if (passp != NULL)
4192 			*passp = FR_BLOCK;
4193 		fin->fin_flx |= FI_BADNAT;
4194 	}
4195 	return rval;
4196 }
4197 
4198 
4199 /* ------------------------------------------------------------------------ */
4200 /* Function:    fr_natin                                                    */
4201 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4202 /*                     1 == packet was successfully translated.             */
4203 /* Parameters:  fin(I)    - pointer to packet information                   */
4204 /*              nat(I)    - pointer to NAT structure                        */
4205 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4206 /*              nflags(I) - NAT flags set for this packet                   */
4207 /* Locks Held:  ipf_nat (READ)                                              */
4208 /*                                                                          */
4209 /* Translate a packet coming "in" on an interface.                          */
4210 /* ------------------------------------------------------------------------ */
fr_natin(fin,nat,natadd,nflags)4211 int fr_natin(fin, nat, natadd, nflags)
4212 fr_info_t *fin;
4213 nat_t *nat;
4214 int natadd;
4215 u_32_t nflags;
4216 {
4217 	icmphdr_t *icmp;
4218 	u_short *csump;
4219 	tcphdr_t *tcp;
4220 	ipnat_t *np;
4221 	int i;
4222 
4223 	tcp = NULL;
4224 	csump = NULL;
4225 	np = nat->nat_ptr;
4226 	fin->fin_fr = nat->nat_fr;
4227 
4228 	if (np != NULL) {
4229 		if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4230 			(void) fr_nat_newfrag(fin, 0, nat);
4231 
4232 	/* ------------------------------------------------------------- */
4233 	/* A few quick notes:						 */
4234 	/*	Following are test conditions prior to calling the 	 */
4235 	/*	appr_check routine.					 */
4236 	/*								 */
4237 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4238 	/*	with a map rule, we attempt to match the packet's	 */
4239 	/*	source port against in_dport, otherwise	we'd compare the */
4240 	/*	packet's destination.			 		 */
4241 	/* ------------------------------------------------------------- */
4242 		if (np->in_apr != NULL) {
4243 			i = appr_check(fin, nat);
4244 			if (i == -1) {
4245 				return -1;
4246 			}
4247 		}
4248 	}
4249 
4250 #ifdef	IPFILTER_SYNC
4251 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4252 #endif
4253 
4254 	MUTEX_ENTER(&nat->nat_lock);
4255 	nat->nat_bytes[0] += fin->fin_plen;
4256 	nat->nat_pkts[0]++;
4257 	MUTEX_EXIT(&nat->nat_lock);
4258 
4259 	fin->fin_ip->ip_dst = nat->nat_inip;
4260 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4261 	if (nflags & IPN_TCPUDP)
4262 		tcp = fin->fin_dp;
4263 
4264 	/*
4265 	 * Fix up checksums, not by recalculating them, but
4266 	 * simply computing adjustments.
4267 	 * Why only do this for some platforms on inbound packets ?
4268 	 * Because for those that it is done, IP processing is yet to happen
4269 	 * and so the IPv4 header checksum has not yet been evaluated.
4270 	 * Perhaps it should always be done for the benefit of things like
4271 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4272 	 * header checksum offloading, perhaps it is a moot point.
4273 	 */
4274 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4275      defined(__osf__) || defined(linux)
4276 	if (nat->nat_dir == NAT_OUTBOUND)
4277 		fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4278 	else
4279 		fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd);
4280 #endif
4281 
4282 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4283 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4284 			tcp->th_dport = nat->nat_inport;
4285 			fin->fin_data[1] = ntohs(nat->nat_inport);
4286 		}
4287 
4288 
4289 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4290 			icmp = fin->fin_dp;
4291 
4292 			icmp->icmp_id = nat->nat_inport;
4293 		}
4294 
4295 		csump = nat_proto(fin, nat, nflags);
4296 	}
4297 
4298 	nat_update(fin, nat, np);
4299 
4300 	/*
4301 	 * The above comments do not hold for layer 4 (or higher) checksums...
4302 	 */
4303 	if (csump != NULL) {
4304 		if (nat->nat_dir == NAT_OUTBOUND)
4305 			fix_incksum(fin, csump, nat->nat_sumd[0]);
4306 		else
4307 			fix_outcksum(fin, csump, nat->nat_sumd[0]);
4308 	}
4309 	ATOMIC_INCL(nat_stats.ns_mapped[0]);
4310 	fin->fin_flx |= FI_NATED;
4311 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4312 		fin->fin_nattag = &np->in_tag;
4313 	return 1;
4314 }
4315 
4316 
4317 /* ------------------------------------------------------------------------ */
4318 /* Function:    nat_proto                                                   */
4319 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4320 /*                         NULL if the transport protocol is not recognised */
4321 /*                         as needing a checksum update.                    */
4322 /* Parameters:  fin(I)    - pointer to packet information                   */
4323 /*              nat(I)    - pointer to NAT structure                        */
4324 /*              nflags(I) - NAT flags set for this packet                   */
4325 /*                                                                          */
4326 /* Return the pointer to the checksum field for each protocol so understood.*/
4327 /* If support for making other changes to a protocol header is required,    */
4328 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4329 /* TCP down to a specific value, then do it from here.                      */
4330 /* ------------------------------------------------------------------------ */
nat_proto(fin,nat,nflags)4331 u_short *nat_proto(fin, nat, nflags)
4332 fr_info_t *fin;
4333 nat_t *nat;
4334 u_int nflags;
4335 {
4336 	icmphdr_t *icmp;
4337 	u_short *csump;
4338 	tcphdr_t *tcp;
4339 	udphdr_t *udp;
4340 
4341 	csump = NULL;
4342 	if (fin->fin_out == 0) {
4343 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4344 	} else {
4345 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4346 	}
4347 
4348 	switch (fin->fin_p)
4349 	{
4350 	case IPPROTO_TCP :
4351 		tcp = fin->fin_dp;
4352 
4353 		csump = &tcp->th_sum;
4354 
4355 		/*
4356 		 * Do a MSS CLAMPING on a SYN packet,
4357 		 * only deal IPv4 for now.
4358 		 */
4359 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4360 			nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump);
4361 
4362 		break;
4363 
4364 	case IPPROTO_UDP :
4365 		udp = fin->fin_dp;
4366 
4367 		if (udp->uh_sum)
4368 			csump = &udp->uh_sum;
4369 		break;
4370 
4371 	case IPPROTO_ICMP :
4372 		icmp = fin->fin_dp;
4373 
4374 		if ((nflags & IPN_ICMPQUERY) != 0) {
4375 			if (icmp->icmp_cksum != 0)
4376 				csump = &icmp->icmp_cksum;
4377 		}
4378 		break;
4379 	}
4380 	return csump;
4381 }
4382 
4383 
4384 /* ------------------------------------------------------------------------ */
4385 /* Function:    fr_natunload                                                */
4386 /* Returns:     Nil                                                         */
4387 /* Parameters:  Nil                                                         */
4388 /*                                                                          */
4389 /* Free all memory used by NAT structures allocated at runtime.             */
4390 /* ------------------------------------------------------------------------ */
fr_natunload()4391 void fr_natunload()
4392 {
4393 	ipftq_t *ifq, *ifqnext;
4394 
4395 	(void) nat_clearlist();
4396 	(void) nat_flushtable();
4397 
4398 	/*
4399 	 * Proxy timeout queues are not cleaned here because although they
4400 	 * exist on the NAT list, appr_unload is called after fr_natunload
4401 	 * and the proxies actually are responsible for them being created.
4402 	 * Should the proxy timeouts have their own list?  There's no real
4403 	 * justification as this is the only complication.
4404 	 */
4405 	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4406 		ifqnext = ifq->ifq_next;
4407 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4408 		    (fr_deletetimeoutqueue(ifq) == 0))
4409 			fr_freetimeoutqueue(ifq);
4410 	}
4411 
4412 	if (nat_table[0] != NULL) {
4413 		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
4414 		nat_table[0] = NULL;
4415 	}
4416 	if (nat_table[1] != NULL) {
4417 		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
4418 		nat_table[1] = NULL;
4419 	}
4420 	if (nat_rules != NULL) {
4421 		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
4422 		nat_rules = NULL;
4423 	}
4424 	if (rdr_rules != NULL) {
4425 		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
4426 		rdr_rules = NULL;
4427 	}
4428 	if (ipf_hm_maptable != NULL) {
4429 		KFREES(ipf_hm_maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
4430 		ipf_hm_maptable = NULL;
4431 	}
4432 	if (nat_stats.ns_bucketlen[0] != NULL) {
4433 		KFREES(nat_stats.ns_bucketlen[0],
4434 		       sizeof(u_long *) * ipf_nattable_sz);
4435 		nat_stats.ns_bucketlen[0] = NULL;
4436 	}
4437 	if (nat_stats.ns_bucketlen[1] != NULL) {
4438 		KFREES(nat_stats.ns_bucketlen[1],
4439 		       sizeof(u_long *) * ipf_nattable_sz);
4440 		nat_stats.ns_bucketlen[1] = NULL;
4441 	}
4442 
4443 	if (fr_nat_maxbucket_reset == 1)
4444 		fr_nat_maxbucket = 0;
4445 
4446 	if (fr_nat_init == 1) {
4447 		fr_nat_init = 0;
4448 		fr_sttab_destroy(nat_tqb);
4449 
4450 		RW_DESTROY(&ipf_natfrag);
4451 		RW_DESTROY(&ipf_nat);
4452 
4453 		MUTEX_DESTROY(&ipf_nat_new);
4454 		MUTEX_DESTROY(&ipf_natio);
4455 
4456 		MUTEX_DESTROY(&nat_udptq.ifq_lock);
4457 		MUTEX_DESTROY(&nat_icmptq.ifq_lock);
4458 		MUTEX_DESTROY(&nat_iptq.ifq_lock);
4459 	}
4460 }
4461 
4462 
4463 /* ------------------------------------------------------------------------ */
4464 /* Function:    fr_natexpire                                                */
4465 /* Returns:     Nil                                                         */
4466 /* Parameters:  Nil                                                         */
4467 /*                                                                          */
4468 /* Check all of the timeout queues for entries at the top which need to be  */
4469 /* expired.                                                                 */
4470 /* ------------------------------------------------------------------------ */
fr_natexpire()4471 void fr_natexpire()
4472 {
4473 	ipftq_t *ifq, *ifqnext;
4474 	ipftqent_t *tqe, *tqn;
4475 	int i;
4476 	SPL_INT(s);
4477 
4478 	SPL_NET(s);
4479 	WRITE_ENTER(&ipf_nat);
4480 	for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4481 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4482 			if (tqe->tqe_die > fr_ticks)
4483 				break;
4484 			tqn = tqe->tqe_next;
4485 			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4486 		}
4487 	}
4488 
4489 	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4490 		ifqnext = ifq->ifq_next;
4491 
4492 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4493 			if (tqe->tqe_die > fr_ticks)
4494 				break;
4495 			tqn = tqe->tqe_next;
4496 			nat_delete(tqe->tqe_parent, NL_EXPIRE);
4497 		}
4498 	}
4499 
4500 	for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
4501 		ifqnext = ifq->ifq_next;
4502 
4503 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4504 		    (ifq->ifq_ref == 0)) {
4505 			fr_freetimeoutqueue(ifq);
4506 		}
4507 	}
4508 
4509 	if (fr_nat_doflush != 0) {
4510 		nat_extraflush(2);
4511 		fr_nat_doflush = 0;
4512 	}
4513 
4514 	RWLOCK_EXIT(&ipf_nat);
4515 	SPL_X(s);
4516 }
4517 
4518 
4519 /* ------------------------------------------------------------------------ */
4520 /* Function:    fr_natsync                                                  */
4521 /* Returns:     Nil                                                         */
4522 /* Parameters:  ifp(I) - pointer to network interface                       */
4523 /*                                                                          */
4524 /* Walk through all of the currently active NAT sessions, looking for those */
4525 /* which need to have their translated address updated.                     */
4526 /* ------------------------------------------------------------------------ */
fr_natsync(ifp)4527 void fr_natsync(ifp)
4528 void *ifp;
4529 {
4530 	u_32_t sum1, sum2, sumd;
4531 	struct in_addr in;
4532 	ipnat_t *n;
4533 	nat_t *nat;
4534 	void *ifp2;
4535 	SPL_INT(s);
4536 
4537 	if (fr_running <= 0)
4538 		return;
4539 
4540 	/*
4541 	 * Change IP addresses for NAT sessions for any protocol except TCP
4542 	 * since it will break the TCP connection anyway.  The only rules
4543 	 * which will get changed are those which are "map ... -> 0/32",
4544 	 * where the rule specifies the address is taken from the interface.
4545 	 */
4546 	SPL_NET(s);
4547 	WRITE_ENTER(&ipf_nat);
4548 
4549 	if (fr_running <= 0) {
4550 		RWLOCK_EXIT(&ipf_nat);
4551 		return;
4552 	}
4553 
4554 	for (nat = nat_instances; nat; nat = nat->nat_next) {
4555 		if ((nat->nat_flags & IPN_TCP) != 0)
4556 			continue;
4557 		n = nat->nat_ptr;
4558 		if ((n == NULL) ||
4559 		    (n->in_outip != 0) || (n->in_outmsk != 0xffffffff))
4560 			continue;
4561 		if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) ||
4562 		     (ifp == nat->nat_ifps[1]))) {
4563 			nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4);
4564 			if (nat->nat_ifnames[1][0] != '\0') {
4565 				nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1],
4566 							  4);
4567 			} else
4568 				nat->nat_ifps[1] = nat->nat_ifps[0];
4569 			ifp2 = nat->nat_ifps[0];
4570 			if (ifp2 == NULL)
4571 				continue;
4572 
4573 			/*
4574 			 * Change the map-to address to be the same as the
4575 			 * new one.
4576 			 */
4577 			sum1 = nat->nat_outip.s_addr;
4578 			if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1)
4579 				nat->nat_outip = in;
4580 			sum2 = nat->nat_outip.s_addr;
4581 
4582 			if (sum1 == sum2)
4583 				continue;
4584 			/*
4585 			 * Readjust the checksum adjustment to take into
4586 			 * account the new IP#.
4587 			 */
4588 			CALC_SUMD(sum1, sum2, sumd);
4589 			/* XXX - dont change for TCP when solaris does
4590 			 * hardware checksumming.
4591 			 */
4592 			sumd += nat->nat_sumd[0];
4593 			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4594 			nat->nat_sumd[1] = nat->nat_sumd[0];
4595 		}
4596 	}
4597 
4598 	for (n = nat_list; (n != NULL); n = n->in_next) {
4599 		if ((ifp == NULL) || (n->in_ifps[0] == ifp))
4600 			n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4);
4601 		if ((ifp == NULL) || (n->in_ifps[1] == ifp))
4602 			n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4);
4603 	}
4604 	RWLOCK_EXIT(&ipf_nat);
4605 	SPL_X(s);
4606 }
4607 
4608 
4609 /* ------------------------------------------------------------------------ */
4610 /* Function:    nat_icmpquerytype4                                          */
4611 /* Returns:     int - 1 == success, 0 == failure                            */
4612 /* Parameters:  icmptype(I) - ICMP type number                              */
4613 /*                                                                          */
4614 /* Tests to see if the ICMP type number passed is a query/response type or  */
4615 /* not.                                                                     */
4616 /* ------------------------------------------------------------------------ */
static int nat_icmpquerytype4(icmptype)
int icmptype;
{
	/*
	 * For the ICMP query NAT code, it is essential that both the query
	 * and the reply match on the NAT rule.  Because the NAT structure
	 * does not keep track of the icmptype, and a single NAT structure
	 * is used for all icmp types with the same src, dest and id, the
	 * replies are simply treated as queries as well.  Although it seems
	 * silly to call a reply a query, this is exactly how it is defined
	 * in the IPv4 specification.
	 *
	 * Router advertisement/solicitation is currently unsupported: it
	 * would require rewriting the ICMP data section.
	 */
	if ((icmptype == ICMP_ECHO) || (icmptype == ICMP_ECHOREPLY) ||
	    (icmptype == ICMP_TSTAMP) || (icmptype == ICMP_TSTAMPREPLY) ||
	    (icmptype == ICMP_IREQ) || (icmptype == ICMP_IREQREPLY) ||
	    (icmptype == ICMP_MASKREQ) || (icmptype == ICMP_MASKREPLY))
		return 1;

	return 0;
}
4649 
4650 
4651 /* ------------------------------------------------------------------------ */
4652 /* Function:    nat_log                                                     */
4653 /* Returns:     Nil                                                         */
4654 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4655 /*              type(I) - type of log entry to create                       */
4656 /*                                                                          */
4657 /* Creates a NAT log entry.                                                 */
4658 /* ------------------------------------------------------------------------ */
nat_log(nat,type)4659 void nat_log(nat, type)
4660 struct nat *nat;
4661 u_int type;
4662 {
4663 #ifdef	IPFILTER_LOG
4664 # ifndef LARGE_NAT
4665 	struct ipnat *np;
4666 	int rulen;
4667 # endif
4668 	struct natlog natl;
4669 	void *items[1];
4670 	size_t sizes[1];
4671 	int types[1];
4672 
4673 	natl.nl_inip = nat->nat_inip;
4674 	natl.nl_outip = nat->nat_outip;
4675 	natl.nl_origip = nat->nat_oip;
4676 	natl.nl_bytes[0] = nat->nat_bytes[0];
4677 	natl.nl_bytes[1] = nat->nat_bytes[1];
4678 	natl.nl_pkts[0] = nat->nat_pkts[0];
4679 	natl.nl_pkts[1] = nat->nat_pkts[1];
4680 	natl.nl_origport = nat->nat_oport;
4681 	natl.nl_inport = nat->nat_inport;
4682 	natl.nl_outport = nat->nat_outport;
4683 	natl.nl_p = nat->nat_p;
4684 	natl.nl_type = type;
4685 	natl.nl_rule = -1;
4686 # ifndef LARGE_NAT
4687 	if (nat->nat_ptr != NULL) {
4688 		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
4689 			if (np == nat->nat_ptr) {
4690 				natl.nl_rule = rulen;
4691 				break;
4692 			}
4693 	}
4694 # endif
4695 	items[0] = &natl;
4696 	sizes[0] = sizeof(natl);
4697 	types[0] = 0;
4698 
4699 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
4700 #endif
4701 }
4702 
4703 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*                                                                          */
/* OpenBSD-only hook: forwards the interface pointer to frsync() so that    */
/* IPFilter can refresh its interface references.                           */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync(ifp);
}
#endif
4720 
4721 
4722 /* ------------------------------------------------------------------------ */
4723 /* Function:    fr_ipnatderef                                               */
4724 /* Returns:     Nil                                                         */
4725 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4726 /* Write Locks: ipf_nat                                                     */
4727 /*                                                                          */
4728 /* ------------------------------------------------------------------------ */
fr_ipnatderef(inp)4729 void fr_ipnatderef(inp)
4730 ipnat_t **inp;
4731 {
4732 	ipnat_t *in;
4733 
4734 	in = *inp;
4735 	*inp = NULL;
4736 	in->in_space++;
4737 	in->in_use--;
4738 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4739 		if (in->in_apr)
4740 			appr_free(in->in_apr);
4741 		MUTEX_DESTROY(&in->in_lock);
4742 		KFREE(in);
4743 		nat_stats.ns_rules--;
4744 #if SOLARIS && !defined(_INET_IP_STACK_H)
4745 		if (nat_stats.ns_rules == 0)
4746 			pfil_delayed_copy = 1;
4747 #endif
4748 	}
4749 }
4750 
4751 
4752 /* ------------------------------------------------------------------------ */
4753 /* Function:    fr_natderef                                                 */
4754 /* Returns:     Nil                                                         */
4755 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4756 /*                                                                          */
4757 /* Decrement the reference counter for this NAT table entry and free it if  */
4758 /* there are no more things using it.                                       */
4759 /*                                                                          */
4760 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4761 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4762 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4763 /* because nat_delete() will do that and send nat_ref to -1.                */
4764 /*                                                                          */
4765 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4766 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4767 /* ------------------------------------------------------------------------ */
fr_natderef(natp)4768 void fr_natderef(natp)
4769 nat_t **natp;
4770 {
4771 	nat_t *nat;
4772 
4773 	nat = *natp;
4774 	*natp = NULL;
4775 
4776 	MUTEX_ENTER(&nat->nat_lock);
4777 	if (nat->nat_ref > 1) {
4778 		nat->nat_ref--;
4779 		MUTEX_EXIT(&nat->nat_lock);
4780 		return;
4781 	}
4782 	MUTEX_EXIT(&nat->nat_lock);
4783 
4784 	WRITE_ENTER(&ipf_nat);
4785 	nat_delete(nat, NL_EXPIRE);
4786 	RWLOCK_EXIT(&ipf_nat);
4787 }
4788 
4789 
4790 /* ------------------------------------------------------------------------ */
4791 /* Function:    fr_natclone                                                 */
4792 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4793 /*                           else pointer to new state structure            */
4794 /* Parameters:  fin(I) - pointer to packet information                      */
4795 /*              is(I)  - pointer to master state structure                  */
4796 /* Write Lock:  ipf_nat                                                     */
4797 /*                                                                          */
4798 /* Create a "duplcate" state table entry from the master.                   */
4799 /* ------------------------------------------------------------------------ */
fr_natclone(fin,nat)4800 static nat_t *fr_natclone(fin, nat)
4801 fr_info_t *fin;
4802 nat_t *nat;
4803 {
4804 	frentry_t *fr;
4805 	nat_t *clone;
4806 	ipnat_t *np;
4807 
4808 	KMALLOC(clone, nat_t *);
4809 	if (clone == NULL)
4810 		return NULL;
4811 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4812 
4813 	MUTEX_NUKE(&clone->nat_lock);
4814 
4815 	clone->nat_aps = NULL;
4816 	/*
4817 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4818 	 */
4819 	clone->nat_tqe.tqe_pnext = NULL;
4820 	clone->nat_tqe.tqe_next = NULL;
4821 	clone->nat_tqe.tqe_ifq = NULL;
4822 	clone->nat_tqe.tqe_parent = clone;
4823 
4824 	clone->nat_flags &= ~SI_CLONE;
4825 	clone->nat_flags |= SI_CLONED;
4826 
4827 	if (clone->nat_hm)
4828 		clone->nat_hm->hm_ref++;
4829 
4830 	if (nat_insert(clone, fin->fin_rev) == -1) {
4831 		KFREE(clone);
4832 		return NULL;
4833 	}
4834 	np = clone->nat_ptr;
4835 	if (np != NULL) {
4836 		if (nat_logging)
4837 			nat_log(clone, (u_int)np->in_redir);
4838 		np->in_use++;
4839 	}
4840 	fr = clone->nat_fr;
4841 	if (fr != NULL) {
4842 		MUTEX_ENTER(&fr->fr_lock);
4843 		fr->fr_ref++;
4844 		MUTEX_EXIT(&fr->fr_lock);
4845 	}
4846 
4847 	/*
4848 	 * Because the clone is created outside the normal loop of things and
4849 	 * TCP has special needs in terms of state, initialise the timeout
4850 	 * state of the new NAT from here.
4851 	 */
4852 	if (clone->nat_p == IPPROTO_TCP) {
4853 		(void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb,
4854 				  clone->nat_flags);
4855 	}
4856 #ifdef	IPFILTER_SYNC
4857 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4858 #endif
4859 	if (nat_logging)
4860 		nat_log(clone, NL_CLONE);
4861 	return clone;
4862 }
4863 
4864 
4865 /* ------------------------------------------------------------------------ */
4866 /* Function:   nat_wildok                                                   */
4867 /* Returns:    int - 1 == packet's ports match wildcards                    */
4868 /*                   0 == packet's ports don't match wildcards              */
4869 /* Parameters: nat(I)   - NAT entry                                         */
4870 /*             sport(I) - source port                                       */
4871 /*             dport(I) - destination port                                  */
4872 /*             flags(I) - wildcard flags                                    */
4873 /*             dir(I)   - packet direction                                  */
4874 /*                                                                          */
4875 /* Use NAT entry and packet direction to determine which combination of     */
4876 /* wildcard flags should be used.                                           */
4877 /* ------------------------------------------------------------------------ */
nat_wildok(nat,sport,dport,flags,dir)4878 static int nat_wildok(nat, sport, dport, flags, dir)
4879 nat_t *nat;
4880 int sport;
4881 int dport;
4882 int flags;
4883 int dir;
4884 {
4885 	/*
4886 	 * When called by       dir is set to
4887 	 * nat_inlookup         NAT_INBOUND (0)
4888 	 * nat_outlookup        NAT_OUTBOUND (1)
4889 	 *
4890 	 * We simply combine the packet's direction in dir with the original
4891 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4892 	 * which combination of wildcard flags to allow.
4893 	 */
4894 
4895 	switch ((dir << 1) | nat->nat_dir)
4896 	{
4897 	case 3: /* outbound packet / outbound entry */
4898 		if (((nat->nat_inport == sport) ||
4899 		    (flags & SI_W_SPORT)) &&
4900 		    ((nat->nat_oport == dport) ||
4901 		    (flags & SI_W_DPORT)))
4902 			return 1;
4903 		break;
4904 	case 2: /* outbound packet / inbound entry */
4905 		if (((nat->nat_outport == sport) ||
4906 		    (flags & SI_W_DPORT)) &&
4907 		    ((nat->nat_oport == dport) ||
4908 		    (flags & SI_W_SPORT)))
4909 			return 1;
4910 		break;
4911 	case 1: /* inbound packet / outbound entry */
4912 		if (((nat->nat_oport == sport) ||
4913 		    (flags & SI_W_DPORT)) &&
4914 		    ((nat->nat_outport == dport) ||
4915 		    (flags & SI_W_SPORT)))
4916 			return 1;
4917 		break;
4918 	case 0: /* inbound packet / inbound entry */
4919 		if (((nat->nat_oport == sport) ||
4920 		    (flags & SI_W_SPORT)) &&
4921 		    ((nat->nat_outport == dport) ||
4922 		    (flags & SI_W_DPORT)))
4923 			return 1;
4924 		break;
4925 	default:
4926 		break;
4927 	}
4928 
4929 	return(0);
4930 }
4931 
4932 
4933 /* ------------------------------------------------------------------------ */
4934 /* Function:    nat_mssclamp                                                */
4935 /* Returns:     Nil                                                         */
4936 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4937 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4938 /*              fin(I)    - pointer to packet information                   */
4939 /*              csump(I)  - pointer to TCP checksum                         */
4940 /*                                                                          */
4941 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4942 /* then the TCP header checksum will be updated to reflect the change in    */
4943 /* the MSS.                                                                 */
4944 /* ------------------------------------------------------------------------ */
nat_mssclamp(tcp,maxmss,fin,csump)4945 static void nat_mssclamp(tcp, maxmss, fin, csump)
4946 tcphdr_t *tcp;
4947 u_32_t maxmss;
4948 fr_info_t *fin;
4949 u_short *csump;
4950 {
4951 	u_char *cp, *ep, opt;
4952 	int hlen, advance;
4953 	u_32_t mss, sumd;
4954 
4955 	hlen = TCP_OFF(tcp) << 2;
4956 	if (hlen > sizeof(*tcp)) {
4957 		cp = (u_char *)tcp + sizeof(*tcp);
4958 		ep = (u_char *)tcp + hlen;
4959 
4960 		while (cp < ep) {
4961 			opt = cp[0];
4962 			if (opt == TCPOPT_EOL)
4963 				break;
4964 			else if (opt == TCPOPT_NOP) {
4965 				cp++;
4966 				continue;
4967 			}
4968 
4969 			if (cp + 1 >= ep)
4970 				break;
4971 			advance = cp[1];
4972 			if ((cp + advance > ep) || (advance <= 0))
4973 				break;
4974 			switch (opt)
4975 			{
4976 			case TCPOPT_MAXSEG:
4977 				if (advance != 4)
4978 					break;
4979 				mss = cp[2] * 256 + cp[3];
4980 				if (mss > maxmss) {
4981 					cp[2] = maxmss / 256;
4982 					cp[3] = maxmss & 0xff;
4983 					CALC_SUMD(mss, maxmss, sumd);
4984 					fix_outcksum(fin, csump, sumd);
4985 				}
4986 				break;
4987 			default:
4988 				/* ignore unknown options */
4989 				break;
4990 			}
4991 
4992 			cp += advance;
4993 		}
4994 	}
4995 }
4996 
4997 
4998 /* ------------------------------------------------------------------------ */
4999 /* Function:    fr_setnatqueue                                              */
5000 /* Returns:     Nil                                                         */
5001 /* Parameters:  nat(I)- pointer to NAT structure                            */
5002 /*              rev(I) - forward(0) or reverse(1) direction                 */
5003 /* Locks:       ipf_nat (read or write)                                     */
5004 /*                                                                          */
5005 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5006 /* determining which queue it should be placed on.                          */
5007 /* ------------------------------------------------------------------------ */
fr_setnatqueue(nat,rev)5008 void fr_setnatqueue(nat, rev)
5009 nat_t *nat;
5010 int rev;
5011 {
5012 	ipftq_t *oifq, *nifq;
5013 
5014 	if (nat->nat_ptr != NULL)
5015 		nifq = nat->nat_ptr->in_tqehead[rev];
5016 	else
5017 		nifq = NULL;
5018 
5019 	if (nifq == NULL) {
5020 		switch (nat->nat_p)
5021 		{
5022 		case IPPROTO_UDP :
5023 			nifq = &nat_udptq;
5024 			break;
5025 		case IPPROTO_ICMP :
5026 			nifq = &nat_icmptq;
5027 			break;
5028 		case IPPROTO_TCP :
5029 			nifq = nat_tqb + nat->nat_tqe.tqe_state[rev];
5030 			break;
5031 		default :
5032 			nifq = &nat_iptq;
5033 			break;
5034 		}
5035 	}
5036 
5037 	oifq = nat->nat_tqe.tqe_ifq;
5038 	/*
5039 	 * If it's currently on a timeout queue, move it from one queue to
5040 	 * another, else put it on the end of the newly determined queue.
5041 	 */
5042 	if (oifq != NULL)
5043 		fr_movequeue(&nat->nat_tqe, oifq, nifq);
5044 	else
5045 		fr_queueappend(&nat->nat_tqe, nifq, nat);
5046 	return;
5047 }
5048 
5049 
5050 /* ------------------------------------------------------------------------ */
5051 /* Function:    nat_getnext                                                 */
5052 /* Returns:     int - 0 == ok, else error                                   */
5053 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5054 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5055 /*                                                                          */
5056 /* Fetch the next nat/ipnat structure pointer from the linked list and      */
5057 /* copy it out to the storage space pointed to by itp_data.  The next item  */
5058 /* in the list to look at is put back in the ipftoken struture.             */
5059 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5060 /* ipf_freetoken will call a deref function for us and we dont want to call */
5061 /* that twice (second time would be in the second switch statement below.   */
5062 /* ------------------------------------------------------------------------ */
nat_getnext(t,itp)5063 static int nat_getnext(t, itp)
5064 ipftoken_t *t;
5065 ipfgeniter_t *itp;
5066 {
5067 	hostmap_t *hm, *nexthm = NULL, zerohm;
5068 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5069 	nat_t *nat, *nextnat = NULL, zeronat;
5070 	int error = 0, count;
5071 	char *dst;
5072 
5073 	count = itp->igi_nitems;
5074 	if (count < 1)
5075 		return ENOSPC;
5076 
5077 	READ_ENTER(&ipf_nat);
5078 
5079 	switch (itp->igi_type)
5080 	{
5081 	case IPFGENITER_HOSTMAP :
5082 		hm = t->ipt_data;
5083 		if (hm == NULL) {
5084 			nexthm = ipf_hm_maplist;
5085 		} else {
5086 			nexthm = hm->hm_next;
5087 		}
5088 		break;
5089 
5090 	case IPFGENITER_IPNAT :
5091 		ipn = t->ipt_data;
5092 		if (ipn == NULL) {
5093 			nextipnat = nat_list;
5094 		} else {
5095 			nextipnat = ipn->in_next;
5096 		}
5097 		break;
5098 
5099 	case IPFGENITER_NAT :
5100 		nat = t->ipt_data;
5101 		if (nat == NULL) {
5102 			nextnat = nat_instances;
5103 		} else {
5104 			nextnat = nat->nat_next;
5105 		}
5106 		break;
5107 	default :
5108 		RWLOCK_EXIT(&ipf_nat);
5109 		return EINVAL;
5110 	}
5111 
5112 	dst = itp->igi_data;
5113 	for (;;) {
5114 		switch (itp->igi_type)
5115 		{
5116 		case IPFGENITER_HOSTMAP :
5117 			if (nexthm != NULL) {
5118 				if (count == 1) {
5119 					ATOMIC_INC32(nexthm->hm_ref);
5120 					t->ipt_data = nexthm;
5121 				}
5122 			} else {
5123 				bzero(&zerohm, sizeof(zerohm));
5124 				nexthm = &zerohm;
5125 				count = 1;
5126 				t->ipt_data = NULL;
5127 			}
5128 			break;
5129 
5130 		case IPFGENITER_IPNAT :
5131 			if (nextipnat != NULL) {
5132 				if (count == 1) {
5133 					MUTEX_ENTER(&nextipnat->in_lock);
5134 					nextipnat->in_use++;
5135 					MUTEX_EXIT(&nextipnat->in_lock);
5136 					t->ipt_data = nextipnat;
5137 				}
5138 			} else {
5139 				bzero(&zeroipn, sizeof(zeroipn));
5140 				nextipnat = &zeroipn;
5141 				count = 1;
5142 				t->ipt_data = NULL;
5143 			}
5144 			break;
5145 
5146 		case IPFGENITER_NAT :
5147 			if (nextnat != NULL) {
5148 				if (count == 1) {
5149 					MUTEX_ENTER(&nextnat->nat_lock);
5150 					nextnat->nat_ref++;
5151 					MUTEX_EXIT(&nextnat->nat_lock);
5152 					t->ipt_data = nextnat;
5153 				}
5154 			} else {
5155 				bzero(&zeronat, sizeof(zeronat));
5156 				nextnat = &zeronat;
5157 				count = 1;
5158 				t->ipt_data = NULL;
5159 			}
5160 			break;
5161 		default :
5162 			break;
5163 		}
5164 		RWLOCK_EXIT(&ipf_nat);
5165 
5166 		/*
5167 		 * Copying out to user space needs to be done without the lock.
5168 		 */
5169 		switch (itp->igi_type)
5170 		{
5171 		case IPFGENITER_HOSTMAP :
5172 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5173 			if (error != 0)
5174 				error = EFAULT;
5175 			else
5176 				dst += sizeof(*nexthm);
5177 			break;
5178 
5179 		case IPFGENITER_IPNAT :
5180 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5181 			if (error != 0)
5182 				error = EFAULT;
5183 			else
5184 				dst += sizeof(*nextipnat);
5185 			break;
5186 
5187 		case IPFGENITER_NAT :
5188 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5189 			if (error != 0)
5190 				error = EFAULT;
5191 			else
5192 				dst += sizeof(*nextnat);
5193 			break;
5194 		}
5195 
5196 		if ((count == 1) || (error != 0))
5197 			break;
5198 
5199 		count--;
5200 
5201 		READ_ENTER(&ipf_nat);
5202 
5203 		/*
5204 		 * We need to have the lock again here to make sure that
5205 		 * using _next is consistent.
5206 		 */
5207 		switch (itp->igi_type)
5208 		{
5209 		case IPFGENITER_HOSTMAP :
5210 			nexthm = nexthm->hm_next;
5211 			break;
5212 		case IPFGENITER_IPNAT :
5213 			nextipnat = nextipnat->in_next;
5214 			break;
5215 		case IPFGENITER_NAT :
5216 			nextnat = nextnat->nat_next;
5217 			break;
5218 		}
5219 	}
5220 
5221 
5222 	switch (itp->igi_type)
5223 	{
5224 	case IPFGENITER_HOSTMAP :
5225 		if (hm != NULL) {
5226 			WRITE_ENTER(&ipf_nat);
5227 			fr_hostmapdel(&hm);
5228 			RWLOCK_EXIT(&ipf_nat);
5229 		}
5230 		break;
5231 	case IPFGENITER_IPNAT :
5232 		if (ipn != NULL) {
5233 			fr_ipnatderef(&ipn);
5234 		}
5235 		break;
5236 	case IPFGENITER_NAT :
5237 		if (nat != NULL) {
5238 			fr_natderef(&nat);
5239 		}
5240 		break;
5241 	default :
5242 		break;
5243 	}
5244 
5245 	return error;
5246 }
5247 
5248 
5249 /* ------------------------------------------------------------------------ */
5250 /* Function:    nat_iterator                                                */
5251 /* Returns:     int - 0 == ok, else error                                   */
5252 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5253 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5254 /*                                                                          */
5255 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5256 /* generic structure to iterate through a list.  There are three different  */
5257 /* linked lists of NAT related information to go through: NAT rules, active */
5258 /* NAT mappings and the NAT fragment cache.                                 */
5259 /* ------------------------------------------------------------------------ */
nat_iterator(token,itp)5260 static int nat_iterator(token, itp)
5261 ipftoken_t *token;
5262 ipfgeniter_t *itp;
5263 {
5264 	int error;
5265 
5266 	if (itp->igi_data == NULL)
5267 		return EFAULT;
5268 
5269 	token->ipt_subtype = itp->igi_type;
5270 
5271 	switch (itp->igi_type)
5272 	{
5273 	case IPFGENITER_HOSTMAP :
5274 	case IPFGENITER_IPNAT :
5275 	case IPFGENITER_NAT :
5276 		error = nat_getnext(token, itp);
5277 		break;
5278 
5279 	case IPFGENITER_NATFRAG :
5280 #ifdef USE_MUTEXES
5281 		error = fr_nextfrag(token, itp, &ipfr_natlist,
5282 				    &ipfr_nattail, &ipf_natfrag);
5283 #else
5284 		error = fr_nextfrag(token, itp, &ipfr_natlist, &ipfr_nattail);
5285 #endif
5286 		break;
5287 	default :
5288 		error = EINVAL;
5289 		break;
5290 	}
5291 
5292 	return error;
5293 }
5294 
5295 
5296 /* ------------------------------------------------------------------------ */
5297 /* Function:    nat_extraflush                                              */
5298 /* Returns:     int - 0 == success, -1 == failure                           */
5299 /* Parameters:  which(I) - how to flush the active NAT table                */
5300 /* Write Locks: ipf_nat                                                     */
5301 /*                                                                          */
5302 /* Flush nat tables.  Three actions currently defined:                      */
5303 /* which == 0 : flush all nat table entries                                 */
5304 /* which == 1 : flush TCP connections which have started to close but are   */
5305 /*	      stuck for some reason.                                        */
5306 /* which == 2 : flush TCP connections which have been idle for a long time, */
5307 /*	      starting at > 4 days idle and working back in successive half-*/
5308 /*	      days to at most 12 hours old.  If this fails to free enough   */
5309 /*            slots then work backwards in half hour slots to 30 minutes.   */
5310 /*            If that too fails, then work backwards in 30 second intervals */
5311 /*            for the last 30 minutes to at worst 30 seconds idle.          */
5312 /* ------------------------------------------------------------------------ */
nat_extraflush(which)5313 static int nat_extraflush(which)
5314 int which;
5315 {
5316 	ipftq_t *ifq, *ifqnext;
5317 	nat_t *nat, **natp;
5318 	ipftqent_t *tqn;
5319 	int removed;
5320 	SPL_INT(s);
5321 
5322 	removed = 0;
5323 
5324 	SPL_NET(s);
5325 
5326 	switch (which)
5327 	{
5328 	case 0 :
5329 		/*
5330 		 * Style 0 flush removes everything...
5331 		 */
5332 		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5333 			nat_delete(nat, NL_FLUSH);
5334 			removed++;
5335 		}
5336 		break;
5337 
5338 	case 1 :
5339 		/*
5340 		 * Since we're only interested in things that are closing,
5341 		 * we can start with the appropriate timeout queue.
5342 		 */
5343 		for (ifq = nat_tqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL;
5344 		     ifq = ifq->ifq_next) {
5345 
5346 			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5347 				nat = tqn->tqe_parent;
5348 				tqn = tqn->tqe_next;
5349 				if (nat->nat_p != IPPROTO_TCP)
5350 					break;
5351 				nat_delete(nat, NL_EXPIRE);
5352 				removed++;
5353 			}
5354 		}
5355 
5356 		/*
5357 		 * Also need to look through the user defined queues.
5358 		 */
5359 		for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) {
5360 			ifqnext = ifq->ifq_next;
5361 			for (tqn = ifq->ifq_head; tqn != NULL; ) {
5362 				nat = tqn->tqe_parent;
5363 				tqn = tqn->tqe_next;
5364 				if (nat->nat_p != IPPROTO_TCP)
5365 					continue;
5366 
5367 				if ((nat->nat_tcpstate[0] >
5368 				     IPF_TCPS_ESTABLISHED) &&
5369 				    (nat->nat_tcpstate[1] >
5370 				     IPF_TCPS_ESTABLISHED)) {
5371 					nat_delete(nat, NL_EXPIRE);
5372 					removed++;
5373 				}
5374 			}
5375 		}
5376 		break;
5377 
5378 		/*
5379 		 * Args 5-11 correspond to flushing those particular states
5380 		 * for TCP connections.
5381 		 */
5382 	case IPF_TCPS_CLOSE_WAIT :
5383 	case IPF_TCPS_FIN_WAIT_1 :
5384 	case IPF_TCPS_CLOSING :
5385 	case IPF_TCPS_LAST_ACK :
5386 	case IPF_TCPS_FIN_WAIT_2 :
5387 	case IPF_TCPS_TIME_WAIT :
5388 	case IPF_TCPS_CLOSED :
5389 		tqn = nat_tqb[which].ifq_head;
5390 		while (tqn != NULL) {
5391 			nat = tqn->tqe_parent;
5392 			tqn = tqn->tqe_next;
5393 			nat_delete(nat, NL_FLUSH);
5394 			removed++;
5395 		}
5396 		break;
5397 
5398 	default :
5399 		if (which < 30)
5400 			break;
5401 
5402 		/*
5403 		 * Take a large arbitrary number to mean the number of seconds
5404 		 * for which which consider to be the maximum value we'll allow
5405 		 * the expiration to be.
5406 		 */
5407 		which = IPF_TTLVAL(which);
5408 		for (natp = &nat_instances; ((nat = *natp) != NULL); ) {
5409 			if (fr_ticks - nat->nat_touched > which) {
5410 				nat_delete(nat, NL_FLUSH);
5411 				removed++;
5412 			} else
5413 				natp = &nat->nat_next;
5414 		}
5415 		break;
5416 	}
5417 
5418 	if (which != 2) {
5419 		SPL_X(s);
5420 		return removed;
5421 	}
5422 
5423 	/*
5424 	 * Asked to remove inactive entries because the table is full.
5425 	 */
5426 	if (fr_ticks - nat_last_force_flush > IPF_TTLVAL(5)) {
5427 		nat_last_force_flush = fr_ticks;
5428 		removed = ipf_queueflush(nat_flush_entry, nat_tqb, nat_utqe);
5429 	}
5430 
5431 	SPL_X(s);
5432 	return removed;
5433 }
5434 
5435 
5436 /* ------------------------------------------------------------------------ */
5437 /* Function:    nat_flush_entry                                             */
5438 /* Returns:     0 - always succeeds                                         */
5439 /* Parameters:  entry(I) - pointer to NAT entry                             */
5440 /* Write Locks: ipf_nat                                                     */
5441 /*                                                                          */
5442 /* This function is a stepping stone between ipf_queueflush() and           */
5443 /* nat_dlete().  It is used so we can provide a uniform interface via the   */
5444 /* ipf_queueflush() function.  Since the nat_delete() function returns void */
5445 /* we translate that to mean it always succeeds in deleting something.      */
5446 /* ------------------------------------------------------------------------ */
nat_flush_entry(entry)5447 static int nat_flush_entry(entry)
5448 void *entry;
5449 {
5450 	nat_delete(entry, NL_FLUSH);
5451 	return 0;
5452 }
5453 
5454 
5455 /* ------------------------------------------------------------------------ */
5456 /* Function:    nat_gettable                                                */
5457 /* Returns:     int     - 0 = success, else error                           */
5458 /* Parameters:  data(I) - pointer to ioctl data                             */
5459 /*                                                                          */
5460 /* This function handles ioctl requests for tables of nat information.      */
5461 /* At present the only table it deals with is the hash bucket statistics.   */
5462 /* ------------------------------------------------------------------------ */
nat_gettable(data)5463 static int nat_gettable(data)
5464 char *data;
5465 {
5466 	ipftable_t table;
5467 	int error;
5468 
5469 	error = fr_inobj(data, &table, IPFOBJ_GTABLE);
5470 	if (error != 0)
5471 		return error;
5472 
5473 	switch (table.ita_type)
5474 	{
5475 	case IPFTABLE_BUCKETS_NATIN :
5476 		error = COPYOUT(nat_stats.ns_bucketlen[0], table.ita_table,
5477 				ipf_nattable_sz * sizeof(u_long));
5478 		break;
5479 
5480 	case IPFTABLE_BUCKETS_NATOUT :
5481 		error = COPYOUT(nat_stats.ns_bucketlen[1], table.ita_table,
5482 				ipf_nattable_sz * sizeof(u_long));
5483 		break;
5484 
5485 	default :
5486 		return EINVAL;
5487 	}
5488 
5489 	if (error != 0) {
5490 		error = EFAULT;
5491 	}
5492 	return error;
5493 }
5494