1 /*	$FreeBSD: stable/9/sys/contrib/ipfilter/netinet/ip_frag.c 172776 2007-10-18 21:52:14Z darrenr $	*/
2 
3 /*
4  * Copyright (C) 1993-2003 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL	1
12 # define        _KERNEL	1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/time.h>
18 #include <sys/file.h>
19 #ifdef __hpux
20 # include <sys/timeout.h>
21 #endif
22 #if !defined(_KERNEL)
23 # include <stdio.h>
24 # include <string.h>
25 # include <stdlib.h>
26 # define _KERNEL
27 # ifdef __OpenBSD__
28 struct file;
29 # endif
30 # include <sys/uio.h>
31 # undef _KERNEL
32 #endif
33 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
34 # include <sys/filio.h>
35 # include <sys/fcntl.h>
36 #else
37 # include <sys/ioctl.h>
38 #endif
39 #if !defined(linux)
40 # include <sys/protosw.h>
41 #endif
42 #include <sys/socket.h>
43 #if defined(_KERNEL)
44 # include <sys/systm.h>
45 # if !defined(__SVR4) && !defined(__svr4__)
46 #  include <sys/mbuf.h>
47 # endif
48 #endif
49 #if !defined(__SVR4) && !defined(__svr4__)
50 # if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
51 #  include <sys/kernel.h>
52 # endif
53 #else
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 #  include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61 #include <net/if.h>
62 #ifdef sun
63 # include <net/af.h>
64 #endif
65 #include <net/route.h>
66 #include <netinet/in.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/ip.h>
69 #if !defined(linux)
70 # include <netinet/ip_var.h>
71 #endif
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 #include <netinet/ip_icmp.h>
75 #include "netinet/ip_compat.h"
76 #include <netinet/tcpip.h>
77 #include "netinet/ip_fil.h"
78 #include "netinet/ip_nat.h"
79 #include "netinet/ip_frag.h"
80 #include "netinet/ip_state.h"
81 #include "netinet/ip_auth.h"
82 #include "netinet/ip_proxy.h"
83 #if (__FreeBSD_version >= 300000)
84 # include <sys/malloc.h>
85 # if defined(_KERNEL)
86 #  ifndef IPFILTER_LKM
87 #   include <sys/libkern.h>
88 #   include <sys/systm.h>
89 #  endif
90 extern struct callout_handle fr_slowtimer_ch;
91 # endif
92 #endif
93 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
94 # include <sys/callout.h>
95 extern struct callout fr_slowtimer_ch;
96 #endif
97 #if defined(__OpenBSD__)
98 # include <sys/timeout.h>
99 extern struct timeout fr_slowtimer_ch;
100 #endif
101 /* END OF INCLUDES */
102 
103 #if !defined(lint)
104 static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
105 static const char rcsid[] = "@(#)$FreeBSD: stable/9/sys/contrib/ipfilter/netinet/ip_frag.c 172776 2007-10-18 21:52:14Z darrenr $";
106 /* static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.12 2007/09/20 12:51:51 darrenr Exp $"; */
107 #endif
108 
109 
110 ipfr_t   *ipfr_list = NULL;
111 ipfr_t   **ipfr_tail = &ipfr_list;
112 
113 ipfr_t   *ipfr_natlist = NULL;
114 ipfr_t   **ipfr_nattail = &ipfr_natlist;
115 
116 ipfr_t   *ipfr_ipidlist = NULL;
117 ipfr_t   **ipfr_ipidtail = &ipfr_ipidlist;
118 
119 static ipfr_t	**ipfr_heads;
120 static ipfr_t	**ipfr_nattab;
121 static ipfr_t	**ipfr_ipidtab;
122 
123 static ipfrstat_t ipfr_stats;
124 static int	ipfr_inuse = 0;
125 int		ipfr_size = IPFT_SIZE;
126 
127 int	fr_ipfrttl = 120;	/* 60 seconds */
128 int	fr_frag_lock = 0;
129 int	fr_frag_init = 0;
130 u_long	fr_ticks = 0;
131 
132 
133 static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
134 static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
135 static void fr_fragdelete __P((ipfr_t *, ipfr_t ***));
136 static void fr_fragfree __P((ipfr_t *));
137 
138 
139 /* ------------------------------------------------------------------------ */
140 /* Function:    fr_fraginit                                                 */
141 /* Returns:     int - 0 == success, -1 == error                             */
142 /* Parameters:  Nil                                                         */
143 /*                                                                          */
144 /* Initialise the hash tables for the fragment cache lookups.               */
145 /* ------------------------------------------------------------------------ */
fr_fraginit()146 int fr_fraginit()
147 {
148 	KMALLOCS(ipfr_heads, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
149 	if (ipfr_heads == NULL)
150 		return -1;
151 	bzero((char *)ipfr_heads, ipfr_size * sizeof(ipfr_t *));
152 
153 	KMALLOCS(ipfr_nattab, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
154 	if (ipfr_nattab == NULL)
155 		return -1;
156 	bzero((char *)ipfr_nattab, ipfr_size * sizeof(ipfr_t *));
157 
158 	KMALLOCS(ipfr_ipidtab, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
159 	if (ipfr_ipidtab == NULL)
160 		return -1;
161 	bzero((char *)ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *));
162 
163 	RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock");
164 	fr_frag_init = 1;
165 
166 	return 0;
167 }
168 
169 
170 /* ------------------------------------------------------------------------ */
171 /* Function:    fr_fragunload                                               */
172 /* Returns:     Nil                                                         */
173 /* Parameters:  Nil                                                         */
174 /*                                                                          */
175 /* Free all memory allocated whilst running and from initialisation.        */
176 /* ------------------------------------------------------------------------ */
fr_fragunload()177 void fr_fragunload()
178 {
179 	if (fr_frag_init == 1) {
180 		fr_fragclear();
181 
182 		RW_DESTROY(&ipf_frag);
183 		fr_frag_init = 0;
184 	}
185 
186 	if (ipfr_heads != NULL)
187 		KFREES(ipfr_heads, ipfr_size * sizeof(ipfr_t *));
188 	ipfr_heads = NULL;
189 
190 	if (ipfr_nattab != NULL)
191 		KFREES(ipfr_nattab, ipfr_size * sizeof(ipfr_t *));
192 	ipfr_nattab = NULL;
193 
194 	if (ipfr_ipidtab != NULL)
195 		KFREES(ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *));
196 	ipfr_ipidtab = NULL;
197 }
198 
199 
200 /* ------------------------------------------------------------------------ */
201 /* Function:    fr_fragstats                                                */
202 /* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
203 /* Parameters:  Nil                                                         */
204 /*                                                                          */
205 /* Updates ipfr_stats with current information and returns a pointer to it  */
206 /* ------------------------------------------------------------------------ */
fr_fragstats()207 ipfrstat_t *fr_fragstats()
208 {
209 	ipfr_stats.ifs_table = ipfr_heads;
210 	ipfr_stats.ifs_nattab = ipfr_nattab;
211 	ipfr_stats.ifs_inuse = ipfr_inuse;
212 	return &ipfr_stats;
213 }
214 
215 
216 /* ------------------------------------------------------------------------ */
217 /* Function:    ipfr_newfrag                                                */
218 /* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
219 /* Parameters:  fin(I)   - pointer to packet information                    */
220 /*              table(I) - pointer to frag table to add to                  */
221 /*                                                                          */
222 /* Add a new entry to the fragment cache, registering it as having come     */
223 /* through this box, with the result of the filter operation.               */
224 /* ------------------------------------------------------------------------ */
ipfr_newfrag(fin,pass,table)225 static ipfr_t *ipfr_newfrag(fin, pass, table)
226 fr_info_t *fin;
227 u_32_t pass;
228 ipfr_t *table[];
229 {
230 	ipfr_t *fra, frag;
231 	u_int idx, off;
232 	frentry_t *fr;
233 	ip_t *ip;
234 
235 	if (ipfr_inuse >= IPFT_SIZE)
236 		return NULL;
237 
238 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
239 		return NULL;
240 
241 	ip = fin->fin_ip;
242 
243 	if (pass & FR_FRSTRICT)
244 		if (fin->fin_off != 0)
245 			return NULL;
246 
247 	frag.ipfr_p = ip->ip_p;
248 	idx = ip->ip_p;
249 	frag.ipfr_id = ip->ip_id;
250 	idx += ip->ip_id;
251 	frag.ipfr_tos = ip->ip_tos;
252 	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
253 	idx += ip->ip_src.s_addr;
254 	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
255 	idx += ip->ip_dst.s_addr;
256 	frag.ipfr_ifp = fin->fin_ifp;
257 	idx *= 127;
258 	idx %= IPFT_SIZE;
259 
260 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
261 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
262 	frag.ipfr_auth = fin->fin_fi.fi_auth;
263 
264 	/*
265 	 * first, make sure it isn't already there...
266 	 */
267 	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
268 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
269 			  IPFR_CMPSZ)) {
270 			ipfr_stats.ifs_exists++;
271 			return NULL;
272 		}
273 
274 	/*
275 	 * allocate some memory, if possible, if not, just record that we
276 	 * failed to do so.
277 	 */
278 	KMALLOC(fra, ipfr_t *);
279 	if (fra == NULL) {
280 		ipfr_stats.ifs_nomem++;
281 		return NULL;
282 	}
283 
284 	fr = fin->fin_fr;
285 	fra->ipfr_rule = fr;
286 	if (fr != NULL) {
287 		MUTEX_ENTER(&fr->fr_lock);
288 		fr->fr_ref++;
289 		MUTEX_EXIT(&fr->fr_lock);
290 	}
291 
292 	/*
293 	 * Insert the fragment into the fragment table, copy the struct used
294 	 * in the search using bcopy rather than reassign each field.
295 	 * Set the ttl to the default.
296 	 */
297 	if ((fra->ipfr_hnext = table[idx]) != NULL)
298 		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
299 	fra->ipfr_hprev = table + idx;
300 	fra->ipfr_data = NULL;
301 	table[idx] = fra;
302 	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
303 	fra->ipfr_ttl = fr_ticks + fr_ipfrttl;
304 
305 	/*
306 	 * Compute the offset of the expected start of the next packet.
307 	 */
308 	off = ip->ip_off & IP_OFFMASK;
309 	if (off == 0)
310 		fra->ipfr_seen0 = 1;
311 	fra->ipfr_off = off + (fin->fin_dlen >> 3);
312 	fra->ipfr_pass = pass;
313 	fra->ipfr_ref = 1;
314 	ipfr_stats.ifs_new++;
315 	ipfr_inuse++;
316 	return fra;
317 }
318 
319 
320 /* ------------------------------------------------------------------------ */
321 /* Function:    fr_newfrag                                                  */
322 /* Returns:     int - 0 == success, -1 == error                             */
323 /* Parameters:  fin(I)  - pointer to packet information                     */
324 /*                                                                          */
325 /* Add a new entry to the fragment cache table based on the current packet  */
326 /* ------------------------------------------------------------------------ */
fr_newfrag(fin,pass)327 int fr_newfrag(fin, pass)
328 u_32_t pass;
329 fr_info_t *fin;
330 {
331 	ipfr_t	*fra;
332 
333 	if ((fin->fin_v != 4) || (fr_frag_lock != 0))
334 		return -1;
335 
336 	WRITE_ENTER(&ipf_frag);
337 	fra = ipfr_newfrag(fin, pass, ipfr_heads);
338 	if (fra != NULL) {
339 		*ipfr_tail = fra;
340 		fra->ipfr_prev = ipfr_tail;
341 		ipfr_tail = &fra->ipfr_next;
342 		if (ipfr_list == NULL)
343 			ipfr_list = fra;
344 		fra->ipfr_next = NULL;
345 	}
346 	RWLOCK_EXIT(&ipf_frag);
347 	return fra ? 0 : -1;
348 }
349 
350 
351 /* ------------------------------------------------------------------------ */
352 /* Function:    fr_nat_newfrag                                              */
353 /* Returns:     int - 0 == success, -1 == error                             */
354 /* Parameters:  fin(I)  - pointer to packet information                     */
355 /*              nat(I)  - pointer to NAT structure                          */
356 /*                                                                          */
357 /* Create a new NAT fragment cache entry based on the current packet and    */
358 /* the NAT structure for this "session".                                    */
359 /* ------------------------------------------------------------------------ */
fr_nat_newfrag(fin,pass,nat)360 int fr_nat_newfrag(fin, pass, nat)
361 fr_info_t *fin;
362 u_32_t pass;
363 nat_t *nat;
364 {
365 	ipfr_t	*fra;
366 
367 	if ((fin->fin_v != 4) || (fr_frag_lock != 0))
368 		return 0;
369 
370 	WRITE_ENTER(&ipf_natfrag);
371 	fra = ipfr_newfrag(fin, pass, ipfr_nattab);
372 	if (fra != NULL) {
373 		fra->ipfr_data = nat;
374 		nat->nat_data = fra;
375 		*ipfr_nattail = fra;
376 		fra->ipfr_prev = ipfr_nattail;
377 		ipfr_nattail = &fra->ipfr_next;
378 		fra->ipfr_next = NULL;
379 	}
380 	RWLOCK_EXIT(&ipf_natfrag);
381 	return fra ? 0 : -1;
382 }
383 
384 
385 /* ------------------------------------------------------------------------ */
386 /* Function:    fr_ipid_newfrag                                             */
387 /* Returns:     int - 0 == success, -1 == error                             */
388 /* Parameters:  fin(I)  - pointer to packet information                     */
389 /*              ipid(I) - new IP ID for this fragmented packet              */
390 /*                                                                          */
391 /* Create a new fragment cache entry for this packet and store, as a data   */
392 /* pointer, the new IP ID value.                                            */
393 /* ------------------------------------------------------------------------ */
fr_ipid_newfrag(fin,ipid)394 int fr_ipid_newfrag(fin, ipid)
395 fr_info_t *fin;
396 u_32_t ipid;
397 {
398 	ipfr_t	*fra;
399 
400 	if ((fin->fin_v != 4) || (fr_frag_lock))
401 		return 0;
402 
403 	WRITE_ENTER(&ipf_ipidfrag);
404 	fra = ipfr_newfrag(fin, 0, ipfr_ipidtab);
405 	if (fra != NULL) {
406 		fra->ipfr_data = (void *)(uintptr_t)ipid;
407 		*ipfr_ipidtail = fra;
408 		fra->ipfr_prev = ipfr_ipidtail;
409 		ipfr_ipidtail = &fra->ipfr_next;
410 		fra->ipfr_next = NULL;
411 	}
412 	RWLOCK_EXIT(&ipf_ipidfrag);
413 	return fra ? 0 : -1;
414 }
415 
416 
417 /* ------------------------------------------------------------------------ */
418 /* Function:    fr_fraglookup                                               */
419 /* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
420 /*                         matching entry in the frag table, else NULL      */
421 /* Parameters:  fin(I)   - pointer to packet information                    */
422 /*              table(I) - pointer to fragment cache table to search        */
423 /*                                                                          */
424 /* Check the fragment cache to see if there is already a record of this     */
425 /* packet with its filter result known.                                     */
426 /* ------------------------------------------------------------------------ */
fr_fraglookup(fin,table)427 static ipfr_t *fr_fraglookup(fin, table)
428 fr_info_t *fin;
429 ipfr_t *table[];
430 {
431 	ipfr_t *f, frag;
432 	u_int idx;
433 	ip_t *ip;
434 
435 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
436 		return NULL;
437 
438 	/*
439 	 * For fragments, we record protocol, packet id, TOS and both IP#'s
440 	 * (these should all be the same for all fragments of a packet).
441 	 *
442 	 * build up a hash value to index the table with.
443 	 */
444 	ip = fin->fin_ip;
445 	frag.ipfr_p = ip->ip_p;
446 	idx = ip->ip_p;
447 	frag.ipfr_id = ip->ip_id;
448 	idx += ip->ip_id;
449 	frag.ipfr_tos = ip->ip_tos;
450 	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
451 	idx += ip->ip_src.s_addr;
452 	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
453 	idx += ip->ip_dst.s_addr;
454 	frag.ipfr_ifp = fin->fin_ifp;
455 	idx *= 127;
456 	idx %= IPFT_SIZE;
457 
458 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
459 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
460 	frag.ipfr_auth = fin->fin_fi.fi_auth;
461 
462 	/*
463 	 * check the table, careful to only compare the right amount of data
464 	 */
465 	for (f = table[idx]; f; f = f->ipfr_hnext)
466 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
467 			  IPFR_CMPSZ)) {
468 			u_short	off;
469 
470 			/*
471 			 * We don't want to let short packets match because
472 			 * they could be compromising the security of other
473 			 * rules that want to match on layer 4 fields (and
474 			 * can't because they have been fragmented off.)
475 			 * Why do this check here?  The counter acts as an
476 			 * indicator of this kind of attack, whereas if it was
477 			 * elsewhere, it wouldn't know if other matching
478 			 * packets had been seen.
479 			 */
480 			if (fin->fin_flx & FI_SHORT) {
481 				ATOMIC_INCL(ipfr_stats.ifs_short);
482 				continue;
483 			}
484 
485 			/*
486 			 * XXX - We really need to be guarding against the
487 			 * retransmission of (src,dst,id,offset-range) here
488 			 * because a fragmented packet is never resent with
489 			 * the same IP ID# (or shouldn't).
490 			 */
491 			off = ip->ip_off & IP_OFFMASK;
492 			if (f->ipfr_seen0) {
493 				if (off == 0) {
494 					ATOMIC_INCL(ipfr_stats.ifs_retrans0);
495 					continue;
496 				}
497 			} else if (off == 0)
498 				f->ipfr_seen0 = 1;
499 
500 			if (f != table[idx]) {
501 				ipfr_t **fp;
502 
503 				/*
504 				 * Move fragment info. to the top of the list
505 				 * to speed up searches.  First, delink...
506 				 */
507 				fp = f->ipfr_hprev;
508 				(*fp) = f->ipfr_hnext;
509 				if (f->ipfr_hnext != NULL)
510 					f->ipfr_hnext->ipfr_hprev = fp;
511 				/*
512 				 * Then put back at the top of the chain.
513 				 */
514 				f->ipfr_hnext = table[idx];
515 				table[idx]->ipfr_hprev = &f->ipfr_hnext;
516 				f->ipfr_hprev = table + idx;
517 				table[idx] = f;
518 			}
519 
520 			/*
521 			 * If we've follwed the fragments, and this is the
522 			 * last (in order), shrink expiration time.
523 			 */
524 			if (off == f->ipfr_off) {
525 				if (!(ip->ip_off & IP_MF))
526 					f->ipfr_ttl = fr_ticks + 1;
527 				f->ipfr_off = (fin->fin_dlen >> 3) + off;
528 			} else if (f->ipfr_pass & FR_FRSTRICT)
529 				continue;
530 			ATOMIC_INCL(ipfr_stats.ifs_hits);
531 			return f;
532 		}
533 	return NULL;
534 }
535 
536 
537 /* ------------------------------------------------------------------------ */
538 /* Function:    fr_nat_knownfrag                                            */
539 /* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
540 /*                       match found, else NULL                             */
541 /* Parameters:  fin(I)  - pointer to packet information                     */
542 /*                                                                          */
543 /* Functional interface for NAT lookups of the NAT fragment cache           */
544 /* ------------------------------------------------------------------------ */
fr_nat_knownfrag(fin)545 nat_t *fr_nat_knownfrag(fin)
546 fr_info_t *fin;
547 {
548 	nat_t	*nat;
549 	ipfr_t	*ipf;
550 
551 	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist)
552 		return NULL;
553 	READ_ENTER(&ipf_natfrag);
554 	ipf = fr_fraglookup(fin, ipfr_nattab);
555 	if (ipf != NULL) {
556 		nat = ipf->ipfr_data;
557 		/*
558 		 * This is the last fragment for this packet.
559 		 */
560 		if ((ipf->ipfr_ttl == fr_ticks + 1) && (nat != NULL)) {
561 			nat->nat_data = NULL;
562 			ipf->ipfr_data = NULL;
563 		}
564 	} else
565 		nat = NULL;
566 	RWLOCK_EXIT(&ipf_natfrag);
567 	return nat;
568 }
569 
570 
571 /* ------------------------------------------------------------------------ */
572 /* Function:    fr_ipid_knownfrag                                           */
573 /* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
574 /*                       return 0xfffffff to indicate no match.             */
575 /* Parameters:  fin(I) - pointer to packet information                      */
576 /*                                                                          */
577 /* Functional interface for IP ID lookups of the IP ID fragment cache       */
578 /* ------------------------------------------------------------------------ */
fr_ipid_knownfrag(fin)579 u_32_t fr_ipid_knownfrag(fin)
580 fr_info_t *fin;
581 {
582 	ipfr_t	*ipf;
583 	u_32_t	id;
584 
585 	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist)
586 		return 0xffffffff;
587 
588 	READ_ENTER(&ipf_ipidfrag);
589 	ipf = fr_fraglookup(fin, ipfr_ipidtab);
590 	if (ipf != NULL)
591 		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
592 	else
593 		id = 0xffffffff;
594 	RWLOCK_EXIT(&ipf_ipidfrag);
595 	return id;
596 }
597 
598 
599 /* ------------------------------------------------------------------------ */
600 /* Function:    fr_knownfrag                                                */
601 /* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
602 /*                           the frag cache table, else NULL.               */
603 /* Parameters:  fin(I)   - pointer to packet information                    */
604 /*              passp(O) - pointer to where to store rule flags resturned   */
605 /*                                                                          */
606 /* Functional interface for normal lookups of the fragment cache.  If a     */
607 /* match is found, return the rule pointer and flags from the rule, except  */
608 /* that if FR_LOGFIRST is set, reset FR_LOG.                                */
609 /* ------------------------------------------------------------------------ */
fr_knownfrag(fin,passp)610 frentry_t *fr_knownfrag(fin, passp)
611 fr_info_t *fin;
612 u_32_t *passp;
613 {
614 	frentry_t *fr = NULL;
615 	ipfr_t	*fra;
616 	u_32_t pass;
617 
618 	if ((fin->fin_v != 4) || (fr_frag_lock) || (ipfr_list == NULL))
619 		return NULL;
620 
621 	READ_ENTER(&ipf_frag);
622 	fra = fr_fraglookup(fin, ipfr_heads);
623 	if (fra != NULL) {
624 		fr = fra->ipfr_rule;
625 		fin->fin_fr = fr;
626 		if (fr != NULL) {
627 			pass = fr->fr_flags;
628 			if ((pass & FR_LOGFIRST) != 0)
629 				pass &= ~(FR_LOGFIRST|FR_LOG);
630 			*passp = pass;
631 		}
632 	}
633 	RWLOCK_EXIT(&ipf_frag);
634 	return fr;
635 }
636 
637 
638 /* ------------------------------------------------------------------------ */
639 /* Function:    fr_forget                                                   */
640 /* Returns:     Nil                                                         */
641 /* Parameters:  ptr(I) - pointer to data structure                          */
642 /*                                                                          */
643 /* Search through all of the fragment cache entries and wherever a pointer  */
644 /* is found to match ptr, reset it to NULL.                                 */
645 /* ------------------------------------------------------------------------ */
fr_forget(ptr)646 void fr_forget(ptr)
647 void *ptr;
648 {
649 	ipfr_t	*fr;
650 
651 	WRITE_ENTER(&ipf_frag);
652 	for (fr = ipfr_list; fr; fr = fr->ipfr_next)
653 		if (fr->ipfr_data == ptr)
654 			fr->ipfr_data = NULL;
655 	RWLOCK_EXIT(&ipf_frag);
656 }
657 
658 
659 /* ------------------------------------------------------------------------ */
660 /* Function:    fr_forgetnat                                                */
661 /* Returns:     Nil                                                         */
662 /* Parameters:  ptr(I) - pointer to data structure                          */
663 /*                                                                          */
664 /* Search through all of the fragment cache entries for NAT and wherever a  */
665 /* pointer  is found to match ptr, reset it to NULL.                        */
666 /* ------------------------------------------------------------------------ */
fr_forgetnat(ptr)667 void fr_forgetnat(ptr)
668 void *ptr;
669 {
670 	ipfr_t	*fr;
671 
672 	WRITE_ENTER(&ipf_natfrag);
673 	for (fr = ipfr_natlist; fr; fr = fr->ipfr_next)
674 		if (fr->ipfr_data == ptr)
675 			fr->ipfr_data = NULL;
676 	RWLOCK_EXIT(&ipf_natfrag);
677 }
678 
679 
680 /* ------------------------------------------------------------------------ */
681 /* Function:    fr_fragdelete                                               */
682 /* Returns:     Nil                                                         */
683 /* Parameters:  fra(I)   - pointer to fragment structure to delete          */
684 /*              tail(IO) - pointer to the pointer to the tail of the frag   */
685 /*                         list                                             */
686 /*                                                                          */
687 /* Remove a fragment cache table entry from the table & list.  Also free    */
688 /* the filter rule it is associated with it if it is no longer used as a    */
689 /* result of decreasing the reference count.                                */
690 /* ------------------------------------------------------------------------ */
fr_fragdelete(fra,tail)691 static void fr_fragdelete(fra, tail)
692 ipfr_t *fra, ***tail;
693 {
694 
695 	if (fra->ipfr_next)
696 		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
697 	*fra->ipfr_prev = fra->ipfr_next;
698 	if (*tail == &fra->ipfr_next)
699 		*tail = fra->ipfr_prev;
700 
701 	if (fra->ipfr_hnext)
702 		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
703 	*fra->ipfr_hprev = fra->ipfr_hnext;
704 
705 	if (fra->ipfr_rule != NULL) {
706 		(void) fr_derefrule(&fra->ipfr_rule);
707 	}
708 
709 	if (fra->ipfr_ref <= 0)
710 		fr_fragfree(fra);
711 }
712 
713 
714 /* ------------------------------------------------------------------------ */
715 /* Function:    fr_fragfree                                                 */
716 /* Returns:     Nil                                                         */
717 /* Parameters:  fra - pointer to frag structure to free                     */
718 /*                                                                          */
719 /* Take care of the details associated with deleting an entry from the frag */
720 /* cache.  Currently this just means bumping stats correctly after freeing  */
721 /* ------------------------------------------------------------------------ */
fr_fragfree(fra)722 static void fr_fragfree(fra)
723 ipfr_t *fra;
724 {
725 	KFREE(fra);
726 	ipfr_stats.ifs_expire++;
727 	ipfr_inuse--;
728 }
729 
730 
731 /* ------------------------------------------------------------------------ */
732 /* Function:    fr_fragclear                                                */
733 /* Returns:     Nil                                                         */
734 /* Parameters:  Nil                                                         */
735 /*                                                                          */
736 /* Free memory in use by fragment state information kept.  Do the normal    */
737 /* fragment state stuff first and then the NAT-fragment table.              */
738 /* ------------------------------------------------------------------------ */
fr_fragclear()739 void fr_fragclear()
740 {
741 	ipfr_t	*fra;
742 	nat_t	*nat;
743 
744 	WRITE_ENTER(&ipf_frag);
745 	while ((fra = ipfr_list) != NULL) {
746 		fra->ipfr_ref--;
747 		fr_fragdelete(fra, &ipfr_tail);
748 	}
749 	ipfr_tail = &ipfr_list;
750 	RWLOCK_EXIT(&ipf_frag);
751 
752 	WRITE_ENTER(&ipf_nat);
753 	WRITE_ENTER(&ipf_natfrag);
754 	while ((fra = ipfr_natlist) != NULL) {
755 		nat = fra->ipfr_data;
756 		if (nat != NULL) {
757 			if (nat->nat_data == fra)
758 				nat->nat_data = NULL;
759 		}
760 		fra->ipfr_ref--;
761 		fr_fragdelete(fra, &ipfr_nattail);
762 	}
763 	ipfr_nattail = &ipfr_natlist;
764 	RWLOCK_EXIT(&ipf_natfrag);
765 	RWLOCK_EXIT(&ipf_nat);
766 }
767 
768 
769 /* ------------------------------------------------------------------------ */
770 /* Function:    fr_fragexpire                                               */
771 /* Returns:     Nil                                                         */
772 /* Parameters:  Nil                                                         */
773 /*                                                                          */
774 /* Expire entries in the fragment cache table that have been there too long */
775 /* ------------------------------------------------------------------------ */
fr_fragexpire()776 void fr_fragexpire()
777 {
778 	ipfr_t	**fp, *fra;
779 	nat_t	*nat;
780 	SPL_INT(s);
781 
782 	if (fr_frag_lock)
783 		return;
784 
785 	SPL_NET(s);
786 	WRITE_ENTER(&ipf_frag);
787 	/*
788 	 * Go through the entire table, looking for entries to expire,
789 	 * which is indicated by the ttl being less than or equal to fr_ticks.
790 	 */
791 	for (fp = &ipfr_list; ((fra = *fp) != NULL); ) {
792 		if (fra->ipfr_ttl > fr_ticks)
793 			break;
794 		fra->ipfr_ref--;
795 		fr_fragdelete(fra, &ipfr_tail);
796 	}
797 	RWLOCK_EXIT(&ipf_frag);
798 
799 	WRITE_ENTER(&ipf_ipidfrag);
800 	for (fp = &ipfr_ipidlist; ((fra = *fp) != NULL); ) {
801 		if (fra->ipfr_ttl > fr_ticks)
802 			break;
803 		fra->ipfr_ref--;
804 		fr_fragdelete(fra, &ipfr_ipidtail);
805 	}
806 	RWLOCK_EXIT(&ipf_ipidfrag);
807 
808 	/*
809 	 * Same again for the NAT table, except that if the structure also
810 	 * still points to a NAT structure, and the NAT structure points back
811 	 * at the one to be free'd, NULL the reference from the NAT struct.
812 	 * NOTE: We need to grab both mutex's early, and in this order so as
813 	 * to prevent a deadlock if both try to expire at the same time.
814 	 * The extra if() statement here is because it locks out all NAT
815 	 * operations - no need to do that if there are no entries in this
816 	 * list, right?
817 	 */
818 	if (ipfr_natlist != NULL) {
819 		WRITE_ENTER(&ipf_nat);
820 		WRITE_ENTER(&ipf_natfrag);
821 		for (fp = &ipfr_natlist; ((fra = *fp) != NULL); ) {
822 			if (fra->ipfr_ttl > fr_ticks)
823 				break;
824 			nat = fra->ipfr_data;
825 			if (nat != NULL) {
826 				if (nat->nat_data == fra)
827 					nat->nat_data = NULL;
828 			}
829 			fra->ipfr_ref--;
830 			fr_fragdelete(fra, &ipfr_nattail);
831 		}
832 		RWLOCK_EXIT(&ipf_natfrag);
833 		RWLOCK_EXIT(&ipf_nat);
834 	}
835 	SPL_X(s);
836 }
837 
838 
839 /* ------------------------------------------------------------------------ */
840 /* Function:    fr_slowtimer                                                */
841 /* Returns:     Nil                                                         */
842 /* Parameters:  Nil                                                         */
843 /*                                                                          */
844 /* Slowly expire held state for fragments.  Timeouts are set * in           */
845 /* expectation of this being called twice per second.                       */
846 /* ------------------------------------------------------------------------ */
847 #if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
848 			  !defined(__osf__) && !defined(linux))
849 # if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
fr_slowtimer(void * ptr)850 void fr_slowtimer __P((void *ptr))
851 # else
852 int fr_slowtimer()
853 # endif
854 {
855 	READ_ENTER(&ipf_global);
856 
857 	ipf_expiretokens();
858 	fr_fragexpire();
859 	fr_timeoutstate();
860 	fr_natexpire();
861 	fr_authexpire();
862 	fr_ticks++;
863 	if (fr_running <= 0)
864 		goto done;
865 # ifdef _KERNEL
866 #  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
867 	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
868 #  else
869 #   if defined(__OpenBSD__)
870 	timeout_add(&fr_slowtimer_ch, hz/2);
871 #   else
872 #    if (__FreeBSD_version >= 300000)
873 	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
874 #    else
875 #     ifdef linux
876 	;
877 #     else
878 	timeout(fr_slowtimer, NULL, hz/2);
879 #     endif
880 #    endif /* FreeBSD */
881 #   endif /* OpenBSD */
882 #  endif /* NetBSD */
883 # endif
884 done:
885 	RWLOCK_EXIT(&ipf_global);
886 # if (BSD < 199103) || !defined(_KERNEL)
887 	return 0;
888 # endif
889 }
890 #endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
891 
892 
893 /* ------------------------------------------------------------------------ */
894 /* Function:    fr_nextfrag                                                 */
895 /* Returns:     int      - 0 == success, else error                         */
896 /* Parameters:  token(I) - pointer to token information for this caller     */
897 /*              itp(I)   - pointer to generic iterator from caller          */
898 /*              top(I)   - top of the fragment list                         */
899 /*              tail(I)  - tail of the fragment list                        */
900 /*              lock(I)  - fragment cache lock                              */
901 /*                                                                          */
902 /* This function is used to interate through the list of entries in the     */
903 /* fragment cache.  It increases the reference count on the one currently   */
904 /* being returned so that the caller can come back and resume from it later.*/
905 /*                                                                          */
906 /* This function is used for both the NAT fragment cache as well as the ipf */
907 /* fragment cache - hence the reason for passing in top, tail and lock.     */
908 /* ------------------------------------------------------------------------ */
fr_nextfrag(token,itp,top,tail,lock)909 int fr_nextfrag(token, itp, top, tail
910 #ifdef USE_MUTEXES
911 , lock
912 #endif
913 )
914 ipftoken_t *token;
915 ipfgeniter_t *itp;
916 ipfr_t **top, ***tail;
917 #ifdef USE_MUTEXES
918 ipfrwlock_t *lock;
919 #endif
920 {
921 	ipfr_t *frag, *next, zero;
922 	int error = 0;
923 
924 	frag = token->ipt_data;
925 	if (frag == (ipfr_t *)-1) {
926 		ipf_freetoken(token);
927 		return ESRCH;
928 	}
929 
930 	READ_ENTER(lock);
931 	if (frag == NULL)
932 		next = *top;
933 	else
934 		next = frag->ipfr_next;
935 
936 	if (next != NULL) {
937 		ATOMIC_INC(next->ipfr_ref);
938 		token->ipt_data = next;
939 	} else {
940 		bzero(&zero, sizeof(zero));
941 		next = &zero;
942 		token->ipt_data = NULL;
943 	}
944 	RWLOCK_EXIT(lock);
945 
946 	if (frag != NULL) {
947 #ifdef USE_MUTEXES
948 		fr_fragderef(&frag, lock);
949 #else
950 		fr_fragderef(&frag);
951 #endif
952 	}
953 
954 	error = COPYOUT(next, itp->igi_data, sizeof(*next));
955 	if (error != 0)
956 		error = EFAULT;
957 
958 	return error;
959 }
960 
961 
962 /* ------------------------------------------------------------------------ */
963 /* Function:    fr_fragderef                                                */
964 /* Returns:     Nil                                                         */
965 /* Parameters:  frp(IO) - pointer to fragment structure to deference        */
966 /*              lock(I) - lock associated with the fragment                 */
967 /*                                                                          */
968 /* This function dereferences a fragment structure (ipfr_t).  The pointer   */
969 /* passed in will always be reset back to NULL, even if the structure is    */
970 /* not freed, to enforce the notion that the caller is no longer entitled   */
971 /* to use the pointer it is dropping the reference to.                      */
972 /* ------------------------------------------------------------------------ */
fr_fragderef(frp,lock)973 void fr_fragderef(frp
974 #ifdef USE_MUTEXES
975 , lock
976 #endif
977 )
978 ipfr_t **frp;
979 #ifdef USE_MUTEXES
980 ipfrwlock_t *lock;
981 #endif
982 {
983 	ipfr_t *fra;
984 
985 	fra = *frp;
986 	*frp = NULL;
987 
988 	WRITE_ENTER(lock);
989 	fra->ipfr_ref--;
990 	if (fra->ipfr_ref <= 0)
991 		fr_fragfree(fra);
992 	RWLOCK_EXIT(lock);
993 }
994