1 /*	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */
2 
3 /*
4  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #ifdef __FreeBSD__
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_pf.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: stable/9/sys/contrib/pf/net/pf_norm.c 292731 2015-12-25 15:12:11Z kp $");
35 
36 #ifdef DEV_PFLOG
37 #define	NPFLOG	DEV_PFLOG
38 #else
39 #define	NPFLOG	0
40 #endif
41 #else
42 #include "pflog.h"
43 #endif
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/mbuf.h>
48 #include <sys/filio.h>
49 #include <sys/fcntl.h>
50 #include <sys/socket.h>
51 #include <sys/kernel.h>
52 #include <sys/time.h>
53 #ifndef __FreeBSD__
54 #include <sys/pool.h>
55 
56 #include <dev/rndvar.h>
57 #endif
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/route.h>
62 #include <net/if_pflog.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_seq.h>
71 #include <netinet/udp.h>
72 #include <netinet/ip_icmp.h>
73 
74 #ifdef INET6
75 #include <netinet/ip6.h>
76 #endif /* INET6 */
77 
78 #include <net/pfvar.h>
79 
80 #ifndef __FreeBSD__
/*
 * One buffered fragment on a pf_fragment's fr_queue list: the mbuf
 * holding the fragment and a pointer to its IP header.
 */
81 struct pf_frent {
82 	LIST_ENTRY(pf_frent) fr_next;	/* linkage in pf_fragment.fr_queue */
83 	struct ip *fr_ip;		/* IP header of this fragment */
84 	struct mbuf *fr_m;		/* mbuf chain of this fragment */
85 };
86 
/*
 * One byte range recorded on a pf_fragment's fr_cache list by the
 * non-buffering fragment cache (pf_fragcache() stores the fragment's
 * start offset in fr_off and its end offset in fr_end).
 */
87 struct pf_frcache {
88 	LIST_ENTRY(pf_frcache) fr_next;	/* linkage in pf_fragment.fr_cache */
89 	uint16_t	fr_off;		/* start of the recorded range */
90 	uint16_t	fr_end;		/* end of the recorded range */
91 };
92 #endif
93 
/* Bits stored in pf_fragment.fr_flags */
94 #define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
95 #define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
96 #define PFFRAG_DROP	0x0004		/* Drop all fragments */
/* True when the descriptor buffers fragments (PFFRAG_NOBUFFER is clear) */
97 #define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
98 
99 #ifndef __FreeBSD__
/*
 * Descriptor for one fragmented datagram, keyed by (fr_id, fr_p, fr_src,
 * fr_dst) as compared by pf_frag_compare().  Depending on PFFRAG_NOBUFFER
 * in fr_flags the union holds either the queue of buffered fragments or
 * the list of byte ranges already seen.
 */
100 struct pf_fragment {
101 	RB_ENTRY(pf_fragment) fr_entry;	/* linkage in the RB tree */
102 	TAILQ_ENTRY(pf_fragment) frag_next;	/* linkage in frag/cache queue */
103 	struct in_addr	fr_src;
104 	struct in_addr	fr_dst;
105 	u_int8_t	fr_p;		/* protocol of this fragment */
106 	u_int8_t	fr_flags;	/* status flags */
107 	u_int16_t	fr_id;		/* fragment id for reassemble */
108 	u_int16_t	fr_max;		/* fragment data max */
109 	u_int32_t	fr_timeout;	/* time_second of creation/last lookup */
/* Shorthand for the two union members */
110 #define fr_queue	fr_u.fru_queue
111 #define fr_cache	fr_u.fru_cache
112 	union {
113 		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
114 		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
115 	} fr_u;
116 };
117 #endif
118 
/*
 * Queues of fragment descriptors: pf_fragqueue holds the buffering
 * descriptors, pf_cachequeue the non-buffering (cache) ones.  New and
 * recently looked-up descriptors are inserted at the head; purging and
 * flushing take descriptors from the tail.
 */
119 #ifdef __FreeBSD__
120 TAILQ_HEAD(pf_fragqueue, pf_fragment);
121 TAILQ_HEAD(pf_cachequeue, pf_fragment);
122 VNET_DEFINE(struct pf_fragqueue,	pf_fragqueue);
123 #define	V_pf_fragqueue			VNET(pf_fragqueue)
124 VNET_DEFINE(struct pf_cachequeue,	pf_cachequeue);
125 #define	V_pf_cachequeue			VNET(pf_cachequeue)
126 #else
127 TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
128 TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;
129 #endif
130 
/* Ordering function used by the fragment RB trees below */
131 #ifndef __FreeBSD__
132 static __inline int	 pf_frag_compare(struct pf_fragment *,
133 			    struct pf_fragment *);
134 #else
135 static int		 pf_frag_compare(struct pf_fragment *,
136 			    struct pf_fragment *);
137 #endif
138 
/*
 * Lookup trees for fragment descriptors: pf_frag_tree for buffering
 * descriptors, pf_cache_tree for non-buffering ones.  Both are of type
 * struct pf_frag_tree and are ordered by pf_frag_compare().
 */
139 #ifdef __FreeBSD__
140 RB_HEAD(pf_frag_tree, pf_fragment);
141 VNET_DEFINE(struct pf_frag_tree,	pf_frag_tree);
142 #define	V_pf_frag_tree			VNET(pf_frag_tree)
143 VNET_DEFINE(struct pf_frag_tree,	pf_cache_tree);
144 #define	V_pf_cache_tree			VNET(pf_cache_tree)
145 #else
146 RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
147 #endif
148 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
149 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
150 
151 /* Private prototypes */
/* Fragment descriptor lookup and bookkeeping */
152 void			 pf_ip2key(struct pf_fragment *, struct ip *);
153 void			 pf_remove_fragment(struct pf_fragment *);
154 void			 pf_flush_fragments(void);
155 void			 pf_free_fragment(struct pf_fragment *);
156 struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
/* Buffering reassembly and non-buffering fragment cache */
157 struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
158 			    struct pf_frent *, int);
159 struct mbuf		*pf_fragcache(struct mbuf **, struct ip*,
160 			    struct pf_fragment **, int, int, int *);
/* Header scrubbing helpers (defined elsewhere in this file) */
161 int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
162 			    struct tcphdr *, int, sa_family_t);
163 void			 pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
164 			    u_int8_t);
165 #ifdef INET6
166 void			 pf_scrub_ip6(struct mbuf **, u_int8_t);
167 #endif
/*
 * Debug printf.  When the pf debug level is at least PF_DEBUG_MISC,
 * prints the name of the calling function followed by the message.
 * The argument must be a complete, parenthesized printf argument list:
 *	DPFPRINTF(("value %d\n", v));
 * The two variants differ only in how the pf status is named
 * (V_pf_status on FreeBSD, pf_status otherwise).
 */
168 #ifdef __FreeBSD__
169 #define	DPFPRINTF(x) do {				\
170 	if (V_pf_status.debug >= PF_DEBUG_MISC) {	\
171 		printf("%s: ", __func__);		\
172 		printf x ;				\
173 	}						\
174 } while(0)
175 #else
176 #define	DPFPRINTF(x) do {				\
177 	if (pf_status.debug >= PF_DEBUG_MISC) {		\
178 		printf("%s: ", __func__);		\
179 		printf x ;				\
180 	}						\
181 } while(0)
182 #endif
183 
184 /* Globals */
/*
 * Allocators used by this file:
 *   pf_frent_pl       - struct pf_frent (buffered fragment entries)
 *   pf_frag_pl        - struct pf_fragment for buffering descriptors
 *   pf_cache_pl       - struct pf_fragment for non-buffering descriptors
 *   pf_cent_pl        - struct pf_frcache (cached range entries)
 *   pf_state_scrub_pl - struct pf_state_scrub
 *
 * pf_nfrents is decremented whenever a pf_frent is returned to
 * pf_frent_pl (the matching increment is outside this part of the file);
 * pf_ncache counts pf_frcache entries currently taken from pf_cent_pl.
 */
185 #ifdef __FreeBSD__
186 VNET_DEFINE(uma_zone_t,		pf_frent_pl);
187 VNET_DEFINE(uma_zone_t,		pf_frag_pl);
188 VNET_DEFINE(uma_zone_t,		pf_cache_pl);
189 VNET_DEFINE(uma_zone_t,		pf_cent_pl);
190 VNET_DEFINE(uma_zone_t,		pf_state_scrub_pl);
191 
192 VNET_DEFINE(int,		pf_nfrents);
193 #define	V_pf_nfrents		VNET(pf_nfrents)
194 VNET_DEFINE(int,		pf_ncache);
195 #define	V_pf_ncache		VNET(pf_ncache)
196 #else
197 struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
198 struct pool		 pf_state_scrub_pl;
199 int			 pf_nfrents, pf_ncache;
200 #endif
201 
202 void
pf_normalize_init(void)203 pf_normalize_init(void)
204 {
205 #ifdef __FreeBSD__
206 	/*
207 	 * XXX
208 	 * No high water mark support(It's hint not hard limit).
209 	 * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT);
210 	 */
211 	uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT);
212 	uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
213 	uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT);
214 #else
215 	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
216 	    NULL);
217 	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
218 	    NULL);
219 	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
220 	    "pffrcache", NULL);
221 	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
222 	    NULL);
223 	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
224 	    "pfstscr", NULL);
225 
226 	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
227 	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
228 	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
229 	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
230 #endif
231 
232 #ifdef __FreeBSD__
233 	TAILQ_INIT(&V_pf_fragqueue);
234 	TAILQ_INIT(&V_pf_cachequeue);
235 #else
236 	TAILQ_INIT(&pf_fragqueue);
237 	TAILQ_INIT(&pf_cachequeue);
238 #endif
239 }
240 
241 #ifdef __FreeBSD__
242 static int
243 #else
244 static __inline int
245 #endif
pf_frag_compare(struct pf_fragment * a,struct pf_fragment * b)246 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
247 {
248 	int	diff;
249 
250 	if ((diff = a->fr_id - b->fr_id))
251 		return (diff);
252 	else if ((diff = a->fr_p - b->fr_p))
253 		return (diff);
254 	else if (a->fr_src.s_addr < b->fr_src.s_addr)
255 		return (-1);
256 	else if (a->fr_src.s_addr > b->fr_src.s_addr)
257 		return (1);
258 	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
259 		return (-1);
260 	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
261 		return (1);
262 	return (0);
263 }
264 
265 void
pf_purge_expired_fragments(void)266 pf_purge_expired_fragments(void)
267 {
268 	struct pf_fragment	*frag;
269 #ifdef __FreeBSD__
270 	u_int32_t		 expire = time_second -
271 				    V_pf_default_rule.timeout[PFTM_FRAG];
272 #else
273 	u_int32_t		 expire = time_second -
274 				    pf_default_rule.timeout[PFTM_FRAG];
275 #endif
276 
277 #ifdef __FreeBSD__
278 	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
279 		KASSERT((BUFFER_FRAGMENTS(frag)),
280 		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
281 #else
282 	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
283 		KASSERT(BUFFER_FRAGMENTS(frag));
284 #endif
285 		if (frag->fr_timeout > expire)
286 			break;
287 
288 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
289 		pf_free_fragment(frag);
290 	}
291 
292 #ifdef __FreeBSD__
293 	while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
294 		KASSERT((!BUFFER_FRAGMENTS(frag)),
295 		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
296 #else
297 	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
298 		KASSERT(!BUFFER_FRAGMENTS(frag));
299 #endif
300 		if (frag->fr_timeout > expire)
301 			break;
302 
303 		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
304 		pf_free_fragment(frag);
305 #ifdef __FreeBSD__
306 		KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
307 		    TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
308 		    ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
309 		    __FUNCTION__));
310 #else
311 		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
312 		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
313 #endif
314 	}
315 }
316 
317 /*
318  * Try to flush old fragments to make space for new ones
319  */
320 
321 void
322 pf_flush_fragments(void)
323 {
324 	struct pf_fragment	*frag;
325 	int			 goal;
326 
327 #ifdef __FreeBSD__
328 	goal = V_pf_nfrents * 9 / 10;
329 	DPFPRINTF(("trying to free > %d frents\n",
330 	    V_pf_nfrents - goal));
331 	while (goal < V_pf_nfrents) {
332 #else
333 	goal = pf_nfrents * 9 / 10;
334 	DPFPRINTF(("trying to free > %d frents\n",
335 	    pf_nfrents - goal));
336 	while (goal < pf_nfrents) {
337 #endif
338 #ifdef __FreeBSD__
339 		frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
340 #else
341 		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
342 #endif
343 		if (frag == NULL)
344 			break;
345 		pf_free_fragment(frag);
346 	}
347 
348 
349 #ifdef __FreeBSD__
350 	goal = V_pf_ncache * 9 / 10;
351 	DPFPRINTF(("trying to free > %d cache entries\n",
352 	    V_pf_ncache - goal));
353 	while (goal < V_pf_ncache) {
354 #else
355 	goal = pf_ncache * 9 / 10;
356 	DPFPRINTF(("trying to free > %d cache entries\n",
357 	    pf_ncache - goal));
358 	while (goal < pf_ncache) {
359 #endif
360 #ifdef __FreeBSD__
361 		frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
362 #else
363 		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
364 #endif
365 		if (frag == NULL)
366 			break;
367 		pf_free_fragment(frag);
368 	}
369 }
370 
371 /* Frees the fragments and all associated entries */
372 
373 void
374 pf_free_fragment(struct pf_fragment *frag)
375 {
376 	struct pf_frent		*frent;
377 	struct pf_frcache	*frcache;
378 
379 	/* Free all fragments */
380 	if (BUFFER_FRAGMENTS(frag)) {
381 		for (frent = LIST_FIRST(&frag->fr_queue); frent;
382 		    frent = LIST_FIRST(&frag->fr_queue)) {
383 			LIST_REMOVE(frent, fr_next);
384 
385 			m_freem(frent->fr_m);
386 #ifdef __FreeBSD__
387 			pool_put(&V_pf_frent_pl, frent);
388 			V_pf_nfrents--;
389 #else
390 			pool_put(&pf_frent_pl, frent);
391 			pf_nfrents--;
392 #endif
393 		}
394 	} else {
395 		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
396 		    frcache = LIST_FIRST(&frag->fr_cache)) {
397 			LIST_REMOVE(frcache, fr_next);
398 
399 #ifdef __FreeBSD__
400 			KASSERT((LIST_EMPTY(&frag->fr_cache) ||
401 			    LIST_FIRST(&frag->fr_cache)->fr_off >
402 			    frcache->fr_end),
403 			    ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
404 			      " frcache->fr_end): %s", __FUNCTION__));
405 
406 			pool_put(&V_pf_cent_pl, frcache);
407 			V_pf_ncache--;
408 #else
409 			KASSERT(LIST_EMPTY(&frag->fr_cache) ||
410 			    LIST_FIRST(&frag->fr_cache)->fr_off >
411 			    frcache->fr_end);
412 
413 			pool_put(&pf_cent_pl, frcache);
414 			pf_ncache--;
415 #endif
416 		}
417 	}
418 
419 	pf_remove_fragment(frag);
420 }
421 
422 void
423 pf_ip2key(struct pf_fragment *key, struct ip *ip)
424 {
425 	key->fr_p = ip->ip_p;
426 	key->fr_id = ip->ip_id;
427 	key->fr_src.s_addr = ip->ip_src.s_addr;
428 	key->fr_dst.s_addr = ip->ip_dst.s_addr;
429 }
430 
431 struct pf_fragment *
432 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
433 {
434 	struct pf_fragment	 key;
435 	struct pf_fragment	*frag;
436 
437 	pf_ip2key(&key, ip);
438 
439 	frag = RB_FIND(pf_frag_tree, tree, &key);
440 	if (frag != NULL) {
441 		/* XXX Are we sure we want to update the timeout? */
442 		frag->fr_timeout = time_second;
443 		if (BUFFER_FRAGMENTS(frag)) {
444 #ifdef __FreeBSD__
445 			TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
446 			TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
447 #else
448 			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
449 			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
450 #endif
451 		} else {
452 #ifdef __FreeBSD__
453 			TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
454 			TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next);
455 #else
456 			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
457 			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
458 #endif
459 		}
460 	}
461 
462 	return (frag);
463 }
464 
465 /* Removes a fragment from the fragment queue and frees the fragment */
466 
467 void
468 pf_remove_fragment(struct pf_fragment *frag)
469 {
470 	if (BUFFER_FRAGMENTS(frag)) {
471 #ifdef __FreeBSD__
472 		RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
473 		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
474 		pool_put(&V_pf_frag_pl, frag);
475 #else
476 		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
477 		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
478 		pool_put(&pf_frag_pl, frag);
479 #endif
480 	} else {
481 #ifdef __FreeBSD__
482 		RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
483 		TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
484 		pool_put(&V_pf_cache_pl, frag);
485 #else
486 		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
487 		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
488 		pool_put(&pf_cache_pl, frag);
489 #endif
490 	}
491 }
492 
/*
 * Byte offset of a queued fragment within its datagram: the fragment
 * offset field of its IP header (masked with IP_OFFMASK) times 8.
 */
493 #define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Buffering reassembly.
 *
 * Inserts the fragment described by frent (mbuf *m0) into the queue of
 * *frag, creating and registering a new descriptor when *frag is NULL.
 * Data overlapping with fragments already queued is trimmed: the front
 * of the new fragment is cut where the preceding fragment overlaps it,
 * and following fragments are cut or discarded where the new fragment
 * overlaps them.
 *
 * mff is nonzero while more fragments follow; when it is zero the
 * descriptor is marked PFFRAG_SEENLAST.
 *
 * Returns the reassembled packet once the last fragment has been seen
 * and the queued data is contiguous up to fr_max; the descriptor is then
 * removed and *frag set to NULL.  Returns NULL otherwise, i.e. when
 *  - the fragment was queued and data is still missing,
 *  - the fragment was dropped (frent returned to its pool, mbuf freed)
 *    because no descriptor could be allocated or the fragment is fully
 *    covered by the preceding one, or
 *  - the complete packet would exceed IP_MAXPACKET, in which case the
 *    whole descriptor is freed and *frag set to NULL.
 */
494 struct mbuf *
495 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
496     struct pf_frent *frent, int mff)
497 {
498 	struct mbuf	*m = *m0, *m2;
499 	struct pf_frent	*frea, *next;
500 	struct pf_frent	*frep = NULL;
501 	struct ip	*ip = frent->fr_ip;
502 	int		 hlen = ip->ip_hl << 2;
	/* off: payload start, ip_len: payload length, max: payload end */
503 	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
504 	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
505 	u_int16_t	 max = ip_len + off;
506 
507 #ifdef __FreeBSD__
508 	KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
509 	    ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
510 #else
511 	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
512 #endif
513 
514 	/* Strip off ip header */
515 	m->m_data += hlen;
516 	m->m_len -= hlen;
517 
518 	/* Create a new reassembly queue for this packet */
519 	if (*frag == NULL) {
520 #ifdef __FreeBSD__
521 		*frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
522 #else
523 		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
524 #endif
525 		if (*frag == NULL) {
			/* Make room by flushing old fragments, then retry once */
526 			pf_flush_fragments();
527 #ifdef __FreeBSD__
528 			*frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
529 #else
530 			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
531 #endif
532 			if (*frag == NULL)
533 				goto drop_fragment;
534 		}
535 
536 		(*frag)->fr_flags = 0;
537 		(*frag)->fr_max = 0;
538 		(*frag)->fr_src = frent->fr_ip->ip_src;
539 		(*frag)->fr_dst = frent->fr_ip->ip_dst;
540 		(*frag)->fr_p = frent->fr_ip->ip_p;
541 		(*frag)->fr_id = frent->fr_ip->ip_id;
542 		(*frag)->fr_timeout = time_second;
543 		LIST_INIT(&(*frag)->fr_queue);
544 
545 #ifdef __FreeBSD__
546 		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
547 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
548 #else
549 		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
550 		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
551 #endif
552 
553 		/* We do not have a previous fragment */
554 		frep = NULL;
555 		goto insert;
556 	}
557 
558 	/*
559 	 * Find a fragment after the current one:
560 	 *  - off contains the real shifted offset.
561 	 */
	/* Afterwards frep is the predecessor (or NULL), frea the successor */
562 	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
563 		if (FR_IP_OFF(frea) > off)
564 			break;
565 		frep = frea;
566 	}
567 
568 #ifdef __FreeBSD__
569 	KASSERT((frep != NULL || frea != NULL),
570 	    ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
571 #else
572 	KASSERT(frep != NULL || frea != NULL);
573 #endif
574 
	/* The predecessor reaches past our start: cut our front */
575 	if (frep != NULL &&
576 	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
577 	    4 > off)
578 	{
579 		u_int16_t	precut;
580 
581 		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
582 		    frep->fr_ip->ip_hl * 4 - off;
		/* Nothing new in this fragment at all */
583 		if (precut >= ip_len)
584 			goto drop_fragment;
585 		m_adj(frent->fr_m, precut);
586 		DPFPRINTF(("overlap -%d\n", precut));
587 		/* Enforce 8 byte boundaries */
588 		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
589 		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
590 		ip_len -= precut;
591 		ip->ip_len = htons(ip_len);
592 	}
593 
	/* We reach into following fragments: cut their front or drop them */
594 	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
595 	    frea = next)
596 	{
597 		u_int16_t	aftercut;
598 
599 		aftercut = ip_len + off - FR_IP_OFF(frea);
600 		DPFPRINTF(("adjust overlap %d\n", aftercut));
601 		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
602 		    * 4)
603 		{
			/* Partial overlap: trim the successor and stop */
604 			frea->fr_ip->ip_len =
605 			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
606 			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
607 			    (aftercut >> 3));
608 			m_adj(frea->fr_m, aftercut);
609 			break;
610 		}
611 
612 		/* This fragment is completely overlapped, lose it */
613 		next = LIST_NEXT(frea, fr_next);
614 		m_freem(frea->fr_m);
615 		LIST_REMOVE(frea, fr_next);
616 #ifdef __FreeBSD__
617 		pool_put(&V_pf_frent_pl, frea);
618 		V_pf_nfrents--;
619 #else
620 		pool_put(&pf_frent_pl, frea);
621 		pf_nfrents--;
622 #endif
623 	}
624 
625  insert:
626 	/* Update maximum data size */
627 	if ((*frag)->fr_max < max)
628 		(*frag)->fr_max = max;
629 	/* This is the last segment */
630 	if (!mff)
631 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
632 
633 	if (frep == NULL)
634 		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
635 	else
636 		LIST_INSERT_AFTER(frep, frent, fr_next);
637 
638 	/* Check if we are completely reassembled */
639 	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
640 		return (NULL);
641 
642 	/* Check if we have all the data */
	/* off now accumulates the payload lengths of the queued fragments */
643 	off = 0;
644 	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
645 		next = LIST_NEXT(frep, fr_next);
646 
647 		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
648 		if (off < (*frag)->fr_max &&
649 		    (next == NULL || FR_IP_OFF(next) != off))
650 		{
651 			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
652 			    off, next == NULL ? -1 : FR_IP_OFF(next),
653 			    (*frag)->fr_max));
654 			return (NULL);
655 		}
656 	}
657 	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
658 	if (off < (*frag)->fr_max)
659 		return (NULL);
660 
661 	/* We have all the data */
662 	frent = LIST_FIRST(&(*frag)->fr_queue);
663 #ifdef __FreeBSD__
664 	KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
665 #else
666 	KASSERT(frent != NULL);
667 #endif
	/* Header of the first fragment plus all payload must fit */
668 	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
669 		DPFPRINTF(("drop: too big: %d\n", off));
670 		pf_free_fragment(*frag);
671 		*frag = NULL;
672 		return (NULL);
673 	}
674 	next = LIST_NEXT(frent, fr_next);
675 
676 	/* Magic from ip_input */
677 	ip = frent->fr_ip;
678 	m = frent->fr_m;
679 	m2 = m->m_next;
680 	m->m_next = NULL;
681 	m_cat(m, m2);
682 #ifdef __FreeBSD__
683 	pool_put(&V_pf_frent_pl, frent);
684 	V_pf_nfrents--;
685 #else
686 	pool_put(&pf_frent_pl, frent);
687 	pf_nfrents--;
688 #endif
	/* Append the remaining fragments to the first one, releasing entries */
689 	for (frent = next; frent != NULL; frent = next) {
690 		next = LIST_NEXT(frent, fr_next);
691 
692 		m2 = frent->fr_m;
693 #ifdef __FreeBSD__
694 		pool_put(&V_pf_frent_pl, frent);
695 		V_pf_nfrents--;
696 #else
697 		pool_put(&pf_frent_pl, frent);
698 		pf_nfrents--;
699 #endif
700 #ifdef __FreeBSD__
		/* Combine the per-fragment checksum flags and data */
701 		m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
702 		m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
703 #endif
704 		m_cat(m, m2);
705 	}
706 
707 #ifdef __FreeBSD__
	/* Fold the summed checksum data back into 16 bits */
708 	while (m->m_pkthdr.csum_data & 0xffff0000)
709 		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
710 		    (m->m_pkthdr.csum_data >> 16);
711 #endif
712 	ip->ip_src = (*frag)->fr_src;
713 	ip->ip_dst = (*frag)->fr_dst;
714 
715 	/* Remove from fragment queue */
716 	pf_remove_fragment(*frag);
717 	*frag = NULL;
718 
	/* Re-expose the IP header stripped earlier and set the total length */
719 	hlen = ip->ip_hl << 2;
720 	ip->ip_len = htons(off + hlen);
721 	m->m_len += hlen;
722 	m->m_data -= hlen;
723 
724 	/* some debugging cruft by sklower, below, will go away soon */
725 	/* XXX this should be done elsewhere */
726 	if (m->m_flags & M_PKTHDR) {
727 		int plen = 0;
728 		for (m2 = m; m2; m2 = m2->m_next)
729 			plen += m2->m_len;
730 		m->m_pkthdr.len = plen;
731 	}
732 
733 	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
734 	return (m);
735 
736  drop_fragment:
737 	/* Oops - fail safe - drop packet */
738 #ifdef __FreeBSD__
739 	pool_put(&V_pf_frent_pl, frent);
740 	V_pf_nfrents--;
741 #else
742 	pool_put(&pf_frent_pl, frent);
743 	pf_nfrents--;
744 #endif
745 	m_freem(m);
746 	return (NULL);
747 }
748 
/*
 * Non-buffering fragment cache.
 *
 * Instead of holding fragments back, only the byte ranges already seen
 * are recorded in the range list of *frag (a new descriptor is created
 * and registered when *frag is NULL).  For the fragment in *m0 / h:
 *  - data overlapping a range that precedes it is cut from its front,
 *  - data overlapping the range that follows it is cut from its tail,
 *  - the range list is extended or merged to cover the fragment.
 * Once the last fragment has been seen and a single range covers
 * 0..fr_max the descriptor is freed and *frag set to NULL.
 *
 * mff is nonzero while more fragments follow.  When drop is nonzero an
 * overlapping fragment is not trimmed; it is dropped and the descriptor
 * is flagged PFFRAG_DROP.  *nomem is set to 1 when an allocation fails.
 *
 * Returns the (possibly trimmed, possibly re-headed) mbuf, with *m0
 * updated when a new head mbuf had to be created, or NULL when the mbuf
 * was freed (duplicate, dropped, or out of memory).
 */
749 struct mbuf *
750 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
751     int drop, int *nomem)
752 {
753 	struct mbuf		*m = *m0;
754 	struct pf_frcache	*frp, *fra, *cur = NULL;
755 	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	/* The flag bits of ip_off are shifted out by the 16 bit truncation */
756 	u_int16_t		 off = ntohs(h->ip_off) << 3;
757 	u_int16_t		 max = ip_len + off;
	/* Set when an overlap was found while drop is requested */
758 	int			 hosed = 0;
759 
760 #ifdef __FreeBSD__
761 	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
762 	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
763 #else
764 	KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
765 #endif
766 
767 	/* Create a new range queue for this packet */
768 	if (*frag == NULL) {
769 #ifdef __FreeBSD__
770 		*frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
771 #else
772 		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
773 #endif
774 		if (*frag == NULL) {
			/* Make room by flushing old fragments, then retry once */
775 			pf_flush_fragments();
776 #ifdef __FreeBSD__
777 			*frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
778 #else
779 			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
780 #endif
781 			if (*frag == NULL)
782 				goto no_mem;
783 		}
784 
785 		/* Get an entry for the queue */
786 #ifdef __FreeBSD__
787 		cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
788 		if (cur == NULL) {
789 			pool_put(&V_pf_cache_pl, *frag);
790 #else
791 		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
792 		if (cur == NULL) {
793 			pool_put(&pf_cache_pl, *frag);
794 #endif
795 			*frag = NULL;
796 			goto no_mem;
797 		}
798 #ifdef __FreeBSD__
799 		V_pf_ncache++;
800 #else
801 		pf_ncache++;
802 #endif
803 
804 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
805 		(*frag)->fr_max = 0;
806 		(*frag)->fr_src = h->ip_src;
807 		(*frag)->fr_dst = h->ip_dst;
808 		(*frag)->fr_p = h->ip_p;
809 		(*frag)->fr_id = h->ip_id;
810 		(*frag)->fr_timeout = time_second;
811 
812 		cur->fr_off = off;
813 		cur->fr_end = max;
814 		LIST_INIT(&(*frag)->fr_cache);
815 		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
816 
817 #ifdef __FreeBSD__
818 		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
819 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
820 #else
821 		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
822 		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
823 #endif
824 
825 		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
826 
827 		goto pass;
828 	}
829 
830 	/*
831 	 * Find a fragment after the current one:
832 	 *  - off contains the real shifted offset.
833 	 */
	/* Afterwards frp is the preceding range (or NULL), fra the next one */
834 	frp = NULL;
835 	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
836 		if (fra->fr_off > off)
837 			break;
838 		frp = fra;
839 	}
840 
841 #ifdef __FreeBSD__
842 	KASSERT((frp != NULL || fra != NULL),
843 	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
844 #else
845 	KASSERT(frp != NULL || fra != NULL);
846 #endif
847 
848 	if (frp != NULL) {
849 		int	precut;
850 
		/* > 0: overlap with frp, == 0: adjacent, < 0: gap */
851 		precut = frp->fr_end - off;
852 		if (precut >= ip_len) {
853 			/* Fragment is entirely a duplicate */
854 			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
855 			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
856 			goto drop_fragment;
857 		}
858 		if (precut == 0) {
859 			/* They are adjacent.  Fixup cache entry */
860 			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
861 			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
862 			frp->fr_end = max;
863 		} else if (precut > 0) {
864 			/* The first part of this payload overlaps with a
865 			 * fragment that has already been passed.
866 			 * Need to trim off the first part of the payload.
867 			 * But to do so easily, we need to create another
868 			 * mbuf to throw the original header into.
869 			 */
870 
871 			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
872 			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
873 			    max));
874 
			/*
			 * NOTE(review): max was computed as off + ip_len before
			 * off is advanced here, so subtracting precut from it
			 * appears to place the recorded end precut bytes short
			 * of the fragment's real end -- confirm this is intended.
			 */
875 			off += precut;
876 			max -= precut;
877 			/* Update the previous frag to encompass this one */
878 			frp->fr_end = max;
879 
880 			if (!drop) {
881 				/* XXX Optimization opportunity
882 				 * This is a very heavy way to trim the payload.
883 				 * we could do it much faster by diddling mbuf
884 				 * internals but that would be even less legible
885 				 * than this mbuf magic.  For my next trick,
886 				 * I'll pull a rabbit out of my laptop.
887 				 */
				/* Build a separate mbuf that holds only the IP header */
888 #ifdef __FreeBSD__
889 				*m0 = m_dup(m, M_DONTWAIT);
890 #else
891 				*m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
892 #endif
893 				if (*m0 == NULL)
894 					goto no_mem;
895 #ifdef __FreeBSD__
896 				/* From KAME Project : We have missed this! */
897 				m_adj(*m0, (h->ip_hl << 2) -
898 				    (*m0)->m_pkthdr.len);
899 
900 				KASSERT(((*m0)->m_next == NULL),
901 				    ("(*m0)->m_next != NULL: %s",
902 				    __FUNCTION__));
903 #else
904 				KASSERT((*m0)->m_next == NULL);
905 #endif
				/* Cut header + overlap off the original, then rejoin */
906 				m_adj(m, precut + (h->ip_hl << 2));
907 				m_cat(*m0, m);
908 				m = *m0;
909 				if (m->m_flags & M_PKTHDR) {
910 					int plen = 0;
911 					struct mbuf *t;
912 					for (t = m; t; t = t->m_next)
913 						plen += t->m_len;
914 					m->m_pkthdr.len = plen;
915 				}
916 
917 
				/* h must follow the header into the new mbuf */
918 				h = mtod(m, struct ip *);
919 
920 #ifdef __FreeBSD__
921 				KASSERT(((int)m->m_len ==
922 				    ntohs(h->ip_len) - precut),
923 				    ("m->m_len != ntohs(h->ip_len) - precut: %s",
924 				    __FUNCTION__));
925 #else
926 				KASSERT((int)m->m_len ==
927 				    ntohs(h->ip_len) - precut);
928 #endif
929 				h->ip_off = htons(ntohs(h->ip_off) +
930 				    (precut >> 3));
931 				h->ip_len = htons(ntohs(h->ip_len) - precut);
932 			} else {
933 				hosed++;
934 			}
935 		} else {
936 			/* There is a gap between fragments */
937 
938 			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
939 			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
940 			    max));
941 
942 #ifdef __FreeBSD__
943 			cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
944 #else
945 			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
946 #endif
947 			if (cur == NULL)
948 				goto no_mem;
949 #ifdef __FreeBSD__
950 			V_pf_ncache++;
951 #else
952 			pf_ncache++;
953 #endif
954 
955 			cur->fr_off = off;
956 			cur->fr_end = max;
957 			LIST_INSERT_AFTER(frp, cur, fr_next);
958 		}
959 	}
960 
961 	if (fra != NULL) {
962 		int	aftercut;
963 		int	merge = 0;
964 
		/* > 0: overlap with fra, == 0: adjacent, < 0: gap */
965 		aftercut = max - fra->fr_off;
966 		if (aftercut == 0) {
967 			/* Adjacent fragments */
968 			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
969 			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
970 			fra->fr_off = off;
971 			merge = 1;
972 		} else if (aftercut > 0) {
973 			/* Need to chop off the tail of this fragment */
974 			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
975 			    h->ip_id, aftercut, off, max, fra->fr_off,
976 			    fra->fr_end));
977 			fra->fr_off = off;
978 			max -= aftercut;
979 
980 			merge = 1;
981 
982 			if (!drop) {
				/* A negative length trims from the end of the chain */
983 				m_adj(m, -aftercut);
984 				if (m->m_flags & M_PKTHDR) {
985 					int plen = 0;
986 					struct mbuf *t;
987 					for (t = m; t; t = t->m_next)
988 						plen += t->m_len;
989 					m->m_pkthdr.len = plen;
990 				}
991 				h = mtod(m, struct ip *);
992 #ifdef __FreeBSD__
993 				KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut),
994 				    ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
995 				    __FUNCTION__));
996 #else
997 				KASSERT((int)m->m_len ==
998 				    ntohs(h->ip_len) - aftercut);
999 #endif
1000 				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
1001 			} else {
1002 				hosed++;
1003 			}
1004 		} else if (frp == NULL) {
1005 			/* There is a gap between fragments */
1006 			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
1007 			    h->ip_id, -aftercut, off, max, fra->fr_off,
1008 			    fra->fr_end));
1009 
1010 #ifdef __FreeBSD__
1011 			cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
1012 #else
1013 			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1014 #endif
1015 			if (cur == NULL)
1016 				goto no_mem;
1017 #ifdef __FreeBSD__
1018 			V_pf_ncache++;
1019 #else
1020 			pf_ncache++;
1021 #endif
1022 
1023 			cur->fr_off = off;
1024 			cur->fr_end = max;
1025 			LIST_INSERT_BEFORE(fra, cur, fr_next);
1026 		}
1027 
1028 
1029 		/* Need to glue together two separate fragment descriptors */
1030 		if (merge) {
1031 			if (cur && fra->fr_off <= cur->fr_end) {
1032 				/* Need to merge in a previous 'cur' */
1033 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
1034 				    "%d-%d) %d-%d (%d-%d)\n",
1035 				    h->ip_id, cur->fr_off, cur->fr_end, off,
1036 				    max, fra->fr_off, fra->fr_end));
1037 				fra->fr_off = cur->fr_off;
1038 				LIST_REMOVE(cur, fr_next);
1039 #ifdef __FreeBSD__
1040 				pool_put(&V_pf_cent_pl, cur);
1041 				V_pf_ncache--;
1042 #else
1043 				pool_put(&pf_cent_pl, cur);
1044 				pf_ncache--;
1045 #endif
1046 				cur = NULL;
1047 
1048 			} else if (frp && fra->fr_off <= frp->fr_end) {
1049 				/* Need to merge in a modified 'frp' */
1050 #ifdef __FreeBSD__
1051 				KASSERT((cur == NULL), ("cur != NULL: %s",
1052 				    __FUNCTION__));
1053 #else
1054 				KASSERT(cur == NULL);
1055 #endif
1056 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
1057 				    "%d-%d) %d-%d (%d-%d)\n",
1058 				    h->ip_id, frp->fr_off, frp->fr_end, off,
1059 				    max, fra->fr_off, fra->fr_end));
1060 				fra->fr_off = frp->fr_off;
1061 				LIST_REMOVE(frp, fr_next);
1062 #ifdef __FreeBSD__
1063 				pool_put(&V_pf_cent_pl, frp);
1064 				V_pf_ncache--;
1065 #else
1066 				pool_put(&pf_cent_pl, frp);
1067 				pf_ncache--;
1068 #endif
1069 				frp = NULL;
1070 
1071 			}
1072 		}
1073 	}
1074 
1075 	if (hosed) {
1076 		/*
1077 		 * We must keep tracking the overall fragment even when
1078 		 * we're going to drop it anyway so that we know when to
1079 		 * free the overall descriptor.  Thus we drop the frag late.
1080 		 */
1081 		goto drop_fragment;
1082 	}
1083 
1084 
1085  pass:
1086 	/* Update maximum data size */
1087 	if ((*frag)->fr_max < max)
1088 		(*frag)->fr_max = max;
1089 
1090 	/* This is the last segment */
1091 	if (!mff)
1092 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1093 
1094 	/* Check if we are completely reassembled */
1095 	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1096 	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1097 	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1098 		/* Remove from fragment queue */
1099 		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1100 		    (*frag)->fr_max));
1101 		pf_free_fragment(*frag);
1102 		*frag = NULL;
1103 	}
1104 
1105 	return (m);
1106 
1107  no_mem:
1108 	*nomem = 1;
1109 
1110 	/* Still need to pay attention to !IP_MF */
1111 	if (!mff && *frag != NULL)
1112 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1113 
1114 	m_freem(m);
1115 	return (NULL);
1116 
1117  drop_fragment:
1118 
1119 	/* Still need to pay attention to !IP_MF */
1120 	if (!mff && *frag != NULL)
1121 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1122 
1123 	if (drop) {
1124 		/* This fragment has been deemed bad.  Don't reass */
1125 		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
1126 			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1127 			    h->ip_id));
1128 		(*frag)->fr_flags |= PFFRAG_DROP;
1129 	}
1130 
1131 	m_freem(m);
1132 	return (NULL);
1133 }
1134 
1135 #ifdef INET
1136 int
1137 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
1138     struct pf_pdesc *pd)
1139 {
1140 	struct mbuf		*m = *m0;
1141 	struct pf_rule		*r;
1142 	struct pf_frent		*frent;
1143 	struct pf_fragment	*frag = NULL;
1144 	struct ip		*h = mtod(m, struct ip *);
1145 	int			 mff = (ntohs(h->ip_off) & IP_MF);
1146 	int			 hlen = h->ip_hl << 2;
1147 	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1148 	u_int16_t		 max;
1149 	int			 ip_len;
1150 	int			 ip_off;
1151 	int			 tag = -1;
1152 
1153 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1154 	while (r != NULL) {
1155 		r->evaluations++;
1156 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
1157 			r = r->skip[PF_SKIP_IFP].ptr;
1158 		else if (r->direction && r->direction != dir)
1159 			r = r->skip[PF_SKIP_DIR].ptr;
1160 		else if (r->af && r->af != AF_INET)
1161 			r = r->skip[PF_SKIP_AF].ptr;
1162 		else if (r->proto && r->proto != h->ip_p)
1163 			r = r->skip[PF_SKIP_PROTO].ptr;
1164 		else if (PF_MISMATCHAW(&r->src.addr,
1165 		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1166 		    r->src.neg, kif, M_GETFIB(m)))
1167 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1168 		else if (PF_MISMATCHAW(&r->dst.addr,
1169 		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1170 		    r->dst.neg, NULL, M_GETFIB(m)))
1171 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1172 #ifdef __FreeBSD__
1173 		else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
1174 #else
1175 		else if (r->match_tag && !pf_match_tag(m, r, &tag))
1176 #endif
1177 			r = TAILQ_NEXT(r, entries);
1178 		else
1179 			break;
1180 	}
1181 
1182 	if (r == NULL || r->action == PF_NOSCRUB)
1183 		return (PF_PASS);
1184 	else {
1185 		r->packets[dir == PF_OUT]++;
1186 		r->bytes[dir == PF_OUT] += pd->tot_len;
1187 	}
1188 
1189 	/* Check for illegal packets */
1190 	if (hlen < (int)sizeof(struct ip))
1191 		goto drop;
1192 
1193 	if (hlen > ntohs(h->ip_len))
1194 		goto drop;
1195 
1196 	/* Clear IP_DF if the rule uses the no-df option */
1197 	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1198 		u_int16_t ip_off = h->ip_off;
1199 
1200 		h->ip_off &= htons(~IP_DF);
1201 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1202 	}
1203 
1204 	/* We will need other tests here */
1205 	if (!fragoff && !mff)
1206 		goto no_fragment;
1207 
1208 	/* We're dealing with a fragment now. Don't allow fragments
1209 	 * with IP_DF to enter the cache. If the flag was cleared by
1210 	 * no-df above, fine. Otherwise drop it.
1211 	 */
1212 	if (h->ip_off & htons(IP_DF)) {
1213 		DPFPRINTF(("IP_DF\n"));
1214 		goto bad;
1215 	}
1216 
1217 	ip_len = ntohs(h->ip_len) - hlen;
1218 	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1219 
1220 	/* All fragments are 8 byte aligned */
1221 	if (mff && (ip_len & 0x7)) {
1222 		DPFPRINTF(("mff and %d\n", ip_len));
1223 		goto bad;
1224 	}
1225 
1226 	/* Respect maximum length */
1227 	if (fragoff + ip_len > IP_MAXPACKET) {
1228 		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1229 		goto bad;
1230 	}
1231 	max = fragoff + ip_len;
1232 
1233 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1234 		/* Fully buffer all of the fragments */
1235 
1236 #ifdef __FreeBSD__
1237 		frag = pf_find_fragment(h, &V_pf_frag_tree);
1238 #else
1239 		frag = pf_find_fragment(h, &pf_frag_tree);
1240 #endif
1241 
1242 		/* Check if we saw the last fragment already */
1243 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1244 		    max > frag->fr_max)
1245 			goto bad;
1246 
1247 		/* Get an entry for the fragment queue */
1248 #ifdef __FreeBSD__
1249 		frent = pool_get(&V_pf_frent_pl, PR_NOWAIT);
1250 #else
1251 		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1252 #endif
1253 		if (frent == NULL) {
1254 			REASON_SET(reason, PFRES_MEMORY);
1255 			return (PF_DROP);
1256 		}
1257 #ifdef __FreeBSD__
1258 		V_pf_nfrents++;
1259 #else
1260 		pf_nfrents++;
1261 #endif
1262 		frent->fr_ip = h;
1263 		frent->fr_m = m;
1264 
1265 		/* Might return a completely reassembled mbuf, or NULL */
1266 		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
1267 		*m0 = m = pf_reassemble(m0, &frag, frent, mff);
1268 
1269 		if (m == NULL)
1270 			return (PF_DROP);
1271 
1272 		/* use mtag from concatenated mbuf chain */
1273 		pd->pf_mtag = pf_find_mtag(m);
1274 #ifdef DIAGNOSTIC
1275 		if (pd->pf_mtag == NULL) {
1276 			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1277 			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1278 				m_freem(m);
1279 				*m0 = NULL;
1280 				goto no_mem;
1281 			}
1282 		}
1283 #endif
1284 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1285 			goto drop;
1286 
1287 		h = mtod(m, struct ip *);
1288 	} else {
1289 		/* non-buffering fragment cache (drops or masks overlaps) */
1290 		int	nomem = 0;
1291 
1292 #ifdef __FreeBSD__
1293 		if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
1294 #else
1295 		if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
1296 #endif
1297 			/*
1298 			 * Already passed the fragment cache in the
1299 			 * input direction.  If we continued, it would
1300 			 * appear to be a dup and would be dropped.
1301 			 */
1302 			goto fragment_pass;
1303 		}
1304 
1305 #ifdef __FreeBSD__
1306 		frag = pf_find_fragment(h, &V_pf_cache_tree);
1307 #else
1308 		frag = pf_find_fragment(h, &pf_cache_tree);
1309 #endif
1310 
1311 		/* Check if we saw the last fragment already */
1312 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1313 		    max > frag->fr_max) {
1314 			if (r->rule_flag & PFRULE_FRAGDROP)
1315 				frag->fr_flags |= PFFRAG_DROP;
1316 			goto bad;
1317 		}
1318 
1319 		*m0 = m = pf_fragcache(m0, h, &frag, mff,
1320 		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1321 		if (m == NULL) {
1322 			if (nomem)
1323 				goto no_mem;
1324 			goto drop;
1325 		}
1326 
1327 		/* use mtag from copied and trimmed mbuf chain */
1328 		pd->pf_mtag = pf_find_mtag(m);
1329 #ifdef DIAGNOSTIC
1330 		if (pd->pf_mtag == NULL) {
1331 			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1332 			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1333 				m_freem(m);
1334 				*m0 = NULL;
1335 				goto no_mem;
1336 			}
1337 		}
1338 #endif
1339 		if (dir == PF_IN)
1340 #ifdef __FreeBSD__
1341 			pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
1342 #else
1343 			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
1344 #endif
1345 
1346 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1347 			goto drop;
1348 		goto fragment_pass;
1349 	}
1350 
1351  no_fragment:
1352 	/* At this point, only IP_DF is allowed in ip_off */
1353 	if (h->ip_off & ~htons(IP_DF)) {
1354 		u_int16_t ip_off = h->ip_off;
1355 
1356 		h->ip_off &= htons(IP_DF);
1357 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
1358 	}
1359 
1360 	/* not missing a return here */
1361 
1362  fragment_pass:
1363 	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
1364 
1365 	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1366 		pd->flags |= PFDESC_IP_REAS;
1367 	return (PF_PASS);
1368 
1369  no_mem:
1370 	REASON_SET(reason, PFRES_MEMORY);
1371 	if (r != NULL && r->log)
1372 		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1373 	return (PF_DROP);
1374 
1375  drop:
1376 	REASON_SET(reason, PFRES_NORM);
1377 	if (r != NULL && r->log)
1378 		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1379 	return (PF_DROP);
1380 
1381  bad:
1382 	DPFPRINTF(("dropping bad fragment\n"));
1383 
1384 	/* Free associated fragments */
1385 	if (frag != NULL)
1386 		pf_free_fragment(frag);
1387 
1388 	REASON_SET(reason, PFRES_FRAG);
1389 	if (r != NULL && r->log)
1390 		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1391 
1392 	return (PF_DROP);
1393 }
1394 #endif
1395 
1396 #ifdef INET6
1397 int
1398 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1399     u_short *reason, struct pf_pdesc *pd)
1400 {
1401 	struct mbuf		*m = *m0;
1402 	struct pf_rule		*r;
1403 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
1404 	int			 off;
1405 	struct ip6_ext		 ext;
1406 	struct ip6_opt		 opt;
1407 	struct ip6_opt_jumbo	 jumbo;
1408 	struct ip6_frag		 frag;
1409 	u_int32_t		 jumbolen = 0, plen;
1410 	u_int16_t		 fragoff = 0;
1411 	int			 optend;
1412 	int			 ooff;
1413 	u_int8_t		 proto;
1414 	int			 terminal;
1415 
1416 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1417 	while (r != NULL) {
1418 		r->evaluations++;
1419 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
1420 			r = r->skip[PF_SKIP_IFP].ptr;
1421 		else if (r->direction && r->direction != dir)
1422 			r = r->skip[PF_SKIP_DIR].ptr;
1423 		else if (r->af && r->af != AF_INET6)
1424 			r = r->skip[PF_SKIP_AF].ptr;
1425 #if 0 /* header chain! */
1426 		else if (r->proto && r->proto != h->ip6_nxt)
1427 			r = r->skip[PF_SKIP_PROTO].ptr;
1428 #endif
1429 		else if (PF_MISMATCHAW(&r->src.addr,
1430 		    (struct pf_addr *)&h->ip6_src, AF_INET6,
1431 		    r->src.neg, kif, M_GETFIB(m)))
1432 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1433 		else if (PF_MISMATCHAW(&r->dst.addr,
1434 		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
1435 		    r->dst.neg, NULL, M_GETFIB(m)))
1436 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1437 		else
1438 			break;
1439 	}
1440 
1441 	if (r == NULL || r->action == PF_NOSCRUB)
1442 		return (PF_PASS);
1443 	else {
1444 		r->packets[dir == PF_OUT]++;
1445 		r->bytes[dir == PF_OUT] += pd->tot_len;
1446 	}
1447 
1448 	/* Check for illegal packets */
1449 	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1450 		goto drop;
1451 
1452 	off = sizeof(struct ip6_hdr);
1453 	proto = h->ip6_nxt;
1454 	terminal = 0;
1455 	do {
1456 		switch (proto) {
1457 		case IPPROTO_FRAGMENT:
1458 			goto fragment;
1459 			break;
1460 		case IPPROTO_AH:
1461 		case IPPROTO_ROUTING:
1462 		case IPPROTO_DSTOPTS:
1463 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1464 			    NULL, AF_INET6))
1465 				goto shortpkt;
1466 			if (proto == IPPROTO_AH)
1467 				off += (ext.ip6e_len + 2) * 4;
1468 			else
1469 				off += (ext.ip6e_len + 1) * 8;
1470 			proto = ext.ip6e_nxt;
1471 			break;
1472 		case IPPROTO_HOPOPTS:
1473 			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1474 			    NULL, AF_INET6))
1475 				goto shortpkt;
1476 			optend = off + (ext.ip6e_len + 1) * 8;
1477 			ooff = off + sizeof(ext);
1478 			do {
1479 				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1480 				    sizeof(opt.ip6o_type), NULL, NULL,
1481 				    AF_INET6))
1482 					goto shortpkt;
1483 				if (opt.ip6o_type == IP6OPT_PAD1) {
1484 					ooff++;
1485 					continue;
1486 				}
1487 				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1488 				    NULL, NULL, AF_INET6))
1489 					goto shortpkt;
1490 				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1491 					goto drop;
1492 				switch (opt.ip6o_type) {
1493 				case IP6OPT_JUMBO:
1494 					if (h->ip6_plen != 0)
1495 						goto drop;
1496 					if (!pf_pull_hdr(m, ooff, &jumbo,
1497 					    sizeof(jumbo), NULL, NULL,
1498 					    AF_INET6))
1499 						goto shortpkt;
1500 					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1501 					    sizeof(jumbolen));
1502 					jumbolen = ntohl(jumbolen);
1503 					if (jumbolen <= IPV6_MAXPACKET)
1504 						goto drop;
1505 					if (sizeof(struct ip6_hdr) + jumbolen !=
1506 					    m->m_pkthdr.len)
1507 						goto drop;
1508 					break;
1509 				default:
1510 					break;
1511 				}
1512 				ooff += sizeof(opt) + opt.ip6o_len;
1513 			} while (ooff < optend);
1514 
1515 			off = optend;
1516 			proto = ext.ip6e_nxt;
1517 			break;
1518 		default:
1519 			terminal = 1;
1520 			break;
1521 		}
1522 	} while (!terminal);
1523 
1524 	/* jumbo payload option must be present, or plen > 0 */
1525 	if (ntohs(h->ip6_plen) == 0)
1526 		plen = jumbolen;
1527 	else
1528 		plen = ntohs(h->ip6_plen);
1529 	if (plen == 0)
1530 		goto drop;
1531 	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1532 		goto shortpkt;
1533 
1534 	pf_scrub_ip6(&m, r->min_ttl);
1535 
1536 	return (PF_PASS);
1537 
1538  fragment:
1539 	if (ntohs(h->ip6_plen) == 0 || jumbolen)
1540 		goto drop;
1541 	plen = ntohs(h->ip6_plen);
1542 
1543 	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1544 		goto shortpkt;
1545 	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1546 	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
1547 		goto badfrag;
1548 
1549 	/* do something about it */
1550 	/* remember to set pd->flags |= PFDESC_IP_REAS */
1551 	return (PF_PASS);
1552 
1553  shortpkt:
1554 	REASON_SET(reason, PFRES_SHORT);
1555 	if (r != NULL && r->log)
1556 		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1557 	return (PF_DROP);
1558 
1559  drop:
1560 	REASON_SET(reason, PFRES_NORM);
1561 	if (r != NULL && r->log)
1562 		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1563 	return (PF_DROP);
1564 
1565  badfrag:
1566 	REASON_SET(reason, PFRES_FRAG);
1567 	if (r != NULL && r->log)
1568 		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1569 	return (PF_DROP);
1570 }
1571 #endif /* INET6 */
1572 
1573 int
1574 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1575     int off, void *h, struct pf_pdesc *pd)
1576 {
1577 	struct pf_rule	*r, *rm = NULL;
1578 	struct tcphdr	*th = pd->hdr.tcp;
1579 	int		 rewrite = 0;
1580 	u_short		 reason;
1581 	u_int8_t	 flags;
1582 	sa_family_t	 af = pd->af;
1583 
1584 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1585 	while (r != NULL) {
1586 		r->evaluations++;
1587 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
1588 			r = r->skip[PF_SKIP_IFP].ptr;
1589 		else if (r->direction && r->direction != dir)
1590 			r = r->skip[PF_SKIP_DIR].ptr;
1591 		else if (r->af && r->af != af)
1592 			r = r->skip[PF_SKIP_AF].ptr;
1593 		else if (r->proto && r->proto != pd->proto)
1594 			r = r->skip[PF_SKIP_PROTO].ptr;
1595 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1596 		    r->src.neg, kif, M_GETFIB(m)))
1597 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1598 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
1599 			    r->src.port[0], r->src.port[1], th->th_sport))
1600 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
1601 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1602 		    r->dst.neg, NULL, M_GETFIB(m)))
1603 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1604 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1605 			    r->dst.port[0], r->dst.port[1], th->th_dport))
1606 			r = r->skip[PF_SKIP_DST_PORT].ptr;
1607 		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1608 			    pf_osfp_fingerprint(pd, m, off, th),
1609 			    r->os_fingerprint))
1610 			r = TAILQ_NEXT(r, entries);
1611 		else {
1612 			rm = r;
1613 			break;
1614 		}
1615 	}
1616 
1617 	if (rm == NULL || rm->action == PF_NOSCRUB)
1618 		return (PF_PASS);
1619 	else {
1620 		r->packets[dir == PF_OUT]++;
1621 		r->bytes[dir == PF_OUT] += pd->tot_len;
1622 	}
1623 
1624 	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1625 		pd->flags |= PFDESC_TCP_NORM;
1626 
1627 	flags = th->th_flags;
1628 	if (flags & TH_SYN) {
1629 		/* Illegal packet */
1630 		if (flags & TH_RST)
1631 			goto tcp_drop;
1632 
1633 		if (flags & TH_FIN)
1634 			flags &= ~TH_FIN;
1635 	} else {
1636 		/* Illegal packet */
1637 		if (!(flags & (TH_ACK|TH_RST)))
1638 			goto tcp_drop;
1639 	}
1640 
1641 	if (!(flags & TH_ACK)) {
1642 		/* These flags are only valid if ACK is set */
1643 		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1644 			goto tcp_drop;
1645 	}
1646 
1647 	/* Check for illegal header length */
1648 	if (th->th_off < (sizeof(struct tcphdr) >> 2))
1649 		goto tcp_drop;
1650 
1651 	/* If flags changed, or reserved data set, then adjust */
1652 	if (flags != th->th_flags || th->th_x2 != 0) {
1653 		u_int16_t	ov, nv;
1654 
1655 		ov = *(u_int16_t *)(&th->th_ack + 1);
1656 		th->th_flags = flags;
1657 		th->th_x2 = 0;
1658 		nv = *(u_int16_t *)(&th->th_ack + 1);
1659 
1660 		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0);
1661 		rewrite = 1;
1662 	}
1663 
1664 	/* Remove urgent pointer, if TH_URG is not set */
1665 	if (!(flags & TH_URG) && th->th_urp) {
1666 		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp,
1667 		    0, 0);
1668 		th->th_urp = 0;
1669 		rewrite = 1;
1670 	}
1671 
1672 	/* Process options */
1673 	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
1674 		rewrite = 1;
1675 
1676 	/* copy back packet headers if we sanitized */
1677 	if (rewrite)
1678 #ifdef __FreeBSD__
1679 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
1680 #else
1681 		m_copyback(m, off, sizeof(*th), th);
1682 #endif
1683 
1684 	return (PF_PASS);
1685 
1686  tcp_drop:
1687 	REASON_SET(&reason, PFRES_NORM);
1688 	if (rm != NULL && r->log)
1689 		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1690 	return (PF_DROP);
1691 }
1692 
1693 int
1694 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1695     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
1696 {
1697 	u_int32_t tsval, tsecr;
1698 	u_int8_t hdr[60];
1699 	u_int8_t *opt;
1700 
1701 #ifdef __FreeBSD__
1702 	KASSERT((src->scrub == NULL),
1703 	    ("pf_normalize_tcp_init: src->scrub != NULL"));
1704 
1705 	src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT);
1706 #else
1707 	KASSERT(src->scrub == NULL);
1708 
1709 	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1710 #endif
1711 	if (src->scrub == NULL)
1712 		return (1);
1713 	bzero(src->scrub, sizeof(*src->scrub));
1714 
1715 	switch (pd->af) {
1716 #ifdef INET
1717 	case AF_INET: {
1718 		struct ip *h = mtod(m, struct ip *);
1719 		src->scrub->pfss_ttl = h->ip_ttl;
1720 		break;
1721 	}
1722 #endif /* INET */
1723 #ifdef INET6
1724 	case AF_INET6: {
1725 		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1726 		src->scrub->pfss_ttl = h->ip6_hlim;
1727 		break;
1728 	}
1729 #endif /* INET6 */
1730 	}
1731 
1732 
1733 	/*
1734 	 * All normalizations below are only begun if we see the start of
1735 	 * the connections.  They must all set an enabled bit in pfss_flags
1736 	 */
1737 	if ((th->th_flags & TH_SYN) == 0)
1738 		return (0);
1739 
1740 
1741 	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
1742 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1743 		/* Diddle with TCP options */
1744 		int hlen;
1745 		opt = hdr + sizeof(struct tcphdr);
1746 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1747 		while (hlen >= TCPOLEN_TIMESTAMP) {
1748 			switch (*opt) {
1749 			case TCPOPT_EOL:	/* FALLTHROUGH */
1750 			case TCPOPT_NOP:
1751 				opt++;
1752 				hlen--;
1753 				break;
1754 			case TCPOPT_TIMESTAMP:
1755 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
1756 					src->scrub->pfss_flags |=
1757 					    PFSS_TIMESTAMP;
1758 					src->scrub->pfss_ts_mod =
1759 					    htonl(arc4random());
1760 
1761 					/* note PFSS_PAWS not set yet */
1762 					memcpy(&tsval, &opt[2],
1763 					    sizeof(u_int32_t));
1764 					memcpy(&tsecr, &opt[6],
1765 					    sizeof(u_int32_t));
1766 					src->scrub->pfss_tsval0 = ntohl(tsval);
1767 					src->scrub->pfss_tsval = ntohl(tsval);
1768 					src->scrub->pfss_tsecr = ntohl(tsecr);
1769 					getmicrouptime(&src->scrub->pfss_last);
1770 				}
1771 				/* FALLTHROUGH */
1772 			default:
1773 				hlen -= MAX(opt[1], 2);
1774 				opt += MAX(opt[1], 2);
1775 				break;
1776 			}
1777 		}
1778 	}
1779 
1780 	return (0);
1781 }
1782 
1783 void
1784 pf_normalize_tcp_cleanup(struct pf_state *state)
1785 {
1786 #ifdef __FreeBSD__
1787 	if (state->src.scrub)
1788 		pool_put(&V_pf_state_scrub_pl, state->src.scrub);
1789 	if (state->dst.scrub)
1790 		pool_put(&V_pf_state_scrub_pl, state->dst.scrub);
1791 #else
1792 	if (state->src.scrub)
1793 		pool_put(&pf_state_scrub_pl, state->src.scrub);
1794 	if (state->dst.scrub)
1795 		pool_put(&pf_state_scrub_pl, state->dst.scrub);
1796 #endif
1797 
1798 	/* Someday... flush the TCP segment reassembly descriptors. */
1799 }
1800 
1801 int
1802 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1803     u_short *reason, struct tcphdr *th, struct pf_state *state,
1804     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1805 {
1806 	struct timeval uptime;
1807 	u_int32_t tsval, tsecr;
1808 	u_int tsval_from_last;
1809 	u_int8_t hdr[60];
1810 	u_int8_t *opt;
1811 	int copyback = 0;
1812 	int got_ts = 0;
1813 
1814 #ifdef __FreeBSD__
1815 	KASSERT((src->scrub || dst->scrub),
1816 	    ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!"));
1817 #else
1818 	KASSERT(src->scrub || dst->scrub);
1819 #endif
1820 
1821 	/*
1822 	 * Enforce the minimum TTL seen for this connection.  Negate a common
1823 	 * technique to evade an intrusion detection system and confuse
1824 	 * firewall state code.
1825 	 */
1826 	switch (pd->af) {
1827 #ifdef INET
1828 	case AF_INET: {
1829 		if (src->scrub) {
1830 			struct ip *h = mtod(m, struct ip *);
1831 			if (h->ip_ttl > src->scrub->pfss_ttl)
1832 				src->scrub->pfss_ttl = h->ip_ttl;
1833 			h->ip_ttl = src->scrub->pfss_ttl;
1834 		}
1835 		break;
1836 	}
1837 #endif /* INET */
1838 #ifdef INET6
1839 	case AF_INET6: {
1840 		if (src->scrub) {
1841 			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1842 			if (h->ip6_hlim > src->scrub->pfss_ttl)
1843 				src->scrub->pfss_ttl = h->ip6_hlim;
1844 			h->ip6_hlim = src->scrub->pfss_ttl;
1845 		}
1846 		break;
1847 	}
1848 #endif /* INET6 */
1849 	}
1850 
1851 	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1852 	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1853 	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1854 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1855 		/* Diddle with TCP options */
1856 		int hlen;
1857 		opt = hdr + sizeof(struct tcphdr);
1858 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1859 		while (hlen >= TCPOLEN_TIMESTAMP) {
1860 			switch (*opt) {
1861 			case TCPOPT_EOL:	/* FALLTHROUGH */
1862 			case TCPOPT_NOP:
1863 				opt++;
1864 				hlen--;
1865 				break;
1866 			case TCPOPT_TIMESTAMP:
1867 				/* Modulate the timestamps.  Can be used for
1868 				 * NAT detection, OS uptime determination or
1869 				 * reboot detection.
1870 				 */
1871 
1872 				if (got_ts) {
1873 					/* Huh?  Multiple timestamps!? */
1874 #ifdef __FreeBSD__
1875 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
1876 #else
1877 					if (pf_status.debug >= PF_DEBUG_MISC) {
1878 #endif
1879 						DPFPRINTF(("multiple TS??"));
1880 						pf_print_state(state);
1881 						printf("\n");
1882 					}
1883 					REASON_SET(reason, PFRES_TS);
1884 					return (PF_DROP);
1885 				}
1886 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
1887 					memcpy(&tsval, &opt[2],
1888 					    sizeof(u_int32_t));
1889 					if (tsval && src->scrub &&
1890 					    (src->scrub->pfss_flags &
1891 					    PFSS_TIMESTAMP)) {
1892 						tsval = ntohl(tsval);
1893 						pf_change_proto_a(m, &opt[2],
1894 						    &th->th_sum,
1895 						    htonl(tsval +
1896 						    src->scrub->pfss_ts_mod),
1897 						    0);
1898 						copyback = 1;
1899 					}
1900 
1901 					/* Modulate TS reply iff valid (!0) */
1902 					memcpy(&tsecr, &opt[6],
1903 					    sizeof(u_int32_t));
1904 					if (tsecr && dst->scrub &&
1905 					    (dst->scrub->pfss_flags &
1906 					    PFSS_TIMESTAMP)) {
1907 						tsecr = ntohl(tsecr)
1908 						    - dst->scrub->pfss_ts_mod;
1909 						pf_change_proto_a(m, &opt[6],
1910 						    &th->th_sum, htonl(tsecr),
1911 						    0);
1912 						copyback = 1;
1913 					}
1914 					got_ts = 1;
1915 				}
1916 				/* FALLTHROUGH */
1917 			default:
1918 				hlen -= MAX(opt[1], 2);
1919 				opt += MAX(opt[1], 2);
1920 				break;
1921 			}
1922 		}
1923 		if (copyback) {
			/* Copy back the options; caller copies back header */
1925 			*writeback = 1;
1926 			m_copyback(m, off + sizeof(struct tcphdr),
1927 			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1928 			    sizeof(struct tcphdr));
1929 		}
1930 	}
1931 
1932 
1933 	/*
1934 	 * Must invalidate PAWS checks on connections idle for too long.
1935 	 * The fastest allowed timestamp clock is 1ms.  That turns out to
1936 	 * be about 24 days before it wraps.  XXX Right now our lowerbound
1937 	 * TS echo check only works for the first 12 days of a connection
1938 	 * when the TS has exhausted half its 32bit space
1939 	 */
1940 #define TS_MAX_IDLE	(24*24*60*60)
1941 #define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
1942 
1943 	getmicrouptime(&uptime);
1944 	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1945 	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1946 	    time_second - state->creation > TS_MAX_CONN))  {
1947 #ifdef __FreeBSD__
1948 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
1949 #else
1950 		if (pf_status.debug >= PF_DEBUG_MISC) {
1951 #endif
1952 			DPFPRINTF(("src idled out of PAWS\n"));
1953 			pf_print_state(state);
1954 			printf("\n");
1955 		}
1956 		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1957 		    | PFSS_PAWS_IDLED;
1958 	}
1959 	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1960 	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1961 #ifdef __FreeBSD__
1962 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
1963 #else
1964 		if (pf_status.debug >= PF_DEBUG_MISC) {
1965 #endif
1966 			DPFPRINTF(("dst idled out of PAWS\n"));
1967 			pf_print_state(state);
1968 			printf("\n");
1969 		}
1970 		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1971 		    | PFSS_PAWS_IDLED;
1972 	}
1973 
1974 	if (got_ts && src->scrub && dst->scrub &&
1975 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
1976 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
1977 		/* Validate that the timestamps are "in-window".
1978 		 * RFC1323 describes TCP Timestamp options that allow
1979 		 * measurement of RTT (round trip time) and PAWS
1980 		 * (protection against wrapped sequence numbers).  PAWS
1981 		 * gives us a set of rules for rejecting packets on
1982 		 * long fat pipes (packets that were somehow delayed
1983 		 * in transit longer than the time it took to send the
1984 		 * full TCP sequence space of 4Gb).  We can use these
1985 		 * rules and infer a few others that will let us treat
1986 		 * the 32bit timestamp and the 32bit echoed timestamp
1987 		 * as sequence numbers to prevent a blind attacker from
1988 		 * inserting packets into a connection.
1989 		 *
1990 		 * RFC1323 tells us:
1991 		 *  - The timestamp on this packet must be greater than
1992 		 *    or equal to the last value echoed by the other
1993 		 *    endpoint.  The RFC says those will be discarded
1994 		 *    since it is a dup that has already been acked.
1995 		 *    This gives us a lowerbound on the timestamp.
1996 		 *        timestamp >= other last echoed timestamp
1997 		 *  - The timestamp will be less than or equal to
1998 		 *    the last timestamp plus the time between the
1999 		 *    last packet and now.  The RFC defines the max
2000 		 *    clock rate as 1ms.  We will allow clocks to be
2001 		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
2003 		 *    gives us an upperbound on the timestamp.
2004 		 *        timestamp <= last timestamp + max ticks
2005 		 *    We have to be careful here.  Windows will send an
2006 		 *    initial timestamp of zero and then initialize it
2007 		 *    to a random value after the 3whs; presumably to
2008 		 *    avoid a DoS by having to call an expensive RNG
2009 		 *    during a SYN flood.  Proof MS has at least one
2010 		 *    good security geek.
2011 		 *
2012 		 *  - The TCP timestamp option must also echo the other
2013 		 *    endpoints timestamp.  The timestamp echoed is the
2014 		 *    one carried on the earliest unacknowledged segment
2015 		 *    on the left edge of the sequence window.  The RFC
2016 		 *    states that the host will reject any echoed
2017 		 *    timestamps that were larger than any ever sent.
2018 		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
2020 		 *  - The lowerbound on the TS echo is a little more
2021 		 *    tricky to determine.  The other endpoint's echoed
2022 		 *    values will not decrease.  But there may be
2023 		 *    network conditions that re-order packets and
2024 		 *    cause our view of them to decrease.  For now the
2025 		 *    only lowerbound we can safely determine is that
2026 		 *    the TS echo will never be less than the original
2027 		 *    TS.  XXX There is probably a better lowerbound.
2028 		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
2030 		 *
2031 		 * It is also important to note that the fastest
2032 		 * timestamp clock of 1ms will wrap its 32bit space in
2033 		 * 24 days.  So we just disable TS checking after 24
2034 		 * days of idle time.  We actually must use a 12d
2035 		 * connection limit until we can come up with a better
2036 		 * lowerbound to the TS echo check.
2037 		 */
2038 		struct timeval delta_ts;
2039 		int ts_fudge;
2040 
2041 
2042 		/*
2043 		 * PFTM_TS_DIFF is how many seconds of leeway to allow
2044 		 * a host's timestamp.  This can happen if the previous
2045 		 * packet got delayed in transit for much longer than
2046 		 * this packet.
2047 		 */
2048 		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2049 #ifdef __FreeBSD__
2050 			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
2051 #else
2052 			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2053 #endif
2054 
2055 
2056 		/* Calculate max ticks since the last timestamp */
2057 #define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
2058 #define TS_MICROSECS	1000000		/* microseconds per second */
2059 #ifdef __FreeBSD__
2060 #ifndef timersub
2061 #define	timersub(tvp, uvp, vvp)						\
2062 	do {								\
2063 		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
2064 		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
2065 		if ((vvp)->tv_usec < 0) {				\
2066 			(vvp)->tv_sec--;				\
2067 			(vvp)->tv_usec += 1000000;			\
2068 		}							\
2069 	} while (0)
2070 #endif
2071 #endif
2072 		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2073 		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2074 		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
2075 
2076 
2077 		if ((src->state >= TCPS_ESTABLISHED &&
2078 		    dst->state >= TCPS_ESTABLISHED) &&
2079 		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2080 		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2081 		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2082 		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2083 			/* Bad RFC1323 implementation or an insertion attack.
2084 			 *
2085 			 * - Solaris 2.6 and 2.7 are known to send another ACK
2086 			 *   after the FIN,FIN|ACK,ACK closing that carries
2087 			 *   an old timestamp.
2088 			 */
2089 
2090 			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2091 			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2092 			    SEQ_GT(tsval, src->scrub->pfss_tsval +
2093 			    tsval_from_last) ? '1' : ' ',
2094 			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2095 			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2096 #ifdef __FreeBSD__
2097 			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
2098 			    "idle: %jus %lums\n",
2099 			    tsval, tsecr, tsval_from_last,
2100 			    (uintmax_t)delta_ts.tv_sec,
2101 			    delta_ts.tv_usec / 1000));
2102 			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
2103 			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2104 			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
2105 			    "\n", dst->scrub->pfss_tsval,
2106 			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
2107 #else
2108 			DPFPRINTF((" tsval: %lu  tsecr: %lu  +ticks: %lu  "
2109 			    "idle: %lus %lums\n",
2110 			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2111 			    delta_ts.tv_usec / 1000));
2112 			DPFPRINTF((" src->tsval: %lu  tsecr: %lu\n",
2113 			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2114 			DPFPRINTF((" dst->tsval: %lu  tsecr: %lu  tsval0: %lu"
2115 			    "\n", dst->scrub->pfss_tsval,
2116 			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
2117 #endif
2118 #ifdef __FreeBSD__
2119 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
2120 #else
2121 			if (pf_status.debug >= PF_DEBUG_MISC) {
2122 #endif
2123 				pf_print_state(state);
2124 				pf_print_flags(th->th_flags);
2125 				printf("\n");
2126 			}
2127 			REASON_SET(reason, PFRES_TS);
2128 			return (PF_DROP);
2129 		}
2130 
2131 		/* XXX I'd really like to require tsecr but it's optional */
2132 
2133 	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2134 	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2135 	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2136 	    src->scrub && dst->scrub &&
2137 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
2138 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
2139 		/* Didn't send a timestamp.  Timestamps aren't really useful
2140 		 * when:
2141 		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
2143 		 *    data packet to sneak it through our ESTABLISHED check.
2144 		 *  - on a TCP reset.  RFC suggests not even looking at TS.
2145 		 *  - on an empty ACK.  The TS will not be echoed so it will
2146 		 *    probably not help keep the RTT calculation in sync and
2147 		 *    there isn't as much danger when the sequence numbers
2148 		 *    got wrapped.  So some stacks don't include TS on empty
2149 		 *    ACKs :-(
2150 		 *
2151 		 * To minimize the disruption to mostly RFC1323 conformant
2152 		 * stacks, we will only require timestamps on data packets.
2153 		 *
2154 		 * And what do ya know, we cannot require timestamps on data
2155 		 * packets.  There appear to be devices that do legitimate
2156 		 * TCP connection hijacking.  There are HTTP devices that allow
2157 		 * a 3whs (with timestamps) and then buffer the HTTP request.
2158 		 * If the intermediate device has the HTTP response cache, it
2159 		 * will spoof the response but not bother timestamping its
2160 		 * packets.  So we can look for the presence of a timestamp in
2161 		 * the first data packet and if there, require it in all future
2162 		 * packets.
2163 		 */
2164 
2165 		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2166 			/*
2167 			 * Hey!  Someone tried to sneak a packet in.  Or the
2168 			 * stack changed its RFC1323 behavior?!?!
2169 			 */
2170 #ifdef __FreeBSD__
2171 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
2172 #else
2173 			if (pf_status.debug >= PF_DEBUG_MISC) {
2174 #endif
2175 				DPFPRINTF(("Did not receive expected RFC1323 "
2176 				    "timestamp\n"));
2177 				pf_print_state(state);
2178 				pf_print_flags(th->th_flags);
2179 				printf("\n");
2180 			}
2181 			REASON_SET(reason, PFRES_TS);
2182 			return (PF_DROP);
2183 		}
2184 	}
2185 
2186 
2187 	/*
2188 	 * We will note if a host sends his data packets with or without
2189 	 * timestamps.  And require all data packets to contain a timestamp
2190 	 * if the first does.  PAWS implicitly requires that all data packets be
2191 	 * timestamped.  But I think there are middle-man devices that hijack
2192 	 * TCP streams immediately after the 3whs and don't timestamp their
2193 	 * packets (seen in a WWW accelerator or cache).
2194 	 */
2195 	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2196 	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2197 		if (got_ts)
2198 			src->scrub->pfss_flags |= PFSS_DATA_TS;
2199 		else {
2200 			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2201 #ifdef __FreeBSD__
2202 			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2203 #else
2204 			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2205 #endif
2206 			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2207 				/* Don't warn if other host rejected RFC1323 */
2208 				DPFPRINTF(("Broken RFC1323 stack did not "
2209 				    "timestamp data packet. Disabled PAWS "
2210 				    "security.\n"));
2211 				pf_print_state(state);
2212 				pf_print_flags(th->th_flags);
2213 				printf("\n");
2214 			}
2215 		}
2216 	}
2217 
2218 
2219 	/*
2220 	 * Update PAWS values
2221 	 */
2222 	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2223 	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2224 		getmicrouptime(&src->scrub->pfss_last);
2225 		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2226 		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2227 			src->scrub->pfss_tsval = tsval;
2228 
2229 		if (tsecr) {
2230 			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2231 			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2232 				src->scrub->pfss_tsecr = tsecr;
2233 
2234 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2235 			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2236 			    src->scrub->pfss_tsval0 == 0)) {
2237 				/* tsval0 MUST be the lowest timestamp */
2238 				src->scrub->pfss_tsval0 = tsval;
2239 			}
2240 
2241 			/* Only fully initialized after a TS gets echoed */
2242 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2243 				src->scrub->pfss_flags |= PFSS_PAWS;
2244 		}
2245 	}
2246 
2247 	/* I have a dream....  TCP segment reassembly.... */
2248 	return (0);
2249 }
2250 
2251 int
2252 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
2253     int off, sa_family_t af)
2254 {
2255 	u_int16_t	*mss;
2256 	int		 thoff;
2257 	int		 opt, cnt, optlen = 0;
2258 	int		 rewrite = 0;
2259 #ifdef __FreeBSD__
2260 	u_char		 opts[TCP_MAXOLEN];
2261 #else
2262 	u_char		 opts[MAX_TCPOPTLEN];
2263 #endif
2264 	u_char		*optp = opts;
2265 
2266 	thoff = th->th_off << 2;
2267 	cnt = thoff - sizeof(struct tcphdr);
2268 
2269 	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
2270 	    NULL, NULL, af))
2271 		return (rewrite);
2272 
2273 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
2274 		opt = optp[0];
2275 		if (opt == TCPOPT_EOL)
2276 			break;
2277 		if (opt == TCPOPT_NOP)
2278 			optlen = 1;
2279 		else {
2280 			if (cnt < 2)
2281 				break;
2282 			optlen = optp[1];
2283 			if (optlen < 2 || optlen > cnt)
2284 				break;
2285 		}
2286 		switch (opt) {
2287 		case TCPOPT_MAXSEG:
2288 			mss = (u_int16_t *)(optp + 2);
2289 			if ((ntohs(*mss)) > r->max_mss) {
2290 				th->th_sum = pf_proto_cksum_fixup(m,
2291 				    th->th_sum, *mss, htons(r->max_mss), 0);
2292 				*mss = htons(r->max_mss);
2293 				rewrite = 1;
2294 			}
2295 			break;
2296 		default:
2297 			break;
2298 		}
2299 	}
2300 
2301 	if (rewrite)
2302 		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
2303 
2304 	return (rewrite);
2305 }
2306 
2307 void
2308 pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
2309 {
2310 	struct mbuf		*m = *m0;
2311 	struct ip		*h = mtod(m, struct ip *);
2312 
2313 	/* Clear IP_DF if no-df was requested */
2314 	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
2315 		u_int16_t ip_off = h->ip_off;
2316 
2317 		h->ip_off &= htons(~IP_DF);
2318 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
2319 	}
2320 
2321 	/* Enforce a minimum ttl, may cause endless packet loops */
2322 	if (min_ttl && h->ip_ttl < min_ttl) {
2323 		u_int16_t ip_ttl = h->ip_ttl;
2324 
2325 		h->ip_ttl = min_ttl;
2326 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2327 	}
2328 
2329 	/* Enforce tos */
2330 	if (flags & PFRULE_SET_TOS) {
2331 		u_int16_t	ov, nv;
2332 
2333 		ov = *(u_int16_t *)h;
2334 		h->ip_tos = tos;
2335 		nv = *(u_int16_t *)h;
2336 
2337 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
2338 	}
2339 
2340 	/* random-id, but not for fragments */
2341 	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
2342 		u_int16_t ip_id = h->ip_id;
2343 
2344 		h->ip_id = ip_randomid();
2345 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
2346 	}
2347 }
2348 
2349 #ifdef INET6
2350 void
2351 pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
2352 {
2353 	struct mbuf		*m = *m0;
2354 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
2355 
2356 	/* Enforce a minimum ttl, may cause endless packet loops */
2357 	if (min_ttl && h->ip6_hlim < min_ttl)
2358 		h->ip6_hlim = min_ttl;
2359 }
2360 #endif
2361