1 /*        $NetBSD: pf_norm.c,v 1.30 2023/08/07 23:28:58 mrg Exp $     */
2 /*        $OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */
3 
4 /*
5  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: pf_norm.c,v 1.30 2023/08/07 23:28:58 mrg Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include "pflog.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/mbuf.h>
41 #include <sys/filio.h>
42 #include <sys/fcntl.h>
43 #include <sys/socket.h>
44 #include <sys/kernel.h>
45 #include <sys/time.h>
46 #include <sys/pool.h>
47 
48 #ifdef __NetBSD__
49 #include <sys/cprng.h>
50 #else
51 #include <dev/rndvar.h>
52 #endif /* !__NetBSD__ */
53 #include <net/if.h>
54 #include <net/if_types.h>
55 #include <net/bpf.h>
56 #include <net/route.h>
57 #include <net/if_pflog.h>
58 
59 #include <netinet/in.h>
60 #include <netinet/in_var.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/tcp.h>
65 #include <netinet/tcp_seq.h>
66 #include <netinet/udp.h>
67 #include <netinet/ip_icmp.h>
68 
69 #ifdef INET6
70 #include <netinet/ip6.h>
71 #endif /* INET6 */
72 
73 #include <net/pfvar.h>
74 
/*
 * One buffered IP fragment.  Entries are linked on the fr_queue list of
 * the owning struct pf_fragment (the buffering variant of the union).
 */
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;	/* linkage on pf_fragment.fr_queue */
	struct ip *fr_ip;		/* IP header of this fragment */
	struct mbuf *fr_m;		/* mbuf chain holding this fragment */
};
80 
/*
 * One byte range of an IP packet that the non-buffering fragment cache
 * has already seen.  fr_end is the offset one past the last byte
 * (offset + payload length), so two ranges are adjacent when one's
 * fr_end equals the other's fr_off.
 */
struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;	/* linkage on pf_fragment.fr_cache */
	uint16_t	fr_off;		/* first byte offset of the range */
	uint16_t	fr_end;		/* end offset of the range (exclusive) */
};
86 
/* Bits stored in pf_fragment.fr_flags. */
#define PFFRAG_SEENLAST	0x0001	/* Seen the last fragment of this packet */
#define PFFRAG_NOBUFFER	0x0002	/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004	/* Drop all fragments */

/*
 * True when `fr' buffers its fragments (uses fr_queue); false when it
 * only tracks seen byte ranges (uses fr_cache).
 */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
91 
92 struct pf_fragment {
93           RB_ENTRY(pf_fragment) fr_entry;
94           TAILQ_ENTRY(pf_fragment) frag_next;
95           struct in_addr      fr_src;
96           struct in_addr      fr_dst;
97           u_int8_t  fr_p;               /* protocol of this fragment */
98           u_int8_t  fr_flags; /* status flags */
99           u_int16_t fr_id;              /* fragment id for reassemble */
100           u_int16_t fr_max;             /* fragment data max */
101           u_int32_t fr_timeout;
102 #define fr_queue    fr_u.fru_queue
103 #define fr_cache    fr_u.fru_cache
104           union {
105                     LIST_HEAD(pf_fragq, pf_frent) fru_queue;          /* buffering */
106                     LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;       /* non-buf */
107           } fr_u;
108 };
109 
110 TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
111 TAILQ_HEAD(pf_cachequeue, pf_fragment)  pf_cachequeue;
112 
113 static __inline int  pf_frag_compare(struct pf_fragment *,
114                                   struct pf_fragment *);
115 RB_HEAD(pf_frag_tree, pf_fragment)      pf_frag_tree, pf_cache_tree;
116 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
117 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
118 
/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
struct mbuf		*pf_fragcache(struct mbuf **, struct ip*,
			    struct pf_fragment **, int, int, int *);
int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int);
131 
/*
 * Debug printf, active only when pf_status.debug >= PF_DEBUG_MISC.
 * The argument is a complete parenthesised printf argument list, so the
 * macro is invoked with double parentheses:
 *	DPFPRINTF(("fmt %d\n", value));
 * Output is prefixed with the name of the calling function.
 */
#define	DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while (0)
138 
139 /* Globals */
140 struct pool                    pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
141 struct pool                    pf_state_scrub_pl;
142 int                            pf_nfrents, pf_ncache;
143 
144 void
pf_normalize_init(void)145 pf_normalize_init(void)
146 {
147 #ifdef __NetBSD__
148           pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
149               NULL, IPL_SOFTNET);
150           pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
151               NULL, IPL_SOFTNET);
152           pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
153               "pffrcache", NULL, IPL_SOFTNET);
154           pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
155               NULL, IPL_SOFTNET);
156           pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
157               "pfstscr", NULL, IPL_SOFTNET);
158 #else
159           pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
160               NULL);
161           pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
162               NULL);
163           pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
164               "pffrcache", NULL);
165           pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
166               NULL);
167           pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
168               "pfstscr", NULL);
169 #endif /* !__NetBSD__ */
170 
171           pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
172           pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
173           pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
174           pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
175 
176           TAILQ_INIT(&pf_fragqueue);
177           TAILQ_INIT(&pf_cachequeue);
178 }
179 
#ifdef _MODULE
/*
 * Tear down the pools created by pf_normalize_init(), in the reverse of
 * their creation order.  Only built when compiled as a module.
 */
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif /* _MODULE */
191 
192 static __inline int
pf_frag_compare(struct pf_fragment * a,struct pf_fragment * b)193 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
194 {
195           int       diff;
196 
197           if ((diff = a->fr_id - b->fr_id))
198                     return (diff);
199           else if ((diff = a->fr_p - b->fr_p))
200                     return (diff);
201           else if (a->fr_src.s_addr < b->fr_src.s_addr)
202                     return (-1);
203           else if (a->fr_src.s_addr > b->fr_src.s_addr)
204                     return (1);
205           else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
206                     return (-1);
207           else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
208                     return (1);
209           return (0);
210 }
211 
212 void
pf_purge_expired_fragments(void)213 pf_purge_expired_fragments(void)
214 {
215           struct pf_fragment  *frag;
216           u_int32_t            expire = time_second -
217                                             pf_default_rule.timeout[PFTM_FRAG];
218 
219           while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
220                     KASSERT(BUFFER_FRAGMENTS(frag));
221                     if (frag->fr_timeout > expire)
222                               break;
223 
224                     DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
225                     pf_free_fragment(frag);
226           }
227 
228           while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
229                     KASSERT(!BUFFER_FRAGMENTS(frag));
230                     if (frag->fr_timeout > expire)
231                               break;
232 
233                     DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
234                     pf_free_fragment(frag);
235                     KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
236                         TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
237           }
238 }
239 
240 /*
241  * Try to flush old fragments to make space for new ones
242  */
243 
244 void
pf_flush_fragments(void)245 pf_flush_fragments(void)
246 {
247           struct pf_fragment  *frag;
248           int                            goal;
249 
250           goal = pf_nfrents * 9 / 10;
251           DPFPRINTF(("trying to free > %d frents\n",
252               pf_nfrents - goal));
253           while (goal < pf_nfrents) {
254                     frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
255                     if (frag == NULL)
256                               break;
257                     pf_free_fragment(frag);
258           }
259 
260 
261           goal = pf_ncache * 9 / 10;
262           DPFPRINTF(("trying to free > %d cache entries\n",
263               pf_ncache - goal));
264           while (goal < pf_ncache) {
265                     frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
266                     if (frag == NULL)
267                               break;
268                     pf_free_fragment(frag);
269           }
270 }
271 
272 /* Frees the fragments and all associated entries */
273 
274 void
pf_free_fragment(struct pf_fragment * frag)275 pf_free_fragment(struct pf_fragment *frag)
276 {
277           struct pf_frent               *frent;
278           struct pf_frcache   *frcache;
279 
280           /* Free all fragments */
281           if (BUFFER_FRAGMENTS(frag)) {
282                     for (frent = LIST_FIRST(&frag->fr_queue); frent;
283                         frent = LIST_FIRST(&frag->fr_queue)) {
284                               LIST_REMOVE(frent, fr_next);
285 
286                               m_freem(frent->fr_m);
287                               pool_put(&pf_frent_pl, frent);
288                               pf_nfrents--;
289                     }
290           } else {
291                     for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
292                         frcache = LIST_FIRST(&frag->fr_cache)) {
293                               LIST_REMOVE(frcache, fr_next);
294 
295                               KASSERT(LIST_EMPTY(&frag->fr_cache) ||
296                                   LIST_FIRST(&frag->fr_cache)->fr_off >
297                                   frcache->fr_end);
298 
299                               pool_put(&pf_cent_pl, frcache);
300                               pf_ncache--;
301                     }
302           }
303 
304           pf_remove_fragment(frag);
305 }
306 
307 void
pf_ip2key(struct pf_fragment * key,struct ip * ip)308 pf_ip2key(struct pf_fragment *key, struct ip *ip)
309 {
310           key->fr_p = ip->ip_p;
311           key->fr_id = ip->ip_id;
312           key->fr_src.s_addr = ip->ip_src.s_addr;
313           key->fr_dst.s_addr = ip->ip_dst.s_addr;
314 }
315 
316 struct pf_fragment *
pf_find_fragment(struct ip * ip,struct pf_frag_tree * tree)317 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
318 {
319           struct pf_fragment   key;
320           struct pf_fragment  *frag;
321 
322           pf_ip2key(&key, ip);
323 
324           frag = RB_FIND(pf_frag_tree, tree, &key);
325           if (frag != NULL) {
326                     /* XXX Are we sure we want to update the timeout? */
327                     frag->fr_timeout = time_second;
328                     if (BUFFER_FRAGMENTS(frag)) {
329                               TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
330                               TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
331                     } else {
332                               TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
333                               TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
334                     }
335           }
336 
337           return (frag);
338 }
339 
340 /* Removes a fragment from the fragment queue and frees the fragment */
341 
342 void
pf_remove_fragment(struct pf_fragment * frag)343 pf_remove_fragment(struct pf_fragment *frag)
344 {
345           if (BUFFER_FRAGMENTS(frag)) {
346                     RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
347                     TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
348                     pool_put(&pf_frag_pl, frag);
349           } else {
350                     RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
351                     TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
352                     pool_put(&pf_cache_pl, frag);
353           }
354 }
355 
356 #define FR_IP_OFF(fr)         ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
357 struct mbuf *
pf_reassemble(struct mbuf ** m0,struct pf_fragment ** frag,struct pf_frent * frent,int mff)358 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
359     struct pf_frent *frent, int mff)
360 {
361           struct mbuf         *m = *m0, *m2;
362           struct pf_frent     *frea, *next;
363           struct pf_frent     *frep = NULL;
364           struct ip *ip = frent->fr_ip;
365           int                  hlen = ip->ip_hl << 2;
366           u_int16_t  off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
367           u_int16_t  ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
368           u_int16_t  frmax = ip_len + off;
369 
370           KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
371 
372           /* Strip off ip header */
373           m->m_data += hlen;
374           m->m_len -= hlen;
375 
376           /* Create a new reassembly queue for this packet */
377           if (*frag == NULL) {
378                     *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
379                     if (*frag == NULL) {
380                               pf_flush_fragments();
381                               *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
382                               if (*frag == NULL)
383                                         goto drop_fragment;
384                     }
385 
386                     (*frag)->fr_flags = 0;
387                     (*frag)->fr_max = 0;
388                     (*frag)->fr_src = frent->fr_ip->ip_src;
389                     (*frag)->fr_dst = frent->fr_ip->ip_dst;
390                     (*frag)->fr_p = frent->fr_ip->ip_p;
391                     (*frag)->fr_id = frent->fr_ip->ip_id;
392                     (*frag)->fr_timeout = time_second;
393                     LIST_INIT(&(*frag)->fr_queue);
394 
395                     RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
396                     TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
397 
398                     /* We do not have a previous fragment */
399                     frep = NULL;
400                     goto insert;
401           }
402 
403           /*
404            * Find a fragment after the current one:
405            *  - off contains the real shifted offset.
406            */
407           LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
408                     if (FR_IP_OFF(frea) > off)
409                               break;
410                     frep = frea;
411           }
412 
413           KASSERT(frep != NULL || frea != NULL);
414 
415           if (frep != NULL &&
416               FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
417               4 > off)
418           {
419                     u_int16_t precut;
420 
421                     precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
422                         frep->fr_ip->ip_hl * 4 - off;
423                     if (precut >= ip_len)
424                               goto drop_fragment;
425                     m_adj(frent->fr_m, precut);
426                     DPFPRINTF(("overlap -%d\n", precut));
427                     /* Enforce 8 byte boundaries */
428                     ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
429                     off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
430                     ip_len -= precut;
431                     ip->ip_len = htons(ip_len);
432           }
433 
434           for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
435               frea = next)
436           {
437                     u_int16_t aftercut;
438 
439                     aftercut = ip_len + off - FR_IP_OFF(frea);
440                     DPFPRINTF(("adjust overlap %d\n", aftercut));
441                     if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
442                         * 4)
443                     {
444                               frea->fr_ip->ip_len =
445                                   htons(ntohs(frea->fr_ip->ip_len) - aftercut);
446                               frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
447                                   (aftercut >> 3));
448                               m_adj(frea->fr_m, aftercut);
449                               break;
450                     }
451 
452                     /* This fragment is completely overlapped, lose it */
453                     next = LIST_NEXT(frea, fr_next);
454                     m_freem(frea->fr_m);
455                     LIST_REMOVE(frea, fr_next);
456                     pool_put(&pf_frent_pl, frea);
457                     pf_nfrents--;
458           }
459 
460  insert:
461           /* Update maximum data size */
462           if ((*frag)->fr_max < frmax)
463                     (*frag)->fr_max = frmax;
464           /* This is the last segment */
465           if (!mff)
466                     (*frag)->fr_flags |= PFFRAG_SEENLAST;
467 
468           if (frep == NULL)
469                     LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
470           else
471                     LIST_INSERT_AFTER(frep, frent, fr_next);
472 
473           /* Check if we are completely reassembled */
474           if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
475                     return (NULL);
476 
477           /* Check if we have all the data */
478           off = 0;
479           for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
480                     next = LIST_NEXT(frep, fr_next);
481 
482                     off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
483                     if (off < (*frag)->fr_max &&
484                         (next == NULL || FR_IP_OFF(next) != off))
485                     {
486                               DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
487                                   off, next == NULL ? -1 : FR_IP_OFF(next),
488                                   (*frag)->fr_max));
489                               return (NULL);
490                     }
491           }
492           DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
493           if (off < (*frag)->fr_max)
494                     return (NULL);
495 
496           /* We have all the data */
497           frent = LIST_FIRST(&(*frag)->fr_queue);
498           KASSERT(frent != NULL);
499           if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
500                     DPFPRINTF(("drop: too big: %d\n", off));
501                     pf_free_fragment(*frag);
502                     *frag = NULL;
503                     return (NULL);
504           }
505           next = LIST_NEXT(frent, fr_next);
506 
507           /* Magic from ip_input */
508           ip = frent->fr_ip;
509           m = frent->fr_m;
510           m2 = m->m_next;
511           m->m_next = NULL;
512           m_cat(m, m2);
513           pool_put(&pf_frent_pl, frent);
514           pf_nfrents--;
515           for (frent = next; frent != NULL; frent = next) {
516                     next = LIST_NEXT(frent, fr_next);
517 
518                     m2 = frent->fr_m;
519                     pool_put(&pf_frent_pl, frent);
520                     pf_nfrents--;
521                     m_cat(m, m2);
522           }
523 
524           ip->ip_src = (*frag)->fr_src;
525           ip->ip_dst = (*frag)->fr_dst;
526 
527           /* Remove from fragment queue */
528           pf_remove_fragment(*frag);
529           *frag = NULL;
530 
531           hlen = ip->ip_hl << 2;
532           ip->ip_len = htons(off + hlen);
533           m->m_len += hlen;
534           m->m_data -= hlen;
535 
536           /* some debugging cruft by sklower, below, will go away soon */
537           /* XXX this should be done elsewhere */
538           if (m->m_flags & M_PKTHDR) {
539                     int plen = 0;
540                     for (m2 = m; m2; m2 = m2->m_next)
541                               plen += m2->m_len;
542                     m->m_pkthdr.len = plen;
543 #ifdef __NetBSD__
544                     m->m_pkthdr.csum_flags = 0;
545 #endif /* __NetBSD__ */
546           }
547 
548           DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
549           return (m);
550 
551  drop_fragment:
552           /* Oops - fail safe - drop packet */
553           pool_put(&pf_frent_pl, frent);
554           pf_nfrents--;
555           m_freem(m);
556           return (NULL);
557 }
558 
559 struct mbuf *
pf_fragcache(struct mbuf ** m0,struct ip * h,struct pf_fragment ** frag,int mff,int drop,int * nomem)560 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
561     int drop, int *nomem)
562 {
563           struct mbuf                   *m = *m0;
564           struct pf_frcache   *frp, *fra, *cur = NULL;
565           int                            ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
566           u_int16_t            off = ntohs(h->ip_off) << 3;
567           u_int16_t            frmax = ip_len + off;
568           int                            hosed = 0;
569 
570           KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
571 
572           /* Create a new range queue for this packet */
573           if (*frag == NULL) {
574                     *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
575                     if (*frag == NULL) {
576                               pf_flush_fragments();
577                               *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
578                               if (*frag == NULL)
579                                         goto no_mem;
580                     }
581 
582                     /* Get an entry for the queue */
583                     cur = pool_get(&pf_cent_pl, PR_NOWAIT);
584                     if (cur == NULL) {
585                               pool_put(&pf_cache_pl, *frag);
586                               *frag = NULL;
587                               goto no_mem;
588                     }
589                     pf_ncache++;
590 
591                     (*frag)->fr_flags = PFFRAG_NOBUFFER;
592                     (*frag)->fr_max = 0;
593                     (*frag)->fr_src = h->ip_src;
594                     (*frag)->fr_dst = h->ip_dst;
595                     (*frag)->fr_p = h->ip_p;
596                     (*frag)->fr_id = h->ip_id;
597                     (*frag)->fr_timeout = time_second;
598 
599                     cur->fr_off = off;
600                     cur->fr_end = frmax;
601                     LIST_INIT(&(*frag)->fr_cache);
602                     LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
603 
604                     RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
605                     TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
606 
607                     DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, frmax));
608 
609                     goto pass;
610           }
611 
612           /*
613            * Find a fragment after the current one:
614            *  - off contains the real shifted offset.
615            */
616           frp = NULL;
617           LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
618                     if (fra->fr_off > off)
619                               break;
620                     frp = fra;
621           }
622 
623           KASSERT(frp != NULL || fra != NULL);
624 
625           if (frp != NULL) {
626                     int       precut;
627 
628                     precut = frp->fr_end - off;
629                     if (precut >= ip_len) {
630                               /* Fragment is entirely a duplicate */
631                               DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
632                                   h->ip_id, frp->fr_off, frp->fr_end, off, frmax));
633                               goto drop_fragment;
634                     }
635                     if (precut == 0) {
636                               /* They are adjacent.  Fixup cache entry */
637                               DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
638                                   h->ip_id, frp->fr_off, frp->fr_end, off, frmax));
639                               frp->fr_end = frmax;
640                     } else if (precut > 0) {
641                               /* The first part of this payload overlaps with a
642                                * fragment that has already been passed.
643                                * Need to trim off the first part of the payload.
644                                * But to do so easily, we need to create another
645                                * mbuf to throw the original header into.
646                                */
647 
648                               DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
649                                   h->ip_id, precut, frp->fr_off, frp->fr_end, off,
650                                   frmax));
651 
652                               off += precut;
653                               frmax -= precut;
654                               /* Update the previous frag to encompass this one */
655                               frp->fr_end = frmax;
656 
657                               if (!drop) {
658                                         /* XXX Optimization opportunity
659                                          * This is a very heavy way to trim the payload.
660                                          * we could do it much faster by diddling mbuf
661                                          * internals but that would be even less legible
662                                          * than this mbuf magic.  For my next trick,
663                                          * I'll pull a rabbit out of my laptop.
664                                          */
665                                         *m0 = m_dup(m, 0, h->ip_hl << 2, M_NOWAIT);
666                                         if (*m0 == NULL)
667                                                   goto no_mem;
668                                         KASSERT((*m0)->m_next == NULL);
669                                         m_adj(m, precut + (h->ip_hl << 2));
670                                         m_cat(*m0, m);
671                                         m = *m0;
672                                         if (m->m_flags & M_PKTHDR) {
673                                                   int plen = 0;
674                                                   struct mbuf *t;
675                                                   for (t = m; t; t = t->m_next)
676                                                             plen += t->m_len;
677                                                   m->m_pkthdr.len = plen;
678                                         }
679 
680 
681                                         h = mtod(m, struct ip *);
682 
683 
684                                         KASSERT((int)m->m_len ==
685                                             ntohs(h->ip_len) - precut);
686                                         h->ip_off = htons(ntohs(h->ip_off) +
687                                             (precut >> 3));
688                                         h->ip_len = htons(ntohs(h->ip_len) - precut);
689                               } else {
690                                         hosed++;
691                               }
692                     } else {
693                               /* There is a gap between fragments */
694 
695                               DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
696                                   h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
697                                   frmax));
698 
699                               cur = pool_get(&pf_cent_pl, PR_NOWAIT);
700                               if (cur == NULL)
701                                         goto no_mem;
702                               pf_ncache++;
703 
704                               cur->fr_off = off;
705                               cur->fr_end = frmax;
706                               LIST_INSERT_AFTER(frp, cur, fr_next);
707                     }
708           }
709 
710           if (fra != NULL) {
711                     int       aftercut;
712                     int       merge = 0;
713 
714                     aftercut = frmax - fra->fr_off;
715                     if (aftercut == 0) {
716                               /* Adjacent fragments */
717                               DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
718                                   h->ip_id, off, frmax, fra->fr_off, fra->fr_end));
719                               fra->fr_off = off;
720                               merge = 1;
721                     } else if (aftercut > 0) {
722                               /* Need to chop off the tail of this fragment */
723                               DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
724                                   h->ip_id, aftercut, off, frmax, fra->fr_off,
725                                   fra->fr_end));
726                               fra->fr_off = off;
727                               frmax -= aftercut;
728 
729                               merge = 1;
730 
731                               if (!drop) {
732                                         m_adj(m, -aftercut);
733                                         if (m->m_flags & M_PKTHDR) {
734                                                   int plen = 0;
735                                                   struct mbuf *t;
736                                                   for (t = m; t; t = t->m_next)
737                                                             plen += t->m_len;
738                                                   m->m_pkthdr.len = plen;
739                                         }
740                                         h = mtod(m, struct ip *);
741                                         KASSERT((int)m->m_len ==
742                                             ntohs(h->ip_len) - aftercut);
743                                         h->ip_len = htons(ntohs(h->ip_len) - aftercut);
744                               } else {
745                                         hosed++;
746                               }
747                     } else if (frp == NULL) {
748                               /* There is a gap between fragments */
749                               DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
750                                   h->ip_id, -aftercut, off, frmax, fra->fr_off,
751                                   fra->fr_end));
752 
753                               cur = pool_get(&pf_cent_pl, PR_NOWAIT);
754                               if (cur == NULL)
755                                         goto no_mem;
756                               pf_ncache++;
757 
758                               cur->fr_off = off;
759                               cur->fr_end = frmax;
760                               LIST_INSERT_BEFORE(fra, cur, fr_next);
761                     }
762 
763 
764                     /* Need to glue together two separate fragment descriptors */
765                     if (merge) {
766                               if (cur && fra->fr_off <= cur->fr_end) {
767                                         /* Need to merge in a previous 'cur' */
768                                         DPFPRINTF(("fragcache[%d]: adjacent(merge "
769                                             "%d-%d) %d-%d (%d-%d)\n",
770                                             h->ip_id, cur->fr_off, cur->fr_end, off,
771                                             frmax, fra->fr_off, fra->fr_end));
772                                         fra->fr_off = cur->fr_off;
773                                         LIST_REMOVE(cur, fr_next);
774                                         pool_put(&pf_cent_pl, cur);
775                                         pf_ncache--;
776                                         cur = NULL;
777 
778                               } else if (frp && fra->fr_off <= frp->fr_end) {
779                                         /* Need to merge in a modified 'frp' */
780                                         KASSERT(cur == NULL);
781                                         DPFPRINTF(("fragcache[%d]: adjacent(merge "
782                                             "%d-%d) %d-%d (%d-%d)\n",
783                                             h->ip_id, frp->fr_off, frp->fr_end, off,
784                                             frmax, fra->fr_off, fra->fr_end));
785                                         fra->fr_off = frp->fr_off;
786                                         LIST_REMOVE(frp, fr_next);
787                                         pool_put(&pf_cent_pl, frp);
788                                         pf_ncache--;
789                                         frp = NULL;
790 
791                               }
792                     }
793           }
794 
795           if (hosed) {
796                     /*
797                      * We must keep tracking the overall fragment even when
798                      * we're going to drop it anyway so that we know when to
799                      * free the overall descriptor.  Thus we drop the frag late.
800                      */
801                     goto drop_fragment;
802           }
803 
804 
805  pass:
806           /* Update maximum data size */
807           if ((*frag)->fr_max < frmax)
808                     (*frag)->fr_max = frmax;
809 
810           /* This is the last segment */
811           if (!mff)
812                     (*frag)->fr_flags |= PFFRAG_SEENLAST;
813 
814           /* Check if we are completely reassembled */
815           if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
816               LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
817               LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
818                     /* Remove from fragment queue */
819                     DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
820                         (*frag)->fr_max));
821                     pf_free_fragment(*frag);
822                     *frag = NULL;
823           }
824 
825           return (m);
826 
827  no_mem:
828           *nomem = 1;
829 
830           /* Still need to pay attention to !IP_MF */
831           if (!mff && *frag != NULL)
832                     (*frag)->fr_flags |= PFFRAG_SEENLAST;
833 
834           m_freem(m);
835           return (NULL);
836 
837  drop_fragment:
838 
839           /* Still need to pay attention to !IP_MF */
840           if (!mff && *frag != NULL)
841                     (*frag)->fr_flags |= PFFRAG_SEENLAST;
842 
843           if (drop) {
844                     /* This fragment has been deemed bad.  Don't reass */
845                     if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
846                               DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
847                                   h->ip_id));
848                     (*frag)->fr_flags |= PFFRAG_DROP;
849           }
850 
851           m_freem(m);
852           return (NULL);
853 }
854 
855 int
pf_normalize_ip(struct mbuf ** m0,int dir,struct pfi_kif * kif,u_short * reason,struct pf_pdesc * pd)856 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
857     struct pf_pdesc *pd)
858 {
859           struct mbuf                   *m = *m0;
860           struct pf_rule                *r;
861           struct pf_frent               *frent;
862           struct pf_fragment  *frag = NULL;
863           struct ip           *h = mtod(m, struct ip *);
864           int                            mff = (ntohs(h->ip_off) & IP_MF);
865           int                            hlen = h->ip_hl << 2;
866           u_int16_t            fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
867           u_int16_t            frmax;
868           int                            ip_len;
869 
870           r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
871           while (r != NULL) {
872                     r->evaluations++;
873                     if (pfi_kif_match(r->kif, kif) == r->ifnot)
874                               r = r->skip[PF_SKIP_IFP].ptr;
875                     else if (r->direction && r->direction != dir)
876                               r = r->skip[PF_SKIP_DIR].ptr;
877                     else if (r->af && r->af != AF_INET)
878                               r = r->skip[PF_SKIP_AF].ptr;
879                     else if (r->proto && r->proto != h->ip_p)
880                               r = r->skip[PF_SKIP_PROTO].ptr;
881                     else if (PF_MISMATCHAW(&r->src.addr,
882                         (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
883                         r->src.neg, kif))
884                               r = r->skip[PF_SKIP_SRC_ADDR].ptr;
885                     else if (PF_MISMATCHAW(&r->dst.addr,
886                         (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
887                         r->dst.neg, NULL))
888                               r = r->skip[PF_SKIP_DST_ADDR].ptr;
889                     else
890                               break;
891           }
892 
893           if (r == NULL || r->action == PF_NOSCRUB)
894                     return (PF_PASS);
895           else {
896                     r->packets[dir == PF_OUT]++;
897                     r->bytes[dir == PF_OUT] += pd->tot_len;
898           }
899 
900           /* Check for illegal packets */
901           if (hlen < (int)sizeof(struct ip))
902                     goto drop;
903 
904           if (hlen > ntohs(h->ip_len))
905                     goto drop;
906 
907           /* Clear IP_DF if the rule uses the no-df option */
908           if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
909                     u_int16_t off = h->ip_off;
910 
911                     h->ip_off &= htons(~IP_DF);
912                     h->ip_sum = pf_cksum_fixup(h->ip_sum, off, h->ip_off, 0);
913           }
914 
915           /* We will need other tests here */
916           if (!fragoff && !mff)
917                     goto no_fragment;
918 
919           /* We're dealing with a fragment now. Don't allow fragments
920            * with IP_DF to enter the cache. If the flag was cleared by
921            * no-df above, fine. Otherwise drop it.
922            */
923           if (h->ip_off & htons(IP_DF)) {
924                     DPFPRINTF(("IP_DF\n"));
925                     goto bad;
926           }
927 
928           ip_len = ntohs(h->ip_len) - hlen;
929 
930           /* All fragments are 8 byte aligned */
931           if (mff && (ip_len & 0x7)) {
932                     DPFPRINTF(("mff and %d\n", ip_len));
933                     goto bad;
934           }
935 
936           /* Respect maximum length */
937           if (fragoff + ip_len > IP_MAXPACKET) {
938                     DPFPRINTF(("max packet %d\n", fragoff + ip_len));
939                     goto bad;
940           }
941           frmax = fragoff + ip_len;
942 
943           if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
944                     /* Fully buffer all of the fragments */
945 
946                     frag = pf_find_fragment(h, &pf_frag_tree);
947 
948                     /* Check if we saw the last fragment already */
949                     if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
950                         frmax > frag->fr_max)
951                               goto bad;
952 
953                     /* Get an entry for the fragment queue */
954                     frent = pool_get(&pf_frent_pl, PR_NOWAIT);
955                     if (frent == NULL) {
956                               REASON_SET(reason, PFRES_MEMORY);
957                               return (PF_DROP);
958                     }
959                     pf_nfrents++;
960                     frent->fr_ip = h;
961                     frent->fr_m = m;
962 
963                     /* Might return a completely reassembled mbuf, or NULL */
964                     DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, frmax));
965                     *m0 = m = pf_reassemble(m0, &frag, frent, mff);
966 
967                     if (m == NULL)
968                               return (PF_DROP);
969 
970                     if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
971                               goto drop;
972 
973                     h = mtod(m, struct ip *);
974           } else {
975                     /* non-buffering fragment cache (drops or masks overlaps) */
976                     int       nomem = 0;
977 
978 #ifdef __NetBSD__
979                     struct pf_mtag *pf_mtag = pf_find_mtag(m);
980                     KASSERT(pf_mtag != NULL);
981 
982                     if (dir == PF_OUT && pf_mtag->flags & PF_TAG_FRAGCACHE) {
983 #else
984                     if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
985 #endif /* !__NetBSD__ */
986                               /*
987                                * Already passed the fragment cache in the
988                                * input direction.  If we continued, it would
989                                * appear to be a dup and would be dropped.
990                                */
991                               goto fragment_pass;
992                     }
993 
994                     frag = pf_find_fragment(h, &pf_cache_tree);
995 
996                     /* Check if we saw the last fragment already */
997                     if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
998                         frmax > frag->fr_max) {
999                               if (r->rule_flag & PFRULE_FRAGDROP)
1000                                         frag->fr_flags |= PFFRAG_DROP;
1001                               goto bad;
1002                     }
1003 
1004                     *m0 = m = pf_fragcache(m0, h, &frag, mff,
1005                         (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1006                     if (m == NULL) {
1007                               if (nomem)
1008                                         goto no_mem;
1009                               goto drop;
1010                     }
1011 
1012                     if (dir == PF_IN)
1013 #ifdef __NetBSD__
1014                     {
1015                               pf_mtag = pf_find_mtag(m);
1016                               KASSERT(pf_mtag != NULL);
1017 
1018                               pf_mtag->flags |= PF_TAG_FRAGCACHE;
1019                     }
1020 #else
1021                               m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
1022 #endif /* !__NetBSD__ */
1023 
1024                     if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1025                               goto drop;
1026                     goto fragment_pass;
1027           }
1028 
1029  no_fragment:
1030           /* At this point, only IP_DF is allowed in ip_off */
1031           if (h->ip_off & ~htons(IP_DF)) {
1032                     u_int16_t off = h->ip_off;
1033 
1034                     h->ip_off &= htons(IP_DF);
1035                     h->ip_sum = pf_cksum_fixup(h->ip_sum, off, h->ip_off, 0);
1036           }
1037 
1038           /* Enforce a minimum ttl, may cause endless packet loops */
1039           if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1040                     u_int16_t ip_ttl = h->ip_ttl;
1041 
1042                     h->ip_ttl = r->min_ttl;
1043                     h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1044           }
1045 
1046           if (r->rule_flag & PFRULE_RANDOMID) {
1047                     u_int16_t id = h->ip_id;
1048 
1049                     h->ip_id = ip_randomid();
1050                     h->ip_sum = pf_cksum_fixup(h->ip_sum, id, h->ip_id, 0);
1051           }
1052           if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1053                     pd->flags |= PFDESC_IP_REAS;
1054 
1055           return (PF_PASS);
1056 
1057  fragment_pass:
1058           /* Enforce a minimum ttl, may cause endless packet loops */
1059           if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1060                     u_int16_t ip_ttl = h->ip_ttl;
1061 
1062                     h->ip_ttl = r->min_ttl;
1063                     h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1064           }
1065           if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1066                     pd->flags |= PFDESC_IP_REAS;
1067           return (PF_PASS);
1068 
1069  no_mem:
1070           REASON_SET(reason, PFRES_MEMORY);
1071           if (r != NULL && r->log)
1072                     PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1073           return (PF_DROP);
1074 
1075  drop:
1076           REASON_SET(reason, PFRES_NORM);
1077           if (r != NULL && r->log)
1078                     PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1079           return (PF_DROP);
1080 
1081  bad:
1082           DPFPRINTF(("dropping bad fragment\n"));
1083 
1084           /* Free associated fragments */
1085           if (frag != NULL)
1086                     pf_free_fragment(frag);
1087 
1088           REASON_SET(reason, PFRES_FRAG);
1089           if (r != NULL && r->log)
1090                     PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1091 
1092           return (PF_DROP);
1093 }
1094 
1095 #ifdef INET6
1096 int
1097 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1098     u_short *reason, struct pf_pdesc *pd)
1099 {
1100           struct mbuf                   *m = *m0;
1101           struct pf_rule                *r;
1102           struct ip6_hdr                *h = mtod(m, struct ip6_hdr *);
1103           int                            off;
1104           struct ip6_ext                 ext;
1105           struct ip6_opt                 opt;
1106           struct ip6_opt_jumbo           jumbo;
1107           struct ip6_frag                frag;
1108           u_int32_t            jumbolen = 0, plen;
1109           u_int16_t            fragoff = 0;
1110           int                            optend;
1111           int                            ooff;
1112           u_int8_t             proto;
1113           int                            terminal;
1114 
1115           r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1116           while (r != NULL) {
1117                     r->evaluations++;
1118                     if (pfi_kif_match(r->kif, kif) == r->ifnot)
1119                               r = r->skip[PF_SKIP_IFP].ptr;
1120                     else if (r->direction && r->direction != dir)
1121                               r = r->skip[PF_SKIP_DIR].ptr;
1122                     else if (r->af && r->af != AF_INET6)
1123                               r = r->skip[PF_SKIP_AF].ptr;
1124 #if 0 /* header chain! */
1125                     else if (r->proto && r->proto != h->ip6_nxt)
1126                               r = r->skip[PF_SKIP_PROTO].ptr;
1127 #endif
1128                     else if (PF_MISMATCHAW(&r->src.addr,
1129                         (struct pf_addr *)&h->ip6_src, AF_INET6,
1130                         r->src.neg, kif))
1131                               r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1132                     else if (PF_MISMATCHAW(&r->dst.addr,
1133                         (struct pf_addr *)&h->ip6_dst, AF_INET6,
1134                         r->dst.neg, NULL))
1135                               r = r->skip[PF_SKIP_DST_ADDR].ptr;
1136                     else
1137                               break;
1138           }
1139 
1140           if (r == NULL || r->action == PF_NOSCRUB)
1141                     return (PF_PASS);
1142           else {
1143                     r->packets[dir == PF_OUT]++;
1144                     r->bytes[dir == PF_OUT] += pd->tot_len;
1145           }
1146 
1147           /* Check for illegal packets */
1148           if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1149                     goto drop;
1150 
1151           off = sizeof(struct ip6_hdr);
1152           proto = h->ip6_nxt;
1153           terminal = 0;
1154           do {
1155                     switch (proto) {
1156                     case IPPROTO_FRAGMENT:
1157                               goto fragment;
1158                               break;
1159                     case IPPROTO_AH:
1160                     case IPPROTO_ROUTING:
1161                     case IPPROTO_DSTOPTS:
1162                               if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1163                                   NULL, AF_INET6))
1164                                         goto shortpkt;
1165                               if (proto == IPPROTO_AH)
1166                                         off += (ext.ip6e_len + 2) * 4;
1167                               else
1168                                         off += (ext.ip6e_len + 1) * 8;
1169                               proto = ext.ip6e_nxt;
1170                               break;
1171                     case IPPROTO_HOPOPTS:
1172                               if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1173                                   NULL, AF_INET6))
1174                                         goto shortpkt;
1175                               optend = off + (ext.ip6e_len + 1) * 8;
1176                               ooff = off + sizeof(ext);
1177                               do {
1178                                         if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1179                                             sizeof(opt.ip6o_type), NULL, NULL,
1180                                             AF_INET6))
1181                                                   goto shortpkt;
1182                                         if (opt.ip6o_type == IP6OPT_PAD1) {
1183                                                   ooff++;
1184                                                   continue;
1185                                         }
1186                                         if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1187                                             NULL, NULL, AF_INET6))
1188                                                   goto shortpkt;
1189                                         if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1190                                                   goto drop;
1191                                         switch (opt.ip6o_type) {
1192                                         case IP6OPT_JUMBO:
1193                                                   if (h->ip6_plen != 0)
1194                                                             goto drop;
1195                                                   if (!pf_pull_hdr(m, ooff, &jumbo,
1196                                                       sizeof(jumbo), NULL, NULL,
1197                                                       AF_INET6))
1198                                                             goto shortpkt;
1199                                                   memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1200                                                       sizeof(jumbolen));
1201                                                   jumbolen = ntohl(jumbolen);
1202                                                   if (jumbolen <= IPV6_MAXPACKET)
1203                                                             goto drop;
1204                                                   if (sizeof(struct ip6_hdr) + jumbolen !=
1205                                                       m->m_pkthdr.len)
1206                                                             goto drop;
1207                                                   break;
1208                                         default:
1209                                                   break;
1210                                         }
1211                                         ooff += sizeof(opt) + opt.ip6o_len;
1212                               } while (ooff < optend);
1213 
1214                               off = optend;
1215                               proto = ext.ip6e_nxt;
1216                               break;
1217                     default:
1218                               terminal = 1;
1219                               break;
1220                     }
1221           } while (!terminal);
1222 
1223           /* jumbo payload option must be present, or plen > 0 */
1224           if (ntohs(h->ip6_plen) == 0)
1225                     plen = jumbolen;
1226           else
1227                     plen = ntohs(h->ip6_plen);
1228           if (plen == 0)
1229                     goto drop;
1230           if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1231                     goto shortpkt;
1232 
1233           /* Enforce a minimum ttl, may cause endless packet loops */
1234           if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1235                     h->ip6_hlim = r->min_ttl;
1236 
1237           return (PF_PASS);
1238 
1239  fragment:
1240           if (ntohs(h->ip6_plen) == 0 || jumbolen)
1241                     goto drop;
1242           plen = ntohs(h->ip6_plen);
1243 
1244           if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1245                     goto shortpkt;
1246           fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1247           if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
1248                     goto badfrag;
1249 
1250           /* do something about it */
1251           /* remember to set pd->flags |= PFDESC_IP_REAS */
1252           return (PF_PASS);
1253 
1254  shortpkt:
1255           REASON_SET(reason, PFRES_SHORT);
1256           if (r != NULL && r->log)
1257                     PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1258           return (PF_DROP);
1259 
1260  drop:
1261           REASON_SET(reason, PFRES_NORM);
1262           if (r != NULL && r->log)
1263                     PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1264           return (PF_DROP);
1265 
1266  badfrag:
1267           REASON_SET(reason, PFRES_FRAG);
1268           if (r != NULL && r->log)
1269                     PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1270           return (PF_DROP);
1271 }
1272 #endif /* INET6 */
1273 
1274 int
1275 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m,
1276     int ipoff, int off, void *h, struct pf_pdesc *pd)
1277 {
1278           struct pf_rule      *r, *rm = NULL;
1279           struct tcphdr       *th = pd->hdr.tcp;
1280           int                  rewrite = 0;
1281           u_short              reason;
1282           u_int8_t   flags;
1283           sa_family_t          af = pd->af;
1284 
1285           r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1286           while (r != NULL) {
1287                     r->evaluations++;
1288                     if (pfi_kif_match(r->kif, kif) == r->ifnot)
1289                               r = r->skip[PF_SKIP_IFP].ptr;
1290                     else if (r->direction && r->direction != dir)
1291                               r = r->skip[PF_SKIP_DIR].ptr;
1292                     else if (r->af && r->af != af)
1293                               r = r->skip[PF_SKIP_AF].ptr;
1294                     else if (r->proto && r->proto != pd->proto)
1295                               r = r->skip[PF_SKIP_PROTO].ptr;
1296                     else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1297                         r->src.neg, kif))
1298                               r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1299                     else if (r->src.port_op && !pf_match_port(r->src.port_op,
1300                                   r->src.port[0], r->src.port[1], th->th_sport))
1301                               r = r->skip[PF_SKIP_SRC_PORT].ptr;
1302                     else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1303                         r->dst.neg, NULL))
1304                               r = r->skip[PF_SKIP_DST_ADDR].ptr;
1305                     else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1306                                   r->dst.port[0], r->dst.port[1], th->th_dport))
1307                               r = r->skip[PF_SKIP_DST_PORT].ptr;
1308                     else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1309                                   pf_osfp_fingerprint(pd, m, off, th),
1310                                   r->os_fingerprint))
1311                               r = TAILQ_NEXT(r, entries);
1312                     else {
1313                               rm = r;
1314                               break;
1315                     }
1316           }
1317 
1318           if (rm == NULL || rm->action == PF_NOSCRUB)
1319                     return (PF_PASS);
1320           else {
1321                     r->packets[dir == PF_OUT]++;
1322                     r->bytes[dir == PF_OUT] += pd->tot_len;
1323           }
1324 
1325           if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1326                     pd->flags |= PFDESC_TCP_NORM;
1327 
1328           flags = th->th_flags;
1329           if (flags & TH_SYN) {
1330                     /* Illegal packet */
1331                     if (flags & TH_RST)
1332                               goto tcp_drop;
1333 
1334                     if (flags & TH_FIN)
1335                               flags &= ~TH_FIN;
1336           } else {
1337                     /* Illegal packet */
1338                     if (!(flags & (TH_ACK|TH_RST)))
1339                               goto tcp_drop;
1340           }
1341 
1342           if (!(flags & TH_ACK)) {
1343                     /* These flags are only valid if ACK is set */
1344                     if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1345                               goto tcp_drop;
1346           }
1347 
1348           /* Check for illegal header length */
1349           if (th->th_off < (sizeof(struct tcphdr) >> 2))
1350                     goto tcp_drop;
1351 
1352           /* If flags changed, or reserved data set, then adjust */
1353           if (flags != th->th_flags || th->th_x2 != 0) {
1354                     u_int16_t ov, nv;
1355 
1356                     ov = *(u_int16_t *)(&th->th_ack + 1);
1357                     th->th_flags = flags;
1358                     th->th_x2 = 0;
1359                     nv = *(u_int16_t *)(&th->th_ack + 1);
1360 
1361                     th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1362                     rewrite = 1;
1363           }
1364 
1365           /* Remove urgent pointer, if TH_URG is not set */
1366           if (!(flags & TH_URG) && th->th_urp) {
1367                     th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1368                     th->th_urp = 0;
1369                     rewrite = 1;
1370           }
1371 
1372           /* Process options */
1373           if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
1374                     rewrite = 1;
1375 
1376           /* copy back packet headers if we sanitized */
1377           if (rewrite)
1378                     m_copyback(m, off, sizeof(*th), th);
1379 
1380           return (PF_PASS);
1381 
1382  tcp_drop:
1383           REASON_SET_NOPTR(&reason, PFRES_NORM);
1384           if (rm != NULL && r->log)
1385                     PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1386           return (PF_DROP);
1387 }
1388 
1389 int
1390 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1391     struct tcphdr *th, struct pf_state_peer *src,
1392     struct pf_state_peer *dst)
1393 {
1394           u_int32_t tsval, tsecr;
1395           u_int8_t hdr[60];
1396           u_int8_t *opt;
1397 
1398           KASSERT(src->scrub == NULL);
1399 
1400           src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1401           if (src->scrub == NULL)
1402                     return (1);
1403           bzero(src->scrub, sizeof(*src->scrub));
1404 
1405           switch (pd->af) {
1406 #ifdef INET
1407           case AF_INET: {
1408                     struct ip *h = mtod(m, struct ip *);
1409                     src->scrub->pfss_ttl = h->ip_ttl;
1410                     break;
1411           }
1412 #endif /* INET */
1413 #ifdef INET6
1414           case AF_INET6: {
1415                     struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1416                     src->scrub->pfss_ttl = h->ip6_hlim;
1417                     break;
1418           }
1419 #endif /* INET6 */
1420           }
1421 
1422 
1423           /*
1424            * All normalizations below are only begun if we see the start of
1425            * the connections.  They must all set an enabled bit in pfss_flags
1426            */
1427           if ((th->th_flags & TH_SYN) == 0)
1428                     return (0);
1429 
1430 
1431           if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
1432               pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1433                     /* Diddle with TCP options */
1434                     int hlen;
1435                     opt = hdr + sizeof(struct tcphdr);
1436                     hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1437                     while (hlen >= TCPOLEN_TIMESTAMP) {
1438                               switch (*opt) {
1439                               case TCPOPT_EOL:    /* FALLTHROUGH */
1440                               case TCPOPT_NOP:
1441                                         opt++;
1442                                         hlen--;
1443                                         break;
1444                               case TCPOPT_TIMESTAMP:
1445                                         if (opt[1] >= TCPOLEN_TIMESTAMP) {
1446                                                   src->scrub->pfss_flags |=
1447                                                       PFSS_TIMESTAMP;
1448                                                   src->scrub->pfss_ts_mod =
1449                                                       htonl(cprng_fast32());
1450 
1451                                                   /* note PFSS_PAWS not set yet */
1452                                                   memcpy(&tsval, &opt[2],
1453                                                       sizeof(u_int32_t));
1454                                                   memcpy(&tsecr, &opt[6],
1455                                                       sizeof(u_int32_t));
1456                                                   src->scrub->pfss_tsval0 = ntohl(tsval);
1457                                                   src->scrub->pfss_tsval = ntohl(tsval);
1458                                                   src->scrub->pfss_tsecr = ntohl(tsecr);
1459                                                   getmicrouptime(&src->scrub->pfss_last);
1460                                         }
1461                                         /* FALLTHROUGH */
1462                               default:
1463                                         hlen -= MAX(opt[1], 2);
1464                                         opt += MAX(opt[1], 2);
1465                                         break;
1466                               }
1467                     }
1468           }
1469 
1470           return (0);
1471 }
1472 
1473 void
1474 pf_normalize_tcp_cleanup(struct pf_state *state)
1475 {
1476           if (state->src.scrub)
1477                     pool_put(&pf_state_scrub_pl, state->src.scrub);
1478           if (state->dst.scrub)
1479                     pool_put(&pf_state_scrub_pl, state->dst.scrub);
1480 
1481           /* Someday... flush the TCP segment reassembly descriptors. */
1482 }
1483 
1484 int
1485 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1486     u_short *reason, struct tcphdr *th, struct pf_state *state,
1487     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1488 {
1489           struct timeval uptime;
1490           u_int32_t tsval = 0, tsecr = 0;
1491           u_int tsval_from_last;
1492           u_int8_t hdr[60];
1493           u_int8_t *opt;
1494           int copyback = 0;
1495           int got_ts = 0;
1496 
1497           KASSERT(src->scrub || dst->scrub);
1498 
1499           /*
1500            * Enforce the minimum TTL seen for this connection.  Negate a common
1501            * technique to evade an intrusion detection system and confuse
1502            * firewall state code.
1503            */
1504           switch (pd->af) {
1505 #ifdef INET
1506           case AF_INET: {
1507                     if (src->scrub) {
1508                               struct ip *h = mtod(m, struct ip *);
1509                               if (h->ip_ttl > src->scrub->pfss_ttl)
1510                                         src->scrub->pfss_ttl = h->ip_ttl;
1511                               h->ip_ttl = src->scrub->pfss_ttl;
1512                     }
1513                     break;
1514           }
1515 #endif /* INET */
1516 #ifdef INET6
1517           case AF_INET6: {
1518                     if (src->scrub) {
1519                               struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1520                               if (h->ip6_hlim > src->scrub->pfss_ttl)
1521                                         src->scrub->pfss_ttl = h->ip6_hlim;
1522                               h->ip6_hlim = src->scrub->pfss_ttl;
1523                     }
1524                     break;
1525           }
1526 #endif /* INET6 */
1527           }
1528 
1529           if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1530               ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1531               (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1532               pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1533                     /* Diddle with TCP options */
1534                     int hlen;
1535                     opt = hdr + sizeof(struct tcphdr);
1536                     hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1537                     while (hlen >= TCPOLEN_TIMESTAMP) {
1538                               switch (*opt) {
1539                               case TCPOPT_EOL:    /* FALLTHROUGH */
1540                               case TCPOPT_NOP:
1541                                         opt++;
1542                                         hlen--;
1543                                         break;
1544                               case TCPOPT_TIMESTAMP:
1545                                         /* Modulate the timestamps.  Can be used for
1546                                          * NAT detection, OS uptime determination or
1547                                          * reboot detection.
1548                                          */
1549 
1550                                         if (got_ts) {
1551                                                   /* Huh?  Multiple timestamps!? */
1552                                                   if (pf_status.debug >= PF_DEBUG_MISC) {
1553                                                             DPFPRINTF(("multiple TS??"));
1554                                                             pf_print_state(state);
1555                                                             printf("\n");
1556                                                   }
1557                                                   REASON_SET(reason, PFRES_TS);
1558                                                   return (PF_DROP);
1559                                         }
1560                                         if (opt[1] >= TCPOLEN_TIMESTAMP) {
1561                                                   memcpy(&tsval, &opt[2],
1562                                                       sizeof(u_int32_t));
1563                                                   if (tsval && src->scrub &&
1564                                                       (src->scrub->pfss_flags &
1565                                                       PFSS_TIMESTAMP)) {
1566                                                             tsval = ntohl(tsval);
1567                                                             pf_change_a(&opt[2],
1568                                                                 &th->th_sum,
1569                                                                 htonl(tsval +
1570                                                                 src->scrub->pfss_ts_mod),
1571                                                                 0);
1572                                                             copyback = 1;
1573                                                   }
1574 
1575                                                   /* Modulate TS reply iff valid (!0) */
1576                                                   memcpy(&tsecr, &opt[6],
1577                                                       sizeof(u_int32_t));
1578                                                   if (tsecr && dst->scrub &&
1579                                                       (dst->scrub->pfss_flags &
1580                                                       PFSS_TIMESTAMP)) {
1581                                                             tsecr = ntohl(tsecr)
1582                                                                 - dst->scrub->pfss_ts_mod;
1583                                                             pf_change_a(&opt[6],
1584                                                                 &th->th_sum, htonl(tsecr),
1585                                                                 0);
1586                                                             copyback = 1;
1587                                                   }
1588                                                   got_ts = 1;
1589                                         }
1590                                         /* FALLTHROUGH */
1591                               default:
1592                                         hlen -= MAX(opt[1], 2);
1593                                         opt += MAX(opt[1], 2);
1594                                         break;
1595                               }
1596                     }
1597                     if (copyback) {
1598                               /* Copyback the options, caller copys back header */
1599                               *writeback = 1;
1600                               m_copyback(m, off + sizeof(struct tcphdr),
1601                                   (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1602                                   sizeof(struct tcphdr));
1603                     }
1604           }
1605 
1606 
1607           /*
1608            * Must invalidate PAWS checks on connections idle for too long.
1609            * The fastest allowed timestamp clock is 1ms.  That turns out to
1610            * be about 24 days before it wraps.  XXX Right now our lowerbound
1611            * TS echo check only works for the first 12 days of a connection
1612            * when the TS has exhausted half its 32bit space
1613            */
1614 #define TS_MAX_IDLE (24*24*60*60)
1615 #define TS_MAX_CONN (12*24*60*60)       /* XXX remove when better tsecr check */
1616 
1617           getmicrouptime(&uptime);
1618           if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1619               (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1620               time_second - state->creation > TS_MAX_CONN))  {
1621                     if (pf_status.debug >= PF_DEBUG_MISC) {
1622                               DPFPRINTF(("src idled out of PAWS\n"));
1623                               pf_print_state(state);
1624                               printf("\n");
1625                     }
1626                     src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1627                         | PFSS_PAWS_IDLED;
1628           }
1629           if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1630               uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1631                     if (pf_status.debug >= PF_DEBUG_MISC) {
1632                               DPFPRINTF(("dst idled out of PAWS\n"));
1633                               pf_print_state(state);
1634                               printf("\n");
1635                     }
1636                     dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1637                         | PFSS_PAWS_IDLED;
1638           }
1639 
1640           if (got_ts && src->scrub && dst->scrub &&
1641               (src->scrub->pfss_flags & PFSS_PAWS) &&
1642               (dst->scrub->pfss_flags & PFSS_PAWS)) {
1643                     /* Validate that the timestamps are "in-window".
1644                      * RFC1323 describes TCP Timestamp options that allow
1645                      * measurement of RTT (round trip time) and PAWS
1646                      * (protection against wrapped sequence numbers).  PAWS
1647                      * gives us a set of rules for rejecting packets on
1648                      * long fat pipes (packets that were somehow delayed
1649                      * in transit longer than the time it took to send the
1650                      * full TCP sequence space of 4Gb).  We can use these
1651                      * rules and infer a few others that will let us treat
1652                      * the 32bit timestamp and the 32bit echoed timestamp
1653                      * as sequence numbers to prevent a blind attacker from
1654                      * inserting packets into a connection.
1655                      *
1656                      * RFC1323 tells us:
1657                      *  - The timestamp on this packet must be greater than
1658                      *    or equal to the last value echoed by the other
1659                      *    endpoint.  The RFC says those will be discarded
1660                      *    since it is a dup that has already been acked.
1661                      *    This gives us a lowerbound on the timestamp.
1662                      *        timestamp >= other last echoed timestamp
1663                      *  - The timestamp will be less than or equal to
1664                      *    the last timestamp plus the time between the
1665                      *    last packet and now.  The RFC defines the max
1666                      *    clock rate as 1ms.  We will allow clocks to be
1667                      *    up to 10% fast and will allow a total difference
1668                      *    or 30 seconds due to a route change.  And this
1669                      *    gives us an upperbound on the timestamp.
1670                      *        timestamp <= last timestamp + max ticks
1671                      *    We have to be careful here.  Windows will send an
1672                      *    initial timestamp of zero and then initialize it
1673                      *    to a random value after the 3whs; presumably to
1674                      *    avoid a DoS by having to call an expensive RNG
1675                      *    during a SYN flood.  Proof MS has at least one
1676                      *    good security geek.
1677                      *
1678                      *  - The TCP timestamp option must also echo the other
1679                      *    endpoints timestamp.  The timestamp echoed is the
1680                      *    one carried on the earliest unacknowledged segment
1681                      *    on the left edge of the sequence window.  The RFC
1682                      *    states that the host will reject any echoed
1683                      *    timestamps that were larger than any ever sent.
1684                      *    This gives us an upperbound on the TS echo.
1685                      *        tescr <= largest_tsval
1686                      *  - The lowerbound on the TS echo is a little more
1687                      *    tricky to determine.  The other endpoint's echoed
1688                      *    values will not decrease.  But there may be
1689                      *    network conditions that re-order packets and
1690                      *    cause our view of them to decrease.  For now the
1691                      *    only lowerbound we can safely determine is that
1692                      *    the TS echo will never be less than the original
1693                      *    TS.  XXX There is probably a better lowerbound.
1694                      *    Remove TS_MAX_CONN with better lowerbound check.
1695                      *        tescr >= other original TS
1696                      *
1697                      * It is also important to note that the fastest
1698                      * timestamp clock of 1ms will wrap its 32bit space in
1699                      * 24 days.  So we just disable TS checking after 24
1700                      * days of idle time.  We actually must use a 12d
1701                      * connection limit until we can come up with a better
1702                      * lowerbound to the TS echo check.
1703                      */
1704                     struct timeval delta_ts;
1705                     int ts_fudge;
1706 
1707 
1708                     /*
1709                      * PFTM_TS_DIFF is how many seconds of leeway to allow
1710                      * a host's timestamp.  This can happen if the previous
1711                      * packet got delayed in transit for much longer than
1712                      * this packet.
1713                      */
1714                     if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
1715                               ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
1716 
1717 
1718                     /* Calculate max ticks since the last timestamp */
1719 #define TS_MAXFREQ  1100                /* RFC max TS freq of 1 kHz + 10% skew */
1720 #define TS_MICROSECS          1000000             /* microseconds per second */
1721                     timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
1722                     tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
1723                     tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
1724 
1725 
1726                     if ((src->state >= TCPS_ESTABLISHED &&
1727                         dst->state >= TCPS_ESTABLISHED) &&
1728                         (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
1729                         SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
1730                         (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
1731                         SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
1732                               /* Bad RFC1323 implementation or an insertion attack.
1733                                *
1734                                * - Solaris 2.6 and 2.7 are known to send another ACK
1735                                *   after the FIN,FIN|ACK,ACK closing that carries
1736                                *   an old timestamp.
1737                                */
1738 
1739                               DPFPRINTF(("Timestamp failed %c%c%c%c\n",
1740                                   SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
1741                                   SEQ_GT(tsval, src->scrub->pfss_tsval +
1742                                   tsval_from_last) ? '1' : ' ',
1743                                   SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
1744                                   SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
1745                               DPFPRINTF((" tsval: %" PRIu32 "  tsecr: %" PRIu32
1746                                   "  +ticks: %" PRIu32 "  idle: %"PRIx64"s %ums\n",
1747                                   tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
1748                                   delta_ts.tv_usec / 1000U));
1749                               DPFPRINTF((" src->tsval: %" PRIu32 "  tsecr: %" PRIu32
1750                                   "\n",
1751                                   src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
1752                               DPFPRINTF((" dst->tsval: %" PRIu32 "  tsecr: %" PRIu32
1753                                   "  tsval0: %" PRIu32 "\n",
1754                                   dst->scrub->pfss_tsval,
1755                                   dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
1756                               if (pf_status.debug >= PF_DEBUG_MISC) {
1757                                         pf_print_state(state);
1758                                         pf_print_flags(th->th_flags);
1759                                         printf("\n");
1760                               }
1761                               REASON_SET(reason, PFRES_TS);
1762                               return (PF_DROP);
1763                     }
1764 
1765                     /* XXX I'd really like to require tsecr but it's optional */
1766 
1767           } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
1768               ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
1769               || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
1770               src->scrub && dst->scrub &&
1771               (src->scrub->pfss_flags & PFSS_PAWS) &&
1772               (dst->scrub->pfss_flags & PFSS_PAWS)) {
1773                     /* Didn't send a timestamp.  Timestamps aren't really useful
1774                      * when:
1775                      *  - connection opening or closing (often not even sent).
1776                      *    but we must not let an attacker to put a FIN on a
1777                      *    data packet to sneak it through our ESTABLISHED check.
1778                      *  - on a TCP reset.  RFC suggests not even looking at TS.
1779                      *  - on an empty ACK.  The TS will not be echoed so it will
1780                      *    probably not help keep the RTT calculation in sync and
1781                      *    there isn't as much danger when the sequence numbers
1782                      *    got wrapped.  So some stacks don't include TS on empty
1783                      *    ACKs :-(
1784                      *
1785                      * To minimize the disruption to mostly RFC1323 conformant
1786                      * stacks, we will only require timestamps on data packets.
1787                      *
1788                      * And what do ya know, we cannot require timestamps on data
1789                      * packets.  There appear to be devices that do legitimate
1790                      * TCP connection hijacking.  There are HTTP devices that allow
1791                      * a 3whs (with timestamps) and then buffer the HTTP request.
1792                      * If the intermediate device has the HTTP response cache, it
1793                      * will spoof the response but not bother timestamping its
1794                      * packets.  So we can look for the presence of a timestamp in
1795                      * the first data packet and if there, require it in all future
1796                      * packets.
1797                      */
1798 
1799                     if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
1800                               /*
1801                                * Hey!  Someone tried to sneak a packet in.  Or the
1802                                * stack changed its RFC1323 behavior?!?!
1803                                */
1804                               if (pf_status.debug >= PF_DEBUG_MISC) {
1805                                         DPFPRINTF(("Did not receive expected RFC1323 "
1806                                             "timestamp\n"));
1807                                         pf_print_state(state);
1808                                         pf_print_flags(th->th_flags);
1809                                         printf("\n");
1810                               }
1811                               REASON_SET(reason, PFRES_TS);
1812                               return (PF_DROP);
1813                     }
1814           }
1815 
1816 
1817           /*
1818            * We will note if a host sends his data packets with or without
1819            * timestamps.  And require all data packets to contain a timestamp
1820            * if the first does.  PAWS implicitly requires that all data packets be
1821            * timestamped.  But I think there are middle-man devices that hijack
1822            * TCP streams immediately after the 3whs and don't timestamp their
1823            * packets (seen in a WWW accelerator or cache).
1824            */
1825           if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
1826               (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
1827                     if (got_ts)
1828                               src->scrub->pfss_flags |= PFSS_DATA_TS;
1829                     else {
1830                               src->scrub->pfss_flags |= PFSS_DATA_NOTS;
1831                               if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
1832                                   (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
1833                                         /* Don't warn if other host rejected RFC1323 */
1834                                         DPFPRINTF(("Broken RFC1323 stack did not "
1835                                             "timestamp data packet. Disabled PAWS "
1836                                             "security.\n"));
1837                                         pf_print_state(state);
1838                                         pf_print_flags(th->th_flags);
1839                                         printf("\n");
1840                               }
1841                     }
1842           }
1843 
1844 
1845           /*
1846            * Update PAWS values
1847            */
1848           if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
1849               (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
1850                     getmicrouptime(&src->scrub->pfss_last);
1851                     if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
1852                         (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1853                               src->scrub->pfss_tsval = tsval;
1854 
1855                     if (tsecr) {
1856                               if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
1857                                   (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1858                                         src->scrub->pfss_tsecr = tsecr;
1859 
1860                               if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
1861                                   (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
1862                                   src->scrub->pfss_tsval0 == 0)) {
1863                                         /* tsval0 MUST be the lowest timestamp */
1864                                         src->scrub->pfss_tsval0 = tsval;
1865                               }
1866 
1867                               /* Only fully initialized after a TS gets echoed */
1868                               if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
1869                                         src->scrub->pfss_flags |= PFSS_PAWS;
1870                     }
1871           }
1872 
1873           /* I have a dream....  TCP segment reassembly.... */
1874           return (0);
1875 }
1876 
1877 int
1878 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
1879     int off)
1880 {
1881           u_int16_t *mss;
1882           int                  thoff;
1883           int                  opt, cnt, optlen = 0;
1884           int                  rewrite = 0;
1885           u_char              *optp;
1886 
1887           thoff = th->th_off << 2;
1888           cnt = thoff - sizeof(struct tcphdr);
1889           optp = mtod(m, u_char *) + off + sizeof(struct tcphdr);
1890 
1891           for (; cnt > 0; cnt -= optlen, optp += optlen) {
1892                     opt = optp[0];
1893                     if (opt == TCPOPT_EOL)
1894                               break;
1895                     if (opt == TCPOPT_NOP)
1896                               optlen = 1;
1897                     else {
1898                               if (cnt < 2)
1899                                         break;
1900                               optlen = optp[1];
1901                               if (optlen < 2 || optlen > cnt)
1902                                         break;
1903                     }
1904                     switch (opt) {
1905                     case TCPOPT_MAXSEG:
1906                               mss = (u_int16_t *)(optp + 2);
1907                               if ((ntohs(*mss)) > r->max_mss) {
1908                                         th->th_sum = pf_cksum_fixup(th->th_sum,
1909                                             *mss, htons(r->max_mss), 0);
1910                                         *mss = htons(r->max_mss);
1911                                         rewrite = 1;
1912                               }
1913                               break;
1914                     default:
1915                               break;
1916                     }
1917           }
1918 
1919           return (rewrite);
1920 }
1921