xref: /dragonfly/sys/net/pf/pf.c (revision b272101acc636ac635f83d03265ef6a44a3ba51a)
1 /*
2  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "opt_inet.h"
39 #include "opt_inet6.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/filio.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/kernel.h>
49 #include <sys/time.h>
50 #include <sys/sysctl.h>
51 #include <sys/endian.h>
52 #include <sys/proc.h>
53 #include <sys/kthread.h>
54 #include <sys/spinlock.h>
55 
56 #include <sys/md5.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/netisr2.h>
62 #include <net/route.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_seq.h>
71 #include <netinet/udp.h>
72 #include <netinet/ip_icmp.h>
73 #include <netinet/in_pcb.h>
74 #include <netinet/tcp_timer.h>
75 #include <netinet/tcp_var.h>
76 #include <netinet/udp_var.h>
77 #include <netinet/icmp_var.h>
78 #include <netinet/if_ether.h>
79 
80 #include <net/pf/pfvar.h>
81 #include <net/pf/if_pflog.h>
82 
83 #include <net/pf/if_pfsync.h>
84 
85 #ifdef INET6
86 #include <netinet/ip6.h>
87 #include <netinet/icmp6.h>
88 #include <netinet6/nd6.h>
89 #include <netinet6/ip6_var.h>
90 #include <netinet6/in6_pcb.h>
91 #endif /* INET6 */
92 
93 #include <sys/in_cksum.h>
94 #include <sys/ucred.h>
95 #include <machine/limits.h>
96 #include <sys/msgport2.h>
97 #include <sys/spinlock2.h>
98 #include <net/netmsg2.h>
99 #include <net/toeplitz2.h>
100 
101 extern int ip_optcopy(struct ip *, struct ip *);
102 extern int debug_pfugidhack;
103 
/*
 * pf_token  - shared lock for cpu-localized operations,
 *	       exclusive lock otherwise.
 *
 * pf_gtoken - exclusive lock used for initialization.
 */
110 struct lwkt_token pf_token = LWKT_TOKEN_INITIALIZER(pf_token);
111 struct lwkt_token pf_gtoken = LWKT_TOKEN_INITIALIZER(pf_gtoken);
112 
/*
 * Debug printf: passes the parenthesized argument list x to kprintf when
 * pf_status.debug is at least n.  Wrapped in do/while(0) so the macro is
 * a single statement and cannot capture an "else" that follows it.
 * Use as: DPFPRINTF(level, ("fmt", args...));
 */
#define DPFPRINTF(n, x)						\
	do {							\
		if (pf_status.debug >= (n))			\
			kprintf x;				\
	} while (0)
114 
/*
 * Store code in the caller's local "error" and jump to the caller's
 * "done" label.  Wrapped in do/while(0) so "if (x) FAIL(e); else ..."
 * parses as intended.
 */
#define FAIL(code)	do { error = (code); goto done; } while (0)
116 
117 /*
118  * Global variables
119  */
120 
121 /* mask radix tree */
122 struct radix_node_head        *pf_maskhead;
123 
124 /* state tables */
125 struct pf_state_tree           *pf_statetbl;                /* incls one global table */
126 struct pf_state               **purge_cur;
127 struct pf_altqqueue  pf_altqs[2];
128 struct pf_palist     pf_pabuf;
129 struct pf_altqqueue *pf_altqs_active;
130 struct pf_altqqueue *pf_altqs_inactive;
131 struct pf_status     pf_status;
132 
133 u_int32_t            ticket_altqs_active;
134 u_int32_t            ticket_altqs_inactive;
135 int                            altqs_inactive_open;
136 u_int32_t            ticket_pabuf;
137 
138 MD5_CTX                        pf_tcp_secret_ctx;
139 u_char                         pf_tcp_secret[16];
140 int                            pf_tcp_secret_init;
141 int                            pf_tcp_iss_off;
142 
/*
 * Frame type for the file-scope pf_anchor_stack array, which holds a
 * fixed 64 frames.  Each frame records a ruleset, a rule, and a
 * parent-node/child-anchor pair.
 * NOTE(review): presumably pushed and popped by pf_step_into_anchor() and
 * pf_step_out_of_anchor(), whose bodies are outside this view -- confirm.
 */
struct pf_anchor_stackframe {
	struct pf_ruleset			*rs;
	struct pf_rule				*r;
	struct pf_anchor_node			*parent;
	struct pf_anchor			*child;
} pf_anchor_stack[64];
149 
150 struct malloc_type   *pf_src_tree_pl, *pf_rule_pl, *pf_pooladdr_pl;
151 struct malloc_type   *pf_state_pl, *pf_state_key_pl, *pf_state_item_pl;
152 struct malloc_type   *pf_altq_pl;
153 
154 void                           pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
155 
156 void                           pf_init_threshold(struct pf_threshold *, u_int32_t,
157                                   u_int32_t);
158 void                           pf_add_threshold(struct pf_threshold *);
159 int                            pf_check_threshold(struct pf_threshold *);
160 
161 void                           pf_change_ap(struct pf_addr *, u_int16_t *,
162                                   u_int16_t *, u_int16_t *, struct pf_addr *,
163                                   u_int16_t, u_int8_t, sa_family_t);
164 int                            pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
165                                   struct tcphdr *, struct pf_state_peer *);
166 #ifdef INET6
167 void                           pf_change_a6(struct pf_addr *, u_int16_t *,
168                                   struct pf_addr *, u_int8_t);
169 #endif /* INET6 */
170 void                           pf_change_icmp(struct pf_addr *, u_int16_t *,
171                                   struct pf_addr *, struct pf_addr *, u_int16_t,
172                                   u_int16_t *, u_int16_t *, u_int16_t *,
173                                   u_int16_t *, u_int8_t, sa_family_t);
174 void                           pf_send_tcp(const struct pf_rule *, sa_family_t,
175                                   const struct pf_addr *, const struct pf_addr *,
176                                   u_int16_t, u_int16_t, u_int32_t, u_int32_t,
177                                   u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
178                                   u_int16_t, struct ether_header *, struct ifnet *);
179 void                           pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
180                                   sa_family_t, struct pf_rule *);
181 struct pf_rule                *pf_match_translation(struct pf_pdesc *, struct mbuf *,
182                                   int, int, struct pfi_kif *,
183                                   struct pf_addr *, u_int16_t, struct pf_addr *,
184                                   u_int16_t, int);
185 struct pf_rule                *pf_get_translation(struct pf_pdesc *, struct mbuf *,
186                                   int, int, struct pfi_kif *, struct pf_src_node **,
187                                   struct pf_state_key **, struct pf_state_key **,
188                                   struct pf_state_key **, struct pf_state_key **,
189                                   struct pf_addr *, struct pf_addr *,
190                                   u_int16_t, u_int16_t);
191 void                           pf_detach_state(struct pf_state *);
192 int                            pf_state_key_setup(struct pf_pdesc *, struct pf_rule *,
193                                   struct pf_state_key **, struct pf_state_key **,
194                                   struct pf_state_key **, struct pf_state_key **,
195                                   struct pf_addr *, struct pf_addr *,
196                                   u_int16_t, u_int16_t);
197 void                           pf_state_key_detach(struct pf_state *, int);
198 u_int32_t            pf_tcp_iss(struct pf_pdesc *);
199 int                            pf_test_rule(struct pf_rule **, struct pf_state **,
200                                   int, struct pfi_kif *, struct mbuf *, int,
201                                   void *, struct pf_pdesc *, struct pf_rule **,
202                                   struct pf_ruleset **, struct ifqueue *, struct inpcb *);
203 static __inline int  pf_create_state(struct pf_rule *, struct pf_rule *,
204                                   struct pf_rule *, struct pf_pdesc *,
205                                   struct pf_src_node *, struct pf_state_key *,
206                                   struct pf_state_key *, struct pf_state_key *,
207                                   struct pf_state_key *, struct mbuf *, int,
208                                   u_int16_t, u_int16_t, int *, struct pfi_kif *,
209                                   struct pf_state **, int, u_int16_t, u_int16_t,
210                                   int);
211 int                            pf_test_fragment(struct pf_rule **, int,
212                                   struct pfi_kif *, struct mbuf *, void *,
213                                   struct pf_pdesc *, struct pf_rule **,
214                                   struct pf_ruleset **);
215 int                            pf_tcp_track_full(struct pf_state_peer *,
216                                   struct pf_state_peer *, struct pf_state **,
217                                   struct pfi_kif *, struct mbuf *, int,
218                                   struct pf_pdesc *, u_short *, int *);
219 int                           pf_tcp_track_sloppy(struct pf_state_peer *,
220                                   struct pf_state_peer *, struct pf_state **,
221                                   struct pf_pdesc *, u_short *);
222 int                            pf_test_state_tcp(struct pf_state **, int,
223                                   struct pfi_kif *, struct mbuf *, int,
224                                   void *, struct pf_pdesc *, u_short *);
225 int                            pf_test_state_udp(struct pf_state **, int,
226                                   struct pfi_kif *, struct mbuf *, int,
227                                   void *, struct pf_pdesc *);
228 int                            pf_test_state_icmp(struct pf_state **, int,
229                                   struct pfi_kif *, struct mbuf *, int,
230                                   void *, struct pf_pdesc *, u_short *);
231 int                            pf_test_state_other(struct pf_state **, int,
232                                   struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
233 void                           pf_step_into_anchor(int *, struct pf_ruleset **, int,
234                                   struct pf_rule **, struct pf_rule **, int *);
235 int                            pf_step_out_of_anchor(int *, struct pf_ruleset **,
236                                    int, struct pf_rule **, struct pf_rule **,
237                                    int *);
238 void                           pf_hash(struct pf_addr *, struct pf_addr *,
239                                   struct pf_poolhashkey *, sa_family_t);
240 int                            pf_map_addr(u_int8_t, struct pf_rule *,
241                                   struct pf_addr *, struct pf_addr *,
242                                   struct pf_addr *, struct pf_src_node **);
243 int                            pf_get_sport(struct pf_pdesc *,
244                                   sa_family_t, u_int8_t, struct pf_rule *,
245                                   struct pf_addr *, struct pf_addr *,
246                                   u_int16_t, u_int16_t,
247                                   struct pf_addr *, u_int16_t *,
248                                   u_int16_t, u_int16_t,
249                                   struct pf_src_node **);
250 void                           pf_route(struct mbuf **, struct pf_rule *, int,
251                                   struct ifnet *, struct pf_state *,
252                                   struct pf_pdesc *);
253 void                           pf_route6(struct mbuf **, struct pf_rule *, int,
254                                   struct ifnet *, struct pf_state *,
255                                   struct pf_pdesc *);
256 u_int8_t             pf_get_wscale(struct mbuf *, int, u_int16_t,
257                                   sa_family_t);
258 u_int16_t            pf_get_mss(struct mbuf *, int, u_int16_t,
259                                   sa_family_t);
260 u_int16_t            pf_calc_mss(struct pf_addr *, sa_family_t,
261                                         u_int16_t);
262 void                           pf_set_rt_ifp(struct pf_state *,
263                                   struct pf_addr *);
264 int                            pf_check_proto_cksum(struct mbuf *, int, int,
265                                   u_int8_t, sa_family_t);
266 struct pf_divert    *pf_get_divert(struct mbuf *);
267 void                           pf_print_state_parts(struct pf_state *,
268                                   struct pf_state_key *, struct pf_state_key *);
269 int                            pf_addr_wrap_neq(struct pf_addr_wrap *,
270                                   struct pf_addr_wrap *);
271 struct pf_state               *pf_find_state(struct pfi_kif *,
272                                   struct pf_state_key_cmp *, u_int, struct mbuf *);
273 int                            pf_src_connlimit(struct pf_state *);
274 int                            pf_check_congestion(struct ifqueue *);
275 
276 extern int pf_end_threads;
277 
/*
 * Pool limit table, sized PF_LIMIT_MAX.  Each initializer pairs the
 * address of a pool handle with a *_HIWAT constant; five entries are
 * given here (state, source node, fragment entry, table, table entry).
 * NOTE(review): presumably indexed by the PF_LIMIT_* constants, with the
 * second member acting as the default limit -- confirm against pfvar.h.
 */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
};
285 
/*
 * Look up the state for key k on interface i in direction d and store it
 * in s.  This macro RETURNS FROM THE ENCLOSING FUNCTION in two cases:
 *
 *  - PF_DROP if no state is found or the state is marked PFTM_PURGE.
 *  - PF_PASS, with no further processing, for an outbound packet (d ==
 *    PF_OUT) whose state was created by a route-to rule with direction
 *    out or a reply-to rule with direction in, when rt_kif is assigned
 *    and differs from interface i.
 *
 * All macro arguments are parenthesized so that expression arguments
 * (e.g. "*state", "&key") expand safely.
 */
#define STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
306 
/*
 * Evaluates to k when rule r has PFRULE_IFBOUND set, otherwise to
 * pfi_all.  The whole conditional is parenthesized so the macro can be
 * used inside a larger expression without precedence surprises.
 */
#define BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
309 
/*
 * Account for a new state s on its rule and, when set, on its anchor and
 * nat rule: states_cur is bumped with atomic_add_int() and states_tot
 * with a plain increment.  The argument is parenthesized so expression
 * arguments expand safely.
 */
#define STATE_INC_COUNTERS(s)						\
	do {								\
		atomic_add_int(&(s)->rule.ptr->states_cur, 1);		\
		(s)->rule.ptr->states_tot++;				\
		if ((s)->anchor.ptr != NULL) {				\
			atomic_add_int(&(s)->anchor.ptr->states_cur, 1); \
			(s)->anchor.ptr->states_tot++;			\
		}							\
		if ((s)->nat_rule.ptr != NULL) {			\
			atomic_add_int(&(s)->nat_rule.ptr->states_cur, 1); \
			(s)->nat_rule.ptr->states_tot++;		\
		}							\
	} while (0)
323 
/*
 * Drop state s from the states_cur count of its nat rule and anchor
 * (when set) and of its rule, using atomic_add_int().  states_tot is
 * not touched.  The argument is parenthesized so expression arguments
 * expand safely.
 */
#define STATE_DEC_COUNTERS(s)						\
	do {								\
		if ((s)->nat_rule.ptr != NULL)				\
			atomic_add_int(&(s)->nat_rule.ptr->states_cur, -1); \
		if ((s)->anchor.ptr != NULL)				\
			atomic_add_int(&(s)->anchor.ptr->states_cur, -1); \
		atomic_add_int(&(s)->rule.ptr->states_cur, -1);		\
	} while (0)
332 
333 static MALLOC_DEFINE(M_PFSTATEPL, "pfstatepl", "pf state pool list");
334 static MALLOC_DEFINE(M_PFSRCTREEPL, "pfsrctpl", "pf source tree pool list");
335 static MALLOC_DEFINE(M_PFSTATEKEYPL, "pfstatekeypl", "pf state key pool list");
336 static MALLOC_DEFINE(M_PFSTATEITEMPL, "pfstateitempl", "pf state item pool list");
337 
338 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
339 static __inline int pf_state_compare_key(struct pf_state_key *,
340                                         struct pf_state_key *);
341 static __inline int pf_state_compare_rkey(struct pf_state_key *,
342                                         struct pf_state_key *);
343 static __inline int pf_state_compare_id(struct pf_state *,
344                                         struct pf_state *);
345 
346 struct pf_src_tree *tree_src_tracking;
347 struct pf_state_tree_id *tree_id;
348 struct pf_state_queue *state_list;
349 struct pf_counters *pf_counters;
350 
351 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
352 RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
353 RB_GENERATE(pf_state_rtree, pf_state_key, entry, pf_state_compare_rkey);
354 RB_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
355 
356 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)357 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
358 {
359           int       diff;
360 
361           if (a->rule.ptr > b->rule.ptr)
362                     return (1);
363           if (a->rule.ptr < b->rule.ptr)
364                     return (-1);
365           if ((diff = a->af - b->af) != 0)
366                     return (diff);
367           switch (a->af) {
368 #ifdef INET
369           case AF_INET:
370                     if (a->addr.addr32[0] > b->addr.addr32[0])
371                               return (1);
372                     if (a->addr.addr32[0] < b->addr.addr32[0])
373                               return (-1);
374                     break;
375 #endif /* INET */
376 #ifdef INET6
377           case AF_INET6:
378                     if (a->addr.addr32[3] > b->addr.addr32[3])
379                               return (1);
380                     if (a->addr.addr32[3] < b->addr.addr32[3])
381                               return (-1);
382                     if (a->addr.addr32[2] > b->addr.addr32[2])
383                               return (1);
384                     if (a->addr.addr32[2] < b->addr.addr32[2])
385                               return (-1);
386                     if (a->addr.addr32[1] > b->addr.addr32[1])
387                               return (1);
388                     if (a->addr.addr32[1] < b->addr.addr32[1])
389                               return (-1);
390                     if (a->addr.addr32[0] > b->addr.addr32[0])
391                               return (1);
392                     if (a->addr.addr32[0] < b->addr.addr32[0])
393                               return (-1);
394                     break;
395 #endif /* INET6 */
396           }
397           return (0);
398 }
399 
/*
 * Derive a 32-bit hash from the address of a state key by folding two
 * right-shifted copies of the pointer value together.  The result is
 * never 0: a zero hash is replaced by 1.
 */
u_int32_t
pf_state_hash(struct pf_state_key *sk)
{
	intptr_t	addr = (intptr_t)sk;
	u_int32_t	hash;

	hash = (u_int32_t)((addr >> 6) ^ (addr >> 15));
	return (hash != 0 ? hash : 1);
}
408 
409 #ifdef INET6
410 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)411 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
412 {
413           switch (af) {
414 #ifdef INET
415           case AF_INET:
416                     dst->addr32[0] = src->addr32[0];
417                     break;
418 #endif /* INET */
419           case AF_INET6:
420                     dst->addr32[0] = src->addr32[0];
421                     dst->addr32[1] = src->addr32[1];
422                     dst->addr32[2] = src->addr32[2];
423                     dst->addr32[3] = src->addr32[3];
424                     break;
425           }
426 }
427 #endif /* INET6 */
428 
429 void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)430 pf_init_threshold(struct pf_threshold *threshold,
431     u_int32_t limit, u_int32_t seconds)
432 {
433           threshold->limit = limit * PF_THRESHOLD_MULT;
434           threshold->seconds = seconds;
435           threshold->count = 0;
436           threshold->last = time_second;
437 }
438 
439 void
pf_add_threshold(struct pf_threshold * threshold)440 pf_add_threshold(struct pf_threshold *threshold)
441 {
442           u_int32_t t = time_second, diff = t - threshold->last;
443 
444           if (diff >= threshold->seconds)
445                     threshold->count = 0;
446           else
447                     threshold->count -= threshold->count * diff /
448                         threshold->seconds;
449           threshold->count += PF_THRESHOLD_MULT;
450           threshold->last = t;
451 }
452 
453 int
pf_check_threshold(struct pf_threshold * threshold)454 pf_check_threshold(struct pf_threshold *threshold)
455 {
456           return (threshold->count > threshold->limit);
457 }
458 
459 int
pf_src_connlimit(struct pf_state * state)460 pf_src_connlimit(struct pf_state *state)
461 {
462           int bad = 0;
463           int cpu = mycpu->gd_cpuid;
464 
465           atomic_add_int(&state->src_node->conn, 1);
466           state->src.tcp_est = 1;
467           pf_add_threshold(&state->src_node->conn_rate);
468 
469           if (state->rule.ptr->max_src_conn &&
470               state->rule.ptr->max_src_conn <
471               state->src_node->conn) {
472                     PF_INC_LCOUNTER(LCNT_SRCCONN);
473                     bad++;
474           }
475 
476           if (state->rule.ptr->max_src_conn_rate.limit &&
477               pf_check_threshold(&state->src_node->conn_rate)) {
478                     PF_INC_LCOUNTER(LCNT_SRCCONNRATE);
479                     bad++;
480           }
481 
482           if (!bad)
483                     return 0;
484 
485           if (state->rule.ptr->overload_tbl) {
486                     struct pfr_addr p;
487                     u_int32_t killed = 0;
488 
489                     PF_INC_LCOUNTER(LCNT_OVERLOAD_TABLE);
490                     if (pf_status.debug >= PF_DEBUG_MISC) {
491                               kprintf("pf_src_connlimit: blocking address ");
492                               pf_print_host(&state->src_node->addr, 0,
493                                   state->key[PF_SK_WIRE]->af);
494                     }
495 
496                     bzero(&p, sizeof(p));
497                     p.pfra_af = state->key[PF_SK_WIRE]->af;
498                     switch (state->key[PF_SK_WIRE]->af) {
499 #ifdef INET
500                     case AF_INET:
501                               p.pfra_net = 32;
502                               p.pfra_ip4addr = state->src_node->addr.v4;
503                               break;
504 #endif /* INET */
505 #ifdef INET6
506                     case AF_INET6:
507                               p.pfra_net = 128;
508                               p.pfra_ip6addr = state->src_node->addr.v6;
509                               break;
510 #endif /* INET6 */
511                     }
512 
513                     pfr_insert_kentry(state->rule.ptr->overload_tbl,
514                         &p, time_second);
515 
516                     /* kill existing states if that's required. */
517                     if (state->rule.ptr->flush) {
518                               struct pf_state_key *sk;
519                               struct pf_state *st;
520 
521                               PF_INC_LCOUNTER(LCNT_OVERLOAD_FLUSH);
522                               RB_FOREACH(st, pf_state_tree_id, &tree_id[cpu]) {
523                                         sk = st->key[PF_SK_WIRE];
524                                         /*
525                                          * Kill states from this source.  (Only those
526                                          * from the same rule if PF_FLUSH_GLOBAL is not
527                                          * set).  (Only on current cpu).
528                                          */
529                                         if (sk->af ==
530                                             state->key[PF_SK_WIRE]->af &&
531                                             ((state->direction == PF_OUT &&
532                                             PF_AEQ(&state->src_node->addr,
533                                                   &sk->addr[0], sk->af)) ||
534                                             (state->direction == PF_IN &&
535                                             PF_AEQ(&state->src_node->addr,
536                                                   &sk->addr[1], sk->af))) &&
537                                             (state->rule.ptr->flush &
538                                             PF_FLUSH_GLOBAL ||
539                                             state->rule.ptr == st->rule.ptr)) {
540                                                   st->timeout = PFTM_PURGE;
541                                                   st->src.state = st->dst.state =
542                                                       TCPS_CLOSED;
543                                                   killed++;
544                                         }
545                               }
546                               if (pf_status.debug >= PF_DEBUG_MISC)
547                                         kprintf(", %u states killed", killed);
548                     }
549                     if (pf_status.debug >= PF_DEBUG_MISC)
550                               kprintf("\n");
551           }
552 
553           /* kill this state */
554           state->timeout = PFTM_PURGE;
555           state->src.state = state->dst.state = TCPS_CLOSED;
556 
557           return 1;
558 }
559 
560 int
pf_insert_src_node(struct pf_src_node ** sn,struct pf_rule * rule,struct pf_addr * src,sa_family_t af)561 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
562     struct pf_addr *src, sa_family_t af)
563 {
564           struct pf_src_node  k;
565           int cpu = mycpu->gd_cpuid;
566 
567           bzero(&k, sizeof(k));         /* avoid gcc warnings */
568           if (*sn == NULL) {
569                     k.af = af;
570                     PF_ACPY(&k.addr, src, af);
571                     if (rule->rule_flag & PFRULE_RULESRCTRACK ||
572                         rule->rpool.opts & PF_POOL_STICKYADDR)
573                               k.rule.ptr = rule;
574                     else
575                               k.rule.ptr = NULL;
576                     PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH);
577                     *sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k);
578           }
579           if (*sn == NULL) {
580                     if (!rule->max_src_nodes ||
581                         rule->src_nodes < rule->max_src_nodes)
582                               (*sn) = kmalloc(sizeof(struct pf_src_node),
583                                                   M_PFSRCTREEPL, M_NOWAIT|M_ZERO);
584                     else
585                               PF_INC_LCOUNTER(LCNT_SRCNODES);
586                     if ((*sn) == NULL)
587                               return (-1);
588 
589                     pf_init_threshold(&(*sn)->conn_rate,
590                         rule->max_src_conn_rate.limit,
591                         rule->max_src_conn_rate.seconds);
592 
593                     (*sn)->af = af;
594                     if (rule->rule_flag & PFRULE_RULESRCTRACK ||
595                         rule->rpool.opts & PF_POOL_STICKYADDR)
596                               (*sn)->rule.ptr = rule;
597                     else
598                               (*sn)->rule.ptr = NULL;
599                     PF_ACPY(&(*sn)->addr, src, af);
600                     if (RB_INSERT(pf_src_tree,
601                         &tree_src_tracking[cpu], *sn) != NULL) {
602                               if (pf_status.debug >= PF_DEBUG_MISC) {
603                                         kprintf("pf: src_tree insert failed: ");
604                                         pf_print_host(&(*sn)->addr, 0, af);
605                                         kprintf("\n");
606                               }
607                               kfree(*sn, M_PFSRCTREEPL);
608                               return (-1);
609                     }
610 
611                     /*
612                      * Atomic op required to increment src_nodes in the rule
613                      * because we hold a shared token here (decrements will use
614                      * an exclusive token).
615                      */
616                     (*sn)->creation = time_second;
617                     (*sn)->ruletype = rule->action;
618                     if ((*sn)->rule.ptr != NULL)
619                               atomic_add_int(&(*sn)->rule.ptr->src_nodes, 1);
620                     PF_INC_SCOUNTER(SCNT_SRC_NODE_INSERT);
621                     atomic_add_int(&pf_status.src_nodes, 1);
622           } else {
623                     if (rule->max_src_states &&
624                         (*sn)->states >= rule->max_src_states) {
625                               PF_INC_LCOUNTER(LCNT_SRCSTATES);
626                               return (-1);
627                     }
628           }
629           return (0);
630 }
631 
632 /*
633  * state table (indexed by the pf_state_key structure), normal RBTREE
634  * comparison.
635  */
636 static __inline int
pf_state_compare_key(struct pf_state_key * a,struct pf_state_key * b)637 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
638 {
639           int       diff;
640 
641           if ((diff = a->proto - b->proto) != 0)
642                     return (diff);
643           if ((diff = a->af - b->af) != 0)
644                     return (diff);
645           switch (a->af) {
646 #ifdef INET
647           case AF_INET:
648                     if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
649                               return (1);
650                     if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
651                               return (-1);
652                     if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
653                               return (1);
654                     if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
655                               return (-1);
656                     break;
657 #endif /* INET */
658 #ifdef INET6
659           case AF_INET6:
660                     if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
661                               return (1);
662                     if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
663                               return (-1);
664                     if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
665                               return (1);
666                     if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
667                               return (-1);
668                     if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
669                               return (1);
670                     if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
671                               return (-1);
672                     if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
673                               return (1);
674                     if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
675                               return (-1);
676                     if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
677                               return (1);
678                     if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
679                               return (-1);
680                     if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
681                               return (1);
682                     if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
683                               return (-1);
684                     if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
685                               return (1);
686                     if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
687                               return (-1);
688                     if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
689                               return (1);
690                     if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
691                               return (-1);
692                     break;
693 #endif /* INET6 */
694           }
695 
696           if ((diff = a->port[0] - b->port[0]) != 0)
697                     return (diff);
698           if ((diff = a->port[1] - b->port[1]) != 0)
699                     return (diff);
700 
701           return (0);
702 }
703 
704 /*
705  * Used for RB_FIND only, compare in the reverse direction.  The
706  * element to be reversed is always (a), since we obviously can't
707  * reverse the state tree depicted by (b).
708  */
709 static __inline int
pf_state_compare_rkey(struct pf_state_key * a,struct pf_state_key * b)710 pf_state_compare_rkey(struct pf_state_key *a, struct pf_state_key *b)
711 {
712           int       diff;
713 
714           if ((diff = a->proto - b->proto) != 0)
715                     return (diff);
716           if ((diff = a->af - b->af) != 0)
717                     return (diff);
718           switch (a->af) {
719 #ifdef INET
720           case AF_INET:
721                     if (a->addr[1].addr32[0] > b->addr[0].addr32[0])
722                               return (1);
723                     if (a->addr[1].addr32[0] < b->addr[0].addr32[0])
724                               return (-1);
725                     if (a->addr[0].addr32[0] > b->addr[1].addr32[0])
726                               return (1);
727                     if (a->addr[0].addr32[0] < b->addr[1].addr32[0])
728                               return (-1);
729                     break;
730 #endif /* INET */
731 #ifdef INET6
732           case AF_INET6:
733                     if (a->addr[1].addr32[3] > b->addr[0].addr32[3])
734                               return (1);
735                     if (a->addr[1].addr32[3] < b->addr[0].addr32[3])
736                               return (-1);
737                     if (a->addr[0].addr32[3] > b->addr[1].addr32[3])
738                               return (1);
739                     if (a->addr[0].addr32[3] < b->addr[1].addr32[3])
740                               return (-1);
741                     if (a->addr[1].addr32[2] > b->addr[0].addr32[2])
742                               return (1);
743                     if (a->addr[1].addr32[2] < b->addr[0].addr32[2])
744                               return (-1);
745                     if (a->addr[0].addr32[2] > b->addr[1].addr32[2])
746                               return (1);
747                     if (a->addr[0].addr32[2] < b->addr[1].addr32[2])
748                               return (-1);
749                     if (a->addr[1].addr32[1] > b->addr[0].addr32[1])
750                               return (1);
751                     if (a->addr[1].addr32[1] < b->addr[0].addr32[1])
752                               return (-1);
753                     if (a->addr[0].addr32[1] > b->addr[1].addr32[1])
754                               return (1);
755                     if (a->addr[0].addr32[1] < b->addr[1].addr32[1])
756                               return (-1);
757                     if (a->addr[1].addr32[0] > b->addr[0].addr32[0])
758                               return (1);
759                     if (a->addr[1].addr32[0] < b->addr[0].addr32[0])
760                               return (-1);
761                     if (a->addr[0].addr32[0] > b->addr[1].addr32[0])
762                               return (1);
763                     if (a->addr[0].addr32[0] < b->addr[1].addr32[0])
764                               return (-1);
765                     break;
766 #endif /* INET6 */
767           }
768 
769           if ((diff = a->port[1] - b->port[0]) != 0)
770                     return (diff);
771           if ((diff = a->port[0] - b->port[1]) != 0)
772                     return (diff);
773 
774           return (0);
775 }
776 
777 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)778 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
779 {
780           if (a->id > b->id)
781                     return (1);
782           if (a->id < b->id)
783                     return (-1);
784           if (a->creatorid > b->creatorid)
785                     return (1);
786           if (a->creatorid < b->creatorid)
787                     return (-1);
788 
789           return (0);
790 }
791 
792 int
pf_state_key_attach(struct pf_state_key * sk,struct pf_state * s,int idx)793 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
794 {
795           struct pf_state_item          *si;
796           struct pf_state_key     *cur;
797           int cpu;
798           int error;
799 
800           /*
801            * PFSTATE_STACK_GLOBAL is set when the state might not hash to the
802            * current cpu.  The keys are managed on the global statetbl tree
803            * for this case.  Only translations (RDR, NAT) can cause this.
804            *
805            * When this flag is not set we must still check the global statetbl
806            * for a collision, and if we find one we set the HALF_DUPLEX flag
807            * in the state.
808            */
809           if (s->state_flags & PFSTATE_STACK_GLOBAL) {
810                     cpu = ncpus;
811                     lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE);
812           } else {
813                     cpu = mycpu->gd_cpuid;
814                     lockmgr(&pf_global_statetbl_lock, LK_SHARED);
815           }
816           KKASSERT(s->key[idx] == NULL);          /* XXX handle this? */
817 
818           if (pf_status.debug >= PF_DEBUG_MISC) {
819                     kprintf("state_key attach cpu %d (%08x:%d) %s (%08x:%d)\n",
820                               cpu,
821                               ntohl(sk->addr[0].addr32[0]), ntohs(sk->port[0]),
822                               (idx == PF_SK_WIRE ? "->" : "<-"),
823                               ntohl(sk->addr[1].addr32[0]), ntohs(sk->port[1]));
824           }
825 
826           /*
827            * Check whether (e.g.) a PASS rule being put on a per-cpu tree
828            * collides with a translation rule on the global tree.  This is
829            * NOT an error.  We *WANT* to establish state for this case so the
830            * packet path is short-cutted and doesn't need to scan the ruleset
831            * on every packet.  But the established state will only see one
832            * side of a two-way packet conversation.  To prevent this from
833            * causing problems (e.g. generating a RST), we force PFSTATE_SLOPPY
834            * to be set on the established state.
835            *
836            * A collision against RDR state can only occur with a PASS IN in the
837            * opposite direction or a PASS OUT in the forwards direction.  This
838            * is because RDRs are processed on the input side.
839            *
840            * A collision against NAT state can only occur with a PASS IN in the
841            * forwards direction or a PASS OUT in the opposite direction.  This
842            * is because NATs are processed on the output side.
843            *
844            * In both situations we need to do a reverse addr/port test because
845            * the PASS IN or PASS OUT only establishes if it doesn't match the
846            * established RDR state in the forwards direction.  The direction
847            * flag has to be ignored (it will be one way for a PASS IN and the
848            * other way for a PASS OUT).
849            *
850            * pf_global_statetbl_lock will be locked shared when testing and
851            * not entering into the global state table.
852            */
853           if (cpu != ncpus &&
854               (cur = RB_FIND(pf_state_rtree,
855                                  (struct pf_state_rtree *)&pf_statetbl[ncpus],
856                                  sk)) != NULL) {
857                     TAILQ_FOREACH(si, &cur->states, entry) {
858                               /*
859                                * NOTE: We must ignore direction mismatches.
860                                */
861                               if (si->s->kif == s->kif) {
862                                         s->state_flags |= PFSTATE_HALF_DUPLEX |
863                                                               PFSTATE_SLOPPY;
864                                         if (pf_status.debug >= PF_DEBUG_MISC) {
865                                                   kprintf(
866                                                       "pf: %s key attach collision "
867                                                       "on %s: ",
868                                                       (idx == PF_SK_WIRE) ?
869                                                       "wire" : "stack",
870                                                       s->kif->pfik_name);
871                                                   pf_print_state_parts(s,
872                                                       (idx == PF_SK_WIRE) ? sk : NULL,
873                                                       (idx == PF_SK_STACK) ? sk : NULL);
874                                                   kprintf("\n");
875                                         }
876                                         break;
877                               }
878                     }
879           }
880 
881           /*
882            * Enter into either the per-cpu or the global state table.
883            *
884            * pf_global_statetbl_lock will be locked exclusively when entering
885            * into the global state table.
886            */
887           if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl[cpu], sk)) != NULL) {
888                     /* key exists. check for same kif, if none, add to key */
889                     TAILQ_FOREACH(si, &cur->states, entry) {
890                               if (si->s->kif == s->kif &&
891                                   si->s->direction == s->direction) {
892                                         if (pf_status.debug >= PF_DEBUG_MISC) {
893                                                   kprintf(
894                                                       "pf: %s key attach failed on %s: ",
895                                                       (idx == PF_SK_WIRE) ?
896                                                       "wire" : "stack",
897                                                       s->kif->pfik_name);
898                                                   pf_print_state_parts(s,
899                                                       (idx == PF_SK_WIRE) ? sk : NULL,
900                                                       (idx == PF_SK_STACK) ? sk : NULL);
901                                                   kprintf("\n");
902                                         }
903                                         kfree(sk, M_PFSTATEKEYPL);
904                                         error = -1;
905                                         goto failed;        /* collision! */
906                               }
907                     }
908                     kfree(sk, M_PFSTATEKEYPL);
909 
910                     s->key[idx] = cur;
911           } else {
912                     s->key[idx] = sk;
913           }
914 
915           if ((si = kmalloc(sizeof(struct pf_state_item),
916                                 M_PFSTATEITEMPL, M_NOWAIT)) == NULL) {
917                     pf_state_key_detach(s, idx);
918                     error = -1;
919                     goto failed;        /* collision! */
920           }
921           si->s = s;
922 
923           /* list is sorted, if-bound states before floating */
924           if (s->kif == pfi_all)
925                     TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
926           else
927                     TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
928 
929           error = 0;
930 failed:
931           lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
932           return error;
933 }
934 
/*
 * NOTE: Normally reached indirectly via the purge thread with pf_token
 *	 exclusively locked.  It is also called directly by
 *	 pf_state_insert() to back out a state whose insertion failed.
 */
939 void
pf_detach_state(struct pf_state * s)940 pf_detach_state(struct pf_state *s)
941 {
942           if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
943                     s->key[PF_SK_WIRE] = NULL;
944 
945           if (s->key[PF_SK_STACK] != NULL)
946                     pf_state_key_detach(s, PF_SK_STACK);
947 
948           if (s->key[PF_SK_WIRE] != NULL)
949                     pf_state_key_detach(s, PF_SK_WIRE);
950 }
951 
/*
 * NOTE: Normally reached indirectly via the purge thread with pf_token
 *	 exclusively locked.  It is also called directly by
 *	 pf_state_key_attach() and pf_state_insert() to undo a partially
 *	 attached state.
 */
956 void
pf_state_key_detach(struct pf_state * s,int idx)957 pf_state_key_detach(struct pf_state *s, int idx)
958 {
959           struct pf_state_item          *si;
960           int cpu;
961 
962           /*
963            * PFSTATE_STACK_GLOBAL is set for translations when the translated
964            * address/port is not localized to the same cpu that the untranslated
965            * address/port is on.  The wire pf_state_key is managed on the global
966            * statetbl tree for this case.
967            */
968           if (s->state_flags & PFSTATE_STACK_GLOBAL) {
969                     cpu = ncpus;
970                     lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE);
971           } else {
972                     cpu = mycpu->gd_cpuid;
973           }
974 
975           si = TAILQ_FIRST(&s->key[idx]->states);
976           while (si && si->s != s)
977                     si = TAILQ_NEXT(si, entry);
978 
979           if (si) {
980                     TAILQ_REMOVE(&s->key[idx]->states, si, entry);
981                     kfree(si, M_PFSTATEITEMPL);
982           }
983 
984           if (TAILQ_EMPTY(&s->key[idx]->states)) {
985                     RB_REMOVE(pf_state_tree, &pf_statetbl[cpu], s->key[idx]);
986                     if (s->key[idx]->reverse)
987                               s->key[idx]->reverse->reverse = NULL;
988                     if (s->key[idx]->inp)
989                               s->key[idx]->inp->inp_pf_sk = NULL;
990                     kfree(s->key[idx], M_PFSTATEKEYPL);
991           }
992           s->key[idx] = NULL;
993 
994           if (s->state_flags & PFSTATE_STACK_GLOBAL)
995                     lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
996 }
997 
998 struct pf_state_key *
pf_alloc_state_key(int pool_flags)999 pf_alloc_state_key(int pool_flags)
1000 {
1001           struct pf_state_key *sk;
1002 
1003           sk = kmalloc(sizeof(struct pf_state_key), M_PFSTATEKEYPL, pool_flags);
1004           if (sk) {
1005                     TAILQ_INIT(&sk->states);
1006           }
1007           return (sk);
1008 }
1009 
1010 int
pf_state_key_setup(struct pf_pdesc * pd,struct pf_rule * nr,struct pf_state_key ** skw,struct pf_state_key ** sks,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_addr * saddr,struct pf_addr * daddr,u_int16_t sport,u_int16_t dport)1011 pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
1012           struct pf_state_key **skw, struct pf_state_key **sks,
1013           struct pf_state_key **skp, struct pf_state_key **nkp,
1014           struct pf_addr *saddr, struct pf_addr *daddr,
1015           u_int16_t sport, u_int16_t dport)
1016 {
1017           KKASSERT((*skp == NULL && *nkp == NULL));
1018 
1019           if ((*skp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
1020                     return (ENOMEM);
1021 
1022           PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
1023           PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
1024           (*skp)->port[pd->sidx] = sport;
1025           (*skp)->port[pd->didx] = dport;
1026           (*skp)->proto = pd->proto;
1027           (*skp)->af = pd->af;
1028 
1029           if (nr != NULL) {
1030                     if ((*nkp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
1031                               return (ENOMEM); /* caller must handle cleanup */
1032 
1033                     /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
1034                     PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
1035                     PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
1036                     (*nkp)->port[0] = (*skp)->port[0];
1037                     (*nkp)->port[1] = (*skp)->port[1];
1038                     (*nkp)->proto = pd->proto;
1039                     (*nkp)->af = pd->af;
1040           } else {
1041                     *nkp = *skp;
1042           }
1043 
1044           if (pd->dir == PF_IN) {
1045                     *skw = *skp;
1046                     *sks = *nkp;
1047           } else {
1048                     *sks = *skp;
1049                     *skw = *nkp;
1050           }
1051           return (0);
1052 }
1053 
1054 /*
1055  * Insert pf_state with one or two state keys (allowing a reverse path lookup
1056  * which is used by NAT).  In the NAT case skw is the initiator (?) and
1057  * sks is the target.
1058  */
1059 int
pf_state_insert(struct pfi_kif * kif,struct pf_state_key * skw,struct pf_state_key * sks,struct pf_state * s)1060 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
1061                     struct pf_state_key *sks, struct pf_state *s)
1062 {
1063           int cpu = mycpu->gd_cpuid;
1064 
1065           s->kif = kif;
1066           s->cpuid = cpu;
1067 
1068           if (skw == sks) {
1069                     if (pf_state_key_attach(skw, s, PF_SK_WIRE))
1070                               return (-1);
1071                     s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
1072           } else {
1073                     /*
1074                     skw->reverse = sks;
1075                     sks->reverse = skw;
1076                     */
1077                     if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
1078                               kfree(sks, M_PFSTATEKEYPL);
1079                               return (-1);
1080                     }
1081                     if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
1082                               pf_state_key_detach(s, PF_SK_WIRE);
1083                               return (-1);
1084                     }
1085           }
1086 
1087           if (s->id == 0 && s->creatorid == 0) {
1088                     u_int64_t sid;
1089 
1090                     sid = atomic_fetchadd_long(&pf_status.stateid, 1);
1091                     s->id = htobe64(sid);
1092                     s->creatorid = pf_status.hostid;
1093           }
1094 
1095           /*
1096            * Calculate hash code for altq
1097            */
1098           s->hash = crc32(s->key[PF_SK_WIRE], PF_STATE_KEY_HASH_LENGTH);
1099 
1100           if (RB_INSERT(pf_state_tree_id, &tree_id[cpu], s) != NULL) {
1101                     if (pf_status.debug >= PF_DEBUG_MISC) {
1102                               kprintf("pf: state insert failed: "
1103                                   "id: %016jx creatorid: %08x",
1104                                     (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
1105                               if (s->sync_flags & PFSTATE_FROMSYNC)
1106                                         kprintf(" (from sync)");
1107                               kprintf("\n");
1108                     }
1109                     pf_detach_state(s);
1110                     return (-1);
1111           }
1112           TAILQ_INSERT_TAIL(&state_list[cpu], s, entry_list);
1113           PF_INC_FCOUNTER(FCNT_STATE_INSERT);
1114           atomic_add_int(&pf_status.states, 1);
1115           pfi_kif_ref(kif, PFI_KIF_REF_STATE);
1116           pfsync_insert_state(s);
1117           return (0);
1118 }
1119 
1120 struct pf_state *
pf_find_state_byid(struct pf_state_cmp * key)1121 pf_find_state_byid(struct pf_state_cmp *key)
1122 {
1123           int cpu = mycpu->gd_cpuid;
1124 
1125           PF_INC_FCOUNTER(FCNT_STATE_SEARCH);
1126 
1127           return (RB_FIND(pf_state_tree_id, &tree_id[cpu],
1128                               (struct pf_state *)key));
1129 }
1130 
/*
 * WARNING! May return a state structure that was localized to another cpu,
 *	    destruction is typically protected by the callers pf_token.
 *	    Once its creation has completed, the element can only be
 *	    destroyed by the purge thread.
 */
1136 struct pf_state *
pf_find_state(struct pfi_kif * kif,struct pf_state_key_cmp * key,u_int dir,struct mbuf * m)1137 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
1138                 struct mbuf *m)
1139 {
1140           struct pf_state_key *skey = (void *)key;
1141           struct pf_state_key *sk;
1142           struct pf_state_item          *si;
1143           struct pf_state *s;
1144           int cpu = mycpu->gd_cpuid;
1145           int globalstl = 0;
1146 
1147           PF_INC_FCOUNTER(FCNT_STATE_SEARCH);
1148 
1149           if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
1150               ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) {
1151                     sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
1152           } else {
1153                     sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey);
1154                     if (sk == NULL) {
1155                               lockmgr(&pf_global_statetbl_lock, LK_SHARED);
1156                               sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey);
1157                               if (sk == NULL) {
1158                                         lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
1159                                         return (NULL);
1160                               }
1161                               globalstl = 1;
1162                     }
1163                     if (dir == PF_OUT && m->m_pkthdr.pf.statekey) {
1164                               ((struct pf_state_key *)
1165                                   m->m_pkthdr.pf.statekey)->reverse = sk;
1166                               sk->reverse = m->m_pkthdr.pf.statekey;
1167                     }
1168           }
1169           if (dir == PF_OUT)
1170                     m->m_pkthdr.pf.statekey = NULL;
1171 
1172           /* list is sorted, if-bound states before floating ones */
1173           TAILQ_FOREACH(si, &sk->states, entry) {
1174                     if ((si->s->kif == pfi_all || si->s->kif == kif) &&
1175                         sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
1176                                                     si->s->key[PF_SK_STACK])) {
1177                               break;
1178                     }
1179           }
1180 
1181           /*
1182            * Extract state before potentially releasing the global statetbl
1183            * lock.  Ignore the state if the create is still in-progress as
1184            * it can be deleted out from under us by the owning localized cpu.
1185            * However, if CREATEINPROG is not set, state can only be deleted
1186            * by the purge thread which we are protected from via our shared
1187            * pf_token.
1188            */
1189           if (si) {
1190                     s = si->s;
1191                     if (s && (s->state_flags & PFSTATE_CREATEINPROG))
1192                               s = NULL;
1193           } else {
1194                     s = NULL;
1195           }
1196           if (globalstl)
1197                     lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
1198           return s;
1199 }
1200 
1201 /*
1202  * WARNING! May return a state structure that was localized to another cpu,
1203  *            destruction is typically protected by the callers pf_token.
1204  */
1205 struct pf_state *
pf_find_state_all(struct pf_state_key_cmp * key,u_int dir,int * more)1206 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1207 {
1208           struct pf_state_key *skey = (void *)key;
1209           struct pf_state_key *sk;
1210           struct pf_state_item          *si, *ret = NULL;
1211           struct pf_state               *s;
1212           int cpu = mycpu->gd_cpuid;
1213           int globalstl = 0;
1214 
1215           PF_INC_FCOUNTER(FCNT_STATE_SEARCH);
1216 
1217           sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey);
1218           if (sk == NULL) {
1219                     lockmgr(&pf_global_statetbl_lock, LK_SHARED);
1220                     sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey);
1221                     globalstl = 1;
1222           }
1223           if (sk != NULL) {
1224                     TAILQ_FOREACH(si, &sk->states, entry)
1225                               if (dir == PF_INOUT ||
1226                                   (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
1227                                   si->s->key[PF_SK_STACK]))) {
1228                                         if (more == NULL) {
1229                                                   ret = si;
1230                                                   break;
1231                                         }
1232                                         if (ret)
1233                                                   (*more)++;
1234                                         else
1235                                                   ret = si;
1236                               }
1237           }
1238 
1239           /*
1240            * Extract state before potentially releasing the global statetbl
1241            * lock.  Ignore the state if the create is still in-progress as
1242            * it can be deleted out from under us by the owning localized cpu.
1243            * However, if CREATEINPROG is not set, state can only be deleted
1244            * by the purge thread which we are protected from via our shared
1245            * pf_token.
1246            */
1247           if (ret) {
1248                     s = ret->s;
1249                     if (s && (s->state_flags & PFSTATE_CREATEINPROG))
1250                               s = NULL;
1251           } else {
1252                     s = NULL;
1253           }
1254           if (globalstl)
1255                     lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
1256           return s;
1257 }
1258 
1259 /* END state table stuff */
1260 
1261 void
pf_purge_thread(void * v)1262 pf_purge_thread(void *v)
1263 {
1264           globaldata_t save_gd = mycpu;
1265           int nloops = 0;
1266           int locked = 0;
1267           int nn;
1268           int endingit;
1269 
1270           for (;;) {
1271                     tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
1272 
1273                     endingit = pf_end_threads;
1274 
1275                     for (nn = 0; nn < ncpus; ++nn) {
1276                               lwkt_setcpu_self(globaldata_find(nn));
1277 
1278                               lwkt_gettoken(&pf_token);
1279                               lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1280                               crit_enter();
1281 
1282                               /*
1283                                * process a fraction of the state table every second
1284                                */
1285                               if(!pf_purge_expired_states(
1286                                         1 + (pf_status.states /
1287                                              pf_default_rule.timeout[
1288                                                   PFTM_INTERVAL]), 0)) {
1289                                         pf_purge_expired_states(
1290                                                   1 + (pf_status.states /
1291                                                        pf_default_rule.timeout[
1292                                                             PFTM_INTERVAL]), 1);
1293                               }
1294 
1295                               /*
1296                                * purge other expired types every PFTM_INTERVAL
1297                                * seconds
1298                                */
1299                               if (++nloops >=
1300                                   pf_default_rule.timeout[PFTM_INTERVAL]) {
1301                                         pf_purge_expired_fragments();
1302                                         if (!pf_purge_expired_src_nodes(locked)) {
1303                                                   pf_purge_expired_src_nodes(1);
1304                                         }
1305                                         nloops = 0;
1306                               }
1307 
1308                               /*
1309                                * If terminating the thread, clean everything out
1310                                * (on all cpus).
1311                                */
1312                               if (endingit) {
1313                                         pf_purge_expired_states(pf_status.states, 0);
1314                                         pf_purge_expired_fragments();
1315                                         pf_purge_expired_src_nodes(1);
1316                               }
1317 
1318                               crit_exit();
1319                               lockmgr(&pf_consistency_lock, LK_RELEASE);
1320                               lwkt_reltoken(&pf_token);
1321                     }
1322                     lwkt_setcpu_self(save_gd);
1323                     if (endingit)
1324                               break;
1325           }
1326 
1327           /*
1328            * Thread termination
1329            */
1330           pf_end_threads++;
1331           wakeup(pf_purge_thread);
1332           kthread_exit();
1333 }
1334 
1335 u_int32_t
pf_state_expires(const struct pf_state * state)1336 pf_state_expires(const struct pf_state *state)
1337 {
1338           u_int32_t timeout;
1339           u_int32_t start;
1340           u_int32_t end;
1341           u_int32_t states;
1342 
1343           /* handle all PFTM_* > PFTM_MAX here */
1344           if (state->timeout == PFTM_PURGE)
1345                     return (time_second);
1346           if (state->timeout == PFTM_UNTIL_PACKET)
1347                     return (0);
1348           KKASSERT(state->timeout != PFTM_UNLINKED);
1349           KKASSERT(state->timeout < PFTM_MAX);
1350           timeout = state->rule.ptr->timeout[state->timeout];
1351           if (!timeout)
1352                     timeout = pf_default_rule.timeout[state->timeout];
1353           start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1354           if (start) {
1355                     end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1356                     states = state->rule.ptr->states_cur;
1357           } else {
1358                     start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1359                     end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1360                     states = pf_status.states;
1361           }
1362 
1363           /*
1364            * If the number of states exceeds allowed values, adaptively
1365            * timeout the state more quickly.  This can be very dangerous
1366            * to legitimate connections, however, so defray the timeout
1367            * based on the packet count.
1368            *
1369            * Retain from 0-100% based on number of states.
1370            *
1371            * Recover up to 50% of the lost portion if there was
1372            * packet traffic (100 pkts = 50%).
1373            */
1374           if (end && states > start && start < end) {
1375                     u_int32_t n;                            /* timeout retention 0-100% */
1376                     u_int64_t pkts;
1377 #if 0
1378                     static struct krate boorate = { .freq = 1 };
1379 #endif
1380 
1381                     /*
1382                      * Reduce timeout by n% (0-100)
1383                      */
1384                     n = (states - start) * 100 / (end - start);
1385                     if (n > 100)
1386                               n = 0;
1387                     else
1388                               n = 100 - n;
1389 
1390                     /*
1391                      * But claw back some of the reduction based on packet
1392                      * count associated with the state.
1393                      */
1394                     pkts = state->packets[0] + state->packets[1];
1395                     if (pkts > 100)
1396                               pkts = 100;
1397 #if 0
1398                     krateprintf(&boorate, "timeout %-4u n=%u pkts=%-3lu -> %lu\n",
1399                               timeout, n, pkts, n + (100 - n) * pkts / 200);
1400 #endif
1401 
1402                     n += (100 - n) * pkts / 200;  /* recover by up-to 50% */
1403                     timeout = timeout * n / 100;
1404 
1405           }
1406           return (state->expire + timeout);
1407 }
1408 
1409 /*
1410  * (called with exclusive pf_token)
1411  */
1412 int
pf_purge_expired_src_nodes(int waslocked)1413 pf_purge_expired_src_nodes(int waslocked)
1414 {
1415           struct pf_src_node *cur, *next;
1416           int locked = waslocked;
1417           int cpu = mycpu->gd_cpuid;
1418 
1419           for (cur = RB_MIN(pf_src_tree, &tree_src_tracking[cpu]);
1420                cur;
1421                cur = next) {
1422                     next = RB_NEXT(pf_src_tree, &tree_src_tracking[cpu], cur);
1423 
1424                     if (cur->states <= 0 && cur->expire <= time_second) {
1425                                if (!locked) {
1426                                          lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1427                                          next = RB_NEXT(pf_src_tree,
1428                                              &tree_src_tracking[cpu], cur);
1429                                          locked = 1;
1430                                }
1431                                if (cur->rule.ptr != NULL) {
1432                                         /*
1433                                          * decrements in rule should be ok, token is
1434                                          * held exclusively in this code path.
1435                                          */
1436                                          atomic_add_int(&cur->rule.ptr->src_nodes, -1);
1437                                          if (cur->rule.ptr->states_cur <= 0 &&
1438                                              cur->rule.ptr->max_src_nodes <= 0)
1439                                                    pf_rm_rule(NULL, cur->rule.ptr);
1440                                }
1441                                RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], cur);
1442                                PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
1443                                atomic_add_int(&pf_status.src_nodes, -1);
1444                                kfree(cur, M_PFSRCTREEPL);
1445                     }
1446           }
1447           if (locked && !waslocked)
1448                     lockmgr(&pf_consistency_lock, LK_RELEASE);
1449           return(1);
1450 }
1451 
1452 void
pf_src_tree_remove_state(struct pf_state * s)1453 pf_src_tree_remove_state(struct pf_state *s)
1454 {
1455           u_int32_t timeout;
1456 
1457           if (s->src_node != NULL) {
1458                     if (s->src.tcp_est)
1459                               atomic_add_int(&s->src_node->conn, -1);
1460                     if (--s->src_node->states <= 0) {
1461                               timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1462                               if (!timeout) {
1463                                         timeout =
1464                                             pf_default_rule.timeout[PFTM_SRC_NODE];
1465                               }
1466                               s->src_node->expire = time_second + timeout;
1467                     }
1468           }
1469           if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1470                     if (--s->nat_src_node->states <= 0) {
1471                               timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1472                               if (!timeout)
1473                                         timeout =
1474                                             pf_default_rule.timeout[PFTM_SRC_NODE];
1475                               s->nat_src_node->expire = time_second + timeout;
1476                     }
1477           }
1478           s->src_node = s->nat_src_node = NULL;
1479 }
1480 
1481 /* callers should be at crit_enter() */
1482 void
pf_unlink_state(struct pf_state * cur)1483 pf_unlink_state(struct pf_state *cur)
1484 {
1485           int cpu = mycpu->gd_cpuid;
1486 
1487           if (cur->src.state == PF_TCPS_PROXY_DST) {
1488                     /* XXX wire key the right one? */
1489                     pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
1490                         &cur->key[PF_SK_WIRE]->addr[1],
1491                         &cur->key[PF_SK_WIRE]->addr[0],
1492                         cur->key[PF_SK_WIRE]->port[1],
1493                         cur->key[PF_SK_WIRE]->port[0],
1494                         cur->src.seqhi, cur->src.seqlo + 1,
1495                         TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1496           }
1497           RB_REMOVE(pf_state_tree_id, &tree_id[cpu], cur);
1498           if (cur->creatorid == pf_status.hostid)
1499                     pfsync_delete_state(cur);
1500           cur->timeout = PFTM_UNLINKED;
1501           pf_src_tree_remove_state(cur);
1502           pf_detach_state(cur);
1503 }
1504 
1505 /*
1506  * callers should be at crit_enter() and hold pf_consistency_lock exclusively.
1507  * pf_token must also be held exclusively.
1508  */
1509 void
pf_free_state(struct pf_state * cur)1510 pf_free_state(struct pf_state *cur)
1511 {
1512           int cpu = mycpu->gd_cpuid;
1513 
1514           KKASSERT(cur->cpuid == cpu);
1515 
1516           if (pfsyncif != NULL &&
1517               (pfsyncif->sc_bulk_send_next == cur ||
1518               pfsyncif->sc_bulk_terminator == cur))
1519                     return;
1520           KKASSERT(cur->timeout == PFTM_UNLINKED);
1521           /*
1522            * decrements in rule should be ok, token is
1523            * held exclusively in this code path.
1524            */
1525           if (--cur->rule.ptr->states_cur <= 0 &&
1526               cur->rule.ptr->src_nodes <= 0)
1527                     pf_rm_rule(NULL, cur->rule.ptr);
1528           if (cur->nat_rule.ptr != NULL) {
1529                     if (--cur->nat_rule.ptr->states_cur <= 0 &&
1530                               cur->nat_rule.ptr->src_nodes <= 0) {
1531                               pf_rm_rule(NULL, cur->nat_rule.ptr);
1532                     }
1533           }
1534           if (cur->anchor.ptr != NULL) {
1535                     if (--cur->anchor.ptr->states_cur <= 0)
1536                               pf_rm_rule(NULL, cur->anchor.ptr);
1537           }
1538           pf_normalize_tcp_cleanup(cur);
1539           pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1540 
1541           /*
1542            * We may be freeing pf_purge_expired_states()'s saved scan entry,
1543            * adjust it if necessary.
1544            */
1545           if (purge_cur[cpu] == cur) {
1546                     kprintf("PURGE CONFLICT\n");
1547                     purge_cur[cpu] = TAILQ_NEXT(purge_cur[cpu], entry_list);
1548           }
1549           TAILQ_REMOVE(&state_list[cpu], cur, entry_list);
1550           if (cur->tag)
1551                     pf_tag_unref(cur->tag);
1552           kfree(cur, M_PFSTATEPL);
1553           PF_INC_FCOUNTER(FCNT_STATE_REMOVALS);
1554           atomic_add_int(&pf_status.states, -1);
1555 }
1556 
1557 int
pf_purge_expired_states(u_int32_t maxcheck,int waslocked)1558 pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1559 {
1560           struct pf_state               *cur;
1561           int locked = waslocked;
1562           int cpu = mycpu->gd_cpuid;
1563 
1564           while (maxcheck--) {
1565                     /*
1566                      * Wrap to start of list when we hit the end
1567                      */
1568                     cur = purge_cur[cpu];
1569                     if (cur == NULL) {
1570                               cur = TAILQ_FIRST(&state_list[cpu]);
1571                               if (cur == NULL)
1572                                         break;    /* list empty */
1573                     }
1574 
1575                     /*
1576                      * Setup next (purge_cur) while we process this one.  If
1577                      * we block and something else deletes purge_cur,
1578                      * pf_free_state() will adjust it further ahead.
1579                      */
1580                     purge_cur[cpu] = TAILQ_NEXT(cur, entry_list);
1581 
1582                     if (cur->timeout == PFTM_UNLINKED) {
1583                               /* free unlinked state */
1584                               if (! locked) {
1585                                         lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1586                                         locked = 1;
1587                               }
1588                               pf_free_state(cur);
1589                     } else if (pf_state_expires(cur) <= time_second) {
1590                               /* unlink and free expired state */
1591                               pf_unlink_state(cur);
1592                               if (! locked) {
1593                                         if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
1594                                                   return (0);
1595                                         locked = 1;
1596                               }
1597                               pf_free_state(cur);
1598                     }
1599           }
1600 
1601           if (locked)
1602                     lockmgr(&pf_consistency_lock, LK_RELEASE);
1603           return (1);
1604 }
1605 
1606 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1607 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1608 {
1609           if (aw->type != PF_ADDR_TABLE)
1610                     return (0);
1611           if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1612                     return (1);
1613           return (0);
1614 }
1615 
1616 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1617 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1618 {
1619           if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1620                     return;
1621           pfr_detach_table(aw->p.tbl);
1622           aw->p.tbl = NULL;
1623 }
1624 
1625 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1626 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1627 {
1628           struct pfr_ktable *kt = aw->p.tbl;
1629 
1630           if (aw->type != PF_ADDR_TABLE || kt == NULL)
1631                     return;
1632           if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1633                     kt = kt->pfrkt_root;
1634           aw->p.tbl = NULL;
1635           aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1636                     kt->pfrkt_cnt : -1;
1637 }
1638 
1639 void
pf_print_host(struct pf_addr * addr,u_int16_t p,sa_family_t af)1640 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1641 {
1642           switch (af) {
1643 #ifdef INET
1644           case AF_INET: {
1645                     u_int32_t a = ntohl(addr->addr32[0]);
1646                     kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1647                         (a>>8)&255, a&255);
1648                     if (p) {
1649                               p = ntohs(p);
1650                               kprintf(":%u", p);
1651                     }
1652                     break;
1653           }
1654 #endif /* INET */
1655 #ifdef INET6
1656           case AF_INET6: {
1657                     u_int16_t b;
1658                     u_int8_t i, curstart, curend, maxstart, maxend;
1659                     curstart = curend = maxstart = maxend = 255;
1660                     for (i = 0; i < 8; i++) {
1661                               if (!addr->addr16[i]) {
1662                                         if (curstart == 255)
1663                                                   curstart = i;
1664                                         curend = i;
1665                               } else {
1666                                         if ((curend - curstart) >
1667                                             (maxend - maxstart)) {
1668                                                   maxstart = curstart;
1669                                                   maxend = curend;
1670                                         }
1671                                         curstart = curend = 255;
1672                               }
1673                     }
1674                     if ((curend - curstart) >
1675                         (maxend - maxstart)) {
1676                               maxstart = curstart;
1677                               maxend = curend;
1678                     }
1679                     for (i = 0; i < 8; i++) {
1680                               if (i >= maxstart && i <= maxend) {
1681                                         if (i == 0)
1682                                                   kprintf(":");
1683                                         if (i == maxend)
1684                                                   kprintf(":");
1685                               } else {
1686                                         b = ntohs(addr->addr16[i]);
1687                                         kprintf("%x", b);
1688                                         if (i < 7)
1689                                                   kprintf(":");
1690                               }
1691                     }
1692                     if (p) {
1693                               p = ntohs(p);
1694                               kprintf("[%u]", p);
1695                     }
1696                     break;
1697           }
1698 #endif /* INET6 */
1699           }
1700 }
1701 
1702 void
pf_print_state(struct pf_state * s)1703 pf_print_state(struct pf_state *s)
1704 {
1705           pf_print_state_parts(s, NULL, NULL);
1706 }
1707 
1708 void
pf_print_state_parts(struct pf_state * s,struct pf_state_key * skwp,struct pf_state_key * sksp)1709 pf_print_state_parts(struct pf_state *s,
1710     struct pf_state_key *skwp, struct pf_state_key *sksp)
1711 {
1712           struct pf_state_key *skw, *sks;
1713           u_int8_t proto, dir;
1714 
1715           /* Do our best to fill these, but they're skipped if NULL */
1716           skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1717           sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1718           proto = skw ? skw->proto : (sks ? sks->proto : 0);
1719           dir = s ? s->direction : 0;
1720 
1721           switch (proto) {
1722           case IPPROTO_TCP:
1723                     kprintf("TCP ");
1724                     break;
1725           case IPPROTO_UDP:
1726                     kprintf("UDP ");
1727                     break;
1728           case IPPROTO_ICMP:
1729                     kprintf("ICMP ");
1730                     break;
1731           case IPPROTO_ICMPV6:
1732                     kprintf("ICMPV6 ");
1733                     break;
1734           default:
1735                     kprintf("%u ", skw->proto);
1736                     break;
1737           }
1738           switch (dir) {
1739           case PF_IN:
1740                     kprintf(" in");
1741                     break;
1742           case PF_OUT:
1743                     kprintf(" out");
1744                     break;
1745           }
1746           if (skw) {
1747                     kprintf(" wire: ");
1748                     pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1749                     kprintf(" ");
1750                     pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1751           }
1752           if (sks) {
1753                     kprintf(" stack: ");
1754                     if (sks != skw) {
1755                               pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1756                               kprintf(" ");
1757                               pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1758                     } else
1759                               kprintf("-");
1760           }
1761           if (s) {
1762                     if (proto == IPPROTO_TCP) {
1763                               kprintf(" [lo=%u high=%u win=%u modulator=%u",
1764                                   s->src.seqlo, s->src.seqhi,
1765                                   s->src.max_win, s->src.seqdiff);
1766                               if (s->src.wscale && s->dst.wscale)
1767                                         kprintf(" wscale=%u",
1768                                             s->src.wscale & PF_WSCALE_MASK);
1769                               kprintf("]");
1770                               kprintf(" [lo=%u high=%u win=%u modulator=%u",
1771                                   s->dst.seqlo, s->dst.seqhi,
1772                                   s->dst.max_win, s->dst.seqdiff);
1773                               if (s->src.wscale && s->dst.wscale)
1774                                         kprintf(" wscale=%u",
1775                                         s->dst.wscale & PF_WSCALE_MASK);
1776                               kprintf("]");
1777                     }
1778                     kprintf(" %u:%u", s->src.state, s->dst.state);
1779           }
1780 }
1781 
1782 void
pf_print_flags(u_int8_t f)1783 pf_print_flags(u_int8_t f)
1784 {
1785           if (f)
1786                     kprintf(" ");
1787           if (f & TH_FIN)
1788                     kprintf("F");
1789           if (f & TH_SYN)
1790                     kprintf("S");
1791           if (f & TH_RST)
1792                     kprintf("R");
1793           if (f & TH_PUSH)
1794                     kprintf("P");
1795           if (f & TH_ACK)
1796                     kprintf("A");
1797           if (f & TH_URG)
1798                     kprintf("U");
1799           if (f & TH_ECE)
1800                     kprintf("E");
1801           if (f & TH_CWR)
1802                     kprintf("W");
1803 }
1804 
1805 #define   PF_SET_SKIP_STEPS(i)                                                  \
1806           do {                                                                  \
1807                     while (head[i] != cur) {                          \
1808                               head[i]->skip[i].ptr = cur;             \
1809                               head[i] = TAILQ_NEXT(head[i], entries); \
1810                     }                                                           \
1811           } while (0)
1812 
1813 void
pf_calc_skip_steps(struct pf_rulequeue * rules)1814 pf_calc_skip_steps(struct pf_rulequeue *rules)
1815 {
1816           struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1817           int i;
1818 
1819           cur = TAILQ_FIRST(rules);
1820           prev = cur;
1821           for (i = 0; i < PF_SKIP_COUNT; ++i)
1822                     head[i] = cur;
1823           while (cur != NULL) {
1824 
1825                     if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1826                               PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1827                     if (cur->direction != prev->direction)
1828                               PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1829                     if (cur->af != prev->af)
1830                               PF_SET_SKIP_STEPS(PF_SKIP_AF);
1831                     if (cur->proto != prev->proto)
1832                               PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1833                     if (cur->src.neg != prev->src.neg ||
1834                         pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1835                               PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1836                     if (cur->src.port[0] != prev->src.port[0] ||
1837                         cur->src.port[1] != prev->src.port[1] ||
1838                         cur->src.port_op != prev->src.port_op)
1839                               PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1840                     if (cur->dst.neg != prev->dst.neg ||
1841                         pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1842                               PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1843                     if (cur->dst.port[0] != prev->dst.port[0] ||
1844                         cur->dst.port[1] != prev->dst.port[1] ||
1845                         cur->dst.port_op != prev->dst.port_op)
1846                               PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1847 
1848                     prev = cur;
1849                     cur = TAILQ_NEXT(cur, entries);
1850           }
1851           for (i = 0; i < PF_SKIP_COUNT; ++i)
1852                     PF_SET_SKIP_STEPS(i);
1853 }
1854 
1855 int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)1856 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1857 {
1858           if (aw1->type != aw2->type)
1859                     return (1);
1860           switch (aw1->type) {
1861           case PF_ADDR_ADDRMASK:
1862           case PF_ADDR_RANGE:
1863                     if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
1864                               return (1);
1865                     if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
1866                               return (1);
1867                     return (0);
1868           case PF_ADDR_DYNIFTL:
1869                     return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1870           case PF_ADDR_NOROUTE:
1871           case PF_ADDR_URPFFAILED:
1872                     return (0);
1873           case PF_ADDR_TABLE:
1874                     return (aw1->p.tbl != aw2->p.tbl);
1875           case PF_ADDR_RTLABEL:
1876                     return (aw1->v.rtlabel != aw2->v.rtlabel);
1877           default:
1878                     kprintf("invalid address type: %d\n", aw1->type);
1879                     return (1);
1880           }
1881 }
1882 
/*
 * Incrementally update a 16-bit one's-complement checksum after one 16-bit
 * word covered by it changed from `old' to `new'.
 *
 * When `udp' is non-zero the UDP convention applies: an incoming checksum
 * of 0 is returned unchanged as 0, and a computed result of 0 is returned
 * as 0xFFFF.
 *
 * Returns the adjusted checksum.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t l;

	if (udp && !cksum)
		return (0x0000);
	/*
	 * cksum + old - new lies in [-65535, 131070]; a negative value
	 * wraps in 32 bits and the fold below still yields the correct
	 * one's-complement result.
	 */
	l = cksum + old - new;
	l = (l >> 16) + (l & 65535);	/* fold the carry/borrow back in */
	l = l & 65535;
	if (udp && !l)
		return (0xFFFF);
	return (l);
}
1897 
1898 void
pf_change_ap(struct pf_addr * a,u_int16_t * p,u_int16_t * ic,u_int16_t * pc,struct pf_addr * an,u_int16_t pn,u_int8_t u,sa_family_t af)1899 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1900     struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1901 {
1902           struct pf_addr      ao;
1903           u_int16_t po = *p;
1904 
1905           PF_ACPY(&ao, a, af);
1906           PF_ACPY(a, an, af);
1907 
1908           *p = pn;
1909 
1910           switch (af) {
1911 #ifdef INET
1912           case AF_INET:
1913                     *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1914                         ao.addr16[0], an->addr16[0], 0),
1915                         ao.addr16[1], an->addr16[1], 0);
1916                     *p = pn;
1917                     *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1918                         ao.addr16[0], an->addr16[0], u),
1919                         ao.addr16[1], an->addr16[1], u),
1920                         po, pn, u);
1921                     break;
1922 #endif /* INET */
1923 #ifdef INET6
1924           case AF_INET6:
1925                     *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1926                         pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1927                         pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1928                         ao.addr16[0], an->addr16[0], u),
1929                         ao.addr16[1], an->addr16[1], u),
1930                         ao.addr16[2], an->addr16[2], u),
1931                         ao.addr16[3], an->addr16[3], u),
1932                         ao.addr16[4], an->addr16[4], u),
1933                         ao.addr16[5], an->addr16[5], u),
1934                         ao.addr16[6], an->addr16[6], u),
1935                         ao.addr16[7], an->addr16[7], u),
1936                         po, pn, u);
1937                     break;
1938 #endif /* INET6 */
1939           }
1940 }
1941 
1942 
/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
 *
 * The 32-bit value at `a' is replaced by `an' and the checksum *c is
 * adjusted for both 16-bit halves of the change; `u' is passed through to
 * pf_cksum_fixup() as its udp flag.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t ao;

	/* memcpy in both directions, `a' may be unaligned */
	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));
	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
	    ao % 65536, an % 65536, u);
}
1954 
#ifdef INET6
/*
 * Replace the IPv6 address *a by *an and adjust checksum *c for all eight
 * 16-bit words of the change; `u' is passed through to pf_cksum_fixup() as
 * its udp flag.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;

	/* keep the old address for the checksum delta */
	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
#endif /* INET6 */
1977 
1978 void
pf_change_icmp(struct pf_addr * ia,u_int16_t * ip,struct pf_addr * oa,struct pf_addr * na,u_int16_t np,u_int16_t * pc,u_int16_t * h2c,u_int16_t * ic,u_int16_t * hc,u_int8_t u,sa_family_t af)1979 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1980     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1981     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1982 {
1983           struct pf_addr      oia, ooa;
1984 
1985           PF_ACPY(&oia, ia, af);
1986           if (oa)
1987                     PF_ACPY(&ooa, oa, af);
1988 
1989           /* Change inner protocol port, fix inner protocol checksum. */
1990           if (ip != NULL) {
1991                     u_int16_t oip = *ip;
1992                     u_int32_t opc = 0;
1993 
1994                     if (pc != NULL)
1995                               opc = *pc;
1996                     *ip = np;
1997                     if (pc != NULL)
1998                               *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1999                     *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
2000                     if (pc != NULL)
2001                               *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
2002           }
2003           /* Change inner ip address, fix inner ip and icmp checksums. */
2004           PF_ACPY(ia, na, af);
2005           switch (af) {
2006 #ifdef INET
2007           case AF_INET: {
2008                     u_int32_t  oh2c = *h2c;
2009 
2010                     *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
2011                         oia.addr16[0], ia->addr16[0], 0),
2012                         oia.addr16[1], ia->addr16[1], 0);
2013                     *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2014                         oia.addr16[0], ia->addr16[0], 0),
2015                         oia.addr16[1], ia->addr16[1], 0);
2016                     *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
2017                     break;
2018           }
2019 #endif /* INET */
2020 #ifdef INET6
2021           case AF_INET6:
2022                     *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2023                         pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2024                         pf_cksum_fixup(pf_cksum_fixup(*ic,
2025                         oia.addr16[0], ia->addr16[0], u),
2026                         oia.addr16[1], ia->addr16[1], u),
2027                         oia.addr16[2], ia->addr16[2], u),
2028                         oia.addr16[3], ia->addr16[3], u),
2029                         oia.addr16[4], ia->addr16[4], u),
2030                         oia.addr16[5], ia->addr16[5], u),
2031                         oia.addr16[6], ia->addr16[6], u),
2032                         oia.addr16[7], ia->addr16[7], u);
2033                     break;
2034 #endif /* INET6 */
2035           }
2036           /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
2037           if (oa) {
2038                     PF_ACPY(oa, na, af);
2039                     switch (af) {
2040 #ifdef INET
2041                     case AF_INET:
2042                               *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
2043                                   ooa.addr16[0], oa->addr16[0], 0),
2044                                   ooa.addr16[1], oa->addr16[1], 0);
2045                               break;
2046 #endif /* INET */
2047 #ifdef INET6
2048                     case AF_INET6:
2049                               *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2050                                   pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2051                                   pf_cksum_fixup(pf_cksum_fixup(*ic,
2052                                   ooa.addr16[0], oa->addr16[0], u),
2053                                   ooa.addr16[1], oa->addr16[1], u),
2054                                   ooa.addr16[2], oa->addr16[2], u),
2055                                   ooa.addr16[3], oa->addr16[3], u),
2056                                   ooa.addr16[4], oa->addr16[4], u),
2057                                   ooa.addr16[5], oa->addr16[5], u),
2058                                   ooa.addr16[6], oa->addr16[6], u),
2059                                   ooa.addr16[7], oa->addr16[7], u);
2060                               break;
2061 #endif /* INET6 */
2062                     }
2063           }
2064 }
2065 
2066 
2067 /*
2068  * Need to modulate the sequence numbers in the TCP SACK option
2069  * (credits to Krzysztof Pfaff for report and patch)
2070  */
2071 int
pf_modulate_sack(struct mbuf * m,int off,struct pf_pdesc * pd,struct tcphdr * th,struct pf_state_peer * dst)2072 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
2073     struct tcphdr *th, struct pf_state_peer *dst)
2074 {
2075           int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2076           u_int8_t opts[TCP_MAXOLEN], *opt = opts;
2077           int copyback = 0, i, olen;
2078           struct raw_sackblock sack;
2079 
2080 #define TCPOLEN_SACKLEN       (TCPOLEN_SACK + 2)
2081           if (hlen < TCPOLEN_SACKLEN ||
2082               !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
2083                     return 0;
2084 
2085           while (hlen >= TCPOLEN_SACKLEN) {
2086                     olen = opt[1];
2087                     switch (*opt) {
2088                     case TCPOPT_EOL:    /* FALLTHROUGH */
2089                     case TCPOPT_NOP:
2090                               opt++;
2091                               hlen--;
2092                               break;
2093                     case TCPOPT_SACK:
2094                               if (olen > hlen)
2095                                         olen = hlen;
2096                               if (olen >= TCPOLEN_SACKLEN) {
2097                                         for (i = 2; i + TCPOLEN_SACK <= olen;
2098                                             i += TCPOLEN_SACK) {
2099                                                   memcpy(&sack, &opt[i], sizeof(sack));
2100                                                   pf_change_a(&sack.rblk_start, &th->th_sum,
2101                                                       htonl(ntohl(sack.rblk_start) -
2102                                                       dst->seqdiff), 0);
2103                                                   pf_change_a(&sack.rblk_end, &th->th_sum,
2104                                                       htonl(ntohl(sack.rblk_end) -
2105                                                       dst->seqdiff), 0);
2106                                                   memcpy(&opt[i], &sack, sizeof(sack));
2107                                         }
2108                                         copyback = 1;
2109                               }
2110                               /* FALLTHROUGH */
2111                     default:
2112                               if (olen < 2)
2113                                         olen = 2;
2114                               hlen -= olen;
2115                               opt += olen;
2116                     }
2117           }
2118 
2119           if (copyback)
2120                     m_copyback(m, off + sizeof(*th), thoptlen, opts);
2121           return (copyback);
2122 }
2123 
2124 void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2125 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2126     const struct pf_addr *saddr, const struct pf_addr *daddr,
2127     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2128     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2129     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2130 {
2131           struct mbuf         *m;
2132           int                  len = 0, tlen;
2133 #ifdef INET
2134           struct ip *h = NULL;
2135 #endif /* INET */
2136 #ifdef INET6
2137           struct ip6_hdr      *h6 = NULL;
2138 #endif /* INET6 */
2139           struct tcphdr       *th = NULL;
2140           char                *opt;
2141 
2142           ASSERT_LWKT_TOKEN_HELD(&pf_token);
2143 
2144           /* maximum segment size tcp option */
2145           tlen = sizeof(struct tcphdr);
2146           if (mss)
2147                     tlen += 4;
2148 
2149           switch (af) {
2150 #ifdef INET
2151           case AF_INET:
2152                     len = sizeof(struct ip) + tlen;
2153                     break;
2154 #endif /* INET */
2155 #ifdef INET6
2156           case AF_INET6:
2157                     len = sizeof(struct ip6_hdr) + tlen;
2158                     break;
2159 #endif /* INET6 */
2160           }
2161 
2162           /*
2163            * Create outgoing mbuf.
2164            *
2165            * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
2166            * so make sure pf.flags is clear.
2167            */
2168           m = m_gethdr(M_NOWAIT, MT_HEADER);
2169           if (m == NULL) {
2170                     return;
2171           }
2172           if (tag)
2173                     m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
2174           m->m_pkthdr.pf.flags = 0;
2175           m->m_pkthdr.pf.tag = rtag;
2176           /* XXX Recheck when upgrading to > 4.4 */
2177           m->m_pkthdr.pf.statekey = NULL;
2178           if (r != NULL && r->rtableid >= 0)
2179                     m->m_pkthdr.pf.rtableid = r->rtableid;
2180 
2181 #ifdef ALTQ
2182           if (r != NULL && r->qid) {
2183                     m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
2184                     m->m_pkthdr.pf.qid = r->qid;
2185                     m->m_pkthdr.pf.ecn_af = af;
2186                     m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
2187           }
2188 #endif /* ALTQ */
2189           m->m_data += max_linkhdr;
2190           m->m_pkthdr.len = m->m_len = len;
2191           m->m_pkthdr.rcvif = NULL;
2192           bzero(m->m_data, len);
2193           switch (af) {
2194 #ifdef INET
2195           case AF_INET:
2196                     h = mtod(m, struct ip *);
2197 
2198                     /* IP header fields included in the TCP checksum */
2199                     h->ip_p = IPPROTO_TCP;
2200                     h->ip_len = htons(tlen);
2201                     h->ip_src.s_addr = saddr->v4.s_addr;
2202                     h->ip_dst.s_addr = daddr->v4.s_addr;
2203 
2204                     th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
2205                     break;
2206 #endif /* INET */
2207 #ifdef INET6
2208           case AF_INET6:
2209                     h6 = mtod(m, struct ip6_hdr *);
2210 
2211                     /* IP header fields included in the TCP checksum */
2212                     h6->ip6_nxt = IPPROTO_TCP;
2213                     h6->ip6_plen = htons(tlen);
2214                     memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
2215                     memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
2216 
2217                     th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
2218                     break;
2219 #endif /* INET6 */
2220           }
2221 
2222           /* TCP header */
2223           th->th_sport = sport;
2224           th->th_dport = dport;
2225           th->th_seq = htonl(seq);
2226           th->th_ack = htonl(ack);
2227           th->th_off = tlen >> 2;
2228           th->th_flags = flags;
2229           th->th_win = htons(win);
2230 
2231           if (mss) {
2232                     opt = (char *)(th + 1);
2233                     opt[0] = TCPOPT_MAXSEG;
2234                     opt[1] = 4;
2235                     mss = htons(mss);
2236                     bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2237           }
2238 
2239           switch (af) {
2240 #ifdef INET
2241           case AF_INET:
2242                     /* TCP checksum */
2243                     th->th_sum = in_cksum(m, len);
2244 
2245                     /* Finish the IP header */
2246                     h->ip_v = 4;
2247                     h->ip_hl = sizeof(*h) >> 2;
2248                     h->ip_tos = IPTOS_LOWDELAY;
2249                     h->ip_len = htons(len);
2250                     h->ip_off = path_mtu_discovery ? htons(IP_DF) : 0;
2251                     h->ip_ttl = ttl ? ttl : ip_defttl;
2252                     h->ip_sum = 0;
2253                     if (eh == NULL) {
2254                               lwkt_reltoken(&pf_token);
2255                               ip_output(m, NULL, NULL, 0, NULL, NULL);
2256                               lwkt_gettoken(&pf_token);
2257                     } else {
2258                               struct route                   ro;
2259                               struct rtentry                 rt;
2260                               struct ether_header *e = (void *)ro.ro_dst.sa_data;
2261 
2262                               if (ifp == NULL) {
2263                                         m_freem(m);
2264                                         return;
2265                               }
2266                               rt.rt_ifp = ifp;
2267                               ro.ro_rt = &rt;
2268                               ro.ro_dst.sa_len = sizeof(ro.ro_dst);
2269                               ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
2270                               bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
2271                               bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
2272                               e->ether_type = eh->ether_type;
2273                               /* XXX_IMPORT: later */
2274                               lwkt_reltoken(&pf_token);
2275                               ip_output(m, NULL, &ro, 0, NULL, NULL);
2276                               lwkt_gettoken(&pf_token);
2277                     }
2278                     break;
2279 #endif /* INET */
2280 #ifdef INET6
2281           case AF_INET6:
2282                     /* TCP checksum */
2283                     th->th_sum = in6_cksum(m, IPPROTO_TCP,
2284                         sizeof(struct ip6_hdr), tlen);
2285 
2286                     h6->ip6_vfc |= IPV6_VERSION;
2287                     h6->ip6_hlim = IPV6_DEFHLIM;
2288 
2289                     lwkt_reltoken(&pf_token);
2290                     ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
2291                     lwkt_gettoken(&pf_token);
2292                     break;
2293 #endif /* INET6 */
2294           }
2295 }
2296 
2297 void
pf_send_icmp(struct mbuf * m,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2298 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
2299     struct pf_rule *r)
2300 {
2301           struct mbuf         *m0;
2302 
2303           /*
2304            * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
2305            * so make sure pf.flags is clear.
2306            */
2307           if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
2308                     return;
2309 
2310           m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
2311           m0->m_pkthdr.pf.flags = 0;
2312           /* XXX Re-Check when Upgrading to > 4.4 */
2313           m0->m_pkthdr.pf.statekey = NULL;
2314 
2315           if (r->rtableid >= 0)
2316                     m0->m_pkthdr.pf.rtableid = r->rtableid;
2317 
2318 #ifdef ALTQ
2319           if (r->qid) {
2320                     m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
2321                     m0->m_pkthdr.pf.qid = r->qid;
2322                     m0->m_pkthdr.pf.ecn_af = af;
2323                     m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
2324           }
2325 #endif /* ALTQ */
2326 
2327           switch (af) {
2328 #ifdef INET
2329           case AF_INET:
2330                     icmp_error(m0, type, code, 0, 0);
2331                     break;
2332 #endif /* INET */
2333 #ifdef INET6
2334           case AF_INET6:
2335                     icmp6_error(m0, type, code, 0);
2336                     break;
2337 #endif /* INET6 */
2338           }
2339 }
2340 
/*
 * Compare addresses a and b under mask m for address family af.
 *
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; a non-zero n inverts that result.  For an address family that
 * is not compiled in, the addresses are treated as not equal.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		/* Equal under the mask <=> no differing bit survives it. */
		equal = ((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) == 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	w;

		equal = 1;
		for (w = 0; w < 4; w++) {
			if (((a->addr32[w] ^ b->addr32[w]) &
			    m->addr32[w]) != 0) {
				equal = 0;
				break;
			}
		}
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
2386 
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 *
 * The address words are kept in network byte order, so each word is
 * converted with ntohl() before it is compared; comparing the raw words
 * gives wrong answers on little-endian hosts.  IPv6 addresses are compared
 * word by word, most significant word first.  An address family that is not
 * compiled in is reported as matching (1).
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
			return (0);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	i;

		/* check a >= b */
		for (i = 0; i < 4; ++i) {
			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
				return (0);
		}
		/* check a <= e */
		for (i = 0; i < 4; ++i) {
			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
				break;
			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
				return (0);
		}
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
2424 
2425 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2426 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2427 {
2428           switch (op) {
2429           case PF_OP_IRG:
2430                     return ((p > a1) && (p < a2));
2431           case PF_OP_XRG:
2432                     return ((p < a1) || (p > a2));
2433           case PF_OP_RRG:
2434                     return ((p >= a1) && (p <= a2));
2435           case PF_OP_EQ:
2436                     return (p == a1);
2437           case PF_OP_NE:
2438                     return (p != a1);
2439           case PF_OP_LT:
2440                     return (p < a1);
2441           case PF_OP_LE:
2442                     return (p <= a1);
2443           case PF_OP_GT:
2444                     return (p > a1);
2445           case PF_OP_GE:
2446                     return (p >= a1);
2447           }
2448           return (0); /* never reached */
2449 }
2450 
/*
 * Port flavour of pf_match(): the bounds a1/a2 and the port p are converted
 * with ntohs() before the comparison is made.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t	lo = ntohs(a1);
	u_int16_t	hi = ntohs(a2);
	u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
2459 
2460 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)2461 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2462 {
2463           if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2464                     return (0);
2465           return (pf_match(op, a1, a2, u));
2466 }
2467 
2468 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)2469 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2470 {
2471           if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2472                     return (0);
2473           return (pf_match(op, a1, a2, g));
2474 }
2475 
2476 int
pf_match_tag(struct mbuf * m,struct pf_rule * r,int * tag)2477 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
2478 {
2479           if (*tag == -1)
2480                     *tag = m->m_pkthdr.pf.tag;
2481 
2482           return ((!r->match_tag_not && r->match_tag == *tag) ||
2483               (r->match_tag_not && r->match_tag != *tag));
2484 }
2485 
2486 int
pf_tag_packet(struct mbuf * m,int tag,int rtableid)2487 pf_tag_packet(struct mbuf *m, int tag, int rtableid)
2488 {
2489           if (tag <= 0 && rtableid < 0)
2490                     return (0);
2491 
2492           if (tag > 0)
2493                     m->m_pkthdr.pf.tag = tag;
2494           if (rtableid >= 0)
2495                     m->m_pkthdr.pf.rtableid = rtableid;
2496 
2497           return (0);
2498 }
2499 
2500 void
pf_step_into_anchor(int * depth,struct pf_ruleset ** rs,int n,struct pf_rule ** r,struct pf_rule ** a,int * match)2501 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
2502     struct pf_rule **r, struct pf_rule **a, int *match)
2503 {
2504           struct pf_anchor_stackframe   *f;
2505 
2506           (*r)->anchor->match = 0;
2507           if (match)
2508                     *match = 0;
2509           if (*depth >= NELEM(pf_anchor_stack)) {
2510                     kprintf("pf_step_into_anchor: stack overflow\n");
2511                     *r = TAILQ_NEXT(*r, entries);
2512                     return;
2513           } else if (*depth == 0 && a != NULL)
2514                     *a = *r;
2515           f = pf_anchor_stack + (*depth)++;
2516           f->rs = *rs;
2517           f->r = *r;
2518           if ((*r)->anchor_wildcard) {
2519                     f->parent = &(*r)->anchor->children;
2520                     if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2521                         NULL) {
2522                               *r = NULL;
2523                               return;
2524                     }
2525                     *rs = &f->child->ruleset;
2526           } else {
2527                     f->parent = NULL;
2528                     f->child = NULL;
2529                     *rs = &(*r)->anchor->ruleset;
2530           }
2531           *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2532 }
2533 
2534 int
pf_step_out_of_anchor(int * depth,struct pf_ruleset ** rs,int n,struct pf_rule ** r,struct pf_rule ** a,int * match)2535 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2536     struct pf_rule **r, struct pf_rule **a, int *match)
2537 {
2538           struct pf_anchor_stackframe   *f;
2539           int quick = 0;
2540 
2541           do {
2542                     if (*depth <= 0)
2543                               break;
2544                     f = pf_anchor_stack + *depth - 1;
2545                     if (f->parent != NULL && f->child != NULL) {
2546                               if (f->child->match ||
2547                                   (match != NULL && *match)) {
2548                                         f->r->anchor->match = 1;
2549                                         *match = 0;
2550                               }
2551                               f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2552                               if (f->child != NULL) {
2553                                         *rs = &f->child->ruleset;
2554                                         *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2555                                         if (*r == NULL)
2556                                                   continue;
2557                                         else
2558                                                   break;
2559                               }
2560                     }
2561                     (*depth)--;
2562                     if (*depth == 0 && a != NULL)
2563                               *a = NULL;
2564                     *rs = f->rs;
2565                     if (f->r->anchor->match || (match != NULL && *match))
2566                               quick = f->r->quick;
2567                     *r = TAILQ_NEXT(f->r, entries);
2568           } while (*r == NULL);
2569 
2570           return (quick);
2571 }
2572 
2573 #ifdef INET6
2574 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)2575 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2576     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2577 {
2578           switch (af) {
2579 #ifdef INET
2580           case AF_INET:
2581                     naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2582                     ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2583                     break;
2584 #endif /* INET */
2585           case AF_INET6:
2586                     naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2587                     ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2588                     naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2589                     ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2590                     naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2591                     ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2592                     naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2593                     ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2594                     break;
2595           }
2596 }
2597 
2598 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)2599 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2600 {
2601           switch (af) {
2602 #ifdef INET
2603           case AF_INET:
2604                     addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2605                     break;
2606 #endif /* INET */
2607           case AF_INET6:
2608                     if (addr->addr32[3] == 0xffffffff) {
2609                               addr->addr32[3] = 0;
2610                               if (addr->addr32[2] == 0xffffffff) {
2611                                         addr->addr32[2] = 0;
2612                                         if (addr->addr32[1] == 0xffffffff) {
2613                                                   addr->addr32[1] = 0;
2614                                                   addr->addr32[0] =
2615                                                       htonl(ntohl(addr->addr32[0]) + 1);
2616                                         } else
2617                                                   addr->addr32[1] =
2618                                                       htonl(ntohl(addr->addr32[1]) + 1);
2619                               } else
2620                                         addr->addr32[2] =
2621                                             htonl(ntohl(addr->addr32[2]) + 1);
2622                     } else
2623                               addr->addr32[3] =
2624                                   htonl(ntohl(addr->addr32[3]) + 1);
2625                     break;
2626           }
2627 }
2628 #endif /* INET6 */
2629 
/*
 * Scramble three words in place with a fixed sequence of subtract, shift
 * and xor steps.  All three arguments must be modifiable lvalues and each
 * one is evaluated several times, so they must be free of side effects.
 */
#define mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
2642 
2643 /*
2644  * hash function based on bridge_hash in if_bridge.c
2645  */
2646 void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)2647 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2648     struct pf_poolhashkey *key, sa_family_t af)
2649 {
2650           u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2651 
2652           switch (af) {
2653 #ifdef INET
2654           case AF_INET:
2655                     a += inaddr->addr32[0];
2656                     b += key->key32[1];
2657                     mix(a, b, c);
2658                     hash->addr32[0] = c + key->key32[2];
2659                     break;
2660 #endif /* INET */
2661 #ifdef INET6
2662           case AF_INET6:
2663                     a += inaddr->addr32[0];
2664                     b += inaddr->addr32[2];
2665                     mix(a, b, c);
2666                     hash->addr32[0] = c;
2667                     a += inaddr->addr32[1];
2668                     b += inaddr->addr32[3];
2669                     c += key->key32[1];
2670                     mix(a, b, c);
2671                     hash->addr32[1] = c;
2672                     a += inaddr->addr32[2];
2673                     b += inaddr->addr32[1];
2674                     c += key->key32[2];
2675                     mix(a, b, c);
2676                     hash->addr32[2] = c;
2677                     a += inaddr->addr32[3];
2678                     b += inaddr->addr32[0];
2679                     c += key->key32[3];
2680                     mix(a, b, c);
2681                     hash->addr32[3] = c;
2682                     break;
2683 #endif /* INET6 */
2684           }
2685 }
2686 
2687 int
pf_map_addr(sa_family_t af,struct pf_rule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pf_addr * init_addr,struct pf_src_node ** sn)2688 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2689     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2690 {
2691           unsigned char                  hash[16];
2692           struct pf_pool                *rpool = &r->rpool;
2693           struct pf_pooladdr  *acur = rpool->cur;
2694           struct pf_pooladdr  *cur;
2695           struct pf_addr                *raddr;
2696           struct pf_addr                *rmask;
2697           struct pf_addr                counter;
2698           struct pf_src_node   k;
2699           int cpu = mycpu->gd_cpuid;
2700           int tblidx;
2701 
2702           bzero(hash, sizeof(hash));    /* avoid gcc warnings */
2703 
2704           /*
2705            * NOTE! rpool->cur and rpool->tblidx can be iterators and thus
2706            *         may represent a SMP race due to the shared nature of the
2707            *         rpool structure.  We allow the race and ensure that updates
2708            *         do not create a fatal condition.
2709            */
2710           cpu_ccfence();
2711           cur = acur;
2712           raddr = &cur->addr.v.a.addr;
2713           rmask = &cur->addr.v.a.mask;
2714 
2715           if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2716               (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2717                     k.af = af;
2718                     PF_ACPY(&k.addr, saddr, af);
2719                     if (r->rule_flag & PFRULE_RULESRCTRACK ||
2720                         r->rpool.opts & PF_POOL_STICKYADDR)
2721                               k.rule.ptr = r;
2722                     else
2723                               k.rule.ptr = NULL;
2724                     PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH);
2725                     *sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k);
2726                     if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2727                               PF_ACPY(naddr, &(*sn)->raddr, af);
2728                               if (pf_status.debug >= PF_DEBUG_MISC) {
2729                                         kprintf("pf_map_addr: src tracking maps ");
2730                                         pf_print_host(&k.addr, 0, af);
2731                                         kprintf(" to ");
2732                                         pf_print_host(naddr, 0, af);
2733                                         kprintf("\n");
2734                               }
2735                               return (0);
2736                     }
2737           }
2738 
2739           if (cur->addr.type == PF_ADDR_NOROUTE)
2740                     return (1);
2741           if (cur->addr.type == PF_ADDR_DYNIFTL) {
2742                     switch (af) {
2743 #ifdef INET
2744                     case AF_INET:
2745                               if (cur->addr.p.dyn->pfid_acnt4 < 1 &&
2746                                   (rpool->opts & PF_POOL_TYPEMASK) !=
2747                                   PF_POOL_ROUNDROBIN)
2748                                         return (1);
2749                               raddr = &cur->addr.p.dyn->pfid_addr4;
2750                               rmask = &cur->addr.p.dyn->pfid_mask4;
2751                               break;
2752 #endif /* INET */
2753 #ifdef INET6
2754                     case AF_INET6:
2755                               if (cur->addr.p.dyn->pfid_acnt6 < 1 &&
2756                                   (rpool->opts & PF_POOL_TYPEMASK) !=
2757                                   PF_POOL_ROUNDROBIN)
2758                                         return (1);
2759                               raddr = &cur->addr.p.dyn->pfid_addr6;
2760                               rmask = &cur->addr.p.dyn->pfid_mask6;
2761                               break;
2762 #endif /* INET6 */
2763                     }
2764           } else if (cur->addr.type == PF_ADDR_TABLE) {
2765                     if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2766                               return (1); /* unsupported */
2767           } else {
2768                     raddr = &cur->addr.v.a.addr;
2769                     rmask = &cur->addr.v.a.mask;
2770           }
2771 
2772           switch (rpool->opts & PF_POOL_TYPEMASK) {
2773           case PF_POOL_NONE:
2774                     PF_ACPY(naddr, raddr, af);
2775                     break;
2776           case PF_POOL_BITMASK:
2777                     PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2778                     break;
2779           case PF_POOL_RANDOM:
2780                     if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2781                               switch (af) {
2782 #ifdef INET
2783                               case AF_INET:
2784                                         counter.addr32[0] = htonl(karc4random());
2785                                         break;
2786 #endif /* INET */
2787 #ifdef INET6
2788                               case AF_INET6:
2789                                         if (rmask->addr32[3] != 0xffffffff)
2790                                                   counter.addr32[3] =
2791                                                             htonl(karc4random());
2792                                         else
2793                                                   break;
2794                                         if (rmask->addr32[2] != 0xffffffff)
2795                                                   counter.addr32[2] =
2796                                                             htonl(karc4random());
2797                                         else
2798                                                   break;
2799                                         if (rmask->addr32[1] != 0xffffffff)
2800                                                   counter.addr32[1] =
2801                                                             htonl(karc4random());
2802                                         else
2803                                                   break;
2804                                         if (rmask->addr32[0] != 0xffffffff)
2805                                                   counter.addr32[0] =
2806                                                             htonl(karc4random());
2807                                         break;
2808 #endif /* INET6 */
2809                               }
2810                               PF_POOLMASK(naddr, raddr, rmask, &counter, af);
2811                               PF_ACPY(init_addr, naddr, af);
2812 
2813                     } else {
2814                               counter = rpool->counter;
2815                               cpu_ccfence();
2816                               PF_AINC(&counter, af);
2817                               PF_POOLMASK(naddr, raddr, rmask, &counter, af);
2818                               rpool->counter = counter;
2819                     }
2820                     break;
2821           case PF_POOL_SRCHASH:
2822                     pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2823                     PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2824                     break;
2825           case PF_POOL_ROUNDROBIN:
2826                     tblidx = rpool->tblidx;
2827                     counter = rpool->counter;
2828                     if (cur->addr.type == PF_ADDR_TABLE) {
2829                               if (!pfr_pool_get(cur->addr.p.tbl,
2830                                   &tblidx, &counter,
2831                                   &raddr, &rmask, af)) {
2832                                         goto get_addr;
2833                               }
2834                     } else if (cur->addr.type == PF_ADDR_DYNIFTL) {
2835                               if (!pfr_pool_get(cur->addr.p.dyn->pfid_kt,
2836                                   &tblidx, &counter,
2837                                   &raddr, &rmask, af)) {
2838                                         goto get_addr;
2839                               }
2840                     } else if (pf_match_addr(0, raddr, rmask,
2841                                                    &counter, af)) {
2842                               goto get_addr;
2843                     }
2844 
2845           try_next:
2846                     if ((cur = TAILQ_NEXT(cur, entries)) == NULL)
2847                               cur = TAILQ_FIRST(&rpool->list);
2848                     if (cur->addr.type == PF_ADDR_TABLE) {
2849                               tblidx = -1;
2850                               if (pfr_pool_get(cur->addr.p.tbl,
2851                                   &tblidx, &counter,
2852                                   &raddr, &rmask, af)) {
2853                                         /* table contains no address of type 'af' */
2854                                         if (cur != acur)
2855                                                   goto try_next;
2856                                         return (1);
2857                               }
2858                     } else if (cur->addr.type == PF_ADDR_DYNIFTL) {
2859                               tblidx = -1;
2860                               if (pfr_pool_get(cur->addr.p.dyn->pfid_kt,
2861                                   &tblidx, &counter,
2862                                   &raddr, &rmask, af)) {
2863                                         /* table contains no address of type 'af' */
2864                                         if (cur != acur)
2865                                                   goto try_next;
2866                                         return (1);
2867                               }
2868                     } else {
2869                               raddr = &cur->addr.v.a.addr;
2870                               rmask = &cur->addr.v.a.mask;
2871                               PF_ACPY(&counter, raddr, af);
2872                     }
2873 
2874           get_addr:
2875                     rpool->cur = cur;
2876                     rpool->tblidx = tblidx;
2877                     PF_ACPY(naddr, &counter, af);
2878                     if (init_addr != NULL && PF_AZERO(init_addr, af))
2879                               PF_ACPY(init_addr, naddr, af);
2880                     PF_AINC(&counter, af);
2881                     rpool->counter = counter;
2882                     break;
2883           }
2884           if (*sn != NULL)
2885                     PF_ACPY(&(*sn)->raddr, naddr, af);
2886 
2887           if (pf_status.debug >= PF_DEBUG_MISC &&
2888               (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2889                     kprintf("pf_map_addr: selected address ");
2890                     pf_print_host(naddr, 0, af);
2891                     kprintf("\n");
2892           }
2893 
2894           return (0);
2895 }
2896 
2897 int
pf_get_sport(struct pf_pdesc * pd,sa_family_t af,u_int8_t proto,struct pf_rule * r,struct pf_addr * saddr,struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,struct pf_addr * naddr,u_int16_t * nport,u_int16_t low,u_int16_t high,struct pf_src_node ** sn)2898 pf_get_sport(struct pf_pdesc *pd, sa_family_t af,
2899                u_int8_t proto, struct pf_rule *r,
2900                struct pf_addr *saddr, struct pf_addr *daddr,
2901                u_int16_t sport, u_int16_t dport,
2902                struct pf_addr *naddr, u_int16_t *nport,
2903                u_int16_t low, u_int16_t high, struct pf_src_node **sn)
2904 {
2905           struct pf_state_key_cmp       key;
2906           struct pf_addr                init_addr;
2907           u_int16_t           cut;
2908           u_int32_t           hash_base = 0;
2909           int                           do_hash = 0;
2910 
2911           bzero(&init_addr, sizeof(init_addr));
2912           if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2913                     return (1);
2914 
2915           if (proto == IPPROTO_ICMP) {
2916                     low = 1;
2917                     high = 65535;
2918           }
2919 
2920           bzero(&key, sizeof(key));
2921           key.af = af;
2922           key.proto = proto;
2923           key.port[0] = dport;
2924           PF_ACPY(&key.addr[0], daddr, key.af);
2925 
2926           do {
2927                     PF_ACPY(&key.addr[1], naddr, key.af);
2928 
2929                     /*
2930                      * We want to select a port that calculates to a toeplitz hash
2931                      * that masks to the same cpu, otherwise the response may
2932                      * not see the new state.
2933                      *
2934                      * We can still do this even if the kernel is disregarding
2935                      * the hash and vectoring the packets to a specific cpu,
2936                      * but it will reduce the number of ports we can use.
2937                      */
2938                     switch(af) {
2939                     case AF_INET:
2940                               if (proto == IPPROTO_TCP) {
2941                                         do_hash = 1;
2942                                         hash_base = toeplitz_piecemeal_port(dport) ^
2943                                             toeplitz_piecemeal_addr(daddr->v4.s_addr) ^
2944                                             toeplitz_piecemeal_addr(naddr->v4.s_addr);
2945                               }
2946                               break;
2947                     case AF_INET6:
2948                               /* XXX TODO XXX */
2949                     default:
2950                               /* XXX TODO XXX */
2951                               break;
2952                     }
2953 
2954                     /*
2955                      * port search; start random, step;
2956                      * similar 2 portloop in in_pcbbind
2957                      *
2958                      * WARNING! We try to match such that the kernel will
2959                      *            dispatch the translated host/port to the same
2960                      *            cpu, but this might not be possible.
2961                      *
2962                      *            In the case where the port is fixed, or for the
2963                      *            UDP case (whos toeplitz does not incorporate the
2964                      *            port), we set not_cpu_localized which ultimately
2965                      *            causes the pf_state_tree element
2966                      *
2967                      * XXX fixed ports present a problem for cpu localization.
2968                      */
2969                     if (!(proto == IPPROTO_TCP ||
2970                           proto == IPPROTO_UDP ||
2971                           proto == IPPROTO_ICMP)) {
2972                               /*
2973                                * non-specific protocol, leave port intact.
2974                                */
2975                               key.port[1] = sport;
2976                               if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
2977                                         *nport = sport;
2978                                         pd->not_cpu_localized = 1;
2979                                         return (0);
2980                               }
2981                     } else if (low == 0 && high == 0) {
2982                               /*
2983                                * static-port same as originator.
2984                                */
2985                               key.port[1] = sport;
2986                               if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
2987                                         *nport = sport;
2988                                         pd->not_cpu_localized = 1;
2989                                         return (0);
2990                               }
2991                     } else if (low == high) {
2992                               /*
2993                                * specific port as specified.
2994                                */
2995                               key.port[1] = htons(low);
2996                               if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
2997                                         *nport = htons(low);
2998                                         pd->not_cpu_localized = 1;
2999                                         return (0);
3000                               }
3001                     } else {
3002                               /*
3003                                * normal dynamic port
3004                                */
3005                               u_int16_t tmp;
3006 
3007                               if (low > high) {
3008                                         tmp = low;
3009                                         low = high;
3010                                         high = tmp;
3011                               }
3012                               /* low < high */
3013                               cut = htonl(karc4random()) % (1 + high - low) + low;
3014                               /* low <= cut <= high */
3015                               for (tmp = cut; tmp <= high; ++(tmp)) {
3016                                         key.port[1] = htons(tmp);
3017                                         if (do_hash) {
3018                                                   uint32_t hash;
3019 
3020                                                   hash = hash_base ^
3021                                                   toeplitz_piecemeal_port(key.port[1]);
3022                                                   if (netisr_hashcpu(hash) != mycpuid)
3023                                                             continue;
3024                                         }
3025                                         if (pf_find_state_all(&key, PF_IN, NULL) ==
3026                                             NULL && !in_baddynamic(tmp, proto)) {
3027                                                   if (proto == IPPROTO_UDP)
3028                                                             pd->not_cpu_localized = 1;
3029                                                   *nport = htons(tmp);
3030                                                   return (0);
3031                                         }
3032                               }
3033                               for (tmp = cut - 1; tmp >= low; --(tmp)) {
3034                                         key.port[1] = htons(tmp);
3035                                         if (do_hash) {
3036                                                   uint32_t hash;
3037 
3038                                                   hash = hash_base ^
3039                                                   toeplitz_piecemeal_port(key.port[1]);
3040                                                   if (netisr_hashcpu(hash) != mycpuid)
3041                                                             continue;
3042                                         }
3043                                         if (pf_find_state_all(&key, PF_IN, NULL) ==
3044                                             NULL && !in_baddynamic(tmp, proto)) {
3045                                                   if (proto == IPPROTO_UDP)
3046                                                             pd->not_cpu_localized = 1;
3047                                                   *nport = htons(tmp);
3048                                                   return (0);
3049                                         }
3050                               }
3051                     }
3052 
3053                     /*
3054                      * Next address
3055                      */
3056                     switch (r->rpool.opts & PF_POOL_TYPEMASK) {
3057                     case PF_POOL_RANDOM:
3058                     case PF_POOL_ROUNDROBIN:
3059                               if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
3060                                         return (1);
3061                               break;
3062                     case PF_POOL_NONE:
3063                     case PF_POOL_SRCHASH:
3064                     case PF_POOL_BITMASK:
3065                     default:
3066                               return (1);
3067                     }
3068           } while (! PF_AEQ(&init_addr, naddr, af) );
3069           return (1);                                                 /* none available */
3070 }
3071 
3072 struct pf_rule *
pf_match_translation(struct pf_pdesc * pd,struct mbuf * m,int off,int direction,struct pfi_kif * kif,struct pf_addr * saddr,u_int16_t sport,struct pf_addr * daddr,u_int16_t dport,int rs_num)3073 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
3074     int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
3075     struct pf_addr *daddr, u_int16_t dport, int rs_num)
3076 {
3077           struct pf_rule                *r, *rm = NULL;
3078           struct pf_ruleset   *ruleset = NULL;
3079           int                            tag = -1;
3080           int                            rtableid = -1;
3081           int                            asd = 0;
3082 
3083           r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
3084           while (r && rm == NULL) {
3085                     struct pf_rule_addr *src = NULL, *dst = NULL;
3086                     struct pf_addr_wrap *xdst = NULL;
3087                     struct pf_pooladdr  *cur;
3088 
3089                     if (r->action == PF_BINAT && direction == PF_IN) {
3090                               src = &r->dst;
3091                               cur = r->rpool.cur; /* SMP race possible */
3092                               cpu_ccfence();
3093                               if (cur)
3094                                         xdst = &cur->addr;
3095                     } else {
3096                               src = &r->src;
3097                               dst = &r->dst;
3098                     }
3099 
3100                     r->evaluations++;
3101                     if (pfi_kif_match(r->kif, kif) == r->ifnot)
3102                               r = r->skip[PF_SKIP_IFP].ptr;
3103                     else if (r->direction && r->direction != direction)
3104                               r = r->skip[PF_SKIP_DIR].ptr;
3105                     else if (r->af && r->af != pd->af)
3106                               r = r->skip[PF_SKIP_AF].ptr;
3107                     else if (r->proto && r->proto != pd->proto)
3108                               r = r->skip[PF_SKIP_PROTO].ptr;
3109                     else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
3110                         src->neg, kif))
3111                               r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
3112                                   PF_SKIP_DST_ADDR].ptr;
3113                     else if (src->port_op && !pf_match_port(src->port_op,
3114                         src->port[0], src->port[1], sport))
3115                               r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
3116                                   PF_SKIP_DST_PORT].ptr;
3117                     else if (dst != NULL &&
3118                         PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
3119                               r = r->skip[PF_SKIP_DST_ADDR].ptr;
3120                     else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
3121                         0, NULL))
3122                               r = TAILQ_NEXT(r, entries);
3123                     else if (dst != NULL && dst->port_op &&
3124                         !pf_match_port(dst->port_op, dst->port[0],
3125                         dst->port[1], dport))
3126                               r = r->skip[PF_SKIP_DST_PORT].ptr;
3127                     else if (r->match_tag && !pf_match_tag(m, r, &tag))
3128                               r = TAILQ_NEXT(r, entries);
3129                     else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
3130                         IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
3131                         off, pd->hdr.tcp), r->os_fingerprint)))
3132                               r = TAILQ_NEXT(r, entries);
3133                     else {
3134                               if (r->tag)
3135                                         tag = r->tag;
3136                               if (r->rtableid >= 0)
3137                                         rtableid = r->rtableid;
3138                               if (r->anchor == NULL) {
3139                                         rm = r;
3140                               } else
3141                                         pf_step_into_anchor(&asd, &ruleset, rs_num,
3142                                             &r, NULL, NULL);
3143                     }
3144                     if (r == NULL)
3145                               pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
3146                                   NULL, NULL);
3147           }
3148           if (pf_tag_packet(m, tag, rtableid))
3149                     return (NULL);
3150           if (rm != NULL && (rm->action == PF_NONAT ||
3151               rm->action == PF_NORDR || rm->action == PF_NOBINAT))
3152                     return (NULL);
3153           return (rm);
3154 }
3155 
3156 struct pf_rule *
pf_get_translation(struct pf_pdesc * pd,struct mbuf * m,int off,int direction,struct pfi_kif * kif,struct pf_src_node ** sn,struct pf_state_key ** skw,struct pf_state_key ** sks,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_addr * saddr,struct pf_addr * daddr,u_int16_t sport,u_int16_t dport)3157 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
3158     struct pfi_kif *kif, struct pf_src_node **sn,
3159     struct pf_state_key **skw, struct pf_state_key **sks,
3160     struct pf_state_key **skp, struct pf_state_key **nkp,
3161     struct pf_addr *saddr, struct pf_addr *daddr,
3162     u_int16_t sport, u_int16_t dport)
3163 {
3164           struct pf_rule      *r = NULL;
3165 
3166           if (direction == PF_OUT) {
3167                     r = pf_match_translation(pd, m, off, direction, kif, saddr,
3168                         sport, daddr, dport, PF_RULESET_BINAT);
3169                     if (r == NULL)
3170                               r = pf_match_translation(pd, m, off, direction, kif,
3171                                   saddr, sport, daddr, dport, PF_RULESET_NAT);
3172           } else {
3173                     r = pf_match_translation(pd, m, off, direction, kif, saddr,
3174                         sport, daddr, dport, PF_RULESET_RDR);
3175                     if (r == NULL)
3176                               r = pf_match_translation(pd, m, off, direction, kif,
3177                                   saddr, sport, daddr, dport, PF_RULESET_BINAT);
3178           }
3179 
3180           if (r != NULL) {
3181                     struct pf_addr      *naddr;
3182                     u_int16_t *nport;
3183 
3184                     if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
3185                         saddr, daddr, sport, dport))
3186                               return r;
3187 
3188                     /* XXX We only modify one side for now. */
3189                     naddr = &(*nkp)->addr[1];
3190                     nport = &(*nkp)->port[1];
3191 
3192                     /*
3193                      * NOTE: Currently all translations will clear
3194                      *         BRIDGE_MBUF_TAGGED, telling the bridge to
3195                      *         ignore the original input encapsulation.
3196                      */
3197                     switch (r->action) {
3198                     case PF_NONAT:
3199                     case PF_NOBINAT:
3200                     case PF_NORDR:
3201                               return (NULL);
3202                     case PF_NAT:
3203                               m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
3204                               if (pf_get_sport(pd, pd->af, pd->proto, r,
3205                                   saddr, daddr, sport, dport,
3206                                   naddr, nport, r->rpool.proxy_port[0],
3207                                   r->rpool.proxy_port[1], sn)) {
3208                                         DPFPRINTF(PF_DEBUG_MISC,
3209                                             ("pf: NAT proxy port allocation "
3210                                             "(%u-%u) failed\n",
3211                                             r->rpool.proxy_port[0],
3212                                             r->rpool.proxy_port[1]));
3213                                         return (NULL);
3214                               }
3215                               break;
3216                     case PF_BINAT:
3217                               m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
3218                               switch (direction) {
3219                               case PF_OUT:
3220                                         if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
3221                                                   switch (pd->af) {
3222 #ifdef INET
3223                                                   case AF_INET:
3224                                                             if (r->rpool.cur->addr.p.dyn->
3225                                                                 pfid_acnt4 < 1)
3226                                                                       return (NULL);
3227                                                             PF_POOLMASK(naddr,
3228                                                                 &r->rpool.cur->addr.p.dyn->
3229                                                                 pfid_addr4,
3230                                                                 &r->rpool.cur->addr.p.dyn->
3231                                                                 pfid_mask4,
3232                                                                 saddr, AF_INET);
3233                                                             break;
3234 #endif /* INET */
3235 #ifdef INET6
3236                                                   case AF_INET6:
3237                                                             if (r->rpool.cur->addr.p.dyn->
3238                                                                 pfid_acnt6 < 1)
3239                                                                       return (NULL);
3240                                                             PF_POOLMASK(naddr,
3241                                                                 &r->rpool.cur->addr.p.dyn->
3242                                                                 pfid_addr6,
3243                                                                 &r->rpool.cur->addr.p.dyn->
3244                                                                 pfid_mask6,
3245                                                                 saddr, AF_INET6);
3246                                                             break;
3247 #endif /* INET6 */
3248                                                   }
3249                                         } else
3250                                                   PF_POOLMASK(naddr,
3251                                                       &r->rpool.cur->addr.v.a.addr,
3252                                                       &r->rpool.cur->addr.v.a.mask,
3253                                                       saddr, pd->af);
3254                                         break;
3255                               case PF_IN:
3256                                         if (r->src.addr.type == PF_ADDR_DYNIFTL) {
3257                                                   switch (pd->af) {
3258 #ifdef INET
3259                                                   case AF_INET:
3260                                                             if (r->src.addr.p.dyn->
3261                                                                 pfid_acnt4 < 1)
3262                                                                       return (NULL);
3263                                                             PF_POOLMASK(naddr,
3264                                                                 &r->src.addr.p.dyn->
3265                                                                 pfid_addr4,
3266                                                                 &r->src.addr.p.dyn->
3267                                                                 pfid_mask4,
3268                                                                 daddr, AF_INET);
3269                                                             break;
3270 #endif /* INET */
3271 #ifdef INET6
3272                                                   case AF_INET6:
3273                                                             if (r->src.addr.p.dyn->
3274                                                                 pfid_acnt6 < 1)
3275                                                                       return (NULL);
3276                                                             PF_POOLMASK(naddr,
3277                                                                 &r->src.addr.p.dyn->
3278                                                                 pfid_addr6,
3279                                                                 &r->src.addr.p.dyn->
3280                                                                 pfid_mask6,
3281                                                                 daddr, AF_INET6);
3282                                                             break;
3283 #endif /* INET6 */
3284                                                   }
3285                                         } else
3286                                                   PF_POOLMASK(naddr,
3287                                                       &r->src.addr.v.a.addr,
3288                                                       &r->src.addr.v.a.mask, daddr,
3289                                                       pd->af);
3290                                         break;
3291                               }
3292                               break;
3293                     case PF_RDR: {
3294                               m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
3295                               if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
3296                                         return (NULL);
3297                               if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
3298                                   PF_POOL_BITMASK)
3299                                         PF_POOLMASK(naddr, naddr,
3300                                             &r->rpool.cur->addr.v.a.mask, daddr,
3301                                             pd->af);
3302 
3303                               if (r->rpool.proxy_port[1]) {
3304                                         u_int32_t tmp_nport;
3305 
3306                                         tmp_nport = ((ntohs(dport) -
3307                                             ntohs(r->dst.port[0])) %
3308                                             (r->rpool.proxy_port[1] -
3309                                             r->rpool.proxy_port[0] + 1)) +
3310                                             r->rpool.proxy_port[0];
3311 
3312                                         /* wrap around if necessary */
3313                                         if (tmp_nport > 65535)
3314                                                   tmp_nport -= 65535;
3315                                         *nport = htons((u_int16_t)tmp_nport);
3316                               } else if (r->rpool.proxy_port[0]) {
3317                                         *nport = htons(r->rpool.proxy_port[0]);
3318                               }
3319                               pd->not_cpu_localized = 1;
3320                               break;
3321                     }
3322                     default:
3323                               return (NULL);
3324                     }
3325           }
3326 
3327           return (r);
3328 }
3329 
3330 struct netmsg_hashlookup {
3331           struct netmsg_base  base;
3332           struct inpcb                  **nm_pinp;
3333           struct inpcbinfo              *nm_pcbinfo;
3334           struct pf_addr                *nm_saddr;
3335           struct pf_addr                *nm_daddr;
3336           uint16_t            nm_sport;
3337           uint16_t            nm_dport;
3338           sa_family_t                   nm_af;
3339 };
3340 
#ifdef PF_SOCKET_LOOKUP_DOMSG
/*
 * Netmsg handler for struct netmsg_hashlookup requests.
 *
 * Performs a wildcard PCB hash lookup using the table, addresses and
 * ports carried in the message, writes the result through nm_pinp and
 * replies to the message with error code 0.
 *
 * AF_INET requests use in_pcblookup_hash(); when INET6 is compiled in,
 * every other address family is handled by in6_pcblookup_hash().
 * Without INET6 a non-AF_INET request leaves *nm_pinp untouched.
 */
static void
in_pcblookup_hash_handler(netmsg_t msg)
{
	struct netmsg_hashlookup *req = (struct netmsg_hashlookup *)msg;
	struct pf_addr *src = req->nm_saddr;
	struct pf_addr *dst = req->nm_daddr;

	if (req->nm_af == AF_INET) {
		*req->nm_pinp = in_pcblookup_hash(req->nm_pcbinfo,
		    src->v4, req->nm_sport, dst->v4, req->nm_dport,
		    INPLOOKUP_WILDCARD, NULL);
	}
#ifdef INET6
	else {
		*req->nm_pinp = in6_pcblookup_hash(req->nm_pcbinfo,
		    &src->v6, req->nm_sport, &dst->v6, req->nm_dport,
		    INPLOOKUP_WILDCARD, NULL);
	}
#endif /* INET6 */
	lwkt_replymsg(&req->base.lmsg, 0);
}
#endif	/* PF_SOCKET_LOOKUP_DOMSG */
3360 
3361 int
pf_socket_lookup(int direction,struct pf_pdesc * pd)3362 pf_socket_lookup(int direction, struct pf_pdesc *pd)
3363 {
3364           struct pf_addr                *saddr, *daddr;
3365           u_int16_t            sport, dport;
3366           struct inpcbinfo    *pi;
3367           struct inpcb                  *inp;
3368           struct netmsg_hashlookup *msg = NULL;
3369 #ifdef PF_SOCKET_LOOKUP_DOMSG
3370           struct netmsg_hashlookup msg0;
3371 #endif
3372           int                            pi_cpu = 0;
3373 
3374           if (pd == NULL)
3375                     return (-1);
3376           pd->lookup.uid = UID_MAX;
3377           pd->lookup.gid = GID_MAX;
3378           pd->lookup.pid = NO_PID;
3379           if (direction == PF_IN) {
3380                     saddr = pd->src;
3381                     daddr = pd->dst;
3382           } else {
3383                     saddr = pd->dst;
3384                     daddr = pd->src;
3385           }
3386           switch (pd->proto) {
3387           case IPPROTO_TCP:
3388                     if (pd->hdr.tcp == NULL)
3389                               return (-1);
3390                     sport = pd->hdr.tcp->th_sport;
3391                     dport = pd->hdr.tcp->th_dport;
3392 
3393                     pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
3394                     pi = &tcbinfo[pi_cpu];
3395                     /*
3396                      * Our netstack runs lockless on MP systems
3397                      * (only for TCP connections at the moment).
3398                      *
3399                      * As we are not allowed to read another CPU's tcbinfo,
3400                      * we have to ask that CPU via remote call to search the
3401                      * table for us.
3402                      *
3403                      * Prepare a msg iff data belongs to another CPU.
3404                      */
3405                     if (pi_cpu != mycpu->gd_cpuid) {
3406 #ifdef PF_SOCKET_LOOKUP_DOMSG
3407                               /*
3408                                * NOTE:
3409                                *
3410                                * Following lwkt_domsg() is dangerous and could
3411                                * lockup the network system, e.g.
3412                                *
3413                                * On 2 CPU system:
3414                                * netisr0 domsg to netisr1 (due to lookup)
3415                                * netisr1 domsg to netisr0 (due to lookup)
3416                                *
3417                                * We simply return -1 here, since we are probably
3418                                * called before NAT, so the TCP packet should
3419                                * already be on the correct CPU.
3420                                */
3421                               msg = &msg0;
3422                               netmsg_init(&msg->base, NULL, &curthread->td_msgport,
3423                                             0, in_pcblookup_hash_handler);
3424                               msg->nm_pinp = &inp;
3425                               msg->nm_pcbinfo = pi;
3426                               msg->nm_saddr = saddr;
3427                               msg->nm_sport = sport;
3428                               msg->nm_daddr = daddr;
3429                               msg->nm_dport = dport;
3430                               msg->nm_af = pd->af;
3431 #else     /* !PF_SOCKET_LOOKUP_DOMSG */
3432                               kprintf("pf_socket_lookup: tcp packet not on the "
3433                                         "correct cpu %d, cur cpu %d\n",
3434                                         pi_cpu, mycpuid);
3435                               print_backtrace(-1);
3436                               return -1;
3437 #endif    /* PF_SOCKET_LOOKUP_DOMSG */
3438                     }
3439                     break;
3440           case IPPROTO_UDP:
3441                     if (pd->hdr.udp == NULL)
3442                               return (-1);
3443                     sport = pd->hdr.udp->uh_sport;
3444                     dport = pd->hdr.udp->uh_dport;
3445                     pi = &udbinfo[mycpuid];
3446                     break;
3447           default:
3448                     return (-1);
3449           }
3450           if (direction != PF_IN) {
3451                     u_int16_t p;
3452 
3453                     p = sport;
3454                     sport = dport;
3455                     dport = p;
3456           }
3457           switch (pd->af) {
3458 #ifdef INET6
3459           case AF_INET6:
3460                     /*
3461                      * Query other CPU, second part
3462                      *
3463                      * msg only gets initialized when:
3464                      * 1) packet is TCP
3465                      * 2) the info belongs to another CPU
3466                      *
3467                      * Use some switch/case magic to avoid code duplication.
3468                      */
3469                     if (msg == NULL) {
3470                               inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
3471                                   &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
3472 
3473                               if (inp == NULL)
3474                                         return (-1);
3475                               break;
3476                     }
3477                     /* FALLTHROUGH if SMP and on other CPU */
3478 #endif /* INET6 */
3479           case AF_INET:
3480                     if (msg != NULL) {
3481                               lwkt_domsg(netisr_cpuport(pi_cpu),
3482                                              &msg->base.lmsg, 0);
3483                     } else
3484                     {
3485                               inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
3486                                   dport, INPLOOKUP_WILDCARD, NULL);
3487                     }
3488                     if (inp == NULL)
3489                               return (-1);
3490                     break;
3491 
3492           default:
3493                     return (-1);
3494           }
3495           pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
3496           pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
3497           return (1);
3498 }
3499 
3500 u_int8_t
pf_get_wscale(struct mbuf * m,int off,u_int16_t th_off,sa_family_t af)3501 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3502 {
3503           int                  hlen;
3504           u_int8_t   hdr[60];
3505           u_int8_t  *opt, optlen;
3506           u_int8_t   wscale = 0;
3507 
3508           hlen = th_off << 2;           /* hlen <= sizeof(hdr) */
3509           if (hlen <= sizeof(struct tcphdr))
3510                     return (0);
3511           if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3512                     return (0);
3513           opt = hdr + sizeof(struct tcphdr);
3514           hlen -= sizeof(struct tcphdr);
3515           while (hlen >= 3) {
3516                     switch (*opt) {
3517                     case TCPOPT_EOL:
3518                     case TCPOPT_NOP:
3519                               ++opt;
3520                               --hlen;
3521                               break;
3522                     case TCPOPT_WINDOW:
3523                               wscale = opt[2];
3524                               if (wscale > TCP_MAX_WINSHIFT)
3525                                         wscale = TCP_MAX_WINSHIFT;
3526                               wscale |= PF_WSCALE_FLAG;
3527                               /* FALLTHROUGH */
3528                     default:
3529                               optlen = opt[1];
3530                               if (optlen < 2)
3531                                         optlen = 2;
3532                               hlen -= optlen;
3533                               opt += optlen;
3534                               break;
3535                     }
3536           }
3537           return (wscale);
3538 }
3539 
3540 u_int16_t
pf_get_mss(struct mbuf * m,int off,u_int16_t th_off,sa_family_t af)3541 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3542 {
3543           int                  hlen;
3544           u_int8_t   hdr[60];
3545           u_int8_t  *opt, optlen;
3546           u_int16_t  mss = tcp_mssdflt;
3547 
3548           hlen = th_off << 2; /* hlen <= sizeof(hdr) */
3549           if (hlen <= sizeof(struct tcphdr))
3550                     return (0);
3551           if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3552                     return (0);
3553           opt = hdr + sizeof(struct tcphdr);
3554           hlen -= sizeof(struct tcphdr);
3555           while (hlen >= TCPOLEN_MAXSEG) {
3556                     switch (*opt) {
3557                     case TCPOPT_EOL:
3558                     case TCPOPT_NOP:
3559                               ++opt;
3560                               --hlen;
3561                               break;
3562                     case TCPOPT_MAXSEG:
3563                               bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3564                               /* FALLTHROUGH */
3565                     default:
3566                               optlen = opt[1];
3567                               if (optlen < 2)
3568                                         optlen = 2;
3569                               hlen -= optlen;
3570                               opt += optlen;
3571                               break;
3572                     }
3573           }
3574           return (mss);
3575 }
3576 
3577 u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)3578 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
3579 {
3580 #ifdef INET
3581           struct sockaddr_in  *dst;
3582           struct route                   ro;
3583 #endif /* INET */
3584 #ifdef INET6
3585           struct sockaddr_in6 *dst6;
3586           struct route_in6     ro6;
3587 #endif /* INET6 */
3588           struct rtentry                *rt = NULL;
3589           int                            hlen = 0;
3590           u_int16_t            mss = tcp_mssdflt;
3591 
3592           switch (af) {
3593 #ifdef INET
3594           case AF_INET:
3595                     hlen = sizeof(struct ip);
3596                     bzero(&ro, sizeof(ro));
3597                     dst = (struct sockaddr_in *)&ro.ro_dst;
3598                     dst->sin_family = AF_INET;
3599                     dst->sin_len = sizeof(*dst);
3600                     dst->sin_addr = addr->v4;
3601                     rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
3602                     rt = ro.ro_rt;
3603                     break;
3604 #endif /* INET */
3605 #ifdef INET6
3606           case AF_INET6:
3607                     hlen = sizeof(struct ip6_hdr);
3608                     bzero(&ro6, sizeof(ro6));
3609                     dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
3610                     dst6->sin6_family = AF_INET6;
3611                     dst6->sin6_len = sizeof(*dst6);
3612                     dst6->sin6_addr = addr->v6;
3613                     rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
3614                     rt = ro6.ro_rt;
3615                     break;
3616 #endif /* INET6 */
3617           }
3618 
3619           if (rt && rt->rt_ifp) {
3620                     mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
3621                     mss = max(tcp_mssdflt, mss);
3622                     RTFREE(rt);
3623           }
3624           mss = min(mss, offer);
3625           mss = max(mss, 64);           /* sanity - at least max opt space */
3626           return (mss);
3627 }
3628 
3629 void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr)3630 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
3631 {
3632           struct pf_rule *r = s->rule.ptr;
3633 
3634           s->rt_kif = NULL;
3635           if (!r->rt || r->rt == PF_FASTROUTE)
3636                     return;
3637           switch (s->key[PF_SK_WIRE]->af) {
3638 #ifdef INET
3639           case AF_INET:
3640                     pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
3641                         &s->nat_src_node);
3642                     s->rt_kif = r->rpool.cur->kif;
3643                     break;
3644 #endif /* INET */
3645 #ifdef INET6
3646           case AF_INET6:
3647                     pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
3648                         &s->nat_src_node);
3649                     s->rt_kif = r->rpool.cur->kif;
3650                     break;
3651 #endif /* INET6 */
3652           }
3653 }
3654 
3655 u_int32_t
pf_tcp_iss(struct pf_pdesc * pd)3656 pf_tcp_iss(struct pf_pdesc *pd)
3657 {
3658           MD5_CTX ctx;
3659           u_int32_t digest[4];
3660 
3661           if (pf_tcp_secret_init == 0) {
3662                     lwkt_gettoken(&pf_gtoken);
3663                     if (pf_tcp_secret_init == 0) {
3664                               karc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
3665                               MD5Init(&pf_tcp_secret_ctx);
3666                               MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
3667                                   sizeof(pf_tcp_secret));
3668                               pf_tcp_secret_init = 1;
3669                     }
3670                     lwkt_reltoken(&pf_gtoken);
3671           }
3672           ctx = pf_tcp_secret_ctx;
3673 
3674           MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
3675           MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
3676           if (pd->af == AF_INET6) {
3677                     MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3678                     MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3679           } else {
3680                     MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3681                     MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
3682           }
3683           MD5Final((u_char *)digest, &ctx);
3684           pf_tcp_iss_off += 4096;
3685 
3686           return (digest[0] + pd->hdr.tcp->th_seq + pf_tcp_iss_off);
3687 }
3688 
3689 int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,struct mbuf * m,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq,struct inpcb * inp)3690 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3691     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3692     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3693     struct ifqueue *ifq, struct inpcb *inp)
3694 {
3695           struct pf_rule                *nr = NULL;
3696           struct pf_addr                *saddr = pd->src, *daddr = pd->dst;
3697           sa_family_t                    af = pd->af;
3698           struct pf_rule                *r, *a = NULL;
3699           struct pf_ruleset   *ruleset = NULL;
3700           struct pf_src_node  *nsn = NULL;
3701           struct tcphdr                 *th = pd->hdr.tcp;
3702           struct pf_state_key *skw = NULL, *sks = NULL;
3703           struct pf_state_key *sk = NULL, *nk = NULL;
3704           u_short                        reason;
3705           int                            rewrite = 0, hdrlen = 0;
3706           int                            tag = -1, rtableid = -1;
3707           int                            asd = 0;
3708           int                            match = 0;
3709           int                            state_icmp = 0;
3710           u_int16_t            sport = 0, dport = 0;
3711           u_int16_t            bproto_sum = 0, bip_sum = 0;
3712           u_int8_t             icmptype = 0, icmpcode = 0;
3713 
3714 
3715           if (direction == PF_IN && pf_check_congestion(ifq)) {
3716                     REASON_SET(&reason, PFRES_CONGEST);
3717                     return (PF_DROP);
3718           }
3719 
3720           if (inp != NULL)
3721                     pd->lookup.done = pf_socket_lookup(direction, pd);
3722           else if (debug_pfugidhack) {
3723                     DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
3724                     pd->lookup.done = pf_socket_lookup(direction, pd);
3725           }
3726 
3727           switch (pd->proto) {
3728           case IPPROTO_TCP:
3729                     sport = th->th_sport;
3730                     dport = th->th_dport;
3731                     hdrlen = sizeof(*th);
3732                     break;
3733           case IPPROTO_UDP:
3734                     sport = pd->hdr.udp->uh_sport;
3735                     dport = pd->hdr.udp->uh_dport;
3736                     hdrlen = sizeof(*pd->hdr.udp);
3737                     break;
3738 #ifdef INET
3739           case IPPROTO_ICMP:
3740                     if (pd->af != AF_INET)
3741                               break;
3742                     sport = dport = pd->hdr.icmp->icmp_id;
3743                     hdrlen = sizeof(*pd->hdr.icmp);
3744                     icmptype = pd->hdr.icmp->icmp_type;
3745                     icmpcode = pd->hdr.icmp->icmp_code;
3746 
3747                     if (icmptype == ICMP_UNREACH ||
3748                         icmptype == ICMP_SOURCEQUENCH ||
3749                         icmptype == ICMP_REDIRECT ||
3750                         icmptype == ICMP_TIMXCEED ||
3751                         icmptype == ICMP_PARAMPROB)
3752                               state_icmp++;
3753                     break;
3754 #endif /* INET */
3755 #ifdef INET6
3756           case IPPROTO_ICMPV6:
3757                     if (af != AF_INET6)
3758                               break;
3759                     sport = dport = pd->hdr.icmp6->icmp6_id;
3760                     hdrlen = sizeof(*pd->hdr.icmp6);
3761                     icmptype = pd->hdr.icmp6->icmp6_type;
3762                     icmpcode = pd->hdr.icmp6->icmp6_code;
3763 
3764                     if (icmptype == ICMP6_DST_UNREACH ||
3765                         icmptype == ICMP6_PACKET_TOO_BIG ||
3766                         icmptype == ICMP6_TIME_EXCEEDED ||
3767                         icmptype == ICMP6_PARAM_PROB)
3768                               state_icmp++;
3769                     break;
3770 #endif /* INET6 */
3771           default:
3772                     sport = dport = hdrlen = 0;
3773                     break;
3774           }
3775 
3776           r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3777 
3778           /* check packet for BINAT/NAT/RDR */
3779           if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
3780               &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
3781                     if (nk == NULL || sk == NULL) {
3782                               REASON_SET(&reason, PFRES_MEMORY);
3783                               goto cleanup;
3784                     }
3785 
3786                     if (pd->ip_sum)
3787                               bip_sum = *pd->ip_sum;
3788 
3789                     m->m_flags &= ~M_HASH;
3790                     switch (pd->proto) {
3791                     case IPPROTO_TCP:
3792                               bproto_sum = th->th_sum;
3793                               pd->proto_sum = &th->th_sum;
3794 
3795                               if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3796                                   nk->port[pd->sidx] != sport) {
3797                                         pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3798                                             &th->th_sum, &nk->addr[pd->sidx],
3799                                             nk->port[pd->sidx], 0, af);
3800                                         pd->sport = &th->th_sport;
3801                                         sport = th->th_sport;
3802                               }
3803 
3804                               if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3805                                   nk->port[pd->didx] != dport) {
3806                                         pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3807                                             &th->th_sum, &nk->addr[pd->didx],
3808                                             nk->port[pd->didx], 0, af);
3809                                         dport = th->th_dport;
3810                                         pd->dport = &th->th_dport;
3811                               }
3812                               rewrite++;
3813                               break;
3814                     case IPPROTO_UDP:
3815                               bproto_sum = pd->hdr.udp->uh_sum;
3816                               pd->proto_sum = &pd->hdr.udp->uh_sum;
3817 
3818                               if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3819                                   nk->port[pd->sidx] != sport) {
3820                                         pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3821                                             pd->ip_sum, &pd->hdr.udp->uh_sum,
3822                                             &nk->addr[pd->sidx],
3823                                             nk->port[pd->sidx], 1, af);
3824                                         sport = pd->hdr.udp->uh_sport;
3825                                         pd->sport = &pd->hdr.udp->uh_sport;
3826                               }
3827 
3828                               if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3829                                   nk->port[pd->didx] != dport) {
3830                                         pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3831                                             pd->ip_sum, &pd->hdr.udp->uh_sum,
3832                                             &nk->addr[pd->didx],
3833                                             nk->port[pd->didx], 1, af);
3834                                         dport = pd->hdr.udp->uh_dport;
3835                                         pd->dport = &pd->hdr.udp->uh_dport;
3836                               }
3837                               rewrite++;
3838                               break;
3839 #ifdef INET
3840                     case IPPROTO_ICMP:
3841                               nk->port[0] = nk->port[1];
3842                               if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
3843                                         pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3844                                             nk->addr[pd->sidx].v4.s_addr, 0);
3845 
3846                               if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3847                                         pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3848                                             nk->addr[pd->didx].v4.s_addr, 0);
3849 
3850                               if (nk->port[1] != pd->hdr.icmp->icmp_id) {
3851                                         pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3852                                             pd->hdr.icmp->icmp_cksum, sport,
3853                                             nk->port[1], 0);
3854                                         pd->hdr.icmp->icmp_id = nk->port[1];
3855                                         pd->sport = &pd->hdr.icmp->icmp_id;
3856                               }
3857                               m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
3858                               break;
3859 #endif /* INET */
3860 #ifdef INET6
3861                     case IPPROTO_ICMPV6:
3862                               nk->port[0] = nk->port[1];
3863                               if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
3864                                         pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3865                                             &nk->addr[pd->sidx], 0);
3866 
3867                               if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3868                                         pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3869                                             &nk->addr[pd->didx], 0);
3870                               rewrite++;
3871                               break;
3872 #endif /* INET */
3873                     default:
3874                               switch (af) {
3875 #ifdef INET
3876                               case AF_INET:
3877                                         if (PF_ANEQ(saddr,
3878                                             &nk->addr[pd->sidx], AF_INET))
3879                                                   pf_change_a(&saddr->v4.s_addr,
3880                                                       pd->ip_sum,
3881                                                       nk->addr[pd->sidx].v4.s_addr, 0);
3882 
3883                                         if (PF_ANEQ(daddr,
3884                                             &nk->addr[pd->didx], AF_INET))
3885                                                   pf_change_a(&daddr->v4.s_addr,
3886                                                       pd->ip_sum,
3887                                                       nk->addr[pd->didx].v4.s_addr, 0);
3888                                         break;
3889 #endif /* INET */
3890 #ifdef INET6
3891                               case AF_INET6:
3892                                         if (PF_ANEQ(saddr,
3893                                             &nk->addr[pd->sidx], AF_INET6))
3894                                                   PF_ACPY(saddr, &nk->addr[pd->sidx], af);
3895 
3896                                         if (PF_ANEQ(daddr,
3897                                             &nk->addr[pd->didx], AF_INET6))
3898                                                   PF_ACPY(saddr, &nk->addr[pd->didx], af);
3899                                         break;
3900 #endif /* INET */
3901                               }
3902                               break;
3903                     }
3904                     if (nr->natpass)
3905                               r = NULL;
3906                     pd->nat_rule = nr;
3907           }
3908 
3909           while (r != NULL) {
3910                     r->evaluations++;
3911                     if (pfi_kif_match(r->kif, kif) == r->ifnot)
3912                               r = r->skip[PF_SKIP_IFP].ptr;
3913                     else if (r->direction && r->direction != direction)
3914                               r = r->skip[PF_SKIP_DIR].ptr;
3915                     else if (r->af && r->af != af)
3916                               r = r->skip[PF_SKIP_AF].ptr;
3917                     else if (r->proto && r->proto != pd->proto)
3918                               r = r->skip[PF_SKIP_PROTO].ptr;
3919                     else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3920                         r->src.neg, kif))
3921                               r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3922                     /* tcp/udp only. port_op always 0 in other cases */
3923                     else if (r->src.port_op && !pf_match_port(r->src.port_op,
3924                         r->src.port[0], r->src.port[1], sport))
3925                               r = r->skip[PF_SKIP_SRC_PORT].ptr;
3926                     else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3927                         r->dst.neg, NULL))
3928                               r = r->skip[PF_SKIP_DST_ADDR].ptr;
3929                     /* tcp/udp only. port_op always 0 in other cases */
3930                     else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3931                         r->dst.port[0], r->dst.port[1], dport))
3932                               r = r->skip[PF_SKIP_DST_PORT].ptr;
3933                     /* icmp only. type always 0 in other cases */
3934                     else if (r->type && r->type != icmptype + 1)
3935                               r = TAILQ_NEXT(r, entries);
3936                     /* icmp only. type always 0 in other cases */
3937                     else if (r->code && r->code != icmpcode + 1)
3938                               r = TAILQ_NEXT(r, entries);
3939                     else if (r->tos && !(r->tos == pd->tos))
3940                               r = TAILQ_NEXT(r, entries);
3941                     else if (r->rule_flag & PFRULE_FRAGMENT)
3942                               r = TAILQ_NEXT(r, entries);
3943                     else if (pd->proto == IPPROTO_TCP &&
3944                         (r->flagset & th->th_flags) != r->flags)
3945                               r = TAILQ_NEXT(r, entries);
3946                     /* tcp/udp only. uid.op always 0 in other cases */
3947                     else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3948                         pf_socket_lookup(direction, pd), 1)) &&
3949                         !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3950                         pd->lookup.uid))
3951                               r = TAILQ_NEXT(r, entries);
3952                     /* tcp/udp only. gid.op always 0 in other cases */
3953                     else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3954                         pf_socket_lookup(direction, pd), 1)) &&
3955                         !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3956                         pd->lookup.gid))
3957                               r = TAILQ_NEXT(r, entries);
3958                     else if (r->prob &&
3959                       r->prob <= karc4random())
3960                               r = TAILQ_NEXT(r, entries);
3961                     else if (r->match_tag && !pf_match_tag(m, r, &tag))
3962                               r = TAILQ_NEXT(r, entries);
3963                     else if (r->os_fingerprint != PF_OSFP_ANY &&
3964                         (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3965                         pf_osfp_fingerprint(pd, m, off, th),
3966                         r->os_fingerprint)))
3967                               r = TAILQ_NEXT(r, entries);
3968                     else {
3969                               if (r->tag)
3970                                         tag = r->tag;
3971                               if (r->rtableid >= 0)
3972                                         rtableid = r->rtableid;
3973                               if (r->anchor == NULL) {
3974                                         match = 1;
3975                                         *rm = r;
3976                                         *am = a;
3977                                         *rsm = ruleset;
3978                                         if ((*rm)->quick)
3979                                                   break;
3980                                         r = TAILQ_NEXT(r, entries);
3981                               } else
3982                                         pf_step_into_anchor(&asd, &ruleset,
3983                                             PF_RULESET_FILTER, &r, &a, &match);
3984                     }
3985                     if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3986                         PF_RULESET_FILTER, &r, &a, &match))
3987                               break;
3988           }
3989           r = *rm;
3990           a = *am;
3991           ruleset = *rsm;
3992 
3993           REASON_SET(&reason, PFRES_MATCH);
3994 
3995           if (r->log || (nr != NULL && nr->log)) {
3996                     if (rewrite)
3997                               m_copyback(m, off, hdrlen, pd->hdr.any);
3998                     PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3999                         a, ruleset, pd);
4000           }
4001 
4002           if ((r->action == PF_DROP) &&
4003               ((r->rule_flag & PFRULE_RETURNRST) ||
4004               (r->rule_flag & PFRULE_RETURNICMP) ||
4005               (r->rule_flag & PFRULE_RETURN))) {
4006                     /* undo NAT changes, if they have taken place */
4007                     if (nr != NULL) {
4008                               PF_ACPY(saddr, &sk->addr[pd->sidx], af);
4009                               PF_ACPY(daddr, &sk->addr[pd->didx], af);
4010                               if (pd->sport)
4011                                         *pd->sport = sk->port[pd->sidx];
4012                               if (pd->dport)
4013                                         *pd->dport = sk->port[pd->didx];
4014                               if (pd->proto_sum)
4015                                         *pd->proto_sum = bproto_sum;
4016                               if (pd->ip_sum)
4017                                         *pd->ip_sum = bip_sum;
4018                               m_copyback(m, off, hdrlen, pd->hdr.any);
4019                     }
4020                     if (pd->proto == IPPROTO_TCP &&
4021                         ((r->rule_flag & PFRULE_RETURNRST) ||
4022                         (r->rule_flag & PFRULE_RETURN)) &&
4023                         !(th->th_flags & TH_RST)) {
4024                               u_int32_t  ack = ntohl(th->th_seq) + pd->p_len;
4025                               int                  len = 0;
4026                               struct ip *h4;
4027 #ifdef INET6
4028                               struct ip6_hdr      *h6;
4029 #endif
4030                               switch (af) {
4031                               case AF_INET:
4032                                         h4 = mtod(m, struct ip *);
4033                                         len = ntohs(h4->ip_len) - off;
4034                                         break;
4035 #ifdef INET6
4036                               case AF_INET6:
4037                                         h6 = mtod(m, struct ip6_hdr *);
4038                                         len = h6->ip6_plen - (off - sizeof(*h6));
4039                                         break;
4040 #endif
4041                               }
4042 
4043                               if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
4044                                         REASON_SET(&reason, PFRES_PROTCKSUM);
4045                               else {
4046                                         if (th->th_flags & TH_SYN)
4047                                                   ack++;
4048                                         if (th->th_flags & TH_FIN)
4049                                                   ack++;
4050                                         pf_send_tcp(r, af, pd->dst,
4051                                             pd->src, th->th_dport, th->th_sport,
4052                                             ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
4053                                             r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
4054                               }
4055                     } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
4056                         r->return_icmp)
4057                               pf_send_icmp(m, r->return_icmp >> 8,
4058                                   r->return_icmp & 255, af, r);
4059                     else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
4060                         r->return_icmp6)
4061                               pf_send_icmp(m, r->return_icmp6 >> 8,
4062                                   r->return_icmp6 & 255, af, r);
4063           }
4064 
4065           if (r->action == PF_DROP)
4066                     goto cleanup;
4067 
4068           if (pf_tag_packet(m, tag, rtableid)) {
4069                     REASON_SET(&reason, PFRES_MEMORY);
4070                     goto cleanup;
4071           }
4072 
4073           if (!state_icmp && (r->keep_state || nr != NULL ||
4074               (pd->flags & PFDESC_TCP_NORM))) {
4075                     int action;
4076                     action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
4077                         off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
4078                         bip_sum, hdrlen);
4079                     if (action != PF_PASS)
4080                               return (action);
4081           }
4082 
4083           /* copy back packet headers if we performed NAT operations */
4084           if (rewrite)
4085                     m_copyback(m, off, hdrlen, pd->hdr.any);
4086 
4087           return (PF_PASS);
4088 
4089 cleanup:
4090           if (sk != NULL)
4091                     kfree(sk, M_PFSTATEKEYPL);
4092           if (nk != NULL)
4093                     kfree(nk, M_PFSTATEKEYPL);
4094           return (PF_DROP);
4095 }
4096 
4097 static __inline int
pf_create_state(struct pf_rule * r,struct pf_rule * nr,struct pf_rule * a,struct pf_pdesc * pd,struct pf_src_node * nsn,struct pf_state_key * skw,struct pf_state_key * sks,struct pf_state_key * nk,struct pf_state_key * sk,struct mbuf * m,int off,u_int16_t sport,u_int16_t dport,int * rewrite,struct pfi_kif * kif,struct pf_state ** sm,int tag,u_int16_t bproto_sum,u_int16_t bip_sum,int hdrlen)4098 pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
4099     struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
4100     struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
4101     struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
4102     struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
4103     u_int16_t bip_sum, int hdrlen)
4104 {
4105           struct pf_state               *s = NULL;
4106           struct pf_src_node  *sn = NULL;
4107           struct tcphdr                 *th = pd->hdr.tcp;
4108           u_int16_t            mss = tcp_mssdflt;
4109           u_short                        reason;
4110           int cpu = mycpu->gd_cpuid;
4111 
4112           /* check maximums */
4113           if (r->max_states && (r->states_cur >= r->max_states)) {
4114                     PF_INC_LCOUNTER(LCNT_STATES);
4115                     REASON_SET(&reason, PFRES_MAXSTATES);
4116                     return (PF_DROP);
4117           }
4118           /* src node for filter rule */
4119           if ((r->rule_flag & PFRULE_SRCTRACK ||
4120               r->rpool.opts & PF_POOL_STICKYADDR) &&
4121               pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
4122                     REASON_SET(&reason, PFRES_SRCLIMIT);
4123                     goto csfailed;
4124           }
4125           /* src node for translation rule */
4126           if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4127               pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
4128                     REASON_SET(&reason, PFRES_SRCLIMIT);
4129                     goto csfailed;
4130           }
4131           s = kmalloc(sizeof(struct pf_state), M_PFSTATEPL, M_NOWAIT|M_ZERO);
4132           if (s == NULL) {
4133                     REASON_SET(&reason, PFRES_MEMORY);
4134                     goto csfailed;
4135           }
4136           lockinit(&s->lk, "pfstlk", 0, 0);
4137           s->id = 0; /* XXX Do we really need that? not in OpenBSD */
4138           s->creatorid = 0;
4139           s->rule.ptr = r;
4140           s->nat_rule.ptr = nr;
4141           s->anchor.ptr = a;
4142           s->state_flags = PFSTATE_CREATEINPROG;
4143           STATE_INC_COUNTERS(s);
4144           if (r->allow_opts)
4145                     s->state_flags |= PFSTATE_ALLOWOPTS;
4146           if (r->rule_flag & PFRULE_STATESLOPPY)
4147                     s->state_flags |= PFSTATE_SLOPPY;
4148           if (pd->not_cpu_localized)
4149                     s->state_flags |= PFSTATE_STACK_GLOBAL;
4150 
4151           s->log = r->log & PF_LOG_ALL;
4152           if (nr != NULL)
4153                     s->log |= nr->log & PF_LOG_ALL;
4154           switch (pd->proto) {
4155           case IPPROTO_TCP:
4156                     s->src.seqlo = ntohl(th->th_seq);
4157                     s->src.seqhi = s->src.seqlo + pd->p_len + 1;
4158                     if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
4159                         r->keep_state == PF_STATE_MODULATE) {
4160                               /* Generate sequence number modulator */
4161                               if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
4162                                   0)
4163                                         s->src.seqdiff = 1;
4164                               pf_change_a(&th->th_seq, &th->th_sum,
4165                                   htonl(s->src.seqlo + s->src.seqdiff), 0);
4166                               *rewrite = 1;
4167                     } else
4168                               s->src.seqdiff = 0;
4169                     if (th->th_flags & TH_SYN) {
4170                               s->src.seqhi++;
4171                               s->src.wscale = pf_get_wscale(m, off,
4172                                   th->th_off, pd->af);
4173                     }
4174                     s->src.max_win = MAX(ntohs(th->th_win), 1);
4175                     if (s->src.wscale & PF_WSCALE_MASK) {
4176                               /* Remove scale factor from initial window */
4177                               int win = s->src.max_win;
4178                               win += 1 << (s->src.wscale & PF_WSCALE_MASK);
4179                               s->src.max_win = (win - 1) >>
4180                                   (s->src.wscale & PF_WSCALE_MASK);
4181                     }
4182                     if (th->th_flags & TH_FIN)
4183                               s->src.seqhi++;
4184                     s->dst.seqhi = 1;
4185                     s->dst.max_win = 1;
4186                     s->src.state = TCPS_SYN_SENT;
4187                     s->dst.state = TCPS_CLOSED;
4188                     s->timeout = PFTM_TCP_FIRST_PACKET;
4189                     break;
4190           case IPPROTO_UDP:
4191                     s->src.state = PFUDPS_SINGLE;
4192                     s->dst.state = PFUDPS_NO_TRAFFIC;
4193                     s->timeout = PFTM_UDP_FIRST_PACKET;
4194                     break;
4195           case IPPROTO_ICMP:
4196 #ifdef INET6
4197           case IPPROTO_ICMPV6:
4198 #endif
4199                     s->timeout = PFTM_ICMP_FIRST_PACKET;
4200                     break;
4201           default:
4202                     s->src.state = PFOTHERS_SINGLE;
4203                     s->dst.state = PFOTHERS_NO_TRAFFIC;
4204                     s->timeout = PFTM_OTHER_FIRST_PACKET;
4205           }
4206 
4207           s->creation = time_second;
4208           s->expire = time_second;
4209 
4210           if (sn != NULL) {
4211                     s->src_node = sn;
4212                     s->src_node->states++;
4213           }
4214           if (nsn != NULL) {
4215                     /* XXX We only modify one side for now. */
4216                     PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
4217                     s->nat_src_node = nsn;
4218                     s->nat_src_node->states++;
4219           }
4220           if (pd->proto == IPPROTO_TCP) {
4221                     if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
4222                         off, pd, th, &s->src, &s->dst)) {
4223                               REASON_SET(&reason, PFRES_MEMORY);
4224                               pf_src_tree_remove_state(s);
4225                               STATE_DEC_COUNTERS(s);
4226                               kfree(s, M_PFSTATEPL);
4227                               return (PF_DROP);
4228                     }
4229                     if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
4230                         pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
4231                         &s->src, &s->dst, rewrite)) {
4232                               /* This really shouldn't happen!!! */
4233                               DPFPRINTF(PF_DEBUG_URGENT,
4234                                   ("pf_normalize_tcp_stateful failed on first pkt"));
4235                               pf_normalize_tcp_cleanup(s);
4236                               pf_src_tree_remove_state(s);
4237                               STATE_DEC_COUNTERS(s);
4238                               kfree(s, M_PFSTATEPL);
4239                               return (PF_DROP);
4240                     }
4241           }
4242           s->direction = pd->dir;
4243 
4244           if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
4245                                                        pd->src, pd->dst, sport, dport)) {
4246                     REASON_SET(&reason, PFRES_MEMORY);
4247                     goto csfailed;
4248           }
4249 
4250           if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
4251                     if (pd->proto == IPPROTO_TCP)
4252                               pf_normalize_tcp_cleanup(s);
4253                     REASON_SET(&reason, PFRES_STATEINS);
4254                     pf_src_tree_remove_state(s);
4255                     STATE_DEC_COUNTERS(s);
4256                     kfree(s, M_PFSTATEPL);
4257                     return (PF_DROP);
4258           } else
4259                     *sm = s;
4260 
4261           pf_set_rt_ifp(s, pd->src);    /* needs s->state_key set */
4262           if (tag > 0) {
4263                     pf_tag_ref(tag);
4264                     s->tag = tag;
4265           }
4266           if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
4267               TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
4268                     s->src.state = PF_TCPS_PROXY_SRC;
4269                     /* undo NAT changes, if they have taken place */
4270                     if (nr != NULL) {
4271                               struct pf_state_key *skt = s->key[PF_SK_WIRE];
4272                               if (pd->dir == PF_OUT)
4273                                         skt = s->key[PF_SK_STACK];
4274                               PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
4275                               PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
4276                               if (pd->sport)
4277                                         *pd->sport = skt->port[pd->sidx];
4278                               if (pd->dport)
4279                                         *pd->dport = skt->port[pd->didx];
4280                               if (pd->proto_sum)
4281                                         *pd->proto_sum = bproto_sum;
4282                               if (pd->ip_sum)
4283                                         *pd->ip_sum = bip_sum;
4284                               m->m_flags &= ~M_HASH;
4285                               m_copyback(m, off, hdrlen, pd->hdr.any);
4286                     }
4287                     s->src.seqhi = htonl(karc4random());
4288                     /* Find mss option */
4289                     mss = pf_get_mss(m, off, th->th_off, pd->af);
4290                     mss = pf_calc_mss(pd->src, pd->af, mss);
4291                     mss = pf_calc_mss(pd->dst, pd->af, mss);
4292                     s->src.mss = mss;
4293                     s->state_flags &= ~PFSTATE_CREATEINPROG;
4294                     pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
4295                                   th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
4296                                   TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
4297                     REASON_SET(&reason, PFRES_SYNPROXY);
4298                     return (PF_SYNPROXY_DROP);
4299           }
4300 
4301           s->state_flags &= ~PFSTATE_CREATEINPROG;
4302           return (PF_PASS);
4303 
4304 csfailed:
4305           if (sk != NULL)
4306                     kfree(sk, M_PFSTATEKEYPL);
4307           if (nk != NULL)
4308                     kfree(nk, M_PFSTATEKEYPL);
4309 
4310           if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4311                     RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], sn);
4312                     PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
4313                     atomic_add_int(&pf_status.src_nodes, -1);
4314                     kfree(sn, M_PFSRCTREEPL);
4315           }
4316           if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
4317                     RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], nsn);
4318                     PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
4319                     atomic_add_int(&pf_status.src_nodes, -1);
4320                     kfree(nsn, M_PFSRCTREEPL);
4321           }
4322           if (s) {
4323                     pf_src_tree_remove_state(s);
4324                     STATE_DEC_COUNTERS(s);
4325                     kfree(s, M_PFSTATEPL);
4326           }
4327 
4328           return (PF_DROP);
4329 }
4330 
4331 int
pf_test_fragment(struct pf_rule ** rm,int direction,struct pfi_kif * kif,struct mbuf * m,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm)4332 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
4333     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
4334     struct pf_ruleset **rsm)
4335 {
4336           struct pf_rule                *r, *a = NULL;
4337           struct pf_ruleset   *ruleset = NULL;
4338           sa_family_t                    af = pd->af;
4339           u_short                        reason;
4340           int                            tag = -1;
4341           int                            asd = 0;
4342           int                            match = 0;
4343 
4344           r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4345           while (r != NULL) {
4346                     r->evaluations++;
4347                     if (pfi_kif_match(r->kif, kif) == r->ifnot)
4348                               r = r->skip[PF_SKIP_IFP].ptr;
4349                     else if (r->direction && r->direction != direction)
4350                               r = r->skip[PF_SKIP_DIR].ptr;
4351                     else if (r->af && r->af != af)
4352                               r = r->skip[PF_SKIP_AF].ptr;
4353                     else if (r->proto && r->proto != pd->proto)
4354                               r = r->skip[PF_SKIP_PROTO].ptr;
4355                     else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4356                         r->src.neg, kif))
4357                               r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4358                     else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4359                         r->dst.neg, NULL))
4360                               r = r->skip[PF_SKIP_DST_ADDR].ptr;
4361                     else if (r->tos && !(r->tos == pd->tos))
4362                               r = TAILQ_NEXT(r, entries);
4363                     else if (r->os_fingerprint != PF_OSFP_ANY)
4364                               r = TAILQ_NEXT(r, entries);
4365                     else if (pd->proto == IPPROTO_UDP &&
4366                         (r->src.port_op || r->dst.port_op))
4367                               r = TAILQ_NEXT(r, entries);
4368                     else if (pd->proto == IPPROTO_TCP &&
4369                         (r->src.port_op || r->dst.port_op || r->flagset))
4370                               r = TAILQ_NEXT(r, entries);
4371                     else if ((pd->proto == IPPROTO_ICMP ||
4372                         pd->proto == IPPROTO_ICMPV6) &&
4373                         (r->type || r->code))
4374                               r = TAILQ_NEXT(r, entries);
4375                     else if (r->prob && r->prob <= karc4random())
4376                               r = TAILQ_NEXT(r, entries);
4377                     else if (r->match_tag && !pf_match_tag(m, r, &tag))
4378                               r = TAILQ_NEXT(r, entries);
4379                     else {
4380                               if (r->anchor == NULL) {
4381                                         match = 1;
4382                                         *rm = r;
4383                                         *am = a;
4384                                         *rsm = ruleset;
4385                                         if ((*rm)->quick)
4386                                                   break;
4387                                         r = TAILQ_NEXT(r, entries);
4388                               } else
4389                                         pf_step_into_anchor(&asd, &ruleset,
4390                                             PF_RULESET_FILTER, &r, &a, &match);
4391                     }
4392                     if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4393                         PF_RULESET_FILTER, &r, &a, &match))
4394                               break;
4395           }
4396           r = *rm;
4397           a = *am;
4398           ruleset = *rsm;
4399 
4400           REASON_SET(&reason, PFRES_MATCH);
4401 
4402           if (r->log)
4403                     PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
4404                         pd);
4405 
4406           if (r->action != PF_PASS)
4407                     return (PF_DROP);
4408 
4409           if (pf_tag_packet(m, tag, -1)) {
4410                     REASON_SET(&reason, PFRES_MEMORY);
4411                     return (PF_DROP);
4412           }
4413 
4414           return (PF_PASS);
4415 }
4416 
4417 /*
4418  * Called with state locked
4419  */
4420 int
pf_tcp_track_full(struct pf_state_peer * src,struct pf_state_peer * dst,struct pf_state ** state,struct pfi_kif * kif,struct mbuf * m,int off,struct pf_pdesc * pd,u_short * reason,int * copyback)4421 pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
4422           struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
4423           struct pf_pdesc *pd, u_short *reason, int *copyback)
4424 {
4425           struct tcphdr                 *th = pd->hdr.tcp;
4426           u_int16_t            win = ntohs(th->th_win);
4427           u_int32_t            ack, end, seq, orig_seq;
4428           u_int8_t             sws, dws;
4429           int                            ackskew;
4430 
4431           if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4432                     sws = src->wscale & PF_WSCALE_MASK;
4433                     dws = dst->wscale & PF_WSCALE_MASK;
4434           } else {
4435                     sws = dws = 0;
4436           }
4437 
4438           /*
4439            * Sequence tracking algorithm from Guido van Rooij's paper:
4440            *   http://www.madison-gurkha.com/publications/tcp_filtering/
4441            *        tcp_filtering.ps
4442            */
4443 
4444           orig_seq = seq = ntohl(th->th_seq);
4445           if (src->seqlo == 0) {
4446                     /* First packet from this end. Set its state */
4447 
4448                     if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4449                         src->scrub == NULL) {
4450                               if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4451                                         REASON_SET(reason, PFRES_MEMORY);
4452                                         return (PF_DROP);
4453                               }
4454                     }
4455 
4456                     /* Deferred generation of sequence number modulator */
4457                     if (dst->seqdiff && !src->seqdiff) {
4458                               /* use random iss for the TCP server */
4459                               while ((src->seqdiff = karc4random() - seq) == 0)
4460                                         ;
4461                               ack = ntohl(th->th_ack) - dst->seqdiff;
4462                               pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4463                                   src->seqdiff), 0);
4464                               pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4465                               *copyback = 1;
4466                     } else {
4467                               ack = ntohl(th->th_ack);
4468                     }
4469 
4470                     end = seq + pd->p_len;
4471                     if (th->th_flags & TH_SYN) {
4472                               end++;
4473                               (*state)->sync_flags |= PFSTATE_GOT_SYN2;
4474                               if (dst->wscale & PF_WSCALE_FLAG) {
4475                                         src->wscale = pf_get_wscale(m, off, th->th_off,
4476                                             pd->af);
4477                                         if (src->wscale & PF_WSCALE_FLAG) {
4478                                                   /* Remove scale factor from initial
4479                                                    * window */
4480                                                   sws = src->wscale & PF_WSCALE_MASK;
4481                                                   win = ((u_int32_t)win + (1 << sws) - 1)
4482                                                       >> sws;
4483                                                   dws = dst->wscale & PF_WSCALE_MASK;
4484                                         } else {
4485                                                   /* fixup other window */
4486                                                   dst->max_win <<= dst->wscale &
4487                                                       PF_WSCALE_MASK;
4488                                                   /* in case of a retrans SYN|ACK */
4489                                                   dst->wscale = 0;
4490                                         }
4491                               }
4492                     }
4493                     if (th->th_flags & TH_FIN)
4494                               end++;
4495 
4496                     src->seqlo = seq;
4497                     if (src->state < TCPS_SYN_SENT)
4498                               src->state = TCPS_SYN_SENT;
4499 
4500                     /*
4501                      * May need to slide the window (seqhi may have been set by
4502                      * the crappy stack check or if we picked up the connection
4503                      * after establishment)
4504                      */
4505                     if (src->seqhi == 1 ||
4506                         SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4507                               src->seqhi = end + MAX(1, dst->max_win << dws);
4508                     if (win > src->max_win)
4509                               src->max_win = win;
4510 
4511           } else {
4512                     ack = ntohl(th->th_ack) - dst->seqdiff;
4513                     if (src->seqdiff) {
4514                               /* Modulate sequence numbers */
4515                               pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4516                                   src->seqdiff), 0);
4517                               pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4518                               *copyback = 1;
4519                     }
4520                     end = seq + pd->p_len;
4521                     if (th->th_flags & TH_SYN)
4522                               end++;
4523                     if (th->th_flags & TH_FIN)
4524                               end++;
4525           }
4526 
4527           if ((th->th_flags & TH_ACK) == 0) {
4528                     /* Let it pass through the ack skew check */
4529                     ack = dst->seqlo;
4530           } else if ((ack == 0 &&
4531               (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4532               /* broken tcp stacks do not set ack */
4533               (dst->state < TCPS_SYN_SENT)) {
4534                     /*
4535                      * Many stacks (ours included) will set the ACK number in an
4536                      * FIN|ACK if the SYN times out -- no sequence to ACK.
4537                      */
4538                     ack = dst->seqlo;
4539           }
4540 
4541           if (seq == end) {
4542                     /* Ease sequencing restrictions on no data packets */
4543                     seq = src->seqlo;
4544                     end = seq;
4545           }
4546 
4547           ackskew = dst->seqlo - ack;
4548 
4549 
4550           /*
4551            * Need to demodulate the sequence numbers in any TCP SACK options
4552            * (Selective ACK). We could optionally validate the SACK values
4553            * against the current ACK window, either forwards or backwards, but
4554            * I'm not confident that SACK has been implemented properly
4555            * everywhere. It wouldn't surprise me if several stacks accidently
4556            * SACK too far backwards of previously ACKed data. There really aren't
4557            * any security implications of bad SACKing unless the target stack
4558            * doesn't validate the option length correctly. Someone trying to
4559            * spoof into a TCP connection won't bother blindly sending SACK
4560            * options anyway.
4561            */
4562           if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4563                     if (pf_modulate_sack(m, off, pd, th, dst))
4564                               *copyback = 1;
4565           }
4566 
4567 
4568 #define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
4569           if (SEQ_GEQ(src->seqhi, end) &&
4570               /* Last octet inside other's window space */
4571               SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4572               /* Retrans: not more than one window back */
4573               (ackskew >= -MAXACKWINDOW) &&
4574               /* Acking not more than one reassembled fragment backwards */
4575               (ackskew <= (MAXACKWINDOW << sws)) &&
4576               /* Acking not more than one window forward */
4577               ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4578               (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo)))
4579           {
4580                     /*
4581                      * Require an exact/+1 sequence match on resets
4582                      * when possible
4583                      */
4584                     if (dst->scrub || src->scrub) {
4585                               if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4586                                   *state, src, dst, copyback))
4587                                         return (PF_DROP);
4588                     }
4589 
4590                     /* update max window */
4591                     if (src->max_win < win)
4592                               src->max_win = win;
4593                     /* synchronize sequencing */
4594                     if (SEQ_GT(end, src->seqlo))
4595                               src->seqlo = end;
4596                     /* slide the window of what the other end can send */
4597                     if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4598                               dst->seqhi = ack + MAX((win << sws), 1);
4599 
4600 
4601                     /* update states */
4602                     if (th->th_flags & TH_SYN)
4603                               if (src->state < TCPS_SYN_SENT)
4604                                         src->state = TCPS_SYN_SENT;
4605                     if (th->th_flags & TH_FIN)
4606                               if (src->state < TCPS_CLOSING)
4607                                         src->state = TCPS_CLOSING;
4608                     if (th->th_flags & TH_ACK) {
4609                               if (dst->state == TCPS_SYN_SENT) {
4610                                         dst->state = TCPS_ESTABLISHED;
4611                                         if (src->state == TCPS_ESTABLISHED &&
4612                                             (*state)->src_node != NULL &&
4613                                             pf_src_connlimit(*state)) {
4614                                                   REASON_SET(reason, PFRES_SRCLIMIT);
4615                                                   return (PF_DROP);
4616                                         }
4617                               } else if (dst->state == TCPS_CLOSING)
4618                                         dst->state = TCPS_FIN_WAIT_2;
4619                     }
4620                     if (th->th_flags & TH_RST)
4621                               src->state = dst->state = TCPS_TIME_WAIT;
4622 
4623                     /* update expire time */
4624                     (*state)->expire = time_second;
4625                     if (src->state >= TCPS_FIN_WAIT_2 &&
4626                         dst->state >= TCPS_FIN_WAIT_2)
4627                               (*state)->timeout = PFTM_TCP_CLOSED;
4628                     else if (src->state >= TCPS_CLOSING &&
4629                         dst->state >= TCPS_CLOSING)
4630                               (*state)->timeout = PFTM_TCP_FIN_WAIT;
4631                     else if (src->state < TCPS_ESTABLISHED ||
4632                         dst->state < TCPS_ESTABLISHED)
4633                               (*state)->timeout = PFTM_TCP_OPENING;
4634                     else if (src->state >= TCPS_CLOSING ||
4635                         dst->state >= TCPS_CLOSING)
4636                               (*state)->timeout = PFTM_TCP_CLOSING;
4637                     else if ((th->th_flags & TH_SYN) &&
4638                                ((*state)->state_flags & PFSTATE_SLOPPY))
4639                               (*state)->timeout = PFTM_TCP_FIRST_PACKET;
4640                     else
4641                               (*state)->timeout = PFTM_TCP_ESTABLISHED;
4642 
4643                     /* Fall through to PASS packet */
4644 
4645           } else if ((dst->state < TCPS_SYN_SENT ||
4646                     dst->state >= TCPS_FIN_WAIT_2 ||
4647                     src->state >= TCPS_FIN_WAIT_2) &&
4648               SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4649               /* Within a window forward of the originating packet */
4650               SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4651               /* Within a window backward of the originating packet */
4652 
4653                     /*
4654                      * This currently handles three situations:
4655                      *  1) Stupid stacks will shotgun SYNs before their peer
4656                      *     replies.
4657                      *  2) When PF catches an already established stream (the
4658                      *     firewall rebooted, the state table was flushed, routes
4659                      *     changed...)
4660                      *  3) Packets get funky immediately after the connection
4661                      *     closes (this should catch Solaris spurious ACK|FINs
4662                      *     that web servers like to spew after a close)
4663                      *
4664                      * This must be a little more careful than the above code
4665                      * since packet floods will also be caught here. We don't
4666                      * update the TTL here to mitigate the damage of a packet
4667                      * flood and so the same code can handle awkward establishment
4668                      * and a loosened connection close.
4669                      * In the establishment case, a correct peer response will
4670                      * validate the connection, go through the normal state code
4671                      * and keep updating the state TTL.
4672                      */
4673 
4674                     if (pf_status.debug >= PF_DEBUG_MISC) {
4675                               kprintf("pf: loose state match: ");
4676                               pf_print_state(*state);
4677                               pf_print_flags(th->th_flags);
4678                               kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4679                                   "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len,
4680                                   ackskew, (unsigned long long)(*state)->packets[0],
4681                                   (unsigned long long)(*state)->packets[1],
4682                                   pd->dir == PF_IN ? "in" : "out",
4683                                   pd->dir == (*state)->direction ? "fwd" : "rev");
4684                     }
4685 
4686                     if (dst->scrub || src->scrub) {
4687                               if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4688                                   *state, src, dst, copyback))
4689                                         return (PF_DROP);
4690                     }
4691 
4692                     /* update max window */
4693                     if (src->max_win < win)
4694                               src->max_win = win;
4695                     /* synchronize sequencing */
4696                     if (SEQ_GT(end, src->seqlo))
4697                               src->seqlo = end;
4698                     /* slide the window of what the other end can send */
4699                     if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4700                               dst->seqhi = ack + MAX((win << sws), 1);
4701 
4702                     /*
4703                      * Cannot set dst->seqhi here since this could be a shotgunned
4704                      * SYN and not an already established connection.
4705                      */
4706 
4707                     if (th->th_flags & TH_FIN)
4708                               if (src->state < TCPS_CLOSING)
4709                                         src->state = TCPS_CLOSING;
4710                     if (th->th_flags & TH_RST)
4711                               src->state = dst->state = TCPS_TIME_WAIT;
4712 
4713                     /* Fall through to PASS packet */
4714 
4715           } else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY ||
4716                         ((*state)->pickup_mode == PF_PICKUPS_ENABLED &&
4717                          ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) !=
4718                           PFSTATE_GOT_SYN_MASK)) {
4719                     /*
4720                      * If pickup mode is hash only, do not fail on sequence checks.
4721                      *
4722                      * If pickup mode is enabled and we did not see the SYN in
4723                      * both direction, do not fail on sequence checks because
4724                      * we do not have complete information on window scale.
4725                      *
4726                      * Adjust expiration and fall through to PASS packet.
4727                      * XXX Add a FIN check to reduce timeout?
4728                      */
4729                     (*state)->expire = time_second;
4730           } else  {
4731                     /*
4732                      * Failure processing
4733                      */
4734                     if ((*state)->dst.state == TCPS_SYN_SENT &&
4735                         (*state)->src.state == TCPS_SYN_SENT) {
4736                               /* Send RST for state mismatches during handshake */
4737                               if (!(th->th_flags & TH_RST))
4738                                         pf_send_tcp((*state)->rule.ptr, pd->af,
4739                                             pd->dst, pd->src, th->th_dport,
4740                                             th->th_sport, ntohl(th->th_ack), 0,
4741                                             TH_RST, 0, 0,
4742                                             (*state)->rule.ptr->return_ttl, 1, 0,
4743                                             pd->eh, kif->pfik_ifp);
4744                               src->seqlo = 0;
4745                               src->seqhi = 1;
4746                               src->max_win = 1;
4747                     } else if (pf_status.debug >= PF_DEBUG_MISC) {
4748                               kprintf("pf: BAD state: ");
4749                               pf_print_state(*state);
4750                               pf_print_flags(th->th_flags);
4751                               kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4752                                   "pkts=%llu:%llu dir=%s,%s\n",
4753                                   seq, orig_seq, ack, pd->p_len, ackskew,
4754                                   (unsigned long long)(*state)->packets[0],
4755                                         (unsigned long long)(*state)->packets[1],
4756                                   pd->dir == PF_IN ? "in" : "out",
4757                                   pd->dir == (*state)->direction ? "fwd" : "rev");
4758                               kprintf("pf: State failure on: %c %c %c %c | %c %c\n",
4759                                   SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4760                                   SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4761                                   ' ': '2',
4762                                   (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4763                                   (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4764                                   SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4765                                   SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4766                     }
4767                     REASON_SET(reason, PFRES_BADSTATE);
4768                     return (PF_DROP);
4769           }
4770 
4771           return (PF_PASS);
4772 }
4773 
4774 /*
4775  * Called with state locked
4776  */
4777 int
pf_tcp_track_sloppy(struct pf_state_peer * src,struct pf_state_peer * dst,struct pf_state ** state,struct pf_pdesc * pd,u_short * reason)4778 pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
4779           struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
4780 {
4781           struct tcphdr                 *th = pd->hdr.tcp;
4782 
4783           if (th->th_flags & TH_SYN)
4784                     if (src->state < TCPS_SYN_SENT)
4785                               src->state = TCPS_SYN_SENT;
4786           if (th->th_flags & TH_FIN)
4787                     if (src->state < TCPS_CLOSING)
4788                               src->state = TCPS_CLOSING;
4789           if (th->th_flags & TH_ACK) {
4790                     if (dst->state == TCPS_SYN_SENT) {
4791                               dst->state = TCPS_ESTABLISHED;
4792                               if (src->state == TCPS_ESTABLISHED &&
4793                                   (*state)->src_node != NULL &&
4794                                   pf_src_connlimit(*state)) {
4795                                         REASON_SET(reason, PFRES_SRCLIMIT);
4796                                         return (PF_DROP);
4797                               }
4798                     } else if (dst->state == TCPS_CLOSING) {
4799                               dst->state = TCPS_FIN_WAIT_2;
4800                     } else if (src->state == TCPS_SYN_SENT &&
4801                         dst->state < TCPS_SYN_SENT) {
4802                               /*
4803                                * Handle a special sloppy case where we only see one
4804                                * half of the connection. If there is a ACK after
4805                                * the initial SYN without ever seeing a packet from
4806                                * the destination, set the connection to established.
4807                                */
4808                               dst->state = src->state = TCPS_ESTABLISHED;
4809                               if ((*state)->src_node != NULL &&
4810                                   pf_src_connlimit(*state)) {
4811                                         REASON_SET(reason, PFRES_SRCLIMIT);
4812                                         return (PF_DROP);
4813                               }
4814                     } else if (src->state == TCPS_CLOSING &&
4815                         dst->state == TCPS_ESTABLISHED &&
4816                         dst->seqlo == 0) {
4817                               /*
4818                                * Handle the closing of half connections where we
4819                                * don't see the full bidirectional FIN/ACK+ACK
4820                                * handshake.
4821                                */
4822                               dst->state = TCPS_CLOSING;
4823                     }
4824           }
4825           if (th->th_flags & TH_RST)
4826                     src->state = dst->state = TCPS_TIME_WAIT;
4827 
4828           /* update expire time */
4829           (*state)->expire = time_second;
4830           if (src->state >= TCPS_FIN_WAIT_2 &&
4831               dst->state >= TCPS_FIN_WAIT_2)
4832                     (*state)->timeout = PFTM_TCP_CLOSED;
4833           else if (src->state >= TCPS_CLOSING &&
4834               dst->state >= TCPS_CLOSING)
4835                     (*state)->timeout = PFTM_TCP_FIN_WAIT;
4836           else if (src->state < TCPS_ESTABLISHED ||
4837               dst->state < TCPS_ESTABLISHED)
4838                     (*state)->timeout = PFTM_TCP_OPENING;
4839           else if (src->state >= TCPS_CLOSING ||
4840               dst->state >= TCPS_CLOSING)
4841                     (*state)->timeout = PFTM_TCP_CLOSING;
4842           else if ((th->th_flags & TH_SYN) &&
4843                      ((*state)->state_flags & PFSTATE_SLOPPY))
4844                     (*state)->timeout = PFTM_TCP_FIRST_PACKET;
4845           else
4846                     (*state)->timeout = PFTM_TCP_ESTABLISHED;
4847 
4848           return (PF_PASS);
4849 }
4850 
4851 /*
4852  * Test TCP connection state.  Caller must hold the state locked.
4853  */
4854 int
pf_test_state_tcp(struct pf_state ** state,int direction,struct pfi_kif * kif,struct mbuf * m,int off,void * h,struct pf_pdesc * pd,u_short * reason)4855 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4856                       struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4857                       u_short *reason)
4858 {
4859           struct pf_state_key_cmp        key;
4860           struct tcphdr                 *th = pd->hdr.tcp;
4861           int                            copyback = 0;
4862           int                            error;
4863           struct pf_state_peer          *src, *dst;
4864           struct pf_state_key *sk;
4865 
4866           bzero(&key, sizeof(key));
4867           key.af = pd->af;
4868           key.proto = IPPROTO_TCP;
4869           if (direction == PF_IN)       {         /* wire side, straight */
4870                     PF_ACPY(&key.addr[0], pd->src, key.af);
4871                     PF_ACPY(&key.addr[1], pd->dst, key.af);
4872                     key.port[0] = th->th_sport;
4873                     key.port[1] = th->th_dport;
4874                     if (pf_status.debug >= PF_DEBUG_MISC) {
4875                               kprintf("test-tcp IN (%08x:%d) -> (%08x:%d)\n",
4876                                         ntohl(key.addr[0].addr32[0]),
4877                                         ntohs(key.port[0]),
4878                                         ntohl(key.addr[1].addr32[0]),
4879                                         ntohs(key.port[1]));
4880                     }
4881           } else {                      /* stack side, reverse */
4882                     PF_ACPY(&key.addr[1], pd->src, key.af);
4883                     PF_ACPY(&key.addr[0], pd->dst, key.af);
4884                     key.port[1] = th->th_sport;
4885                     key.port[0] = th->th_dport;
4886                     if (pf_status.debug >= PF_DEBUG_MISC) {
4887                               kprintf("test-tcp OUT (%08x:%d) <- (%08x:%d)\n",
4888                                         ntohl(key.addr[0].addr32[0]),
4889                                         ntohs(key.port[0]),
4890                                         ntohl(key.addr[1].addr32[0]),
4891                                         ntohs(key.port[1]));
4892                     }
4893           }
4894 
4895           STATE_LOOKUP(kif, &key, direction, *state, m);
4896           lockmgr(&(*state)->lk, LK_EXCLUSIVE);
4897 
4898           if (direction == (*state)->direction) {
4899                     src = &(*state)->src;
4900                     dst = &(*state)->dst;
4901           } else {
4902                     src = &(*state)->dst;
4903                     dst = &(*state)->src;
4904           }
4905 
4906           sk = (*state)->key[pd->didx];
4907 
4908           if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4909                     if (direction != (*state)->direction) {
4910                               REASON_SET(reason, PFRES_SYNPROXY);
4911                               FAIL (PF_SYNPROXY_DROP);
4912                     }
4913                     if (th->th_flags & TH_SYN) {
4914                               if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4915                                         REASON_SET(reason, PFRES_SYNPROXY);
4916                                         FAIL (PF_DROP);
4917                               }
4918                               pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4919                                   pd->src, th->th_dport, th->th_sport,
4920                                   (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4921                                   TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
4922                                   0, NULL, NULL);
4923                               REASON_SET(reason, PFRES_SYNPROXY);
4924                               FAIL (PF_SYNPROXY_DROP);
4925                     } else if (!(th->th_flags & TH_ACK) ||
4926                         (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4927                         (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4928                               REASON_SET(reason, PFRES_SYNPROXY);
4929                               FAIL (PF_DROP);
4930                     } else if ((*state)->src_node != NULL &&
4931                         pf_src_connlimit(*state)) {
4932                               REASON_SET(reason, PFRES_SRCLIMIT);
4933                               FAIL (PF_DROP);
4934                     } else
4935                               (*state)->src.state = PF_TCPS_PROXY_DST;
4936           }
4937           if ((*state)->src.state == PF_TCPS_PROXY_DST) {
4938                     if (direction == (*state)->direction) {
4939                               if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4940                                   (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4941                                   (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4942                                         REASON_SET(reason, PFRES_SYNPROXY);
4943                                         FAIL (PF_DROP);
4944                               }
4945                               (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
4946                               if ((*state)->dst.seqhi == 1)
4947                                         (*state)->dst.seqhi = htonl(karc4random());
4948                               pf_send_tcp((*state)->rule.ptr, pd->af,
4949                                   &sk->addr[pd->sidx], &sk->addr[pd->didx],
4950                                   sk->port[pd->sidx], sk->port[pd->didx],
4951                                   (*state)->dst.seqhi, 0, TH_SYN, 0,
4952                                   (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
4953                               REASON_SET(reason, PFRES_SYNPROXY);
4954                               FAIL (PF_SYNPROXY_DROP);
4955                     } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4956                         (TH_SYN|TH_ACK)) ||
4957                         (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4958                               REASON_SET(reason, PFRES_SYNPROXY);
4959                               FAIL (PF_DROP);
4960                     } else {
4961                               (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4962                               (*state)->dst.seqlo = ntohl(th->th_seq);
4963                               pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4964                                   pd->src, th->th_dport, th->th_sport,
4965                                   ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4966                                   TH_ACK, (*state)->src.max_win, 0, 0, 0,
4967                                   (*state)->tag, NULL, NULL);
4968                               pf_send_tcp((*state)->rule.ptr, pd->af,
4969                                   &sk->addr[pd->sidx], &sk->addr[pd->didx],
4970                                   sk->port[pd->sidx], sk->port[pd->didx],
4971                                   (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4972                                   TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4973                                   0, NULL, NULL);
4974                               (*state)->src.seqdiff = (*state)->dst.seqhi -
4975                                   (*state)->src.seqlo;
4976                               (*state)->dst.seqdiff = (*state)->src.seqhi -
4977                                   (*state)->dst.seqlo;
4978                               (*state)->src.seqhi = (*state)->src.seqlo +
4979                                   (*state)->dst.max_win;
4980                               (*state)->dst.seqhi = (*state)->dst.seqlo +
4981                                   (*state)->src.max_win;
4982                               (*state)->src.wscale = (*state)->dst.wscale = 0;
4983                               (*state)->src.state = (*state)->dst.state =
4984                                   TCPS_ESTABLISHED;
4985                               REASON_SET(reason, PFRES_SYNPROXY);
4986                               FAIL (PF_SYNPROXY_DROP);
4987                     }
4988           }
4989 
4990           /*
4991            * Check for connection (addr+port pair) reuse.  We can't actually
4992            * unlink the state if we don't own it.
4993            */
4994           if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
4995               dst->state >= TCPS_FIN_WAIT_2 &&
4996               src->state >= TCPS_FIN_WAIT_2) {
4997                     if (pf_status.debug >= PF_DEBUG_MISC) {
4998                               kprintf("pf: state reuse ");
4999                               pf_print_state(*state);
5000                               pf_print_flags(th->th_flags);
5001                               kprintf("\n");
5002                     }
5003                     /* XXX make sure it's the same direction ?? */
5004                     (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
5005                     if ((*state)->cpuid == mycpu->gd_cpuid) {
5006                               pf_unlink_state(*state);
5007                               *state = NULL;
5008                     } else {
5009                               (*state)->timeout = PFTM_PURGE;
5010                     }
5011                     FAIL (PF_DROP);
5012           }
5013 
5014           if ((*state)->state_flags & PFSTATE_SLOPPY) {
5015                     if (pf_tcp_track_sloppy(src, dst, state, pd,
5016                                                   reason) == PF_DROP) {
5017                               FAIL (PF_DROP);
5018                     }
5019           } else {
5020                     if (pf_tcp_track_full(src, dst, state, kif, m, off, pd,
5021                                               reason, &copyback) == PF_DROP) {
5022                               FAIL (PF_DROP);
5023                     }
5024           }
5025 
5026           /* translate source/destination address, if necessary */
5027           if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5028                     struct pf_state_key *nk = (*state)->key[pd->didx];
5029 
5030                     if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
5031                         nk->port[pd->sidx] != th->th_sport)  {
5032                               /*
5033                                * The translated source address may be completely
5034                                * unrelated to the saved link header, make sure
5035                                * a bridge doesn't try to use it.
5036                                */
5037                               m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
5038                               pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
5039                                   &th->th_sum, &nk->addr[pd->sidx],
5040                                   nk->port[pd->sidx], 0, pd->af);
5041                     }
5042 
5043                     if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
5044                         nk->port[pd->didx] != th->th_dport) {
5045                               /*
5046                                * If we don't redispatch the packet will go into
5047                                * the protocol stack on the wrong cpu for the
5048                                * post-translated address.
5049                                */
5050                               pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
5051                                   &th->th_sum, &nk->addr[pd->didx],
5052                                   nk->port[pd->didx], 0, pd->af);
5053                     }
5054                     copyback = 1;
5055           }
5056 
5057           /* Copyback sequence modulation or stateful scrub changes if needed */
5058           if (copyback) {
5059                     m->m_flags &= ~M_HASH;
5060                     m_copyback(m, off, sizeof(*th), th);
5061           }
5062 
5063           pfsync_update_state(*state);
5064           error = PF_PASS;
5065 done:
5066           if (*state)
5067                     lockmgr(&(*state)->lk, LK_RELEASE);
5068           return (error);
5069 }
5070 
5071 /*
5072  * Test UDP connection state.  Caller must hold the state locked.
5073  */
5074 int
pf_test_state_udp(struct pf_state ** state,int direction,struct pfi_kif * kif,struct mbuf * m,int off,void * h,struct pf_pdesc * pd)5075 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
5076                       struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
5077 {
5078           struct pf_state_peer          *src, *dst;
5079           struct pf_state_key_cmp        key;
5080           struct udphdr                 *uh = pd->hdr.udp;
5081 
5082           bzero(&key, sizeof(key));
5083           key.af = pd->af;
5084           key.proto = IPPROTO_UDP;
5085           if (direction == PF_IN)       {         /* wire side, straight */
5086                     PF_ACPY(&key.addr[0], pd->src, key.af);
5087                     PF_ACPY(&key.addr[1], pd->dst, key.af);
5088                     key.port[0] = uh->uh_sport;
5089                     key.port[1] = uh->uh_dport;
5090           } else {                      /* stack side, reverse */
5091                     PF_ACPY(&key.addr[1], pd->src, key.af);
5092                     PF_ACPY(&key.addr[0], pd->dst, key.af);
5093                     key.port[1] = uh->uh_sport;
5094                     key.port[0] = uh->uh_dport;
5095           }
5096 
5097           STATE_LOOKUP(kif, &key, direction, *state, m);
5098           lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5099 
5100           if (direction == (*state)->direction) {
5101                     src = &(*state)->src;
5102                     dst = &(*state)->dst;
5103           } else {
5104                     src = &(*state)->dst;
5105                     dst = &(*state)->src;
5106           }
5107 
5108           /* update states */
5109           if (src->state < PFUDPS_SINGLE)
5110                     src->state = PFUDPS_SINGLE;
5111           if (dst->state == PFUDPS_SINGLE)
5112                     dst->state = PFUDPS_MULTIPLE;
5113 
5114           /* update expire time */
5115           (*state)->expire = time_second;
5116           if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
5117                     (*state)->timeout = PFTM_UDP_MULTIPLE;
5118           else
5119                     (*state)->timeout = PFTM_UDP_SINGLE;
5120 
5121           /* translate source/destination address, if necessary */
5122           if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5123                     struct pf_state_key *nk = (*state)->key[pd->didx];
5124 
5125                     if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
5126                         nk->port[pd->sidx] != uh->uh_sport) {
5127                               /*
5128                                * The translated source address may be completely
5129                                * unrelated to the saved link header, make sure
5130                                * a bridge doesn't try to use it.
5131                                */
5132                               m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
5133                               m->m_flags &= ~M_HASH;
5134                               pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
5135                                   &uh->uh_sum, &nk->addr[pd->sidx],
5136                                   nk->port[pd->sidx], 1, pd->af);
5137                     }
5138 
5139                     if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
5140                         nk->port[pd->didx] != uh->uh_dport) {
5141                               /*
5142                                * If we don't redispatch the packet will go into
5143                                * the protocol stack on the wrong cpu for the
5144                                * post-translated address.
5145                                */
5146                               m->m_flags &= ~M_HASH;
5147                               pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
5148                                   &uh->uh_sum, &nk->addr[pd->didx],
5149                                   nk->port[pd->didx], 1, pd->af);
5150                     }
5151                     m_copyback(m, off, sizeof(*uh), uh);
5152           }
5153 
5154           pfsync_update_state(*state);
5155           lockmgr(&(*state)->lk, LK_RELEASE);
5156           return (PF_PASS);
5157 }
5158 
5159 /*
5160  * Test ICMP connection state.  Caller must hold the state locked.
5161  */
5162 int
pf_test_state_icmp(struct pf_state ** state,int direction,struct pfi_kif * kif,struct mbuf * m,int off,void * h,struct pf_pdesc * pd,u_short * reason)5163 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
5164                        struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
5165                        u_short *reason)
5166 {
5167           struct pf_addr      *saddr = pd->src, *daddr = pd->dst;
5168           u_int16_t  icmpid = 0, *icmpsum = NULL;
5169           u_int8_t   icmptype = 0;
5170           int                  state_icmp = 0;
5171           int                  error;
5172           struct pf_state_key_cmp key;
5173 
5174           bzero(&key, sizeof(key));
5175 
5176           switch (pd->proto) {
5177 #ifdef INET
5178           case IPPROTO_ICMP:
5179                     icmptype = pd->hdr.icmp->icmp_type;
5180                     icmpid = pd->hdr.icmp->icmp_id;
5181                     icmpsum = &pd->hdr.icmp->icmp_cksum;
5182 
5183                     if (icmptype == ICMP_UNREACH ||
5184                         icmptype == ICMP_SOURCEQUENCH ||
5185                         icmptype == ICMP_REDIRECT ||
5186                         icmptype == ICMP_TIMXCEED ||
5187                         icmptype == ICMP_PARAMPROB)
5188                               state_icmp++;
5189                     break;
5190 #endif /* INET */
5191 #ifdef INET6
5192           case IPPROTO_ICMPV6:
5193                     icmptype = pd->hdr.icmp6->icmp6_type;
5194                     icmpid = pd->hdr.icmp6->icmp6_id;
5195                     icmpsum = &pd->hdr.icmp6->icmp6_cksum;
5196 
5197                     if (icmptype == ICMP6_DST_UNREACH ||
5198                         icmptype == ICMP6_PACKET_TOO_BIG ||
5199                         icmptype == ICMP6_TIME_EXCEEDED ||
5200                         icmptype == ICMP6_PARAM_PROB)
5201                               state_icmp++;
5202                     break;
5203 #endif /* INET6 */
5204           }
5205 
5206           if (!state_icmp) {
5207 
5208                     /*
5209                      * ICMP query/reply message not related to a TCP/UDP packet.
5210                      * Search for an ICMP state.
5211                      */
5212                     key.af = pd->af;
5213                     key.proto = pd->proto;
5214                     key.port[0] = key.port[1] = icmpid;
5215                     if (direction == PF_IN)       {         /* wire side, straight */
5216                               PF_ACPY(&key.addr[0], pd->src, key.af);
5217                               PF_ACPY(&key.addr[1], pd->dst, key.af);
5218                     } else {                      /* stack side, reverse */
5219                               PF_ACPY(&key.addr[1], pd->src, key.af);
5220                               PF_ACPY(&key.addr[0], pd->dst, key.af);
5221                     }
5222 
5223                     STATE_LOOKUP(kif, &key, direction, *state, m);
5224                     lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5225 
5226                     (*state)->expire = time_second;
5227                     (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
5228 
5229                     /* translate source/destination address, if necessary */
5230                     if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5231                               struct pf_state_key *nk = (*state)->key[pd->didx];
5232 
5233                               switch (pd->af) {
5234 #ifdef INET
5235                               case AF_INET:
5236                                         if (PF_ANEQ(pd->src,
5237                                             &nk->addr[pd->sidx], AF_INET))
5238                                                   pf_change_a(&saddr->v4.s_addr,
5239                                                       pd->ip_sum,
5240                                                       nk->addr[pd->sidx].v4.s_addr, 0);
5241 
5242                                         if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
5243                                             AF_INET))
5244                                                   pf_change_a(&daddr->v4.s_addr,
5245                                                       pd->ip_sum,
5246                                                       nk->addr[pd->didx].v4.s_addr, 0);
5247 
5248                                         if (nk->port[0] !=
5249                                             pd->hdr.icmp->icmp_id) {
5250                                                   pd->hdr.icmp->icmp_cksum =
5251                                                       pf_cksum_fixup(
5252                                                       pd->hdr.icmp->icmp_cksum, icmpid,
5253                                                       nk->port[pd->sidx], 0);
5254                                                   pd->hdr.icmp->icmp_id =
5255                                                       nk->port[pd->sidx];
5256                                         }
5257 
5258                                         m->m_flags &= ~M_HASH;
5259                                         m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
5260                                         break;
5261 #endif /* INET */
5262 #ifdef INET6
5263                               case AF_INET6:
5264                                         if (PF_ANEQ(pd->src,
5265                                             &nk->addr[pd->sidx], AF_INET6))
5266                                                   pf_change_a6(saddr,
5267                                                       &pd->hdr.icmp6->icmp6_cksum,
5268                                                       &nk->addr[pd->sidx], 0);
5269 
5270                                         if (PF_ANEQ(pd->dst,
5271                                             &nk->addr[pd->didx], AF_INET6))
5272                                                   pf_change_a6(daddr,
5273                                                       &pd->hdr.icmp6->icmp6_cksum,
5274                                                       &nk->addr[pd->didx], 0);
5275 
5276                                         m->m_flags &= ~M_HASH;
5277                                         m_copyback(m, off, sizeof(struct icmp6_hdr),
5278                                             pd->hdr.icmp6);
5279                                         break;
5280 #endif /* INET6 */
5281                               }
5282                     }
5283           } else {
5284                     /*
5285                      * ICMP error message in response to a TCP/UDP packet.
5286                      * Extract the inner TCP/UDP header and search for that state.
5287                      */
5288 
5289                     struct pf_pdesc     pd2;
5290 #ifdef INET
5291                     struct ip h2;
5292 #endif /* INET */
5293 #ifdef INET6
5294                     struct ip6_hdr      h2_6;
5295                     int                 terminal = 0;
5296 #endif /* INET6 */
5297                     int                 ipoff2;
5298                     int                 off2;
5299 
5300                     pd2.not_cpu_localized = 1;
5301                     pd2.af = pd->af;
5302                     /* Payload packet is from the opposite direction. */
5303                     pd2.sidx = (direction == PF_IN) ? 1 : 0;
5304                     pd2.didx = (direction == PF_IN) ? 0 : 1;
5305                     switch (pd->af) {
5306 #ifdef INET
5307                     case AF_INET:
5308                               /* offset of h2 in mbuf chain */
5309                               ipoff2 = off + ICMP_MINLEN;
5310 
5311                               if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
5312                                   NULL, reason, pd2.af)) {
5313                                         DPFPRINTF(PF_DEBUG_MISC,
5314                                             ("pf: ICMP error message too short "
5315                                             "(ip)\n"));
5316                                         FAIL (PF_DROP);
5317                               }
5318                               /*
5319                                * ICMP error messages don't refer to non-first
5320                                * fragments
5321                                */
5322                               if (h2.ip_off & htons(IP_OFFMASK)) {
5323                                         REASON_SET(reason, PFRES_FRAG);
5324                                         FAIL (PF_DROP);
5325                               }
5326 
5327                               /* offset of protocol header that follows h2 */
5328                               off2 = ipoff2 + (h2.ip_hl << 2);
5329 
5330                               pd2.proto = h2.ip_p;
5331                               pd2.src = (struct pf_addr *)&h2.ip_src;
5332                               pd2.dst = (struct pf_addr *)&h2.ip_dst;
5333                               pd2.ip_sum = &h2.ip_sum;
5334                               break;
5335 #endif /* INET */
5336 #ifdef INET6
5337                     case AF_INET6:
5338                               ipoff2 = off + sizeof(struct icmp6_hdr);
5339 
5340                               if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
5341                                   NULL, reason, pd2.af)) {
5342                                         DPFPRINTF(PF_DEBUG_MISC,
5343                                             ("pf: ICMP error message too short "
5344                                             "(ip6)\n"));
5345                                         FAIL (PF_DROP);
5346                               }
5347                               pd2.proto = h2_6.ip6_nxt;
5348                               pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5349                               pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5350                               pd2.ip_sum = NULL;
5351                               off2 = ipoff2 + sizeof(h2_6);
5352                               do {
5353                                         switch (pd2.proto) {
5354                                         case IPPROTO_FRAGMENT:
5355                                                   /*
5356                                                    * ICMPv6 error messages for
5357                                                    * non-first fragments
5358                                                    */
5359                                                   REASON_SET(reason, PFRES_FRAG);
5360                                                   FAIL (PF_DROP);
5361                                         case IPPROTO_AH:
5362                                         case IPPROTO_HOPOPTS:
5363                                         case IPPROTO_ROUTING:
5364                                         case IPPROTO_DSTOPTS: {
5365                                                   /* get next header and header length */
5366                                                   struct ip6_ext opt6;
5367 
5368                                                   if (!pf_pull_hdr(m, off2, &opt6,
5369                                                       sizeof(opt6), NULL, reason,
5370                                                       pd2.af)) {
5371                                                             DPFPRINTF(PF_DEBUG_MISC,
5372                                                                 ("pf: ICMPv6 short opt\n"));
5373                                                             FAIL (PF_DROP);
5374                                                   }
5375                                                   if (pd2.proto == IPPROTO_AH)
5376                                                             off2 += (opt6.ip6e_len + 2) * 4;
5377                                                   else
5378                                                             off2 += (opt6.ip6e_len + 1) * 8;
5379                                                   pd2.proto = opt6.ip6e_nxt;
5380                                                   /* goto the next header */
5381                                                   break;
5382                                         }
5383                                         default:
5384                                                   terminal++;
5385                                                   break;
5386                                         }
5387                               } while (!terminal);
5388                               break;
5389 #endif /* INET6 */
5390                     default:
5391                               DPFPRINTF(PF_DEBUG_MISC,
5392                                   ("pf: ICMP AF %d unknown (ip6)\n", pd->af));
5393                               FAIL (PF_DROP);
5394                               break;
5395                     }
5396 
5397                     switch (pd2.proto) {
5398                     case IPPROTO_TCP: {
5399                               struct tcphdr                  th;
5400                               u_int32_t            seq;
5401                               struct pf_state_peer          *src, *dst;
5402                               u_int8_t             dws;
5403                               int                            copyback = 0;
5404 
5405                               /*
5406                                * Only the first 8 bytes of the TCP header can be
5407                                * expected. Don't access any TCP header fields after
5408                                * th_seq, an ackskew test is not possible.
5409                                */
5410                               if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
5411                                   pd2.af)) {
5412                                         DPFPRINTF(PF_DEBUG_MISC,
5413                                             ("pf: ICMP error message too short "
5414                                             "(tcp)\n"));
5415                                         FAIL (PF_DROP);
5416                               }
5417 
5418                               key.af = pd2.af;
5419                               key.proto = IPPROTO_TCP;
5420                               PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5421                               PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5422                               key.port[pd2.sidx] = th.th_sport;
5423                               key.port[pd2.didx] = th.th_dport;
5424 
5425                               STATE_LOOKUP(kif, &key, direction, *state, m);
5426                               lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5427 
5428                               if (direction == (*state)->direction) {
5429                                         src = &(*state)->dst;
5430                                         dst = &(*state)->src;
5431                               } else {
5432                                         src = &(*state)->src;
5433                                         dst = &(*state)->dst;
5434                               }
5435 
5436                               if (src->wscale && dst->wscale)
5437                                         dws = dst->wscale & PF_WSCALE_MASK;
5438                               else
5439                                         dws = 0;
5440 
5441                               /* Demodulate sequence number */
5442                               seq = ntohl(th.th_seq) - src->seqdiff;
5443                               if (src->seqdiff) {
5444                                         pf_change_a(&th.th_seq, icmpsum,
5445                                             htonl(seq), 0);
5446                                         copyback = 1;
5447                               }
5448 
5449                               if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
5450                                   (!SEQ_GEQ(src->seqhi, seq) ||
5451                                   !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
5452                                         if (pf_status.debug >= PF_DEBUG_MISC) {
5453                                                   kprintf("pf: BAD ICMP %d:%d ",
5454                                                       icmptype, pd->hdr.icmp->icmp_code);
5455                                                   pf_print_host(pd->src, 0, pd->af);
5456                                                   kprintf(" -> ");
5457                                                   pf_print_host(pd->dst, 0, pd->af);
5458                                                   kprintf(" state: ");
5459                                                   pf_print_state(*state);
5460                                                   kprintf(" seq=%u\n", seq);
5461                                         }
5462                                         REASON_SET(reason, PFRES_BADSTATE);
5463                                         FAIL (PF_DROP);
5464                               } else {
5465                                         if (pf_status.debug >= PF_DEBUG_MISC) {
5466                                                   kprintf("pf: OK ICMP %d:%d ",
5467                                                       icmptype, pd->hdr.icmp->icmp_code);
5468                                                   pf_print_host(pd->src, 0, pd->af);
5469                                                   kprintf(" -> ");
5470                                                   pf_print_host(pd->dst, 0, pd->af);
5471                                                   kprintf(" state: ");
5472                                                   pf_print_state(*state);
5473                                                   kprintf(" seq=%u\n", seq);
5474                                         }
5475                               }
5476 
5477                               /* translate source/destination address, if necessary */
5478                               if ((*state)->key[PF_SK_WIRE] !=
5479                                   (*state)->key[PF_SK_STACK]) {
5480                                         struct pf_state_key *nk =
5481                                             (*state)->key[pd->didx];
5482 
5483                                         if (PF_ANEQ(pd2.src,
5484                                             &nk->addr[pd2.sidx], pd2.af) ||
5485                                             nk->port[pd2.sidx] != th.th_sport)
5486                                                   pf_change_icmp(pd2.src, &th.th_sport,
5487                                                       daddr, &nk->addr[pd2.sidx],
5488                                                       nk->port[pd2.sidx], NULL,
5489                                                       pd2.ip_sum, icmpsum,
5490                                                       pd->ip_sum, 0, pd2.af);
5491 
5492                                         if (PF_ANEQ(pd2.dst,
5493                                             &nk->addr[pd2.didx], pd2.af) ||
5494                                             nk->port[pd2.didx] != th.th_dport)
5495                                                   pf_change_icmp(pd2.dst, &th.th_dport,
5496                                                       NULL, /* XXX Inbound NAT? */
5497                                                       &nk->addr[pd2.didx],
5498                                                       nk->port[pd2.didx], NULL,
5499                                                       pd2.ip_sum, icmpsum,
5500                                                       pd->ip_sum, 0, pd2.af);
5501                                         copyback = 1;
5502                               }
5503 
5504                               if (copyback) {
5505                                         switch (pd2.af) {
5506 #ifdef INET
5507                                         case AF_INET:
5508                                                   m_copyback(m, off, ICMP_MINLEN,
5509                                                       pd->hdr.icmp);
5510                                                   m_copyback(m, ipoff2, sizeof(h2),
5511                                                       &h2);
5512                                                   break;
5513 #endif /* INET */
5514 #ifdef INET6
5515                                         case AF_INET6:
5516                                                   m_copyback(m, off,
5517                                                       sizeof(struct icmp6_hdr),
5518                                                       pd->hdr.icmp6);
5519                                                   m_copyback(m, ipoff2, sizeof(h2_6),
5520                                                       &h2_6);
5521                                                   break;
5522 #endif /* INET6 */
5523                                         }
5524                                         m->m_flags &= ~M_HASH;
5525                                         m_copyback(m, off2, 8, &th);
5526                               }
5527                               break;
5528                     }
5529                     case IPPROTO_UDP: {
5530                               struct udphdr                 uh;
5531 
5532                               if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5533                                   NULL, reason, pd2.af)) {
5534                                         DPFPRINTF(PF_DEBUG_MISC,
5535                                             ("pf: ICMP error message too short "
5536                                             "(udp)\n"));
5537                                         return (PF_DROP);
5538                               }
5539 
5540                               key.af = pd2.af;
5541                               key.proto = IPPROTO_UDP;
5542                               PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5543                               PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5544                               key.port[pd2.sidx] = uh.uh_sport;
5545                               key.port[pd2.didx] = uh.uh_dport;
5546 
5547                               STATE_LOOKUP(kif, &key, direction, *state, m);
5548                               lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5549 
5550                               /* translate source/destination address, if necessary */
5551                               if ((*state)->key[PF_SK_WIRE] !=
5552                                   (*state)->key[PF_SK_STACK]) {
5553                                         struct pf_state_key *nk =
5554                                             (*state)->key[pd->didx];
5555 
5556                                         if (PF_ANEQ(pd2.src,
5557                                             &nk->addr[pd2.sidx], pd2.af) ||
5558                                             nk->port[pd2.sidx] != uh.uh_sport)
5559                                                   pf_change_icmp(pd2.src, &uh.uh_sport,
5560                                                       daddr, &nk->addr[pd2.sidx],
5561                                                       nk->port[pd2.sidx], &uh.uh_sum,
5562                                                       pd2.ip_sum, icmpsum,
5563                                                       pd->ip_sum, 1, pd2.af);
5564 
5565                                         if (PF_ANEQ(pd2.dst,
5566                                             &nk->addr[pd2.didx], pd2.af) ||
5567                                             nk->port[pd2.didx] != uh.uh_dport)
5568                                                   pf_change_icmp(pd2.dst, &uh.uh_dport,
5569                                                       NULL, /* XXX Inbound NAT? */
5570                                                       &nk->addr[pd2.didx],
5571                                                       nk->port[pd2.didx], &uh.uh_sum,
5572                                                       pd2.ip_sum, icmpsum,
5573                                                       pd->ip_sum, 1, pd2.af);
5574 
5575                                         switch (pd2.af) {
5576 #ifdef INET
5577                                         case AF_INET:
5578                                                   m_copyback(m, off, ICMP_MINLEN,
5579                                                       pd->hdr.icmp);
5580                                                   m_copyback(m, ipoff2, sizeof(h2),
5581                                                       &h2);
5582                                                   break;
5583 #endif /* INET */
5584 #ifdef INET6
5585                                         case AF_INET6:
5586                                                   m_copyback(m, off,
5587                                                       sizeof(struct icmp6_hdr),
5588                                                       pd->hdr.icmp6);
5589                                                   m_copyback(m, ipoff2, sizeof(h2_6),
5590                                                       &h2_6);
5591                                                   break;
5592 #endif /* INET6 */
5593                                         }
5594                                         m->m_flags &= ~M_HASH;
5595                                         m_copyback(m, off2, sizeof(uh), &uh);
5596                               }
5597                               break;
5598                     }
5599 #ifdef INET
5600                     case IPPROTO_ICMP: {
5601                               struct icmp                   iih;
5602 
5603                               if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5604                                   NULL, reason, pd2.af)) {
5605                                         DPFPRINTF(PF_DEBUG_MISC,
5606                                             ("pf: ICMP error message too short i"
5607                                             "(icmp)\n"));
5608                                         return (PF_DROP);
5609                               }
5610 
5611                               key.af = pd2.af;
5612                               key.proto = IPPROTO_ICMP;
5613                               PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5614                               PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5615                               key.port[0] = key.port[1] = iih.icmp_id;
5616 
5617                               STATE_LOOKUP(kif, &key, direction, *state, m);
5618                               lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5619 
5620                               /* translate source/destination address, if necessary */
5621                               if ((*state)->key[PF_SK_WIRE] !=
5622                                   (*state)->key[PF_SK_STACK]) {
5623                                         struct pf_state_key *nk =
5624                                             (*state)->key[pd->didx];
5625 
5626                                         if (PF_ANEQ(pd2.src,
5627                                             &nk->addr[pd2.sidx], pd2.af) ||
5628                                             nk->port[pd2.sidx] != iih.icmp_id)
5629                                                   pf_change_icmp(pd2.src, &iih.icmp_id,
5630                                                       daddr, &nk->addr[pd2.sidx],
5631                                                       nk->port[pd2.sidx], NULL,
5632                                                       pd2.ip_sum, icmpsum,
5633                                                       pd->ip_sum, 0, AF_INET);
5634 
5635                                         if (PF_ANEQ(pd2.dst,
5636                                             &nk->addr[pd2.didx], pd2.af) ||
5637                                             nk->port[pd2.didx] != iih.icmp_id)
5638                                                   pf_change_icmp(pd2.dst, &iih.icmp_id,
5639                                                       NULL, /* XXX Inbound NAT? */
5640                                                       &nk->addr[pd2.didx],
5641                                                       nk->port[pd2.didx], NULL,
5642                                                       pd2.ip_sum, icmpsum,
5643                                                       pd->ip_sum, 0, AF_INET);
5644 
5645                                         m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
5646                                         m_copyback(m, ipoff2, sizeof(h2), &h2);
5647                                         m_copyback(m, off2, ICMP_MINLEN, &iih);
5648                                         m->m_flags &= ~M_HASH;
5649                               }
5650                               break;
5651                     }
5652 #endif /* INET */
5653 #ifdef INET6
5654                     case IPPROTO_ICMPV6: {
5655                               struct icmp6_hdr    iih;
5656 
5657                               if (!pf_pull_hdr(m, off2, &iih,
5658                                   sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5659                                         DPFPRINTF(PF_DEBUG_MISC,
5660                                             ("pf: ICMP error message too short "
5661                                             "(icmp6)\n"));
5662                                         FAIL (PF_DROP);
5663                               }
5664 
5665                               key.af = pd2.af;
5666                               key.proto = IPPROTO_ICMPV6;
5667                               PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5668                               PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5669                               key.port[0] = key.port[1] = iih.icmp6_id;
5670 
5671                               STATE_LOOKUP(kif, &key, direction, *state, m);
5672                               lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5673 
5674                               /* translate source/destination address, if necessary */
5675                               if ((*state)->key[PF_SK_WIRE] !=
5676                                   (*state)->key[PF_SK_STACK]) {
5677                                         struct pf_state_key *nk =
5678                                             (*state)->key[pd->didx];
5679 
5680                                         if (PF_ANEQ(pd2.src,
5681                                             &nk->addr[pd2.sidx], pd2.af) ||
5682                                             nk->port[pd2.sidx] != iih.icmp6_id)
5683                                                   pf_change_icmp(pd2.src, &iih.icmp6_id,
5684                                                       daddr, &nk->addr[pd2.sidx],
5685                                                       nk->port[pd2.sidx], NULL,
5686                                                       pd2.ip_sum, icmpsum,
5687                                                       pd->ip_sum, 0, AF_INET6);
5688 
5689                                         if (PF_ANEQ(pd2.dst,
5690                                             &nk->addr[pd2.didx], pd2.af) ||
5691                                             nk->port[pd2.didx] != iih.icmp6_id)
5692                                                   pf_change_icmp(pd2.dst, &iih.icmp6_id,
5693                                                       NULL, /* XXX Inbound NAT? */
5694                                                       &nk->addr[pd2.didx],
5695                                                       nk->port[pd2.didx], NULL,
5696                                                       pd2.ip_sum, icmpsum,
5697                                                       pd->ip_sum, 0, AF_INET6);
5698 
5699                                         m_copyback(m, off, sizeof(struct icmp6_hdr),
5700                                             pd->hdr.icmp6);
5701                                         m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
5702                                         m_copyback(m, off2, sizeof(struct icmp6_hdr),
5703                                             &iih);
5704                                         m->m_flags &= ~M_HASH;
5705                               }
5706                               break;
5707                     }
5708 #endif /* INET6 */
5709                     default: {
5710                               key.af = pd2.af;
5711                               key.proto = pd2.proto;
5712                               PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
5713                               PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
5714                               key.port[0] = key.port[1] = 0;
5715 
5716                               STATE_LOOKUP(kif, &key, direction, *state, m);
5717                               lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5718 
5719                               /* translate source/destination address, if necessary */
5720                               if ((*state)->key[PF_SK_WIRE] !=
5721                                   (*state)->key[PF_SK_STACK]) {
5722                                         struct pf_state_key *nk =
5723                                             (*state)->key[pd->didx];
5724 
5725                                         if (PF_ANEQ(pd2.src,
5726                                             &nk->addr[pd2.sidx], pd2.af))
5727                                                   pf_change_icmp(pd2.src, NULL, daddr,
5728                                                       &nk->addr[pd2.sidx], 0, NULL,
5729                                                       pd2.ip_sum, icmpsum,
5730                                                       pd->ip_sum, 0, pd2.af);
5731 
5732                                         if (PF_ANEQ(pd2.dst,
5733                                             &nk->addr[pd2.didx], pd2.af))
5734                                                   pf_change_icmp(pd2.src, NULL,
5735                                                       NULL, /* XXX Inbound NAT? */
5736                                                       &nk->addr[pd2.didx], 0, NULL,
5737                                                       pd2.ip_sum, icmpsum,
5738                                                       pd->ip_sum, 0, pd2.af);
5739 
5740                                         switch (pd2.af) {
5741 #ifdef INET
5742                                         case AF_INET:
5743                                                   m_copyback(m, off, ICMP_MINLEN,
5744                                                       pd->hdr.icmp);
5745                                                   m_copyback(m, ipoff2, sizeof(h2),
5746                                                       &h2);
5747                                                   m->m_flags &= ~M_HASH;
5748                                                   break;
5749 #endif /* INET */
5750 #ifdef INET6
5751                                         case AF_INET6:
5752                                                   m_copyback(m, off,
5753                                                       sizeof(struct icmp6_hdr),
5754                                                       pd->hdr.icmp6);
5755                                                   m_copyback(m, ipoff2, sizeof(h2_6),
5756                                                       &h2_6);
5757                                                   m->m_flags &= ~M_HASH;
5758                                                   break;
5759 #endif /* INET6 */
5760                                         }
5761                               }
5762                               break;
5763                     }
5764                     }
5765           }
5766 
5767           pfsync_update_state(*state);
5768           error = PF_PASS;
5769 done:
5770           if (*state)
5771                     lockmgr(&(*state)->lk, LK_RELEASE);
5772           return (error);
5773 }
5774 
5775 /*
5776  * Test other connection state.  Caller must hold the state locked.
5777  */
5778 int
pf_test_state_other(struct pf_state ** state,int direction,struct pfi_kif * kif,struct mbuf * m,struct pf_pdesc * pd)5779 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5780                         struct mbuf *m, struct pf_pdesc *pd)
5781 {
5782           struct pf_state_peer          *src, *dst;
5783           struct pf_state_key_cmp        key;
5784 
5785           bzero(&key, sizeof(key));
5786           key.af = pd->af;
5787           key.proto = pd->proto;
5788           if (direction == PF_IN)       {
5789                     PF_ACPY(&key.addr[0], pd->src, key.af);
5790                     PF_ACPY(&key.addr[1], pd->dst, key.af);
5791                     key.port[0] = key.port[1] = 0;
5792           } else {
5793                     PF_ACPY(&key.addr[1], pd->src, key.af);
5794                     PF_ACPY(&key.addr[0], pd->dst, key.af);
5795                     key.port[1] = key.port[0] = 0;
5796           }
5797 
5798           STATE_LOOKUP(kif, &key, direction, *state, m);
5799           lockmgr(&(*state)->lk, LK_EXCLUSIVE);
5800 
5801           if (direction == (*state)->direction) {
5802                     src = &(*state)->src;
5803                     dst = &(*state)->dst;
5804           } else {
5805                     src = &(*state)->dst;
5806                     dst = &(*state)->src;
5807           }
5808 
5809           /* update states */
5810           if (src->state < PFOTHERS_SINGLE)
5811                     src->state = PFOTHERS_SINGLE;
5812           if (dst->state == PFOTHERS_SINGLE)
5813                     dst->state = PFOTHERS_MULTIPLE;
5814 
5815           /* update expire time */
5816           (*state)->expire = time_second;
5817           if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5818                     (*state)->timeout = PFTM_OTHER_MULTIPLE;
5819           else
5820                     (*state)->timeout = PFTM_OTHER_SINGLE;
5821 
5822           /* translate source/destination address, if necessary */
5823           if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
5824                     struct pf_state_key *nk = (*state)->key[pd->didx];
5825 
5826                     KKASSERT(nk);
5827                     KKASSERT(pd);
5828                     KKASSERT(pd->src);
5829                     KKASSERT(pd->dst);
5830                     switch (pd->af) {
5831 #ifdef INET
5832                     case AF_INET:
5833                               if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
5834                                         pf_change_a(&pd->src->v4.s_addr,
5835                                             pd->ip_sum,
5836                                             nk->addr[pd->sidx].v4.s_addr,
5837                                             0);
5838 
5839 
5840                               if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
5841                                         pf_change_a(&pd->dst->v4.s_addr,
5842                                             pd->ip_sum,
5843                                             nk->addr[pd->didx].v4.s_addr,
5844                                             0);
5845 
5846                               break;
5847 #endif /* INET */
5848 #ifdef INET6
5849                     case AF_INET6:
5850                               if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
5851                                         PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
5852 
5853                               if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
5854                                         PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
5855 #endif /* INET6 */
5856                     }
5857           }
5858 
5859           pfsync_update_state(*state);
5860           lockmgr(&(*state)->lk, LK_RELEASE);
5861           return (PF_PASS);
5862 }
5863 
5864 /*
5865  * ipoff and off are measured from the start of the mbuf chain.
5866  * h must be at "ipoff" on the mbuf chain.
5867  */
5868 void *
pf_pull_hdr(struct mbuf * m,int off,void * p,int len,u_short * actionp,u_short * reasonp,sa_family_t af)5869 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5870     u_short *actionp, u_short *reasonp, sa_family_t af)
5871 {
5872           switch (af) {
5873 #ifdef INET
5874           case AF_INET: {
5875                     struct ip *h = mtod(m, struct ip *);
5876                     u_int16_t  fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
5877 
5878                     if (fragoff) {
5879                               if (fragoff >= len)
5880                                         ACTION_SET(actionp, PF_PASS);
5881                               else {
5882                                         ACTION_SET(actionp, PF_DROP);
5883                                         REASON_SET(reasonp, PFRES_FRAG);
5884                               }
5885                               return (NULL);
5886                     }
5887                     if (m->m_pkthdr.len < off + len ||
5888                         ntohs(h->ip_len) < off + len) {
5889                               ACTION_SET(actionp, PF_DROP);
5890                               REASON_SET(reasonp, PFRES_SHORT);
5891                               return (NULL);
5892                     }
5893                     break;
5894           }
5895 #endif /* INET */
5896 #ifdef INET6
5897           case AF_INET6: {
5898                     struct ip6_hdr      *h = mtod(m, struct ip6_hdr *);
5899 
5900                     if (m->m_pkthdr.len < off + len ||
5901                         (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5902                         (unsigned)(off + len)) {
5903                               ACTION_SET(actionp, PF_DROP);
5904                               REASON_SET(reasonp, PFRES_SHORT);
5905                               return (NULL);
5906                     }
5907                     break;
5908           }
5909 #endif /* INET6 */
5910           }
5911           m_copydata(m, off, len, p);
5912           return (p);
5913 }
5914 
5915 int
pf_routable(struct pf_addr * addr,sa_family_t af,struct pfi_kif * kif)5916 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5917 {
5918           struct sockaddr_in  *dst;
5919           int                            ret = 1;
5920           int                            check_mpath;
5921 #ifdef INET6
5922           struct sockaddr_in6 *dst6;
5923           struct route_in6     ro;
5924 #else
5925           struct route                   ro;
5926 #endif
5927           struct radix_node   *rn;
5928           struct rtentry                *rt;
5929           struct ifnet                  *ifp;
5930 
5931           check_mpath = 0;
5932           bzero(&ro, sizeof(ro));
5933           switch (af) {
5934           case AF_INET:
5935                     dst = satosin(&ro.ro_dst);
5936                     dst->sin_family = AF_INET;
5937                     dst->sin_len = sizeof(*dst);
5938                     dst->sin_addr = addr->v4;
5939                     break;
5940 #ifdef INET6
5941           case AF_INET6:
5942                     /*
5943                      * Skip check for addresses with embedded interface scope,
5944                      * as they would always match anyway.
5945                      */
5946                     if (IN6_IS_SCOPE_EMBED(&addr->v6))
5947                               goto out;
5948                     dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5949                     dst6->sin6_family = AF_INET6;
5950                     dst6->sin6_len = sizeof(*dst6);
5951                     dst6->sin6_addr = addr->v6;
5952                     break;
5953 #endif /* INET6 */
5954           default:
5955                     return (0);
5956           }
5957 
5958           /* Skip checks for ipsec interfaces */
5959           if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5960                     goto out;
5961 
5962           rtalloc_ign((struct route *)&ro, 0);
5963 
5964           if (ro.ro_rt != NULL) {
5965                     /* No interface given, this is a no-route check */
5966                     if (kif == NULL)
5967                               goto out;
5968 
5969                     if (kif->pfik_ifp == NULL) {
5970                               ret = 0;
5971                               goto out;
5972                     }
5973 
5974                     /* Perform uRPF check if passed input interface */
5975                     ret = 0;
5976                     rn = (struct radix_node *)ro.ro_rt;
5977                     do {
5978                               rt = (struct rtentry *)rn;
5979                               ifp = rt->rt_ifp;
5980 
5981                               if (kif->pfik_ifp == ifp)
5982                                         ret = 1;
5983                               rn = NULL;
5984                     } while (check_mpath == 1 && rn != NULL && ret == 0);
5985           } else
5986                     ret = 0;
5987 out:
5988           if (ro.ro_rt != NULL)
5989                     RTFREE(ro.ro_rt);
5990           return (ret);
5991 }
5992 
5993 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)5994 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5995 {
5996           struct sockaddr_in  *dst;
5997 #ifdef INET6
5998           struct sockaddr_in6 *dst6;
5999           struct route_in6     ro;
6000 #else
6001           struct route                   ro;
6002 #endif
6003           int                            ret = 0;
6004 
6005           ASSERT_LWKT_TOKEN_HELD(&pf_token);
6006 
6007           bzero(&ro, sizeof(ro));
6008           switch (af) {
6009           case AF_INET:
6010                     dst = satosin(&ro.ro_dst);
6011                     dst->sin_family = AF_INET;
6012                     dst->sin_len = sizeof(*dst);
6013                     dst->sin_addr = addr->v4;
6014                     break;
6015 #ifdef INET6
6016           case AF_INET6:
6017                     dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
6018                     dst6->sin6_family = AF_INET6;
6019                     dst6->sin6_len = sizeof(*dst6);
6020                     dst6->sin6_addr = addr->v6;
6021                     break;
6022 #endif /* INET6 */
6023           default:
6024                     return (0);
6025           }
6026 
6027 rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING));
6028 
6029           if (ro.ro_rt != NULL) {
6030                     RTFREE(ro.ro_rt);
6031           }
6032 
6033           return (ret);
6034 }
6035 
#ifdef INET
/*
 * Send an IPv4 packet out according to rule r's routing option
 * (r->rt: PF_FASTROUTE, PF_DUPTO, PF_REPLYTO, or a pool-based target).
 *
 * m     in/out packet pointer.  For PF_DUPTO a copy made with m_dup()
 *       is routed and *m is left alone.  Otherwise the original mbuf
 *       is routed and *m is NULL on exit -- except for the two early
 *       returns (m_dup() failure, reply-to direction check), which
 *       leave *m untouched.
 * r     rule supplying rt, direction and the address pool.
 * dir   PF_IN or PF_OUT; anything else panics.
 * oifp  interface the packet is currently on; if the chosen output
 *       interface differs, the packet is run through pf_test(PF_OUT)
 *       on the new interface first.
 * s     optional state; when present its rt_addr/rt_kif select the
 *       target instead of the rule's pool.
 * pd    unused here.
 *
 * A per-packet counter (m_pkthdr.pf.routed) limits how often one packet
 * may pass through this function.  pf_token must be held on entry
 * (asserted); it is released around every if_output call and taken
 * again afterwards.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct route		 iproute;
	struct route		*ro = NULL;	/* NULL until iproute is set up */
	struct mbuf		*m0, *m1;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_src_node	*sn = NULL;
	struct pf_addr		 naddr;
	int			 error = 0;
	int			 sw_csum;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (m == NULL || *m == NULL || r == NULL ||
	    !(dir == PF_IN || dir == PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* Routing-loop guard: count passes through here per packet. */
	if ((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	} else {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	}

	/* dup-to works on a copy; everything else on the original. */
	if (r->rt == PF_DUPTO) {
		m0 = m_dup(*m, M_NOWAIT);
		if (m0 == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}
	ip = mtod(m0, struct ip *);

	/* Start with the packet's own destination as next hop. */
	bzero((caddr_t)&iproute, sizeof(iproute));
	ro = &iproute;
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* Ordinary routing table lookup. */
		rtalloc(ro);
		if (ro->ro_rt == NULL) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ro->ro_rt->rt_use++;
		ifp = ro->ro_rt->rt_ifp;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s != NULL) {
			/* Target recorded in the state. */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr = s->rt_addr.v4.s_addr;
			ifp = (s->rt_kif != NULL) ? s->rt_kif->pfik_ifp : NULL;
		} else {
			/* Stateless: pick a target from the rule's pool. */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = (r->rpool.cur->kif != NULL) ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	/* Leaving through another interface: filter on it first. */
	if (oifp != ifp) {
		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS)
			goto bad;
		if (m0 == NULL)
			goto done;
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		/* pf_test() was handed &m0, so re-derive the header. */
		ip = mtod(m0, struct ip *);
	}

	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		in_delayed_cksum(m0);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
	m0->m_pkthdr.csum_iphlen = (ip->ip_hl << 2);

	/*
	 * WARNING!  We cannot fragment if the packet was modified from an
	 *	     original which expected to be using TSO.  In this
	 *	     situation we pray that the target interface is
	 *	     compatible with the originating interface.
	 */
	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO) ||
	    ((ifp->if_hwassist & CSUM_FRAGMENT) &&
	     (ip->ip_off & htons(IP_DF)) == 0)) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* From KAME */
			if (ip->ip_v == IPVERSION &&
			    (ip->ip_hl << 2) == sizeof(*ip))
				ip->ip_sum = in_cksum_hdr(ip);
			else
				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
		}
		lwkt_reltoken(&pf_token);
		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
		lwkt_gettoken(&pf_token);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
		ipstat.ips_cantfrag++;
		if (r->rt == PF_DUPTO)
			goto bad;
		icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
		    ifp->if_mtu);
		goto done;
	}

	m1 = m0;
	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
	if (error)
		goto bad;

	/*
	 * Walk the m_nextpkt chain starting at the saved head.  After the
	 * first output failure the remaining packets are freed, not sent.
	 */
	m0 = m1;
	while (m0 != NULL) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = NULL;
		if (error != 0) {
			m_freem(m0);
		} else {
			lwkt_reltoken(&pf_token);
			error = ifp->if_output(ifp, m0, sintosa(dst), NULL);
			lwkt_gettoken(&pf_token);
		}
		m0 = m1;
	}

	if (error == 0)
		ipstat.ips_fragmented++;
	goto done;

bad:
	m_freem(m0);
	/* FALLTHROUGH */
done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
}
#endif /* INET */
6227 
#ifdef INET6
/*
 * Send an IPv6 packet out according to rule r's routing option
 * (r->rt: PF_FASTROUTE, PF_DUPTO, PF_REPLYTO, or a pool-based target).
 *
 * m     in/out packet pointer.  For PF_DUPTO a copy made with m_dup()
 *       is routed and *m is left alone.  Otherwise the original mbuf
 *       is routed and *m is NULL on exit -- except for the two early
 *       returns (m_dup() failure, reply-to direction check), which
 *       leave *m untouched.
 * r     rule supplying rt, direction and the address pool.
 * dir   PF_IN or PF_OUT; anything else panics.
 * oifp  interface the packet is currently on; if the chosen output
 *       interface differs, the packet is run through pf_test6(PF_OUT)
 *       on the new interface first.
 * s     optional state; when present its rt_addr/rt_kif select the
 *       target instead of the rule's pool.
 * pd    unused here.
 *
 * A per-packet counter (m_pkthdr.pf.routed) limits how often one packet
 * may pass through this function.  Packets larger than the output
 * interface's MTU are not fragmented: an ICMP6 "packet too big" error
 * is generated instead (or the copy is dropped for PF_DUPTO).
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/* Routing-loop guard: count passes through here per packet. */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	/* dup-to works on a copy; everything else on the original. */
	if (r->rt == PF_DUPTO) {
		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	/* Start with the packet's own destination as next hop. */
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/*
	 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 *
	 * Cheat. XXX why only in the v6 case???
	 */
	if (r->rt == PF_FASTROUTE) {
		m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
		m0->m_pkthdr.pf.flags = 0;
		/* XXX Re-Check when Upgrading to > 4.4 */
		m0->m_pkthdr.pf.statekey = NULL;
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		/*
		 * m0 is the caller's *m here (rt != PF_DUPTO) and has been
		 * handed to ip6_output(), so clear *m like every other
		 * consuming path in this function does; the caller must not
		 * touch the mbuf again.
		 */
		*m = NULL;
		return;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* Stateless: pick a target from the rule's pool. */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* Target recorded in the state. */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	/* Leaving through another interface: filter on it first. */
	if (oifp != ifp) {
		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
			goto bad;
		}
		/* pf_test6() was handed &m0, so re-derive the header. */
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		nd6_output(ifp, ifp, m0, dst, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO)
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		else
			goto bad;
	}

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */
6356 
6357 
6358 /*
6359  * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
6360  *   off is the offset where the protocol header starts
6361  *   len is the total length of protocol header plus payload
6362  * returns 0 when the checksum is valid, otherwise returns 1.
6363  */
6364 /*
6365  * XXX
6366  * FreeBSD supports cksum offload for the following drivers.
6367  * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
6368  * If we can make full use of it we would outperform ipfw/ipfilter in
6369  * very heavy traffic.
6370  * I have not tested 'cause I don't have NICs that supports cksum offload.
6371  * (There might be problems. Typical phenomena would be
6372  *   1. No route message for UDP packet.
6373  *   2. No connection acceptance from external hosts regardless of rule set.)
6374  */
6375 int
pf_check_proto_cksum(struct mbuf * m,int off,int len,u_int8_t p,sa_family_t af)6376 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
6377     sa_family_t af)
6378 {
6379           u_int16_t sum = 0;
6380           int hw_assist = 0;
6381           struct ip *ip;
6382 
6383           if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6384                     return (1);
6385           if (m->m_pkthdr.len < off + len)
6386                     return (1);
6387 
6388           switch (p) {
6389           case IPPROTO_TCP:
6390           case IPPROTO_UDP:
6391                     if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6392                               if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6393                                         sum = m->m_pkthdr.csum_data;
6394                               } else {
6395                                         ip = mtod(m, struct ip *);
6396                                         sum = in_pseudo(ip->ip_src.s_addr,
6397                                                   ip->ip_dst.s_addr, htonl((u_short)len +
6398                                                   m->m_pkthdr.csum_data + p));
6399                               }
6400                               sum ^= 0xffff;
6401                               ++hw_assist;
6402                     }
6403                     break;
6404           case IPPROTO_ICMP:
6405 #ifdef INET6
6406           case IPPROTO_ICMPV6:
6407 #endif /* INET6 */
6408                     break;
6409           default:
6410                     return (1);
6411           }
6412 
6413           if (!hw_assist) {
6414                     switch (af) {
6415                     case AF_INET:
6416                               if (p == IPPROTO_ICMP) {
6417                                         if (m->m_len < off)
6418                                                   return (1);
6419                                         m->m_data += off;
6420                                         m->m_len -= off;
6421                                         sum = in_cksum(m, len);
6422                                         m->m_data -= off;
6423                                         m->m_len += off;
6424                               } else {
6425                                         if (m->m_len < sizeof(struct ip))
6426                                                   return (1);
6427                                         sum = in_cksum_range(m, p, off, len);
6428                                         if (sum == 0) {
6429                                                   m->m_pkthdr.csum_flags |=
6430                                                       (CSUM_DATA_VALID |
6431                                                        CSUM_PSEUDO_HDR);
6432                                                   m->m_pkthdr.csum_data = 0xffff;
6433                                         }
6434                               }
6435                               break;
6436 #ifdef INET6
6437                     case AF_INET6:
6438                               if (m->m_len < sizeof(struct ip6_hdr))
6439                                         return (1);
6440                               sum = in6_cksum(m, p, off, len);
6441                               /*
6442                                * XXX
6443                                * IPv6 H/W cksum off-load not supported yet!
6444                                *
6445                                * if (sum == 0) {
6446                                *        m->m_pkthdr.csum_flags |=
6447                                *            (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
6448                                *        m->m_pkthdr.csum_data = 0xffff;
6449                                *}
6450                                */
6451                               break;
6452 #endif /* INET6 */
6453                     default:
6454                               return (1);
6455                     }
6456           }
6457           if (sum) {
6458                     switch (p) {
6459                     case IPPROTO_TCP:
6460                               tcpstat.tcps_rcvbadsum++;
6461                               break;
6462                     case IPPROTO_UDP:
6463                               udp_stat.udps_badsum++;
6464                               break;
6465                     case IPPROTO_ICMP:
6466                               icmpstat.icps_checksum++;
6467                               break;
6468 #ifdef INET6
6469                     case IPPROTO_ICMPV6:
6470                               icmp6stat.icp6s_checksum++;
6471                               break;
6472 #endif /* INET6 */
6473                     }
6474                     return (1);
6475           }
6476           return (0);
6477 }
6478 
6479 struct pf_divert *
pf_find_divert(struct mbuf * m)6480 pf_find_divert(struct mbuf *m)
6481 {
6482           struct m_tag    *mtag;
6483 
6484           if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
6485                     return (NULL);
6486 
6487           return ((struct pf_divert *)(mtag + 1));
6488 }
6489 
6490 struct pf_divert *
pf_get_divert(struct mbuf * m)6491 pf_get_divert(struct mbuf *m)
6492 {
6493           struct m_tag    *mtag;
6494 
6495           if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
6496                     mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
6497                         M_NOWAIT);
6498                     if (mtag == NULL)
6499                               return (NULL);
6500                     bzero(mtag + 1, sizeof(struct pf_divert));
6501                     m_tag_prepend(m, mtag);
6502           }
6503 
6504           return ((struct pf_divert *)(mtag + 1));
6505 }
6506 
6507 #ifdef INET
6508 
6509 /*
6510  * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE
6511  */
6512 int
pf_test(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct inpcb * inp)6513 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
6514     struct ether_header *eh, struct inpcb *inp)
6515 {
6516           struct pfi_kif                *kif;
6517           u_short                        action, reason = 0, log = 0;
6518           struct mbuf                   *m = *m0;
6519           struct ip           *h = NULL;
6520           struct pf_rule                *a = NULL, *r = &pf_default_rule, *tr, *nr;
6521           struct pf_state               *s = NULL;
6522           struct pf_ruleset   *ruleset = NULL;
6523           struct pf_pdesc                pd;
6524           int                            off, dirndx;
6525 #ifdef ALTQ
6526           int                            pqid = 0;
6527 #endif
6528 
6529           if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
6530                     /* Skip us; continue in ipfw. */
6531                     return (PF_PASS);
6532           }
6533 
6534           if (!pf_status.running)
6535                     return (PF_PASS);
6536 
6537           memset(&pd, 0, sizeof(pd));
6538 #ifdef foo
6539           if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6540                     kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
6541           else
6542 #endif
6543                     kif = (struct pfi_kif *)ifp->if_pf_kif;
6544 
6545           if (kif == NULL) {
6546                     DPFPRINTF(PF_DEBUG_URGENT,
6547                         ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
6548                     return (PF_DROP);
6549           }
6550           if (kif->pfik_flags & PFI_IFLAG_SKIP)
6551                     return (PF_PASS);
6552 
6553 #ifdef DIAGNOSTIC
6554           if ((m->m_flags & M_PKTHDR) == 0)
6555                     panic("non-M_PKTHDR is passed to pf_test");
6556 #endif /* DIAGNOSTIC */
6557 
6558           if (m->m_pkthdr.len < (int)sizeof(*h)) {
6559                     action = PF_DROP;
6560                     REASON_SET(&reason, PFRES_SHORT);
6561                     log = 1;
6562                     goto done;
6563           }
6564 
6565           /*
6566            * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
6567            * so make sure pf.flags is clear.
6568            */
6569           if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
6570                     return (PF_PASS);
6571           m->m_pkthdr.pf.flags = 0;
6572           /* Re-Check when updating to > 4.4 */
6573           m->m_pkthdr.pf.statekey = NULL;
6574 
6575           /* We do IP header normalization and packet reassembly here */
6576           if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
6577                     action = PF_DROP;
6578                     goto done;
6579           }
6580           m = *m0;  /* pf_normalize messes with m0 */
6581           h = mtod(m, struct ip *);
6582 
6583           off = h->ip_hl << 2;
6584           if (off < (int)sizeof(*h)) {
6585                     action = PF_DROP;
6586                     REASON_SET(&reason, PFRES_SHORT);
6587                     log = 1;
6588                     goto done;
6589           }
6590 
6591           pd.src = (struct pf_addr *)&h->ip_src;
6592           pd.dst = (struct pf_addr *)&h->ip_dst;
6593           pd.sport = pd.dport = NULL;
6594           pd.ip_sum = &h->ip_sum;
6595           pd.proto_sum = NULL;
6596           pd.proto = h->ip_p;
6597           pd.dir = dir;
6598           pd.sidx = (dir == PF_IN) ? 0 : 1;
6599           pd.didx = (dir == PF_IN) ? 1 : 0;
6600           pd.af = AF_INET;
6601           pd.tos = h->ip_tos;
6602           pd.tot_len = ntohs(h->ip_len);
6603           pd.eh = eh;
6604 
6605           /* handle fragments that didn't get reassembled by normalization */
6606           if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
6607                     action = pf_test_fragment(&r, dir, kif, m, h,
6608                         &pd, &a, &ruleset);
6609                     goto done;
6610           }
6611 
6612           switch (h->ip_p) {
6613 
6614           case IPPROTO_TCP: {
6615                     struct tcphdr       th;
6616 
6617                     pd.hdr.tcp = &th;
6618                     if (!pf_pull_hdr(m, off, &th, sizeof(th),
6619                         &action, &reason, AF_INET)) {
6620                               log = action != PF_PASS;
6621                               goto done;
6622                     }
6623                     pd.p_len = pd.tot_len - off - (th.th_off << 2);
6624 #ifdef ALTQ
6625                     if ((th.th_flags & TH_ACK) && pd.p_len == 0)
6626                               pqid = 1;
6627 #endif
6628                     action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6629                     if (action == PF_DROP)
6630                               goto done;
6631                     action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6632                                                      &reason);
6633                     if (action == PF_PASS) {
6634                               r = s->rule.ptr;
6635                               a = s->anchor.ptr;
6636                               log = s->log;
6637                     } else if (s == NULL) {
6638                               action = pf_test_rule(&r, &s, dir, kif,
6639                                                         m, off, h, &pd, &a,
6640                                                         &ruleset, NULL, inp);
6641                     }
6642                     break;
6643           }
6644 
6645           case IPPROTO_UDP: {
6646                     struct udphdr       uh;
6647 
6648                     pd.hdr.udp = &uh;
6649                     if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6650                         &action, &reason, AF_INET)) {
6651                               log = action != PF_PASS;
6652                               goto done;
6653                     }
6654                     if (uh.uh_dport == 0 ||
6655                         ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6656                         ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6657                               action = PF_DROP;
6658                               REASON_SET(&reason, PFRES_SHORT);
6659                               goto done;
6660                     }
6661                     action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6662                     if (action == PF_PASS) {
6663                               r = s->rule.ptr;
6664                               a = s->anchor.ptr;
6665                               log = s->log;
6666                     } else if (s == NULL) {
6667                               action = pf_test_rule(&r, &s, dir, kif,
6668                                                         m, off, h, &pd, &a,
6669                                                         &ruleset, NULL, inp);
6670                     }
6671                     break;
6672           }
6673 
6674           case IPPROTO_ICMP: {
6675                     struct icmp         ih;
6676 
6677                     pd.hdr.icmp = &ih;
6678                     if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
6679                         &action, &reason, AF_INET)) {
6680                               log = action != PF_PASS;
6681                               goto done;
6682                     }
6683                     action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
6684                                                       &reason);
6685                     if (action == PF_PASS) {
6686                               r = s->rule.ptr;
6687                               a = s->anchor.ptr;
6688                               log = s->log;
6689                     } else if (s == NULL) {
6690                               action = pf_test_rule(&r, &s, dir, kif,
6691                                                         m, off, h, &pd, &a,
6692                                                         &ruleset, NULL, inp);
6693                     }
6694                     break;
6695           }
6696 
6697           default:
6698                     action = pf_test_state_other(&s, dir, kif, m, &pd);
6699                     if (action == PF_PASS) {
6700                               r = s->rule.ptr;
6701                               a = s->anchor.ptr;
6702                               log = s->log;
6703                     } else if (s == NULL) {
6704                               action = pf_test_rule(&r, &s, dir, kif, m, off, h,
6705                                                         &pd, &a, &ruleset, NULL, inp);
6706                     }
6707                     break;
6708           }
6709 
6710 done:
6711           if (action == PF_PASS && h->ip_hl > 5 &&
6712               !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
6713                     action = PF_DROP;
6714                     REASON_SET(&reason, PFRES_IPOPTIONS);
6715                     log = 1;
6716                     DPFPRINTF(PF_DEBUG_MISC,
6717                         ("pf: dropping packet with ip options\n"));
6718           }
6719 
6720           if ((s && s->tag) || r->rtableid)
6721                     pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
6722 
6723 #if 0
6724           if (dir == PF_IN && s && s->key[PF_SK_STACK])
6725                     m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
6726 #endif
6727 
6728 #ifdef ALTQ
6729           /*
6730            * Generate a hash code and qid request for ALTQ.  A qid of 0
6731            * is allowed and will cause altq to select the default queue.
6732            */
6733           if (action == PF_PASS) {
6734                     m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
6735                     if (pqid || (pd.tos & IPTOS_LOWDELAY))
6736                               m->m_pkthdr.pf.qid = r->pqid;
6737                     else
6738                               m->m_pkthdr.pf.qid = r->qid;
6739                     m->m_pkthdr.pf.ecn_af = AF_INET;
6740                     m->m_pkthdr.pf.hdr = h;
6741                     /* add connection hash for fairq */
6742                     if (s) {
6743                               /* for fairq */
6744                               m->m_pkthdr.pf.state_hash = s->hash;
6745                               m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
6746                     }
6747           }
6748 #endif /* ALTQ */
6749 
6750           /*
6751            * connections redirected to loopback should not match sockets
6752            * bound specifically to loopback due to security implications,
6753            * see tcp_input() and in_pcblookup_listen().
6754            */
6755           if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6756               pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6757               (s->nat_rule.ptr->action == PF_RDR ||
6758               s->nat_rule.ptr->action == PF_BINAT) &&
6759               (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
6760           {
6761                     m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
6762           }
6763 
6764           if (dir == PF_IN && action == PF_PASS && r->divert.port) {
6765                     struct pf_divert *divert;
6766 
6767                     if ((divert = pf_get_divert(m))) {
6768                               m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
6769                               divert->port = r->divert.port;
6770                               divert->addr.ipv4 = r->divert.addr.v4;
6771                     }
6772           }
6773 
6774           if (log) {
6775                     struct pf_rule *lr;
6776 
6777                     if (s != NULL && s->nat_rule.ptr != NULL &&
6778                         s->nat_rule.ptr->log & PF_LOG_ALL)
6779                               lr = s->nat_rule.ptr;
6780                     else
6781                               lr = r;
6782                     PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
6783                         &pd);
6784           }
6785 
6786           kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6787           kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6788 
6789           if (action == PF_PASS || r->action == PF_DROP) {
6790                     dirndx = (dir == PF_OUT);
6791                     r->packets[dirndx]++;
6792                     r->bytes[dirndx] += pd.tot_len;
6793                     if (a != NULL) {
6794                               a->packets[dirndx]++;
6795                               a->bytes[dirndx] += pd.tot_len;
6796                     }
6797                     if (s != NULL) {
6798                               if (s->nat_rule.ptr != NULL) {
6799                                         s->nat_rule.ptr->packets[dirndx]++;
6800                                         s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6801                               }
6802                               if (s->src_node != NULL) {
6803                                         s->src_node->packets[dirndx]++;
6804                                         s->src_node->bytes[dirndx] += pd.tot_len;
6805                               }
6806                               if (s->nat_src_node != NULL) {
6807                                         s->nat_src_node->packets[dirndx]++;
6808                                         s->nat_src_node->bytes[dirndx] += pd.tot_len;
6809                               }
6810                               dirndx = (dir == s->direction) ? 0 : 1;
6811                               s->packets[dirndx]++;
6812                               s->bytes[dirndx] += pd.tot_len;
6813                     }
6814                     tr = r;
6815                     nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6816                     if (nr != NULL && r == &pf_default_rule)
6817                               tr = nr;
6818                     if (tr->src.addr.type == PF_ADDR_TABLE)
6819                               pfr_update_stats(tr->src.addr.p.tbl,
6820                                   (s == NULL) ? pd.src :
6821                                   &s->key[(s->direction == PF_IN)]->
6822                                         addr[(s->direction == PF_OUT)],
6823                                   pd.af, pd.tot_len, dir == PF_OUT,
6824                                   r->action == PF_PASS, tr->src.neg);
6825                     if (tr->dst.addr.type == PF_ADDR_TABLE)
6826                               pfr_update_stats(tr->dst.addr.p.tbl,
6827                                   (s == NULL) ? pd.dst :
6828                                   &s->key[(s->direction == PF_IN)]->
6829                                         addr[(s->direction == PF_IN)],
6830                                   pd.af, pd.tot_len, dir == PF_OUT,
6831                                   r->action == PF_PASS, tr->dst.neg);
6832           }
6833 
6834 
6835           if (action == PF_SYNPROXY_DROP) {
6836                     m_freem(*m0);
6837                     *m0 = NULL;
6838                     action = PF_PASS;
6839           } else if (r->rt) {
6840                     /* pf_route can free the mbuf causing *m0 to become NULL */
6841                     pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
6842           }
6843 
6844           return (action);
6845 }
6846 #endif /* INET */
6847 
6848 #ifdef INET6
6849 
6850 /*
6851  * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE
6852  */
6853 int
pf_test6(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct inpcb * inp)6854 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
6855     struct ether_header *eh, struct inpcb *inp)
6856 {
6857           struct pfi_kif                *kif;
6858           u_short                        action, reason = 0, log = 0;
6859           struct mbuf                   *m = *m0, *n = NULL;
6860           struct ip6_hdr                *h = NULL;
6861           struct pf_rule                *a = NULL, *r = &pf_default_rule, *tr, *nr;
6862           struct pf_state               *s = NULL;
6863           struct pf_ruleset   *ruleset = NULL;
6864           struct pf_pdesc                pd;
6865           int                            off, terminal = 0, dirndx, rh_cnt = 0;
6866 
6867           if (!pf_status.running)
6868                     return (PF_PASS);
6869 
6870           memset(&pd, 0, sizeof(pd));
6871 #ifdef foo
6872           if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
6873                     kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
6874           else
6875 #endif
6876                     kif = (struct pfi_kif *)ifp->if_pf_kif;
6877 
6878           if (kif == NULL) {
6879                     DPFPRINTF(PF_DEBUG_URGENT,
6880                         ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
6881                     return (PF_DROP);
6882           }
6883           if (kif->pfik_flags & PFI_IFLAG_SKIP)
6884                     return (PF_PASS);
6885 
6886 #ifdef DIAGNOSTIC
6887           if ((m->m_flags & M_PKTHDR) == 0)
6888                     panic("non-M_PKTHDR is passed to pf_test6");
6889 #endif /* DIAGNOSTIC */
6890 
6891           if (m->m_pkthdr.len < (int)sizeof(*h)) {
6892                     action = PF_DROP;
6893                     REASON_SET(&reason, PFRES_SHORT);
6894                     log = 1;
6895                     goto done;
6896           }
6897 
6898           /*
6899            * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
6900            * so make sure pf.flags is clear.
6901            */
6902           if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
6903                     return (PF_PASS);
6904           m->m_pkthdr.pf.flags = 0;
6905           /* Re-Check when updating to > 4.4 */
6906           m->m_pkthdr.pf.statekey = NULL;
6907 
6908           /* We do IP header normalization and packet reassembly here */
6909           if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
6910                     action = PF_DROP;
6911                     goto done;
6912           }
6913           m = *m0;  /* pf_normalize messes with m0 */
6914           h = mtod(m, struct ip6_hdr *);
6915 
6916 #if 1
6917           /*
6918            * we do not support jumbogram yet.  if we keep going, zero ip6_plen
6919            * will do something bad, so drop the packet for now.
6920            */
6921           if (htons(h->ip6_plen) == 0) {
6922                     action = PF_DROP;
6923                     REASON_SET(&reason, PFRES_NORM);        /*XXX*/
6924                     goto done;
6925           }
6926 #endif
6927 
6928           pd.src = (struct pf_addr *)&h->ip6_src;
6929           pd.dst = (struct pf_addr *)&h->ip6_dst;
6930           pd.sport = pd.dport = NULL;
6931           pd.ip_sum = NULL;
6932           pd.proto_sum = NULL;
6933           pd.dir = dir;
6934           pd.sidx = (dir == PF_IN) ? 0 : 1;
6935           pd.didx = (dir == PF_IN) ? 1 : 0;
6936           pd.af = AF_INET6;
6937           pd.tos = 0;
6938           pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6939           pd.eh = eh;
6940 
6941           off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6942           pd.proto = h->ip6_nxt;
6943           do {
6944                     switch (pd.proto) {
6945                     case IPPROTO_FRAGMENT:
6946                               action = pf_test_fragment(&r, dir, kif, m, h,
6947                                   &pd, &a, &ruleset);
6948                               if (action == PF_DROP)
6949                                         REASON_SET(&reason, PFRES_FRAG);
6950                               goto done;
6951                     case IPPROTO_ROUTING: {
6952                               struct ip6_rthdr rthdr;
6953 
6954                               if (rh_cnt++) {
6955                                         DPFPRINTF(PF_DEBUG_MISC,
6956                                             ("pf: IPv6 more than one rthdr\n"));
6957                                         action = PF_DROP;
6958                                         REASON_SET(&reason, PFRES_IPOPTIONS);
6959                                         log = 1;
6960                                         goto done;
6961                               }
6962                               if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
6963                                   &reason, pd.af)) {
6964                                         DPFPRINTF(PF_DEBUG_MISC,
6965                                             ("pf: IPv6 short rthdr\n"));
6966                                         action = PF_DROP;
6967                                         REASON_SET(&reason, PFRES_SHORT);
6968                                         log = 1;
6969                                         goto done;
6970                               }
6971                               if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6972                                         DPFPRINTF(PF_DEBUG_MISC,
6973                                             ("pf: IPv6 rthdr0\n"));
6974                                         action = PF_DROP;
6975                                         REASON_SET(&reason, PFRES_IPOPTIONS);
6976                                         log = 1;
6977                                         goto done;
6978                               }
6979                               /* FALLTHROUGH */
6980                     }
6981                     case IPPROTO_AH:
6982                     case IPPROTO_HOPOPTS:
6983                     case IPPROTO_DSTOPTS: {
6984                               /* get next header and header length */
6985                               struct ip6_ext      opt6;
6986 
6987                               if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6988                                   NULL, &reason, pd.af)) {
6989                                         DPFPRINTF(PF_DEBUG_MISC,
6990                                             ("pf: IPv6 short opt\n"));
6991                                         action = PF_DROP;
6992                                         log = 1;
6993                                         goto done;
6994                               }
6995                               if (pd.proto == IPPROTO_AH)
6996                                         off += (opt6.ip6e_len + 2) * 4;
6997                               else
6998                                         off += (opt6.ip6e_len + 1) * 8;
6999                               pd.proto = opt6.ip6e_nxt;
7000                               /* goto the next header */
7001                               break;
7002                     }
7003                     default:
7004                               terminal++;
7005                               break;
7006                     }
7007           } while (!terminal);
7008 
7009           /* if there's no routing header, use unmodified mbuf for checksumming */
7010           if (!n)
7011                     n = m;
7012 
7013           switch (pd.proto) {
7014 
7015           case IPPROTO_TCP: {
7016                     struct tcphdr       th;
7017 
7018                     pd.hdr.tcp = &th;
7019                     if (!pf_pull_hdr(m, off, &th, sizeof(th),
7020                         &action, &reason, AF_INET6)) {
7021                               log = action != PF_PASS;
7022                               goto done;
7023                     }
7024                     pd.p_len = pd.tot_len - off - (th.th_off << 2);
7025                     action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
7026                     if (action == PF_DROP)
7027                               goto done;
7028                     action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
7029                                                      &reason);
7030                     if (action == PF_PASS) {
7031                               r = s->rule.ptr;
7032                               a = s->anchor.ptr;
7033                               log = s->log;
7034                     } else if (s == NULL) {
7035                               action = pf_test_rule(&r, &s, dir, kif,
7036                                                         m, off, h, &pd, &a,
7037                                                         &ruleset, NULL, inp);
7038                     }
7039                     break;
7040           }
7041 
7042           case IPPROTO_UDP: {
7043                     struct udphdr       uh;
7044 
7045                     pd.hdr.udp = &uh;
7046                     if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
7047                         &action, &reason, AF_INET6)) {
7048                               log = action != PF_PASS;
7049                               goto done;
7050                     }
7051                     if (uh.uh_dport == 0 ||
7052                         ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
7053                         ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
7054                               action = PF_DROP;
7055                               REASON_SET(&reason, PFRES_SHORT);
7056                               goto done;
7057                     }
7058                     action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
7059                     if (action == PF_PASS) {
7060                               r = s->rule.ptr;
7061                               a = s->anchor.ptr;
7062                               log = s->log;
7063                     } else if (s == NULL) {
7064                               action = pf_test_rule(&r, &s, dir, kif,
7065                                                         m, off, h, &pd, &a,
7066                                                         &ruleset, NULL, inp);
7067                     }
7068                     break;
7069           }
7070 
7071           case IPPROTO_ICMPV6: {
7072                     struct icmp6_hdr    ih;
7073 
7074                     pd.hdr.icmp6 = &ih;
7075                     if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
7076                         &action, &reason, AF_INET6)) {
7077                               log = action != PF_PASS;
7078                               goto done;
7079                     }
7080                     action = pf_test_state_icmp(&s, dir, kif,
7081                                                       m, off, h, &pd, &reason);
7082                     if (action == PF_PASS) {
7083                               r = s->rule.ptr;
7084                               a = s->anchor.ptr;
7085                               log = s->log;
7086                     } else if (s == NULL) {
7087                               action = pf_test_rule(&r, &s, dir, kif,
7088                                                         m, off, h, &pd, &a,
7089                                                         &ruleset, NULL, inp);
7090                     }
7091                     break;
7092           }
7093 
7094           default:
7095                     action = pf_test_state_other(&s, dir, kif, m, &pd);
7096                     if (action == PF_PASS) {
7097                               r = s->rule.ptr;
7098                               a = s->anchor.ptr;
7099                               log = s->log;
7100                     } else if (s == NULL) {
7101                               action = pf_test_rule(&r, &s, dir, kif, m, off, h,
7102                                                         &pd, &a, &ruleset, NULL, inp);
7103                     }
7104                     break;
7105           }
7106 
7107 done:
7108           if (n != m) {
7109                     m_freem(n);
7110                     n = NULL;
7111           }
7112 
7113           /* handle dangerous IPv6 extension headers. */
7114           if (action == PF_PASS && rh_cnt &&
7115               !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
7116                     action = PF_DROP;
7117                     REASON_SET(&reason, PFRES_IPOPTIONS);
7118                     log = 1;
7119                     DPFPRINTF(PF_DEBUG_MISC,
7120                         ("pf: dropping packet with dangerous v6 headers\n"));
7121           }
7122 
7123           if ((s && s->tag) || r->rtableid)
7124                     pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
7125 
7126 #if 0
7127           if (dir == PF_IN && s && s->key[PF_SK_STACK])
7128                     m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
7129 #endif
7130 
7131 #ifdef ALTQ
7132           /*
7133            * Generate a hash code and qid request for ALTQ.  A qid of 0
7134            * is allowed and will cause altq to select the default queue.
7135            */
7136           if (action == PF_PASS) {
7137                     m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
7138                     if (pd.tos & IPTOS_LOWDELAY)
7139                               m->m_pkthdr.pf.qid = r->pqid;
7140                     else
7141                               m->m_pkthdr.pf.qid = r->qid;
7142                     m->m_pkthdr.pf.ecn_af = AF_INET6;
7143                     m->m_pkthdr.pf.hdr = h;
7144                     if (s) {
7145                               /* for fairq */
7146                               m->m_pkthdr.pf.state_hash = s->hash;
7147                               m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
7148                     }
7149           }
7150 #endif /* ALTQ */
7151 
7152           if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
7153               pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
7154               (s->nat_rule.ptr->action == PF_RDR ||
7155               s->nat_rule.ptr->action == PF_BINAT) &&
7156               IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
7157           {
7158                     m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
7159           }
7160 
7161           if (dir == PF_IN && action == PF_PASS && r->divert.port) {
7162                     struct pf_divert *divert;
7163 
7164                     if ((divert = pf_get_divert(m))) {
7165                               m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
7166                               divert->port = r->divert.port;
7167                               divert->addr.ipv6 = r->divert.addr.v6;
7168                     }
7169           }
7170 
7171           if (log) {
7172                     struct pf_rule *lr;
7173 
7174                     if (s != NULL && s->nat_rule.ptr != NULL &&
7175                         s->nat_rule.ptr->log & PF_LOG_ALL)
7176                               lr = s->nat_rule.ptr;
7177                     else
7178                               lr = r;
7179                     PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
7180                         &pd);
7181           }
7182 
7183           kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
7184           kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
7185 
7186           if (action == PF_PASS || r->action == PF_DROP) {
7187                     dirndx = (dir == PF_OUT);
7188                     r->packets[dirndx]++;
7189                     r->bytes[dirndx] += pd.tot_len;
7190                     if (a != NULL) {
7191                               a->packets[dirndx]++;
7192                               a->bytes[dirndx] += pd.tot_len;
7193                     }
7194                     if (s != NULL) {
7195                               if (s->nat_rule.ptr != NULL) {
7196                                         s->nat_rule.ptr->packets[dirndx]++;
7197                                         s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
7198                               }
7199                               if (s->src_node != NULL) {
7200                                         s->src_node->packets[dirndx]++;
7201                                         s->src_node->bytes[dirndx] += pd.tot_len;
7202                               }
7203                               if (s->nat_src_node != NULL) {
7204                                         s->nat_src_node->packets[dirndx]++;
7205                                         s->nat_src_node->bytes[dirndx] += pd.tot_len;
7206                               }
7207                               dirndx = (dir == s->direction) ? 0 : 1;
7208                               s->packets[dirndx]++;
7209                               s->bytes[dirndx] += pd.tot_len;
7210                     }
7211                     tr = r;
7212                     nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
7213                     if (nr != NULL && r == &pf_default_rule)
7214                               tr = nr;
7215                     if (tr->src.addr.type == PF_ADDR_TABLE)
7216                               pfr_update_stats(tr->src.addr.p.tbl,
7217                                   (s == NULL) ? pd.src :
7218                                   &s->key[(s->direction == PF_IN)]->addr[0],
7219                                   pd.af, pd.tot_len, dir == PF_OUT,
7220                                   r->action == PF_PASS, tr->src.neg);
7221                     if (tr->dst.addr.type == PF_ADDR_TABLE)
7222                               pfr_update_stats(tr->dst.addr.p.tbl,
7223                                   (s == NULL) ? pd.dst :
7224                                   &s->key[(s->direction == PF_IN)]->addr[1],
7225                                   pd.af, pd.tot_len, dir == PF_OUT,
7226                                   r->action == PF_PASS, tr->dst.neg);
7227           }
7228 
7229 
7230           if (action == PF_SYNPROXY_DROP) {
7231                     m_freem(*m0);
7232                     *m0 = NULL;
7233                     action = PF_PASS;
7234           } else if (r->rt)
7235                     /* pf_route6 can free the mbuf causing *m0 to become NULL */
7236                     pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
7237 
7238           return (action);
7239 }
7240 #endif /* INET6 */
7241 
/*
 * Report whether the given interface queue is congested.
 *
 * This implementation never inspects `ifq` and unconditionally returns 0,
 * so callers always see "not congested".
 * NOTE(review): this looks like a deliberate stub for a congestion check
 * that is not implemented in this tree -- confirm before relying on it.
 */
int
pf_check_congestion(struct ifqueue *ifq)
{
	return (0);
}
7247