1 /*        $NetBSD: ip_state.c,v 1.12 2020/04/18 17:02:00 christos Exp $         */
2 
3 /*
4  * Copyright (C) 2012 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  *
8  * Id: ip_state.c,v 1.1.1.2 2012/07/22 13:45:37 darrenr Exp
9  */
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL        1
14 # define        _KERNEL       1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/file.h>
20 #if defined(_KERNEL) && defined(__FreeBSD_version) && \
21     (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
22 #include "opt_inet6.h"
23 #endif
24 #if !defined(_KERNEL) && !defined(__KERNEL__)
25 # include <stdio.h>
26 # include <stdlib.h>
27 # include <string.h>
28 # define _KERNEL
29 # ifdef __OpenBSD__
30 struct file;
31 # endif
32 # include <sys/uio.h>
33 # undef _KERNEL
34 #endif
35 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
36 # include <sys/filio.h>
37 # include <sys/fcntl.h>
38 #else
39 # include <sys/ioctl.h>
40 #endif
41 #include <sys/time.h>
42 #if !defined(linux)
43 # include <sys/protosw.h>
44 #endif
45 #include <sys/socket.h>
46 #if defined(_KERNEL)
47 # include <sys/systm.h>
48 # if !defined(__SVR4) && !defined(__svr4__)
49 #  include <sys/mbuf.h>
50 # endif
51 #endif
52 #if defined(__SVR4) || defined(__svr4__)
53 # include <sys/filio.h>
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 #  include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61 
62 #include <net/if.h>
63 #ifdef sun
64 # include <net/af.h>
65 #endif
66 #include <netinet/in.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/ip.h>
69 #include <netinet/tcp.h>
70 #if !defined(__hpux) && !defined(linux)
71 # include <netinet/tcp_fsm.h>
72 #endif
73 #include <netinet/udp.h>
74 #include <netinet/ip_icmp.h>
75 #if !defined(_KERNEL)
76 # include "ipf.h"
77 #endif
78 #include "netinet/ip_compat.h"
79 #include "netinet/ip_fil.h"
80 #include "netinet/ip_nat.h"
81 #include "netinet/ip_frag.h"
82 #include "netinet/ip_state.h"
83 #include "netinet/ip_proxy.h"
84 #include "netinet/ip_lookup.h"
85 #include "netinet/ip_dstlist.h"
86 #include "netinet/ip_sync.h"
87 #ifdef    USE_INET6
88 #include <netinet/icmp6.h>
89 #endif
90 #if FREEBSD_GE_REV(300000)
91 # include <sys/malloc.h>
92 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
93 #  include <sys/libkern.h>
94 #  include <sys/systm.h>
95 # endif
96 #endif
97 /* END OF INCLUDES */
98 
99 
100 #if !defined(lint)
101 #if defined(__NetBSD__)
102 #include <sys/cdefs.h>
103 __KERNEL_RCSID(0, "$NetBSD: ip_state.c,v 1.12 2020/04/18 17:02:00 christos Exp $");
104 #else
105 static const char sccsid[] = "@(#)ip_state.c      1.8 6/5/96 (C) 1993-2000 Darren Reed";
106 static const char rcsid[] = "@(#)Id: ip_state.c,v 1.1.1.2 2012/07/22 13:45:37 darrenr Exp";
107 #endif
108 #endif
109 
110 
111 static const ipftuneable_t ipf_state_tuneables[] = {
112           { { (void *)offsetof(ipf_state_softc_t, ipf_state_max) },
113                     "state_max",                  1,        0x7fffffff,
114                     stsizeof(ipf_state_softc_t, ipf_state_max),
115                     0,                            NULL,     NULL },
116           { { (void *)offsetof(ipf_state_softc_t, ipf_state_size) },
117                     "state_size",                 1,        0x7fffffff,
118                     stsizeof(ipf_state_softc_t, ipf_state_size),
119                     0,                            NULL,     ipf_state_rehash },
120           { { (void *)offsetof(ipf_state_softc_t, ipf_state_lock) },
121                     "state_lock",                 0,        1,
122                     stsizeof(ipf_state_softc_t, ipf_state_lock),
123                     IPFT_RDONLY,                  NULL,     NULL },
124           { { (void *)offsetof(ipf_state_softc_t, ipf_state_maxbucket) },
125                     "state_maxbucket",  1,        0x7fffffff,
126                     stsizeof(ipf_state_softc_t, ipf_state_maxbucket),
127                     0,                            NULL,     NULL },
128           { { (void *)offsetof(ipf_state_softc_t, ipf_state_logging) },
129                     "state_logging",0,  1,
130                     stsizeof(ipf_state_softc_t, ipf_state_logging),
131                     0,                            NULL,     NULL },
132           { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_high) },
133                     "state_wm_high",2,  100,
134                     stsizeof(ipf_state_softc_t, ipf_state_wm_high),
135                     0,                            NULL,     NULL },
136           { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_low) },
137                     "state_wm_low",     1,        99,
138                     stsizeof(ipf_state_softc_t, ipf_state_wm_low),
139                     0,                            NULL,     NULL },
140           { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_freq) },
141                     "state_wm_freq",2,  999999,
142                     stsizeof(ipf_state_softc_t, ipf_state_wm_freq),
143                     0,                            NULL,     NULL },
144           { { NULL },
145                     NULL,                         0,        0,
146                     0,
147                     0,        NULL, NULL }
148 };
149 
/*
 * Counter helpers.  Every one of them expands to an expression on a local
 * variable named "softs", so they can only be used where such a variable is
 * in scope.
 *
 *   SINCL(x)         - increment softs->x through ATOMIC_INCL()
 *   SBUMP(x)         - plain (non-atomic) post-increment of softs->x
 *   SBUMPD(x, y)     - post-increment softs->x.y, then invoke DT(y)
 *   SBUMPDX(x, y, z) - post-increment softs->x.y, then invoke DT(z)
 */
#define	SINCL(x)		ATOMIC_INCL(softs->x)
#define	SBUMP(x)		(softs->x)++
#define	SBUMPD(x, y)		do { (softs->x.y)++; DT(y); } while (0)
#define	SBUMPDX(x, y, z)	do { (softs->x.y)++; DT(z); } while (0)
154 
155 #ifdef    USE_INET6
156 static ipstate_t *ipf_checkicmp6matchingstate(fr_info_t *);
157 #endif
158 static int ipf_allowstateicmp(fr_info_t *, ipstate_t *, i6addr_t *);
159 static ipstate_t *ipf_matchsrcdst(fr_info_t *, ipstate_t *, i6addr_t *,
160                                           i6addr_t *, tcphdr_t *, u_32_t);
161 static ipstate_t *ipf_checkicmpmatchingstate(fr_info_t *);
162 static int ipf_state_flush_entry(ipf_main_softc_t *, void *);
163 static ips_stat_t *ipf_state_stats(ipf_main_softc_t *);
164 static int ipf_state_del(ipf_main_softc_t *, ipstate_t *, int);
165 static int ipf_state_remove(ipf_main_softc_t *, void *);
166 static int ipf_state_match(ipstate_t *is1, ipstate_t *is2);
167 static int ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2);
168 static int ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2);
169 static int ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2);
170 static int ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2);
171 static int ipf_state_matchports(udpinfo_t *is1, udpinfo_t *is2);
172 static int ipf_state_matcharray(ipstate_t *, int *, u_long);
173 static void ipf_ipsmove(ipf_state_softc_t *, ipstate_t *, u_int);
174 static int ipf_state_tcp(ipf_main_softc_t *, ipf_state_softc_t *,
175                                fr_info_t *, tcphdr_t *, ipstate_t *);
176 static int ipf_tcpoptions(ipf_state_softc_t *, fr_info_t *,
177                                 tcphdr_t *, tcpdata_t *);
178 static ipstate_t *ipf_state_clone(fr_info_t *, tcphdr_t *, ipstate_t *);
179 static void ipf_fixinisn(fr_info_t *, ipstate_t *);
180 static void ipf_fixoutisn(fr_info_t *, ipstate_t *);
181 static void ipf_checknewisn(fr_info_t *, ipstate_t *);
182 static int ipf_state_iter(ipf_main_softc_t *, ipftoken_t *,
183                                 ipfgeniter_t *, ipfobj_t *);
184 static int ipf_state_gettable(ipf_main_softc_t *, ipf_state_softc_t *, char *);
185 static    int ipf_state_tcpinwindow(struct fr_info *, struct tcpdata *,
186                                           struct tcpdata *, tcphdr_t *, int);
187 
188 static int ipf_state_getent(ipf_main_softc_t *, ipf_state_softc_t *, void *);
189 static int ipf_state_putent(ipf_main_softc_t *, ipf_state_softc_t *, void *);
190 
/* Durations, expressed through IPF_TTLVAL(). */
#define	ONE_DAY		IPF_TTLVAL(1 * 86400)	/* 1 day */
#define	FIVE_DAYS	(5 * ONE_DAY)

/*
 * Second hashing step: add the seed stored at slot (x % table size) to x and
 * reduce the sum modulo the table size again.  Relies on a local "softs".
 */
#define	DOUBLE_HASH(x)	\
	(((x) + softs->ipf_state_seed[(x) % softs->ipf_state_size]) % \
	 softs->ipf_state_size)
195 
196 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_main_load                                         */
/* Returns:     int - always 0 (nothing here can fail)                      */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Deliberately does nothing.  It is kept as a placeholder so that the call */
/* flow in other functions stays obvious.                                   */
/* ------------------------------------------------------------------------ */
int
ipf_state_main_load(void)
{
	return 0;
}
210 
211 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_main_unload                                       */
/* Returns:     int - always 0 (nothing here can fail)                      */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Deliberately does nothing.  It mirrors ipf_state_main_load and is kept   */
/* as a placeholder so that the call flow in other functions stays obvious. */
/* ------------------------------------------------------------------------ */
int
ipf_state_main_unload(void)
{
	return 0;
}
225 
226 
227 /* ------------------------------------------------------------------------ */
228 /* Function:    ipf_state_soft_create                                       */
229 /* Returns:     void *   - NULL = failure, else pointer to soft context     */
230 /* Parameters:  softc(I) - pointer to soft context main structure           */
231 /*                                                                          */
232 /* Create a new state soft context structure and populate it with the list  */
233 /* of tunables and other default settings.                                  */
234 /* ------------------------------------------------------------------------ */
235 void *
ipf_state_soft_create(ipf_main_softc_t * softc)236 ipf_state_soft_create(ipf_main_softc_t *softc)
237 {
238           ipf_state_softc_t *softs;
239 
240           KMALLOC(softs, ipf_state_softc_t *);
241           if (softs == NULL)
242                     return NULL;
243 
244           bzero((char *)softs, sizeof(*softs));
245 
246           softs->ipf_state_tune = ipf_tune_array_copy(softs,
247                                                                 sizeof(ipf_state_tuneables),
248                                                                 ipf_state_tuneables);
249           if (softs->ipf_state_tune == NULL) {
250                     ipf_state_soft_destroy(softc, softs);
251                     return NULL;
252           }
253           if (ipf_tune_array_link(softc, softs->ipf_state_tune) == -1) {
254                     ipf_state_soft_destroy(softc, softs);
255                     return NULL;
256           }
257 
258 #ifdef    IPFILTER_LOG
259           softs->ipf_state_logging = 1;
260 #else
261           softs->ipf_state_logging = 0;
262 #endif
263           softs->ipf_state_size = IPSTATE_SIZE,
264           softs->ipf_state_maxbucket = 0;
265           softs->ipf_state_wm_freq = IPF_TTLVAL(10);
266           softs->ipf_state_max = IPSTATE_MAX;
267           softs->ipf_state_wm_last = 0;
268           softs->ipf_state_wm_high = 99;
269           softs->ipf_state_wm_low = 90;
270           softs->ipf_state_inited = 0;
271           softs->ipf_state_lock = 0;
272           softs->ipf_state_doflush = 0;
273 
274           return softs;
275 }
276 
277 
278 /* ------------------------------------------------------------------------ */
279 /* Function:    ipf_state_soft_destroy                                      */
280 /* Returns:     Nil                                                         */
281 /* Parameters:  softc(I) - pointer to soft context main structure           */
282 /*              arg(I)   - pointer to local context to use                  */
283 /*                                                                          */
284 /* Undo only what we did in soft create: unlink and free the tunables and   */
285 /* free the soft context structure itself.                                  */
286 /* ------------------------------------------------------------------------ */
287 void
ipf_state_soft_destroy(ipf_main_softc_t * softc,void * arg)288 ipf_state_soft_destroy(ipf_main_softc_t *softc, void *arg)
289 {
290           ipf_state_softc_t *softs = arg;
291 
292           if (softs->ipf_state_tune != NULL) {
293                     ipf_tune_array_unlink(softc, softs->ipf_state_tune);
294                     KFREES(softs->ipf_state_tune, sizeof(ipf_state_tuneables));
295                     softs->ipf_state_tune = NULL;
296           }
297 
298           KFREE(softs);
299 }
300 
301 static void *
ipf_state_seed_alloc(u_int state_size,u_int state_max)302 ipf_state_seed_alloc(u_int state_size, u_int state_max)
303 {
304           u_int i;
305           u_long *state_seed;
306           KMALLOCS(state_seed, u_long *, state_size * sizeof(*state_seed));
307           if (state_seed == NULL)
308                     return NULL;
309 
310           for (i = 0; i < state_size; i++) {
311                     /*
312                      * XXX - ipf_state_seed[X] should be a random number of sorts.
313                      */
314 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
315                     state_seed[i] = cprng_fast32();
316 #else
317                     state_seed[i] = ((u_long)state_seed + i) * state_size;
318                     state_seed[i] ^= 0xa5a55a5a;
319                     state_seed[i] *= (u_long)state_seed;
320                     state_seed[i] ^= 0x5a5aa5a5;
321                     state_seed[i] *= state_max;
322 #endif
323           }
324           return state_seed;
325 }
326 
327 
328 /* ------------------------------------------------------------------------ */
329 /* Function:    ipf_state_soft_init                                         */
330 /* Returns:     int      - 0 == success, -1 == failure                      */
331 /* Parameters:  softc(I) - pointer to soft context main structure           */
332 /*              arg(I)   - pointer to local context to use                  */
333 /*                                                                          */
334 /* Initialise the state soft context structure so it is ready for use.      */
335 /* This involves:                                                           */
336 /* - allocating a hash table and zero'ing it out                            */
337 /* - building a secondary table of seeds for double hashing to make it more */
338 /*   difficult to attempt to attack the hash table itself (for DoS)         */
339 /* - initialise all of the timeout queues, including a table for TCP, some  */
340 /*   pairs of query/response for UDP and other IP protocols (typically the  */
341 /*   reply queue has a shorter timeout than the query)                      */
342 /* ------------------------------------------------------------------------ */
343 int
ipf_state_soft_init(ipf_main_softc_t * softc,void * arg)344 ipf_state_soft_init(ipf_main_softc_t *softc, void *arg)
345 {
346           ipf_state_softc_t *softs = arg;
347           int i;
348 
349           KMALLOCS(softs->ipf_state_table,
350                      ipstate_t **, softs->ipf_state_size * sizeof(ipstate_t *));
351           if (softs->ipf_state_table == NULL)
352                     return -1;
353 
354           bzero((char *)softs->ipf_state_table,
355                 softs->ipf_state_size * sizeof(ipstate_t *));
356 
357           softs->ipf_state_seed = ipf_state_seed_alloc(softs->ipf_state_size,
358               softs->ipf_state_max);
359           if (softs->ipf_state_seed == NULL)
360                     return -2;
361 
362           KMALLOCS(softs->ipf_state_stats.iss_bucketlen, u_int *,
363                      softs->ipf_state_size * sizeof(u_int));
364           if (softs->ipf_state_stats.iss_bucketlen == NULL)
365                     return -3;
366 
367           bzero((char *)softs->ipf_state_stats.iss_bucketlen,
368                 softs->ipf_state_size * sizeof(u_int));
369 
370           if (softs->ipf_state_maxbucket == 0) {
371                     for (i = softs->ipf_state_size; i > 0; i >>= 1)
372                               softs->ipf_state_maxbucket++;
373                     softs->ipf_state_maxbucket *= 2;
374           }
375 
376           ipf_sttab_init(softc, softs->ipf_state_tcptq);
377           softs->ipf_state_stats.iss_tcptab = softs->ipf_state_tcptq;
378           softs->ipf_state_tcptq[IPF_TCP_NSTATES - 1].ifq_next =
379                                                             &softs->ipf_state_udptq;
380 
381           IPFTQ_INIT(&softs->ipf_state_udptq, softc->ipf_udptimeout,
382                        "ipftq udp tab");
383           softs->ipf_state_udptq.ifq_next = &softs->ipf_state_udpacktq;
384 
385           IPFTQ_INIT(&softs->ipf_state_udpacktq, softc->ipf_udpacktimeout,
386                        "ipftq udpack tab");
387           softs->ipf_state_udpacktq.ifq_next = &softs->ipf_state_icmptq;
388 
389           IPFTQ_INIT(&softs->ipf_state_icmptq, softc->ipf_icmptimeout,
390                        "ipftq icmp tab");
391           softs->ipf_state_icmptq.ifq_next = &softs->ipf_state_icmpacktq;
392 
393           IPFTQ_INIT(&softs->ipf_state_icmpacktq, softc->ipf_icmpacktimeout,
394                       "ipftq icmpack tab");
395           softs->ipf_state_icmpacktq.ifq_next = &softs->ipf_state_iptq;
396 
397           IPFTQ_INIT(&softs->ipf_state_iptq, softc->ipf_iptimeout,
398                        "ipftq iptimeout tab");
399           softs->ipf_state_iptq.ifq_next = &softs->ipf_state_pending;
400 
401           IPFTQ_INIT(&softs->ipf_state_pending, IPF_HZ_DIVIDE, "ipftq pending");
402           softs->ipf_state_pending.ifq_next = &softs->ipf_state_deletetq;
403 
404           IPFTQ_INIT(&softs->ipf_state_deletetq, 1, "ipftq delete");
405           softs->ipf_state_deletetq.ifq_next = NULL;
406 
407           MUTEX_INIT(&softs->ipf_stinsert, "ipf state insert mutex");
408 
409 
410           softs->ipf_state_wm_last = softc->ipf_ticks;
411           softs->ipf_state_inited = 1;
412 
413           return 0;
414 }
415 
416 
417 /* ------------------------------------------------------------------------ */
418 /* Function:    ipf_state_soft_fini                                         */
419 /* Returns:     int      - 0 = success, -1 = failure                        */
420 /* Parameters:  softc(I) - pointer to soft context main structure           */
421 /*              arg(I)   - pointer to local context to use                  */
422 /*                                                                          */
423 /* Release and destroy any resources acquired or initialised so that        */
424 /* IPFilter can be unloaded or re-initialised.                              */
425 /* ------------------------------------------------------------------------ */
426 int
ipf_state_soft_fini(ipf_main_softc_t * softc,void * arg)427 ipf_state_soft_fini(ipf_main_softc_t *softc, void *arg)
428 {
429           ipf_state_softc_t *softs = arg;
430           ipftq_t *ifq, *ifqnext;
431           ipstate_t *is;
432 
433           while ((is = softs->ipf_state_list) != NULL)
434                     ipf_state_del(softc, is, ISL_UNLOAD);
435 
436           /*
437            * Proxy timeout queues are not cleaned here because although they
438            * exist on the state list, appr_unload is called after
439            * ipf_state_unload and the proxies actually are responsible for them
440            * being created. Should the proxy timeouts have their own list?
441            * There's no real justification as this is the only complication.
442            */
443           for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
444                     ifqnext = ifq->ifq_next;
445 
446                     if (ipf_deletetimeoutqueue(ifq) == 0)
447                               ipf_freetimeoutqueue(softc, ifq);
448           }
449 
450           softs->ipf_state_stats.iss_inuse = 0;
451           softs->ipf_state_stats.iss_active = 0;
452 
453           if (softs->ipf_state_inited == 1) {
454                     softs->ipf_state_inited = 0;
455                     ipf_sttab_destroy(softs->ipf_state_tcptq);
456                     MUTEX_DESTROY(&softs->ipf_state_udptq.ifq_lock);
457                     MUTEX_DESTROY(&softs->ipf_state_icmptq.ifq_lock);
458                     MUTEX_DESTROY(&softs->ipf_state_udpacktq.ifq_lock);
459                     MUTEX_DESTROY(&softs->ipf_state_icmpacktq.ifq_lock);
460                     MUTEX_DESTROY(&softs->ipf_state_iptq.ifq_lock);
461                     MUTEX_DESTROY(&softs->ipf_state_deletetq.ifq_lock);
462                     MUTEX_DESTROY(&softs->ipf_state_pending.ifq_lock);
463                     MUTEX_DESTROY(&softs->ipf_stinsert);
464           }
465 
466           if (softs->ipf_state_table != NULL) {
467                     KFREES(softs->ipf_state_table,
468                            softs->ipf_state_size * sizeof(*softs->ipf_state_table));
469                     softs->ipf_state_table = NULL;
470           }
471 
472           if (softs->ipf_state_seed != NULL) {
473                     KFREES(softs->ipf_state_seed,
474                            softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
475                     softs->ipf_state_seed = NULL;
476           }
477 
478           if (softs->ipf_state_stats.iss_bucketlen != NULL) {
479                     KFREES(softs->ipf_state_stats.iss_bucketlen,
480                            softs->ipf_state_size * sizeof(u_int));
481                     softs->ipf_state_stats.iss_bucketlen = NULL;
482           }
483 
484           return 0;
485 }
486 
487 
488 /* ------------------------------------------------------------------------ */
489 /* Function:    ipf_state_set_lock                                          */
490 /* Returns:     Nil                                                         */
491 /* Parameters:  arg(I) - pointer to local context to use                    */
492 /*              tmp(I) - new value for lock                                 */
493 /*                                                                          */
494 /* Stub function that allows for external manipulation of ipf_state_lock    */
495 /* ------------------------------------------------------------------------ */
496 void
ipf_state_setlock(void * arg,int tmp)497 ipf_state_setlock(void *arg, int tmp)
498 {
499           ipf_state_softc_t *softs = arg;
500 
501           softs->ipf_state_lock = tmp;
502 }
503 
504 
505 /* ------------------------------------------------------------------------ */
506 /* Function:    ipf_state_stats                                             */
507 /* Returns:     ips_state_t* - pointer to state stats structure             */
508 /* Parameters:  softc(I) - pointer to soft context main structure           */
509 /*                                                                          */
510 /* Put all the current numbers and pointers into a single struct and return */
511 /* a pointer to it.                                                         */
512 /* ------------------------------------------------------------------------ */
513 static ips_stat_t *
ipf_state_stats(ipf_main_softc_t * softc)514 ipf_state_stats(ipf_main_softc_t *softc)
515 {
516           ipf_state_softc_t *softs = softc->ipf_state_soft;
517           ips_stat_t *issp = &softs->ipf_state_stats;
518 
519           issp->iss_state_size = softs->ipf_state_size;
520           issp->iss_state_max = softs->ipf_state_max;
521           issp->iss_table = softs->ipf_state_table;
522           issp->iss_list = softs->ipf_state_list;
523           issp->iss_ticks = softc->ipf_ticks;
524 
525 #ifdef IPFILTER_LOGGING
526           issp->iss_log_ok = ipf_log_logok(softc, IPF_LOGSTATE);
527           issp->iss_log_fail = ipf_log_failures(softc, IPF_LOGSTATE);
528 #else
529           issp->iss_log_ok = 0;
530           issp->iss_log_fail = 0;
531 #endif
532           return issp;
533 }
534 
535 /* ------------------------------------------------------------------------ */
536 /* Function:    ipf_state_remove                                            */
537 /* Returns:     int - 0 == success, != 0 == failure                         */
538 /* Parameters:  softc(I) - pointer to soft context main structure           */
539 /*              data(I)  - pointer to state structure to delete from table  */
540 /*                                                                          */
541 /* Search for a state structure that matches the one passed, according to   */
542 /* the IP addresses and other protocol specific information.                */
543 /* ------------------------------------------------------------------------ */
544 static int
ipf_state_remove(ipf_main_softc_t * softc,void * data)545 ipf_state_remove(ipf_main_softc_t *softc, void *data)
546 {
547           ipf_state_softc_t *softs = softc->ipf_state_soft;
548           ipstate_t *sp, st;
549           int error;
550 
551           sp = &st;
552           error = ipf_inobj(softc, data, NULL, &st, IPFOBJ_IPSTATE);
553           if (error)
554                     return EFAULT;
555 
556           WRITE_ENTER(&softc->ipf_state);
557           for (sp = softs->ipf_state_list; sp; sp = sp->is_next)
558                     if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
559                         !bcmp((void *)&sp->is_src, (void *)&st.is_src,
560                                 sizeof(st.is_src)) &&
561                         !bcmp((void *)&sp->is_dst, (void *)&st.is_src,
562                                 sizeof(st.is_dst)) &&
563                         !bcmp((void *)&sp->is_ps, (void *)&st.is_ps,
564                                 sizeof(st.is_ps))) {
565                               ipf_state_del(softc, sp, ISL_REMOVE);
566                               RWLOCK_EXIT(&softc->ipf_state);
567                               return 0;
568                     }
569           RWLOCK_EXIT(&softc->ipf_state);
570 
571           IPFERROR(100001);
572           return ESRCH;
573 }
574 
575 
576 /* ------------------------------------------------------------------------ */
577 /* Function:    ipf_state_ioctl                                             */
578 /* Returns:     int - 0 == success, != 0 == failure                         */
579 /* Parameters:  softc(I) - pointer to soft context main structure           */
580 /*              data(I)  - pointer to ioctl data                            */
581 /*              cmd(I)   - ioctl command integer                            */
582 /*              mode(I)  - file mode bits used with open                    */
583 /*              uid(I)   - uid of process making the ioctl call             */
584 /*              ctx(I)   - pointer specific to context of the call          */
585 /*                                                                          */
586 /* Processes an ioctl call made to operate on the IP Filter state device.   */
587 /* ------------------------------------------------------------------------ */
588 int
ipf_state_ioctl(ipf_main_softc_t * softc,void * data,ioctlcmd_t cmd,int mode,int uid,void * ctx)589 ipf_state_ioctl(ipf_main_softc_t *softc, void *data, ioctlcmd_t cmd, int mode,
590     int uid, void *ctx)
591 {
592           ipf_state_softc_t *softs = softc->ipf_state_soft;
593           int arg, ret, error = 0;
594           SPL_INT(s);
595 
596           switch (cmd)
597           {
598           /*
599            * Delete an entry from the state table.
600            */
601           case SIOCDELST :
602                     error = ipf_state_remove(softc, data);
603                     break;
604 
605           /*
606            * Flush the state table
607            */
608           case SIOCIPFFL :
609                     error = BCOPYIN(data, &arg, sizeof(arg));
610                     if (error != 0) {
611                               IPFERROR(100002);
612                               error = EFAULT;
613 
614                     } else {
615                               WRITE_ENTER(&softc->ipf_state);
616                               ret = ipf_state_flush(softc, arg, 4);
617                               RWLOCK_EXIT(&softc->ipf_state);
618 
619                               error = BCOPYOUT(&ret, data, sizeof(ret));
620                               if (error != 0) {
621                                         IPFERROR(100003);
622                                         error = EFAULT;
623                               }
624                     }
625                     break;
626 
627 #ifdef    USE_INET6
628           case SIOCIPFL6 :
629                     error = BCOPYIN(data, &arg, sizeof(arg));
630                     if (error != 0) {
631                               IPFERROR(100004);
632                               error = EFAULT;
633 
634                     } else {
635                               WRITE_ENTER(&softc->ipf_state);
636                               ret = ipf_state_flush(softc, arg, 6);
637                               RWLOCK_EXIT(&softc->ipf_state);
638 
639                               error = BCOPYOUT(&ret, data, sizeof(ret));
640                               if (error != 0) {
641                                         IPFERROR(100005);
642                                         error = EFAULT;
643                               }
644                     }
645                     break;
646 #endif
647 
648           case SIOCMATCHFLUSH :
649                     WRITE_ENTER(&softc->ipf_state);
650                     error = ipf_state_matchflush(softc, data);
651                     RWLOCK_EXIT(&softc->ipf_state);
652                     break;
653 
654 #ifdef    IPFILTER_LOG
655           /*
656            * Flush the state log.
657            */
658           case SIOCIPFFB :
659                     if (!(mode & FWRITE)) {
660                               IPFERROR(100008);
661                               error = EPERM;
662                     } else {
663                               int tmp;
664 
665                               tmp = ipf_log_clear(softc, IPL_LOGSTATE);
666                               error = BCOPYOUT(&tmp, data, sizeof(tmp));
667                               if (error != 0) {
668                                         IPFERROR(100009);
669                                         error = EFAULT;
670                               }
671                     }
672                     break;
673 
674           /*
675            * Turn logging of state information on/off.
676            */
677           case SIOCSETLG :
678                     if (!(mode & FWRITE)) {
679                               IPFERROR(100010);
680                               error = EPERM;
681                     } else {
682                               error = BCOPYIN(data, &softs->ipf_state_logging,
683                                                   sizeof(softs->ipf_state_logging));
684                               if (error != 0) {
685                                         IPFERROR(100011);
686                                         error = EFAULT;
687                               }
688                     }
689                     break;
690 
691           /*
692            * Return the current state of logging.
693            */
694           case SIOCGETLG :
695                     error = BCOPYOUT(&softs->ipf_state_logging, data,
696                                          sizeof(softs->ipf_state_logging));
697                     if (error != 0) {
698                               IPFERROR(100012);
699                               error = EFAULT;
700                     }
701                     break;
702 
703           /*
704            * Return the number of bytes currently waiting to be read.
705            */
706           case FIONREAD :
707                     arg = ipf_log_bytesused(softc, IPL_LOGSTATE);
708                     error = BCOPYOUT(&arg, data, sizeof(arg));
709                     if (error != 0) {
710                               IPFERROR(100013);
711                               error = EFAULT;
712                     }
713                     break;
714 #endif
715 
716           /*
717            * Get the current state statistics.
718            */
719           case SIOCGETFS :
720                     error = ipf_outobj(softc, data, ipf_state_stats(softc),
721                                            IPFOBJ_STATESTAT);
722                     break;
723 
724           /*
725            * Lock/Unlock the state table.  (Locking prevents any changes, which
726            * means no packets match).
727            */
728           case SIOCSTLCK :
729                     if (!(mode & FWRITE)) {
730                               IPFERROR(100014);
731                               error = EPERM;
732                     } else {
733                               error = ipf_lock(data, &softs->ipf_state_lock);
734                     }
735                     break;
736 
737           /*
738            * Add an entry to the current state table.
739            */
740           case SIOCSTPUT :
741                     if (!softs->ipf_state_lock || !(mode &FWRITE)) {
742                               IPFERROR(100015);
743                               error = EACCES;
744                               break;
745                     }
746                     error = ipf_state_putent(softc, softs, data);
747                     break;
748 
749           /*
750            * Get a state table entry.
751            */
752           case SIOCSTGET :
753                     if (!softs->ipf_state_lock) {
754                               IPFERROR(100016);
755                               error = EACCES;
756                               break;
757                     }
758                     error = ipf_state_getent(softc, softs, data);
759                     break;
760 
761           /*
762            * Return a copy of the hash table bucket lengths
763            */
764           case SIOCSTAT1 :
765                     error = BCOPYOUT(softs->ipf_state_stats.iss_bucketlen, data,
766                                          softs->ipf_state_size * sizeof(u_int));
767                     if (error != 0) {
768                               IPFERROR(100017);
769                               error = EFAULT;
770                     }
771                     break;
772 
773           case SIOCGENITER :
774               {
775                     ipftoken_t *token;
776                     ipfgeniter_t iter;
777                     ipfobj_t obj;
778 
779                     error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER);
780                     if (error != 0)
781                               break;
782 
783                     SPL_SCHED(s);
784                     token = ipf_token_find(softc, IPFGENITER_STATE, uid, ctx);
785                     if (token != NULL) {
786                               error = ipf_state_iter(softc, token, &iter, &obj);
787                               WRITE_ENTER(&softc->ipf_tokens);
788                               ipf_token_deref(softc, token);
789                               RWLOCK_EXIT(&softc->ipf_tokens);
790                     } else {
791                               IPFERROR(100018);
792                               error = ESRCH;
793                     }
794                     SPL_X(s);
795                     break;
796               }
797 
798           case SIOCGTABL :
799                     error = ipf_state_gettable(softc, softs, data);
800                     break;
801 
802           case SIOCIPFDELTOK :
803                     error = BCOPYIN(data, &arg, sizeof(arg));
804                     if (error != 0) {
805                               IPFERROR(100019);
806                               error = EFAULT;
807                     } else {
808                               SPL_SCHED(s);
809                               error = ipf_token_del(softc, arg, uid, ctx);
810                               SPL_X(s);
811                     }
812                     break;
813 
814           case SIOCGTQTAB :
815                     error = ipf_outobj(softc, data, softs->ipf_state_tcptq,
816                                            IPFOBJ_STATETQTAB);
817                     break;
818 
819           default :
820                     IPFERROR(100020);
821                     error = EINVAL;
822                     break;
823           }
824           return error;
825 }
826 
827 
828 /* ------------------------------------------------------------------------ */
829 /* Function:    ipf_state_getent                                            */
830 /* Returns:     int - 0 == success, != 0 == failure                         */
831 /* Parameters:  softc(I) - pointer to soft context main structure           */
832 /*              softs(I) - pointer to state context structure               */
833 /*              data(I)  - pointer to state structure to retrieve from table*/
834 /*                                                                          */
835 /* Copy out state information from the kernel to a user space process.  If  */
836 /* there is a filter rule associated with the state entry, copy that out    */
837 /* as well.  The entry to copy out is taken from the value of "ips_next" in */
838 /* the struct passed in and if not null and not found in the list of current*/
839 /* state entries, the retrieval fails.                                      */
840 /* ------------------------------------------------------------------------ */
841 static int
ipf_state_getent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,void * data)842 ipf_state_getent(ipf_main_softc_t *softc, ipf_state_softc_t *softs, void *data)
843 {
844           ipstate_t *is, *isn;
845           ipstate_save_t ips;
846           int error;
847 
848           error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
849           if (error)
850                     return EFAULT;
851 
852           READ_ENTER(&softc->ipf_state);
853           isn = ips.ips_next;
854           if (isn == NULL) {
855                     isn = softs->ipf_state_list;
856                     if (isn == NULL) {
857                               if (ips.ips_next == NULL) {
858                                         RWLOCK_EXIT(&softc->ipf_state);
859                                         IPFERROR(100021);
860                                         return ENOENT;
861                               }
862                               return 0;
863                     }
864           } else {
865                     /*
866                      * Make sure the pointer we're copying from exists in the
867                      * current list of entries.  Security precaution to prevent
868                      * copying of random kernel data.
869                      */
870                     for (is = softs->ipf_state_list; is; is = is->is_next)
871                               if (is == isn)
872                                         break;
873                     if (!is) {
874                               RWLOCK_EXIT(&softc->ipf_state);
875                               IPFERROR(100022);
876                               return ESRCH;
877                     }
878           }
879           ips.ips_next = isn->is_next;
880           bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
881           ips.ips_rule = isn->is_rule;
882           if (isn->is_rule != NULL)
883                     bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
884                           sizeof(ips.ips_fr));
885           RWLOCK_EXIT(&softc->ipf_state);
886           error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
887           return error;
888 }
889 
890 
891 /* ------------------------------------------------------------------------ */
892 /* Function:    ipf_state_putent                                            */
893 /* Returns:     int - 0 == success, != 0 == failure                         */
894 /* Parameters:  softc(I) - pointer to soft context main structure           */
895 /*              softs(I) - pointer to state context structure               */
896 /*              data(I)  - pointer to state information struct              */
897 /*                                                                          */
898 /* This function implements the SIOCSTPUT ioctl: insert a state entry into  */
899 /* the state table.  If the state info. includes a pointer to a filter rule */
900 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
901 /* output.                                                                  */
902 /* ------------------------------------------------------------------------ */
903 int
ipf_state_putent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,void * data)904 ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs, void *data)
905 {
906           ipstate_t *is, *isn;
907           ipstate_save_t ips;
908           int error, i;
909           frentry_t *fr;
910           char *name;
911 
912           error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
913           if (error != 0)
914                     return error;
915 
916           KMALLOC(isn, ipstate_t *);
917           if (isn == NULL) {
918                     IPFERROR(100023);
919                     return ENOMEM;
920           }
921 
922           bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
923           bzero((char *)isn, offsetof(struct ipstate, is_pkts));
924           isn->is_sti.tqe_pnext = NULL;
925           isn->is_sti.tqe_next = NULL;
926           isn->is_sti.tqe_ifq = NULL;
927           isn->is_sti.tqe_parent = isn;
928           isn->is_ifp[0] = NULL;
929           isn->is_ifp[1] = NULL;
930           isn->is_ifp[2] = NULL;
931           isn->is_ifp[3] = NULL;
932           isn->is_sync = NULL;
933           fr = ips.ips_rule;
934 
935           if (fr == NULL) {
936                     int inserr;
937 
938                     READ_ENTER(&softc->ipf_state);
939                     inserr = ipf_state_insert(softc, isn, 0);
940                     MUTEX_EXIT(&isn->is_lock);
941                     RWLOCK_EXIT(&softc->ipf_state);
942 
943                     return inserr;
944           }
945 
946           if (isn->is_flags & SI_NEWFR) {
947                     KMALLOC(fr, frentry_t *);
948                     if (fr == NULL) {
949                               KFREE(isn);
950                               IPFERROR(100024);
951                               return ENOMEM;
952                     }
953                     bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
954                     isn->is_rule = fr;
955                     ips.ips_is.is_rule = fr;
956                     MUTEX_NUKE(&fr->fr_lock);
957                     MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
958 
959                     /*
960                      * Look up all the interface names in the rule.
961                      */
962                     for (i = 0; i < 4; i++) {
963                               if (fr->fr_ifnames[i] == -1) {
964                                         fr->fr_ifas[i] = NULL;
965                                         continue;
966                               }
967                               name = fr->fr_names + fr->fr_ifnames[i];
968                               fr->fr_ifas[i] = ipf_resolvenic(softc, name,
969                                                                       fr->fr_family);
970                     }
971 
972                     for (i = 0; i < 4; i++) {
973                               name = isn->is_ifname[i];
974                               isn->is_ifp[i] = ipf_resolvenic(softc, name,
975                                                                       isn->is_v);
976                     }
977 
978                     fr->fr_ref = 0;
979                     fr->fr_dsize = 0;
980                     fr->fr_data = NULL;
981                     fr->fr_type = FR_T_NONE;
982 
983                     (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[0],
984                                         fr->fr_family);
985                     (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[1],
986                                         fr->fr_family);
987                     (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_dif,
988                                         fr->fr_family);
989 
990                     /*
991                      * send a copy back to userland of what we ended up
992                      * to allow for verification.
993                      */
994                     error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
995                     if (error != 0) {
996                               KFREE(isn);
997                               MUTEX_DESTROY(&fr->fr_lock);
998                               KFREE(fr);
999                               IPFERROR(100025);
1000                               return EFAULT;
1001                     }
1002                     READ_ENTER(&softc->ipf_state);
1003                     error = ipf_state_insert(softc, isn, 0);
1004                     MUTEX_EXIT(&isn->is_lock);
1005                     RWLOCK_EXIT(&softc->ipf_state);
1006 
1007           } else {
1008                     READ_ENTER(&softc->ipf_state);
1009                     for (is = softs->ipf_state_list; is; is = is->is_next)
1010                               if (is->is_rule == fr) {
1011                                         error = ipf_state_insert(softc, isn, 0);
1012                                         MUTEX_EXIT(&isn->is_lock);
1013                                         break;
1014                               }
1015 
1016                     if (is == NULL) {
1017                               KFREE(isn);
1018                               isn = NULL;
1019                     }
1020                     RWLOCK_EXIT(&softc->ipf_state);
1021 
1022                     if (isn == NULL) {
1023                               IPFERROR(100033);
1024                               error = ESRCH;
1025                     }
1026           }
1027 
1028           return error;
1029 }
1030 
1031 
1032 /* ------------------------------------------------------------------------ */
1033 /* Function:    ipf_state_insert                                            */
1034 /* Returns:     int    - 0 == success, -1 == failure                        */
1035 /* Parameters:  softc(I) - pointer to soft context main structure           */
1036 /* Parameters:  is(I)    - pointer to state structure                       */
1037 /*              rev(I) - flag indicating direction of packet                */
1038 /*                                                                          */
1039 /* Inserts a state structure into the hash table (for lookups) and the list */
1040 /* of state entries (for enumeration).  Resolves all of the interface names */
1041 /* to pointers and adjusts running stats for the hash table as appropriate. */
1042 /*                                                                          */
1043 /* This function can fail if the filter rule has had a population policy of */
1044 /* IP addresses used with stateful filtering assigned to it.                */
1045 /*                                                                          */
1046 /* Locking: it is assumed that some kind of lock on ipf_state is held.      */
1047 /*          Exits with is_lock initialised and held - *EVEN IF ERROR*.      */
1048 /* ------------------------------------------------------------------------ */
1049 int
ipf_state_insert(ipf_main_softc_t * softc,ipstate_t * is,int rev)1050 ipf_state_insert(ipf_main_softc_t *softc, ipstate_t *is, int rev)
1051 {
1052           ipf_state_softc_t *softs = softc->ipf_state_soft;
1053           frentry_t *fr;
1054           u_int hv;
1055           int i;
1056 
1057           /*
1058            * Look up all the interface names in the state entry.
1059            */
1060           for (i = 0; i < 4; i++) {
1061                     if (is->is_ifp[i] != NULL)
1062                               continue;
1063                     is->is_ifp[i] = ipf_resolvenic(softc, is->is_ifname[i],
1064                                                          is->is_v);
1065           }
1066 
1067           /*
1068            * If we could trust is_hv, then the modulus would not be needed,
1069            * but when running with IPFILTER_SYNC, this stops bad values.
1070            */
1071           hv = is->is_hv % softs->ipf_state_size;
1072           /* TRACE is, hv */
1073           is->is_hv = hv;
1074 
1075           /*
1076            * We need to get both of these locks...the first because it is
1077            * possible that once the insert is complete another packet might
1078            * come along, match the entry and want to update it.
1079            */
1080           MUTEX_INIT(&is->is_lock, "ipf state entry");
1081           MUTEX_ENTER(&is->is_lock);
1082           MUTEX_ENTER(&softs->ipf_stinsert);
1083 
1084           fr = is->is_rule;
1085           if (fr != NULL) {
1086                     if ((fr->fr_srctrack.ht_max_nodes != 0) &&
1087                         (ipf_ht_node_add(softc, &fr->fr_srctrack,
1088                                              is->is_family, &is->is_src) == -1)) {
1089                               SBUMPD(ipf_state_stats, iss_max_track);
1090                               MUTEX_EXIT(&softs->ipf_stinsert);
1091                               return -1;
1092                     }
1093 
1094                     MUTEX_ENTER(&fr->fr_lock);
1095                     fr->fr_ref++;
1096                     MUTEX_EXIT(&fr->fr_lock);
1097                     fr->fr_statecnt++;
1098           }
1099 
1100           if (is->is_flags & (SI_WILDP|SI_WILDA)) {
1101                     DT(iss_wild_plus_one);
1102                     SINCL(ipf_state_stats.iss_wild);
1103           }
1104 
1105           SBUMP(ipf_state_stats.iss_proto[is->is_p]);
1106           SBUMP(ipf_state_stats.iss_active_proto[is->is_p]);
1107 
1108           /*
1109            * add into list table.
1110            */
1111           if (softs->ipf_state_list != NULL)
1112                     softs->ipf_state_list->is_pnext = &is->is_next;
1113           is->is_pnext = &softs->ipf_state_list;
1114           is->is_next = softs->ipf_state_list;
1115           softs->ipf_state_list = is;
1116 
1117           if (softs->ipf_state_table[hv] != NULL)
1118                     softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
1119           else
1120                     softs->ipf_state_stats.iss_inuse++;
1121           is->is_phnext = softs->ipf_state_table + hv;
1122           is->is_hnext = softs->ipf_state_table[hv];
1123           softs->ipf_state_table[hv] = is;
1124           softs->ipf_state_stats.iss_bucketlen[hv]++;
1125           softs->ipf_state_stats.iss_active++;
1126           MUTEX_EXIT(&softs->ipf_stinsert);
1127 
1128           ipf_state_setqueue(softc, is, rev);
1129 
1130           return 0;
1131 }
1132 
1133 
1134 /* ------------------------------------------------------------------------ */
1135 /* Function:    ipf_state_matchipv4addrs                                    */
1136 /* Returns:     int - 2 addresses match (strong match), 1 reverse match,    */
1137 /*                    0 no match                                            */
1138 /* Parameters:  is1, is2 pointers to states we are checking                 */
1139 /*                                                                          */
1140 /* Function matches IPv4 addresses it returns strong match for ICMP proto   */
1141 /* even there is only reverse match                                         */
1142 /* ------------------------------------------------------------------------ */
1143 static int
ipf_state_matchipv4addrs(ipstate_t * is1,ipstate_t * is2)1144 ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2)
1145 {
1146           int       rv;
1147 
1148           if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr)
1149                     rv = 2;
1150           else if (is1->is_saddr == is2->is_daddr &&
1151               is1->is_daddr == is2->is_saddr) {
1152                     /* force strong match for ICMP protocol */
1153                     rv = (is1->is_p == IPPROTO_ICMP) ? 2 : 1;
1154           }
1155           else
1156                     rv = 0;
1157 
1158           return (rv);
1159 }
1160 
1161 
1162 /* ------------------------------------------------------------------------ */
1163 /* Function:    ipf_state_matchipv6addrs                                    */
1164 /* Returns:     int - 2 addresses match (strong match), 1 reverse match,    */
1165 /*                    0 no match                                            */
1166 /* Parameters:  is1, is2 pointers to states we are checking                 */
1167 /*                                                                          */
1168 /* Function matches IPv6 addresses it returns strong match for ICMP proto   */
1169 /* even there is only reverse match                                         */
1170 /* ------------------------------------------------------------------------ */
1171 static int
ipf_state_matchipv6addrs(ipstate_t * is1,ipstate_t * is2)1172 ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2)
1173 {
1174           int       rv;
1175 
1176           if (IP6_EQ(&is1->is_src, &is2->is_src) &&
1177               IP6_EQ(&is1->is_dst, &is2->is_dst))
1178                     rv = 2;
1179           else if (IP6_EQ(&is1->is_src, &is2->is_dst) &&
1180               IP6_EQ(&is1->is_dst, &is2->is_src)) {
1181                     /* force strong match for ICMPv6 protocol */
1182                     rv = (is1->is_p == IPPROTO_ICMPV6) ? 2 : 1;
1183           }
1184           else
1185                     rv = 0;
1186 
1187           return (rv);
1188 }
1189 
1190 
1191 /* ------------------------------------------------------------------------ */
1192 /* Function:    ipf_state_matchaddresses                                    */
1193 /* Returns:     int - 2 addresses match, 1 reverse match, zero no match     */
1194 /* Parameters:  is1, is2 pointers to states we are checking                 */
1195 /*                                                                          */
1196 /* function retruns true if two pairs of addresses belong to single         */
1197 /* connection. suppose there are two endpoints:                             */
1198 /*      endpoint1 1.1.1.1                                                   */
1199 /*      endpoint2 1.1.1.2                                                   */
1200 /*                                                                          */
1201 /* the state is established by packet flying from .1 to .2 so we see:       */
1202 /*      is1->src = 1.1.1.1                                                  */
1203 /*      is1->dst = 1.1.1.2                                                  */
1204 /* now endpoint 1.1.1.2 sends answer                                        */
1205 /* retreives is1 record created by first packat and compares it with is2    */
1206 /* temporal record, is2 is initialized as follows:                          */
1207 /*      is2->src = 1.1.1.2                                                  */
1208 /*      is2->dst = 1.1.1.1                                                  */
1209 /* in this case 1 will be returned                                          */
1210 /*                                                                          */
1211 /* the ipf_matchaddresses() assumes those two records to be same. of course */
1212 /* the ipf_matchaddresses() also assume records are same in case you pass   */
1213 /* identical arguments (i.e. ipf_matchaddress(is1, is1) would return 2      */
1214 /* ------------------------------------------------------------------------ */
1215 static int
ipf_state_matchaddresses(ipstate_t * is1,ipstate_t * is2)1216 ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2)
1217 {
1218           int       rv;
1219 
1220           if (is1->is_v == 4) {
1221                     rv = ipf_state_matchipv4addrs(is1, is2);
1222           }
1223           else {
1224                     rv = ipf_state_matchipv6addrs(is1, is2);
1225           }
1226 
1227           return (rv);
1228 }
1229 
1230 
1231 /* ------------------------------------------------------------------------ */
1232 /* Function:    ipf_matchports                                              */
1233 /* Returns:     int - 2 match, 1 rverse match, 0 no match                   */
1234 /* Parameters:  ppairs1, ppairs - src, dst ports we want to match           */
1235 /*                                                                          */
1236 /* performs the same match for isps members as for addresses                */
1237 /* ------------------------------------------------------------------------ */
1238 static int
ipf_state_matchports(udpinfo_t * ppairs1,udpinfo_t * ppairs2)1239 ipf_state_matchports(udpinfo_t *ppairs1, udpinfo_t *ppairs2)
1240 {
1241           int       rv;
1242 
1243           if (ppairs1->us_sport == ppairs2->us_sport &&
1244               ppairs1->us_dport == ppairs2->us_dport)
1245                     rv = 2;
1246           else if (ppairs1->us_sport == ppairs2->us_dport &&
1247                         ppairs1->us_dport == ppairs2->us_sport)
1248                     rv = 1;
1249           else
1250                     rv = 0;
1251 
1252           return (rv);
1253 }
1254 
1255 
1256 /* ------------------------------------------------------------------------ */
1257 /* Function:    ipf_matchisps                                               */
1258 /* Returns:     int - nonzero if isps members match, 0 nomatch              */
1259 /* Parameters:  is1, is2 - states we want to match                          */
1260 /*                                                                          */
1261 /* performs the same match for isps members as for addresses                */
1262 /* ------------------------------------------------------------------------ */
1263 static int
ipf_state_matchisps(ipstate_t * is1,ipstate_t * is2)1264 ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2)
1265 {
1266           int       rv;
1267 
1268           if (is1->is_p == is2->is_p) {
1269                     switch (is1->is_p)
1270                     {
1271                     case IPPROTO_TCP :
1272                     case IPPROTO_UDP :
1273                     case IPPROTO_GRE :
1274                               /* greinfo_t can be also interprted as port pair */
1275                               rv = ipf_state_matchports(&is1->is_ps.is_us,
1276                                                               &is2->is_ps.is_us);
1277                               break;
1278 
1279                     case IPPROTO_ICMP :
1280                     case IPPROTO_ICMPV6 :
1281                               /* force strong match for ICMP datagram. */
1282                               if (bcmp(&is1->is_ps, &is2->is_ps,
1283                                          sizeof(icmpinfo_t)) == 0)  {
1284                                         rv = 2;
1285                               } else {
1286                                         rv = 0;
1287                               }
1288                               break;
1289 
1290                     default:
1291                               rv = 0;
1292                     }
1293           } else {
1294                     rv = 0;
1295           }
1296 
1297           return (rv);
1298 }
1299 
1300 
1301 /* ------------------------------------------------------------------------ */
1302 /* Function:    ipf_state_match                                             */
1303 /* Returns:     int - nonzero match, zero no match                          */
1304 /* Parameters:  is1, is2 - states we want to match                          */
1305 /*                                                                          */
1306 /* ------------------------------------------------------------------------ */
1307 static int
ipf_state_match(ipstate_t * is1,ipstate_t * is2)1308 ipf_state_match(ipstate_t *is1, ipstate_t *is2)
1309 {
1310           int       rv;
1311           int       amatch;
1312           int       pomatch;
1313 
1314           if (bcmp(&is1->is_pass, &is2->is_pass,
1315                      offsetof(struct ipstate, is_authmsk) -
1316                      offsetof(struct ipstate, is_pass)) == 0) {
1317 
1318                     pomatch = ipf_state_matchisps(is1, is2);
1319                     amatch = ipf_state_matchaddresses(is1, is2);
1320                     rv = (amatch != 0) && (amatch == pomatch);
1321           } else {
1322                     rv = 0;
1323           }
1324 
1325           return (rv);
1326 }
1327 
1328 /* ------------------------------------------------------------------------ */
1329 /* Function:    ipf_state_add                                               */
/* Returns:     int - 0 == success, -1 == failure                           */
1331 /* Parameters:  softc(I)  - pointer to soft context main structure          */
1332 /*              fin(I)    - pointer to packet information                   */
1333 /*              stsave(O) - pointer to place to save pointer to created     */
1334 /*                          state structure.                                */
1335 /*              flags(I)  - flags to use when creating the structure        */
1336 /*                                                                          */
1337 /* Creates a new IP state structure from the packet information collected.  */
1338 /* Inserts it into the state table and appends to the bottom of the active  */
1339 /* list.  If the capacity of the table has reached the maximum allowed then */
1340 /* the call will fail and a flush is scheduled for the next timeout call.   */
1341 /*                                                                          */
1342 /* NOTE: The use of stsave to point to nat_state will result in memory      */
1343 /*       corruption.  It should only be used to point to objects that will  */
1344 /*       either outlive this (not expired) or will deref the ip_state_t     */
1345 /*       when they are deleted.                                             */
1346 /* ------------------------------------------------------------------------ */
1347 int
ipf_state_add(ipf_main_softc_t * softc,fr_info_t * fin,ipstate_t ** stsave,u_int flags)1348 ipf_state_add(ipf_main_softc_t *softc, fr_info_t *fin, ipstate_t **stsave,
1349     u_int flags)
1350 {
1351           ipf_state_softc_t *softs = softc->ipf_state_soft;
1352           ipstate_t *is, ips;
1353           struct icmp *ic;
1354           u_int pass, hv;
1355           frentry_t *fr;
1356           tcphdr_t *tcp;
1357           frdest_t *fdp;
1358           int out;
1359 
1360           /*
1361            * If a packet that was created locally is trying to go out but we
1362            * do not match here because of this lock, it is likely that
1363            * the policy will block it and return network unreachable back up
1364            * the stack. To mitigate this error, EAGAIN is returned instead,
1365            * telling the IP stack to try sending this packet again later.
1366            */
1367           if (softs->ipf_state_lock) {
1368                     SBUMPD(ipf_state_stats, iss_add_locked);
1369                     fin->fin_error = EAGAIN;
1370                     return -1;
1371           }
1372 
1373           if (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)) {
1374                     SBUMPD(ipf_state_stats, iss_add_bad);
1375                     return -1;
1376           }
1377 
1378           if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) {
1379                     SBUMPD(ipf_state_stats, iss_add_oow);
1380                     return -1;
1381           }
1382 
1383           if ((softs->ipf_state_stats.iss_active * 100 / softs->ipf_state_max) >
1384               softs->ipf_state_wm_high) {
1385                     softs->ipf_state_doflush = 1;
1386           }
1387 
1388           /*
1389            * If a "keep state" rule has reached the maximum number of references
1390            * to it, then schedule an automatic flush in case we can clear out
1391            * some "dead old wood".  Note that because the lock isn't held on
1392            * fr it is possible that we could overflow.  The cost of overflowing
1393            * is being ignored here as the number by which it can overflow is
1394            * a product of the number of simultaneous threads that could be
1395            * executing in here, so a limit of 100 won't result in 200, but could
1396            * result in 101 or 102.
1397            */
1398           fr = fin->fin_fr;
1399           if (fr != NULL) {
1400                     if ((softs->ipf_state_stats.iss_active >=
1401                          softs->ipf_state_max) && (fr->fr_statemax == 0)) {
1402                               SBUMPD(ipf_state_stats, iss_max);
1403                               return 1;
1404                     }
1405                     if ((fr->fr_statemax != 0) &&
1406                         (fr->fr_statecnt >= fr->fr_statemax)) {
1407                               SBUMPD(ipf_state_stats, iss_max_ref);
1408                               return 2;
1409                     }
1410           }
1411 
1412           is = &ips;
1413           if (fr == NULL) {
1414                     pass = softc->ipf_flags;
1415                     is->is_tag = FR_NOLOGTAG;
1416           } else {
1417                     pass = fr->fr_flags;
1418           }
1419 
1420           ic = NULL;
1421           tcp = NULL;
1422           out = fin->fin_out;
1423           bzero((char *)is, sizeof(*is));
1424           is->is_die = 1 + softc->ipf_ticks;
1425           /*
1426            * We want to check everything that is a property of this packet,
1427            * but we don't (automatically) care about its fragment status as
1428            * this may change.
1429            */
1430           is->is_pass = pass;
1431           is->is_v = fin->fin_v;
1432           is->is_sec = fin->fin_secmsk;
1433           is->is_secmsk = 0xffff;
1434           is->is_auth = fin->fin_auth;
1435           is->is_authmsk = 0xffff;
1436           is->is_family = fin->fin_family;
1437           is->is_opt[0] = fin->fin_optmsk;
1438           is->is_optmsk[0] = 0xffffffff;
1439           if (is->is_v == 6) {
1440                     is->is_opt[0] &= ~0x8;
1441                     is->is_optmsk[0] &= ~0x8;
1442           }
1443 
1444           /*
1445            * Copy and calculate...
1446            */
1447           hv = (is->is_p = fin->fin_fi.fi_p);
1448           is->is_src = fin->fin_fi.fi_src;
1449           hv += is->is_saddr;
1450           is->is_dst = fin->fin_fi.fi_dst;
1451           hv += is->is_daddr;
1452 #ifdef    USE_INET6
1453           if (fin->fin_v == 6) {
1454                     /*
1455                      * For ICMPv6, we check to see if the destination address is
1456                      * a multicast address.  If it is, do not include it in the
1457                      * calculation of the hash because the correct reply will come
1458                      * back from a real address, not a multicast address.
1459                      */
1460                     if ((is->is_p == IPPROTO_ICMPV6) &&
1461                         IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
1462                               /*
1463                                * So you can do keep state with neighbour discovery.
1464                                *
1465                                * Here we could use the address from the neighbour
1466                                * solicit message to put in the state structure and
1467                                * we could use that without a wildcard flag too...
1468                                */
1469                               flags |= SI_W_DADDR;
1470                               hv -= is->is_daddr;
1471                     } else {
1472                               hv += is->is_dst.i6[1];
1473                               hv += is->is_dst.i6[2];
1474                               hv += is->is_dst.i6[3];
1475                     }
1476                     hv += is->is_src.i6[1];
1477                     hv += is->is_src.i6[2];
1478                     hv += is->is_src.i6[3];
1479           }
1480 #endif
1481           if ((fin->fin_v == 4) &&
1482               (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
1483                     flags |= SI_W_DADDR;
1484                     hv -= is->is_daddr;
1485           }
1486 
1487           switch (is->is_p)
1488           {
1489 #ifdef    USE_INET6
1490           case IPPROTO_ICMPV6 :
1491                     ic = fin->fin_dp;
1492 
1493                     switch (ic->icmp_type)
1494                     {
1495                     case ICMP6_ECHO_REQUEST :
1496                               hv += (is->is_icmp.ici_id = ic->icmp_id);
1497                               /*FALLTHROUGH*/
1498                     case ICMP6_MEMBERSHIP_QUERY :
1499                     case ND_ROUTER_SOLICIT :
1500                     case ND_NEIGHBOR_SOLICIT :
1501                     case ICMP6_NI_QUERY :
1502                               is->is_icmp.ici_type = ic->icmp_type;
1503                               break;
1504                     default :
1505                               SBUMPD(ipf_state_stats, iss_icmp6_notquery);
1506                               return -2;
1507                     }
1508                     break;
1509 #endif
1510           case IPPROTO_ICMP :
1511                     ic = fin->fin_dp;
1512 
1513                     switch (ic->icmp_type)
1514                     {
1515                     case ICMP_ECHO :
1516                     case ICMP_TSTAMP :
1517                     case ICMP_IREQ :
1518                     case ICMP_MASKREQ :
1519                               is->is_icmp.ici_type = ic->icmp_type;
1520                               hv += (is->is_icmp.ici_id = ic->icmp_id);
1521                               break;
1522                     default :
1523                               SBUMPD(ipf_state_stats, iss_icmp_notquery);
1524                               return -3;
1525                     }
1526                     break;
1527 
1528 #if 0
1529           case IPPROTO_GRE :
1530                     gre = fin->fin_dp;
1531 
1532                     is->is_gre.gs_flags = gre->gr_flags;
1533                     is->is_gre.gs_ptype = gre->gr_ptype;
1534                     if (GRE_REV(is->is_gre.gs_flags) == 1) {
1535                               is->is_call[0] = fin->fin_data[0];
1536                               is->is_call[1] = fin->fin_data[1];
1537                     }
1538                     break;
1539 #endif
1540 
1541           case IPPROTO_TCP :
1542                     tcp = fin->fin_dp;
1543 
1544                     if (tcp->th_flags & TH_RST) {
1545                               SBUMPD(ipf_state_stats, iss_tcp_rstadd);
1546                               return -4;
1547                     }
1548 
1549                     /* TRACE is, flags, hv */
1550 
1551                     /*
1552                      * The endian of the ports doesn't matter, but the ack and
1553                      * sequence numbers do as we do mathematics on them later.
1554                      */
1555                     is->is_sport = htons(fin->fin_data[0]);
1556                     is->is_dport = htons(fin->fin_data[1]);
1557                     if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1558                               hv += is->is_sport;
1559                               hv += is->is_dport;
1560                     }
1561 
1562                     /* TRACE is, flags, hv */
1563 
1564                     /*
1565                      * If this is a real packet then initialise fields in the
1566                      * state information structure from the TCP header information.
1567                      */
1568 
1569                     is->is_maxdwin = 1;
1570                     is->is_maxswin = ntohs(tcp->th_win);
1571                     if (is->is_maxswin == 0)
1572                               is->is_maxswin = 1;
1573 
1574                     if ((fin->fin_flx & FI_IGNORE) == 0) {
1575                               is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
1576                                               (TCP_OFF(tcp) << 2) +
1577                                               ((tcp->th_flags & TH_SYN) ? 1 : 0) +
1578                                               ((tcp->th_flags & TH_FIN) ? 1 : 0);
1579                               is->is_maxsend = is->is_send;
1580 
1581                               /*
1582                                * Window scale option is only present in
1583                                * SYN/SYN-ACK packet.
1584                                */
1585                               if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
1586                                   TH_SYN &&
1587                                   (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
1588                                         if (ipf_tcpoptions(softs, fin, tcp,
1589                                                         &is->is_tcp.ts_data[0]) == -1)
1590                                                   fin->fin_flx |= FI_BAD;
1591                               }
1592 
1593                               if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
1594                                         ipf_checknewisn(fin, is);
1595                                         ipf_fixoutisn(fin, is);
1596                               }
1597 
1598                               if ((tcp->th_flags & TH_OPENING) == TH_SYN)
1599                                         flags |= IS_TCPFSM;
1600                               else {
1601                                         is->is_maxdwin = is->is_maxswin * 2;
1602                                         is->is_dend = ntohl(tcp->th_ack);
1603                                         is->is_maxdend = ntohl(tcp->th_ack);
1604                                         is->is_maxdwin *= 2;
1605                               }
1606                     }
1607 
1608                     /*
1609                      * If we're creating state for a starting connection, start
1610                      * the timer on it as we'll never see an error if it fails
1611                      * to connect.
1612                      */
1613                     break;
1614 
1615           case IPPROTO_UDP :
1616                     tcp = fin->fin_dp;
1617 
1618                     is->is_sport = htons(fin->fin_data[0]);
1619                     is->is_dport = htons(fin->fin_data[1]);
1620                     if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1621                               hv += tcp->th_dport;
1622                               hv += tcp->th_sport;
1623                     }
1624                     break;
1625 
1626           default :
1627                     break;
1628           }
1629           hv = DOUBLE_HASH(hv);
1630           is->is_hv = hv;
1631 
1632           /*
1633            * Look for identical state.
1634            */
1635           for (is = softs->ipf_state_table[hv % softs->ipf_state_size];
1636                is != NULL; is = is->is_hnext) {
1637                     if (ipf_state_match(&ips, is) == 1)
1638                               break;
1639           }
1640           if (is != NULL) {
1641                     SBUMPD(ipf_state_stats, iss_add_dup);
1642                     return 3;
1643           }
1644 
1645           if (softs->ipf_state_stats.iss_bucketlen[hv] >=
1646               softs->ipf_state_maxbucket) {
1647                     SBUMPD(ipf_state_stats, iss_bucket_full);
1648                     return 4;
1649           }
1650 
1651           /*
1652            * No existing state; create new
1653            */
1654           KMALLOC(is, ipstate_t *);
1655           if (is == NULL) {
1656                     SBUMPD(ipf_state_stats, iss_nomem);
1657                     return 5;
1658           }
1659           bcopy((char *)&ips, (char *)is, sizeof(*is));
1660           is->is_flags = flags & IS_INHERITED;
1661           is->is_rulen = fin->fin_rule;
1662           is->is_rule = fr;
1663 
1664           /*
1665            * Do not do the modulus here, it is done in ipf_state_insert().
1666            */
1667           if (fr != NULL) {
1668                     ipftq_t *tq;
1669 
1670                     (void) strncpy(is->is_group, FR_NAME(fr, fr_group),
1671                                      FR_GROUPLEN);
1672                     if (fr->fr_age[0] != 0) {
1673                               tq = ipf_addtimeoutqueue(softc,
1674                                                              &softs->ipf_state_usertq,
1675                                                              fr->fr_age[0]);
1676                               is->is_tqehead[0] = tq;
1677                               is->is_sti.tqe_flags |= TQE_RULEBASED;
1678                     }
1679                     if (fr->fr_age[1] != 0) {
1680                               tq = ipf_addtimeoutqueue(softc,
1681                                                              &softs->ipf_state_usertq,
1682                                                              fr->fr_age[1]);
1683                               is->is_tqehead[1] = tq;
1684                               is->is_sti.tqe_flags |= TQE_RULEBASED;
1685                     }
1686 
1687                     is->is_tag = fr->fr_logtag;
1688           }
1689 
1690           /*
1691            * It may seem strange to set is_ref to 2, but if stsave is not NULL
1692            * then a copy of the pointer is being stored somewhere else and in
1693            * the end, it will expect to be able to do something with it.
1694            */
1695           is->is_me = stsave;
1696           if (stsave != NULL) {
1697                     *stsave = is;
1698                     is->is_ref = 2;
1699           } else {
1700                     is->is_ref = 1;
1701           }
1702           is->is_pkts[0] = 0, is->is_bytes[0] = 0;
1703           is->is_pkts[1] = 0, is->is_bytes[1] = 0;
1704           is->is_pkts[2] = 0, is->is_bytes[2] = 0;
1705           is->is_pkts[3] = 0, is->is_bytes[3] = 0;
1706           if ((fin->fin_flx & FI_IGNORE) == 0) {
1707                     is->is_pkts[out] = 1;
1708                     fin->fin_pktnum = 1;
1709                     is->is_bytes[out] = fin->fin_plen;
1710                     is->is_flx[out][0] = fin->fin_flx & FI_CMP;
1711                     is->is_flx[out][0] &= ~FI_OOW;
1712           }
1713 
1714           if (pass & FR_STLOOSE)
1715                     is->is_flags |= IS_LOOSE;
1716 
1717           if (pass & FR_STSTRICT)
1718                     is->is_flags |= IS_STRICT;
1719 
1720           if (pass & FR_STATESYNC)
1721                     is->is_flags |= IS_STATESYNC;
1722 
1723           if (pass & FR_LOGFIRST)
1724                     is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
1725 
1726           READ_ENTER(&softc->ipf_state);
1727 
1728           if (ipf_state_insert(softc, is, fin->fin_rev) == -1) {
1729                     RWLOCK_EXIT(&softc->ipf_state);
1730                     /*
1731                      * This is a bit more manual than it should be but
1732                      * ipf_state_del cannot be called.
1733                      */
1734                     MUTEX_EXIT(&is->is_lock);
1735                     MUTEX_DESTROY(&is->is_lock);
1736                     if (is->is_tqehead[0] != NULL) {
1737                               if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
1738                                         ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
1739                               is->is_tqehead[0] = NULL;
1740                     }
1741                     if (is->is_tqehead[1] != NULL) {
1742                               if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
1743                                         ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
1744                               is->is_tqehead[1] = NULL;
1745                     }
1746                     KFREE(is);
1747                     return -1;
1748           }
1749 
1750           /*
1751            * Filling in the interface name is after the insert so that an
1752            * event (such as add/delete) of an interface that is referenced
1753            * by this rule will see this state entry.
1754            */
1755           if (fr != NULL) {
1756                     /*
1757                      * The name '-' is special for network interfaces and causes
1758                      * a NULL name to be present, always, allowing packets to
1759                      * match it, regardless of their interface.
1760                      */
1761                     if ((fin->fin_ifp == NULL) ||
1762                         (fr->fr_ifnames[out << 1] != -1 &&
1763                          fr->fr_names[fr->fr_ifnames[out << 1] + 0] == '-' &&
1764                          fr->fr_names[fr->fr_ifnames[out << 1] + 1] == '\0')) {
1765                               is->is_ifp[out << 1] = fr->fr_ifas[0];
1766                               strncpy(is->is_ifname[out << 1],
1767                                         fr->fr_names + fr->fr_ifnames[0],
1768                                         sizeof(fr->fr_ifnames[0]));
1769                     } else {
1770                               is->is_ifp[out << 1] = fin->fin_ifp;
1771                               COPYIFNAME(fin->fin_v, fin->fin_ifp,
1772                                            is->is_ifname[out << 1]);
1773                     }
1774 
1775                     is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
1776                     if (fr->fr_ifnames[1] != -1) {
1777                               strncpy(is->is_ifname[(out << 1) + 1],
1778                                         fr->fr_names + fr->fr_ifnames[1],
1779                                         sizeof(fr->fr_ifnames[1]));
1780                     }
1781 
1782                     is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
1783                     if (fr->fr_ifnames[2] != -1) {
1784                               strncpy(is->is_ifname[((1 - out) << 1)],
1785                                         fr->fr_names + fr->fr_ifnames[2],
1786                                         sizeof(fr->fr_ifnames[2]));
1787                     }
1788 
1789                     is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
1790                     if (fr->fr_ifnames[3] != -1) {
1791                               strncpy(is->is_ifname[((1 - out) << 1) + 1],
1792                                         fr->fr_names + fr->fr_ifnames[3],
1793                                         sizeof(fr->fr_ifnames[3]));
1794                     }
1795           } else {
1796                     if (fin->fin_ifp != NULL) {
1797                               is->is_ifp[out << 1] = fin->fin_ifp;
1798                               COPYIFNAME(fin->fin_v, fin->fin_ifp,
1799                                            is->is_ifname[out << 1]);
1800                     }
1801           }
1802 
1803           if (fin->fin_p == IPPROTO_TCP) {
1804                     /*
1805                     * If we're creating state for a starting connection, start the
1806                     * timer on it as we'll never see an error if it fails to
1807                     * connect.
1808                     */
1809                     (void) ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
1810                                            is->is_flags, 2);
1811           }
1812           MUTEX_EXIT(&is->is_lock);
1813           if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
1814                     is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
1815           if (softs->ipf_state_logging)
1816                     ipf_state_log(softc, is, ISL_NEW);
1817 
1818           RWLOCK_EXIT(&softc->ipf_state);
1819 
1820           fin->fin_flx |= FI_STATE;
1821           if (fin->fin_flx & FI_FRAG)
1822                     (void) ipf_frag_new(softc, fin, pass);
1823 
1824           fdp = &fr->fr_tifs[0];
1825           if (fdp->fd_type == FRD_DSTLIST) {
1826                     ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1827                                                   &is->is_tifs[0]);
1828           } else {
1829                     bcopy(fdp, &is->is_tifs[0], sizeof(*fdp));
1830           }
1831 
1832           fdp = &fr->fr_tifs[1];
1833           if (fdp->fd_type == FRD_DSTLIST) {
1834                     ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1835                                                   &is->is_tifs[1]);
1836           } else {
1837                     bcopy(fdp, &is->is_tifs[1], sizeof(*fdp));
1838           }
1839           fin->fin_tif = &is->is_tifs[fin->fin_rev];
1840 
1841           fdp = &fr->fr_dif;
1842           if (fdp->fd_type == FRD_DSTLIST) {
1843                     ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1844                                                   &is->is_dif);
1845           } else {
1846                     bcopy(fdp, &is->is_dif, sizeof(*fdp));
1847           }
1848           fin->fin_dif = &is->is_dif;
1849 
1850           return 0;
1851 }
1852 
1853 
1854 /* ------------------------------------------------------------------------ */
1855 /* Function:    ipf_tcpoptions                                              */
1856 /* Returns:     int - 1 == packet matches state entry, 0 == it does not,    */
1857 /*                   -1 == packet has bad TCP options data                  */
1858 /* Parameters:  softs(I) - pointer to state context structure               */
1859 /*              fin(I) - pointer to packet information                      */
1860 /*              tcp(I) - pointer to TCP packet header                       */
1861 /*              td(I)  - pointer to TCP data held as part of the state      */
1862 /*                                                                          */
1863 /* Look after the TCP header for any options and deal with those that are   */
1864 /* present.  Record details about those that we recogise.                   */
1865 /* ------------------------------------------------------------------------ */
1866 static int
ipf_tcpoptions(ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,tcpdata_t * td)1867 ipf_tcpoptions(ipf_state_softc_t *softs, fr_info_t *fin, tcphdr_t *tcp,
1868     tcpdata_t *td)
1869 {
1870           int off, mlen, ol, i, len, retval;
1871           char buf[64], *s, opt;
1872           mb_t *m = NULL;
1873 
1874           len = (TCP_OFF(tcp) << 2);
1875           if (fin->fin_dlen < len) {
1876                     SBUMPD(ipf_state_stats, iss_tcp_toosmall);
1877                     return 0;
1878           }
1879           len -= sizeof(*tcp);
1880 
1881           off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
1882 
1883           m = fin->fin_m;
1884           mlen = MSGDSIZE(m) - off;
1885           if (len > mlen) {
1886                     len = mlen;
1887                     retval = 0;
1888           } else {
1889                     retval = 1;
1890           }
1891 
1892           COPYDATA(m, off, len, buf);
1893 
1894           for (s = buf; len > 0; ) {
1895                     opt = *s;
1896                     if (opt == TCPOPT_EOL)
1897                               break;
1898                     else if (opt == TCPOPT_NOP)
1899                               ol = 1;
1900                     else {
1901                               if (len < 2)
1902                                         break;
1903                               ol = (int)*(s + 1);
1904                               if (ol < 2 || ol > len)
1905                                         break;
1906 
1907                               /*
1908                                * Extract the TCP options we are interested in out of
1909                                * the header and store them in the tcpdata struct.
1910                                */
1911                               switch (opt)
1912                               {
1913                               case TCPOPT_WINDOW :
1914                                         if (ol == TCPOLEN_WINDOW) {
1915                                                   i = (int)*(s + 2);
1916                                                   if (i > TCP_WSCALE_MAX)
1917                                                             i = TCP_WSCALE_MAX;
1918                                                   else if (i < 0)
1919                                                             i = 0;
1920                                                   td->td_winscale = i;
1921                                                   td->td_winflags |= TCP_WSCALE_SEEN|
1922                                                                          TCP_WSCALE_FIRST;
1923                                         } else
1924                                                   retval = -1;
1925                                         break;
1926                               case TCPOPT_MAXSEG :
1927                                         /*
1928                                          * So, if we wanted to set the TCP MAXSEG,
1929                                          * it should be done here...
1930                                          */
1931                                         if (ol == TCPOLEN_MAXSEG) {
1932                                                   i = (int)*(s + 2);
1933                                                   i <<= 8;
1934                                                   i += (int)*(s + 3);
1935                                                   td->td_maxseg = i;
1936                                         } else
1937                                                   retval = -1;
1938                                         break;
1939                               case TCPOPT_SACK_PERMITTED :
1940                                         if (ol == TCPOLEN_SACK_PERMITTED)
1941                                                   td->td_winflags |= TCP_SACK_PERMIT;
1942                                         else
1943                                                   retval = -1;
1944                                         break;
1945                               }
1946                     }
1947                     len -= ol;
1948                     s += ol;
1949           }
1950           if (retval == -1) {
1951                     SBUMPD(ipf_state_stats, iss_tcp_badopt);
1952           }
1953           return retval;
1954 }
1955 
1956 
1957 /* ------------------------------------------------------------------------ */
1958 /* Function:    ipf_state_tcp                                               */
1959 /* Returns:     int - 1 == packet matches state entry, 0 == it does not     */
1960 /* Parameters:  softc(I)  - pointer to soft context main structure          */
1961 /*              softs(I) - pointer to state context structure               */
1962 /*              fin(I)   - pointer to packet information                    */
1963 /*              tcp(I)   - pointer to TCP packet header                     */
1964 /*              is(I)  - pointer to master state structure                  */
1965 /*                                                                          */
1966 /* Check to see if a packet with TCP headers fits within the TCP window.    */
1967 /* Change timeout depending on whether new packet is a SYN-ACK returning    */
1968 /* for a SYN or a RST or FIN which indicate time to close up shop.          */
1969 /* ------------------------------------------------------------------------ */
1970 static int
ipf_state_tcp(ipf_main_softc_t * softc,ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)1971 ipf_state_tcp(ipf_main_softc_t *softc, ipf_state_softc_t *softs, fr_info_t *fin,
1972     tcphdr_t *tcp, ipstate_t *is)
1973 {
1974           tcpdata_t  *fdata, *tdata;
1975           int source, ret, flags;
1976 
1977           source = !fin->fin_rev;
1978           if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
1979               (ntohs(is->is_sport) != fin->fin_data[0]))
1980                     source = 0;
1981           fdata = &is->is_tcp.ts_data[!source];
1982           tdata = &is->is_tcp.ts_data[source];
1983 
1984           MUTEX_ENTER(&is->is_lock);
1985 
1986           /*
1987            * If a SYN packet is received for a connection that is on the way out
1988            * but hasn't yet departed then advance this session along the way.
1989            */
1990           if ((tcp->th_flags & TH_OPENING) == TH_SYN) {
1991                     if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
1992                         (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
1993                               is->is_state[!source] = IPF_TCPS_CLOSED;
1994                               ipf_movequeue(softc->ipf_ticks, &is->is_sti,
1995                                               is->is_sti.tqe_ifq,
1996                                               &softs->ipf_state_deletetq);
1997                               MUTEX_EXIT(&is->is_lock);
1998                               DT1(iss_tcp_closing, ipstate_t *, is);
1999                               SBUMP(ipf_state_stats.iss_tcp_closing);
2000                               return 0;
2001                     }
2002           }
2003 
2004           if (is->is_flags & IS_LOOSE)
2005                     ret = 1;
2006           else
2007                     ret = ipf_state_tcpinwindow(fin, fdata, tdata, tcp,
2008                                                       is->is_flags);
2009           if (ret > 0) {
2010                     /*
2011                      * Nearing end of connection, start timeout.
2012                      */
2013                     ret = ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
2014                                           is->is_flags, ret);
2015                     if (ret == 0) {
2016                               MUTEX_EXIT(&is->is_lock);
2017                               DT2(iss_tcp_fsm, fr_info_t *, fin, ipstate_t *, is);
2018                               SBUMP(ipf_state_stats.iss_tcp_fsm);
2019                               return 0;
2020                     }
2021 
2022                     if (softs->ipf_state_logging > 4)
2023                               ipf_state_log(softc, is, ISL_STATECHANGE);
2024 
2025                     /*
2026                      * set s0's as appropriate.  Use syn-ack packet as it
2027                      * contains both pieces of required information.
2028                      */
2029                     /*
2030                      * Window scale option is only present in SYN/SYN-ACK packet.
2031                      * Compare with ~TH_FIN to mask out T/TCP setups.
2032                      */
2033                     flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
2034                     if (flags == (TH_SYN|TH_ACK)) {
2035                               is->is_s0[source] = ntohl(tcp->th_ack);
2036                               is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
2037                               if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2038                                         if (ipf_tcpoptions(softs, fin, tcp,
2039                                                                fdata) == -1)
2040                                                   fin->fin_flx |= FI_BAD;
2041                               }
2042                               if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2043                                         ipf_checknewisn(fin, is);
2044                     } else if (flags == TH_SYN) {
2045                               is->is_s0[source] = ntohl(tcp->th_seq) + 1;
2046                               if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2047                                         if (ipf_tcpoptions(softs, fin, tcp,
2048                                                                fdata) == -1)
2049                                                   fin->fin_flx |= FI_BAD;
2050                               }
2051 
2052                               if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2053                                         ipf_checknewisn(fin, is);
2054 
2055                     }
2056                     ret = 1;
2057           } else {
2058                     DT2(iss_tcp_oow, fr_info_t *, fin, ipstate_t *, is);
2059                     SBUMP(ipf_state_stats.iss_tcp_oow);
2060                     ret = 0;
2061           }
2062           MUTEX_EXIT(&is->is_lock);
2063           return ret;
2064 }
2065 
2066 
2067 /* ------------------------------------------------------------------------ */
2068 /* Function:    ipf_checknewisn                                             */
2069 /* Returns:     Nil                                                         */
2070 /* Parameters:  fin(I)   - pointer to packet information                    */
2071 /*              is(I)  - pointer to master state structure                  */
2072 /*                                                                          */
2073 /* Check to see if this TCP connection is expecting and needs a new         */
2074 /* sequence number for a particular direction of the connection.            */
2075 /*                                                                          */
2076 /* NOTE: This does not actually change the sequence numbers, only gets new  */
2077 /* one ready.                                                               */
2078 /* ------------------------------------------------------------------------ */
2079 static void
ipf_checknewisn(fr_info_t * fin,ipstate_t * is)2080 ipf_checknewisn(fr_info_t *fin, ipstate_t *is)
2081 {
2082           u_32_t sumd, old, new;
2083           tcphdr_t *tcp;
2084           int i;
2085 
2086           i = fin->fin_rev;
2087           tcp = fin->fin_dp;
2088 
2089           if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
2090               ((i == 1) && !(is->is_flags & IS_ISNACK))) {
2091                     old = ntohl(tcp->th_seq);
2092                     new = ipf_newisn(fin);
2093                     is->is_isninc[i] = new - old;
2094                     CALC_SUMD(old, new, sumd);
2095                     is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
2096 
2097                     is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
2098           }
2099 }
2100 
2101 
2102 /* ------------------------------------------------------------------------ */
2103 /* Function:    ipf_state_tcpinwindow                                       */
2104 /* Returns:     int - 1 == packet inside TCP "window", 0 == not inside.     */
2105 /* Parameters:  fin(I)   - pointer to packet information                    */
2106 /*              fdata(I) - pointer to tcp state informatio (forward)        */
2107 /*              tdata(I) - pointer to tcp state informatio (reverse)        */
2108 /*              tcp(I)   - pointer to TCP packet header                     */
2109 /*                                                                          */
2110 /* Given a packet has matched addresses and ports, check to see if it is    */
2111 /* within the TCP data window.  In a show of generosity, allow packets that */
2112 /* are within the window space behind the current sequence # as well.       */
2113 /* ------------------------------------------------------------------------ */
2114 static int
ipf_state_tcpinwindow(fr_info_t * fin,tcpdata_t * fdata,tcpdata_t * tdata,tcphdr_t * tcp,int flags)2115 ipf_state_tcpinwindow(fr_info_t *fin, tcpdata_t  *fdata, tcpdata_t *tdata,
2116     tcphdr_t *tcp, int flags)
2117 {
2118           ipf_main_softc_t *softc = fin->fin_main_soft;
2119           ipf_state_softc_t *softs = softc->ipf_state_soft;
2120           tcp_seq seq, ack, end;
2121           int ackskew, tcpflags;
2122           u_32_t win, maxwin;
2123           int dsize, inseq;
2124 
2125           /*
2126            * Find difference between last checked packet and this packet.
2127            */
2128           tcpflags = tcp->th_flags;
2129           seq = ntohl(tcp->th_seq);
2130           ack = ntohl(tcp->th_ack);
2131           if (tcpflags & TH_SYN)
2132                     win = ntohs(tcp->th_win);
2133           else
2134                     win = ntohs(tcp->th_win) << fdata->td_winscale;
2135 
2136           /*
2137            * A window of 0 produces undesirable behaviour from this function.
2138            */
2139           if (win == 0)
2140                     win = 1;
2141 
2142           dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2143                   ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
2144 
2145           /*
2146            * if window scaling is present, the scaling is only allowed
2147            * for windows not in the first SYN packet. In that packet the
2148            * window is 65535 to specify the largest window possible
2149            * for receivers not implementing the window scale option.
2150            * Currently, we do not assume TTCP here. That means that
2151            * if we see a second packet from a host (after the initial
2152            * SYN), we can assume that the receiver of the SYN did
2153            * already send back the SYN/ACK (and thus that we know if
2154            * the receiver also does window scaling)
2155            */
2156           if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
2157                     fdata->td_winflags &= ~TCP_WSCALE_FIRST;
2158                     fdata->td_maxwin = win;
2159           }
2160 
2161           end = seq + dsize;
2162 
2163           if ((fdata->td_end == 0) &&
2164               (!(flags & IS_TCPFSM) ||
2165                ((tcpflags & TH_OPENING) == TH_OPENING))) {
2166                     /*
2167                      * Must be a (outgoing) SYN-ACK in reply to a SYN.
2168                      */
2169                     fdata->td_end = end - 1;
2170                     fdata->td_maxwin = 1;
2171                     fdata->td_maxend = end + win;
2172           }
2173 
2174           if (!(tcpflags & TH_ACK)) {  /* Pretend an ack was sent */
2175                     ack = tdata->td_end;
2176           } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
2177                        (ack == 0)) {
2178                     /* gross hack to get around certain broken tcp stacks */
2179                     ack = tdata->td_end;
2180           }
2181 
2182           maxwin = tdata->td_maxwin;
2183           ackskew = tdata->td_end - ack;
2184 
2185           /*
2186            * Strict sequencing only allows in-order delivery.
2187            */
2188           if ((flags & IS_STRICT) != 0) {
2189                     if (seq != fdata->td_end) {
2190                               DT2(iss_tcp_struct, tcpdata_t *, fdata, int, seq);
2191                               SBUMP(ipf_state_stats.iss_tcp_strict);
2192                               fin->fin_flx |= FI_OOW;
2193                               return 0;
2194                     }
2195           }
2196 
2197 #define   SEQ_GE(a,b)         ((int)((a) - (b)) >= 0)
2198 #define   SEQ_GT(a,b)         ((int)((a) - (b)) > 0)
2199           inseq = 0;
2200           if ((SEQ_GE(fdata->td_maxend, end)) &&
2201               (SEQ_GE(seq, fdata->td_end - maxwin)) &&
2202 /* XXX what about big packets */
2203 #define MAXACKWINDOW 66000
2204               (-ackskew <= (MAXACKWINDOW)) &&
2205               ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
2206                     inseq = 1;
2207           /*
2208            * Microsoft Windows will send the next packet to the right of the
2209            * window if SACK is in use.
2210            */
2211           } else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
2212               (fdata->td_winflags & TCP_SACK_PERMIT) &&
2213               (tdata->td_winflags & TCP_SACK_PERMIT)) {
2214                     DT2(iss_sinsack, tcpdata_t *, fdata, int, seq);
2215                     SBUMP(ipf_state_stats.iss_winsack);
2216                     inseq = 1;
2217           /*
2218            * Sometimes a TCP RST will be generated with only the ACK field
2219            * set to non-zero.
2220            */
2221           } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
2222                        (ackskew >= -1) && (ackskew <= 1)) {
2223                     inseq = 1;
2224           } else if (!(flags & IS_TCPFSM)) {
2225 #if 0
2226                     int i;
2227 
2228                     i = (fin->fin_rev << 1) + fin->fin_out;
2229 
2230                     if (is_pkts[i]0 == 0) {
2231                               /*
2232                                * Picking up a connection in the middle, the "next"
2233                                * packet seen from a direction that is new should be
2234                                * accepted, even if it appears out of sequence.
2235                                */
2236                               inseq = 1;
2237                     } else
2238 #endif
2239                     if (!(fdata->td_winflags &
2240                                   (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
2241                               /*
2242                                * No TCPFSM and no window scaling, so make some
2243                                * extra guesses.
2244                                */
2245                               if ((seq == fdata->td_maxend) && (ackskew == 0))
2246                                         inseq = 1;
2247                               else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
2248                                         inseq = 1;
2249                     }
2250           }
2251 
2252           /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */
2253 
2254           if (inseq) {
2255                     /* if ackskew < 0 then this should be due to fragmented
2256                      * packets. There is no way to know the length of the
2257                      * total packet in advance.
2258                      * We do know the total length from the fragment cache though.
2259                      * Note however that there might be more sessions with
2260                      * exactly the same source and destination parameters in the
2261                      * state cache (and source and destination is the only stuff
2262                      * that is saved in the fragment cache). Note further that
2263                      * some TCP connections in the state cache are hashed with
2264                      * sport and dport as well which makes it not worthwhile to
2265                      * look for them.
2266                      * Thus, when ackskew is negative but still seems to belong
2267                      * to this session, we bump up the destinations end value.
2268                      */
2269                     if (ackskew < 0)
2270                               tdata->td_end = ack;
2271 
2272                     /* update max window seen */
2273                     if (fdata->td_maxwin < win)
2274                               fdata->td_maxwin = win;
2275                     if (SEQ_GT(end, fdata->td_end))
2276                               fdata->td_end = end;
2277                     if (SEQ_GE(ack + win, tdata->td_maxend))
2278                               tdata->td_maxend = ack + win;
2279                     return 1;
2280           }
2281           SBUMP(ipf_state_stats.iss_oow);
2282           fin->fin_flx |= FI_OOW;
2283           return 0;
2284 }
2285 
2286 
2287 /* ------------------------------------------------------------------------ */
2288 /* Function:    ipf_state_clone                                             */
2289 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
2290 /*                           else pointer to new state structure            */
2291 /* Parameters:  fin(I) - pointer to packet information                      */
2292 /*              tcp(I) - pointer to TCP/UDP header                          */
2293 /*              is(I)  - pointer to master state structure                  */
2294 /*                                                                          */
2295 /* Create a "duplcate" state table entry from the master.                   */
2296 /* ------------------------------------------------------------------------ */
2297 static ipstate_t *
ipf_state_clone(fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)2298 ipf_state_clone(fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
2299 {
2300           ipf_main_softc_t *softc = fin->fin_main_soft;
2301           ipf_state_softc_t *softs = softc->ipf_state_soft;
2302           ipstate_t *clone;
2303           u_32_t send;
2304 
2305           if (softs->ipf_state_stats.iss_active == softs->ipf_state_max) {
2306                     SBUMPD(ipf_state_stats, iss_max);
2307                     softs->ipf_state_doflush = 1;
2308                     return NULL;
2309           }
2310           KMALLOC(clone, ipstate_t *);
2311           if (clone == NULL) {
2312                     SBUMPD(ipf_state_stats, iss_clone_nomem);
2313                     return NULL;
2314           }
2315           bcopy((char *)is, (char *)clone, sizeof(*clone));
2316 
2317           MUTEX_NUKE(&clone->is_lock);
2318           /*
2319            * It has not yet been placed on any timeout queue, so make sure
2320            * all of that data is zero'd out.
2321            */
2322           clone->is_sti.tqe_pnext = NULL;
2323           clone->is_sti.tqe_next = NULL;
2324           clone->is_sti.tqe_ifq = NULL;
2325           clone->is_sti.tqe_parent = clone;
2326 
2327           clone->is_die = ONE_DAY + softc->ipf_ticks;
2328           clone->is_state[0] = 0;
2329           clone->is_state[1] = 0;
2330           send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2331                     ((tcp->th_flags & TH_SYN) ? 1 : 0) +
2332                     ((tcp->th_flags & TH_FIN) ? 1 : 0);
2333 
2334           if (fin->fin_rev == 1) {
2335                     clone->is_dend = send;
2336                     clone->is_maxdend = send;
2337                     clone->is_send = 0;
2338                     clone->is_maxswin = 1;
2339                     clone->is_maxdwin = ntohs(tcp->th_win);
2340                     if (clone->is_maxdwin == 0)
2341                               clone->is_maxdwin = 1;
2342           } else {
2343                     clone->is_send = send;
2344                     clone->is_maxsend = send;
2345                     clone->is_dend = 0;
2346                     clone->is_maxdwin = 1;
2347                     clone->is_maxswin = ntohs(tcp->th_win);
2348                     if (clone->is_maxswin == 0)
2349                               clone->is_maxswin = 1;
2350           }
2351 
2352           clone->is_flags &= ~SI_CLONE;
2353           clone->is_flags |= SI_CLONED;
2354           if (ipf_state_insert(softc, clone, fin->fin_rev) == -1) {
2355                     KFREE(clone);
2356                     return NULL;
2357           }
2358 
2359           clone->is_ref = 1;
2360           if (clone->is_p == IPPROTO_TCP) {
2361                     (void) ipf_tcp_age(&clone->is_sti, fin, softs->ipf_state_tcptq,
2362                                            clone->is_flags, 2);
2363           }
2364           MUTEX_EXIT(&clone->is_lock);
2365           if (is->is_flags & IS_STATESYNC)
2366                     clone->is_sync = ipf_sync_new(softc, SMC_STATE, fin, clone);
2367           DT2(iss_clone, ipstate_t *, is, ipstate_t *, clone);
2368           SBUMP(ipf_state_stats.iss_cloned);
2369           return clone;
2370 }
2371 
2372 
2373 /* ------------------------------------------------------------------------ */
2374 /* Function:    ipf_matchsrcdst                                             */
2375 /* Returns:     Nil                                                         */
2376 /* Parameters:  fin(I)   - pointer to packet information                    */
2377 /*              is(I)    - pointer to state structure                       */
2378 /*              src(I)   - pointer to source address                        */
2379 /*              dst(I)   - pointer to destination address                   */
2380 /*              tcp(I)   - pointer to TCP/UDP header                        */
2381 /*              cmask(I) - mask of FI_* bits to check                       */
2382 /*                                                                          */
2383 /* Match a state table entry against an IP packet.  The logic below is that */
2384 /* ret gets set to one if the match succeeds, else remains 0.  If it is     */
2385 /* still 0 after the test. no match.                                        */
2386 /* ------------------------------------------------------------------------ */
2387 static ipstate_t *
ipf_matchsrcdst(fr_info_t * fin,ipstate_t * is,i6addr_t * src,i6addr_t * dst,tcphdr_t * tcp,u_32_t cmask)2388 ipf_matchsrcdst(fr_info_t *fin, ipstate_t *is, i6addr_t *src, i6addr_t *dst,
2389     tcphdr_t *tcp, u_32_t cmask)
2390 {
2391           ipf_main_softc_t *softc = fin->fin_main_soft;
2392           ipf_state_softc_t *softs = softc->ipf_state_soft;
2393           int ret = 0, rev, out, flags, flx = 0, idx;
2394           u_short sp, dp;
2395           u_32_t cflx;
2396           void *ifp;
2397 
2398           /*
2399            * If a connection is about to be deleted, no packets
2400            * are allowed to match it.
2401            */
2402           if (is->is_sti.tqe_ifq == &softs->ipf_state_deletetq)
2403                     return NULL;
2404 
2405           rev = IP6_NEQ(&is->is_dst, dst);
2406           ifp = fin->fin_ifp;
2407           out = fin->fin_out;
2408           flags = is->is_flags;
2409           sp = 0;
2410           dp = 0;
2411 
2412           if (tcp != NULL) {
2413                     sp = htons(fin->fin_sport);
2414                     dp = htons(fin->fin_dport);
2415           }
2416           if (!rev) {
2417                     if (tcp != NULL) {
2418                               if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
2419                                         rev = 1;
2420                               else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
2421                                         rev = 1;
2422                     }
2423           }
2424 
2425           idx = (out << 1) + rev;
2426 
2427           /*
2428            * If the interface for this 'direction' is set, make sure it matches.
2429            * An interface name that is not set matches any, as does a name of *.
2430            */
2431           if ((is->is_ifp[idx] == ifp) || (is->is_ifp[idx] == NULL &&
2432               (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '-' ||
2433                *is->is_ifname[idx] == '*')))
2434                     ret = 1;
2435 
2436           if (ret == 0) {
2437                     DT2(iss_lookup_badifp, fr_info_t *, fin, ipstate_t *, is);
2438                     SBUMP(ipf_state_stats.iss_lookup_badifp);
2439                     /* TRACE is, out, rev, idx */
2440                     return NULL;
2441           }
2442           ret = 0;
2443 
2444           /*
2445            * Match addresses and ports.
2446            */
2447           if (rev == 0) {
2448                     if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
2449                         (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
2450                               if (tcp) {
2451                                         if ((sp == is->is_sport || flags & SI_W_SPORT)
2452                                             &&
2453                                             (dp == is->is_dport || flags & SI_W_DPORT))
2454                                                   ret = 1;
2455                               } else {
2456                                         ret = 1;
2457                               }
2458                     }
2459           } else {
2460                     if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
2461                         (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
2462                               if (tcp) {
2463                                         if ((dp == is->is_sport || flags & SI_W_SPORT)
2464                                             &&
2465                                             (sp == is->is_dport || flags & SI_W_DPORT))
2466                                                   ret = 1;
2467                               } else {
2468                                         ret = 1;
2469                               }
2470                     }
2471           }
2472 
2473           if (ret == 0) {
2474                     SBUMP(ipf_state_stats.iss_lookup_badport);
2475                     DT2(iss_lookup_badport, fr_info_t *, fin, ipstate_t *, is);
2476                     /* TRACE rev, is, sp, dp, src, dst */
2477                     return NULL;
2478           }
2479 
2480           /*
2481            * Whether or not this should be here, is questionable, but the aim
2482            * is to get this out of the main line.
2483            */
2484           if (tcp == NULL)
2485                     flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
2486 
2487           /*
2488            * Only one of the source or destination address can be flaged as a
2489            * wildcard.  Fill in the missing address, if set.
2490            * For IPv6, if the address being copied in is multicast, then
2491            * don't reset the wild flag - multicast causes it to be set in the
2492            * first place!
2493            */
2494           if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
2495                     fr_ip_t *fi = &fin->fin_fi;
2496 
2497                     if ((flags & SI_W_SADDR) != 0) {
2498                               if (rev == 0) {
2499                                         is->is_src = fi->fi_src;
2500                                         is->is_flags &= ~SI_W_SADDR;
2501                               } else {
2502                                         if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2503                                                   is->is_src = fi->fi_dst;
2504                                                   is->is_flags &= ~SI_W_SADDR;
2505                                         }
2506                               }
2507                     } else if ((flags & SI_W_DADDR) != 0) {
2508                               if (rev == 0) {
2509                                         if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2510                                                   is->is_dst = fi->fi_dst;
2511                                                   is->is_flags &= ~SI_W_DADDR;
2512                                         }
2513                               } else {
2514                                         is->is_dst = fi->fi_src;
2515                                         is->is_flags &= ~SI_W_DADDR;
2516                               }
2517                     }
2518                     if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
2519                               ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2520                     }
2521           }
2522 
2523           flx = fin->fin_flx & cmask;
2524           cflx = is->is_flx[out][rev];
2525 
2526           /*
2527            * Match up any flags set from IP options.
2528            */
2529           if ((cflx && (flx != (cflx & cmask))) ||
2530               ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
2531               ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
2532               ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
2533                     SBUMPD(ipf_state_stats, iss_miss_mask);
2534                     return NULL;
2535           }
2536 
2537           if ((fin->fin_flx & FI_IGNORE) != 0) {
2538                     fin->fin_rev = rev;
2539                     return is;
2540           }
2541 
2542           /*
2543            * Only one of the source or destination port can be flagged as a
2544            * wildcard.  When filling it in, fill in a copy of the matched entry
2545            * if it has the cloning flag set.
2546            */
2547           if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
2548                     if ((flags & SI_CLONE) != 0) {
2549                               ipstate_t *clone;
2550 
2551                               clone = ipf_state_clone(fin, tcp, is);
2552                               if (clone == NULL)
2553                                         return NULL;
2554                               is = clone;
2555                     } else {
2556                               ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2557                     }
2558 
2559                     if ((flags & SI_W_SPORT) != 0) {
2560                               if (rev == 0) {
2561                                         is->is_sport = sp;
2562                                         is->is_send = ntohl(tcp->th_seq);
2563                               } else {
2564                                         is->is_sport = dp;
2565                                         is->is_send = ntohl(tcp->th_ack);
2566                               }
2567                               is->is_maxsend = is->is_send + 1;
2568                     } else if ((flags & SI_W_DPORT) != 0) {
2569                               if (rev == 0) {
2570                                         is->is_dport = dp;
2571                                         is->is_dend = ntohl(tcp->th_ack);
2572                               } else {
2573                                         is->is_dport = sp;
2574                                         is->is_dend = ntohl(tcp->th_seq);
2575                               }
2576                               is->is_maxdend = is->is_dend + 1;
2577                     }
2578                     is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
2579                     if ((flags & SI_CLONED) && softs->ipf_state_logging)
2580                               ipf_state_log(softc, is, ISL_CLONE);
2581           }
2582 
2583           ret = -1;
2584 
2585           if (is->is_flx[out][rev] == 0) {
2586                     is->is_flx[out][rev] = flx;
2587                     if (rev == 1 && is->is_optmsk[1] == 0) {
2588                               is->is_opt[1] = fin->fin_optmsk;
2589                               is->is_optmsk[1] = 0xffffffff;
2590                               if (is->is_v == 6) {
2591                                         is->is_opt[1] &= ~0x8;
2592                                         is->is_optmsk[1] &= ~0x8;
2593                               }
2594                     }
2595           }
2596 
2597           /*
2598            * Check if the interface name for this "direction" is set and if not,
2599            * fill it in.
2600            */
2601           if (is->is_ifp[idx] == NULL &&
2602               (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
2603                     is->is_ifp[idx] = ifp;
2604                     COPYIFNAME(fin->fin_v, ifp, is->is_ifname[idx]);
2605           }
2606           fin->fin_rev = rev;
2607           return is;
2608 }
2609 
2610 
2611 /* ------------------------------------------------------------------------ */
2612 /* Function:    ipf_checkicmpmatchingstate                                  */
2613 /* Returns:     Nil                                                         */
2614 /* Parameters:  fin(I) - pointer to packet information                      */
2615 /*                                                                          */
2616 /* If we've got an ICMP error message, using the information stored in the  */
2617 /* ICMP packet, look for a matching state table entry.                      */
2618 /*                                                                          */
2619 /* If we return NULL then no lock on ipf_state is held.                     */
2620 /* If we return non-null then a read-lock on ipf_state is held.             */
2621 /* ------------------------------------------------------------------------ */
2622 static ipstate_t *
ipf_checkicmpmatchingstate(fr_info_t * fin)2623 ipf_checkicmpmatchingstate(fr_info_t *fin)
2624 {
2625           ipf_main_softc_t *softc = fin->fin_main_soft;
2626           ipf_state_softc_t *softs = softc->ipf_state_soft;
2627           ipstate_t *is, **isp;
2628           i6addr_t dst, src;
2629           struct icmp *ic;
2630           u_short savelen;
2631           icmphdr_t *icmp;
2632           fr_info_t ofin;
2633           tcphdr_t *tcp;
2634           int len;
2635           u_char    pr;
2636           ip_t *oip;
2637           u_int hv;
2638 
2639           /*
2640            * Does it at least have the return (basic) IP header ?
2641            * Is it an actual recognised ICMP error type?
2642            * Only a basic IP header (no options) should be with
2643            * an ICMP error header.
2644            */
2645           if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
2646               (fin->fin_plen < ICMPERR_MINPKTLEN) ||
2647               !(fin->fin_flx & FI_ICMPERR)) {
2648                     SBUMPD(ipf_state_stats, iss_icmp_bad);
2649                     return NULL;
2650           }
2651           ic = fin->fin_dp;
2652 
2653           oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
2654           /*
2655            * Check if the at least the old IP header (with options) and
2656            * 8 bytes of payload is present.
2657            */
2658           if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) {
2659                     SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2660                     return NULL;
2661           }
2662 
2663           /*
2664            * Sanity Checks.
2665            */
2666           len = fin->fin_dlen - ICMPERR_ICMPHLEN;
2667           if ((len <= 0) || ((IP_HL(oip) << 2) > len)) {
2668                     DT2(iss_icmp_len, fr_info_t *, fin, struct ip*, oip);
2669                     SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2670                     return NULL;
2671           }
2672 
2673           /*
2674            * Is the buffer big enough for all of it ?  It's the size of the IP
2675            * header claimed in the encapsulated part which is of concern.  It
2676            * may be too big to be in this buffer but not so big that it's
2677            * outside the ICMP packet, leading to TCP deref's causing problems.
2678            * This is possible because we don't know how big oip_hl is when we
2679            * do the pullup early in ipf_check() and thus can't guarantee it is
2680            * all here now.
2681            */
2682 #ifdef  _KERNEL
2683           {
2684           mb_t *m;
2685 
2686           m = fin->fin_m;
2687 # if defined(MENTAT)
2688           if ((char *)oip + len > (char *)m->b_wptr) {
2689                     SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_2);
2690                     return NULL;
2691           }
2692 # else
2693           if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) {
2694                     SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_3);
2695                     return NULL;
2696           }
2697 # endif
2698           }
2699 #endif
2700 
2701           bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
2702 
2703           /*
2704            * in the IPv4 case we must zero the i6addr union otherwise
2705            * the IP6_EQ and IP6_NEQ macros produce the wrong results because
2706            * of the 'junk' in the unused part of the union
2707            */
2708           bzero((char *)&src, sizeof(src));
2709           bzero((char *)&dst, sizeof(dst));
2710 
2711           /*
2712            * we make an fin entry to be able to feed it to
2713            * matchsrcdst note that not all fields are encessary
2714            * but this is the cleanest way. Note further we fill
2715            * in fin_mp such that if someone uses it we'll get
2716            * a kernel panic. ipf_matchsrcdst does not use this.
2717            *
2718            * watch out here, as ip is in host order and oip in network
2719            * order. Any change we make must be undone afterwards, like
2720            * oip->ip_len.
2721            */
2722           savelen = oip->ip_len;
2723           oip->ip_len = htons(len);
2724 
2725           ofin.fin_flx = FI_NOCKSUM;
2726           ofin.fin_v = 4;
2727           ofin.fin_ip = oip;
2728           ofin.fin_m = NULL;  /* if dereferenced, panic XXX */
2729           ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
2730           (void) ipf_makefrip(IP_HL(oip) << 2, oip, &ofin);
2731           ofin.fin_ifp = fin->fin_ifp;
2732           ofin.fin_out = !fin->fin_out;
2733 
2734           hv = (pr = oip->ip_p);
2735           src.in4 = oip->ip_src;
2736           hv += src.in4.s_addr;
2737           dst.in4 = oip->ip_dst;
2738           hv += dst.in4.s_addr;
2739 
2740           /*
2741            * Reset the short and bad flag here because in ipf_matchsrcdst()
2742            * the flags for the current packet (fin_flx) are compared against
2743            * those for the existing session.
2744            */
2745           ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
2746 
2747           /*
2748            * Put old values of ip_len back as we don't know
2749            * if we have to forward the packet or process it again.
2750            */
2751           oip->ip_len = savelen;
2752 
2753           switch (oip->ip_p)
2754           {
2755           case IPPROTO_ICMP :
2756                     /*
2757                      * an ICMP error can only be generated as a result of an
2758                      * ICMP query, not as the response on an ICMP error
2759                      *
2760                      * XXX theoretically ICMP_ECHOREP and the other reply's are
2761                      * ICMP query's as well, but adding them here seems strange XXX
2762                      */
2763                     if ((ofin.fin_flx & FI_ICMPERR) != 0) {
2764                               DT1(iss_icmp_icmperr, fr_info_t *, &ofin);
2765                               SBUMP(ipf_state_stats.iss_icmp_icmperr);
2766                               return NULL;
2767                     }
2768 
2769                     /*
2770                      * perform a lookup of the ICMP packet in the state table
2771                      */
2772                     icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2773                     hv += icmp->icmp_id;
2774                     hv = DOUBLE_HASH(hv);
2775 
2776                     READ_ENTER(&softc->ipf_state);
2777                     for (isp = &softs->ipf_state_table[hv];
2778                          ((is = *isp) != NULL); ) {
2779                               isp = &is->is_hnext;
2780                               if ((is->is_p != pr) || (is->is_v != 4))
2781                                         continue;
2782                               if (is->is_pass & FR_NOICMPERR)
2783                                         continue;
2784 
2785                               is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2786                                                       NULL, FI_ICMPCMP);
2787                               if ((is != NULL) && !ipf_allowstateicmp(fin, is, &src))
2788                                         return is;
2789                     }
2790                     RWLOCK_EXIT(&softc->ipf_state);
2791                     SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_1);
2792                     return NULL;
2793           case IPPROTO_TCP :
2794           case IPPROTO_UDP :
2795                     break;
2796           default :
2797                     SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_2);
2798                     return NULL;
2799           }
2800 
2801           tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2802 
2803           hv += tcp->th_dport;;
2804           hv += tcp->th_sport;;
2805           hv = DOUBLE_HASH(hv);
2806 
2807           READ_ENTER(&softc->ipf_state);
2808           for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
2809                     isp = &is->is_hnext;
2810                     /*
2811                      * Only allow this icmp though if the
2812                      * encapsulated packet was allowed through the
2813                      * other way around. Note that the minimal amount
2814                      * of info present does not allow for checking against
2815                      * tcp internals such as seq and ack numbers.   Only the
2816                      * ports are known to be present and can be even if the
2817                      * short flag is set.
2818                      */
2819                     if ((is->is_p == pr) && (is->is_v == 4) &&
2820                         (is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2821                                                     tcp, FI_ICMPCMP))) {
2822                               if (ipf_allowstateicmp(fin, is, &src) == 0)
2823                                         return is;
2824                     }
2825           }
2826           RWLOCK_EXIT(&softc->ipf_state);
2827           SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_3);
2828           return NULL;
2829 }
2830 
2831 
2832 /* ------------------------------------------------------------------------ */
2833 /* Function:    ipf_allowstateicmp                                          */
2834 /* Returns:     int - 1 = packet denied, 0 = packet allowed                 */
2835 /* Parameters:  fin(I) - pointer to packet information                      */
2836 /*              is(I)  - pointer to state table entry                       */
2837 /*              src(I) - source address to check permission for             */
2838 /*                                                                          */
2839 /* For an ICMP packet that has so far matched a state table entry, check if */
2840 /* there are any further refinements that might mean we want to block this  */
2841 /* packet.  This code isn't specific to either IPv4 or IPv6.                */
2842 /* ------------------------------------------------------------------------ */
2843 static int
ipf_allowstateicmp(fr_info_t * fin,ipstate_t * is,i6addr_t * src)2844 ipf_allowstateicmp(fr_info_t *fin, ipstate_t *is, i6addr_t *src)
2845 {
2846           ipf_main_softc_t *softc = fin->fin_main_soft;
2847           ipf_state_softc_t *softs = softc->ipf_state_soft;
2848           frentry_t *savefr;
2849           frentry_t *fr;
2850           u_32_t ipass;
2851           int backward;
2852           int oi;
2853           int i;
2854 
2855           fr = is->is_rule;
2856           if (fr != NULL && fr->fr_icmpgrp != NULL) {
2857                     savefr = fin->fin_fr;
2858                     fin->fin_fr = fr->fr_icmpgrp->fg_start;
2859 
2860                     ipass = ipf_scanlist(fin, softc->ipf_pass);
2861                     fin->fin_fr = savefr;
2862                     if (FR_ISBLOCK(ipass)) {
2863                               SBUMPD(ipf_state_stats, iss_icmp_headblock);
2864                               return 1;
2865                     }
2866           }
2867 
2868           /*
2869            * i  : the index of this packet (the icmp unreachable)
2870            * oi : the index of the original packet found in the
2871            *      icmp header (i.e. the packet causing this icmp)
2872            * backward : original packet was backward compared to
2873            *            the state
2874            */
2875           backward = IP6_NEQ(&is->is_src, src);
2876           fin->fin_rev = !backward;
2877           i = (!backward << 1) + fin->fin_out;
2878           oi = (backward << 1) + !fin->fin_out;
2879 
2880           if (is->is_pass & FR_NOICMPERR) {
2881                     SBUMPD(ipf_state_stats, iss_icmp_banned);
2882                     return 1;
2883           }
2884           if (is->is_icmppkts[i] > is->is_pkts[oi]) {
2885                     SBUMPD(ipf_state_stats, iss_icmp_toomany);
2886                     return 1;
2887           }
2888 
2889           DT2(iss_icmp_hits, fr_info_t *, fin, ipstate_t *, is);
2890           SBUMP(ipf_state_stats.iss_icmp_hits);
2891           is->is_icmppkts[i]++;
2892 
2893           /*
2894            * we deliberately do not touch the timeouts
2895            * for the accompanying state table entry.
2896            * It remains to be seen if that is correct. XXX
2897            */
2898           return 0;
2899 }
2900 
2901 
2902 /* ------------------------------------------------------------------------ */
2903 /* Function:    ipf_ipsmove                                                 */
2904 /* Returns:     Nil                                                         */
2905 /* Parameters:  is(I) - pointer to state table entry                        */
2906 /*              hv(I) - new hash value for state table entry                */
2907 /* Write Locks: ipf_state                                                   */
2908 /*                                                                          */
2909 /* Move a state entry from one position in the hash table to another.       */
2910 /* ------------------------------------------------------------------------ */
2911 static void
ipf_ipsmove(ipf_state_softc_t * softs,ipstate_t * is,u_int hv)2912 ipf_ipsmove(ipf_state_softc_t *softs, ipstate_t *is, u_int hv)
2913 {
2914           ipstate_t **isp;
2915           u_int hvm;
2916 
2917           hvm = is->is_hv;
2918 
2919           /* TRACE is, is_hv, hvm */
2920 
2921           /*
2922            * Remove the hash from the old location...
2923            */
2924           isp = is->is_phnext;
2925           if (is->is_hnext)
2926                     is->is_hnext->is_phnext = isp;
2927           *isp = is->is_hnext;
2928           if (softs->ipf_state_table[hvm] == NULL)
2929                     softs->ipf_state_stats.iss_inuse--;
2930           softs->ipf_state_stats.iss_bucketlen[hvm]--;
2931 
2932           /*
2933            * ...and put the hash in the new one.
2934            */
2935           hvm = DOUBLE_HASH(hv);
2936           is->is_hv = hvm;
2937 
2938           /* TRACE is, hv, is_hv, hvm */
2939 
2940           isp = &softs->ipf_state_table[hvm];
2941           if (*isp)
2942                     (*isp)->is_phnext = &is->is_hnext;
2943           else
2944                     softs->ipf_state_stats.iss_inuse++;
2945           softs->ipf_state_stats.iss_bucketlen[hvm]++;
2946           is->is_phnext = isp;
2947           is->is_hnext = *isp;
2948           *isp = is;
2949 }
2950 
2951 
2952 /* ------------------------------------------------------------------------ */
2953 /* Function:    ipf_state_lookup                                            */
2954 /* Returns:     ipstate_t* - NULL == no matching state found,               */
2955 /*                           else pointer to state information is returned  */
2956 /* Parameters:  fin(I)  - pointer to packet information                     */
2957 /*              tcp(I)  - pointer to TCP/UDP header.                        */
2958 /*              ifqp(O) - pointer for storing tailq timeout                 */
2959 /*                                                                          */
2960 /* Search the state table for a matching entry to the packet described by   */
2961 /* the contents of *fin. For certain protocols, when a match is found the   */
2962 /* timeout queue is also selected and stored in ifpq if it is non-NULL.     */
2963 /*                                                                          */
2964 /* If we return NULL then no lock on ipf_state is held.                     */
2965 /* If we return non-null then a read-lock on ipf_state is held.             */
2966 /* ------------------------------------------------------------------------ */
2967 ipstate_t *
ipf_state_lookup(fr_info_t * fin,tcphdr_t * tcp,ipftq_t ** ifqp)2968 ipf_state_lookup(fr_info_t *fin, tcphdr_t *tcp, ipftq_t **ifqp)
2969 {
2970           ipf_main_softc_t *softc = fin->fin_main_soft;
2971           ipf_state_softc_t *softs = softc->ipf_state_soft;
2972           u_int hv, hvm, pr, v, tryagain;
2973           ipstate_t *is, **isp;
2974           u_short dport, sport;
2975           i6addr_t src, dst;
2976           struct icmp *ic;
2977           ipftq_t *ifq;
2978           int oow;
2979 
2980           is = NULL;
2981           ifq = NULL;
2982           tcp = fin->fin_dp;
2983           ic = (struct icmp *)tcp;
2984           hv = (pr = fin->fin_fi.fi_p);
2985           src = fin->fin_fi.fi_src;
2986           dst = fin->fin_fi.fi_dst;
2987           hv += src.in4.s_addr;
2988           hv += dst.in4.s_addr;
2989 
2990           v = fin->fin_fi.fi_v;
2991 #ifdef    USE_INET6
2992           if (v == 6) {
2993                     hv  += fin->fin_fi.fi_src.i6[1];
2994                     hv  += fin->fin_fi.fi_src.i6[2];
2995                     hv  += fin->fin_fi.fi_src.i6[3];
2996 
2997                     if ((fin->fin_p == IPPROTO_ICMPV6) &&
2998                         IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
2999                               hv -= dst.in4.s_addr;
3000                     } else {
3001                               hv += fin->fin_fi.fi_dst.i6[1];
3002                               hv += fin->fin_fi.fi_dst.i6[2];
3003                               hv += fin->fin_fi.fi_dst.i6[3];
3004                     }
3005           }
3006 #endif
3007           if ((v == 4) &&
3008               (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
3009                     if (fin->fin_out == 0) {
3010                               hv -= src.in4.s_addr;
3011                     } else {
3012                               hv -= dst.in4.s_addr;
3013                     }
3014           }
3015 
3016           /* TRACE fin_saddr, fin_daddr, hv */
3017 
3018           /*
3019            * Search the hash table for matching packet header info.
3020            */
3021           switch (pr)
3022           {
3023 #ifdef    USE_INET6
3024           case IPPROTO_ICMPV6 :
3025                     tryagain = 0;
3026                     if (v == 6) {
3027                               if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
3028                                   (ic->icmp_type == ICMP6_ECHO_REPLY)) {
3029                                         hv += ic->icmp_id;
3030                               }
3031                     }
3032                     READ_ENTER(&softc->ipf_state);
3033 icmp6again:
3034                     hvm = DOUBLE_HASH(hv);
3035                     for (isp = &softs->ipf_state_table[hvm];
3036                          ((is = *isp) != NULL); ) {
3037                               isp = &is->is_hnext;
3038                               if ((is->is_p != pr) || (is->is_v != v))
3039                                         continue;
3040                               is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3041                               if (is != NULL &&
3042                                   ipf_matchicmpqueryreply(v, &is->is_icmp,
3043                                                                ic, fin->fin_rev)) {
3044                                         if (fin->fin_rev)
3045                                                   ifq = &softs->ipf_state_icmpacktq;
3046                                         else
3047                                                   ifq = &softs->ipf_state_icmptq;
3048                                         break;
3049                               }
3050                     }
3051 
3052                     if (is != NULL) {
3053                               if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
3054                                         hv += fin->fin_fi.fi_src.i6[0];
3055                                         hv += fin->fin_fi.fi_src.i6[1];
3056                                         hv += fin->fin_fi.fi_src.i6[2];
3057                                         hv += fin->fin_fi.fi_src.i6[3];
3058                                         ipf_ipsmove(softs, is, hv);
3059                                         MUTEX_DOWNGRADE(&softc->ipf_state);
3060                               }
3061                               break;
3062                     }
3063                     RWLOCK_EXIT(&softc->ipf_state);
3064 
3065                     /*
3066                      * No matching icmp state entry. Perhaps this is a
3067                      * response to another state entry.
3068                      *
3069                      * XXX With some ICMP6 packets, the "other" address is already
3070                      * in the packet, after the ICMP6 header, and this could be
3071                      * used in place of the multicast address.  However, taking
3072                      * advantage of this requires some significant code changes
3073                      * to handle the specific types where that is the case.
3074                      */
3075                     if ((softs->ipf_state_stats.iss_wild != 0) &&
3076                         ((fin->fin_flx & FI_NOWILD) == 0) &&
3077                         (v == 6) && (tryagain == 0)) {
3078                               hv -= fin->fin_fi.fi_src.i6[0];
3079                               hv -= fin->fin_fi.fi_src.i6[1];
3080                               hv -= fin->fin_fi.fi_src.i6[2];
3081                               hv -= fin->fin_fi.fi_src.i6[3];
3082                               tryagain = 1;
3083                               WRITE_ENTER(&softc->ipf_state);
3084                               goto icmp6again;
3085                     }
3086 
3087                     is = ipf_checkicmp6matchingstate(fin);
3088                     if (is != NULL)
3089                               return is;
3090                     break;
3091 #endif
3092 
3093           case IPPROTO_ICMP :
3094                     if (v == 4) {
3095                               hv += ic->icmp_id;
3096                     }
3097                     hv = DOUBLE_HASH(hv);
3098                     READ_ENTER(&softc->ipf_state);
3099                     for (isp = &softs->ipf_state_table[hv];
3100                          ((is = *isp) != NULL); ) {
3101                               isp = &is->is_hnext;
3102                               if ((is->is_p != pr) || (is->is_v != v))
3103                                         continue;
3104                               is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3105                               if ((is != NULL) &&
3106                                   (ic->icmp_id == is->is_icmp.ici_id) &&
3107                                   ipf_matchicmpqueryreply(v, &is->is_icmp,
3108                                                                ic, fin->fin_rev)) {
3109                                         if (fin->fin_rev)
3110                                                   ifq = &softs->ipf_state_icmpacktq;
3111                                         else
3112                                                   ifq = &softs->ipf_state_icmptq;
3113                                         break;
3114                               }
3115                     }
3116                     if (is == NULL) {
3117                               RWLOCK_EXIT(&softc->ipf_state);
3118                     }
3119                     break;
3120 
3121           case IPPROTO_TCP :
3122           case IPPROTO_UDP :
3123                     ifqp = NULL;
3124                     sport = htons(fin->fin_data[0]);
3125                     hv += sport;
3126                     dport = htons(fin->fin_data[1]);
3127                     hv += dport;
3128                     oow = 0;
3129                     tryagain = 0;
3130                     READ_ENTER(&softc->ipf_state);
3131 retry_tcpudp:
3132                     hvm = DOUBLE_HASH(hv);
3133 
3134                     /* TRACE hv, hvm */
3135 
3136                     for (isp = &softs->ipf_state_table[hvm];
3137                          ((is = *isp) != NULL); ) {
3138                               isp = &is->is_hnext;
3139                               if ((is->is_p != pr) || (is->is_v != v))
3140                                         continue;
3141                               fin->fin_flx &= ~FI_OOW;
3142                               is = ipf_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
3143                               if (is != NULL) {
3144                                         if (pr == IPPROTO_TCP) {
3145                                                   if (!ipf_state_tcp(softc, softs, fin,
3146                                                                          tcp, is)) {
3147                                                             oow |= fin->fin_flx & FI_OOW;
3148                                                             continue;
3149                                                   }
3150                                         }
3151                                         break;
3152                               }
3153                     }
3154                     if (is != NULL) {
3155                               if (tryagain &&
3156                                   !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
3157                                         hv += dport;
3158                                         hv += sport;
3159                                         ipf_ipsmove(softs, is, hv);
3160                                         MUTEX_DOWNGRADE(&softc->ipf_state);
3161                               }
3162                               break;
3163                     }
3164                     RWLOCK_EXIT(&softc->ipf_state);
3165 
3166                     if ((softs->ipf_state_stats.iss_wild != 0) &&
3167                         ((fin->fin_flx & FI_NOWILD) == 0)) {
3168                               if (tryagain == 0) {
3169                                         hv -= dport;
3170                                         hv -= sport;
3171                               } else if (tryagain == 1) {
3172                                         hv = fin->fin_fi.fi_p;
3173                                         /*
3174                                          * If we try to pretend this is a reply to a
3175                                          * multicast/broadcast packet then we need to
3176                                          * exclude part of the address from the hash
3177                                          * calculation.
3178                                          */
3179                                         if (fin->fin_out == 0) {
3180                                                   hv += src.in4.s_addr;
3181                                         } else {
3182                                                   hv += dst.in4.s_addr;
3183                                         }
3184                                         hv += dport;
3185                                         hv += sport;
3186                               }
3187                               tryagain++;
3188                               if (tryagain <= 2) {
3189                                         WRITE_ENTER(&softc->ipf_state);
3190                                         goto retry_tcpudp;
3191                               }
3192                     }
3193                     fin->fin_flx |= oow;
3194                     break;
3195 
3196 #if 0
3197           case IPPROTO_GRE :
3198                     gre = fin->fin_dp;
3199                     if (GRE_REV(gre->gr_flags) == 1) {
3200                               hv += gre->gr_call;
3201                     }
3202                     /* FALLTHROUGH */
3203 #endif
3204           default :
3205                     ifqp = NULL;
3206                     hvm = DOUBLE_HASH(hv);
3207                     READ_ENTER(&softc->ipf_state);
3208                     for (isp = &softs->ipf_state_table[hvm];
3209                          ((is = *isp) != NULL); ) {
3210                               isp = &is->is_hnext;
3211                               if ((is->is_p != pr) || (is->is_v != v))
3212                                         continue;
3213                               is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3214                               if (is != NULL) {
3215                                         ifq = &softs->ipf_state_iptq;
3216                                         break;
3217                               }
3218                     }
3219                     if (is == NULL) {
3220                               RWLOCK_EXIT(&softc->ipf_state);
3221                     }
3222                     break;
3223           }
3224 
3225           if (is != NULL) {
3226                     if (((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
3227                         (is->is_tqehead[fin->fin_rev] != NULL))
3228                               ifq = is->is_tqehead[fin->fin_rev];
3229                     if (ifq != NULL && ifqp != NULL)
3230                               *ifqp = ifq;
3231           } else {
3232                     SBUMP(ipf_state_stats.iss_lookup_miss);
3233           }
3234           return is;
3235 }
3236 
3237 
3238 /* ------------------------------------------------------------------------ */
3239 /* Function:    ipf_state_check                                             */
3240 /* Returns:     frentry_t* - NULL == search failed,                         */
3241 /*                           else pointer to rule for matching state        */
3242 /* Parameters:  fin(I)   - pointer to packet information                    */
3243 /*              passp(I) - pointer to filtering result flags                */
3244 /*                                                                          */
3245 /* Check if a packet is associated with an entry in the state table.        */
3246 /* ------------------------------------------------------------------------ */
3247 frentry_t *
ipf_state_check(fr_info_t * fin,u_32_t * passp)3248 ipf_state_check(fr_info_t *fin, u_32_t *passp)
3249 {
3250           ipf_main_softc_t *softc = fin->fin_main_soft;
3251           ipf_state_softc_t *softs = softc->ipf_state_soft;
3252           ipftqent_t *tqe;
3253           ipstate_t *is;
3254           frentry_t *fr;
3255           tcphdr_t *tcp;
3256           ipftq_t *ifq;
3257           u_int pass;
3258           int inout;
3259 
3260           if (softs->ipf_state_lock || (softs->ipf_state_list == NULL))
3261                     return NULL;
3262 
3263           if (fin->fin_flx & (FI_SHORT|FI_FRAGBODY|FI_BAD)) {
3264                     SBUMPD(ipf_state_stats, iss_check_bad);
3265                     return NULL;
3266           }
3267 
3268           if ((fin->fin_flx & FI_TCPUDP) ||
3269               (fin->fin_fi.fi_p == IPPROTO_ICMP)
3270 #ifdef    USE_INET6
3271               || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
3272 #endif
3273               )
3274                     tcp = fin->fin_dp;
3275           else
3276                     tcp = NULL;
3277 
3278           ifq = NULL;
3279           /*
3280            * Search the hash table for matching packet header info.
3281            */
3282           is = ipf_state_lookup(fin, tcp, &ifq);
3283 
3284           switch (fin->fin_p)
3285           {
3286 #ifdef    USE_INET6
3287           case IPPROTO_ICMPV6 :
3288                     if (is != NULL)
3289                               break;
3290                     if (fin->fin_v == 6) {
3291                               is = ipf_checkicmp6matchingstate(fin);
3292                     }
3293                     break;
3294 #endif
3295           case IPPROTO_ICMP :
3296                     if (is != NULL)
3297                               break;
3298                     /*
3299                      * No matching icmp state entry. Perhaps this is a
3300                      * response to another state entry.
3301                      */
3302                     is = ipf_checkicmpmatchingstate(fin);
3303                     break;
3304 
3305           case IPPROTO_TCP :
3306                     if (is == NULL)
3307                               break;
3308 
3309                     if (is->is_pass & FR_NEWISN) {
3310                               if (fin->fin_out == 0)
3311                                         ipf_fixinisn(fin, is);
3312                               else if (fin->fin_out == 1)
3313                                         ipf_fixoutisn(fin, is);
3314                     }
3315                     break;
3316           default :
3317                     if (fin->fin_rev)
3318                               ifq = &softs->ipf_state_udpacktq;
3319                     else
3320                               ifq = &softs->ipf_state_udptq;
3321                     break;
3322           }
3323           if (is == NULL) {
3324                     SBUMP(ipf_state_stats.iss_check_miss);
3325                     return NULL;
3326           }
3327 
3328           fr = is->is_rule;
3329           if (fr != NULL) {
3330                     if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
3331                               if (fin->fin_nattag == NULL) {
3332                                         RWLOCK_EXIT(&softc->ipf_state);
3333                                         SBUMPD(ipf_state_stats, iss_check_notag);
3334                                         return NULL;
3335                               }
3336                               if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag)!=0) {
3337                                         RWLOCK_EXIT(&softc->ipf_state);
3338                                         SBUMPD(ipf_state_stats, iss_check_nattag);
3339                                         return NULL;
3340                               }
3341                     }
3342                     (void) strncpy(fin->fin_group, FR_NAME(fr, fr_group),
3343                                      FR_GROUPLEN);
3344                     fin->fin_icode = fr->fr_icode;
3345           }
3346 
3347           fin->fin_rule = is->is_rulen;
3348           fin->fin_fr = fr;
3349 
3350           /*
3351            * If this packet is a fragment and the rule says to track fragments,
3352            * then create a new fragment cache entry.
3353            */
3354           if (fin->fin_flx & FI_FRAG && FR_ISPASS(is->is_pass) &&
3355              is->is_pass & FR_KEEPFRAG)
3356                     (void) ipf_frag_new(softc, fin, is->is_pass);
3357 
3358           /*
3359            * For TCP packets, ifq == NULL.  For all others, check if this new
3360            * queue is different to the last one it was on and move it if so.
3361            */
3362           tqe = &is->is_sti;
3363           if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
3364                     ifq = is->is_tqehead[fin->fin_rev];
3365 
3366           MUTEX_ENTER(&is->is_lock);
3367 
3368           if (ifq != NULL)
3369                     ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq, ifq);
3370 
3371           inout = (fin->fin_rev << 1) + fin->fin_out;
3372           is->is_pkts[inout]++;
3373           is->is_bytes[inout] += fin->fin_plen;
3374           fin->fin_pktnum = is->is_pkts[inout] + is->is_icmppkts[inout];
3375 
3376           MUTEX_EXIT(&is->is_lock);
3377 
3378           pass = is->is_pass;
3379 
3380           if (is->is_flags & IS_STATESYNC)
3381                     ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);
3382 
3383           RWLOCK_EXIT(&softc->ipf_state);
3384 
3385           SBUMP(ipf_state_stats.iss_hits);
3386 
3387           fin->fin_dif = &is->is_dif;
3388           fin->fin_tif = &is->is_tifs[fin->fin_rev];
3389           fin->fin_flx |= FI_STATE;
3390           if ((pass & FR_LOGFIRST) != 0)
3391                     pass &= ~(FR_LOGFIRST|FR_LOG);
3392           *passp = pass;
3393           return fr;
3394 }
3395 
3396 
3397 /* ------------------------------------------------------------------------ */
3398 /* Function:    ipf_fixoutisn                                               */
3399 /* Returns:     Nil                                                         */
3400 /* Parameters:  fin(I) - pointer to packet information                      */
3401 /*              is(I)  - pointer to master state structure                  */
3402 /*                                                                          */
3403 /* Called only for outbound packets, adjusts the sequence number and the    */
3404 /* TCP checksum to match that change.                                       */
3405 /* ------------------------------------------------------------------------ */
3406 static void
ipf_fixoutisn(fr_info_t * fin,ipstate_t * is)3407 ipf_fixoutisn(fr_info_t *fin, ipstate_t *is)
3408 {
3409           tcphdr_t *tcp;
3410           int rev;
3411           u_32_t seq;
3412 
3413           tcp = fin->fin_dp;
3414           rev = fin->fin_rev;
3415           if ((is->is_flags & IS_ISNSYN) != 0) {
3416                     if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3417                               seq = ntohl(tcp->th_seq);
3418                               seq += is->is_isninc[0];
3419                               tcp->th_seq = htonl(seq);
3420                               ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3421                     }
3422           }
3423           if ((is->is_flags & IS_ISNACK) != 0) {
3424                     if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3425                               seq = ntohl(tcp->th_seq);
3426                               seq += is->is_isninc[1];
3427                               tcp->th_seq = htonl(seq);
3428                               ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3429                     }
3430           }
3431 }
3432 
3433 
3434 /* ------------------------------------------------------------------------ */
3435 /* Function:    ipf_fixinisn                                                */
3436 /* Returns:     Nil                                                         */
3437 /* Parameters:  fin(I)   - pointer to packet information                    */
3438 /*              is(I)  - pointer to master state structure                  */
3439 /*                                                                          */
3440 /* Called only for inbound packets, adjusts the acknowledge number and the  */
3441 /* TCP checksum to match that change.                                       */
3442 /* ------------------------------------------------------------------------ */
3443 static void
ipf_fixinisn(fr_info_t * fin,ipstate_t * is)3444 ipf_fixinisn(fr_info_t *fin, ipstate_t *is)
3445 {
3446           tcphdr_t *tcp;
3447           int rev;
3448           u_32_t ack;
3449 
3450           tcp = fin->fin_dp;
3451           rev = fin->fin_rev;
3452           if ((is->is_flags & IS_ISNSYN) != 0) {
3453                     if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3454                               ack = ntohl(tcp->th_ack);
3455                               ack -= is->is_isninc[0];
3456                               tcp->th_ack = htonl(ack);
3457                               ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3458                     }
3459           }
3460           if ((is->is_flags & IS_ISNACK) != 0) {
3461                     if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3462                               ack = ntohl(tcp->th_ack);
3463                               ack -= is->is_isninc[1];
3464                               tcp->th_ack = htonl(ack);
3465                               ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3466                     }
3467           }
3468 }
3469 
3470 
3471 /* ------------------------------------------------------------------------ */
3472 /* Function:    ipf_state_sync                                              */
3473 /* Returns:     Nil                                                         */
3474 /* Parameters:  softc(I) - pointer to soft context main structure           */
3475 /*              ifp(I)   - pointer to interface                             */
3476 /*                                                                          */
3477 /* Walk through all state entries and if an interface pointer match is      */
3478 /* found then look it up again, based on its name in case the pointer has   */
3479 /* changed since last time.                                                 */
3480 /*                                                                          */
3481 /* If ifp is passed in as being non-null then we are only doing updates for */
3482 /* existing, matching, uses of it.                                          */
3483 /* ------------------------------------------------------------------------ */
3484 void
ipf_state_sync(ipf_main_softc_t * softc,void * ifp)3485 ipf_state_sync(ipf_main_softc_t *softc, void *ifp)
3486 {
3487           ipf_state_softc_t *softs = softc->ipf_state_soft;
3488           ipstate_t *is;
3489           int i;
3490 
3491           if (softc->ipf_running <= 0)
3492                     return;
3493 
3494           WRITE_ENTER(&softc->ipf_state);
3495 
3496           if (softc->ipf_running <= 0) {
3497                     RWLOCK_EXIT(&softc->ipf_state);
3498                     return;
3499           }
3500 
3501           for (is = softs->ipf_state_list; is; is = is->is_next) {
3502                     /*
3503                      * Look up all the interface names in the state entry.
3504                      */
3505                     for (i = 0; i < 4; i++) {
3506                               if (ifp == NULL || ifp == is->is_ifp[i])
3507                                         is->is_ifp[i] = ipf_resolvenic(softc,
3508                                                                             is->is_ifname[i],
3509                                                                             is->is_v);
3510                     }
3511           }
3512           RWLOCK_EXIT(&softc->ipf_state);
3513 }
3514 
3515 
3516 /* ------------------------------------------------------------------------ */
3517 /* Function:    ipf_state_del                                               */
3518 /* Returns:     int    - 0 = deleted, else refernce count on active struct  */
3519 /* Parameters:  softc(I) - pointer to soft context main structure           */
3520 /*              is(I)  - pointer to state structure to delete               */
3521 /*              why(I) - if not 0, log reason why it was deleted            */
3522 /* Write Locks: ipf_state                                                   */
3523 /*                                                                          */
3524 /* Deletes a state entry from the enumerated list as well as the hash table */
3525 /* and timeout queue lists.  Make adjustments to hash table statistics and  */
3526 /* global counters as required.                                             */
3527 /* ------------------------------------------------------------------------ */
3528 static int
ipf_state_del(ipf_main_softc_t * softc,ipstate_t * is,int why)3529 ipf_state_del(ipf_main_softc_t *softc, ipstate_t *is, int why)
3530 {
3531           ipf_state_softc_t *softs = softc->ipf_state_soft;
3532           int orphan = 1;
3533           frentry_t *fr;
3534 
3535           /*
3536            * Since we want to delete this, remove it from the state table,
3537            * where it can be found & used, first.
3538            */
3539           if (is->is_phnext != NULL) {
3540                     *is->is_phnext = is->is_hnext;
3541                     if (is->is_hnext != NULL)
3542                               is->is_hnext->is_phnext = is->is_phnext;
3543                     if (softs->ipf_state_table[is->is_hv] == NULL)
3544                               softs->ipf_state_stats.iss_inuse--;
3545                     softs->ipf_state_stats.iss_bucketlen[is->is_hv]--;
3546 
3547                     is->is_phnext = NULL;
3548                     is->is_hnext = NULL;
3549                     orphan = 0;
3550           }
3551 
3552           /*
3553            * Because ipf_state_stats.iss_wild is a count of entries in the state
3554            * table that have wildcard flags set, only decerement it once
3555            * and do it here.
3556            */
3557           if (is->is_flags & (SI_WILDP|SI_WILDA)) {
3558                     if (!(is->is_flags & SI_CLONED)) {
3559                               ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
3560                     }
3561                     is->is_flags &= ~(SI_WILDP|SI_WILDA);
3562           }
3563 
3564           /*
3565            * Next, remove it from the timeout queue it is in.
3566            */
3567           if (is->is_sti.tqe_ifq != NULL)
3568                     ipf_deletequeueentry(&is->is_sti);
3569 
3570           /*
3571            * If it is still in use by something else, do not go any further,
3572            * but note that at this point it is now an orphan.  How can this
3573            * be?  ipf_state_flush() calls ipf_delete() directly because it wants
3574            * to empty the table out and if something has a hold on a state
3575            * entry (such as ipfstat), it'll do the deref path that'll bring
3576            * us back here to do the real delete & free.
3577            */
3578           MUTEX_ENTER(&is->is_lock);
3579           if (is->is_me != NULL) {
3580                     *is->is_me = NULL;
3581                     is->is_me = NULL;
3582                     is->is_ref--;
3583           }
3584           is->is_ref--;
3585           if (is->is_ref > 0) {
3586                     int refs;
3587 
3588                     refs = is->is_ref;
3589                     MUTEX_EXIT(&is->is_lock);
3590                     if (!orphan)
3591                               softs->ipf_state_stats.iss_orphan++;
3592                     return refs;
3593           }
3594 
3595           fr = is->is_rule;
3596           is->is_rule = NULL;
3597           if (fr != NULL) {
3598                     if (fr->fr_srctrack.ht_max_nodes != 0) {
3599                               (void) ipf_ht_node_del(&fr->fr_srctrack,
3600                                                          is->is_family, &is->is_src);
3601                     }
3602           }
3603 
3604           ASSERT(is->is_ref == 0);
3605           MUTEX_EXIT(&is->is_lock);
3606 
3607           if (is->is_tqehead[0] != NULL) {
3608                     if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
3609                               ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
3610           }
3611           if (is->is_tqehead[1] != NULL) {
3612                     if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
3613                               ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
3614           }
3615 
3616           if (is->is_sync)
3617                     ipf_sync_del_state(softc->ipf_sync_soft, is->is_sync);
3618 
3619           /*
3620            * Now remove it from the linked list of known states
3621            */
3622           if (is->is_pnext != NULL) {
3623                     *is->is_pnext = is->is_next;
3624 
3625                     if (is->is_next != NULL)
3626                               is->is_next->is_pnext = is->is_pnext;
3627 
3628                     is->is_pnext = NULL;
3629                     is->is_next = NULL;
3630           }
3631 
3632           if (softs->ipf_state_logging != 0 && why != 0)
3633                     ipf_state_log(softc, is, why);
3634 
3635           if (is->is_p == IPPROTO_TCP)
3636                     softs->ipf_state_stats.iss_fin++;
3637           else
3638                     softs->ipf_state_stats.iss_expire++;
3639           if (orphan)
3640                     softs->ipf_state_stats.iss_orphan--;
3641 
3642           if (fr != NULL) {
3643                     fr->fr_statecnt--;
3644                     (void) ipf_derefrule(softc, &fr);
3645           }
3646 
3647           softs->ipf_state_stats.iss_active_proto[is->is_p]--;
3648 
3649           MUTEX_DESTROY(&is->is_lock);
3650           KFREE(is);
3651           softs->ipf_state_stats.iss_active--;
3652 
3653           return 0;
3654 }
3655 
3656 
3657 /* ------------------------------------------------------------------------ */
3658 /* Function:    ipf_state_expire                                            */
3659 /* Returns:     Nil                                                         */
3660 /* Parameters:  softc(I) - pointer to soft context main structure           */
3661 /*                                                                          */
3662 /* Slowly expire held state for thingslike UDP and ICMP.  The algorithm     */
3663 /* used here is to keep the queue sorted with the oldest things at the top  */
3664 /* and the youngest at the bottom.  So if the top one doesn't need to be    */
3665 /* expired then neither will any under it.                                  */
3666 /* ------------------------------------------------------------------------ */
3667 void
ipf_state_expire(ipf_main_softc_t * softc)3668 ipf_state_expire(ipf_main_softc_t *softc)
3669 {
3670           ipf_state_softc_t *softs = softc->ipf_state_soft;
3671           ipftq_t *ifq, *ifqnext;
3672           ipftqent_t *tqe, *tqn;
3673           ipstate_t *is;
3674           SPL_INT(s);
3675 
3676           SPL_NET(s);
3677           WRITE_ENTER(&softc->ipf_state);
3678           for (ifq = softs->ipf_state_tcptq; ifq != NULL; ifq = ifq->ifq_next)
3679                     for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3680                               if (tqe->tqe_die > softc->ipf_ticks)
3681                                         break;
3682                               tqn = tqe->tqe_next;
3683                               is = tqe->tqe_parent;
3684                               ipf_state_del(softc, is, ISL_EXPIRE);
3685                     }
3686 
3687           for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3688                     ifqnext = ifq->ifq_next;
3689 
3690                     for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3691                               if (tqe->tqe_die > softc->ipf_ticks)
3692                                         break;
3693                               tqn = tqe->tqe_next;
3694                               is = tqe->tqe_parent;
3695                               ipf_state_del(softc, is, ISL_EXPIRE);
3696                     }
3697           }
3698 
3699           for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3700                     ifqnext = ifq->ifq_next;
3701 
3702                     if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
3703                         (ifq->ifq_ref == 0)) {
3704                               ipf_freetimeoutqueue(softc, ifq);
3705                     }
3706           }
3707 
3708           if (softs->ipf_state_doflush) {
3709                     (void) ipf_state_flush(softc, 2, 0);
3710                     softs->ipf_state_doflush = 0;
3711                     softs->ipf_state_wm_last = softc->ipf_ticks;
3712           }
3713 
3714           RWLOCK_EXIT(&softc->ipf_state);
3715           SPL_X(s);
3716 }
3717 
3718 
3719 /* ------------------------------------------------------------------------ */
3720 /* Function:    ipf_state_flush                                             */
3721 /* Returns:     int - 0 == success, -1 == failure                           */
3722 /* Parameters:  softc(I) - pointer to soft context main structure           */
3723 /*              which(I) - which flush action to perform                    */
3724 /*              proto(I) - which protocol to flush (0 == ALL)               */
3725 /* Write Locks: ipf_state                                                   */
3726 /*                                                                          */
3727 /* Flush state tables.  Three actions currently defined:                    */
3728 /* which == 0 : flush all state table entries                               */
3729 /* which == 1 : flush TCP connections which have started to close but are   */
3730 /*              stuck for some reason.                                        */
3731 /* which == 2 : flush TCP connections which have been idle for a long time, */
3732 /*              starting at > 4 days idle and working back in successive half-*/
3733 /*              days to at most 12 hours old.  If this fails to free enough   */
3734 /*            slots then work backwards in half hour slots to 30 minutes.   */
3735 /*            If that too fails, then work backwards in 30 second intervals */
3736 /*            for the last 30 minutes to at worst 30 seconds idle.          */
3737 /* ------------------------------------------------------------------------ */
3738 int
ipf_state_flush(ipf_main_softc_t * softc,int which,int proto)3739 ipf_state_flush(ipf_main_softc_t *softc, int which, int proto)
3740 {
3741           ipf_state_softc_t *softs = softc->ipf_state_soft;
3742           ipftqent_t *tqe, *tqn;
3743           ipstate_t *is, **isp;
3744           ipftq_t *ifq;
3745           int removed;
3746           SPL_INT(s);
3747 
3748           removed = 0;
3749 
3750           SPL_NET(s);
3751 
3752           switch (which)
3753           {
3754           case 0 :
3755                     SBUMP(ipf_state_stats.iss_flush_all);
3756                     /*
3757                      * Style 0 flush removes everything...
3758                      */
3759                     for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3760                               if ((proto != 0) && (is->is_v != proto)) {
3761                                         isp = &is->is_next;
3762                                         continue;
3763                               }
3764                               if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3765                                         removed++;
3766                               else
3767                                         isp = &is->is_next;
3768                     }
3769                     break;
3770 
3771           case 1 :
3772                     SBUMP(ipf_state_stats.iss_flush_closing);
3773                     /*
3774                      * Since we're only interested in things that are closing,
3775                      * we can start with the appropriate timeout queue.
3776                      */
3777                     for (ifq = softs->ipf_state_tcptq + IPF_TCPS_CLOSE_WAIT;
3778                          ifq != NULL; ifq = ifq->ifq_next) {
3779 
3780                               for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3781                                         tqn = tqe->tqe_next;
3782                                         is = tqe->tqe_parent;
3783                                         if (is->is_p != IPPROTO_TCP)
3784                                                   break;
3785                                         if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3786                                                   removed++;
3787                               }
3788                     }
3789 
3790                     /*
3791                      * Also need to look through the user defined queues.
3792                      */
3793                     for (ifq = softs->ipf_state_usertq; ifq != NULL;
3794                          ifq = ifq->ifq_next) {
3795                               for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3796                                         tqn = tqe->tqe_next;
3797                                         is = tqe->tqe_parent;
3798                                         if (is->is_p != IPPROTO_TCP)
3799                                                   continue;
3800 
3801                                         if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
3802                                             (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
3803                                                   if (ipf_state_del(softc, is,
3804                                                                         ISL_FLUSH) == 0)
3805                                                             removed++;
3806                                         }
3807                               }
3808                     }
3809                     break;
3810 
3811           case 2 :
3812                     break;
3813 
3814                     /*
3815                      * Args 5-11 correspond to flushing those particular states
3816                      * for TCP connections.
3817                      */
3818           case IPF_TCPS_CLOSE_WAIT :
3819           case IPF_TCPS_FIN_WAIT_1 :
3820           case IPF_TCPS_CLOSING :
3821           case IPF_TCPS_LAST_ACK :
3822           case IPF_TCPS_FIN_WAIT_2 :
3823           case IPF_TCPS_TIME_WAIT :
3824           case IPF_TCPS_CLOSED :
3825                     SBUMP(ipf_state_stats.iss_flush_queue);
3826                     tqn = softs->ipf_state_tcptq[which].ifq_head;
3827                     while (tqn != NULL) {
3828                               tqe = tqn;
3829                               tqn = tqe->tqe_next;
3830                               is = tqe->tqe_parent;
3831                               if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3832                                         removed++;
3833                     }
3834                     break;
3835 
3836           default :
3837                     if (which < 30)
3838                               break;
3839 
3840                     SBUMP(ipf_state_stats.iss_flush_state);
3841                     /*
3842                      * Take a large arbitrary number to mean the number of seconds
3843                      * for which which consider to be the maximum value we'll allow
3844                      * the expiration to be.
3845                      */
3846                     which = IPF_TTLVAL(which);
3847                     for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3848                               if ((proto == 0) || (is->is_v == proto)) {
3849                                         if (softc->ipf_ticks - is->is_touched > which) {
3850                                                   if (ipf_state_del(softc, is,
3851                                                                         ISL_FLUSH) == 0) {
3852                                                             removed++;
3853                                                             continue;
3854                                                   }
3855                                         }
3856                               }
3857                               isp = &is->is_next;
3858                     }
3859                     break;
3860           }
3861 
3862           if (which != 2) {
3863                     SPL_X(s);
3864                     return removed;
3865           }
3866 
3867           SBUMP(ipf_state_stats.iss_flush_timeout);
3868           /*
3869            * Asked to remove inactive entries because the table is full, try
3870            * again, 3 times, if first attempt failed with a different criteria
3871            * each time.  The order tried in must be in decreasing age.
3872            * Another alternative is to implement random drop and drop N entries
3873            * at random until N have been freed up.
3874            */
3875           if (softc->ipf_ticks - softs->ipf_state_wm_last >
3876               softs->ipf_state_wm_freq) {
3877                     removed = ipf_queueflush(softc, ipf_state_flush_entry,
3878                                                    softs->ipf_state_tcptq,
3879                                                    softs->ipf_state_usertq,
3880                                                    &softs->ipf_state_stats.iss_active,
3881                                                    softs->ipf_state_size,
3882                                                    softs->ipf_state_wm_low);
3883                     softs->ipf_state_wm_last = softc->ipf_ticks;
3884           }
3885 
3886           SPL_X(s);
3887           return removed;
3888 }
3889 
3890 
3891 /* ------------------------------------------------------------------------ */
3892 /* Function:    ipf_state_flush_entry                                       */
3893 /* Returns:     int - 0 = entry deleted, else not deleted                   */
3894 /* Parameters:  softc(I) - pointer to soft context main structure           */
3895 /*              entry(I)  - pointer to state structure to delete            */
3896 /* Write Locks: ipf_state                                                   */
3897 /*                                                                          */
3898 /* This function is a stepping stone between ipf_queueflush() and           */
3899 /* ipf_state_del().  It is used so we can provide a uniform interface via   */
3900 /* the ipf_queueflush() function.                                           */
3901 /* ------------------------------------------------------------------------ */
3902 static int
ipf_state_flush_entry(ipf_main_softc_t * softc,void * entry)3903 ipf_state_flush_entry(ipf_main_softc_t *softc, void *entry)
3904 {
3905           return ipf_state_del(softc, entry, ISL_FLUSH);
3906 }
3907 
3908 
3909 /* ------------------------------------------------------------------------ */
3910 /* Function:    ipf_tcp_age                                                 */
3911 /* Returns:     int - 1 == state transition made, 0 == no change (rejected) */
3912 /* Parameters:  tqe(I)   - pointer to timeout queue information             */
3913 /*              fin(I)   - pointer to packet information                    */
3914 /*              tqtab(I) - TCP timeout queue table this is in               */
3915 /*              flags(I) - flags from state/NAT entry                       */
3916 /*              ok(I)    - can we advance state                             */
3917 /*                                                                          */
3918 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
3919 /*                                                                          */
3920 /* - (try to) base state transitions on real evidence only,                 */
3921 /*   i.e. packets that are sent and have been received by ipfilter;         */
3922 /*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
3923 /*                                                                          */
3924 /* - deal with half-closed connections correctly;                           */
3925 /*                                                                          */
3926 /* - store the state of the source in state[0] such that ipfstat            */
3927 /*   displays the state as source/dest instead of dest/source; the calls    */
3928 /*   to ipf_tcp_age have been changed accordingly.                          */
3929 /*                                                                          */
3930 /* Internal Parameters:                                                     */
3931 /*                                                                          */
3932 /*    state[0] = state of source (host that initiated connection)           */
3933 /*    state[1] = state of dest   (host that accepted the connection)        */
3934 /*                                                                          */
3935 /*    dir == 0 : a packet from source to dest                               */
3936 /*    dir == 1 : a packet from dest to source                               */
3937 /*                                                                          */
3938 /* A typical procession for a connection is as follows:                     */
3939 /*                                                                          */
3940 /* +--------------+-------------------+                                     */
3941 /* | Side '0'     | Side '1'          |                                     */
3942 /* +--------------+-------------------+                                     */
3943 /* | 0 -> 1 (SYN) |                   |                                     */
3944 /* |              | 0 -> 2 (SYN-ACK)  |                                     */
3945 /* | 1 -> 3 (ACK) |                   |                                     */
3946 /* |              | 2 -> 4 (ACK-PUSH) |                                     */
3947 /* | 3 -> 4 (ACK) |                   |                                     */
3948 /* |   ...        |   ...             |                                     */
3949 /* |              | 4 -> 6 (FIN-ACK)  |                                     */
3950 /* | 4 -> 5 (ACK) |                   |                                     */
3951 /* |              | 6 -> 6 (ACK-PUSH) |                                     */
3952 /* | 5 -> 5 (ACK) |                   |                                     */
3953 /* | 5 -> 8 (FIN) |                   |                                     */
3954 /* |              | 6 -> 10 (ACK)     |                                     */
3955 /* +--------------+-------------------+                                     */
3956 /*                                                                          */
3957 /* Locking: it is assumed that the parent of the tqe structure is locked.   */
3958 /* ------------------------------------------------------------------------ */
3959 int
ipf_tcp_age(ipftqent_t * tqe,fr_info_t * fin,ipftq_t * tqtab,int flags,int ok)3960 ipf_tcp_age(ipftqent_t *tqe, fr_info_t *fin, ipftq_t *tqtab, int flags, int ok)
3961 {
3962           ipf_main_softc_t *softc = fin->fin_main_soft;
3963           int dlen, ostate, nstate, rval, dir;
3964           u_char tcpflags;
3965           tcphdr_t *tcp;
3966 
3967           tcp = fin->fin_dp;
3968 
3969           rval = 0;
3970           dir = fin->fin_rev;
3971           tcpflags = tcp->th_flags;
3972           dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
3973           ostate = tqe->tqe_state[1 - dir];
3974           nstate = tqe->tqe_state[dir];
3975 
3976           if (tcpflags & TH_RST) {
3977                     if (!(tcpflags & TH_PUSH) && !dlen)
3978                               nstate = IPF_TCPS_CLOSED;
3979                     else
3980                               nstate = IPF_TCPS_CLOSE_WAIT;
3981 
3982                     if (ostate <= IPF_TCPS_ESTABLISHED) {
3983                               tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
3984                     }
3985                     rval = 1;
3986           } else {
3987                     switch (nstate)
3988                     {
3989                     case IPF_TCPS_LISTEN: /* 0 */
3990                               if ((tcpflags & TH_OPENING) == TH_OPENING) {
3991                                         /*
3992                                          * 'dir' received an S and sends SA in
3993                                          * response, LISTEN -> SYN_RECEIVED
3994                                          */
3995                                         nstate = IPF_TCPS_SYN_RECEIVED;
3996                                         rval = 1;
3997                               } else if ((tcpflags & TH_OPENING) == TH_SYN) {
3998                                         /* 'dir' sent S, LISTEN -> SYN_SENT */
3999                                         nstate = IPF_TCPS_SYN_SENT;
4000                                         rval = 1;
4001                               }
4002                               /*
4003                                * the next piece of code makes it possible to get
4004                                * already established connections into the state table
4005                                * after a restart or reload of the filter rules; this
4006                                * does not work when a strict 'flags S keep state' is
4007                                * used for tcp connections of course
4008                                */
4009                               if (((flags & IS_TCPFSM) == 0) &&
4010                                   ((tcpflags & TH_ACKMASK) == TH_ACK)) {
4011                                         /*
4012                                          * we saw an A, guess 'dir' is in ESTABLISHED
4013                                          * mode
4014                                          */
4015                                         switch (ostate)
4016                                         {
4017                                         case IPF_TCPS_LISTEN :
4018                                         case IPF_TCPS_SYN_RECEIVED :
4019                                                   nstate = IPF_TCPS_HALF_ESTAB;
4020                                                   rval = 1;
4021                                                   break;
4022                                         case IPF_TCPS_HALF_ESTAB :
4023                                         case IPF_TCPS_ESTABLISHED :
4024                                                   nstate = IPF_TCPS_ESTABLISHED;
4025                                                   rval = 1;
4026                                                   break;
4027                                         default :
4028                                                   break;
4029                                         }
4030                               }
4031                               /*
4032                                * TODO: besides regular ACK packets we can have other
4033                                * packets as well; it is yet to be determined how we
4034                                * should initialize the states in those cases
4035                                */
4036                               break;
4037 
4038                     case IPF_TCPS_SYN_SENT: /* 1 */
4039                               if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
4040                                         /*
4041                                          * A retransmitted SYN packet.  We do not reset
4042                                          * the timeout here to ipf_tcptimeout because a
4043                                          * connection connect timeout does not renew
4044                                          * after every packet that is sent.  We need to
4045                                          * set rval so as to indicate the packet has
4046                                          * passed the check for its flags being valid
4047                                          * in the TCP FSM.  Setting rval to 2 has the
4048                                          * result of not resetting the timeout.
4049                                          */
4050                                         rval = 2;
4051                               } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
4052                                            TH_ACK) {
4053                                         /*
4054                                          * we see an A from 'dir' which is in SYN_SENT
4055                                          * state: 'dir' sent an A in response to an SA
4056                                          * which it received, SYN_SENT -> ESTABLISHED
4057                                          */
4058                                         nstate = IPF_TCPS_ESTABLISHED;
4059                                         rval = 1;
4060                               } else if (tcpflags & TH_FIN) {
4061                                         /*
4062                                          * we see an F from 'dir' which is in SYN_SENT
4063                                          * state and wants to close its side of the
4064                                          * connection; SYN_SENT -> FIN_WAIT_1
4065                                          */
4066                                         nstate = IPF_TCPS_FIN_WAIT_1;
4067                                         rval = 1;
4068                               } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
4069                                         /*
4070                                          * we see an SA from 'dir' which is already in
4071                                          * SYN_SENT state, this means we have a
4072                                          * simultaneous open; SYN_SENT -> SYN_RECEIVED
4073                                          */
4074                                         nstate = IPF_TCPS_SYN_RECEIVED;
4075                                         rval = 1;
4076                               }
4077                               break;
4078 
4079                     case IPF_TCPS_SYN_RECEIVED: /* 2 */
4080                               if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
4081                                         /*
4082                                          * we see an A from 'dir' which was in
4083                                          * SYN_RECEIVED state so it must now be in
4084                                          * established state, SYN_RECEIVED ->
4085                                          * ESTABLISHED
4086                                          */
4087                                         nstate = IPF_TCPS_ESTABLISHED;
4088                                         rval = 1;
4089                               } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
4090                                            TH_OPENING) {
4091                                         /*
4092                                          * We see an SA from 'dir' which is already in
4093                                          * SYN_RECEIVED state.
4094                                          */
4095                                         rval = 2;
4096                               } else if (tcpflags & TH_FIN) {
4097                                         /*
4098                                          * we see an F from 'dir' which is in
4099                                          * SYN_RECEIVED state and wants to close its
4100                                          * side of the connection; SYN_RECEIVED ->
4101                                          * FIN_WAIT_1
4102                                          */
4103                                         nstate = IPF_TCPS_FIN_WAIT_1;
4104                                         rval = 1;
4105                               }
4106                               break;
4107 
4108                     case IPF_TCPS_HALF_ESTAB: /* 3 */
4109                               if (tcpflags & TH_FIN) {
4110                                         nstate = IPF_TCPS_FIN_WAIT_1;
4111                                         rval = 1;
4112                               } else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
4113                                         /*
4114                                          * If we've picked up a connection in mid
4115                                          * flight, we could be looking at a follow on
4116                                          * packet from the same direction as the one
4117                                          * that created this state.  Recognise it but
4118                                          * do not advance the entire connection's
4119                                          * state.
4120                                          */
4121                                         switch (ostate)
4122                                         {
4123                                         case IPF_TCPS_LISTEN :
4124                                         case IPF_TCPS_SYN_SENT :
4125                                         case IPF_TCPS_SYN_RECEIVED :
4126                                                   rval = 1;
4127                                                   break;
4128                                         case IPF_TCPS_HALF_ESTAB :
4129                                         case IPF_TCPS_ESTABLISHED :
4130                                                   nstate = IPF_TCPS_ESTABLISHED;
4131                                                   rval = 1;
4132                                                   break;
4133                                         default :
4134                                                   break;
4135                                         }
4136                               }
4137                               break;
4138 
4139                     case IPF_TCPS_ESTABLISHED: /* 4 */
4140                               rval = 1;
4141                               if (tcpflags & TH_FIN) {
4142                                         /*
4143                                          * 'dir' closed its side of the connection;
4144                                          * this gives us a half-closed connection;
4145                                          * ESTABLISHED -> FIN_WAIT_1
4146                                          */
4147                                         if (ostate == IPF_TCPS_FIN_WAIT_1) {
4148                                                   nstate = IPF_TCPS_CLOSING;
4149                                         } else {
4150                                                   nstate = IPF_TCPS_FIN_WAIT_1;
4151                                         }
4152                               } else if (tcpflags & TH_ACK) {
4153                                         /*
4154                                          * an ACK, should we exclude other flags here?
4155                                          */
4156                                         if (ostate == IPF_TCPS_FIN_WAIT_1) {
4157                                                   /*
4158                                                    * We know the other side did an active
4159                                                    * close, so we are ACKing the recvd
4160                                                    * FIN packet (does the window matching
4161                                                    * code guarantee this?) and go into
4162                                                    * CLOSE_WAIT state; this gives us a
4163                                                    * half-closed connection
4164                                                    */
4165                                                   nstate = IPF_TCPS_CLOSE_WAIT;
4166                                         } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
4167                                                   /*
4168                                                    * still a fully established
4169                                                    * connection reset timeout
4170                                                    */
4171                                                   nstate = IPF_TCPS_ESTABLISHED;
4172                                         }
4173                               }
4174                               break;
4175 
4176                     case IPF_TCPS_CLOSE_WAIT: /* 5 */
4177                               rval = 1;
4178                               if (tcpflags & TH_FIN) {
4179                                         /*
4180                                          * application closed and 'dir' sent a FIN,
4181                                          * we're now going into LAST_ACK state
4182                                          */
4183                                         nstate = IPF_TCPS_LAST_ACK;
4184                               } else {
4185                                         /*
4186                                          * we remain in CLOSE_WAIT because the other
4187                                          * side has closed already and we did not
4188                                          * close our side yet; reset timeout
4189                                          */
4190                                         nstate = IPF_TCPS_CLOSE_WAIT;
4191                               }
4192                               break;
4193 
4194                     case IPF_TCPS_FIN_WAIT_1: /* 6 */
4195                               rval = 1;
4196                               if ((tcpflags & TH_ACK) &&
4197                                   ostate > IPF_TCPS_CLOSE_WAIT) {
4198                                         /*
4199                                          * if the other side is not active anymore
4200                                          * it has sent us a FIN packet that we are
4201                                          * ack'ing now with an ACK; this means both
4202                                          * sides have now closed the connection and
4203                                          * we go into TIME_WAIT
4204                                          */
4205                                         /*
4206                                          * XXX: how do we know we really are ACKing
4207                                          * the FIN packet here? does the window code
4208                                          * guarantee that?
4209                                          */
4210                                         nstate = IPF_TCPS_LAST_ACK;
4211                               } else {
4212                                         /*
4213                                          * we closed our side of the connection
4214                                          * already but the other side is still active
4215                                          * (ESTABLISHED/CLOSE_WAIT); continue with
4216                                          * this half-closed connection
4217                                          */
4218                                         nstate = IPF_TCPS_FIN_WAIT_1;
4219                               }
4220                               break;
4221 
4222                     case IPF_TCPS_CLOSING: /* 7 */
4223                               if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
4224                                         nstate = IPF_TCPS_TIME_WAIT;
4225                               }
4226                               rval = 1;
4227                               break;
4228 
4229                     case IPF_TCPS_LAST_ACK: /* 8 */
4230                               if (tcpflags & TH_ACK) {
4231                                         rval = 1;
4232                               }
4233                               /*
4234                                * we cannot detect when we go out of LAST_ACK state
4235                                * to CLOSED because that is based on the reception
4236                                * of ACK packets; ipfilter can only detect that a
4237                                * packet has been sent by a host
4238                                */
4239                               break;
4240 
4241                     case IPF_TCPS_FIN_WAIT_2: /* 9 */
4242                               /* NOT USED */
4243                               break;
4244 
4245                     case IPF_TCPS_TIME_WAIT: /* 10 */
4246                               /* we're in 2MSL timeout now */
4247                               if (ostate == IPF_TCPS_LAST_ACK) {
4248                                         nstate = IPF_TCPS_CLOSED;
4249                                         rval = 1;
4250                               } else {
4251                                         rval = 2;
4252                               }
4253                               break;
4254 
4255                     case IPF_TCPS_CLOSED: /* 11 */
4256                               rval = 2;
4257                               break;
4258 
4259                     default :
4260 #if !defined(_KERNEL)
4261                               abort();
4262 #endif
4263                               break;
4264                     }
4265           }
4266 
4267           /*
4268            * If rval == 2 then do not update the queue position, but treat the
4269            * packet as being ok.
4270            */
4271           if (rval == 2)
4272                     rval = 1;
4273           else if (rval == 1) {
4274                     if (ok)
4275                               tqe->tqe_state[dir] = nstate;
4276                     if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
4277                               ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq,
4278                                               tqtab + nstate);
4279           }
4280 
4281           return rval;
4282 }
4283 
4284 
4285 /* ------------------------------------------------------------------------ */
4286 /* Function:    ipf_state_log                                               */
4287 /* Returns:     Nil                                                         */
4288 /* Parameters:  softc(I) - pointer to soft context main structure           */
4289 /*              is(I)    - pointer to state structure                       */
4290 /*              type(I)  - type of log entry to create                      */
4291 /*                                                                          */
4292 /* Creates a state table log entry using the state structure and type info. */
4293 /* passed in.  Log packet/byte counts, source/destination address and other */
4294 /* protocol specific information.                                           */
4295 /* ------------------------------------------------------------------------ */
4296 void
ipf_state_log(ipf_main_softc_t * softc,struct ipstate * is,u_int type)4297 ipf_state_log(ipf_main_softc_t *softc, struct ipstate *is, u_int type)
4298 {
4299 #ifdef    IPFILTER_LOG
4300           struct    ipslog    ipsl;
4301           size_t sizes[1];
4302           void *items[1];
4303           int types[1];
4304 
4305           /*
4306            * Copy information out of the ipstate_t structure and into the
4307            * structure used for logging.
4308            */
4309           ipsl.isl_type = type;
4310           ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
4311           ipsl.isl_bytes[0] = is->is_bytes[0];
4312           ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
4313           ipsl.isl_bytes[1] = is->is_bytes[1];
4314           ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
4315           ipsl.isl_bytes[2] = is->is_bytes[2];
4316           ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
4317           ipsl.isl_bytes[3] = is->is_bytes[3];
4318           ipsl.isl_src = is->is_src;
4319           ipsl.isl_dst = is->is_dst;
4320           ipsl.isl_p = is->is_p;
4321           ipsl.isl_v = is->is_v;
4322           ipsl.isl_flags = is->is_flags;
4323           ipsl.isl_tag = is->is_tag;
4324           ipsl.isl_rulen = is->is_rulen;
4325           (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
4326 
4327           if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
4328                     ipsl.isl_sport = is->is_sport;
4329                     ipsl.isl_dport = is->is_dport;
4330                     if (ipsl.isl_p == IPPROTO_TCP) {
4331                               ipsl.isl_state[0] = is->is_state[0];
4332                               ipsl.isl_state[1] = is->is_state[1];
4333                     }
4334           } else if (ipsl.isl_p == IPPROTO_ICMP) {
4335                     ipsl.isl_itype = is->is_icmp.ici_type;
4336           } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
4337                     ipsl.isl_itype = is->is_icmp.ici_type;
4338           } else {
4339                     ipsl.isl_ps.isl_filler[0] = 0;
4340                     ipsl.isl_ps.isl_filler[1] = 0;
4341           }
4342 
4343           items[0] = &ipsl;
4344           sizes[0] = sizeof(ipsl);
4345           types[0] = 0;
4346 
4347           (void) ipf_log_items(softc, IPL_LOGSTATE, NULL, items, sizes, types, 1);
4348 #endif
4349 }
4350 
4351 
4352 #ifdef    USE_INET6
4353 /* ------------------------------------------------------------------------ */
4354 /* Function:    ipf_checkicmp6matchingstate                                 */
4355 /* Returns:     ipstate_t* - NULL == no match found,                        */
4356 /*                           else  pointer to matching state entry          */
4357 /* Parameters:  fin(I) - pointer to packet information                      */
4358 /* Locks:       NULL == no locks, else Read Lock on ipf_state               */
4359 /*                                                                          */
4360 /* If we've got an ICMPv6 error message, using the information stored in    */
4361 /* the ICMPv6 packet, look for a matching state table entry.                */
4362 /* ------------------------------------------------------------------------ */
4363 static ipstate_t *
ipf_checkicmp6matchingstate(fr_info_t * fin)4364 ipf_checkicmp6matchingstate(fr_info_t *fin)
4365 {
4366           ipf_main_softc_t *softc = fin->fin_main_soft;
4367           ipf_state_softc_t *softs = softc->ipf_state_soft;
4368           struct icmp6_hdr *ic6, *oic;
4369           ipstate_t *is, **isp;
4370           u_short sport, dport;
4371           i6addr_t dst, src;
4372           u_short savelen;
4373           icmpinfo_t *ic;
4374           fr_info_t ofin;
4375           tcphdr_t *tcp;
4376           ip6_t *oip6;
4377           u_char pr;
4378           u_int hv;
4379 
4380           /*
4381            * Does it at least have the return (basic) IP header ?
4382            * Is it an actual recognised ICMP error type?
4383            * Only a basic IP header (no options) should be with
4384            * an ICMP error header.
4385            */
4386           if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
4387               !(fin->fin_flx & FI_ICMPERR)) {
4388                     SBUMPD(ipf_state_stats, iss_icmp_bad);
4389                     return NULL;
4390           }
4391 
4392           ic6 = fin->fin_dp;
4393 
4394           oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
4395           if (fin->fin_plen < sizeof(*oip6)) {
4396                     SBUMPD(ipf_state_stats, iss_icmp_short);
4397                     return NULL;
4398           }
4399 
4400           bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
4401           ofin.fin_v = 6;
4402           ofin.fin_ifp = fin->fin_ifp;
4403           ofin.fin_out = !fin->fin_out;
4404           ofin.fin_m = NULL;  /* if dereferenced, panic XXX */
4405           ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
4406 
4407           /*
4408            * We make a fin entry to be able to feed it to
4409            * matchsrcdst. Note that not all fields are necessary
4410            * but this is the cleanest way. Note further we fill
4411            * in fin_mp such that if someone uses it we'll get
4412            * a kernel panic. ipf_matchsrcdst does not use this.
4413            *
4414            * watch out here, as ip is in host order and oip6 in network
4415            * order. Any change we make must be undone afterwards.
4416            */
4417           savelen = oip6->ip6_plen;
4418           oip6->ip6_plen = htons(fin->fin_dlen - ICMPERR_ICMPHLEN);
4419           ofin.fin_flx = FI_NOCKSUM;
4420           ofin.fin_ip = (ip_t *)oip6;
4421           (void) ipf_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
4422           ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
4423           oip6->ip6_plen = savelen;
4424           pr = ofin.fin_p;
4425 
4426           /*
4427            * an ICMP error can never generate an ICMP error in response.
4428            */
4429           if (ofin.fin_flx & FI_ICMPERR) {
4430                     DT1(iss_icmp6_icmperr, fr_info_t *, &ofin);
4431                     SBUMP(ipf_state_stats.iss_icmp6_icmperr);
4432                     return NULL;
4433           }
4434 
4435           if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
4436                     oic = ofin.fin_dp;
4437                     /*
4438                      * an ICMP error can only be generated as a result of an
4439                      * ICMP query, not as the response on an ICMP error
4440                      *
4441                      * XXX theoretically ICMP_ECHOREP and the other reply's are
4442                      * ICMP query's as well, but adding them here seems strange XXX
4443                      */
4444                      if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) {
4445                               DT1(iss_icmp6_notinfo, fr_info_t *, &ofin);
4446                               SBUMP(ipf_state_stats.iss_icmp6_notinfo);
4447                               return NULL;
4448                     }
4449 
4450                     /*
4451                      * perform a lookup of the ICMP packet in the state table
4452                      */
4453                     hv = (pr = oip6->ip6_nxt);
4454                     src.in6 = oip6->ip6_src;
4455                     hv += src.in4.s_addr;
4456                     dst.in6 = oip6->ip6_dst;
4457                     hv += dst.in4.s_addr;
4458                     hv += oic->icmp6_id;
4459                     hv += oic->icmp6_seq;
4460                     hv = DOUBLE_HASH(hv);
4461 
4462                     READ_ENTER(&softc->ipf_state);
4463                     for (isp = &softs->ipf_state_table[hv];
4464                          ((is = *isp) != NULL); ) {
4465                               ic = &is->is_icmp;
4466                               isp = &is->is_hnext;
4467                               if ((is->is_p == pr) &&
4468                                   !(is->is_pass & FR_NOICMPERR) &&
4469                                   (oic->icmp6_id == ic->ici_id) &&
4470                                   (oic->icmp6_seq == ic->ici_seq) &&
4471                                   (is = ipf_matchsrcdst(&ofin, is, &src,
4472                                                              &dst, NULL, FI_ICMPCMP))) {
4473                                         /*
4474                                          * in the state table ICMP query's are stored
4475                                          * with the type of the corresponding ICMP
4476                                          * response. Correct here
4477                                          */
4478                                         if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
4479                                              (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
4480                                              (ic->ici_type - 1 == oic->icmp6_type )) {
4481                                                   if (!ipf_allowstateicmp(fin, is, &src))
4482                                                             return is;
4483                                         }
4484                               }
4485                     }
4486                     RWLOCK_EXIT(&softc->ipf_state);
4487                     SBUMPD(ipf_state_stats, iss_icmp6_miss);
4488                     return NULL;
4489           }
4490 
4491           hv = (pr = oip6->ip6_nxt);
4492           src.in6 = oip6->ip6_src;
4493           hv += src.i6[0];
4494           hv += src.i6[1];
4495           hv += src.i6[2];
4496           hv += src.i6[3];
4497           dst.in6 = oip6->ip6_dst;
4498           hv += dst.i6[0];
4499           hv += dst.i6[1];
4500           hv += dst.i6[2];
4501           hv += dst.i6[3];
4502 
4503           tcp = NULL;
4504 
4505           switch (oip6->ip6_nxt)
4506           {
4507           case IPPROTO_TCP :
4508           case IPPROTO_UDP :
4509                     tcp = (tcphdr_t *)(oip6 + 1);
4510                     dport = tcp->th_dport;
4511                     sport = tcp->th_sport;
4512                     hv += dport;
4513                     hv += sport;
4514                     break;
4515 
4516           case IPPROTO_ICMPV6 :
4517                     oic = (struct icmp6_hdr *)(oip6 + 1);
4518                     hv += oic->icmp6_id;
4519                     hv += oic->icmp6_seq;
4520                     break;
4521 
4522           default :
4523                     break;
4524           }
4525 
4526           hv = DOUBLE_HASH(hv);
4527 
4528           READ_ENTER(&softc->ipf_state);
4529           for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
4530                     isp = &is->is_hnext;
4531                     /*
4532                      * Only allow this icmp though if the
4533                      * encapsulated packet was allowed through the
4534                      * other way around. Note that the minimal amount
4535                      * of info present does not allow for checking against
4536                      * tcp internals such as seq and ack numbers.
4537                      */
4538                     if ((is->is_p != pr) || (is->is_v != 6) ||
4539                         (is->is_pass & FR_NOICMPERR))
4540                               continue;
4541                     is = ipf_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
4542                     if ((is != NULL) && (ipf_allowstateicmp(fin, is, &src) == 0))
4543                               return is;
4544           }
4545           RWLOCK_EXIT(&softc->ipf_state);
4546           SBUMPD(ipf_state_stats, iss_icmp_miss);
4547           return NULL;
4548 }
4549 #endif
4550 
4551 
4552 /* ------------------------------------------------------------------------ */
4553 /* Function:    ipf_sttab_init                                              */
4554 /* Returns:     Nil                                                         */
4555 /* Parameters:  softc(I) - pointer to soft context main structure           */
4556 /*              tqp(I)   - pointer to an array of timeout queues for TCP    */
4557 /*                                                                          */
4558 /* Initialise the array of timeout queues for TCP.                          */
4559 /* ------------------------------------------------------------------------ */
4560 void
ipf_sttab_init(ipf_main_softc_t * softc,ipftq_t * tqp)4561 ipf_sttab_init(ipf_main_softc_t *softc, ipftq_t *tqp)
4562 {
4563           int i;
4564 
4565           for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
4566                     IPFTQ_INIT(&tqp[i], 0, "ipftq tcp tab");
4567                     tqp[i].ifq_next = tqp + i + 1;
4568           }
4569           tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
4570           tqp[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcpclosed;
4571           tqp[IPF_TCPS_LISTEN].ifq_ttl = softc->ipf_tcptimeout;
4572           tqp[IPF_TCPS_SYN_SENT].ifq_ttl = softc->ipf_tcpsynsent;
4573           tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = softc->ipf_tcpsynrecv;
4574           tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = softc->ipf_tcpidletimeout;
4575           tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = softc->ipf_tcphalfclosed;
4576           tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = softc->ipf_tcphalfclosed;
4577           tqp[IPF_TCPS_CLOSING].ifq_ttl = softc->ipf_tcptimeout;
4578           tqp[IPF_TCPS_LAST_ACK].ifq_ttl = softc->ipf_tcplastack;
4579           tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = softc->ipf_tcpclosewait;
4580           tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = softc->ipf_tcptimewait;
4581           tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = softc->ipf_tcptimeout;
4582 }
4583 
4584 
4585 /* ------------------------------------------------------------------------ */
4586 /* Function:    ipf_sttab_destroy                                           */
4587 /* Returns:     Nil                                                         */
4588 /* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
4589 /*                                                                          */
4590 /* Do whatever is necessary to "destroy" each of the entries in the array   */
4591 /* of timeout queues for TCP.                                               */
4592 /* ------------------------------------------------------------------------ */
4593 void
ipf_sttab_destroy(ipftq_t * tqp)4594 ipf_sttab_destroy(ipftq_t *tqp)
4595 {
4596           int i;
4597 
4598           for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
4599                     MUTEX_DESTROY(&tqp[i].ifq_lock);
4600 }
4601 
4602 
4603 /* ------------------------------------------------------------------------ */
4604 /* Function:    ipf_state_deref                                             */
4605 /* Returns:     Nil                                                         */
4606 /* Parameters:  softc(I) - pointer to soft context main structure           */
4607 /*              isp(I) - pointer to pointer to state table entry            */
4608 /*                                                                          */
4609 /* Decrement the reference counter for this state table entry and free it   */
4610 /* if there are no more things using it.                                    */
4611 /*                                                                          */
4612 /* This function is only called when cleaning up after increasing is_ref by */
4613 /* one earlier in the 'code path' so if is_ref is 1 when entering, we do    */
4614 /* have an orphan, otherwise not.  However there is a possible race between */
4615 /* the entry being deleted via flushing with an ioctl call (that calls the  */
4616 /* delete function directly) and the tail end of packet processing so we    */
4617 /* need to grab is_lock before doing the check to synchronise the two code  */
4618 /* paths.                                                                   */
4619 /*                                                                          */
4620 /* When operating in userland (ipftest), we have no timers to clear a state */
4621 /* entry.  Therefore, we make a few simple tests before deleting an entry   */
4622 /* outright.  We compare states on each side looking for a combination of   */
4623 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK.  Then we factor   */
4624 /* in packet direction with the interface list to make sure we don't        */
4625 /* prematurely delete an entry on a final inbound packet that's we're also  */
4626 /* supposed to route elsewhere.                                             */
4627 /*                                                                          */
4628 /* Internal parameters:                                                     */
4629 /*    state[0] = state of source (host that initiated connection)           */
4630 /*    state[1] = state of dest   (host that accepted the connection)        */
4631 /*                                                                          */
4632 /*    dir == 0 : a packet from source to dest                               */
4633 /*    dir == 1 : a packet from dest to source                               */
4634 /* ------------------------------------------------------------------------ */
4635 void
ipf_state_deref(ipf_main_softc_t * softc,ipstate_t ** isp)4636 ipf_state_deref(ipf_main_softc_t *softc, ipstate_t **isp)
4637 {
4638           ipstate_t *is = *isp;
4639 
4640           is = *isp;
4641           *isp = NULL;
4642 
4643           MUTEX_ENTER(&is->is_lock);
4644           if (is->is_ref > 1) {
4645                     is->is_ref--;
4646                     MUTEX_EXIT(&is->is_lock);
4647 #ifndef   _KERNEL
4648                     if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
4649                         (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
4650                               ipf_state_del(softc, is, ISL_EXPIRE);
4651                     }
4652 #endif
4653                     return;
4654           }
4655           MUTEX_EXIT(&is->is_lock);
4656 
4657           WRITE_ENTER(&softc->ipf_state);
4658           ipf_state_del(softc, is, ISL_ORPHAN);
4659           RWLOCK_EXIT(&softc->ipf_state);
4660 }
4661 
4662 
4663 /* ------------------------------------------------------------------------ */
4664 /* Function:    ipf_state_setqueue                                          */
4665 /* Returns:     Nil                                                         */
4666 /* Parameters:  softc(I) - pointer to soft context main structure           */
4667 /*              is(I)    - pointer to state structure                       */
4668 /*              rev(I)   - forward(0) or reverse(1) direction               */
4669 /* Locks:       ipf_state (read or write)                                   */
4670 /*                                                                          */
4671 /* Put the state entry on its default queue entry, using rev as a helped in */
4672 /* determining which queue it should be placed on.                          */
4673 /* ------------------------------------------------------------------------ */
4674 void
ipf_state_setqueue(ipf_main_softc_t * softc,ipstate_t * is,int rev)4675 ipf_state_setqueue(ipf_main_softc_t *softc, ipstate_t *is, int rev)
4676 {
4677           ipf_state_softc_t *softs = softc->ipf_state_soft;
4678           ipftq_t *oifq, *nifq;
4679 
4680           if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
4681                     nifq = is->is_tqehead[rev];
4682           else
4683                     nifq = NULL;
4684 
4685           if (nifq == NULL) {
4686                     switch (is->is_p)
4687                     {
4688 #ifdef USE_INET6
4689                     case IPPROTO_ICMPV6 :
4690                               if (rev == 1)
4691                                         nifq = &softs->ipf_state_icmpacktq;
4692                               else
4693                                         nifq = &softs->ipf_state_icmptq;
4694                               break;
4695 #endif
4696                     case IPPROTO_ICMP :
4697                               if (rev == 1)
4698                                         nifq = &softs->ipf_state_icmpacktq;
4699                               else
4700                                         nifq = &softs->ipf_state_icmptq;
4701                               break;
4702                     case IPPROTO_TCP :
4703                               nifq = softs->ipf_state_tcptq + is->is_state[rev];
4704                               break;
4705 
4706                     case IPPROTO_UDP :
4707                               if (rev == 1)
4708                                         nifq = &softs->ipf_state_udpacktq;
4709                               else
4710                                         nifq = &softs->ipf_state_udptq;
4711                               break;
4712 
4713                     default :
4714                               nifq = &softs->ipf_state_iptq;
4715                               break;
4716                     }
4717           }
4718 
4719           oifq = is->is_sti.tqe_ifq;
4720           /*
4721            * If it's currently on a timeout queue, move it from one queue to
4722            * another, else put it on the end of the newly determined queue.
4723            */
4724           if (oifq != NULL)
4725                     ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq, nifq);
4726           else
4727                     ipf_queueappend(softc->ipf_ticks, &is->is_sti, nifq, is);
4728           return;
4729 }
4730 
4731 
4732 /* ------------------------------------------------------------------------ */
4733 /* Function:    ipf_state_iter                                              */
4734 /* Returns:     int - 0 == success, else error                              */
4735 /* Parameters:  softc(I) - pointer to main soft context                     */
4736 /*              token(I) - pointer to ipftoken structure                    */
4737 /*              itp(I)   - pointer to ipfgeniter structure                  */
4738 /*              obj(I)   - pointer to data description structure            */
4739 /*                                                                          */
4740 /* This function handles the SIOCGENITER ioctl for the state tables and     */
4741 /* walks through the list of entries in the state table list (softs->ipf_state_list.)    */
4742 /* ------------------------------------------------------------------------ */
4743 static int
ipf_state_iter(ipf_main_softc_t * softc,ipftoken_t * token,ipfgeniter_t * itp,ipfobj_t * obj)4744 ipf_state_iter(ipf_main_softc_t *softc, ipftoken_t *token, ipfgeniter_t *itp,
4745     ipfobj_t *obj)
4746 {
4747           ipf_state_softc_t *softs = softc->ipf_state_soft;
4748           ipstate_t *is, *next, zero;
4749           int error;
4750 
4751           if (itp->igi_data == NULL) {
4752                     IPFERROR(100026);
4753                     return EFAULT;
4754           }
4755 
4756           if (itp->igi_nitems < 1) {
4757                     IPFERROR(100027);
4758                     return ENOSPC;
4759           }
4760 
4761           if (itp->igi_type != IPFGENITER_STATE) {
4762                     IPFERROR(100028);
4763                     return EINVAL;
4764           }
4765 
4766           is = token->ipt_data;
4767           if (is == (void *)-1) {
4768                     IPFERROR(100029);
4769                     return ESRCH;
4770           }
4771 
4772           error = 0;
4773           obj->ipfo_type = IPFOBJ_IPSTATE;
4774           obj->ipfo_size = sizeof(ipstate_t);
4775 
4776           READ_ENTER(&softc->ipf_state);
4777 
4778           is = token->ipt_data;
4779           if (is == NULL) {
4780                     next = softs->ipf_state_list;
4781           } else {
4782                     next = is->is_next;
4783           }
4784 
4785           /*
4786            * If we find a state entry to use, bump its reference count so that
4787            * it can be used for is_next when we come back.
4788            */
4789           if (next != NULL) {
4790                     MUTEX_ENTER(&next->is_lock);
4791                     next->is_ref++;
4792                     MUTEX_EXIT(&next->is_lock);
4793                     token->ipt_data = next;
4794           } else {
4795                     bzero(&zero, sizeof(zero));
4796                     next = &zero;
4797                     token->ipt_data = NULL;
4798           }
4799           if (next->is_next == NULL)
4800                     ipf_token_mark_complete(token);
4801 
4802           RWLOCK_EXIT(&softc->ipf_state);
4803 
4804           obj->ipfo_ptr = itp->igi_data;
4805           error = ipf_outobjk(softc, obj, next);
4806           if (is != NULL)
4807                     ipf_state_deref(softc, &is);
4808 
4809           return error;
4810 }
4811 
4812 
4813 /* ------------------------------------------------------------------------ */
4814 /* Function:    ipf_state_gettable                                          */
4815 /* Returns:     int     - 0 = success, else error                           */
4816 /* Parameters:  softc(I) - pointer to main soft context                     */
4817 /*              softs(I) - pointer to state context structure               */
4818 /*              data(I)  - pointer to ioctl data                             */
4819 /*                                                                          */
4820 /* This function handles ioctl requests for tables of state information.    */
4821 /* At present the only table it deals with is the hash bucket statistics.   */
4822 /* ------------------------------------------------------------------------ */
4823 static int
ipf_state_gettable(ipf_main_softc_t * softc,ipf_state_softc_t * softs,char * data)4824 ipf_state_gettable(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
4825     char *data)
4826 {
4827           ipftable_t table;
4828           int error;
4829 
4830           error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE);
4831           if (error != 0)
4832                     return error;
4833 
4834           if (table.ita_type != IPFTABLE_BUCKETS) {
4835                     IPFERROR(100031);
4836                     return EINVAL;
4837           }
4838 
4839           error = COPYOUT(softs->ipf_state_stats.iss_bucketlen, table.ita_table,
4840                               softs->ipf_state_size * sizeof(u_int));
4841           if (error != 0) {
4842                     IPFERROR(100032);
4843                     error = EFAULT;
4844           }
4845           return error;
4846 }
4847 
4848 
4849 /* ------------------------------------------------------------------------ */
4850 /* Function:    ipf_state_setpending                                        */
4851 /* Returns:     Nil                                                         */
4852 /* Parameters:  softc(I) - pointer to main soft context                     */
4853 /*              is(I)    - pointer to state structure                       */
4854 /* Locks:       ipf_state (read or write)                                   */
4855 /*                                                                          */
4856 /* Put the state entry on to the pending queue - this queue has a very      */
4857 /* short lifetime where items are put that can't be deleted straight away   */
4858 /* because of locking issues but we want to delete them ASAP, anyway.       */
4859 /* ------------------------------------------------------------------------ */
4860 void
ipf_state_setpending(ipf_main_softc_t * softc,ipstate_t * is)4861 ipf_state_setpending(ipf_main_softc_t *softc, ipstate_t *is)
4862 {
4863           ipf_state_softc_t *softs = softc->ipf_state_soft;
4864           ipftq_t *oifq;
4865 
4866           oifq = is->is_sti.tqe_ifq;
4867           if (oifq != NULL)
4868                     ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq,
4869                                     &softs->ipf_state_pending);
4870           else
4871                     ipf_queueappend(softc->ipf_ticks, &is->is_sti,
4872                                         &softs->ipf_state_pending, is);
4873 
4874           MUTEX_ENTER(&is->is_lock);
4875           if (is->is_me != NULL) {
4876                     *is->is_me = NULL;
4877                     is->is_me = NULL;
4878                     is->is_ref--;
4879           }
4880           MUTEX_EXIT(&is->is_lock);
4881 }
4882 
4883 
4884 /* ------------------------------------------------------------------------ */
4885 /* Function:    ipf_state_matchflush                                        */
4886 /* Returns:     Nil                                                         */
4887 /* Parameters:  softc(I) - pointer to main soft context                     */
4888 /*              data(I)  - pointer to state structure                       */
4889 /* Locks:       ipf_state (read or write)                                   */
4890 /*                                                                          */
4891 /* Flush all entries from the list of state entries that match the          */
4892 /* properties in the array loaded.                                          */
4893 /* ------------------------------------------------------------------------ */
4894 int
ipf_state_matchflush(ipf_main_softc_t * softc,void * data)4895 ipf_state_matchflush(ipf_main_softc_t *softc, void *data)
4896 {
4897           ipf_state_softc_t *softs = softc->ipf_state_soft;
4898           int *array, flushed, error;
4899           ipstate_t *state, *statenext;
4900           ipfobj_t obj;
4901 
4902           error = ipf_matcharray_load(softc, data, &obj, &array);
4903           if (error != 0)
4904                     return error;
4905 
4906           flushed = 0;
4907 
4908           for (state = softs->ipf_state_list; state != NULL; state = statenext) {
4909                     statenext = state->is_next;
4910                     if (ipf_state_matcharray(state, array, softc->ipf_ticks) == 0) {
4911                               ipf_state_del(softc, state, ISL_FLUSH);
4912                               flushed++;
4913                     }
4914           }
4915 
4916           obj.ipfo_retval = flushed;
4917           error = BCOPYOUT(&obj, data, sizeof(obj));
4918 
4919           KFREES(array, array[0] * sizeof(*array));
4920 
4921           return error;
4922 }
4923 
4924 
4925 /* ------------------------------------------------------------------------ */
4926 /* Function:    ipf_state_matcharray                                        */
4927 /* Returns:     int   - 0 = no match, 1 = match                             */
4928 /* Parameters:  state(I) - pointer to state structure                       */
4929 /*              array(I) - pointer to ipf matching expression               */
4930 /*              ticks(I) - current value of ipfilter tick timer             */
4931 /* Locks:       ipf_state (read or write)                                   */
4932 /*                                                                          */
4933 /* Compare a state entry with the match array passed in and return a value  */
4934 /* to indicate whether or not the matching was successful.                  */
4935 /* ------------------------------------------------------------------------ */
4936 static int
ipf_state_matcharray(ipstate_t * state,int * array,u_long ticks)4937 ipf_state_matcharray(ipstate_t *state, int *array, u_long ticks)
4938 {
4939           int i, n, *x, rv, p;
4940           ipfexp_t *e;
4941 
4942           rv = 0;
4943           n = array[0];
4944           x = array + 1;
4945 
4946           for (; n > 0; x += 3 + x[3], rv = 0) {
4947                     e = (ipfexp_t *)x;
4948                     n -= e->ipfe_size;
4949                     if (x[0] == IPF_EXP_END)
4950                               break;
4951 
4952                     /*
4953                      * If we need to match the protocol and that doesn't match,
4954                      * don't even both with the instruction array.
4955                      */
4956                     p = e->ipfe_cmd >> 16;
4957                     if ((p != 0) && (p != state->is_p))
4958                               break;
4959 
4960                     switch (e->ipfe_cmd)
4961                     {
4962                     case IPF_EXP_IP_PR :
4963                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
4964                                         rv |= (state->is_p == e->ipfe_arg0[i]);
4965                               }
4966                               break;
4967 
4968                     case IPF_EXP_IP_SRCADDR :
4969                               if (state->is_v != 4)
4970                                         break;
4971                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
4972                                         rv |= ((state->is_saddr &
4973                                                   e->ipfe_arg0[i * 2 + 1]) ==
4974                                               e->ipfe_arg0[i * 2]);
4975                               }
4976                               break;
4977 
4978                     case IPF_EXP_IP_DSTADDR :
4979                               if (state->is_v != 4)
4980                                         break;
4981                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
4982                                         rv |= ((state->is_daddr &
4983                                                   e->ipfe_arg0[i * 2 + 1]) ==
4984                                                e->ipfe_arg0[i * 2]);
4985                               }
4986                               break;
4987 
4988                     case IPF_EXP_IP_ADDR :
4989                               if (state->is_v != 4)
4990                                         break;
4991                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
4992                                         rv |= ((state->is_saddr &
4993                                                   e->ipfe_arg0[i * 2 + 1]) ==
4994                                                e->ipfe_arg0[i * 2]) ||
4995                                                ((state->is_daddr &
4996                                                   e->ipfe_arg0[i * 2 + 1]) ==
4997                                                e->ipfe_arg0[i * 2]);
4998                               }
4999                               break;
5000 
5001 #ifdef USE_INET6
5002                     case IPF_EXP_IP6_SRCADDR :
5003                               if (state->is_v != 6)
5004                                         break;
5005                               for (i = 0; !rv && i < x[3]; i++) {
5006                                         rv |= IP6_MASKEQ(&state->is_src.in6,
5007                                                              &e->ipfe_arg0[i * 8 + 4],
5008                                                              &e->ipfe_arg0[i * 8]);
5009                               }
5010                               break;
5011 
5012                     case IPF_EXP_IP6_DSTADDR :
5013                               if (state->is_v != 6)
5014                                         break;
5015                               for (i = 0; !rv && i < x[3]; i++) {
5016                                         rv |= IP6_MASKEQ(&state->is_dst.in6,
5017                                                              &e->ipfe_arg0[i * 8 + 4],
5018                                                              &e->ipfe_arg0[i * 8]);
5019                               }
5020                               break;
5021 
5022                     case IPF_EXP_IP6_ADDR :
5023                               if (state->is_v != 6)
5024                                         break;
5025                               for (i = 0; !rv && i < x[3]; i++) {
5026                                         rv |= IP6_MASKEQ(&state->is_src.in6,
5027                                                              &e->ipfe_arg0[i * 8 + 4],
5028                                                              &e->ipfe_arg0[i * 8]) ||
5029                                               IP6_MASKEQ(&state->is_dst.in6,
5030                                                              &e->ipfe_arg0[i * 8 + 4],
5031                                                              &e->ipfe_arg0[i * 8]);
5032                               }
5033                               break;
5034 #endif
5035 
5036                     case IPF_EXP_UDP_PORT :
5037                     case IPF_EXP_TCP_PORT :
5038                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
5039                                         rv |= (state->is_sport == e->ipfe_arg0[i]) ||
5040                                               (state->is_dport == e->ipfe_arg0[i]);
5041                               }
5042                               break;
5043 
5044                     case IPF_EXP_UDP_SPORT :
5045                     case IPF_EXP_TCP_SPORT :
5046                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
5047                                         rv |= (state->is_sport == e->ipfe_arg0[i]);
5048                               }
5049                               break;
5050 
5051                     case IPF_EXP_UDP_DPORT :
5052                     case IPF_EXP_TCP_DPORT :
5053                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
5054                                         rv |= (state->is_dport == e->ipfe_arg0[i]);
5055                               }
5056                               break;
5057 
5058                     case IPF_EXP_TCP_STATE :
5059                               for (i = 0; !rv && i < e->ipfe_narg; i++) {
5060                                         rv |= (state->is_state[0] == e->ipfe_arg0[i]) ||
5061                                               (state->is_state[1] == e->ipfe_arg0[i]);
5062                               }
5063                               break;
5064 
5065                     case IPF_EXP_IDLE_GT :
5066                               rv |= (ticks - state->is_touched > e->ipfe_arg0[0]);
5067                               break;
5068                     }
5069 
5070                     /*
5071                      * Factor in doing a negative match.
5072                      */
5073                     rv ^= e->ipfe_not;
5074 
5075                     if (rv == 0)
5076                               break;
5077           }
5078 
5079           return rv;
5080 }
5081 
5082 
5083 /* ------------------------------------------------------------------------ */
5084 /* Function:    ipf_state_settimeout                                        */
5085 /* Returns:     int 0 = success, else failure                               */
5086 /* Parameters:  softc(I)  - pointer to main soft context                    */
5087 /*              t(I)      - pointer to tuneable being changed               */
5088 /*              p(I)      - pointer to the new value                        */
5089 /*                                                                          */
5090 /* Sets a timeout value for one of the many timeout queues.  We find the    */
5091 /* correct queue using a somewhat manual process of comparing the timeout   */
5092 /* names for each specific value available and calling ipf_apply_timeout on */
5093 /* that queue so that all of the items on it are updated accordingly.       */
5094 /* ------------------------------------------------------------------------ */
5095 int
ipf_state_settimeout(struct ipf_main_softc_s * softc,ipftuneable_t * t,ipftuneval_t * p)5096 ipf_state_settimeout(struct ipf_main_softc_s *softc, ipftuneable_t *t,
5097     ipftuneval_t *p)
5098 {
5099           ipf_state_softc_t *softs = softc->ipf_state_soft;
5100 
5101           /*
5102            * In case there is nothing to do...
5103            */
5104           if (*t->ipft_pint == p->ipftu_int)
5105                     return 0;
5106 
5107           if (!strncmp(t->ipft_name, "tcp_", 4))
5108                     return ipf_settimeout_tcp(t, p, softs->ipf_state_tcptq);
5109 
5110           if (!strcmp(t->ipft_name, "udp_timeout")) {
5111                     ipf_apply_timeout(&softs->ipf_state_udptq, p->ipftu_int);
5112           } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) {
5113                     ipf_apply_timeout(&softs->ipf_state_udpacktq, p->ipftu_int);
5114           } else if (!strcmp(t->ipft_name, "icmp_timeout")) {
5115                     ipf_apply_timeout(&softs->ipf_state_icmptq, p->ipftu_int);
5116           } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) {
5117                     ipf_apply_timeout(&softs->ipf_state_icmpacktq, p->ipftu_int);
5118           } else if (!strcmp(t->ipft_name, "ip_timeout")) {
5119                     ipf_apply_timeout(&softs->ipf_state_iptq, p->ipftu_int);
5120           } else {
5121                     IPFERROR(100034);
5122                     return ESRCH;
5123           }
5124 
5125           /*
5126            * Update the tuneable being set.
5127            */
5128           *t->ipft_pint = p->ipftu_int;
5129 
5130           return 0;
5131 }
5132 
5133 
5134 /* ------------------------------------------------------------------------ */
5135 /* Function:    ipf_state_rehash                                            */
5136 /* Returns:     int 0 = success, else failure                               */
5137 /* Parameters:  softc(I)  - pointer to main soft context                    */
5138 /*              t(I)      - pointer to tuneable being changed               */
5139 /*              p(I)      - pointer to the new value                        */
5140 /*                                                                          */
5141 /* To change the size of the state hash table at runtime, a new table has   */
5142 /* to be allocated and then all of the existing entries put in it, bumping  */
5143 /* up the bucketlength for it as we go along.                               */
5144 /* ------------------------------------------------------------------------ */
5145 int
ipf_state_rehash(ipf_main_softc_t * softc,ipftuneable_t * t,ipftuneval_t * p)5146 ipf_state_rehash(ipf_main_softc_t *softc, ipftuneable_t *t, ipftuneval_t *p)
5147 {
5148           ipf_state_softc_t *softs = softc->ipf_state_soft;
5149           ipstate_t **newtab, *is;
5150           u_long *newseed;
5151           u_int *bucketlens;
5152           u_int maxbucket;
5153           u_int newsize;
5154           u_int hv;
5155           int i;
5156 
5157           newsize = p->ipftu_int;
5158           /*
5159            * In case there is nothing to do...
5160            */
5161           if (newsize == softs->ipf_state_size)
5162                     return 0;
5163 
5164           KMALLOCS(newtab, ipstate_t **, newsize * sizeof(ipstate_t *));
5165           if (newtab == NULL) {
5166                     IPFERROR(100035);
5167                     return ENOMEM;
5168           }
5169 
5170           KMALLOCS(bucketlens, u_int *, newsize * sizeof(u_int));
5171           if (bucketlens == NULL) {
5172                     KFREES(newtab, newsize * sizeof(*softs->ipf_state_table));
5173                     IPFERROR(100036);
5174                     return ENOMEM;
5175           }
5176 
5177           newseed = ipf_state_seed_alloc(newsize, softs->ipf_state_max);
5178           if (newseed == NULL) {
5179                     KFREES(bucketlens, newsize * sizeof(*bucketlens));
5180                     KFREES(newtab, newsize * sizeof(*newtab));
5181                     IPFERROR(100037);
5182                     return ENOMEM;
5183           }
5184 
5185           for (maxbucket = 0, i = newsize; i > 0; i >>= 1)
5186                     maxbucket++;
5187           maxbucket *= 2;
5188 
5189           bzero((char *)newtab, newsize * sizeof(ipstate_t *));
5190           bzero((char *)bucketlens, newsize * sizeof(u_int));
5191 
5192           WRITE_ENTER(&softc->ipf_state);
5193 
5194           if (softs->ipf_state_table != NULL) {
5195                     KFREES(softs->ipf_state_table,
5196                            softs->ipf_state_size * sizeof(*softs->ipf_state_table));
5197           }
5198           softs->ipf_state_table = newtab;
5199 
5200           if (softs->ipf_state_seed != NULL) {
5201                     KFREES(softs->ipf_state_seed,
5202                            softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
5203           }
5204           softs->ipf_state_seed = newseed;
5205 
5206           if (softs->ipf_state_stats.iss_bucketlen != NULL) {
5207                     KFREES(softs->ipf_state_stats.iss_bucketlen,
5208                            softs->ipf_state_size * sizeof(u_int));
5209           }
5210           softs->ipf_state_stats.iss_bucketlen = bucketlens;
5211           softs->ipf_state_maxbucket = maxbucket;
5212           softs->ipf_state_size = newsize;
5213 
5214           /*
5215            * Walk through the entire list of state table entries and put them
5216            * in the new state table, somewhere.  Because we have a new table,
5217            * we need to restart the counter of how many chains are in use.
5218            */
5219           softs->ipf_state_stats.iss_inuse = 0;
5220           for (is = softs->ipf_state_list; is != NULL; is = is->is_next) {
5221                     is->is_hnext = NULL;
5222                     is->is_phnext = NULL;
5223                     hv = is->is_hv % softs->ipf_state_size;
5224 
5225                     if (softs->ipf_state_table[hv] != NULL)
5226                               softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
5227                     else
5228                               softs->ipf_state_stats.iss_inuse++;
5229                     is->is_phnext = softs->ipf_state_table + hv;
5230                     is->is_hnext = softs->ipf_state_table[hv];
5231                     softs->ipf_state_table[hv] = is;
5232                     softs->ipf_state_stats.iss_bucketlen[hv]++;
5233           }
5234           RWLOCK_EXIT(&softc->ipf_state);
5235 
5236           return 0;
5237 }
5238 
5239 
5240 /* ------------------------------------------------------------------------ */
5241 /* Function:    ipf_state_add_tq                                            */
5242 /* Returns:     ipftq_t * - NULL = failure, else pointer to new timeout     */
5243 /*                          queue                                           */
5244 /* Parameters:  softc(I)  - pointer to main soft context                    */
5245 /*              ttl(I)    - pointer to the ttl for the new queue            */
5246 /*                                                                          */
5247 /* Request a pointer to a timeout queue that has a ttl as given by the      */
5248 /* value being passed in.  The timeout queue is added tot the list of those */
5249 /* used internally for stateful filtering.                                  */
5250 /* ------------------------------------------------------------------------ */
5251 ipftq_t *
ipf_state_add_tq(ipf_main_softc_t * softc,int ttl)5252 ipf_state_add_tq(ipf_main_softc_t *softc, int ttl)
5253 {
5254           ipf_state_softc_t *softs = softc->ipf_state_soft;
5255 
5256         return ipf_addtimeoutqueue(softc, &softs->ipf_state_usertq, ttl);
5257 }
5258 
5259 
5260 #ifndef _KERNEL
5261 /*
5262  * Display the built up state table rules and mapping entries.
5263  */
5264 void
ipf_state_dump(ipf_main_softc_t * softc,void * arg)5265 ipf_state_dump(ipf_main_softc_t *softc, void *arg)
5266 {
5267           ipf_state_softc_t *softs = arg;
5268           ipstate_t *ips;
5269 
5270           printf("List of active state sessions:\n");
5271           for (ips = softs->ipf_state_list; ips != NULL; )
5272                     ips = printstate(ips, opts & (OPT_DEBUG|OPT_VERBOSE),
5273                                          softc->ipf_ticks);
5274 }
5275 #endif
5276