1 /*        $NetBSD: ip_sync.c,v 1.6 2018/05/03 07:13:48 maxv Exp $     */
2 
3 /*
4  * Copyright (C) 2012 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL        1
12 # define        _KERNEL       1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/file.h>
18 #if !defined(_KERNEL) && !defined(__KERNEL__)
19 # include <stdio.h>
20 # include <stdlib.h>
21 # include <string.h>
22 # define _KERNEL
23 # define KERNEL
24 # ifdef __OpenBSD__
25 struct file;
26 # endif
27 # include <sys/uio.h>
28 # undef _KERNEL
29 # undef KERNEL
30 #else
31 # include <sys/systm.h>
32 # if !defined(__SVR4) && !defined(__svr4__)
33 #  include <sys/mbuf.h>
34 # endif
35 # include <sys/select.h>
36 # if __FreeBSD_version >= 500000
37 #  include <sys/selinfo.h>
38 # endif
39 #endif
40 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
41 # include <sys/proc.h>
42 #endif
43 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
44 # include <sys/filio.h>
45 # include <sys/fcntl.h>
46 #else
47 # include <sys/ioctl.h>
48 #endif
49 #include <sys/time.h>
50 #if !defined(linux)
51 # include <sys/protosw.h>
52 #endif
53 #include <sys/socket.h>
54 #if defined(__SVR4) || defined(__svr4__)
55 # include <sys/filio.h>
56 # include <sys/byteorder.h>
57 # ifdef _KERNEL
58 #  include <sys/dditypes.h>
59 # endif
60 # include <sys/stream.h>
61 # include <sys/kmem.h>
62 #endif
63 
64 #include <net/if.h>
65 #ifdef sun
66 # include <net/af.h>
67 #endif
68 #include <netinet/in.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/tcp.h>
72 #if !defined(linux)
73 # include <netinet/ip_var.h>
74 #endif
75 #if !defined(__hpux) && !defined(linux)
76 # include <netinet/tcp_fsm.h>
77 #endif
78 #include <netinet/udp.h>
79 #include <netinet/ip_icmp.h>
80 #include "netinet/ip_compat.h"
81 #include "netinet/ip_fil.h"
82 #include "netinet/ip_nat.h"
83 #include "netinet/ip_frag.h"
84 #include "netinet/ip_state.h"
85 #include "netinet/ip_proxy.h"
86 #include "netinet/ip_sync.h"
87 #ifdef  USE_INET6
88 #include <netinet/icmp6.h>
89 #endif
90 #if (__FreeBSD_version >= 300000)
91 # include <sys/malloc.h>
92 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
93 #  include <sys/libkern.h>
94 #  include <sys/systm.h>
95 # endif
96 #endif
97 /* END OF INCLUDES */
98 
99 #if !defined(lint)
100 #if defined(__NetBSD__)
101 #include <sys/cdefs.h>
102 __KERNEL_RCSID(0, "$NetBSD: ip_sync.c,v 1.6 2018/05/03 07:13:48 maxv Exp $");
103 #else
104 static const char rcsid[] = "@(#)Id: ip_sync.c,v 1.1.1.2 2012/07/22 13:45:38 darrenr Exp";
105 #endif
106 #endif
107 
108 #define   SYNC_STATETABSZ     256
109 #define   SYNC_NATTABSZ       256
110 
111 typedef struct ipf_sync_softc_s {
112           ipfmutex_t          ipf_syncadd;
113           ipfmutex_t          ipsl_mutex;
114           ipfrwlock_t         ipf_syncstate;
115           ipfrwlock_t         ipf_syncnat;
116 #if SOLARIS && defined(_KERNEL)
117           kcondvar_t          ipslwait;
118 #endif
119 #if defined(linux) && defined(_KERNEL)
120           wait_queue_head_t   sl_tail_linux;
121 #endif
122           synclist_t          **syncstatetab;
123           synclist_t          **syncnattab;
124           synclogent_t        *synclog;
125           syncupdent_t        *syncupd;
126           u_int               ipf_sync_num;
127           u_int               ipf_sync_wrap;
128           u_int               sl_idx;             /* next available sync log entry */
129           u_int               su_idx;             /* next available sync update entry */
130           u_int               sl_tail;  /* next sync log entry to read */
131           u_int               su_tail;  /* next sync update entry to read */
132           int                 ipf_sync_log_sz;
133           int                 ipf_sync_nat_tab_sz;
134           int                 ipf_sync_state_tab_sz;
135           int                 ipf_sync_debug;
136           int                 ipf_sync_events;
137           u_32_t              ipf_sync_lastwakeup;
138           int                 ipf_sync_wake_interval;
139           int                 ipf_sync_event_high_wm;
140           int                 ipf_sync_queue_high_wm;
141           int                 ipf_sync_inited;
142 } ipf_sync_softc_t;
143 
144 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
145 static void ipf_sync_wakeup(ipf_main_softc_t *);
146 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
147 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
148 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
149 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
150 
151 # if !defined(sparc) && !defined(__hppa)
152 void ipf_sync_tcporder(int, struct tcpdata *);
153 void ipf_sync_natorder(int, struct nat *);
154 void ipf_sync_storder(int, struct ipstate *);
155 # endif
156 
157 
158 void *
ipf_sync_soft_create(ipf_main_softc_t * softc)159 ipf_sync_soft_create(ipf_main_softc_t *softc)
160 {
161           ipf_sync_softc_t *softs;
162 
163           KMALLOC(softs, ipf_sync_softc_t *);
164           if (softs == NULL) {
165                     IPFERROR(110024);
166                     return NULL;
167           }
168 
169           bzero((char *)softs, sizeof(*softs));
170 
171           softs->ipf_sync_log_sz = SYNCLOG_SZ;
172           softs->ipf_sync_nat_tab_sz = SYNC_STATETABSZ;
173           softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
174           softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 100 / 90;      /* 90% */
175           softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 100 / 90;      /* 90% */
176 
177           return softs;
178 }
179 
180 
181 /* ------------------------------------------------------------------------ */
182 /* Function:    ipf_sync_init                                               */
183 /* Returns:     int - 0 == success, -1 == failure                           */
184 /* Parameters:  Nil                                                         */
185 /*                                                                          */
186 /* Initialise all of the locks required for the sync code and initialise    */
187 /* any data structures, as required.                                        */
188 /* ------------------------------------------------------------------------ */
189 int
ipf_sync_soft_init(ipf_main_softc_t * softc,void * arg)190 ipf_sync_soft_init(ipf_main_softc_t *softc, void *arg)
191 {
192           ipf_sync_softc_t *softs = arg;
193 
194           KMALLOCS(softs->synclog, synclogent_t *,
195                      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
196           if (softs->synclog == NULL)
197                     return -1;
198           bzero((char *)softs->synclog,
199                 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
200 
201           KMALLOCS(softs->syncupd, syncupdent_t *,
202                      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
203           if (softs->syncupd == NULL)
204                     return -2;
205           bzero((char *)softs->syncupd,
206                 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
207 
208           KMALLOCS(softs->syncstatetab, synclist_t **,
209                      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
210           if (softs->syncstatetab == NULL)
211                     return -3;
212           bzero((char *)softs->syncstatetab,
213                 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
214 
215           KMALLOCS(softs->syncnattab, synclist_t **,
216                      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
217           if (softs->syncnattab == NULL)
218                     return -3;
219           bzero((char *)softs->syncnattab,
220                 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
221 
222           softs->ipf_sync_num = 1;
223           softs->ipf_sync_wrap = 0;
224           softs->sl_idx = 0;
225           softs->su_idx = 0;
226           softs->sl_tail = 0;
227           softs->su_tail = 0;
228           softs->ipf_sync_events = 0;
229           softs->ipf_sync_lastwakeup = 0;
230 
231 
232 # if SOLARIS && defined(_KERNEL)
233           cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
234 # endif
235           RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
236           RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
237           MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
238           MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
239 
240           softs->ipf_sync_inited = 1;
241 
242           return 0;
243 }
244 
245 
246 /* ------------------------------------------------------------------------ */
247 /* Function:    ipf_sync_unload                                             */
248 /* Returns:     int - 0 == success, -1 == failure                           */
249 /* Parameters:  Nil                                                         */
250 /*                                                                          */
251 /* Destroy the locks created when initialising and free any memory in use   */
252 /* with the synchronisation tables.                                         */
253 /* ------------------------------------------------------------------------ */
254 int
ipf_sync_soft_fini(ipf_main_softc_t * softc,void * arg)255 ipf_sync_soft_fini(ipf_main_softc_t *softc, void *arg)
256 {
257           ipf_sync_softc_t *softs = arg;
258 
259           if (softs->syncnattab != NULL) {
260                     ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
261                                              softs->syncnattab);
262                     KFREES(softs->syncnattab,
263                            softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
264                     softs->syncnattab = NULL;
265           }
266 
267           if (softs->syncstatetab != NULL) {
268                     ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
269                                              softs->syncstatetab);
270                     KFREES(softs->syncstatetab,
271                            softs->ipf_sync_state_tab_sz *
272                            sizeof(*softs->syncstatetab));
273                     softs->syncstatetab = NULL;
274           }
275 
276           if (softs->syncupd != NULL) {
277                     KFREES(softs->syncupd,
278                            softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
279                     softs->syncupd = NULL;
280           }
281 
282           if (softs->synclog != NULL) {
283                     KFREES(softs->synclog,
284                            softs->ipf_sync_log_sz * sizeof(*softs->synclog));
285                     softs->synclog = NULL;
286           }
287 
288           if (softs->ipf_sync_inited == 1) {
289                     MUTEX_DESTROY(&softs->ipsl_mutex);
290                     MUTEX_DESTROY(&softs->ipf_syncadd);
291                     RW_DESTROY(&softs->ipf_syncnat);
292                     RW_DESTROY(&softs->ipf_syncstate);
293                     softs->ipf_sync_inited = 0;
294           }
295 
296           return 0;
297 }
298 
299 void
ipf_sync_soft_destroy(ipf_main_softc_t * softc,void * arg)300 ipf_sync_soft_destroy(ipf_main_softc_t *softc, void *arg)
301 {
302           ipf_sync_softc_t *softs = arg;
303 
304           KFREE(softs);
305 }
306 
307 
308 # if !defined(sparc) && !defined(__hppa)
309 /* ------------------------------------------------------------------------ */
310 /* Function:    ipf_sync_tcporder                                           */
311 /* Returns:     Nil                                                         */
312 /* Parameters:  way(I) - direction of byte order conversion.                */
313 /*              td(IO) - pointer to data to be converted.                   */
314 /*                                                                          */
315 /* Do byte swapping on values in the TCP state information structure that   */
316 /* need to be used at both ends by the host in their native byte order.     */
317 /* ------------------------------------------------------------------------ */
318 void
ipf_sync_tcporder(int way,tcpdata_t * td)319 ipf_sync_tcporder(int way, tcpdata_t *td)
320 {
321           if (way) {
322                     td->td_maxwin = htons(td->td_maxwin);
323                     td->td_end = htonl(td->td_end);
324                     td->td_maxend = htonl(td->td_maxend);
325           } else {
326                     td->td_maxwin = ntohs(td->td_maxwin);
327                     td->td_end = ntohl(td->td_end);
328                     td->td_maxend = ntohl(td->td_maxend);
329           }
330 }
331 
332 
333 /* ------------------------------------------------------------------------ */
334 /* Function:    ipf_sync_natorder                                           */
335 /* Returns:     Nil                                                         */
336 /* Parameters:  way(I)  - direction of byte order conversion.               */
337 /*              nat(IO) - pointer to data to be converted.                  */
338 /*                                                                          */
339 /* Do byte swapping on values in the NAT data structure that need to be     */
340 /* used at both ends by the host in their native byte order.                */
341 /* ------------------------------------------------------------------------ */
342 void
ipf_sync_natorder(int way,nat_t * n)343 ipf_sync_natorder(int way, nat_t *n)
344 {
345           if (way) {
346                     n->nat_age = htonl(n->nat_age);
347                     n->nat_flags = htonl(n->nat_flags);
348                     n->nat_ipsumd = htonl(n->nat_ipsumd);
349                     n->nat_use = htonl(n->nat_use);
350                     n->nat_dir = htonl(n->nat_dir);
351           } else {
352                     n->nat_age = ntohl(n->nat_age);
353                     n->nat_flags = ntohl(n->nat_flags);
354                     n->nat_ipsumd = ntohl(n->nat_ipsumd);
355                     n->nat_use = ntohl(n->nat_use);
356                     n->nat_dir = ntohl(n->nat_dir);
357           }
358 }
359 
360 
361 /* ------------------------------------------------------------------------ */
362 /* Function:    ipf_sync_storder                                            */
363 /* Returns:     Nil                                                         */
364 /* Parameters:  way(I)  - direction of byte order conversion.               */
365 /*              ips(IO) - pointer to data to be converted.                  */
366 /*                                                                          */
367 /* Do byte swapping on values in the IP state data structure that need to   */
368 /* be used at both ends by the host in their native byte order.             */
369 /* ------------------------------------------------------------------------ */
370 void
ipf_sync_storder(int way,ipstate_t * ips)371 ipf_sync_storder(int way, ipstate_t *ips)
372 {
373           ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
374           ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
375 
376           if (way) {
377                     ips->is_hv = htonl(ips->is_hv);
378                     ips->is_die = htonl(ips->is_die);
379                     ips->is_pass = htonl(ips->is_pass);
380                     ips->is_flags = htonl(ips->is_flags);
381                     ips->is_opt[0] = htonl(ips->is_opt[0]);
382                     ips->is_opt[1] = htonl(ips->is_opt[1]);
383                     ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
384                     ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
385                     ips->is_sec = htons(ips->is_sec);
386                     ips->is_secmsk = htons(ips->is_secmsk);
387                     ips->is_auth = htons(ips->is_auth);
388                     ips->is_authmsk = htons(ips->is_authmsk);
389                     ips->is_s0[0] = htonl(ips->is_s0[0]);
390                     ips->is_s0[1] = htonl(ips->is_s0[1]);
391                     ips->is_smsk[0] = htons(ips->is_smsk[0]);
392                     ips->is_smsk[1] = htons(ips->is_smsk[1]);
393           } else {
394                     ips->is_hv = ntohl(ips->is_hv);
395                     ips->is_die = ntohl(ips->is_die);
396                     ips->is_pass = ntohl(ips->is_pass);
397                     ips->is_flags = ntohl(ips->is_flags);
398                     ips->is_opt[0] = ntohl(ips->is_opt[0]);
399                     ips->is_opt[1] = ntohl(ips->is_opt[1]);
400                     ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
401                     ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
402                     ips->is_sec = ntohs(ips->is_sec);
403                     ips->is_secmsk = ntohs(ips->is_secmsk);
404                     ips->is_auth = ntohs(ips->is_auth);
405                     ips->is_authmsk = ntohs(ips->is_authmsk);
406                     ips->is_s0[0] = ntohl(ips->is_s0[0]);
407                     ips->is_s0[1] = ntohl(ips->is_s0[1]);
408                     ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
409                     ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
410           }
411 }
412 # else /* !defined(sparc) && !defined(__hppa) */
413 #  define ipf_sync_tcporder(x,y)
414 #  define ipf_sync_natorder(x,y)
415 #  define ipf_sync_storder(x,y)
416 # endif /* !defined(sparc) && !defined(__hppa) */
417 
418 
419 /* ------------------------------------------------------------------------ */
420 /* Function:    ipf_sync_write                                              */
421 /* Returns:     int    - 0 == success, else error value.                    */
422 /* Parameters:  uio(I) - pointer to information about data to write         */
423 /*                                                                          */
424 /* Moves data from user space into the kernel and uses it for updating data */
425 /* structures in the state/NAT tables.                                      */
426 /* ------------------------------------------------------------------------ */
427 int
ipf_sync_write(ipf_main_softc_t * softc,struct uio * uio)428 ipf_sync_write(ipf_main_softc_t *softc, struct uio *uio)
429 {
430           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
431           synchdr_t sh;
432 
433           /*
434            * THIS MUST BE SUFFICIENT LARGE TO STORE
435            * ANY POSSIBLE DATA TYPE
436            */
437           char data[2048];
438 
439           int err = 0;
440 
441 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
442           uio->uio_rw = UIO_WRITE;
443 #  endif
444 
445           /* Try to get bytes */
446           while (uio->uio_resid > 0) {
447 
448                     if (uio->uio_resid >= sizeof(sh)) {
449 
450                               err = UIOMOVE((void *)&sh, sizeof(sh), UIO_WRITE, uio);
451 
452                               if (err) {
453                                         if (softs->ipf_sync_debug > 2)
454                                                   printf("uiomove(header) failed: %d\n",
455                                                             err);
456                                         return err;
457                               }
458 
459                               /* convert to host order */
460                               sh.sm_magic = ntohl(sh.sm_magic);
461                               sh.sm_len = ntohl(sh.sm_len);
462                               sh.sm_num = ntohl(sh.sm_num);
463 
464                               if (softs->ipf_sync_debug > 8)
465                                         printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
466                                                   sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
467                                                   sh.sm_table, sh.sm_rev, sh.sm_len,
468                                                   sh.sm_magic);
469 
470                               if (sh.sm_magic != SYNHDRMAGIC) {
471                                         if (softs->ipf_sync_debug > 2)
472                                                   printf("uiomove(header) invalid %s\n",
473                                                             "magic");
474                                         IPFERROR(110001);
475                                         return EINVAL;
476                               }
477 
478                               if (sh.sm_v != 4 && sh.sm_v != 6) {
479                                         if (softs->ipf_sync_debug > 2)
480                                                   printf("uiomove(header) invalid %s\n",
481                                                             "protocol");
482                                         IPFERROR(110002);
483                                         return EINVAL;
484                               }
485 
486                               if (sh.sm_cmd > SMC_MAXCMD) {
487                                         if (softs->ipf_sync_debug > 2)
488                                                   printf("uiomove(header) invalid %s\n",
489                                                             "command");
490                                         IPFERROR(110003);
491                                         return EINVAL;
492                               }
493 
494 
495                               if (sh.sm_table > SMC_MAXTBL) {
496                                         if (softs->ipf_sync_debug > 2)
497                                                   printf("uiomove(header) invalid %s\n",
498                                                             "table");
499                                         IPFERROR(110004);
500                                         return EINVAL;
501                               }
502 
503                     } else {
504                               /* unsufficient data, wait until next call */
505                               if (softs->ipf_sync_debug > 2)
506                                         printf("uiomove(header) insufficient data");
507                               IPFERROR(110005);
508                               return EAGAIN;
509                     }
510 
511 
512                     /*
513                      * We have a header, so try to read the amount of data
514                      * needed for the request
515                      */
516 
517                     /* not supported */
518                     if (sh.sm_len == 0) {
519                               if (softs->ipf_sync_debug > 2)
520                                         printf("uiomove(data zero length %s\n",
521                                                   "not supported");
522                               IPFERROR(110006);
523                               return EINVAL;
524                     }
525 
526                     if (uio->uio_resid >= sh.sm_len) {
527 
528                               err = UIOMOVE((void *)data, sh.sm_len, UIO_WRITE, uio);
529 
530                               if (err) {
531                                         if (softs->ipf_sync_debug > 2)
532                                                   printf("uiomove(data) failed: %d\n",
533                                                             err);
534                                         return err;
535                               }
536 
537                               if (softs->ipf_sync_debug > 7)
538                                         printf("uiomove(data) %d bytes read\n",
539                                                   sh.sm_len);
540 
541                               if (sh.sm_table == SMC_STATE)
542                                         err = ipf_sync_state(softc, &sh, data);
543                               else if (sh.sm_table == SMC_NAT)
544                                         err = ipf_sync_nat(softc, &sh, data);
545                               if (softs->ipf_sync_debug > 7)
546                                         printf("[%d] Finished with error %d\n",
547                                                   sh.sm_num, err);
548 
549                     } else {
550                               /* insufficient data, wait until next call */
551                               if (softs->ipf_sync_debug > 2)
552                                         printf("uiomove(data) %s %d bytes, got %d\n",
553                                                   "insufficient data, need",
554                                                   sh.sm_len, (int)uio->uio_resid);
555                               IPFERROR(110007);
556                               return EAGAIN;
557                     }
558           }
559 
560           /* no more data */
561           return 0;
562 }
563 
564 
565 /* ------------------------------------------------------------------------ */
566 /* Function:    ipf_sync_read                                               */
567 /* Returns:     int    - 0 == success, else error value.                    */
568 /* Parameters:  uio(O) - pointer to information about where to store data   */
569 /*                                                                          */
570 /* This function is called when a user program wants to read some data      */
571 /* for pending state/NAT updates.  If no data is available, the caller is   */
572 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
573 /* ------------------------------------------------------------------------ */
574 int
ipf_sync_read(ipf_main_softc_t * softc,struct uio * uio)575 ipf_sync_read(ipf_main_softc_t *softc, struct uio *uio)
576 {
577           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
578           syncupdent_t *su;
579           synclogent_t *sl;
580           int err = 0;
581 
582           if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
583                     IPFERROR(110008);
584                     return EINVAL;
585           }
586 
587 #  if BSD_GE_YEAR(199306) || defined(__FreeBSD__) || defined(__osf__)
588           uio->uio_rw = UIO_READ;
589 #  endif
590 
591           MUTEX_ENTER(&softs->ipsl_mutex);
592           while ((softs->sl_tail == softs->sl_idx) &&
593                  (softs->su_tail == softs->su_idx)) {
594 #  if defined(_KERNEL)
595 #   if SOLARIS
596                     if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
597                               MUTEX_EXIT(&softs->ipsl_mutex);
598                               IPFERROR(110009);
599                               return EINTR;
600                     }
601 #   else
602 #    ifdef __hpux
603                     {
604                     lock_t *l;
605 
606                     l = get_sleep_lock(&softs->sl_tail);
607                     err = sleep(&softs->sl_tail, PZERO+1);
608                     if (err) {
609                               MUTEX_EXIT(&softs->ipsl_mutex);
610                               IPFERROR(110010);
611                               return EINTR;
612                     }
613                     spinunlock(l);
614                     }
615 #    else /* __hpux */
616 #     ifdef __osf__
617                     err = mpsleep(&softs->sl_tail, PSUSP|PCATCH,  "ipl sleep", 0,
618                                     &softs->ipsl_mutex, MS_LOCK_SIMPLE);
619                     if (err) {
620                               IPFERROR(110011);
621                               return EINTR;
622                     }
623 #     else
624                     MUTEX_EXIT(&softs->ipsl_mutex);
625                     err = SLEEP(&softs->sl_tail, "ipl sleep");
626                     if (err) {
627                               IPFERROR(110012);
628                               return EINTR;
629                     }
630                     MUTEX_ENTER(&softs->ipsl_mutex);
631 #     endif /* __osf__ */
632 #    endif /* __hpux */
633 #   endif /* SOLARIS */
634 #  endif /* _KERNEL */
635           }
636 
637           while ((softs->sl_tail < softs->sl_idx) &&
638                  (uio->uio_resid > sizeof(*sl))) {
639                     sl = softs->synclog + softs->sl_tail++;
640                     MUTEX_EXIT(&softs->ipsl_mutex);
641                     err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
642                     if (err != 0)
643                               goto goterror;
644                     MUTEX_ENTER(&softs->ipsl_mutex);
645           }
646 
647           while ((softs->su_tail < softs->su_idx) &&
648                  (uio->uio_resid > sizeof(*su))) {
649                     su = softs->syncupd + softs->su_tail;
650                     softs->su_tail++;
651                     MUTEX_EXIT(&softs->ipsl_mutex);
652                     err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
653                     if (err != 0)
654                               goto goterror;
655                     MUTEX_ENTER(&softs->ipsl_mutex);
656                     if (su->sup_hdr.sm_sl != NULL)
657                               su->sup_hdr.sm_sl->sl_idx = -1;
658           }
659           if (softs->sl_tail == softs->sl_idx)
660                     softs->sl_tail = softs->sl_idx = 0;
661           if (softs->su_tail == softs->su_idx)
662                     softs->su_tail = softs->su_idx = 0;
663           MUTEX_EXIT(&softs->ipsl_mutex);
664 goterror:
665           return err;
666 }
667 
668 
669 /* ------------------------------------------------------------------------ */
670 /* Function:    ipf_sync_state                                              */
671 /* Returns:     int    - 0 == success, else error value.                    */
/* Parameters:  softc(I) - pointer to soft context main structure           */
/*              sp(I)    - pointer to sync packet data header               */
/*              data(I)  - pointer to data sent after the sync header       */
674 /*                                                                          */
675 /* Updates the state table according to information passed in the sync      */
676 /* header.  As required, more data is fetched from the uio structure but    */
677 /* varies depending on the contents of the sync header.  This function can  */
678 /* create a new state entry or update one.  Deletion is left to the state   */
679 /* structures being timed out correctly.                                    */
680 /* ------------------------------------------------------------------------ */
681 static int
ipf_sync_state(ipf_main_softc_t * softc,synchdr_t * sp,void * data)682 ipf_sync_state(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
683 {
684           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
685           synctcp_update_t su;
686           ipstate_t *is, sn;
687           synclist_t *sl;
688           frentry_t *fr;
689           u_int hv;
690           int err = 0;
691 
692           hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
693 
694           switch (sp->sm_cmd)
695           {
696           case SMC_CREATE :
697 
698                     bcopy(data, &sn, sizeof(sn));
699                     KMALLOC(is, ipstate_t *);
700                     if (is == NULL) {
701                               IPFERROR(110013);
702                               err = ENOMEM;
703                               break;
704                     }
705 
706                     KMALLOC(sl, synclist_t *);
707                     if (sl == NULL) {
708                               IPFERROR(110014);
709                               err = ENOMEM;
710                               KFREE(is);
711                               break;
712                     }
713 
714                     bzero((char *)is, offsetof(ipstate_t, is_die));
715                     bcopy((char *)&sn.is_die, (char *)&is->is_die,
716                           sizeof(*is) - offsetof(ipstate_t, is_die));
717                     ipf_sync_storder(0, is);
718 
719                     /*
720                      * We need to find the same rule on the slave as was used on
721                      * the master to create this state entry.
722                      */
723                     READ_ENTER(&softc->ipf_mutex);
724                     fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
725                     if (fr != NULL) {
726                               MUTEX_ENTER(&fr->fr_lock);
727                               fr->fr_ref++;
728                               fr->fr_statecnt++;
729                               MUTEX_EXIT(&fr->fr_lock);
730                     }
731                     RWLOCK_EXIT(&softc->ipf_mutex);
732 
733                     if (softs->ipf_sync_debug > 4)
734                               printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
735 
736                     is->is_rule = fr;
737                     is->is_sync = sl;
738 
739                     sl->sl_idx = -1;
740                     sl->sl_ips = is;
741                     bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
742 
743                     WRITE_ENTER(&softs->ipf_syncstate);
744                     WRITE_ENTER(&softc->ipf_state);
745 
746                     sl->sl_pnext = softs->syncstatetab + hv;
747                     sl->sl_next = softs->syncstatetab[hv];
748                     if (softs->syncstatetab[hv] != NULL)
749                               softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
750                     softs->syncstatetab[hv] = sl;
751                     MUTEX_DOWNGRADE(&softs->ipf_syncstate);
752                     ipf_state_insert(softc, is, sp->sm_rev);
753                     /*
754                      * Do not initialise the interface pointers for the state
755                      * entry as the full complement of interface names may not
756                      * be present.
757                      *
758                      * Put this state entry on its timeout queue.
759                      */
760                     /*fr_setstatequeue(is, sp->sm_rev);*/
761                     break;
762 
763           case SMC_UPDATE :
764                     bcopy(data, &su, sizeof(su));
765 
766                     if (softs->ipf_sync_debug > 4)
767                               printf("[%d] Update age %lu state %d/%d \n",
768                                         sp->sm_num, su.stu_age, su.stu_state[0],
769                                         su.stu_state[1]);
770 
771                     READ_ENTER(&softs->ipf_syncstate);
772                     for (sl = softs->syncstatetab[hv]; (sl != NULL);
773                          sl = sl->sl_next)
774                               if (sl->sl_hdr.sm_num == sp->sm_num)
775                                         break;
776                     if (sl == NULL) {
777                               if (softs->ipf_sync_debug > 1)
778                                         printf("[%d] State not found - can't update\n",
779                                                   sp->sm_num);
780                               RWLOCK_EXIT(&softs->ipf_syncstate);
781                               IPFERROR(110015);
782                               err = ENOENT;
783                               break;
784                     }
785 
786                     READ_ENTER(&softc->ipf_state);
787 
788                     if (softs->ipf_sync_debug > 6)
789                               printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
790                                         sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
791                                         sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
792                                         sl->sl_hdr.sm_rev);
793 
794                     is = sl->sl_ips;
795 
796                     MUTEX_ENTER(&is->is_lock);
797                     switch (sp->sm_p)
798                     {
799                     case IPPROTO_TCP :
800                               /* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
801                               is->is_send = su.stu_data[0].td_end;
802                               is->is_maxsend = su.stu_data[0].td_maxend;
803                               is->is_maxswin = su.stu_data[0].td_maxwin;
804                               is->is_state[0] = su.stu_state[0];
805                               is->is_dend = su.stu_data[1].td_end;
806                               is->is_maxdend = su.stu_data[1].td_maxend;
807                               is->is_maxdwin = su.stu_data[1].td_maxwin;
808                               is->is_state[1] = su.stu_state[1];
809                               break;
810                     default :
811                               break;
812                     }
813 
814                     if (softs->ipf_sync_debug > 6)
815                               printf("[%d] Setting timers for state\n", sp->sm_num);
816 
817                     ipf_state_setqueue(softc, is, sp->sm_rev);
818 
819                     MUTEX_EXIT(&is->is_lock);
820                     break;
821 
822           default :
823                     IPFERROR(110016);
824                     err = EINVAL;
825                     break;
826           }
827 
828           if (err == 0) {
829                     RWLOCK_EXIT(&softc->ipf_state);
830                     RWLOCK_EXIT(&softs->ipf_syncstate);
831           }
832 
833           if (softs->ipf_sync_debug > 6)
834                     printf("[%d] Update completed with error %d\n",
835                               sp->sm_num, err);
836 
837           return err;
838 }
839 
840 
841 /* ------------------------------------------------------------------------ */
842 /* Function:    ipf_sync_del                                                */
843 /* Returns:     Nil                                                         */
844 /* Parameters:  sl(I) - pointer to synclist object to delete                */
845 /*                                                                          */
846 /* Deletes an object from the synclist.                                     */
847 /* ------------------------------------------------------------------------ */
848 static void
ipf_sync_del(ipf_sync_softc_t * softs,synclist_t * sl)849 ipf_sync_del(ipf_sync_softc_t *softs, synclist_t *sl)
850 {
851           *sl->sl_pnext = sl->sl_next;
852           if (sl->sl_next != NULL)
853                     sl->sl_next->sl_pnext = sl->sl_pnext;
854           if (sl->sl_idx != -1)
855                     softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
856 }
857 
858 
859 /* ------------------------------------------------------------------------ */
860 /* Function:    ipf_sync_del_state                                          */
861 /* Returns:     Nil                                                         */
862 /* Parameters:  sl(I) - pointer to synclist object to delete                */
863 /*                                                                          */
864 /* Deletes an object from the synclist state table and free's its memory.   */
865 /* ------------------------------------------------------------------------ */
866 void
ipf_sync_del_state(void * arg,synclist_t * sl)867 ipf_sync_del_state(void *arg, synclist_t *sl)
868 {
869           ipf_sync_softc_t *softs = arg;
870 
871           WRITE_ENTER(&softs->ipf_syncstate);
872           ipf_sync_del(softs, sl);
873           RWLOCK_EXIT(&softs->ipf_syncstate);
874           KFREE(sl);
875 }
876 
877 
878 /* ------------------------------------------------------------------------ */
879 /* Function:    ipf_sync_del_nat                                            */
880 /* Returns:     Nil                                                         */
881 /* Parameters:  sl(I) - pointer to synclist object to delete                */
882 /*                                                                          */
883 /* Deletes an object from the synclist nat table and free's its memory.     */
884 /* ------------------------------------------------------------------------ */
885 void
ipf_sync_del_nat(void * arg,synclist_t * sl)886 ipf_sync_del_nat(void *arg, synclist_t *sl)
887 {
888           ipf_sync_softc_t *softs = arg;
889 
890           WRITE_ENTER(&softs->ipf_syncnat);
891           ipf_sync_del(softs, sl);
892           RWLOCK_EXIT(&softs->ipf_syncnat);
893           KFREE(sl);
894 }
895 
896 
897 /* ------------------------------------------------------------------------ */
898 /* Function:    ipf_sync_nat                                                */
899 /* Returns:     int    - 0 == success, else error value.                    */
900 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
901 /*              uio(I) - pointer to user data for further information       */
902 /*                                                                          */
903 /* Updates the NAT  table according to information passed in the sync       */
904 /* header.  As required, more data is fetched from the uio structure but    */
905 /* varies depending on the contents of the sync header.  This function can  */
906 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
907 /* structures being timed out correctly.                                    */
908 /* ------------------------------------------------------------------------ */
909 static int
ipf_sync_nat(ipf_main_softc_t * softc,synchdr_t * sp,void * data)910 ipf_sync_nat(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
911 {
912           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
913           syncupdent_t su;
914           nat_t *n, *nat;
915           synclist_t *sl;
916           u_int hv = 0;
917           int err = 0;
918 
919           READ_ENTER(&softs->ipf_syncnat);
920 
921           switch (sp->sm_cmd)
922           {
923           case SMC_CREATE :
924                     KMALLOC(n, nat_t *);
925                     if (n == NULL) {
926                               IPFERROR(110017);
927                               err = ENOMEM;
928                               break;
929                     }
930 
931                     KMALLOC(sl, synclist_t *);
932                     if (sl == NULL) {
933                               IPFERROR(110018);
934                               err = ENOMEM;
935                               KFREE(n);
936                               break;
937                     }
938 
939                     nat = (nat_t *)data;
940                     bzero((char *)n, offsetof(nat_t, nat_age));
941                     bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
942                           sizeof(*n) - offsetof(nat_t, nat_age));
943                     ipf_sync_natorder(0, n);
944                     n->nat_sync = sl;
945                     n->nat_rev = sl->sl_rev;
946 
947                     sl->sl_idx = -1;
948                     sl->sl_ipn = n;
949                     sl->sl_num = ntohl(sp->sm_num);
950 
951                     WRITE_ENTER(&softc->ipf_nat);
952                     sl->sl_pnext = softs->syncnattab + hv;
953                     sl->sl_next = softs->syncnattab[hv];
954                     if (softs->syncnattab[hv] != NULL)
955                               softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
956                     softs->syncnattab[hv] = sl;
957                     (void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
958                     RWLOCK_EXIT(&softc->ipf_nat);
959                     break;
960 
961           case SMC_UPDATE :
962                     bcopy(data, &su, sizeof(su));
963 
964                     for (sl = softs->syncnattab[hv]; (sl != NULL);
965                          sl = sl->sl_next)
966                               if (sl->sl_hdr.sm_num == sp->sm_num)
967                                         break;
968                     if (sl == NULL) {
969                               IPFERROR(110019);
970                               err = ENOENT;
971                               break;
972                     }
973 
974                     READ_ENTER(&softc->ipf_nat);
975 
976                     nat = sl->sl_ipn;
977                     nat->nat_rev = sl->sl_rev;
978 
979                     MUTEX_ENTER(&nat->nat_lock);
980                     ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
981                     MUTEX_EXIT(&nat->nat_lock);
982 
983                     RWLOCK_EXIT(&softc->ipf_nat);
984 
985                     break;
986 
987           default :
988                     IPFERROR(110020);
989                     err = EINVAL;
990                     break;
991           }
992 
993           RWLOCK_EXIT(&softs->ipf_syncnat);
994           return err;
995 }
996 
997 
998 /* ------------------------------------------------------------------------ */
999 /* Function:    ipf_sync_new                                                */
1000 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
1001 /*                            data structure.                               */
1002 /* Parameters:  tab(I) - type of synclist_t to create                       */
1003 /*              fin(I) - pointer to packet information                      */
1004 /*              ptr(I) - pointer to owning object                           */
1005 /*                                                                          */
1006 /* Creates a new sync table entry and notifies any sleepers that it's there */
1007 /* waiting to be processed.                                                 */
1008 /* ------------------------------------------------------------------------ */
1009 synclist_t *
ipf_sync_new(ipf_main_softc_t * softc,int tab,fr_info_t * fin,void * ptr)1010 ipf_sync_new(ipf_main_softc_t *softc, int tab, fr_info_t *fin, void *ptr)
1011 {
1012           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1013           synclist_t *sl, *ss;
1014           synclogent_t *sle;
1015           u_int hv, sz;
1016 
1017           if (softs->sl_idx == softs->ipf_sync_log_sz)
1018                     return NULL;
1019           KMALLOC(sl, synclist_t *);
1020           if (sl == NULL)
1021                     return NULL;
1022 
1023           MUTEX_ENTER(&softs->ipf_syncadd);
1024           /*
1025            * Get a unique number for this synclist_t.  The number is only meant
1026            * to be unique for the lifetime of the structure and may be reused
1027            * later.
1028            */
1029           softs->ipf_sync_num++;
1030           if (softs->ipf_sync_num == 0) {
1031                     softs->ipf_sync_num = 1;
1032                     softs->ipf_sync_wrap++;
1033           }
1034 
1035           /*
1036            * Use the synch number of the object as the hash key.  Should end up
1037            * with relatively even distribution over time.
1038            * XXX - an attacker could lunch an DoS attack, of sorts, if they are
1039            * the only one causing new table entries by only keeping open every
1040            * nth connection they make, where n is a value in the interval
1041            * [0, SYNC_STATETABSZ-1].
1042            */
1043           switch (tab)
1044           {
1045           case SMC_STATE :
1046                     hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
1047                     while (softs->ipf_sync_wrap != 0) {
1048                               for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
1049                                         if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1050                                                   break;
1051                               if (ss == NULL)
1052                                         break;
1053                               softs->ipf_sync_num++;
1054                               hv = softs->ipf_sync_num &
1055                                    (softs->ipf_sync_state_tab_sz - 1);
1056                     }
1057                     sl->sl_pnext = softs->syncstatetab + hv;
1058                     sl->sl_next = softs->syncstatetab[hv];
1059                     softs->syncstatetab[hv] = sl;
1060                     break;
1061 
1062           case SMC_NAT :
1063                     hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
1064                     while (softs->ipf_sync_wrap != 0) {
1065                               for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
1066                                         if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1067                                                   break;
1068                               if (ss == NULL)
1069                                         break;
1070                               softs->ipf_sync_num++;
1071                               hv = softs->ipf_sync_num &
1072                                    (softs->ipf_sync_nat_tab_sz - 1);
1073                     }
1074                     sl->sl_pnext = softs->syncnattab + hv;
1075                     sl->sl_next = softs->syncnattab[hv];
1076                     softs->syncnattab[hv] = sl;
1077                     break;
1078 
1079           default :
1080                     break;
1081           }
1082 
1083           sl->sl_num = softs->ipf_sync_num;
1084           MUTEX_EXIT(&softs->ipf_syncadd);
1085 
1086           sl->sl_magic = htonl(SYNHDRMAGIC);
1087           sl->sl_v = fin->fin_v;
1088           sl->sl_p = fin->fin_p;
1089           sl->sl_cmd = SMC_CREATE;
1090           sl->sl_idx = -1;
1091           sl->sl_table = tab;
1092           sl->sl_rev = fin->fin_rev;
1093           if (tab == SMC_STATE) {
1094                     sl->sl_ips = ptr;
1095                     sz = sizeof(*sl->sl_ips);
1096           } else if (tab == SMC_NAT) {
1097                     sl->sl_ipn = ptr;
1098                     sz = sizeof(*sl->sl_ipn);
1099           } else {
1100                     ptr = NULL;
1101                     sz = 0;
1102           }
1103           sl->sl_len = sz;
1104 
1105           /*
1106            * Create the log entry to be read by a user daemon.  When it has been
1107            * finished and put on the queue, send a signal to wakeup any waiters.
1108            */
1109           MUTEX_ENTER(&softs->ipf_syncadd);
1110           sle = softs->synclog + softs->sl_idx++;
1111           bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
1112                 sizeof(sle->sle_hdr));
1113           sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
1114           sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
1115           if (ptr != NULL) {
1116                     bcopy((char *)ptr, (char *)&sle->sle_un, sz);
1117                     if (tab == SMC_STATE) {
1118                               ipf_sync_storder(1, &sle->sle_un.sleu_ips);
1119                     } else if (tab == SMC_NAT) {
1120                               ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
1121                     }
1122           }
1123           MUTEX_EXIT(&softs->ipf_syncadd);
1124 
1125           ipf_sync_wakeup(softc);
1126           return sl;
1127 }
1128 
1129 
1130 /* ------------------------------------------------------------------------ */
1131 /* Function:    ipf_sync_update                                             */
1132 /* Returns:     Nil                                                         */
1133 /* Parameters:  tab(I) - type of synclist_t to create                       */
1134 /*              fin(I) - pointer to packet information                      */
1135 /*              sl(I)  - pointer to synchronisation object                  */
1136 /*                                                                          */
1137 /* For outbound packets, only, create an sync update record for the user    */
1138 /* process to read.                                                         */
1139 /* ------------------------------------------------------------------------ */
1140 void
ipf_sync_update(ipf_main_softc_t * softc,int tab,fr_info_t * fin,synclist_t * sl)1141 ipf_sync_update(ipf_main_softc_t *softc, int tab, fr_info_t *fin,
1142     synclist_t *sl)
1143 {
1144           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1145           synctcp_update_t *st;
1146           syncupdent_t *slu;
1147           ipstate_t *ips;
1148           nat_t *nat;
1149           ipfrwlock_t *lock;
1150 
1151           if (fin->fin_out == 0 || sl == NULL)
1152                     return;
1153 
1154           if (tab == SMC_STATE) {
1155                     lock = &softs->ipf_syncstate;
1156           } else {
1157                     lock = &softs->ipf_syncnat;
1158           }
1159 
1160           READ_ENTER(lock);
1161           if (sl->sl_idx == -1) {
1162                     MUTEX_ENTER(&softs->ipf_syncadd);
1163                     slu = softs->syncupd + softs->su_idx;
1164                     sl->sl_idx = softs->su_idx++;
1165                     MUTEX_EXIT(&softs->ipf_syncadd);
1166 
1167                     bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
1168                           sizeof(slu->sup_hdr));
1169                     slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
1170                     slu->sup_hdr.sm_sl = sl;
1171                     slu->sup_hdr.sm_cmd = SMC_UPDATE;
1172                     slu->sup_hdr.sm_table = tab;
1173                     slu->sup_hdr.sm_num = htonl(sl->sl_num);
1174                     slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
1175                     slu->sup_hdr.sm_rev = fin->fin_rev;
1176 # if 0
1177                     if (fin->fin_p == IPPROTO_TCP) {
1178                               st->stu_len[0] = 0;
1179                               st->stu_len[1] = 0;
1180                     }
1181 # endif
1182           } else
1183                     slu = softs->syncupd + sl->sl_idx;
1184 
1185           /*
1186            * Only TCP has complex timeouts, others just use default timeouts.
1187            * For TCP, we only need to track the connection state and window.
1188            */
1189           if (fin->fin_p == IPPROTO_TCP) {
1190                     st = &slu->sup_tcp;
1191                     if (tab == SMC_STATE) {
1192                               ips = sl->sl_ips;
1193                               st->stu_age = htonl(ips->is_die);
1194                               st->stu_data[0].td_end = ips->is_send;
1195                               st->stu_data[0].td_maxend = ips->is_maxsend;
1196                               st->stu_data[0].td_maxwin = ips->is_maxswin;
1197                               st->stu_state[0] = ips->is_state[0];
1198                               st->stu_data[1].td_end = ips->is_dend;
1199                               st->stu_data[1].td_maxend = ips->is_maxdend;
1200                               st->stu_data[1].td_maxwin = ips->is_maxdwin;
1201                               st->stu_state[1] = ips->is_state[1];
1202                     } else if (tab == SMC_NAT) {
1203                               nat = sl->sl_ipn;
1204                               st->stu_age = htonl(nat->nat_age);
1205                     }
1206           }
1207           RWLOCK_EXIT(lock);
1208 
1209           ipf_sync_wakeup(softc);
1210 }
1211 
1212 
1213 /* ------------------------------------------------------------------------ */
1214 /* Function:    ipf_sync_flush_table                                        */
1215 /* Returns:     int - number of entries freed by flushing table             */
1216 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
1217 /*              table(I)   - pointer to sync table to empty                 */
1218 /*                                                                          */
1219 /* Walk through a table of sync entries and free each one.  It is assumed   */
1220 /* that some lock is held so that nobody else tries to access the table     */
1221 /* during this cleanup.                                                     */
1222 /* ------------------------------------------------------------------------ */
1223 static int
ipf_sync_flush_table(ipf_sync_softc_t * softs,int tabsize,synclist_t ** table)1224 ipf_sync_flush_table(ipf_sync_softc_t *softs, int tabsize, synclist_t **table)
1225 {
1226           synclist_t *sl;
1227           int i, items;
1228 
1229           items = 0;
1230 
1231           for (i = 0; i < tabsize; i++) {
1232                     while ((sl = table[i]) != NULL) {
1233                               switch (sl->sl_table) {
1234                               case SMC_STATE :
1235                                         if (sl->sl_ips != NULL)
1236                                                   sl->sl_ips->is_sync = NULL;
1237                                         break;
1238                               case SMC_NAT :
1239                                         if (sl->sl_ipn != NULL)
1240                                                   sl->sl_ipn->nat_sync = NULL;
1241                                         break;
1242                               }
1243                               if (sl->sl_next != NULL)
1244                                         sl->sl_next->sl_pnext = sl->sl_pnext;
1245                               table[i] = sl->sl_next;
1246                               if (sl->sl_idx != -1)
1247                                         softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
1248                               KFREE(sl);
1249                               items++;
1250                     }
1251           }
1252 
1253           return items;
1254 }
1255 
1256 
1257 /* ------------------------------------------------------------------------ */
1258 /* Function:    ipf_sync_ioctl                                              */
1259 /* Returns:     int - 0 == success, != 0 == failure                         */
1260 /* Parameters:  data(I) - pointer to ioctl data                             */
1261 /*              cmd(I)  - ioctl command integer                             */
1262 /*              mode(I) - file mode bits used with open                     */
1263 /*                                                                          */
1264 /* This function currently does not handle any ioctls and so just returns   */
1265 /* EINVAL on all occasions.                                                 */
1266 /* ------------------------------------------------------------------------ */
1267 int
ipf_sync_ioctl(ipf_main_softc_t * softc,void * data,ioctlcmd_t cmd,int mode,int uid,void * ctx)1268 ipf_sync_ioctl(ipf_main_softc_t *softc, void *data, ioctlcmd_t cmd, int mode,
1269     int uid, void *ctx)
1270 {
1271           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1272           int error, i;
1273           SPL_INT(s);
1274 
1275           switch (cmd)
1276           {
1277         case SIOCIPFFL:
1278                     error = BCOPYIN(data, &i, sizeof(i));
1279                     if (error != 0) {
1280                               IPFERROR(110023);
1281                               error = EFAULT;
1282                               break;
1283                     }
1284 
1285                     switch (i)
1286                     {
1287                     case SMC_RLOG :
1288                               SPL_NET(s);
1289                               MUTEX_ENTER(&softs->ipsl_mutex);
1290                               i = (softs->sl_tail - softs->sl_idx) +
1291                                   (softs->su_tail - softs->su_idx);
1292                               softs->sl_idx = 0;
1293                               softs->su_idx = 0;
1294                               softs->sl_tail = 0;
1295                               softs->su_tail = 0;
1296                               MUTEX_EXIT(&softs->ipsl_mutex);
1297                               SPL_X(s);
1298                               break;
1299 
1300                     case SMC_NAT :
1301                               SPL_NET(s);
1302                               WRITE_ENTER(&softs->ipf_syncnat);
1303                               i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
1304                                                              softs->syncnattab);
1305                               RWLOCK_EXIT(&softs->ipf_syncnat);
1306                               SPL_X(s);
1307                               break;
1308 
1309                     case SMC_STATE :
1310                               SPL_NET(s);
1311                               WRITE_ENTER(&softs->ipf_syncstate);
1312                               i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
1313                                                              softs->syncstatetab);
1314                               RWLOCK_EXIT(&softs->ipf_syncstate);
1315                               SPL_X(s);
1316                               break;
1317                     }
1318 
1319                     error = BCOPYOUT(&i, data, sizeof(i));
1320                     if (error != 0) {
1321                               IPFERROR(110022);
1322                               error = EFAULT;
1323                     }
1324                     break;
1325 
1326           default :
1327                     IPFERROR(110021);
1328                     error = EINVAL;
1329                     break;
1330           }
1331 
1332           return error;
1333 }
1334 
1335 
1336 /* ------------------------------------------------------------------------ */
1337 /* Function:    ipf_sync_canread                                            */
1338 /* Returns:     int - 0 == success, != 0 == failure                         */
1339 /* Parameters:  Nil                                                         */
1340 /*                                                                          */
1341 /* This function provides input to the poll handler about whether or not    */
1342 /* there is data waiting to be read from the /dev/ipsync device.            */
1343 /* ------------------------------------------------------------------------ */
1344 int
ipf_sync_canread(void * arg)1345 ipf_sync_canread(void *arg)
1346 {
1347           ipf_sync_softc_t *softs = arg;
1348           return !((softs->sl_tail == softs->sl_idx) &&
1349                      (softs->su_tail == softs->su_idx));
1350 }
1351 
1352 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_canwrite                                           */
/* Returns:     int - 1 == can always write                                 */
/* Parameters:  arg(I) - not used                                           */
/*                                                                          */
/* Tells the poll handler that the device is always ready and willing to    */
/* accept write events.                                                     */
/* XXX Maybe this should return false if the sync table is full?            */
/* ------------------------------------------------------------------------ */
int
ipf_sync_canwrite(void *arg)
{
	(void)arg;

	return 1;
}
1367 
1368 
1369 /* ------------------------------------------------------------------------ */
1370 /* Function:    ipf_sync_wakeup                                             */
1371 /* Parameters:  Nil                                                         */
1372 /* Returns:     Nil                                                         */
1373 /*                                                                          */
1374 /* This function implements the heuristics that decide how often to         */
1375 /* generate a poll wakeup for programs that are waiting for information     */
1376 /* about when they can do a read on /dev/ipsync.                            */
1377 /*                                                                          */
1378 /* There are three different considerations here:                           */
1379 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
1380 /*   maximum number of ipf ticks to let pass by;                            */
1381 /* - do not let the queue of ouststanding things to generate notifies for   */
1382 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
1383 /* - do not let too many events get collapsed in before deciding that the   */
1384 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
1385 /*   mark for this counter.)                                                */
1386 /* ------------------------------------------------------------------------ */
1387 static void
ipf_sync_wakeup(ipf_main_softc_t * softc)1388 ipf_sync_wakeup(ipf_main_softc_t *softc)
1389 {
1390           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1391 
1392           softs->ipf_sync_events++;
1393           if ((softc->ipf_ticks >
1394               softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
1395               (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
1396               ((softs->sl_tail - softs->sl_idx) >
1397                softs->ipf_sync_queue_high_wm) ||
1398               ((softs->su_tail - softs->su_idx) >
1399                softs->ipf_sync_queue_high_wm)) {
1400 
1401                     ipf_sync_poll_wakeup(softc);
1402           }
1403 }
1404 
1405 
1406 /* ------------------------------------------------------------------------ */
1407 /* Function:    ipf_sync_poll_wakeup                                        */
1408 /* Parameters:  Nil                                                         */
1409 /* Returns:     Nil                                                         */
1410 /*                                                                          */
1411 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
1412 /* ------------------------------------------------------------------------ */
1413 static void
ipf_sync_poll_wakeup(ipf_main_softc_t * softc)1414 ipf_sync_poll_wakeup(ipf_main_softc_t *softc)
1415 {
1416           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1417 
1418           softs->ipf_sync_events = 0;
1419           softs->ipf_sync_lastwakeup = softc->ipf_ticks;
1420 
1421 # ifdef _KERNEL
1422 #  if SOLARIS
1423           MUTEX_ENTER(&softs->ipsl_mutex);
1424           cv_signal(&softs->ipslwait);
1425           MUTEX_EXIT(&softs->ipsl_mutex);
1426           pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
1427 #  else
1428           WAKEUP(&softs->sl_tail, 0);
1429           POLLWAKEUP(IPL_LOGSYNC);
1430 #  endif
1431 # endif
1432 }
1433 
1434 
1435 /* ------------------------------------------------------------------------ */
1436 /* Function:    ipf_sync_expire                                             */
1437 /* Parameters:  Nil                                                         */
1438 /* Returns:     Nil                                                         */
1439 /*                                                                          */
1440 /* This is the function called even ipf_tick.  It implements one of the     */
1441 /* three heuristics above *IF* there are events waiting.                    */
1442 /* ------------------------------------------------------------------------ */
1443 void
ipf_sync_expire(ipf_main_softc_t * softc)1444 ipf_sync_expire(ipf_main_softc_t *softc)
1445 {
1446           ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1447 
1448           if ((softs->ipf_sync_events > 0) &&
1449               (softc->ipf_ticks >
1450                softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
1451                     ipf_sync_poll_wakeup(softc);
1452           }
1453 }
1454