1 /*        $NetBSD: tcp_usrreq.c,v 1.238 2022/11/04 09:01:53 ozaki-r Exp $       */
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
38  * Facility, NASA Ames Research Center.
39  * This code is derived from software contributed to The NetBSD Foundation
40  * by Charles M. Hannum.
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Rui Paulo.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 /*
67  * Copyright (c) 1982, 1986, 1988, 1993, 1995
68  *        The Regents of the University of California.  All rights reserved.
69  *
70  * Redistribution and use in source and binary forms, with or without
71  * modification, are permitted provided that the following conditions
72  * are met:
73  * 1. Redistributions of source code must retain the above copyright
74  *    notice, this list of conditions and the following disclaimer.
75  * 2. Redistributions in binary form must reproduce the above copyright
76  *    notice, this list of conditions and the following disclaimer in the
77  *    documentation and/or other materials provided with the distribution.
78  * 3. Neither the name of the University nor the names of its contributors
79  *    may be used to endorse or promote products derived from this software
80  *    without specific prior written permission.
81  *
82  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
83  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
84  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
85  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
86  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
87  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
88  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
89  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
90  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
91  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
92  * SUCH DAMAGE.
93  *
94  *        @(#)tcp_usrreq.c    8.5 (Berkeley) 6/21/95
95  */
96 
97 /*
98  * TCP protocol interface to socket abstraction.
99  */
100 
101 #include <sys/cdefs.h>
102 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.238 2022/11/04 09:01:53 ozaki-r Exp $");
103 
104 #ifdef _KERNEL_OPT
105 #include "opt_inet.h"
106 #include "opt_tcp_debug.h"
107 #include "opt_mbuftrace.h"
108 #include "opt_tcp_space.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/kernel.h>
115 #include <sys/mbuf.h>
116 #include <sys/socket.h>
117 #include <sys/socketvar.h>
118 #include <sys/protosw.h>
119 #include <sys/errno.h>
120 #include <sys/stat.h>
121 #include <sys/proc.h>
122 #include <sys/domain.h>
123 #include <sys/sysctl.h>
124 #include <sys/kauth.h>
125 #include <sys/kernel.h>
126 #include <sys/uidinfo.h>
127 
128 #include <net/if.h>
129 
130 #include <netinet/in.h>
131 #include <netinet/in_systm.h>
132 #include <netinet/in_var.h>
133 #include <netinet/ip.h>
134 #include <netinet/in_pcb.h>
135 #include <netinet/ip_var.h>
136 #include <netinet/in_offload.h>
137 
138 #ifdef INET6
139 #include <netinet/ip6.h>
140 #include <netinet6/in6_pcb.h>
141 #include <netinet6/ip6_var.h>
142 #include <netinet6/scope6_var.h>
143 #endif
144 
145 #include <netinet/tcp.h>
146 #include <netinet/tcp_fsm.h>
147 #include <netinet/tcp_seq.h>
148 #include <netinet/tcp_timer.h>
149 #include <netinet/tcp_var.h>
150 #include <netinet/tcp_private.h>
151 #include <netinet/tcp_congctl.h>
152 #include <netinet/tcp_debug.h>
153 #include <netinet/tcp_vtw.h>
154 #include <netinet/tcp_syncache.h>
155 
/*
 * tcp_debug_capture: snapshot the connection state ahead of a user request.
 *
 * When TCP_DEBUG is compiled in, returns tp->t_state so that it can later be
 * handed to tcp_debug_trace() as the "old state".  Without TCP_DEBUG the
 * control block is not touched and 0 is returned.  The req argument is
 * accepted for symmetry with tcp_debug_trace() but is not used here.
 */
static int
tcp_debug_capture(struct tcpcb *tp, int req)
{
#ifdef TCP_DEBUG
	const int prev_state = tp->t_state;

	return prev_state;
#else
	return 0;
#endif
}
164 
/*
 * tcp_debug_trace: record a TA_USER trace entry for a completed user request.
 *
 * Only does anything when TCP_DEBUG is compiled in.  In that case a trace
 * record is written through tcp_trace() provided that tp is non-NULL and the
 * socket has SO_DEBUG set; ostate is the state captured before the request
 * and req identifies the request.
 */
static inline void
tcp_debug_trace(struct socket *so, struct tcpcb *tp, int ostate, int req)
{
#ifdef TCP_DEBUG
	/* The control block may already be gone (callers pass NULL then). */
	if (tp == NULL)
		return;
	if ((so->so_options & SO_DEBUG) == 0)
		return;

	tcp_trace(TA_USER, ostate, tp, NULL, req);
#endif
}
173 
174 static void
change_keepalive(struct socket * so,struct tcpcb * tp)175 change_keepalive(struct socket *so, struct tcpcb *tp)
176 {
177           tp->t_maxidle = tp->t_keepcnt * MIN(tp->t_keepintvl,
178               TCP_TIMER_MAXTICKS / tp->t_keepcnt);
179           TCP_TIMER_DISARM(tp, TCPT_KEEP);
180           TCP_TIMER_DISARM(tp, TCPT_2MSL);
181 
182           if (tp->t_state == TCPS_SYN_RECEIVED ||
183               tp->t_state == TCPS_SYN_SENT) {
184                     TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
185           } else if (so->so_options & SO_KEEPALIVE &&
186               tp->t_state <= TCPS_CLOSE_WAIT) {
187                     TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl);
188           } else {
189                     TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
190           }
191 
192           if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
193                     TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
194 }
195 
196 /*
197  * Export TCP internal state information via a struct tcp_info, based on the
198  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
199  * (TCP state machine, etc).  We export all information using FreeBSD-native
200  * constants -- for example, the numeric values for tcpi_state will differ
201  * from Linux.
202  */
203 static void
tcp_fill_info(struct tcpcb * tp,struct tcp_info * ti)204 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
205 {
206 
207           bzero(ti, sizeof(*ti));
208 
209           ti->tcpi_state = tp->t_state;
210           if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
211                     ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
212           if (tp->t_flags & TF_SACK_PERMIT)
213                     ti->tcpi_options |= TCPI_OPT_SACK;
214           if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
215                     ti->tcpi_options |= TCPI_OPT_WSCALE;
216                     ti->tcpi_snd_wscale = tp->snd_scale;
217                     ti->tcpi_rcv_wscale = tp->rcv_scale;
218           }
219           if (tp->t_flags & TF_ECN_PERMIT) {
220                     ti->tcpi_options |= TCPI_OPT_ECN;
221           }
222 
223           ti->tcpi_rto = tp->t_rxtcur * tick;
224           ti->tcpi_last_data_recv = (long)(getticks() -
225                                                    (int)tp->t_rcvtime) * tick;
226           ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick / PR_SLOWHZ)
227                              >> (TCP_RTT_SHIFT + 2);
228           ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick / PR_SLOWHZ)
229                              >> (TCP_RTTVAR_SHIFT + 2);
230 
231           ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
232           /* Linux API wants these in # of segments, apparently */
233           ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz;
234           ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz;
235 
236           /*
237            * FreeBSD-specific extension fields for tcp_info.
238            */
239           ti->tcpi_rcv_space = tp->rcv_wnd;
240           ti->tcpi_rcv_nxt = tp->rcv_nxt;
241           ti->tcpi_snd_bwnd = 0;                  /* Unused, kept for compat. */
242           ti->tcpi_snd_nxt = tp->snd_nxt;
243           ti->tcpi_snd_mss = tp->t_segsz;
244           ti->tcpi_rcv_mss = tp->t_segsz;
245 #ifdef TF_TOE
246           if (tp->t_flags & TF_TOE)
247                     ti->tcpi_options |= TCPI_OPT_TOE;
248 #endif
249           /* From the redundant department of redundancies... */
250           ti->__tcpi_retransmits = ti->__tcpi_retrans =
251                     ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
252 
253           ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
254           ti->tcpi_snd_zerowin = tp->t_sndzerowin;
255 }
256 
257 int
tcp_ctloutput(int op,struct socket * so,struct sockopt * sopt)258 tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
259 {
260           int error = 0, s;
261           struct inpcb *inp;
262           struct tcpcb *tp;
263           struct tcp_info ti;
264           u_int ui;
265           int family;         /* family of the socket */
266           int level, optname, optval;
267 
268           level = sopt->sopt_level;
269           optname = sopt->sopt_name;
270 
271           family = so->so_proto->pr_domain->dom_family;
272 
273           s = splsoftnet();
274           inp = sotoinpcb(so);
275           if (inp == NULL) {
276                     splx(s);
277                     return ECONNRESET;
278           }
279           if (level != IPPROTO_TCP) {
280                     switch (family) {
281                     case PF_INET:
282                               error = ip_ctloutput(op, so, sopt);
283                               break;
284 #ifdef INET6
285                     case PF_INET6:
286                               error = ip6_ctloutput(op, so, sopt);
287                               break;
288 #endif
289                     }
290                     splx(s);
291                     return error;
292           }
293           tp = intotcpcb(inp);
294 
295           switch (op) {
296           case PRCO_SETOPT:
297                     switch (optname) {
298 #ifdef TCP_SIGNATURE
299                     case TCP_MD5SIG:
300                               error = sockopt_getint(sopt, &optval);
301                               if (error)
302                                         break;
303                               if (optval > 0)
304                                         tp->t_flags |= TF_SIGNATURE;
305                               else
306                                         tp->t_flags &= ~TF_SIGNATURE;
307                               break;
308 #endif /* TCP_SIGNATURE */
309 
310                     case TCP_NODELAY:
311                               error = sockopt_getint(sopt, &optval);
312                               if (error)
313                                         break;
314                               if (optval)
315                                         tp->t_flags |= TF_NODELAY;
316                               else
317                                         tp->t_flags &= ~TF_NODELAY;
318                               break;
319 
320                     case TCP_MAXSEG:
321                               error = sockopt_getint(sopt, &optval);
322                               if (error)
323                                         break;
324                               if (optval > 0 && optval <= tp->t_peermss)
325                                         tp->t_peermss = optval; /* limit on send size */
326                               else
327                                         error = EINVAL;
328                               break;
329 #ifdef notyet
330                     case TCP_CONGCTL:
331                               /* XXX string overflow XXX */
332                               error = tcp_congctl_select(tp, sopt->sopt_data);
333                               break;
334 #endif
335 
336                     case TCP_KEEPIDLE:
337                               error = sockopt_get(sopt, &ui, sizeof(ui));
338                               if (error)
339                                         break;
340                               if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) {
341                                         tp->t_keepidle = ui;
342                                         change_keepalive(so, tp);
343                               } else
344                                         error = EINVAL;
345                               break;
346 
347                     case TCP_KEEPINTVL:
348                               error = sockopt_get(sopt, &ui, sizeof(ui));
349                               if (error)
350                                         break;
351                               if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) {
352                                         tp->t_keepintvl = ui;
353                                         change_keepalive(so, tp);
354                               } else
355                                         error = EINVAL;
356                               break;
357 
358                     case TCP_KEEPCNT:
359                               error = sockopt_get(sopt, &ui, sizeof(ui));
360                               if (error)
361                                         break;
362                               if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) {
363                                         tp->t_keepcnt = ui;
364                                         change_keepalive(so, tp);
365                               } else
366                                         error = EINVAL;
367                               break;
368 
369                     case TCP_KEEPINIT:
370                               error = sockopt_get(sopt, &ui, sizeof(ui));
371                               if (error)
372                                         break;
373                               if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) {
374                                         tp->t_keepinit = ui;
375                                         change_keepalive(so, tp);
376                               } else
377                                         error = EINVAL;
378                               break;
379 
380                     default:
381                               error = ENOPROTOOPT;
382                               break;
383                     }
384                     break;
385 
386           case PRCO_GETOPT:
387                     switch (optname) {
388 #ifdef TCP_SIGNATURE
389                     case TCP_MD5SIG:
390                               optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
391                               goto setval;
392 #endif
393                     case TCP_NODELAY:
394                               optval = tp->t_flags & TF_NODELAY;
395                               goto setval;
396                     case TCP_MAXSEG:
397                               optval = tp->t_peermss;
398                               goto setval;
399                     case TCP_INFO:
400                               tcp_fill_info(tp, &ti);
401                               error = sockopt_set(sopt, &ti, sizeof ti);
402                               break;
403 #ifdef notyet
404                     case TCP_CONGCTL:
405                               break;
406 #endif
407                     case TCP_KEEPIDLE:
408                               optval = tp->t_keepidle;
409                               goto setval;
410                     case TCP_KEEPINTVL:
411                               optval = tp->t_keepintvl;
412                               goto setval;
413                     case TCP_KEEPCNT:
414                               optval = tp->t_keepcnt;
415                               goto setval;
416                     case TCP_KEEPINIT:
417                               optval = tp->t_keepinit;
418 setval:                       error = sockopt_set(sopt, &optval, sizeof(optval));
419                               break;
420                     default:
421                               error = ENOPROTOOPT;
422                               break;
423                     }
424                     break;
425           }
426           splx(s);
427           return error;
428 }
429 
/*
 * Default send and receive buffer reservations, in bytes.  tcp_attach()
 * passes these to soreserve() for sockets that have no high-water mark yet.
 * TCP_SENDSPACE / TCP_RECVSPACE may be supplied by the kernel configuration;
 * the fallbacks below are used otherwise.
 *
 * The fallback expansions are parenthesized so that the macros behave as a
 * single value inside larger expressions (e.g. x / TCP_SENDSPACE).
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024 * 32)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024 * 32)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
438 
439 /*
440  * tcp_attach: attach TCP protocol to socket, allocating internet protocol
441  * control block, TCP control block, buffer space and entering LISTEN state
442  * if to accept connections.
443  */
444 static int
tcp_attach(struct socket * so,int proto)445 tcp_attach(struct socket *so, int proto)
446 {
447           struct tcpcb *tp;
448           struct inpcb *inp;
449           int s, error, family;
450 
451           /* Assign the lock (must happen even if we will error out). */
452           s = splsoftnet();
453           sosetlock(so);
454           KASSERT(solocked(so));
455           KASSERT(sotoinpcb(so) == NULL);
456 
457           inp = sotoinpcb(so);
458           KASSERT(inp == NULL);
459 
460           family = soaf(so);
461 
462 #ifdef MBUFTRACE
463           so->so_mowner = &tcp_sock_mowner;
464           so->so_rcv.sb_mowner = &tcp_sock_rx_mowner;
465           so->so_snd.sb_mowner = &tcp_sock_tx_mowner;
466 #endif
467           if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
468                     error = soreserve(so, tcp_sendspace, tcp_recvspace);
469                     if (error)
470                               goto out;
471           }
472 
473           so->so_rcv.sb_flags |= SB_AUTOSIZE;
474           so->so_snd.sb_flags |= SB_AUTOSIZE;
475 
476           error = inpcb_create(so, &tcbtable);
477           if (error)
478                     goto out;
479           inp = sotoinpcb(so);
480 
481           tp = tcp_newtcpcb(family, inp);
482           if (tp == NULL) {
483                     int nofd = so->so_state & SS_NOFDREF;   /* XXX */
484 
485                     so->so_state &= ~SS_NOFDREF;  /* don't free the socket yet */
486                     inpcb_destroy(inp);
487                     so->so_state |= nofd;
488                     error = ENOBUFS;
489                     goto out;
490           }
491           tp->t_state = TCPS_CLOSED;
492           if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
493                     so->so_linger = TCP_LINGERTIME;
494           }
495 out:
496           KASSERT(solocked(so));
497           splx(s);
498           return error;
499 }
500 
501 static void
tcp_detach(struct socket * so)502 tcp_detach(struct socket *so)
503 {
504           struct inpcb *inp;
505           struct tcpcb *tp;
506           int s;
507 
508           inp = sotoinpcb(so);
509           if (inp == NULL)
510                     return;
511           tp = intotcpcb(inp);
512 
513           s = splsoftnet();
514           (void)tcp_disconnect1(tp);
515           splx(s);
516 }
517 
518 static int
tcp_accept(struct socket * so,struct sockaddr * nam)519 tcp_accept(struct socket *so, struct sockaddr *nam)
520 {
521           struct inpcb *inp;
522           struct tcpcb *tp;
523           int ostate = 0;
524           int s;
525 
526           inp = sotoinpcb(so);
527           if (inp == NULL)
528                     return EINVAL;
529           tp = intotcpcb(inp);
530 
531           ostate = tcp_debug_capture(tp, PRU_ACCEPT);
532 
533           /*
534            * Accept a connection.  Essentially all the work is
535            * done at higher levels; just return the address
536            * of the peer, storing through addr.
537            */
538           s = splsoftnet();
539           if (inp->inp_af == AF_INET) {
540                     inpcb_fetch_peeraddr(inp, (struct sockaddr_in *)nam);
541           }
542 #ifdef INET6
543           else if (inp->inp_af == AF_INET6) {
544                     in6pcb_fetch_peeraddr(inp, (struct sockaddr_in6 *)nam);
545           }
546 #endif
547           tcp_debug_trace(so, tp, ostate, PRU_ACCEPT);
548           splx(s);
549 
550           return 0;
551 }
552 
553 static int
tcp_bind(struct socket * so,struct sockaddr * nam,struct lwp * l)554 tcp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
555 {
556           struct inpcb *inp = NULL;
557           struct sockaddr_in *sin = (struct sockaddr_in *)nam;
558 #ifdef INET6
559           struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
560 #endif /* INET6 */
561           struct tcpcb *tp;
562           int s;
563           int error = 0;
564           int ostate = 0;
565 
566           inp = sotoinpcb(so);
567           if (inp == NULL)
568                     return EINVAL;
569           tp = intotcpcb(inp);
570 
571           ostate = tcp_debug_capture(tp, PRU_BIND);
572 
573           /*
574            * Give the socket an address.
575            */
576           s = splsoftnet();
577           switch (so->so_proto->pr_domain->dom_family) {
578           case PF_INET:
579                     error = inpcb_bind(inp, sin, l);
580                     break;
581 #ifdef INET6
582           case PF_INET6:
583                     error = in6pcb_bind(inp, sin6, l);
584                     if (!error) {
585                               /* mapped addr case */
586                               if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)))
587                                         tp->t_family = AF_INET;
588                               else
589                                         tp->t_family = AF_INET6;
590                     }
591                     break;
592 #endif
593           }
594           tcp_debug_trace(so, tp, ostate, PRU_BIND);
595           splx(s);
596 
597           return error;
598 }
599 
600 static int
tcp_listen(struct socket * so,struct lwp * l)601 tcp_listen(struct socket *so, struct lwp *l)
602 {
603           struct inpcb *inp;
604           struct tcpcb *tp;
605           int error = 0;
606           int ostate = 0;
607           int s;
608 
609           inp = sotoinpcb(so);
610           if (inp == NULL)
611                     return EINVAL;
612           tp = intotcpcb(inp);
613 
614           ostate = tcp_debug_capture(tp, PRU_LISTEN);
615 
616           /*
617            * Prepare to accept connections.
618            */
619           s = splsoftnet();
620           if (inp->inp_af == AF_INET && inp->inp_lport == 0) {
621                     error = inpcb_bind(inp, NULL, l);
622                     if (error)
623                               goto release;
624           }
625 #ifdef INET6
626           if (inp->inp_af == AF_INET6 && inp->inp_lport == 0) {
627                     error = in6pcb_bind(inp, NULL, l);
628                     if (error)
629                               goto release;
630           }
631 #endif
632           tp->t_state = TCPS_LISTEN;
633 
634 release:
635           tcp_debug_trace(so, tp, ostate, PRU_LISTEN);
636           splx(s);
637 
638           return error;
639 }
640 
641 static int
tcp_connect(struct socket * so,struct sockaddr * nam,struct lwp * l)642 tcp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
643 {
644           struct inpcb *inp;
645           struct tcpcb *tp;
646           int s;
647           int error = 0;
648           int ostate = 0;
649 
650           inp = sotoinpcb(so);
651           if (inp == NULL)
652                     return EINVAL;
653           tp = intotcpcb(inp);
654 
655           ostate = tcp_debug_capture(tp, PRU_CONNECT);
656 
657           /*
658            * Initiate connection to peer.
659            * Create a template for use in transmissions on this connection.
660            * Enter SYN_SENT state, and mark socket as connecting.
661            * Start keep-alive timer, and seed output sequence space.
662            * Send initial segment on connection.
663            */
664           s = splsoftnet();
665 
666           if (inp->inp_af == AF_INET) {
667                     if (inp->inp_lport == 0) {
668                               error = inpcb_bind(inp, NULL, l);
669                               if (error)
670                                         goto release;
671                     }
672                     error = inpcb_connect(inp, (struct sockaddr_in *)nam, l);
673           }
674 #ifdef INET6
675           if (inp->inp_af == AF_INET6) {
676                     if (inp->inp_lport == 0) {
677                               error = in6pcb_bind(inp, NULL, l);
678                               if (error)
679                                         goto release;
680                     }
681                     error = in6pcb_connect(inp, (struct sockaddr_in6 *)nam, l);
682                     if (!error) {
683                               /* mapped addr case */
684                               if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp)))
685                                         tp->t_family = AF_INET;
686                               else
687                                         tp->t_family = AF_INET6;
688                     }
689           }
690 #endif
691           if (error)
692                     goto release;
693           tp->t_template = tcp_template(tp);
694           if (tp->t_template == 0) {
695                     if (inp->inp_af == AF_INET)
696                               inpcb_disconnect(inp);
697 #ifdef INET6
698                     else if (inp->inp_af == AF_INET6)
699                               in6pcb_disconnect(inp);
700 #endif
701                     error = ENOBUFS;
702                     goto release;
703           }
704           /*
705            * Compute window scaling to request.
706            * XXX: This should be moved to tcp_output().
707            */
708           while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
709               (TCP_MAXWIN << tp->request_r_scale) < sb_max)
710                     tp->request_r_scale++;
711           soisconnecting(so);
712           TCP_STATINC(TCP_STAT_CONNATTEMPT);
713           tp->t_state = TCPS_SYN_SENT;
714           TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
715           tp->iss = tcp_new_iss(tp);
716           tcp_sendseqinit(tp);
717           error = tcp_output(tp);
718 
719 release:
720           tcp_debug_trace(so, tp, ostate, PRU_CONNECT);
721           splx(s);
722 
723           return error;
724 }
725 
726 static int
tcp_connect2(struct socket * so,struct socket * so2)727 tcp_connect2(struct socket *so, struct socket *so2)
728 {
729           struct inpcb *inp;
730           struct tcpcb *tp;
731           int ostate = 0;
732 
733           KASSERT(solocked(so));
734 
735           inp = sotoinpcb(so);
736           if (inp == NULL)
737                     return EINVAL;
738           tp = intotcpcb(inp);
739 
740           ostate = tcp_debug_capture(tp, PRU_CONNECT2);
741 
742           tcp_debug_trace(so, tp, ostate, PRU_CONNECT2);
743 
744           return EOPNOTSUPP;
745 }
746 
747 static int
tcp_disconnect(struct socket * so)748 tcp_disconnect(struct socket *so)
749 {
750           struct inpcb *inp;
751           struct tcpcb *tp;
752           int error = 0;
753           int ostate = 0;
754           int s;
755 
756           inp = sotoinpcb(so);
757           if (inp == NULL)
758                     return EINVAL;
759           tp = intotcpcb(inp);
760 
761           ostate = tcp_debug_capture(tp, PRU_DISCONNECT);
762 
763           /*
764            * Initiate disconnect from peer.
765            * If connection never passed embryonic stage, just drop;
766            * else if don't need to let data drain, then can just drop anyways,
767            * else have to begin TCP shutdown process: mark socket disconnecting,
768            * drain unread data, state switch to reflect user close, and
769            * send segment (e.g. FIN) to peer.  Socket will be really disconnected
770            * when peer sends FIN and acks ours.
771            *
772            * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
773            */
774           s = splsoftnet();
775           tp = tcp_disconnect1(tp);
776           tcp_debug_trace(so, tp, ostate, PRU_DISCONNECT);
777           splx(s);
778 
779           return error;
780 }
781 
782 static int
tcp_shutdown(struct socket * so)783 tcp_shutdown(struct socket *so)
784 {
785           struct inpcb *inp;
786           struct tcpcb *tp;
787           int error = 0;
788           int ostate = 0;
789           int s;
790 
791           inp = sotoinpcb(so);
792           if (inp == NULL)
793                     return EINVAL;
794           tp = intotcpcb(inp);
795 
796           ostate = tcp_debug_capture(tp, PRU_SHUTDOWN);
797           /*
798            * Mark the connection as being incapable of further output.
799            */
800           s = splsoftnet();
801           socantsendmore(so);
802           tp = tcp_usrclosed(tp);
803           if (tp)
804                     error = tcp_output(tp);
805           tcp_debug_trace(so, tp, ostate, PRU_SHUTDOWN);
806           splx(s);
807 
808           return error;
809 }
810 
811 static int
tcp_abort(struct socket * so)812 tcp_abort(struct socket *so)
813 {
814           struct inpcb *inp;
815           struct tcpcb *tp;
816           int error = 0;
817           int ostate = 0;
818           int s;
819 
820           inp = sotoinpcb(so);
821           if (inp == NULL)
822                     return EINVAL;
823           tp = intotcpcb(inp);
824 
825           ostate = tcp_debug_capture(tp, PRU_ABORT);
826 
827           /*
828            * Abort the TCP.
829            */
830           s = splsoftnet();
831           tp = tcp_drop(tp, ECONNABORTED);
832           tcp_debug_trace(so, tp, ostate, PRU_ABORT);
833           splx(s);
834 
835           return error;
836 }
837 
838 static int
tcp_ioctl(struct socket * so,u_long cmd,void * nam,struct ifnet * ifp)839 tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
840 {
841           switch (so->so_proto->pr_domain->dom_family) {
842           case PF_INET:
843                     return in_control(so, cmd, nam, ifp);
844 #ifdef INET6
845           case PF_INET6:
846                     return in6_control(so, cmd, nam, ifp);
847 #endif
848           default:
849                     return EAFNOSUPPORT;
850           }
851 }
852 
/*
 * stat handler.  Asserts that the socket lock is held and reports
 * success; ub is left untouched because no block size is supplied.
 */
static int
tcp_stat(struct socket *so, struct stat *ub)
{
	KASSERT(solocked(so));
	return 0;
}
861 
862 static int
tcp_peeraddr(struct socket * so,struct sockaddr * nam)863 tcp_peeraddr(struct socket *so, struct sockaddr *nam)
864 {
865           struct inpcb *inp;
866           struct tcpcb *tp;
867           int ostate = 0;
868           int s;
869 
870           inp = sotoinpcb(so);
871           if (inp == NULL)
872                     return EINVAL;
873           tp = intotcpcb(inp);
874 
875           ostate = tcp_debug_capture(tp, PRU_PEERADDR);
876 
877           s = splsoftnet();
878           if (inp->inp_af == AF_INET) {
879                     inpcb_fetch_peeraddr(inp, (struct sockaddr_in *)nam);
880           }
881 #ifdef INET6
882           else if (inp->inp_af == AF_INET6) {
883                     in6pcb_fetch_peeraddr(inp, (struct sockaddr_in6 *)nam);
884           }
885 #endif
886           tcp_debug_trace(so, tp, ostate, PRU_PEERADDR);
887           splx(s);
888 
889           return 0;
890 }
891 
892 static int
tcp_sockaddr(struct socket * so,struct sockaddr * nam)893 tcp_sockaddr(struct socket *so, struct sockaddr *nam)
894 {
895           struct inpcb *inp;
896           struct tcpcb *tp;
897           int ostate = 0;
898           int s;
899 
900           inp = sotoinpcb(so);
901           if (inp == NULL)
902                     return EINVAL;
903           tp = intotcpcb(inp);
904 
905           ostate = tcp_debug_capture(tp, PRU_SOCKADDR);
906 
907           s = splsoftnet();
908           if (inp->inp_af == AF_INET) {
909                     inpcb_fetch_sockaddr(inp, (struct sockaddr_in *)nam);
910           }
911 #ifdef INET6
912           if (inp->inp_af == AF_INET6) {
913                     in6pcb_fetch_sockaddr(inp, (struct sockaddr_in6 *)nam);
914           }
915 #endif
916           tcp_debug_trace(so, tp, ostate, PRU_SOCKADDR);
917           splx(s);
918 
919           return 0;
920 }
921 
922 static int
tcp_rcvd(struct socket * so,int flags,struct lwp * l)923 tcp_rcvd(struct socket *so, int flags, struct lwp *l)
924 {
925           struct inpcb *inp;
926           struct tcpcb *tp;
927           int ostate = 0;
928           int s;
929 
930           inp = sotoinpcb(so);
931           if (inp == NULL)
932                     return EINVAL;
933           tp = intotcpcb(inp);
934 
935           ostate = tcp_debug_capture(tp, PRU_RCVD);
936 
937           /*
938            * After a receive, possibly send window update to peer.
939            *
940            * soreceive() calls this function when a user receives
941            * ancillary data on a listening socket. We don't call
942            * tcp_output in such a case, since there is no header
943            * template for a listening socket and hence the kernel
944            * will panic.
945            */
946           s = splsoftnet();
947           if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
948                     (void) tcp_output(tp);
949           splx(s);
950 
951           tcp_debug_trace(so, tp, ostate, PRU_RCVD);
952 
953           return 0;
954 }
955 
956 static int
tcp_recvoob(struct socket * so,struct mbuf * m,int flags)957 tcp_recvoob(struct socket *so, struct mbuf *m, int flags)
958 {
959           struct inpcb *inp;
960           struct tcpcb *tp;
961           int ostate = 0;
962           int s;
963 
964           inp = sotoinpcb(so);
965           if (inp == NULL)
966                     return EINVAL;
967           tp = intotcpcb(inp);
968 
969           ostate = tcp_debug_capture(tp, PRU_RCVOOB);
970 
971           s = splsoftnet();
972           if ((so->so_oobmark == 0 &&
973               (so->so_state & SS_RCVATMARK) == 0) ||
974               so->so_options & SO_OOBINLINE ||
975               tp->t_oobflags & TCPOOB_HADDATA) {
976                     splx(s);
977                     return EINVAL;
978           }
979 
980           if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
981                     splx(s);
982                     return EWOULDBLOCK;
983           }
984 
985           m->m_len = 1;
986           *mtod(m, char *) = tp->t_iobc;
987           if ((flags & MSG_PEEK) == 0) {
988                     tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
989                     so->so_state &= ~SS_POLLRDBAND;
990           }
991 
992           tcp_debug_trace(so, tp, ostate, PRU_RCVOOB);
993           splx(s);
994 
995           return 0;
996 }
997 
998 static int
tcp_send(struct socket * so,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct lwp * l)999 tcp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
1000     struct mbuf *control, struct lwp *l)
1001 {
1002           struct inpcb *inp;
1003           struct tcpcb *tp;
1004           int ostate = 0;
1005           int error = 0;
1006           int s;
1007 
1008           inp = sotoinpcb(so);
1009           if (inp == NULL)
1010                     return EINVAL;
1011           tp = intotcpcb(inp);
1012 
1013           ostate = tcp_debug_capture(tp, PRU_SEND);
1014 
1015           /*
1016            * Do a send by putting data in output queue and updating urgent
1017            * marker if URG set.  Possibly send more data.
1018            */
1019           s = splsoftnet();
1020           if (control && control->m_len) {
1021                     m_freem(control);
1022                     m_freem(m);
1023                     tcp_debug_trace(so, tp, ostate, PRU_SEND);
1024                     splx(s);
1025                     return EINVAL;
1026           }
1027 
1028           sbappendstream(&so->so_snd, m);
1029           error = tcp_output(tp);
1030           tcp_debug_trace(so, tp, ostate, PRU_SEND);
1031           splx(s);
1032 
1033           return error;
1034 }
1035 
1036 static int
tcp_sendoob(struct socket * so,struct mbuf * m,struct mbuf * control)1037 tcp_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
1038 {
1039           struct inpcb *inp = NULL;
1040           struct tcpcb *tp = NULL;
1041           int ostate = 0;
1042           int error = 0;
1043           int s;
1044 
1045           inp = sotoinpcb(so);
1046           if (inp == NULL) {
1047                     m_freem(m);
1048                     m_freem(control);
1049                     return EINVAL;
1050           }
1051           tp = intotcpcb(inp);
1052           if (tp->t_template == NULL) {
1053                     /*
1054                      * XXX FreeBSD appears to open the connection
1055                      * automagically in this case, but the socket address
1056                      * isn't passed through here so we can't do that.
1057                      */
1058                     m_freem(m);
1059                     m_freem(control);
1060                     return ENOTCONN;
1061           }
1062 
1063           ostate = tcp_debug_capture(tp, PRU_SENDOOB);
1064 
1065           s = splsoftnet();
1066           if (sbspace_oob(&so->so_snd) == 0) {
1067                     m_freem(m);
1068                     m_freem(control);
1069                     splx(s);
1070                     return ENOBUFS;
1071           }
1072           /*
1073            * According to RFC961 (Assigned Protocols),
1074            * the urgent pointer points to the last octet
1075            * of urgent data.  We continue, however,
1076            * to consider it to indicate the first octet
1077            * of data past the urgent section.
1078            * Otherwise, snd_up should be one lower.
1079            */
1080           sbappendstream(&so->so_snd, m);
1081           tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
1082           tp->t_force = 1;
1083           error = tcp_output(tp);
1084           tp->t_force = 0;
1085           tcp_debug_trace(so, tp, ostate, PRU_SENDOOB);
1086           splx(s);
1087           m_freem(control);
1088 
1089           return error;
1090 }
1091 
1092 static int
tcp_purgeif(struct socket * so,struct ifnet * ifp)1093 tcp_purgeif(struct socket *so, struct ifnet *ifp)
1094 {
1095           int s;
1096           int error = 0;
1097 
1098           s = splsoftnet();
1099 
1100           mutex_enter(softnet_lock);
1101           switch (so->so_proto->pr_domain->dom_family) {
1102           case PF_INET:
1103                     inpcb_purgeif0(&tcbtable, ifp);
1104 #ifdef NET_MPSAFE
1105                     mutex_exit(softnet_lock);
1106 #endif
1107                     in_purgeif(ifp);
1108 #ifdef NET_MPSAFE
1109                     mutex_enter(softnet_lock);
1110 #endif
1111                     inpcb_purgeif(&tcbtable, ifp);
1112                     break;
1113 #ifdef INET6
1114           case PF_INET6:
1115                     in6pcb_purgeif0(&tcbtable, ifp);
1116 #ifdef NET_MPSAFE
1117                     mutex_exit(softnet_lock);
1118 #endif
1119                     in6_purgeif(ifp);
1120 #ifdef NET_MPSAFE
1121                     mutex_enter(softnet_lock);
1122 #endif
1123                     in6pcb_purgeif(&tcbtable, ifp);
1124                     break;
1125 #endif
1126           default:
1127                     error = EAFNOSUPPORT;
1128                     break;
1129           }
1130           mutex_exit(softnet_lock);
1131           splx(s);
1132 
1133           return error;
1134 }
1135 
1136 /*
1137  * Initiate (or continue) disconnect.
1138  * If embryonic state, just send reset (once).
1139  * If in ``let data drain'' option and linger null, just drop.
1140  * Otherwise (hard), mark socket disconnecting and drop
1141  * current input data; switch states based on user close, and
1142  * send segment to peer (with FIN).
1143  */
1144 struct tcpcb *
tcp_disconnect1(struct tcpcb * tp)1145 tcp_disconnect1(struct tcpcb *tp)
1146 {
1147           struct socket *so;
1148 
1149           so = tp->t_inpcb->inp_socket;
1150 
1151           if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
1152                     tp = tcp_close(tp);
1153           else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1154                     tp = tcp_drop(tp, 0);
1155           else {
1156                     soisdisconnecting(so);
1157                     sbflush(&so->so_rcv);
1158                     tp = tcp_usrclosed(tp);
1159                     if (tp)
1160                               (void) tcp_output(tp);
1161           }
1162           return tp;
1163 }
1164 
1165 /*
1166  * User issued close, and wish to trail through shutdown states:
1167  * if never received SYN, just forget it.  If got a SYN from peer,
1168  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1169  * If already got a FIN from peer, then almost done; go to LAST_ACK
1170  * state.  In all other cases, have already sent FIN to peer (e.g.
1171  * after PRU_SHUTDOWN), and just have to play tedious game waiting
1172  * for peer to send FIN or not respond to keep-alives, etc.
1173  * We can let the user exit from the close as soon as the FIN is acked.
1174  */
1175 struct tcpcb *
tcp_usrclosed(struct tcpcb * tp)1176 tcp_usrclosed(struct tcpcb *tp)
1177 {
1178 
1179           switch (tp->t_state) {
1180 
1181           case TCPS_CLOSED:
1182           case TCPS_LISTEN:
1183           case TCPS_SYN_SENT:
1184                     tp->t_state = TCPS_CLOSED;
1185                     tp = tcp_close(tp);
1186                     break;
1187 
1188           case TCPS_SYN_RECEIVED:
1189           case TCPS_ESTABLISHED:
1190                     tp->t_state = TCPS_FIN_WAIT_1;
1191                     break;
1192 
1193           case TCPS_CLOSE_WAIT:
1194                     tp->t_state = TCPS_LAST_ACK;
1195                     break;
1196           }
1197           if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1198                     struct socket *so = tp->t_inpcb->inp_socket;
1199                     if (so)
1200                               soisdisconnected(so);
1201                     /*
1202                      * If we are in FIN_WAIT_2, we arrived here because the
1203                      * application did a shutdown of the send side.  Like the
1204                      * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
1205                      * a full close, we start a timer to make sure sockets are
1206                      * not left in FIN_WAIT_2 forever.
1207                      */
1208                     if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
1209                               TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
1210                     else if (tp->t_state == TCPS_TIME_WAIT
1211                                && ((tp->t_inpcb->inp_af == AF_INET
1212                                     && (tcp4_vtw_enable & 1)
1213                                     && vtw_add(AF_INET, tp))
1214                                    ||
1215                                    (tp->t_inpcb->inp_af == AF_INET6
1216                                     && (tcp6_vtw_enable & 1)
1217                                     && vtw_add(AF_INET6, tp)))) {
1218                               tp = 0;
1219                     }
1220           }
1221           return tp;
1222 }
1223 
1224 /*
1225  * sysctl helper routine for net.inet.ip.mssdflt.  it can't be less
1226  * than 32.
1227  */
1228 static int
sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS)1229 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS)
1230 {
1231           int error, mssdflt;
1232           struct sysctlnode node;
1233 
1234           mssdflt = tcp_mssdflt;
1235           node = *rnode;
1236           node.sysctl_data = &mssdflt;
1237           error = sysctl_lookup(SYSCTLFN_CALL(&node));
1238           if (error || newp == NULL)
1239                     return error;
1240 
1241           if (mssdflt < 32)
1242                     return EINVAL;
1243           tcp_mssdflt = mssdflt;
1244 
1245           mutex_enter(softnet_lock);
1246           tcp_tcpcb_template();
1247           mutex_exit(softnet_lock);
1248 
1249           return 0;
1250 }
1251 
1252 /*
1253  * sysctl helper for TCP CB template update
1254  */
1255 static int
sysctl_update_tcpcb_template(SYSCTLFN_ARGS)1256 sysctl_update_tcpcb_template(SYSCTLFN_ARGS)
1257 {
1258           int t, error;
1259           struct sysctlnode node;
1260 
1261           /* follow procedures in sysctl(9) manpage */
1262           t = *(int *)rnode->sysctl_data;
1263           node = *rnode;
1264           node.sysctl_data = &t;
1265           error = sysctl_lookup(SYSCTLFN_CALL(&node));
1266           if (error || newp == NULL)
1267                     return error;
1268 
1269           if (t < 0)
1270                     return EINVAL;
1271 
1272           *(int *)rnode->sysctl_data = t;
1273 
1274           mutex_enter(softnet_lock);
1275           tcp_tcpcb_template();
1276           mutex_exit(softnet_lock);
1277 
1278           return 0;
1279 }
1280 
1281 /*
1282  * sysctl helper routine for setting port related values under
1283  * net.inet.ip and net.inet6.ip6.  does basic range checking and does
1284  * additional checks for each type.  this code has placed in
1285  * tcp_input.c since INET and INET6 both use the same tcp code.
1286  *
1287  * this helper is not static so that both inet and inet6 can use it.
1288  */
1289 int
sysctl_net_inet_ip_ports(SYSCTLFN_ARGS)1290 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS)
1291 {
1292           int error, tmp;
1293           int apmin, apmax;
1294 #ifndef IPNOPRIVPORTS
1295           int lpmin, lpmax;
1296 #endif /* IPNOPRIVPORTS */
1297           struct sysctlnode node;
1298 
1299           if (namelen != 0)
1300                     return EINVAL;
1301 
1302           switch (name[-3]) {
1303               case PF_INET:
1304                     apmin = anonportmin;
1305                     apmax = anonportmax;
1306 #ifndef IPNOPRIVPORTS
1307                     lpmin = lowportmin;
1308                     lpmax = lowportmax;
1309 #endif /* IPNOPRIVPORTS */
1310                     break;
1311 #ifdef INET6
1312               case PF_INET6:
1313                     apmin = ip6_anonportmin;
1314                     apmax = ip6_anonportmax;
1315 #ifndef IPNOPRIVPORTS
1316                     lpmin = ip6_lowportmin;
1317                     lpmax = ip6_lowportmax;
1318 #endif /* IPNOPRIVPORTS */
1319                     break;
1320 #endif /* INET6 */
1321               default:
1322                     return EINVAL;
1323           }
1324 
1325           /*
1326            * insert temporary copy into node, perform lookup on
1327            * temporary, then restore pointer
1328            */
1329           node = *rnode;
1330           tmp = *(int*)rnode->sysctl_data;
1331           node.sysctl_data = &tmp;
1332           error = sysctl_lookup(SYSCTLFN_CALL(&node));
1333           if (error || newp == NULL)
1334                     return error;
1335 
1336           /*
1337            * simple port range check
1338            */
1339           if (tmp < 0 || tmp > 65535)
1340                     return EINVAL;
1341 
1342           /*
1343            * per-node range checks
1344            */
1345           switch (rnode->sysctl_num) {
1346           case IPCTL_ANONPORTMIN:
1347           case IPV6CTL_ANONPORTMIN:
1348                     if (tmp >= apmax)
1349                               return EINVAL;
1350 #ifndef IPNOPRIVPORTS
1351                     if (tmp < IPPORT_RESERVED)
1352                         return EINVAL;
1353 #endif /* IPNOPRIVPORTS */
1354                     break;
1355 
1356           case IPCTL_ANONPORTMAX:
1357           case IPV6CTL_ANONPORTMAX:
1358                 if (apmin >= tmp)
1359                               return EINVAL;
1360 #ifndef IPNOPRIVPORTS
1361                     if (tmp < IPPORT_RESERVED)
1362                         return EINVAL;
1363 #endif /* IPNOPRIVPORTS */
1364                     break;
1365 
1366 #ifndef IPNOPRIVPORTS
1367           case IPCTL_LOWPORTMIN:
1368           case IPV6CTL_LOWPORTMIN:
1369                     if (tmp >= lpmax ||
1370                         tmp > IPPORT_RESERVEDMAX ||
1371                         tmp < IPPORT_RESERVEDMIN)
1372                               return EINVAL;
1373                     break;
1374 
1375           case IPCTL_LOWPORTMAX:
1376           case IPV6CTL_LOWPORTMAX:
1377                     if (lpmin >= tmp ||
1378                         tmp > IPPORT_RESERVEDMAX ||
1379                         tmp < IPPORT_RESERVEDMIN)
1380                               return EINVAL;
1381                     break;
1382 #endif /* IPNOPRIVPORTS */
1383 
1384           default:
1385                     return EINVAL;
1386           }
1387 
1388           *(int*)rnode->sysctl_data = tmp;
1389 
1390           return 0;
1391 }
1392 
1393 static inline int
copyout_uid(struct socket * sockp,void * oldp,size_t * oldlenp)1394 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp)
1395 {
1396           if (oldp) {
1397                     size_t sz;
1398                     uid_t uid;
1399                     int error;
1400 
1401                     if (sockp->so_cred == NULL)
1402                               return EPERM;
1403 
1404                     uid = kauth_cred_geteuid(sockp->so_cred);
1405                     sz = MIN(sizeof(uid), *oldlenp);
1406                     if ((error = copyout(&uid, oldp, sz)) != 0)
1407                               return error;
1408           }
1409           *oldlenp = sizeof(uid_t);
1410           return 0;
1411 }
1412 
1413 static inline int
inet4_ident_core(struct in_addr raddr,u_int rport,struct in_addr laddr,u_int lport,void * oldp,size_t * oldlenp,struct lwp * l,int dodrop)1414 inet4_ident_core(struct in_addr raddr, u_int rport,
1415     struct in_addr laddr, u_int lport,
1416     void *oldp, size_t *oldlenp,
1417     struct lwp *l, int dodrop)
1418 {
1419           struct inpcb *inp;
1420           struct socket *sockp;
1421 
1422           inp = inpcb_lookup(&tcbtable, raddr, rport, laddr, lport, 0);
1423 
1424           if (inp == NULL || (sockp = inp->inp_socket) == NULL)
1425                     return ESRCH;
1426 
1427           if (dodrop) {
1428                     struct tcpcb *tp;
1429                     int error;
1430 
1431                     if (inp == NULL || (tp = intotcpcb(inp)) == NULL ||
1432                         (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0)
1433                               return ESRCH;
1434 
1435                     error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
1436                         KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL);
1437                     if (error)
1438                               return error;
1439 
1440                     (void)tcp_drop(tp, ECONNABORTED);
1441                     return 0;
1442           }
1443 
1444           return copyout_uid(sockp, oldp, oldlenp);
1445 }
1446 
#ifdef INET6
/*
 * IPv6 counterpart of inet4_ident_core(): look up the TCP connection
 * identified by the remote/local address and port pairs and either
 * report its owner or drop it.
 *
 * dodrop == 0: copy the owning socket's effective uid out through
 *	oldp/oldlenp (see copyout_uid()).
 * dodrop != 0: after a kauth check on the caller l, drop the
 *	connection with ECONNABORTED.  Sockets with SO_ACCEPTCONN set
 *	are refused.
 *
 * Returns ESRCH when no suitable connection exists, the kauth or
 * copyout error where applicable, and 0 on success.
 */
static inline int
inet6_ident_core(struct in6_addr *raddr, u_int rport,
    struct in6_addr *laddr, u_int lport,
    void *oldp, size_t *oldlenp,
    struct lwp *l, int dodrop)
{
	struct inpcb *pcb;
	struct socket *sock;
	struct tcpcb *tcb;
	int rv;

	pcb = in6pcb_lookup(&tcbtable, raddr, rport, laddr, lport, 0, 0);
	if (pcb == NULL)
		return ESRCH;

	sock = pcb->inp_socket;
	if (sock == NULL)
		return ESRCH;

	if (!dodrop)
		return copyout_uid(sock, oldp, oldlenp);

	tcb = intotcpcb(pcb);
	if (tcb == NULL || (sock->so_options & SO_ACCEPTCONN) != 0)
		return ESRCH;

	rv = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
	    KAUTH_REQ_NETWORK_SOCKET_DROP, sock, tcb, NULL);
	if (rv != 0)
		return rv;

	(void)tcp_drop(tcb, ECONNABORTED);
	return 0;
}
#endif
1482 
1483 /*
1484  * sysctl helper routine for the net.inet.tcp.drop and
1485  * net.inet6.tcp6.drop nodes.
1486  */
1487 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident
1488 
1489 /*
1490  * sysctl helper routine for the net.inet.tcp.ident and
1491  * net.inet6.tcp6.ident nodes.  contains backwards compat code for the
1492  * old way of looking up the ident information for ipv4 which involves
1493  * stuffing the port/addr pairs into the mib lookup.
1494  */
1495 static int
sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS)1496 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS)
1497 {
1498           struct sockaddr_in *si4[2];
1499 #ifdef INET6
1500           struct sockaddr_in6 *si6[2];
1501 #endif
1502           struct sockaddr_storage sa[2];
1503           int error, pf, dodrop;
1504 
1505           dodrop = name[-1] == TCPCTL_DROP;
1506           if (dodrop) {
1507                     if (oldp != NULL || *oldlenp != 0)
1508                               return EINVAL;
1509                     if (newp == NULL)
1510                               return EPERM;
1511                     if (newlen < sizeof(sa))
1512                               return ENOMEM;
1513           }
1514           if (namelen != 4 && namelen != 0)
1515                     return EINVAL;
1516           if (name[-2] != IPPROTO_TCP)
1517                     return EINVAL;
1518           pf = name[-3];
1519 
1520           /* old style lookup, ipv4 only */
1521           if (namelen == 4) {
1522                     struct in_addr laddr, raddr;
1523                     u_int lport, rport;
1524 
1525                     if (pf != PF_INET)
1526                               return EPROTONOSUPPORT;
1527                     raddr.s_addr = (uint32_t)name[0];
1528                     rport = (u_int)name[1];
1529                     laddr.s_addr = (uint32_t)name[2];
1530                     lport = (u_int)name[3];
1531 
1532                     mutex_enter(softnet_lock);
1533                     error = inet4_ident_core(raddr, rport, laddr, lport,
1534                         oldp, oldlenp, l, dodrop);
1535                     mutex_exit(softnet_lock);
1536                     return error;
1537           }
1538 
1539           if (newp == NULL || newlen != sizeof(sa))
1540                     return EINVAL;
1541           error = copyin(newp, &sa, newlen);
1542           if (error)
1543                     return error;
1544 
1545           /*
1546            * requested families must match
1547            */
1548           if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family)
1549                     return EINVAL;
1550 
1551           switch (pf) {
1552 #ifdef INET6
1553           case PF_INET6:
1554                     si6[0] = (struct sockaddr_in6*)&sa[0];
1555                     si6[1] = (struct sockaddr_in6*)&sa[1];
1556                     if (si6[0]->sin6_len != sizeof(*si6[0]) ||
1557                         si6[1]->sin6_len != sizeof(*si6[1]))
1558                               return EINVAL;
1559 
1560                     if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) &&
1561                         !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) {
1562                               error = sa6_embedscope(si6[0], ip6_use_defzone);
1563                               if (error)
1564                                         return error;
1565                               error = sa6_embedscope(si6[1], ip6_use_defzone);
1566                               if (error)
1567                                         return error;
1568 
1569                               mutex_enter(softnet_lock);
1570                               error = inet6_ident_core(&si6[0]->sin6_addr,
1571                                   si6[0]->sin6_port, &si6[1]->sin6_addr,
1572                                   si6[1]->sin6_port, oldp, oldlenp, l, dodrop);
1573                               mutex_exit(softnet_lock);
1574                               return error;
1575                     }
1576 
1577                     if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) !=
1578                         IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr))
1579                               return EINVAL;
1580 
1581                     in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]);
1582                     in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]);
1583 #endif /* INET6 */
1584                     /*FALLTHROUGH*/
1585           case PF_INET:
1586                     si4[0] = (struct sockaddr_in*)&sa[0];
1587                     si4[1] = (struct sockaddr_in*)&sa[1];
1588                     if (si4[0]->sin_len != sizeof(*si4[0]) ||
1589                         si4[0]->sin_len != sizeof(*si4[1]))
1590                               return EINVAL;
1591 
1592                     mutex_enter(softnet_lock);
1593                     error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port,
1594                         si4[1]->sin_addr, si4[1]->sin_port,
1595                         oldp, oldlenp, l, dodrop);
1596                     mutex_exit(softnet_lock);
1597                     return error;
1598           default:
1599                     return EPROTONOSUPPORT;
1600           }
1601 }
1602 
1603 /*
1604  * sysctl helper for the inet and inet6 pcblists.  handles tcp/udp and
1605  * inet/inet6, as well as raw pcbs for each.  specifically not
1606  * declared static so that raw sockets and udp/udp6 can use it as
1607  * well.
1608  */
1609 int
sysctl_inpcblist(SYSCTLFN_ARGS)1610 sysctl_inpcblist(SYSCTLFN_ARGS)
1611 {
1612           const bool allowaddr = get_expose_address(curproc);
1613           struct sockaddr_in *in;
1614           const struct inpcb *inp;
1615 #ifdef INET6
1616           struct sockaddr_in6 *in6;
1617 #endif
1618           struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data);
1619           struct tcpcb *tp;
1620           struct kinfo_pcb pcb;
1621           char *dp;
1622           size_t len, needed, elem_size, out_size;
1623           int error, elem_count, pf, proto, pf2;
1624 
1625           if (namelen != 4)
1626                     return EINVAL;
1627 
1628           if (oldp != NULL) {
1629                         len = *oldlenp;
1630                         elem_size = name[2];
1631                         elem_count = name[3];
1632                         if (elem_size != sizeof(pcb))
1633                                   return EINVAL;
1634           } else {
1635                         len = 0;
1636                         elem_count = INT_MAX;
1637                         elem_size = sizeof(pcb);
1638           }
1639           error = 0;
1640           dp = oldp;
1641           out_size = elem_size;
1642           needed = 0;
1643 
1644           if (namelen == 1 && name[0] == CTL_QUERY)
1645                     return (sysctl_query(SYSCTLFN_CALL(rnode)));
1646 
1647           if (name - oname != 4)
1648                     return EINVAL;
1649 
1650           pf = oname[1];
1651           proto = oname[2];
1652           pf2 = (oldp != NULL) ? pf : 0;
1653 
1654           mutex_enter(softnet_lock);
1655 
1656           TAILQ_FOREACH(inp, &pcbtbl->inpt_queue, inp_queue) {
1657                     if (inp->inp_af != pf)
1658                               continue;
1659 
1660                     if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
1661                         KAUTH_REQ_NETWORK_SOCKET_CANSEE, inp->inp_socket, NULL,
1662                         NULL) != 0)
1663                               continue;
1664 
1665                     memset(&pcb, 0, sizeof(pcb));
1666 
1667                     pcb.ki_family = pf;
1668                     pcb.ki_type = proto;
1669 
1670                     switch (pf2) {
1671                     case 0:
1672                               /* just probing for size */
1673                               break;
1674                     case PF_INET:
1675                               pcb.ki_family = inp->inp_socket->so_proto->
1676                                   pr_domain->dom_family;
1677                               pcb.ki_type = inp->inp_socket->so_proto->
1678                                   pr_type;
1679                               pcb.ki_protocol = inp->inp_socket->so_proto->
1680                                   pr_protocol;
1681                               pcb.ki_pflags = inp->inp_flags;
1682 
1683                               pcb.ki_sostate = inp->inp_socket->so_state;
1684                               pcb.ki_prstate = inp->inp_state;
1685                               if (proto == IPPROTO_TCP) {
1686                                         tp = intotcpcb(inp);
1687                                         pcb.ki_tstate = tp->t_state;
1688                                         pcb.ki_tflags = tp->t_flags;
1689                               }
1690 
1691                               COND_SET_VALUE(pcb.ki_pcbaddr,
1692                                   PTRTOUINT64(inp), allowaddr);
1693                               COND_SET_VALUE(pcb.ki_ppcbaddr,
1694                                   PTRTOUINT64(inp->inp_ppcb), allowaddr);
1695                               COND_SET_VALUE(pcb.ki_sockaddr,
1696                                   PTRTOUINT64(inp->inp_socket), allowaddr);
1697 
1698                               pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc;
1699                               pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc;
1700 
1701                               in = satosin(&pcb.ki_src);
1702                               in->sin_len = sizeof(*in);
1703                               in->sin_family = pf;
1704                               in->sin_port = inp->inp_lport;
1705                               in->sin_addr = const_in4p_laddr(inp);
1706                               if (pcb.ki_prstate >= INP_CONNECTED) {
1707                                         in = satosin(&pcb.ki_dst);
1708                                         in->sin_len = sizeof(*in);
1709                                         in->sin_family = pf;
1710                                         in->sin_port = inp->inp_fport;
1711                                         in->sin_addr = const_in4p_faddr(inp);
1712                               }
1713                               break;
1714 #ifdef INET6
1715                     case PF_INET6:
1716                               pcb.ki_family = inp->inp_socket->so_proto->
1717                                   pr_domain->dom_family;
1718                               pcb.ki_type = inp->inp_socket->so_proto->pr_type;
1719                               pcb.ki_protocol = inp->inp_socket->so_proto->
1720                                   pr_protocol;
1721                               pcb.ki_pflags = inp->inp_flags;
1722 
1723                               pcb.ki_sostate = inp->inp_socket->so_state;
1724                               pcb.ki_prstate = inp->inp_state;
1725                               if (proto == IPPROTO_TCP) {
1726                                         tp = intotcpcb(inp);
1727                                         pcb.ki_tstate = tp->t_state;
1728                                         pcb.ki_tflags = tp->t_flags;
1729                               }
1730 
1731                               COND_SET_VALUE(pcb.ki_pcbaddr,
1732                                   PTRTOUINT64(inp), allowaddr);
1733                               COND_SET_VALUE(pcb.ki_ppcbaddr,
1734                                   PTRTOUINT64(inp->inp_ppcb), allowaddr);
1735                               COND_SET_VALUE(pcb.ki_sockaddr,
1736                                   PTRTOUINT64(inp->inp_socket), allowaddr);
1737 
1738                               pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc;
1739                               pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc;
1740 
1741                               in6 = satosin6(&pcb.ki_src);
1742                               in6->sin6_len = sizeof(*in6);
1743                               in6->sin6_family = pf;
1744                               in6->sin6_port = inp->inp_lport;
1745                               in6->sin6_flowinfo = const_in6p_flowinfo(inp);
1746                               in6->sin6_addr = const_in6p_laddr(inp);
1747                               in6->sin6_scope_id = 0; /* XXX? */
1748 
1749                               if (pcb.ki_prstate >= INP_CONNECTED) {
1750                                         in6 = satosin6(&pcb.ki_dst);
1751                                         in6->sin6_len = sizeof(*in6);
1752                                         in6->sin6_family = pf;
1753                                         in6->sin6_port = inp->inp_fport;
1754                                         in6->sin6_flowinfo = const_in6p_flowinfo(inp);
1755                                         in6->sin6_addr = const_in6p_faddr(inp);
1756                                         in6->sin6_scope_id = 0; /* XXX? */
1757                               }
1758                               break;
1759 #endif
1760                     }
1761 
1762                     if (len >= elem_size && elem_count > 0) {
1763                               error = copyout(&pcb, dp, out_size);
1764                               if (error) {
1765                                         mutex_exit(softnet_lock);
1766                                         return error;
1767                               }
1768                               dp += elem_size;
1769                               len -= elem_size;
1770                     }
1771                     needed += elem_size;
1772                     if (elem_count > 0 && elem_count != INT_MAX)
1773                               elem_count--;
1774           }
1775 
1776           *oldlenp = needed;
1777           if (oldp == NULL)
1778                     *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb);
1779 
1780           mutex_exit(softnet_lock);
1781 
1782           return error;
1783 }
1784 
1785 static int
sysctl_tcp_congctl(SYSCTLFN_ARGS)1786 sysctl_tcp_congctl(SYSCTLFN_ARGS)
1787 {
1788           struct sysctlnode node;
1789           int error;
1790           char newname[TCPCC_MAXLEN];
1791 
1792           strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1);
1793 
1794           node = *rnode;
1795           node.sysctl_data = newname;
1796           node.sysctl_size = sizeof(newname);
1797 
1798           error = sysctl_lookup(SYSCTLFN_CALL(&node));
1799 
1800           if (error ||
1801               newp == NULL ||
1802               strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0)
1803                     return error;
1804 
1805           mutex_enter(softnet_lock);
1806           error = tcp_congctl_select(NULL, newname);
1807           mutex_exit(softnet_lock);
1808 
1809           return error;
1810 }
1811 
1812 static int
sysctl_tcp_init_win(SYSCTLFN_ARGS)1813 sysctl_tcp_init_win(SYSCTLFN_ARGS)
1814 {
1815           int error;
1816           u_int iw;
1817           struct sysctlnode node;
1818 
1819           iw = *(u_int *)rnode->sysctl_data;
1820           node = *rnode;
1821           node.sysctl_data = &iw;
1822           node.sysctl_size = sizeof(iw);
1823           error = sysctl_lookup(SYSCTLFN_CALL(&node));
1824           if (error || newp == NULL)
1825                     return error;
1826 
1827           if (iw >= __arraycount(tcp_init_win_max))
1828                     return EINVAL;
1829           *(u_int *)rnode->sysctl_data = iw;
1830           return 0;
1831 }
1832 
1833 static int
sysctl_tcp_keep(SYSCTLFN_ARGS)1834 sysctl_tcp_keep(SYSCTLFN_ARGS)
1835 {
1836           int error;
1837           u_int tmp;
1838           struct sysctlnode node;
1839 
1840           node = *rnode;
1841           tmp = *(u_int *)rnode->sysctl_data;
1842           node.sysctl_data = &tmp;
1843 
1844           error = sysctl_lookup(SYSCTLFN_CALL(&node));
1845           if (error || newp == NULL)
1846                     return error;
1847 
1848           if (!(tmp > 0 && tmp <= TCP_TIMER_MAXTICKS))
1849                     return EINVAL;
1850 
1851           mutex_enter(softnet_lock);
1852 
1853           *(u_int *)rnode->sysctl_data = tmp;
1854           tcp_tcpcb_template();         /* update the template */
1855 
1856           mutex_exit(softnet_lock);
1857           return 0;
1858 }
1859 
1860 static int
sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS)1861 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS)
1862 {
1863 
1864           return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS));
1865 }
1866 
1867 /*
1868  * this (second stage) setup routine is a replacement for tcp_sysctl()
1869  * (which is currently used for ipv4 and ipv6)
1870  */
1871 static void
sysctl_net_inet_tcp_setup2(struct sysctllog ** clog,int pf,const char * pfname,const char * tcpname)1872 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname,
1873                                  const char *tcpname)
1874 {
1875           const struct sysctlnode *sack_node;
1876           const struct sysctlnode *abc_node;
1877           const struct sysctlnode *ecn_node;
1878           const struct sysctlnode *congctl_node;
1879           const struct sysctlnode *mslt_node;
1880           const struct sysctlnode *vtw_node;
1881 #ifdef TCP_DEBUG
1882           extern struct tcp_debug tcp_debug[TCP_NDEBUG];
1883           extern int tcp_debx;
1884 #endif
1885 
1886           sysctl_createv(clog, 0, NULL, NULL,
1887                            CTLFLAG_PERMANENT,
1888                            CTLTYPE_NODE, pfname, NULL,
1889                            NULL, 0, NULL, 0,
1890                            CTL_NET, pf, CTL_EOL);
1891           sysctl_createv(clog, 0, NULL, NULL,
1892                            CTLFLAG_PERMANENT,
1893                            CTLTYPE_NODE, tcpname,
1894                            SYSCTL_DESCR("TCP related settings"),
1895                            NULL, 0, NULL, 0,
1896                            CTL_NET, pf, IPPROTO_TCP, CTL_EOL);
1897 
1898           sysctl_createv(clog, 0, NULL, NULL,
1899                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1900                            CTLTYPE_INT, "rfc1323",
1901                            SYSCTL_DESCR("Enable RFC1323 TCP extensions"),
1902                            sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0,
1903                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL);
1904           sysctl_createv(clog, 0, NULL, NULL,
1905                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1906                            CTLTYPE_INT, "sendspace",
1907                            SYSCTL_DESCR("Default TCP send buffer size"),
1908                            NULL, 0, &tcp_sendspace, 0,
1909                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL);
1910           sysctl_createv(clog, 0, NULL, NULL,
1911                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1912                            CTLTYPE_INT, "recvspace",
1913                            SYSCTL_DESCR("Default TCP receive buffer size"),
1914                            NULL, 0, &tcp_recvspace, 0,
1915                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL);
1916           sysctl_createv(clog, 0, NULL, NULL,
1917                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1918                            CTLTYPE_INT, "mssdflt",
1919                            SYSCTL_DESCR("Default maximum segment size"),
1920                            sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0,
1921                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL);
1922           sysctl_createv(clog, 0, NULL, NULL,
1923                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1924                            CTLTYPE_INT, "minmss",
1925                            SYSCTL_DESCR("Lower limit for TCP maximum segment size"),
1926                            NULL, 0, &tcp_minmss, 0,
1927                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
1928           sysctl_createv(clog, 0, NULL, NULL,
1929                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1930                            CTLTYPE_INT, "msl",
1931                            SYSCTL_DESCR("Maximum Segment Life"),
1932                            NULL, 0, &tcp_msl, 0,
1933                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL);
1934           sysctl_createv(clog, 0, NULL, NULL,
1935                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1936                            CTLTYPE_INT, "syn_cache_limit",
1937                            SYSCTL_DESCR("Maximum number of entries in the TCP "
1938                                             "compressed state engine"),
1939                            NULL, 0, &tcp_syn_cache_limit, 0,
1940                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT,
1941                            CTL_EOL);
1942           sysctl_createv(clog, 0, NULL, NULL,
1943                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1944                            CTLTYPE_INT, "syn_bucket_limit",
1945                            SYSCTL_DESCR("Maximum number of entries per hash "
1946                                             "bucket in the TCP compressed state "
1947                                             "engine"),
1948                            NULL, 0, &tcp_syn_bucket_limit, 0,
1949                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT,
1950                            CTL_EOL);
1951 #if 0 /* obsoleted */
1952           sysctl_createv(clog, 0, NULL, NULL,
1953                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1954                            CTLTYPE_INT, "syn_cache_interval",
1955                            SYSCTL_DESCR("TCP compressed state engine's timer interval"),
1956                            NULL, 0, &tcp_syn_cache_interval, 0,
1957                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER,
1958                            CTL_EOL);
1959 #endif
1960           sysctl_createv(clog, 0, NULL, NULL,
1961                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1962                            CTLTYPE_INT, "init_win",
1963                            SYSCTL_DESCR("Initial TCP congestion window"),
1964                            sysctl_tcp_init_win, 0, &tcp_init_win, 0,
1965                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL);
1966           sysctl_createv(clog, 0, NULL, NULL,
1967                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1968                            CTLTYPE_INT, "mss_ifmtu",
1969                            SYSCTL_DESCR("Use interface MTU for calculating MSS"),
1970                            NULL, 0, &tcp_mss_ifmtu, 0,
1971                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL);
1972           sysctl_createv(clog, 0, NULL, &sack_node,
1973                            CTLFLAG_PERMANENT,
1974                            CTLTYPE_NODE, "sack",
1975                            SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"),
1976                            NULL, 0, NULL, 0,
1977                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL);
1978 
1979           /* Congctl subtree */
1980           sysctl_createv(clog, 0, NULL, &congctl_node,
1981                            CTLFLAG_PERMANENT,
1982                            CTLTYPE_NODE, "congctl",
1983                            SYSCTL_DESCR("TCP Congestion Control"),
1984                            NULL, 0, NULL, 0,
1985                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
1986           sysctl_createv(clog, 0, &congctl_node, NULL,
1987                            CTLFLAG_PERMANENT,
1988                            CTLTYPE_STRING, "available",
1989                            SYSCTL_DESCR("Available Congestion Control Mechanisms"),
1990                            NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL);
1991           sysctl_createv(clog, 0, &congctl_node, NULL,
1992                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1993                            CTLTYPE_STRING, "selected",
1994                            SYSCTL_DESCR("Selected Congestion Control Mechanism"),
1995                            sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN,
1996                            CTL_CREATE, CTL_EOL);
1997 
1998           sysctl_createv(clog, 0, NULL, NULL,
1999                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2000                            CTLTYPE_INT, "win_scale",
2001                            SYSCTL_DESCR("Use RFC1323 window scale options"),
2002                            sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0,
2003                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL);
2004           sysctl_createv(clog, 0, NULL, NULL,
2005                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2006                            CTLTYPE_INT, "timestamps",
2007                            SYSCTL_DESCR("Use RFC1323 time stamp options"),
2008                            sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0,
2009                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL);
2010           sysctl_createv(clog, 0, NULL, NULL,
2011                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2012                            CTLTYPE_INT, "cwm",
2013                            SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window "
2014                                             "Monitoring"),
2015                            NULL, 0, &tcp_cwm, 0,
2016                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL);
2017           sysctl_createv(clog, 0, NULL, NULL,
2018                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2019                            CTLTYPE_INT, "cwm_burstsize",
2020                            SYSCTL_DESCR("Congestion Window Monitoring allowed "
2021                                             "burst count in packets"),
2022                            NULL, 0, &tcp_cwm_burstsize, 0,
2023                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE,
2024                            CTL_EOL);
2025           sysctl_createv(clog, 0, NULL, NULL,
2026                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2027                            CTLTYPE_INT, "ack_on_push",
2028                            SYSCTL_DESCR("Immediately return ACK when PSH is "
2029                                             "received"),
2030                            NULL, 0, &tcp_ack_on_push, 0,
2031                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL);
2032           sysctl_createv(clog, 0, NULL, NULL,
2033                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2034                            CTLTYPE_INT, "keepidle",
2035                            SYSCTL_DESCR("Allowed connection idle ticks before a "
2036                                             "keepalive probe is sent"),
2037                            sysctl_tcp_keep, 0, &tcp_keepidle, 0,
2038                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL);
2039           sysctl_createv(clog, 0, NULL, NULL,
2040                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2041                            CTLTYPE_INT, "keepintvl",
2042                            SYSCTL_DESCR("Ticks before next keepalive probe is sent"),
2043                            sysctl_tcp_keep, 0, &tcp_keepintvl, 0,
2044                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL);
2045           sysctl_createv(clog, 0, NULL, NULL,
2046                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2047                            CTLTYPE_INT, "keepcnt",
2048                            SYSCTL_DESCR("Number of keepalive probes to send"),
2049                            sysctl_tcp_keep, 0, &tcp_keepcnt, 0,
2050                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
2051           sysctl_createv(clog, 0, NULL, NULL,
2052                            CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
2053                            CTLTYPE_INT, "slowhz",
2054                            SYSCTL_DESCR("Keepalive ticks per second"),
2055                            NULL, PR_SLOWHZ, NULL, 0,
2056                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL);
2057           sysctl_createv(clog, 0, NULL, NULL,
2058                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2059                            CTLTYPE_INT, "log_refused",
2060                            SYSCTL_DESCR("Log refused TCP connections"),
2061                            NULL, 0, &tcp_log_refused, 0,
2062                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL);
2063 #if 0 /* obsoleted */
2064           sysctl_createv(clog, 0, NULL, NULL,
2065                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2066                            CTLTYPE_INT, "rstratelimit", NULL,
2067                            NULL, 0, &tcp_rst_ratelim, 0,
2068                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL);
2069 #endif
2070           sysctl_createv(clog, 0, NULL, NULL,
2071                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2072                            CTLTYPE_INT, "rstppslimit",
2073                            SYSCTL_DESCR("Maximum number of RST packets to send "
2074                                             "per second"),
2075                            NULL, 0, &tcp_rst_ppslim, 0,
2076                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL);
2077           sysctl_createv(clog, 0, NULL, NULL,
2078                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2079                            CTLTYPE_INT, "delack_ticks",
2080                            SYSCTL_DESCR("Number of ticks to delay sending an ACK"),
2081                            NULL, 0, &tcp_delack_ticks, 0,
2082                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL);
2083           sysctl_createv(clog, 0, NULL, NULL,
2084                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2085                            CTLTYPE_INT, "init_win_local",
2086                            SYSCTL_DESCR("Initial TCP window size (in segments)"),
2087                            sysctl_tcp_init_win, 0, &tcp_init_win_local, 0,
2088                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL,
2089                            CTL_EOL);
2090           sysctl_createv(clog, 0, NULL, NULL,
2091                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2092                            CTLTYPE_STRUCT, "ident",
2093                            SYSCTL_DESCR("RFC1413 Identification Protocol lookups"),
2094                            sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t),
2095                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL);
2096           sysctl_createv(clog, 0, NULL, NULL,
2097                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2098                            CTLTYPE_INT, "do_loopback_cksum",
2099                            SYSCTL_DESCR("Perform TCP checksum on loopback"),
2100                            NULL, 0, &tcp_do_loopback_cksum, 0,
2101                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM,
2102                            CTL_EOL);
2103           sysctl_createv(clog, 0, NULL, NULL,
2104                            CTLFLAG_PERMANENT,
2105                            CTLTYPE_STRUCT, "pcblist",
2106                            SYSCTL_DESCR("TCP protocol control block list"),
2107                            sysctl_inpcblist, 0, &tcbtable, 0,
2108                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
2109                            CTL_EOL);
2110           sysctl_createv(clog, 0, NULL, NULL,
2111                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2112                            CTLTYPE_INT, "keepinit",
2113                            SYSCTL_DESCR("Ticks before initial tcp connection times out"),
2114                            sysctl_tcp_keep, 0, &tcp_keepinit, 0,
2115                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2116 
2117           /* TCP socket buffers auto-sizing nodes */
2118           sysctl_createv(clog, 0, NULL, NULL,
2119                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2120                            CTLTYPE_INT, "recvbuf_auto",
2121                            SYSCTL_DESCR("Enable automatic receive "
2122                                "buffer sizing (experimental)"),
2123                            NULL, 0, &tcp_do_autorcvbuf, 0,
2124                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2125           sysctl_createv(clog, 0, NULL, NULL,
2126                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2127                            CTLTYPE_INT, "recvbuf_inc",
2128                            SYSCTL_DESCR("Incrementor step size of "
2129                                "automatic receive buffer"),
2130                            NULL, 0, &tcp_autorcvbuf_inc, 0,
2131                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2132           sysctl_createv(clog, 0, NULL, NULL,
2133                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2134                            CTLTYPE_INT, "recvbuf_max",
2135                            SYSCTL_DESCR("Max size of automatic receive buffer"),
2136                            NULL, 0, &tcp_autorcvbuf_max, 0,
2137                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2138 
2139           sysctl_createv(clog, 0, NULL, NULL,
2140                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2141                            CTLTYPE_INT, "sendbuf_auto",
2142                            SYSCTL_DESCR("Enable automatic send "
2143                                "buffer sizing (experimental)"),
2144                            NULL, 0, &tcp_do_autosndbuf, 0,
2145                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2146           sysctl_createv(clog, 0, NULL, NULL,
2147                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2148                            CTLTYPE_INT, "sendbuf_inc",
2149                            SYSCTL_DESCR("Incrementor step size of "
2150                                "automatic send buffer"),
2151                            NULL, 0, &tcp_autosndbuf_inc, 0,
2152                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2153           sysctl_createv(clog, 0, NULL, NULL,
2154                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2155                            CTLTYPE_INT, "sendbuf_max",
2156                            SYSCTL_DESCR("Max size of automatic send buffer"),
2157                            NULL, 0, &tcp_autosndbuf_max, 0,
2158                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2159 
2160           /* ECN subtree */
2161           sysctl_createv(clog, 0, NULL, &ecn_node,
2162                            CTLFLAG_PERMANENT,
2163                            CTLTYPE_NODE, "ecn",
2164                            SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"),
2165                            NULL, 0, NULL, 0,
2166                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2167           sysctl_createv(clog, 0, &ecn_node, NULL,
2168                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2169                            CTLTYPE_INT, "enable",
2170                            SYSCTL_DESCR("Enable TCP Explicit Congestion "
2171                                  "Notification"),
2172                            NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL);
2173           sysctl_createv(clog, 0, &ecn_node, NULL,
2174                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2175                            CTLTYPE_INT, "maxretries",
2176                            SYSCTL_DESCR("Number of times to retry ECN setup "
2177                                      "before disabling ECN on the connection"),
2178                            NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL);
2179 
2180           /* SACK gets its own little subtree. */
2181           sysctl_createv(clog, 0, NULL, &sack_node,
2182                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2183                            CTLTYPE_INT, "enable",
2184                            SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"),
2185                            NULL, 0, &tcp_do_sack, 0,
2186                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2187           sysctl_createv(clog, 0, NULL, &sack_node,
2188                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2189                            CTLTYPE_INT, "maxholes",
2190                            SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"),
2191                            NULL, 0, &tcp_sack_tp_maxholes, 0,
2192                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2193           sysctl_createv(clog, 0, NULL, &sack_node,
2194                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2195                            CTLTYPE_INT, "globalmaxholes",
2196                            SYSCTL_DESCR("Global maximum number of TCP SACK holes"),
2197                            NULL, 0, &tcp_sack_globalmaxholes, 0,
2198                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2199           sysctl_createv(clog, 0, NULL, &sack_node,
2200                            CTLFLAG_PERMANENT,
2201                            CTLTYPE_INT, "globalholes",
2202                            SYSCTL_DESCR("Global number of TCP SACK holes"),
2203                            NULL, 0, &tcp_sack_globalholes, 0,
2204                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
2205 
2206           sysctl_createv(clog, 0, NULL, NULL,
2207                            CTLFLAG_PERMANENT,
2208                            CTLTYPE_STRUCT, "stats",
2209                            SYSCTL_DESCR("TCP statistics"),
2210                            sysctl_net_inet_tcp_stats, 0, NULL, 0,
2211                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS,
2212                            CTL_EOL);
2213         sysctl_createv(clog, 0, NULL, NULL,
2214                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2215                        CTLTYPE_INT, "local_by_rtt",
2216                        SYSCTL_DESCR("Use RTT estimator to decide which hosts "
2217                                             "are local"),
2218                            NULL, 0, &tcp_rttlocal, 0,
2219                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2220 #ifdef TCP_DEBUG
2221           sysctl_createv(clog, 0, NULL, NULL,
2222                            CTLFLAG_PERMANENT,
2223                            CTLTYPE_STRUCT, "debug",
2224                            SYSCTL_DESCR("TCP sockets debug information"),
2225                            NULL, 0, &tcp_debug, sizeof(tcp_debug),
2226                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG,
2227                            CTL_EOL);
2228           sysctl_createv(clog, 0, NULL, NULL,
2229                            CTLFLAG_PERMANENT,
2230                            CTLTYPE_INT, "debx",
2231                            SYSCTL_DESCR("Number of TCP debug sockets messages"),
2232                            NULL, 0, &tcp_debx, sizeof(tcp_debx),
2233                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX,
2234                            CTL_EOL);
2235 #endif
2236           sysctl_createv(clog, 0, NULL, NULL,
2237                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2238                            CTLTYPE_STRUCT, "drop",
2239                            SYSCTL_DESCR("TCP drop connection"),
2240                            sysctl_net_inet_tcp_drop, 0, NULL, 0,
2241                            CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL);
2242           sysctl_createv(clog, 0, NULL, NULL,
2243                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2244                            CTLTYPE_INT, "iss_hash",
2245                            SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic "
2246                                             "hash computation"),
2247                            NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948),
2248                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
2249                            CTL_EOL);
2250 
2251           /* ABC subtree */
2252 
2253           sysctl_createv(clog, 0, NULL, &abc_node,
2254                            CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc",
2255                            SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"),
2256                            NULL, 0, NULL, 0,
2257                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2258           sysctl_createv(clog, 0, &abc_node, NULL,
2259                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2260                            CTLTYPE_INT, "enable",
2261                            SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
2262                            NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL);
2263           sysctl_createv(clog, 0, &abc_node, NULL,
2264                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2265                            CTLTYPE_INT, "aggressive",
2266                            SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"),
2267                            NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL);
2268 
2269           /* MSL tuning subtree */
2270 
2271           sysctl_createv(clog, 0, NULL, &mslt_node,
2272                            CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt",
2273                            SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"),
2274                            NULL, 0, NULL, 0,
2275                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2276           sysctl_createv(clog, 0, &mslt_node, NULL,
2277                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2278                            CTLTYPE_INT, "enable",
2279                            SYSCTL_DESCR("Enable TIME_WAIT truncation"),
2280                            NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL);
2281           sysctl_createv(clog, 0, &mslt_node, NULL,
2282                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2283                            CTLTYPE_INT, "loopback",
2284                            SYSCTL_DESCR("MSL value to use for loopback connections"),
2285                            NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL);
2286           sysctl_createv(clog, 0, &mslt_node, NULL,
2287                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2288                            CTLTYPE_INT, "local",
2289                            SYSCTL_DESCR("MSL value to use for local connections"),
2290                            NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL);
2291           sysctl_createv(clog, 0, &mslt_node, NULL,
2292                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2293                            CTLTYPE_INT, "remote",
2294                            SYSCTL_DESCR("MSL value to use for remote connections"),
2295                            NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL);
2296           sysctl_createv(clog, 0, &mslt_node, NULL,
2297                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2298                            CTLTYPE_INT, "remote_threshold",
2299                            SYSCTL_DESCR("RTT estimate value to promote local to remote"),
2300                            NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL);
2301 
2302           /* vestigial TIME_WAIT tuning subtree */
2303 
2304           sysctl_createv(clog, 0, NULL, &vtw_node,
2305                            CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw",
2306                            SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"),
2307                            NULL, 0, NULL, 0,
2308                            CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
2309           sysctl_createv(clog, 0, &vtw_node, NULL,
2310                            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2311                            CTLTYPE_INT, "enable",
2312                            SYSCTL_DESCR("Enable Vestigial TIME_WAIT"),
2313                            sysctl_tcp_vtw_enable, 0,
2314                          (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable,
2315                            0, CTL_CREATE, CTL_EOL);
2316           sysctl_createv(clog, 0, &vtw_node, NULL,
2317                            CTLFLAG_PERMANENT|CTLFLAG_READONLY,
2318                            CTLTYPE_INT, "entries",
2319                            SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"),
2320                            NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL);
2321 }
2322 
2323 void
tcp_usrreq_init(void)2324 tcp_usrreq_init(void)
2325 {
2326 
2327           sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp");
2328 #ifdef INET6
2329           sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6");
2330 #endif
2331 }
2332 
2333 PR_WRAP_USRREQS(tcp)
2334 #define   tcp_attach          tcp_attach_wrapper
2335 #define   tcp_detach          tcp_detach_wrapper
2336 #define   tcp_accept          tcp_accept_wrapper
2337 #define   tcp_bind  tcp_bind_wrapper
2338 #define   tcp_listen          tcp_listen_wrapper
2339 #define   tcp_connect         tcp_connect_wrapper
2340 #define   tcp_connect2        tcp_connect2_wrapper
2341 #define   tcp_disconnect      tcp_disconnect_wrapper
2342 #define   tcp_shutdown        tcp_shutdown_wrapper
2343 #define   tcp_abort tcp_abort_wrapper
2344 #define   tcp_ioctl tcp_ioctl_wrapper
2345 #define   tcp_stat  tcp_stat_wrapper
2346 #define   tcp_peeraddr        tcp_peeraddr_wrapper
2347 #define   tcp_sockaddr        tcp_sockaddr_wrapper
2348 #define   tcp_rcvd  tcp_rcvd_wrapper
2349 #define   tcp_recvoob         tcp_recvoob_wrapper
2350 #define   tcp_send  tcp_send_wrapper
2351 #define   tcp_sendoob         tcp_sendoob_wrapper
2352 #define   tcp_purgeif         tcp_purgeif_wrapper
2353 
2354 const struct pr_usrreqs tcp_usrreqs = {
2355           .pr_attach          = tcp_attach,
2356           .pr_detach          = tcp_detach,
2357           .pr_accept          = tcp_accept,
2358           .pr_bind  = tcp_bind,
2359           .pr_listen          = tcp_listen,
2360           .pr_connect         = tcp_connect,
2361           .pr_connect2        = tcp_connect2,
2362           .pr_disconnect      = tcp_disconnect,
2363           .pr_shutdown        = tcp_shutdown,
2364           .pr_abort = tcp_abort,
2365           .pr_ioctl = tcp_ioctl,
2366           .pr_stat  = tcp_stat,
2367           .pr_peeraddr        = tcp_peeraddr,
2368           .pr_sockaddr        = tcp_sockaddr,
2369           .pr_rcvd  = tcp_rcvd,
2370           .pr_recvoob         = tcp_recvoob,
2371           .pr_send  = tcp_send,
2372           .pr_sendoob         = tcp_sendoob,
2373           .pr_purgeif         = tcp_purgeif,
2374 };
2375