1 /* $OpenBSD: tcp_usrreq.c,v 1.85 2004/04/27 17:51:33 otto Exp $ */
2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1988, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33 *
34 * NRL grants permission for redistribution and use in source and binary
35 * forms, with or without modification, of the software and documentation
36 * created at NRL provided that the following conditions are met:
37 *
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgements:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * This product includes software developed at the Information
48 * Technology Division, US Naval Research Laboratory.
49 * 4. Neither the name of the NRL nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 *
65 * The views and conclusions contained in the software and documentation
66 * are those of the authors and should not be interpreted as representing
67 * official policies, either expressed or implied, of the US Naval
68 * Research Laboratory (NRL).
69 */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/protosw.h>
77 #include <sys/stat.h>
78 #include <sys/sysctl.h>
79 #include <sys/domain.h>
80 #include <sys/kernel.h>
81
82 #include <net/if.h>
83 #include <net/route.h>
84
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/tcp.h>
92 #include <netinet/tcp_fsm.h>
93 #include <netinet/tcp_seq.h>
94 #include <netinet/tcp_timer.h>
95 #include <netinet/tcp_var.h>
96 #include <netinet/tcpip.h>
97 #include <netinet/tcp_debug.h>
98
99 /*
100 * TCP protocol interface to socket abstraction.
101 */
102 extern char *tcpstates[];
103 extern int tcptv_keep_init;
104
105 extern int tcp_rst_ppslim;
106
107 /* from in_pcb.c */
108 extern struct baddynamicports baddynamicports;
109
#ifndef TCP_SENDSPACE
/*
 * Default value of tcp_sendspace, in bytes.  The replacement list is
 * parenthesized so the macro keeps its value inside larger expressions
 * (e.g. x % TCP_SENDSPACE).
 */
#define	TCP_SENDSPACE	(1024*16)
#endif
113 u_int tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
/*
 * Default value of tcp_recvspace, in bytes.  The replacement list is
 * parenthesized so the macro keeps its value inside larger expressions
 * (e.g. x % TCP_RECVSPACE).
 */
#define	TCP_RECVSPACE	(1024*16)
#endif
117 u_int tcp_recvspace = TCP_RECVSPACE;
118
119 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
120
121 struct inpcbtable tcbtable;
122
123 int tcp_ident(void *, size_t *, void *, size_t, int);
124
125 #ifdef INET6
/*
 * IPv6 entry point for TCP user requests.  The request is forwarded to
 * tcp_usrreq() unchanged; the process pointer p is accepted but not used.
 */
int
tcp6_usrreq(so, req, m, nam, control, p)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
	struct proc *p;
{
	int error;

	error = tcp_usrreq(so, req, m, nam, control);
	return (error);
}
136 #endif
137
138 /*
139 * Process a TCP user request for TCP tb. If this is a send request
140 * then m is the mbuf chain of send data. If this is a timer expiration
141 * (called from the software clock routine), then timertype tells which timer.
142 */
143 /*ARGSUSED*/
144 int
tcp_usrreq(so,req,m,nam,control)145 tcp_usrreq(so, req, m, nam, control)
146 struct socket *so;
147 int req;
148 struct mbuf *m, *nam, *control;
149 {
150 struct sockaddr_in *sin;
151 struct inpcb *inp;
152 struct tcpcb *tp = NULL;
153 int s;
154 int error = 0;
155 int ostate;
156
157 if (req == PRU_CONTROL) {
158 #ifdef INET6
159 if (sotopf(so) == PF_INET6)
160 return in6_control(so, (u_long)m, (caddr_t)nam,
161 (struct ifnet *)control, 0);
162 else
163 #endif /* INET6 */
164 return (in_control(so, (u_long)m, (caddr_t)nam,
165 (struct ifnet *)control));
166 }
167 if (control && control->m_len) {
168 m_freem(control);
169 if (m)
170 m_freem(m);
171 return (EINVAL);
172 }
173
174 s = splsoftnet();
175 inp = sotoinpcb(so);
176 /*
177 * When a TCP is attached to a socket, then there will be
178 * a (struct inpcb) pointed at by the socket, and this
179 * structure will point at a subsidiary (struct tcpcb).
180 */
181 if (inp == 0 && req != PRU_ATTACH) {
182 splx(s);
183 /*
184 * The following corrects an mbuf leak under rare
185 * circumstances
186 */
187 if (m && (req == PRU_SEND || req == PRU_SENDOOB))
188 m_freem(m);
189 return (EINVAL); /* XXX */
190 }
191 if (inp) {
192 tp = intotcpcb(inp);
193 /* WHAT IF TP IS 0? */
194 #ifdef KPROF
195 tcp_acounts[tp->t_state][req]++;
196 #endif
197 ostate = tp->t_state;
198 } else
199 ostate = 0;
200 switch (req) {
201
202 /*
203 * TCP attaches to socket via PRU_ATTACH, reserving space,
204 * and an internet control block.
205 */
206 case PRU_ATTACH:
207 if (inp) {
208 error = EISCONN;
209 break;
210 }
211 error = tcp_attach(so);
212 if (error)
213 break;
214 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
215 so->so_linger = TCP_LINGERTIME;
216 tp = sototcpcb(so);
217 break;
218
219 /*
220 * PRU_DETACH detaches the TCP protocol from the socket.
221 * If the protocol state is non-embryonic, then can't
222 * do this directly: have to initiate a PRU_DISCONNECT,
223 * which may finish later; embryonic TCB's can just
224 * be discarded here.
225 */
226 case PRU_DETACH:
227 tp = tcp_disconnect(tp);
228 break;
229
230 /*
231 * Give the socket an address.
232 */
233 case PRU_BIND:
234 #ifdef INET6
235 if (inp->inp_flags & INP_IPV6)
236 error = in6_pcbbind(inp, nam);
237 else
238 #endif
239 error = in_pcbbind(inp, nam);
240 if (error)
241 break;
242 break;
243
244 /*
245 * Prepare to accept connections.
246 */
247 case PRU_LISTEN:
248 if (inp->inp_lport == 0) {
249 #ifdef INET6
250 if (inp->inp_flags & INP_IPV6)
251 error = in6_pcbbind(inp, NULL);
252 else
253 #endif
254 error = in_pcbbind(inp, NULL);
255 }
256 /* If the in_pcbbind() above is called, the tp->pf
257 should still be whatever it was before. */
258 if (error == 0)
259 tp->t_state = TCPS_LISTEN;
260 break;
261
262 /*
263 * Initiate connection to peer.
264 * Create a template for use in transmissions on this connection.
265 * Enter SYN_SENT state, and mark socket as connecting.
266 * Start keep-alive timer, and seed output sequence space.
267 * Send initial segment on connection.
268 */
269 case PRU_CONNECT:
270 sin = mtod(nam, struct sockaddr_in *);
271
272 #ifdef INET6
273 if (sin->sin_family == AF_INET6) {
274 struct in6_addr *in6_addr = &mtod(nam,
275 struct sockaddr_in6 *)->sin6_addr;
276
277 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
278 IN6_IS_ADDR_MULTICAST(in6_addr) ||
279 (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
280 ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
281 IN_MULTICAST(in6_addr->s6_addr32[3]) ||
282 in_broadcast(sin->sin_addr, NULL)))) {
283 error = EINVAL;
284 break;
285 }
286
287 if (inp->inp_lport == 0) {
288 error = in6_pcbbind(inp, NULL);
289 if (error)
290 break;
291 }
292 error = in6_pcbconnect(inp, nam);
293 } else if (sin->sin_family == AF_INET)
294 #endif /* INET6 */
295 {
296 if ((sin->sin_addr.s_addr == INADDR_ANY) ||
297 IN_MULTICAST(sin->sin_addr.s_addr) ||
298 in_broadcast(sin->sin_addr, NULL)) {
299 error = EINVAL;
300 break;
301 }
302
303 if (inp->inp_lport == 0) {
304 error = in_pcbbind(inp, NULL);
305 if (error)
306 break;
307 }
308 error = in_pcbconnect(inp, nam);
309 }
310
311 if (error)
312 break;
313
314 tp->t_template = tcp_template(tp);
315 if (tp->t_template == 0) {
316 in_pcbdisconnect(inp);
317 error = ENOBUFS;
318 break;
319 }
320
321 so->so_state |= SS_CONNECTOUT;
322 /* Compute window scaling to request. */
323 tcp_rscale(tp, so->so_rcv.sb_hiwat);
324
325 soisconnecting(so);
326 tcpstat.tcps_connattempt++;
327 tp->t_state = TCPS_SYN_SENT;
328 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
329 #ifdef TCP_COMPAT_42
330 tp->iss = tcp_iss;
331 tcp_iss += TCP_ISSINCR/2;
332 #else /* TCP_COMPAT_42 */
333 tp->iss = tcp_rndiss_next();
334 #endif /* !TCP_COMPAT_42 */
335 tcp_sendseqinit(tp);
336 #if defined(TCP_SACK)
337 tp->snd_last = tp->snd_una;
338 #endif
339 #if defined(TCP_SACK) && defined(TCP_FACK)
340 tp->snd_fack = tp->snd_una;
341 tp->retran_data = 0;
342 tp->snd_awnd = 0;
343 #endif
344 error = tcp_output(tp);
345 break;
346
347 /*
348 * Create a TCP connection between two sockets.
349 */
350 case PRU_CONNECT2:
351 error = EOPNOTSUPP;
352 break;
353
354 /*
355 * Initiate disconnect from peer.
356 * If connection never passed embryonic stage, just drop;
357 * else if don't need to let data drain, then can just drop anyways,
358 * else have to begin TCP shutdown process: mark socket disconnecting,
359 * drain unread data, state switch to reflect user close, and
360 * send segment (e.g. FIN) to peer. Socket will be really disconnected
361 * when peer sends FIN and acks ours.
362 *
363 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
364 */
365 case PRU_DISCONNECT:
366 tp = tcp_disconnect(tp);
367 break;
368
369 /*
370 * Accept a connection. Essentially all the work is
371 * done at higher levels; just return the address
372 * of the peer, storing through addr.
373 */
374 case PRU_ACCEPT:
375 #ifdef INET6
376 if (inp->inp_flags & INP_IPV6)
377 in6_setpeeraddr(inp, nam);
378 else
379 #endif
380 in_setpeeraddr(inp, nam);
381 break;
382
383 /*
384 * Mark the connection as being incapable of further output.
385 */
386 case PRU_SHUTDOWN:
387 if (so->so_state & SS_CANTSENDMORE)
388 break;
389 socantsendmore(so);
390 tp = tcp_usrclosed(tp);
391 if (tp)
392 error = tcp_output(tp);
393 break;
394
395 /*
396 * After a receive, possibly send window update to peer.
397 */
398 case PRU_RCVD:
399 /*
400 * soreceive() calls this function when a user receives
401 * ancillary data on a listening socket. We don't call
402 * tcp_output in such a case, since there is no header
403 * template for a listening socket and hence the kernel
404 * will panic.
405 */
406 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
407 (void) tcp_output(tp);
408 break;
409
410 /*
411 * Do a send by putting data in output queue and updating urgent
412 * marker if URG set. Possibly send more data.
413 */
414 case PRU_SEND:
415 sbappendstream(&so->so_snd, m);
416 error = tcp_output(tp);
417 break;
418
419 /*
420 * Abort the TCP.
421 */
422 case PRU_ABORT:
423 tp = tcp_drop(tp, ECONNABORTED);
424 break;
425
426 case PRU_SENSE:
427 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
428 splx(s);
429 return (0);
430
431 case PRU_RCVOOB:
432 if ((so->so_oobmark == 0 &&
433 (so->so_state & SS_RCVATMARK) == 0) ||
434 so->so_options & SO_OOBINLINE ||
435 tp->t_oobflags & TCPOOB_HADDATA) {
436 error = EINVAL;
437 break;
438 }
439 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
440 error = EWOULDBLOCK;
441 break;
442 }
443 m->m_len = 1;
444 *mtod(m, caddr_t) = tp->t_iobc;
445 if (((long)nam & MSG_PEEK) == 0)
446 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
447 break;
448
449 case PRU_SENDOOB:
450 if (sbspace(&so->so_snd) < -512) {
451 m_freem(m);
452 error = ENOBUFS;
453 break;
454 }
455 /*
456 * According to RFC961 (Assigned Protocols),
457 * the urgent pointer points to the last octet
458 * of urgent data. We continue, however,
459 * to consider it to indicate the first octet
460 * of data past the urgent section.
461 * Otherwise, snd_up should be one lower.
462 */
463 sbappendstream(&so->so_snd, m);
464 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
465 tp->t_force = 1;
466 error = tcp_output(tp);
467 tp->t_force = 0;
468 break;
469
470 case PRU_SOCKADDR:
471 #ifdef INET6
472 if (inp->inp_flags & INP_IPV6)
473 in6_setsockaddr(inp, nam);
474 else
475 #endif
476 in_setsockaddr(inp, nam);
477 break;
478
479 case PRU_PEERADDR:
480 #ifdef INET6
481 if (inp->inp_flags & INP_IPV6)
482 in6_setpeeraddr(inp, nam);
483 else
484 #endif
485 in_setpeeraddr(inp, nam);
486 break;
487
488 default:
489 panic("tcp_usrreq");
490 }
491 if (tp && (so->so_options & SO_DEBUG))
492 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
493 splx(s);
494 return (error);
495 }
496
497 int
tcp_ctloutput(op,so,level,optname,mp)498 tcp_ctloutput(op, so, level, optname, mp)
499 int op;
500 struct socket *so;
501 int level, optname;
502 struct mbuf **mp;
503 {
504 int error = 0, s;
505 struct inpcb *inp;
506 struct tcpcb *tp;
507 struct mbuf *m;
508 int i;
509
510 s = splsoftnet();
511 inp = sotoinpcb(so);
512 if (inp == NULL) {
513 splx(s);
514 if (op == PRCO_SETOPT && *mp)
515 (void) m_free(*mp);
516 return (ECONNRESET);
517 }
518 #ifdef INET6
519 tp = intotcpcb(inp);
520 #endif /* INET6 */
521 if (level != IPPROTO_TCP) {
522 switch (so->so_proto->pr_domain->dom_family) {
523 #ifdef INET6
524 case PF_INET6:
525 error = ip6_ctloutput(op, so, level, optname, mp);
526 break;
527 #endif /* INET6 */
528 case PF_INET:
529 error = ip_ctloutput(op, so, level, optname, mp);
530 break;
531 default:
532 error = EAFNOSUPPORT; /*?*/
533 break;
534 }
535 splx(s);
536 return (error);
537 }
538 #ifndef INET6
539 tp = intotcpcb(inp);
540 #endif /* !INET6 */
541
542 switch (op) {
543
544 case PRCO_SETOPT:
545 m = *mp;
546 switch (optname) {
547
548 case TCP_NODELAY:
549 if (m == NULL || m->m_len < sizeof (int))
550 error = EINVAL;
551 else if (*mtod(m, int *))
552 tp->t_flags |= TF_NODELAY;
553 else
554 tp->t_flags &= ~TF_NODELAY;
555 break;
556
557 case TCP_MAXSEG:
558 if (m == NULL || m->m_len < sizeof (int)) {
559 error = EINVAL;
560 break;
561 }
562
563 i = *mtod(m, int *);
564 if (i > 0 && i <= tp->t_maxseg)
565 tp->t_maxseg = i;
566 else
567 error = EINVAL;
568 break;
569
570 #ifdef TCP_SACK
571 case TCP_SACK_ENABLE:
572 if (m == NULL || m->m_len < sizeof (int)) {
573 error = EINVAL;
574 break;
575 }
576
577 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
578 error = EPERM;
579 break;
580 }
581
582 if (tp->t_flags & TF_SIGNATURE) {
583 error = EPERM;
584 break;
585 }
586
587 if (*mtod(m, int *))
588 tp->sack_enable = 1;
589 else
590 tp->sack_enable = 0;
591 break;
592 #endif
593 #ifdef TCP_SIGNATURE
594 case TCP_MD5SIG:
595 if (m == NULL || m->m_len < sizeof (int)) {
596 error = EINVAL;
597 break;
598 }
599
600 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
601 error = EPERM;
602 break;
603 }
604
605 if (*mtod(m, int *)) {
606 tp->t_flags |= TF_SIGNATURE;
607 #ifdef TCP_SACK
608 tp->sack_enable = 0;
609 #endif /* TCP_SACK */
610 } else
611 tp->t_flags &= ~TF_SIGNATURE;
612 break;
613 #endif /* TCP_SIGNATURE */
614 default:
615 error = ENOPROTOOPT;
616 break;
617 }
618 if (m)
619 (void) m_free(m);
620 break;
621
622 case PRCO_GETOPT:
623 *mp = m = m_get(M_WAIT, MT_SOOPTS);
624 m->m_len = sizeof(int);
625
626 switch (optname) {
627 case TCP_NODELAY:
628 *mtod(m, int *) = tp->t_flags & TF_NODELAY;
629 break;
630 case TCP_MAXSEG:
631 *mtod(m, int *) = tp->t_maxseg;
632 break;
633 #ifdef TCP_SACK
634 case TCP_SACK_ENABLE:
635 *mtod(m, int *) = tp->sack_enable;
636 break;
637 #endif
638 #ifdef TCP_SIGNATURE
639 case TCP_MD5SIG:
640 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
641 break;
642 #endif
643 default:
644 error = ENOPROTOOPT;
645 break;
646 }
647 break;
648 }
649 splx(s);
650 return (error);
651 }
652
653 /*
654 * Attach TCP protocol to socket, allocating
655 * internet protocol control block, tcp control block,
656 * bufer space, and entering LISTEN state if to accept connections.
657 */
658 int
tcp_attach(so)659 tcp_attach(so)
660 struct socket *so;
661 {
662 struct tcpcb *tp;
663 struct inpcb *inp;
664 int error;
665
666 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
667 error = soreserve(so, tcp_sendspace, tcp_recvspace);
668 if (error)
669 return (error);
670 }
671 error = in_pcballoc(so, &tcbtable);
672 if (error)
673 return (error);
674 inp = sotoinpcb(so);
675 tp = tcp_newtcpcb(inp);
676 if (tp == NULL) {
677 int nofd = so->so_state & SS_NOFDREF; /* XXX */
678
679 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
680 in_pcbdetach(inp);
681 so->so_state |= nofd;
682 return (ENOBUFS);
683 }
684 tp->t_state = TCPS_CLOSED;
685 #ifdef INET6
686 /* we disallow IPv4 mapped address completely. */
687 if (inp->inp_flags & INP_IPV6)
688 tp->pf = PF_INET6;
689 else
690 tp->pf = PF_INET;
691 #else
692 tp->pf = PF_INET;
693 #endif
694 return (0);
695 }
696
697 /*
698 * Initiate (or continue) disconnect.
699 * If embryonic state, just send reset (once).
700 * If in ``let data drain'' option and linger null, just drop.
701 * Otherwise (hard), mark socket disconnecting and drop
702 * current input data; switch states based on user close, and
703 * send segment to peer (with FIN).
704 */
705 struct tcpcb *
tcp_disconnect(tp)706 tcp_disconnect(tp)
707 struct tcpcb *tp;
708 {
709 struct socket *so = tp->t_inpcb->inp_socket;
710
711 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
712 tp = tcp_close(tp);
713 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
714 tp = tcp_drop(tp, 0);
715 else {
716 soisdisconnecting(so);
717 sbflush(&so->so_rcv);
718 tp = tcp_usrclosed(tp);
719 if (tp)
720 (void) tcp_output(tp);
721 }
722 return (tp);
723 }
724
725 /*
726 * User issued close, and wish to trail through shutdown states:
727 * if never received SYN, just forget it. If got a SYN from peer,
728 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
729 * If already got a FIN from peer, then almost done; go to LAST_ACK
730 * state. In all other cases, have already sent FIN to peer (e.g.
731 * after PRU_SHUTDOWN), and just have to play tedious game waiting
732 * for peer to send FIN or not respond to keep-alives, etc.
733 * We can let the user exit from the close as soon as the FIN is acked.
734 */
735 struct tcpcb *
tcp_usrclosed(tp)736 tcp_usrclosed(tp)
737 struct tcpcb *tp;
738 {
739
740 switch (tp->t_state) {
741
742 case TCPS_CLOSED:
743 case TCPS_LISTEN:
744 case TCPS_SYN_SENT:
745 tp->t_state = TCPS_CLOSED;
746 tp = tcp_close(tp);
747 break;
748
749 case TCPS_SYN_RECEIVED:
750 case TCPS_ESTABLISHED:
751 tp->t_state = TCPS_FIN_WAIT_1;
752 break;
753
754 case TCPS_CLOSE_WAIT:
755 tp->t_state = TCPS_LAST_ACK;
756 break;
757 }
758 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
759 soisdisconnected(tp->t_inpcb->inp_socket);
760 /*
761 * If we are in FIN_WAIT_2, we arrived here because the
762 * application did a shutdown of the send side. Like the
763 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
764 * a full close, we start a timer to make sure sockets are
765 * not left in FIN_WAIT_2 forever.
766 */
767 if (tp->t_state == TCPS_FIN_WAIT_2)
768 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
769 }
770 return (tp);
771 }
772
773 /*
774 * Look up a socket for ident or tcpdrop, ...
775 */
776 int
tcp_ident(oldp,oldlenp,newp,newlen,dodrop)777 tcp_ident(oldp, oldlenp, newp, newlen, dodrop)
778 void *oldp;
779 size_t *oldlenp;
780 void *newp;
781 size_t newlen;
782 int dodrop;
783 {
784 int error = 0, s;
785 struct tcp_ident_mapping tir;
786 struct inpcb *inp = NULL;
787 struct tcpcb *tp = NULL;
788 struct sockaddr_in *fin = NULL, *lin = NULL;
789 #ifdef INET6
790 struct sockaddr_in6 *fin6 = NULL, *lin6 = NULL;
791 struct in6_addr f6, l6;
792 #endif
793 if (dodrop) {
794 if (oldp != NULL || *oldlenp != 0)
795 return (EINVAL);
796 if (newp == NULL)
797 return (EPERM);
798 if (newlen < sizeof(tir))
799 return (ENOMEM);
800 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 )
801 return (error);
802 } else {
803 if (oldp == NULL)
804 return (EINVAL);
805 if (*oldlenp < sizeof(tir))
806 return (ENOMEM);
807 if (newp != NULL || newlen != 0)
808 return (EINVAL);
809 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
810 return (error);
811 }
812 switch (tir.faddr.ss_family) {
813 #ifdef INET6
814 case AF_INET6:
815 fin6 = (struct sockaddr_in6 *)&tir.faddr;
816 error = in6_embedscope(&f6, fin6, NULL, NULL);
817 if (error)
818 return EINVAL; /*?*/
819 lin6 = (struct sockaddr_in6 *)&tir.laddr;
820 error = in6_embedscope(&l6, lin6, NULL, NULL);
821 if (error)
822 return EINVAL; /*?*/
823 break;
824 #endif
825 case AF_INET:
826 fin = (struct sockaddr_in *)&tir.faddr;
827 lin = (struct sockaddr_in *)&tir.laddr;
828 break;
829 default:
830 return (EINVAL);
831 }
832
833 s = splsoftnet();
834 switch (tir.faddr.ss_family) {
835 case AF_INET6:
836 #ifdef INET6
837 inp = in6_pcbhashlookup(&tcbtable, &f6,
838 fin6->sin6_port, &l6, lin6->sin6_port);
839 break;
840 #endif
841 case AF_INET:
842 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr,
843 fin->sin_port, lin->sin_addr, lin->sin_port);
844 break;
845 }
846
847 if (dodrop) {
848 if (inp && (tp = intotcpcb(inp)) &&
849 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0))
850 tp = tcp_drop(tp, ECONNABORTED);
851 else
852 error = ESRCH;
853 splx(s);
854 return (error);
855 }
856
857 if (inp == NULL) {
858 ++tcpstat.tcps_pcbhashmiss;
859 switch (tir.faddr.ss_family) {
860 #ifdef INET6
861 case AF_INET6:
862 inp = in6_pcblookup_listen(&tcbtable,
863 &l6, lin6->sin6_port, 0);
864 break;
865 #endif
866 case AF_INET:
867 inp = in_pcblookup_listen(&tcbtable,
868 lin->sin_addr, lin->sin_port, 0);
869 break;
870 }
871 }
872
873 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
874 tir.ruid = inp->inp_socket->so_ruid;
875 tir.euid = inp->inp_socket->so_euid;
876 } else {
877 tir.ruid = -1;
878 tir.euid = -1;
879 }
880 splx(s);
881
882 *oldlenp = sizeof (tir);
883 error = copyout((void *)&tir, oldp, sizeof (tir));
884 return (error);
885 }
886
887 /*
888 * Sysctl for tcp variables.
889 */
890 int
tcp_sysctl(name,namelen,oldp,oldlenp,newp,newlen)891 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
892 int *name;
893 u_int namelen;
894 void *oldp;
895 size_t *oldlenp;
896 void *newp;
897 size_t newlen;
898 {
899 int error, nval;
900
901 /* All sysctl names at this level are terminal. */
902 if (namelen != 1)
903 return (ENOTDIR);
904
905 switch (name[0]) {
906 #ifdef TCP_SACK
907 case TCPCTL_SACK:
908 return (sysctl_int(oldp, oldlenp, newp, newlen,
909 &tcp_do_sack));
910 #endif
911 case TCPCTL_SLOWHZ:
912 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
913
914 case TCPCTL_BADDYNAMIC:
915 return (sysctl_struct(oldp, oldlenp, newp, newlen,
916 baddynamicports.tcp, sizeof(baddynamicports.tcp)));
917
918 case TCPCTL_IDENT:
919 return (tcp_ident(oldp, oldlenp, newp, newlen, 0));
920
921 case TCPCTL_DROP:
922 return (tcp_ident(oldp, oldlenp, newp, newlen, 1));
923
924 #ifdef TCP_ECN
925 case TCPCTL_ECN:
926 return (sysctl_int(oldp, oldlenp, newp, newlen,
927 &tcp_do_ecn));
928 #endif
929 case TCPCTL_REASS_LIMIT:
930 nval = tcp_reass_limit;
931 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
932 if (error)
933 return (error);
934 if (nval != tcp_reass_limit) {
935 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
936 if (error)
937 return (error);
938 tcp_reass_limit = nval;
939 }
940 return (0);
941 #ifdef TCP_SACK
942 case TCPCTL_SACKHOLE_LIMIT:
943 nval = tcp_sackhole_limit;
944 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
945 if (error)
946 return (error);
947 if (nval != tcp_sackhole_limit) {
948 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
949 if (error)
950 return (error);
951 tcp_sackhole_limit = nval;
952 }
953 return (0);
954 #endif
955 default:
956 if (name[0] < TCPCTL_MAXID)
957 return (sysctl_int_arr(tcpctl_vars, name, namelen,
958 oldp, oldlenp, newp, newlen));
959 return (ENOPROTOOPT);
960 }
961 /* NOTREACHED */
962 }
963