1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 2004 The FreeBSD Foundation. All rights reserved.
7 * Copyright (c) 2004-2008 Robert N. M. Watson. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
34 */
35
36 /*
37 *
38 * Copyright (c) 2010 Isilon Systems, Inc.
39 * Copyright (c) 2010 iX Systems, Inc.
40 * Copyright (c) 2010 Panasas, Inc.
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice unmodified, this list of conditions, and the following
48 * disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
57 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
58 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
62 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63 *
64 */
65 #include <sys/cdefs.h>
66 #include <sys/param.h>
67 #include <sys/eventhandler.h>
68 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70
71 #include "sdp.h"
72
73 #include <net/if.h>
74 #include <net/route.h>
75 #include <net/vnet.h>
76 #include <sys/sysctl.h>
77
/* UMA zone from which sdp_attach() allocates each struct sdp_sock. */
uma_zone_t sdp_zone;
/* Guards sdp_list and sdp_count; taken through the SDP_LIST_* macros below. */
struct rwlock sdp_lock;
/* Every pcb from sdp_attach() until sdp_pcbfree() unlinks it. */
LIST_HEAD(, sdp_sock) sdp_list;

/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * receive-completion workqueue -- confirm against the rx path.
 */
struct workqueue_struct *rx_comp_wq;

RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
/* Thin wrappers around rwlock(9) operations on the global sdp_lock. */
#define SDP_LIST_WLOCK() rw_wlock(&sdp_lock)
#define SDP_LIST_RLOCK() rw_rlock(&sdp_lock)
#define SDP_LIST_WUNLOCK() rw_wunlock(&sdp_lock)
#define SDP_LIST_RUNLOCK() rw_runlock(&sdp_lock)
#define SDP_LIST_WLOCK_ASSERT() rw_assert(&sdp_lock, RW_WLOCKED)
#define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED)
#define SDP_LIST_LOCK_ASSERT() rw_assert(&sdp_lock, RW_LOCKED)

/* malloc(9) type tag for SDP allocations. */
MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");

/* Defined later in the file; called from sdp_start_disconnect(). */
static void sdp_stop_keepalive_timer(struct socket *so);

/*
 * SDP protocol interface to socket abstraction.
 */
/*
 * sdp_sendspace and sdp_recvspace are the default send and receive window
 * sizes, respectively. sdp_attach() passes them to soreserve() when the
 * socket has no buffer space reserved yet.
 */
u_long sdp_sendspace = 1024*32;
u_long sdp_recvspace = 1024*64;

/* Number of pcbs currently linked on sdp_list; updated under sdp_lock. */
static int sdp_count;
108
109 /*
110 * Disable async. CMA events for sockets which are being torn down.
111 */
112 static void
sdp_destroy_cma(struct sdp_sock * ssk)113 sdp_destroy_cma(struct sdp_sock *ssk)
114 {
115
116 if (ssk->id == NULL)
117 return;
118 rdma_destroy_id(ssk->id);
119 ssk->id = NULL;
120 }
121
122 static int
sdp_pcbbind(struct sdp_sock * ssk,struct sockaddr * nam,struct ucred * cred)123 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
124 {
125 struct sockaddr_in *sin;
126 struct sockaddr_in null;
127 int error;
128
129 SDP_WLOCK_ASSERT(ssk);
130
131 if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
132 return (EINVAL);
133 /* rdma_bind_addr handles bind races. */
134 SDP_WUNLOCK(ssk);
135 if (ssk->id == NULL)
136 ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
137 if (ssk->id == NULL) {
138 SDP_WLOCK(ssk);
139 return (ENOMEM);
140 }
141 if (nam == NULL) {
142 null.sin_family = AF_INET;
143 null.sin_len = sizeof(null);
144 null.sin_addr.s_addr = INADDR_ANY;
145 null.sin_port = 0;
146 bzero(&null.sin_zero, sizeof(null.sin_zero));
147 nam = (struct sockaddr *)&null;
148 }
149 error = -rdma_bind_addr(ssk->id, nam);
150 SDP_WLOCK(ssk);
151 if (error == 0) {
152 sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
153 ssk->laddr = sin->sin_addr.s_addr;
154 ssk->lport = sin->sin_port;
155 } else
156 sdp_destroy_cma(ssk);
157 return (error);
158 }
159
160 static void
sdp_pcbfree(struct sdp_sock * ssk)161 sdp_pcbfree(struct sdp_sock *ssk)
162 {
163
164 KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
165 KASSERT((ssk->flags & SDP_DESTROY) == 0,
166 ("ssk %p already destroyed", ssk));
167
168 sdp_dbg(ssk->socket, "Freeing pcb");
169 SDP_WLOCK_ASSERT(ssk);
170 ssk->flags |= SDP_DESTROY;
171 SDP_WUNLOCK(ssk);
172 SDP_LIST_WLOCK();
173 sdp_count--;
174 LIST_REMOVE(ssk, list);
175 SDP_LIST_WUNLOCK();
176 crfree(ssk->cred);
177 ssk->qp_active = 0;
178 if (ssk->qp) {
179 ib_destroy_qp(ssk->qp);
180 ssk->qp = NULL;
181 }
182 sdp_tx_ring_destroy(ssk);
183 sdp_rx_ring_destroy(ssk);
184 sdp_destroy_cma(ssk);
185 rw_destroy(&ssk->rx_ring.destroyed_lock);
186 rw_destroy(&ssk->lock);
187 uma_zfree(sdp_zone, ssk);
188 }
189
190 /*
191 * Common routines to return a socket address.
192 */
193 static struct sockaddr *
sdp_sockaddr(in_port_t port,struct in_addr * addr_p)194 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
195 {
196 struct sockaddr_in *sin;
197
198 sin = malloc(sizeof *sin, M_SONAME,
199 M_WAITOK | M_ZERO);
200 sin->sin_family = AF_INET;
201 sin->sin_len = sizeof(*sin);
202 sin->sin_addr = *addr_p;
203 sin->sin_port = port;
204
205 return (struct sockaddr *)sin;
206 }
207
208 static int
sdp_getsockaddr(struct socket * so,struct sockaddr ** nam)209 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
210 {
211 struct sdp_sock *ssk;
212 struct in_addr addr;
213 in_port_t port;
214
215 ssk = sdp_sk(so);
216 SDP_RLOCK(ssk);
217 port = ssk->lport;
218 addr.s_addr = ssk->laddr;
219 SDP_RUNLOCK(ssk);
220
221 *nam = sdp_sockaddr(port, &addr);
222 return 0;
223 }
224
225 static int
sdp_getpeeraddr(struct socket * so,struct sockaddr ** nam)226 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
227 {
228 struct sdp_sock *ssk;
229 struct in_addr addr;
230 in_port_t port;
231
232 ssk = sdp_sk(so);
233 SDP_RLOCK(ssk);
234 port = ssk->fport;
235 addr.s_addr = ssk->faddr;
236 SDP_RUNLOCK(ssk);
237
238 *nam = sdp_sockaddr(port, &addr);
239 return 0;
240 }
241
242 static void
sdp_pcbnotifyall(struct in_addr faddr,int errno,struct sdp_sock * (* notify)(struct sdp_sock *,int))243 sdp_pcbnotifyall(struct in_addr faddr, int errno,
244 struct sdp_sock *(*notify)(struct sdp_sock *, int))
245 {
246 struct sdp_sock *ssk, *ssk_temp;
247
248 SDP_LIST_WLOCK();
249 LIST_FOREACH_SAFE(ssk, &sdp_list, list, ssk_temp) {
250 SDP_WLOCK(ssk);
251 if (ssk->faddr != faddr.s_addr || ssk->socket == NULL) {
252 SDP_WUNLOCK(ssk);
253 continue;
254 }
255 if ((ssk->flags & SDP_DESTROY) == 0)
256 if ((*notify)(ssk, errno))
257 SDP_WUNLOCK(ssk);
258 }
259 SDP_LIST_WUNLOCK();
260 }
261
#if 0
/*
 * Compiled out by the surrounding #if 0.
 *
 * Walks sdp_list under the list read lock and calls func(ssk, arg) on each
 * pcb while holding that pcb's write lock.
 */
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
struct sdp_sock *ssk;

SDP_LIST_RLOCK();
LIST_FOREACH(ssk, &sdp_list, list) {
SDP_WLOCK(ssk);
func(ssk, arg);
SDP_WUNLOCK(ssk);
}
SDP_LIST_RUNLOCK();
}
#endif
277
278 static void
sdp_output_reset(struct sdp_sock * ssk)279 sdp_output_reset(struct sdp_sock *ssk)
280 {
281 struct rdma_cm_id *id;
282
283 SDP_WLOCK_ASSERT(ssk);
284 if (ssk->id) {
285 id = ssk->id;
286 ssk->qp_active = 0;
287 SDP_WUNLOCK(ssk);
288 rdma_disconnect(id);
289 SDP_WLOCK(ssk);
290 }
291 ssk->state = TCPS_CLOSED;
292 }
293
294 /*
295 * Attempt to close a SDP socket, marking it as dropped, and freeing
296 * the socket if we hold the only reference.
297 */
298 static struct sdp_sock *
sdp_closed(struct sdp_sock * ssk)299 sdp_closed(struct sdp_sock *ssk)
300 {
301 struct socket *so;
302
303 SDP_WLOCK_ASSERT(ssk);
304
305 ssk->flags |= SDP_DROPPED;
306 so = ssk->socket;
307 soisdisconnected(so);
308 if (ssk->flags & SDP_SOCKREF) {
309 KASSERT(so->so_state & SS_PROTOREF,
310 ("sdp_closed: !SS_PROTOREF"));
311 ssk->flags &= ~SDP_SOCKREF;
312 SDP_WUNLOCK(ssk);
313 SOCK_LOCK(so);
314 so->so_state &= ~SS_PROTOREF;
315 sofree(so);
316 return (NULL);
317 }
318 return (ssk);
319 }
320
321 /*
322 * Perform timer based shutdowns which can not operate in
323 * callout context.
324 */
325 static void
sdp_shutdown_task(void * data,int pending)326 sdp_shutdown_task(void *data, int pending)
327 {
328 struct sdp_sock *ssk;
329
330 ssk = data;
331 SDP_WLOCK(ssk);
332 /*
333 * I don't think this can race with another call to pcbfree()
334 * because SDP_TIMEWAIT protects it. SDP_DESTROY may be redundant.
335 */
336 if (ssk->flags & SDP_DESTROY)
337 panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
338 ssk);
339 if (ssk->flags & SDP_DISCON)
340 sdp_output_reset(ssk);
341 /* We have to clear this so sdp_detach() will call pcbfree(). */
342 ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
343 if ((ssk->flags & SDP_DROPPED) == 0 &&
344 sdp_closed(ssk) == NULL)
345 return;
346 if (ssk->socket == NULL) {
347 sdp_pcbfree(ssk);
348 return;
349 }
350 SDP_WUNLOCK(ssk);
351 }
352
353 /*
354 * 2msl has expired, schedule the shutdown task.
355 */
356 static void
sdp_2msl_timeout(void * data)357 sdp_2msl_timeout(void *data)
358 {
359 struct sdp_sock *ssk;
360
361 ssk = data;
362 /* Callout canceled. */
363 if (!callout_active(&ssk->keep2msl))
364 goto out;
365 callout_deactivate(&ssk->keep2msl);
366 /* Should be impossible, defensive programming. */
367 if ((ssk->flags & SDP_TIMEWAIT) == 0)
368 goto out;
369 taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
370 out:
371 SDP_WUNLOCK(ssk);
372 return;
373 }
374
375 /*
376 * Schedule the 2msl wait timer.
377 */
378 static void
sdp_2msl_wait(struct sdp_sock * ssk)379 sdp_2msl_wait(struct sdp_sock *ssk)
380 {
381
382 SDP_WLOCK_ASSERT(ssk);
383 ssk->flags |= SDP_TIMEWAIT;
384 ssk->state = TCPS_TIME_WAIT;
385 soisdisconnected(ssk->socket);
386 callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
387 }
388
389 /*
390 * Timed out waiting for the final fin/ack from rdma_disconnect().
391 */
392 static void
sdp_dreq_timeout(void * data)393 sdp_dreq_timeout(void *data)
394 {
395 struct sdp_sock *ssk;
396
397 ssk = data;
398 /* Callout canceled. */
399 if (!callout_active(&ssk->keep2msl))
400 goto out;
401 /* Callout rescheduled, probably as a different timer. */
402 if (callout_pending(&ssk->keep2msl))
403 goto out;
404 callout_deactivate(&ssk->keep2msl);
405 if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
406 goto out;
407 if ((ssk->flags & SDP_DREQWAIT) == 0)
408 goto out;
409 ssk->flags &= ~SDP_DREQWAIT;
410 ssk->flags |= SDP_DISCON;
411 sdp_2msl_wait(ssk);
412 ssk->qp_active = 0;
413 out:
414 SDP_WUNLOCK(ssk);
415 }
416
417 /*
418 * Received the final fin/ack. Cancel the 2msl.
419 */
420 void
sdp_cancel_dreq_wait_timeout(struct sdp_sock * ssk)421 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
422 {
423 sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
424 ssk->flags &= ~SDP_DREQWAIT;
425 sdp_2msl_wait(ssk);
426 }
427
428 static int
sdp_init_sock(struct socket * sk)429 sdp_init_sock(struct socket *sk)
430 {
431 struct sdp_sock *ssk = sdp_sk(sk);
432
433 sdp_dbg(sk, "%s\n", __func__);
434
435 callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
436 TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
437 #ifdef SDP_ZCOPY
438 INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
439 ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
440 ssk->tx_ring.rdma_inflight = NULL;
441 #endif
442 atomic_set(&ssk->mseq_ack, 0);
443 sdp_rx_ring_init(ssk);
444 ssk->tx_ring.buffer = NULL;
445
446 return 0;
447 }
448
449 /*
450 * Allocate an sdp_sock for the socket and reserve socket buffer space.
451 */
452 static int
sdp_attach(struct socket * so,int proto,struct thread * td)453 sdp_attach(struct socket *so, int proto, struct thread *td)
454 {
455 struct sdp_sock *ssk;
456 int error;
457
458 ssk = sdp_sk(so);
459 KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
460 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
461 error = soreserve(so, sdp_sendspace, sdp_recvspace);
462 if (error)
463 return (error);
464 }
465 so->so_rcv.sb_flags |= SB_AUTOSIZE;
466 so->so_snd.sb_flags |= SB_AUTOSIZE;
467 ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
468 if (ssk == NULL)
469 return (ENOBUFS);
470 rw_init(&ssk->lock, "sdpsock");
471 ssk->socket = so;
472 ssk->cred = crhold(so->so_cred);
473 so->so_pcb = (caddr_t)ssk;
474 sdp_init_sock(so);
475 ssk->flags = 0;
476 ssk->qp_active = 0;
477 ssk->state = TCPS_CLOSED;
478 mbufq_init(&ssk->rxctlq, INT_MAX);
479 SDP_LIST_WLOCK();
480 LIST_INSERT_HEAD(&sdp_list, ssk, list);
481 sdp_count++;
482 SDP_LIST_WUNLOCK();
483
484 return (0);
485 }
486
487 /*
488 * Detach SDP from the socket, potentially leaving it around for the
489 * timewait to expire.
490 */
491 static void
sdp_detach(struct socket * so)492 sdp_detach(struct socket *so)
493 {
494 struct sdp_sock *ssk;
495
496 ssk = sdp_sk(so);
497 SDP_WLOCK(ssk);
498 KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
499 ssk->socket->so_pcb = NULL;
500 ssk->socket = NULL;
501 if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
502 SDP_WUNLOCK(ssk);
503 else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
504 sdp_pcbfree(ssk);
505 else
506 panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
507 }
508
509 /*
510 * Allocate a local address for the socket.
511 */
512 static int
sdp_bind(struct socket * so,struct sockaddr * nam,struct thread * td)513 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
514 {
515 int error = 0;
516 struct sdp_sock *ssk;
517 struct sockaddr_in *sin;
518
519 sin = (struct sockaddr_in *)nam;
520 if (sin->sin_family != AF_INET)
521 return (EAFNOSUPPORT);
522 if (nam->sa_len != sizeof(*sin))
523 return (EINVAL);
524 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
525 return (EAFNOSUPPORT);
526
527 ssk = sdp_sk(so);
528 SDP_WLOCK(ssk);
529 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
530 error = EINVAL;
531 goto out;
532 }
533 error = sdp_pcbbind(ssk, nam, td->td_ucred);
534 out:
535 SDP_WUNLOCK(ssk);
536
537 return (error);
538 }
539
540 /*
541 * Prepare to accept connections.
542 */
543 static int
sdp_listen(struct socket * so,int backlog,struct thread * td)544 sdp_listen(struct socket *so, int backlog, struct thread *td)
545 {
546 int error = 0;
547 struct sdp_sock *ssk;
548
549 ssk = sdp_sk(so);
550 SDP_WLOCK(ssk);
551 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
552 error = EINVAL;
553 goto out;
554 }
555 if (error == 0 && ssk->lport == 0)
556 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
557 SOCK_LOCK(so);
558 if (error == 0)
559 error = solisten_proto_check(so);
560 if (error == 0) {
561 solisten_proto(so, backlog);
562 ssk->state = TCPS_LISTEN;
563 }
564 SOCK_UNLOCK(so);
565
566 out:
567 SDP_WUNLOCK(ssk);
568 if (error == 0)
569 error = -rdma_listen(ssk->id, backlog);
570 return (error);
571 }
572
573 /*
574 * Initiate a SDP connection to nam.
575 */
576 static int
sdp_start_connect(struct sdp_sock * ssk,struct sockaddr * nam,struct thread * td)577 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
578 {
579 struct sockaddr_in src;
580 struct socket *so;
581 int error;
582
583 so = ssk->socket;
584
585 SDP_WLOCK_ASSERT(ssk);
586 if (ssk->lport == 0) {
587 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
588 if (error)
589 return error;
590 }
591 src.sin_family = AF_INET;
592 src.sin_len = sizeof(src);
593 bzero(&src.sin_zero, sizeof(src.sin_zero));
594 src.sin_port = ssk->lport;
595 src.sin_addr.s_addr = ssk->laddr;
596 soisconnecting(so);
597 SDP_WUNLOCK(ssk);
598 error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
599 SDP_RESOLVE_TIMEOUT);
600 SDP_WLOCK(ssk);
601 if (error == 0)
602 ssk->state = TCPS_SYN_SENT;
603
604 return 0;
605 }
606
607 /*
608 * Initiate SDP connection.
609 */
610 static int
sdp_connect(struct socket * so,struct sockaddr * nam,struct thread * td)611 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
612 {
613 int error = 0;
614 struct sdp_sock *ssk;
615 struct sockaddr_in *sin;
616
617 sin = (struct sockaddr_in *)nam;
618 if (nam->sa_len != sizeof(*sin))
619 return (EINVAL);
620 if (sin->sin_family != AF_INET)
621 return (EAFNOSUPPORT);
622 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
623 return (EAFNOSUPPORT);
624 if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
625 return (error);
626 ssk = sdp_sk(so);
627 SDP_WLOCK(ssk);
628 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
629 error = EINVAL;
630 else
631 error = sdp_start_connect(ssk, nam, td);
632 SDP_WUNLOCK(ssk);
633 return (error);
634 }
635
636 /*
637 * Drop a SDP socket, reporting
638 * the specified error. If connection is synchronized,
639 * then send a RST to peer.
640 */
641 static struct sdp_sock *
sdp_drop(struct sdp_sock * ssk,int errno)642 sdp_drop(struct sdp_sock *ssk, int errno)
643 {
644 struct socket *so;
645
646 SDP_WLOCK_ASSERT(ssk);
647 so = ssk->socket;
648 if (TCPS_HAVERCVDSYN(ssk->state))
649 sdp_output_reset(ssk);
650 if (errno == ETIMEDOUT && ssk->softerror)
651 errno = ssk->softerror;
652 so->so_error = errno;
653 return (sdp_closed(ssk));
654 }
655
656 /*
657 * User issued close, and wish to trail through shutdown states:
658 * if never received SYN, just forget it. If got a SYN from peer,
659 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
660 * If already got a FIN from peer, then almost done; go to LAST_ACK
661 * state. In all other cases, have already sent FIN to peer (e.g.
662 * after PRU_SHUTDOWN), and just have to play tedious game waiting
663 * for peer to send FIN or not respond to keep-alives, etc.
664 * We can let the user exit from the close as soon as the FIN is acked.
665 */
666 static void
sdp_usrclosed(struct sdp_sock * ssk)667 sdp_usrclosed(struct sdp_sock *ssk)
668 {
669
670 SDP_WLOCK_ASSERT(ssk);
671
672 switch (ssk->state) {
673 case TCPS_LISTEN:
674 ssk->state = TCPS_CLOSED;
675 SDP_WUNLOCK(ssk);
676 sdp_destroy_cma(ssk);
677 SDP_WLOCK(ssk);
678 /* FALLTHROUGH */
679 case TCPS_CLOSED:
680 ssk = sdp_closed(ssk);
681 /*
682 * sdp_closed() should never return NULL here as the socket is
683 * still open.
684 */
685 KASSERT(ssk != NULL,
686 ("sdp_usrclosed: sdp_closed() returned NULL"));
687 break;
688
689 case TCPS_SYN_SENT:
690 /* FALLTHROUGH */
691 case TCPS_SYN_RECEIVED:
692 ssk->flags |= SDP_NEEDFIN;
693 break;
694
695 case TCPS_ESTABLISHED:
696 ssk->flags |= SDP_NEEDFIN;
697 ssk->state = TCPS_FIN_WAIT_1;
698 break;
699
700 case TCPS_CLOSE_WAIT:
701 ssk->state = TCPS_LAST_ACK;
702 break;
703 }
704 if (ssk->state >= TCPS_FIN_WAIT_2) {
705 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
706 if (ssk->state == TCPS_FIN_WAIT_2)
707 sdp_2msl_wait(ssk);
708 else
709 soisdisconnected(ssk->socket);
710 }
711 }
712
713 static void
sdp_output_disconnect(struct sdp_sock * ssk)714 sdp_output_disconnect(struct sdp_sock *ssk)
715 {
716
717 SDP_WLOCK_ASSERT(ssk);
718 callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
719 sdp_dreq_timeout, ssk);
720 ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
721 sdp_post_sends(ssk, M_NOWAIT);
722 }
723
724 /*
725 * Initiate or continue a disconnect.
726 * If embryonic state, just send reset (once).
727 * If in ``let data drain'' option and linger null, just drop.
728 * Otherwise (hard), mark socket disconnecting and drop
729 * current input data; switch states based on user close, and
730 * send segment to peer (with FIN).
731 */
732 static void
sdp_start_disconnect(struct sdp_sock * ssk)733 sdp_start_disconnect(struct sdp_sock *ssk)
734 {
735 struct socket *so;
736 int unread;
737
738 so = ssk->socket;
739 SDP_WLOCK_ASSERT(ssk);
740 sdp_stop_keepalive_timer(so);
741 /*
742 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
743 * socket is still open.
744 */
745 if (ssk->state < TCPS_ESTABLISHED) {
746 ssk = sdp_closed(ssk);
747 KASSERT(ssk != NULL,
748 ("sdp_start_disconnect: sdp_close() returned NULL"));
749 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
750 ssk = sdp_drop(ssk, 0);
751 KASSERT(ssk != NULL,
752 ("sdp_start_disconnect: sdp_drop() returned NULL"));
753 } else {
754 soisdisconnecting(so);
755 unread = sbused(&so->so_rcv);
756 sbflush(&so->so_rcv);
757 sdp_usrclosed(ssk);
758 if (!(ssk->flags & SDP_DROPPED)) {
759 if (unread)
760 sdp_output_reset(ssk);
761 else
762 sdp_output_disconnect(ssk);
763 }
764 }
765 }
766
767 /*
768 * User initiated disconnect.
769 */
770 static int
sdp_disconnect(struct socket * so)771 sdp_disconnect(struct socket *so)
772 {
773 struct sdp_sock *ssk;
774 int error = 0;
775
776 ssk = sdp_sk(so);
777 SDP_WLOCK(ssk);
778 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
779 error = ECONNRESET;
780 goto out;
781 }
782 sdp_start_disconnect(ssk);
783 out:
784 SDP_WUNLOCK(ssk);
785 return (error);
786 }
787
788 /*
789 * Accept a connection. Essentially all the work is done at higher levels;
790 * just return the address of the peer, storing through addr.
791 *
792 *
793 * XXX This is broken XXX
794 *
795 * The rationale for acquiring the sdp lock here is somewhat complicated,
796 * and is described in detail in the commit log entry for r175612. Acquiring
797 * it delays an accept(2) racing with sonewconn(), which inserts the socket
798 * before the address/port fields are initialized. A better fix would
799 * prevent the socket from being placed in the listen queue until all fields
800 * are fully initialized.
801 */
802 static int
sdp_accept(struct socket * so,struct sockaddr ** nam)803 sdp_accept(struct socket *so, struct sockaddr **nam)
804 {
805 struct sdp_sock *ssk = NULL;
806 struct in_addr addr;
807 in_port_t port;
808 int error;
809
810 if (so->so_state & SS_ISDISCONNECTED)
811 return (ECONNABORTED);
812
813 port = 0;
814 addr.s_addr = 0;
815 error = 0;
816 ssk = sdp_sk(so);
817 SDP_WLOCK(ssk);
818 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
819 error = ECONNABORTED;
820 goto out;
821 }
822 port = ssk->fport;
823 addr.s_addr = ssk->faddr;
824 out:
825 SDP_WUNLOCK(ssk);
826 if (error == 0)
827 *nam = sdp_sockaddr(port, &addr);
828 return error;
829 }
830
831 /*
832 * Mark the connection as being incapable of further output.
833 */
834 static int
sdp_shutdown(struct socket * so)835 sdp_shutdown(struct socket *so)
836 {
837 int error = 0;
838 struct sdp_sock *ssk;
839
840 ssk = sdp_sk(so);
841 SDP_WLOCK(ssk);
842 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
843 error = ECONNRESET;
844 goto out;
845 }
846 socantsendmore(so);
847 sdp_usrclosed(ssk);
848 if (!(ssk->flags & SDP_DROPPED))
849 sdp_output_disconnect(ssk);
850
851 out:
852 SDP_WUNLOCK(ssk);
853
854 return (error);
855 }
856
857 static void
sdp_append(struct sdp_sock * ssk,struct sockbuf * sb,struct mbuf * mb,int cnt)858 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
859 {
860 struct mbuf *n;
861 int ncnt;
862
863 SOCKBUF_LOCK_ASSERT(sb);
864 SBLASTRECORDCHK(sb);
865 KASSERT(mb->m_flags & M_PKTHDR,
866 ("sdp_append: %p Missing packet header.\n", mb));
867 n = sb->sb_lastrecord;
868 /*
869 * If the queue is empty just set all pointers and proceed.
870 */
871 if (n == NULL) {
872 sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
873 for (; mb; mb = mb->m_next) {
874 sb->sb_mbtail = mb;
875 sballoc(sb, mb);
876 }
877 return;
878 }
879 /*
880 * Count the number of mbufs in the current tail.
881 */
882 for (ncnt = 0; n->m_next; n = n->m_next)
883 ncnt++;
884 n = sb->sb_lastrecord;
885 /*
886 * If the two chains can fit in a single sdp packet and
887 * the last record has not been sent yet (WRITABLE) coalesce
888 * them. The lastrecord remains the same but we must strip the
889 * packet header and then let sbcompress do the hard part.
890 */
891 if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
892 n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
893 ssk->xmit_size_goal) {
894 m_adj(mb, SDP_HEAD_SIZE);
895 n->m_pkthdr.len += mb->m_pkthdr.len;
896 n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
897 m_demote(mb, 1, 0);
898 sbcompress(sb, mb, sb->sb_mbtail);
899 return;
900 }
901 /*
902 * Not compressible, just append to the end and adjust counters.
903 */
904 sb->sb_lastrecord->m_flags |= M_PUSH;
905 sb->sb_lastrecord->m_nextpkt = mb;
906 sb->sb_lastrecord = mb;
907 if (sb->sb_sndptr == NULL)
908 sb->sb_sndptr = mb;
909 for (; mb; mb = mb->m_next) {
910 sb->sb_mbtail = mb;
911 sballoc(sb, mb);
912 }
913 }
914
915 /*
916 * Do a send by putting data in output queue and updating urgent
917 * marker if URG set. Possibly send more data. Unlike the other
918 * pru_*() routines, the mbuf chains are our responsibility. We
919 * must either enqueue them or free them. The other pru_* routines
920 * generally are caller-frees.
921 *
922 * This comes from sendfile, normal sends will come from sdp_sosend().
923 */
924 static int
sdp_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)925 sdp_send(struct socket *so, int flags, struct mbuf *m,
926 struct sockaddr *nam, struct mbuf *control, struct thread *td)
927 {
928 struct sdp_sock *ssk;
929 struct mbuf *n;
930 int error;
931 int cnt;
932
933 if (nam != NULL) {
934 if (nam->sa_family != AF_INET) {
935 if (control)
936 m_freem(control);
937 m_freem(m);
938 return (EAFNOSUPPORT);
939 }
940 if (nam->sa_len != sizeof(struct sockaddr_in)) {
941 if (control)
942 m_freem(control);
943 m_freem(m);
944 return (EINVAL);
945 }
946 }
947
948 error = 0;
949 ssk = sdp_sk(so);
950 KASSERT(m->m_flags & M_PKTHDR,
951 ("sdp_send: %p no packet header", m));
952 M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
953 mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
954 for (n = m, cnt = 0; n->m_next; n = n->m_next)
955 cnt++;
956 if (cnt > SDP_MAX_SEND_SGES) {
957 n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
958 if (n == NULL) {
959 m_freem(m);
960 return (EMSGSIZE);
961 }
962 m = n;
963 for (cnt = 0; n->m_next; n = n->m_next)
964 cnt++;
965 }
966 SDP_WLOCK(ssk);
967 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
968 if (control)
969 m_freem(control);
970 if (m)
971 m_freem(m);
972 error = ECONNRESET;
973 goto out;
974 }
975 if (control) {
976 /* SDP doesn't support control messages. */
977 if (control->m_len) {
978 m_freem(control);
979 if (m)
980 m_freem(m);
981 error = EINVAL;
982 goto out;
983 }
984 m_freem(control); /* empty control, just free it */
985 }
986 if (!(flags & PRUS_OOB)) {
987 SOCKBUF_LOCK(&so->so_snd);
988 sdp_append(ssk, &so->so_snd, m, cnt);
989 SOCKBUF_UNLOCK(&so->so_snd);
990 if (nam && ssk->state < TCPS_SYN_SENT) {
991 /*
992 * Do implied connect if not yet connected.
993 */
994 error = sdp_start_connect(ssk, nam, td);
995 if (error)
996 goto out;
997 }
998 if (flags & PRUS_EOF) {
999 /*
1000 * Close the send side of the connection after
1001 * the data is sent.
1002 */
1003 socantsendmore(so);
1004 sdp_usrclosed(ssk);
1005 if (!(ssk->flags & SDP_DROPPED))
1006 sdp_output_disconnect(ssk);
1007 } else if (!(ssk->flags & SDP_DROPPED) &&
1008 !(flags & PRUS_MORETOCOME))
1009 sdp_post_sends(ssk, M_NOWAIT);
1010 SDP_WUNLOCK(ssk);
1011 return (0);
1012 } else {
1013 SOCKBUF_LOCK(&so->so_snd);
1014 if (sbspace(&so->so_snd) < -512) {
1015 SOCKBUF_UNLOCK(&so->so_snd);
1016 m_freem(m);
1017 error = ENOBUFS;
1018 goto out;
1019 }
1020 /*
1021 * According to RFC961 (Assigned Protocols),
1022 * the urgent pointer points to the last octet
1023 * of urgent data. We continue, however,
1024 * to consider it to indicate the first octet
1025 * of data past the urgent section.
1026 * Otherwise, snd_up should be one lower.
1027 */
1028 m->m_flags |= M_URG | M_PUSH;
1029 sdp_append(ssk, &so->so_snd, m, cnt);
1030 SOCKBUF_UNLOCK(&so->so_snd);
1031 if (nam && ssk->state < TCPS_SYN_SENT) {
1032 /*
1033 * Do implied connect if not yet connected.
1034 */
1035 error = sdp_start_connect(ssk, nam, td);
1036 if (error)
1037 goto out;
1038 }
1039 sdp_post_sends(ssk, M_NOWAIT);
1040 SDP_WUNLOCK(ssk);
1041 return (0);
1042 }
1043 out:
1044 SDP_WUNLOCK(ssk);
1045 return (error);
1046 }
1047
1048 /*
1049 * Send on a socket. If send must go all at once and message is larger than
1050 * send buffering, then hard error. Lock against other senders. If must go
1051 * all at once and not enough room now, then inform user that this would
1052 * block and do nothing. Otherwise, if nonblocking, send as much as
1053 * possible. The data to be sent is described by "uio" if nonzero, otherwise
1054 * by the mbuf chain "top" (which must be null if uio is not). Data provided
1055 * in mbuf chain must be small enough to send all at once.
1056 *
1057 * Returns nonzero on error, timeout or signal; callers must check for short
1058 * counts if EINTR/ERESTART are returned. Data and control buffers are freed
1059 * on return.
1060 */
1061 static int
sdp_sosend(struct socket * so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags,struct thread * td)1062 sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
1063 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
1064 {
1065 struct sdp_sock *ssk;
1066 long space, resid;
1067 int atomic;
1068 int error;
1069 int copy;
1070
1071 if (uio != NULL)
1072 resid = uio->uio_resid;
1073 else
1074 resid = top->m_pkthdr.len;
1075 atomic = top != NULL;
1076 if (control != NULL) {
1077 if (control->m_len) {
1078 m_freem(control);
1079 if (top)
1080 m_freem(top);
1081 return (EINVAL);
1082 }
1083 m_freem(control);
1084 control = NULL;
1085 }
1086 /*
1087 * In theory resid should be unsigned. However, space must be
1088 * signed, as it might be less than 0 if we over-committed, and we
1089 * must use a signed comparison of space and resid. On the other
1090 * hand, a negative resid causes us to loop sending 0-length
1091 * segments to the protocol.
1092 *
1093 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1094 * type sockets since that's an error.
1095 */
1096 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1097 error = EINVAL;
1098 goto out;
1099 }
1100 if (td != NULL)
1101 td->td_ru.ru_msgsnd++;
1102
1103 ssk = sdp_sk(so);
1104 error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
1105 if (error)
1106 goto out;
1107
1108 restart:
1109 do {
1110 SOCKBUF_LOCK(&so->so_snd);
1111 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1112 SOCKBUF_UNLOCK(&so->so_snd);
1113 error = EPIPE;
1114 goto release;
1115 }
1116 if (so->so_error) {
1117 error = so->so_error;
1118 so->so_error = 0;
1119 SOCKBUF_UNLOCK(&so->so_snd);
1120 goto release;
1121 }
1122 if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
1123 SOCKBUF_UNLOCK(&so->so_snd);
1124 error = ENOTCONN;
1125 goto release;
1126 }
1127 space = sbspace(&so->so_snd);
1128 if (flags & MSG_OOB)
1129 space += 1024;
1130 if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
1131 SOCKBUF_UNLOCK(&so->so_snd);
1132 error = EMSGSIZE;
1133 goto release;
1134 }
1135 if (space < resid &&
1136 (atomic || space < so->so_snd.sb_lowat)) {
1137 if ((so->so_state & SS_NBIO) ||
1138 (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
1139 SOCKBUF_UNLOCK(&so->so_snd);
1140 error = EWOULDBLOCK;
1141 goto release;
1142 }
1143 error = sbwait(&so->so_snd);
1144 SOCKBUF_UNLOCK(&so->so_snd);
1145 if (error)
1146 goto release;
1147 goto restart;
1148 }
1149 SOCKBUF_UNLOCK(&so->so_snd);
1150 do {
1151 if (uio == NULL) {
1152 resid = 0;
1153 if (flags & MSG_EOR)
1154 top->m_flags |= M_EOR;
1155 } else {
1156 /*
1157 * Copy the data from userland into a mbuf
1158 * chain. If no data is to be copied in,
1159 * a single empty mbuf is returned.
1160 */
1161 copy = min(space,
1162 ssk->xmit_size_goal - SDP_HEAD_SIZE);
1163 top = m_uiotombuf(uio, M_WAITOK, copy,
1164 0, M_PKTHDR |
1165 ((flags & MSG_EOR) ? M_EOR : 0));
1166 if (top == NULL) {
1167 /* only possible error */
1168 error = EFAULT;
1169 goto release;
1170 }
1171 space -= resid - uio->uio_resid;
1172 resid = uio->uio_resid;
1173 }
1174 /*
1175 * XXX all the SBS_CANTSENDMORE checks previously
1176 * done could be out of date after dropping the
1177 * socket lock.
1178 */
1179 error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
1180 /*
1181 * Set EOF on the last send if the user specified
1182 * MSG_EOF.
1183 */
1184 ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
1185 /* If there is more to send set PRUS_MORETOCOME. */
1186 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
1187 top, addr, NULL, td);
1188 top = NULL;
1189 if (error)
1190 goto release;
1191 } while (resid && space > 0);
1192 } while (resid);
1193
1194 release:
1195 SOCK_IO_SEND_UNLOCK(so);
1196 out:
1197 if (top != NULL)
1198 m_freem(top);
1199 return (error);
1200 }
1201
1202 /*
1203 * The part of soreceive() that implements reading non-inline out-of-band
1204 * data from a socket. For more complete comments, see soreceive(), from
1205 * which this code originated.
1206 *
1207 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
1208 * unable to return an mbuf chain to the caller.
1209 */
1210 static int
soreceive_rcvoob(struct socket * so,struct uio * uio,int flags)1211 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
1212 {
1213 struct protosw *pr = so->so_proto;
1214 struct mbuf *m;
1215 int error;
1216
1217 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1218
1219 m = m_get(M_WAITOK, MT_DATA);
1220 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1221 if (error)
1222 goto bad;
1223 do {
1224 error = uiomove(mtod(m, void *),
1225 (int) min(uio->uio_resid, m->m_len), uio);
1226 m = m_free(m);
1227 } while (uio->uio_resid && error == 0 && m);
1228 bad:
1229 if (m != NULL)
1230 m_freem(m);
1231 return (error);
1232 }
1233
1234 /*
1235 * Optimized version of soreceive() for stream (TCP) sockets.
1236 */
1237 static int
sdp_sorecv(struct socket * so,struct sockaddr ** psa,struct uio * uio,struct mbuf ** mp0,struct mbuf ** controlp,int * flagsp)1238 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
1239 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1240 {
1241 int len = 0, error = 0, flags, oresid;
1242 struct sockbuf *sb;
1243 struct mbuf *m, *n = NULL;
1244 struct sdp_sock *ssk;
1245
1246 /* We only do stream sockets. */
1247 if (so->so_type != SOCK_STREAM)
1248 return (EINVAL);
1249 if (psa != NULL)
1250 *psa = NULL;
1251 if (controlp != NULL)
1252 return (EINVAL);
1253 if (flagsp != NULL)
1254 flags = *flagsp &~ MSG_EOR;
1255 else
1256 flags = 0;
1257 if (flags & MSG_OOB)
1258 return (soreceive_rcvoob(so, uio, flags));
1259 if (mp0 != NULL)
1260 *mp0 = NULL;
1261
1262 sb = &so->so_rcv;
1263 ssk = sdp_sk(so);
1264
1265 /* Prevent other readers from entering the socket. */
1266 error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
1267 if (error)
1268 return (error);
1269 SOCKBUF_LOCK(sb);
1270
1271 /* Easy one, no space to copyout anything. */
1272 if (uio->uio_resid == 0) {
1273 error = EINVAL;
1274 goto out;
1275 }
1276 oresid = uio->uio_resid;
1277
1278 /* We will never ever get anything unless we are connected. */
1279 if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
1280 /* When disconnecting there may be still some data left. */
1281 if (sbavail(sb))
1282 goto deliver;
1283 if (!(so->so_state & SS_ISDISCONNECTED))
1284 error = ENOTCONN;
1285 goto out;
1286 }
1287
1288 /* Socket buffer is empty and we shall not block. */
1289 if (sbavail(sb) == 0 &&
1290 ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
1291 error = EAGAIN;
1292 goto out;
1293 }
1294
1295 restart:
1296 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1297
1298 /* Abort if socket has reported problems. */
1299 if (so->so_error) {
1300 if (sbavail(sb))
1301 goto deliver;
1302 if (oresid > uio->uio_resid)
1303 goto out;
1304 error = so->so_error;
1305 if (!(flags & MSG_PEEK))
1306 so->so_error = 0;
1307 goto out;
1308 }
1309
1310 /* Door is closed. Deliver what is left, if any. */
1311 if (sb->sb_state & SBS_CANTRCVMORE) {
1312 if (sbavail(sb))
1313 goto deliver;
1314 else
1315 goto out;
1316 }
1317
1318 /* Socket buffer got some data that we shall deliver now. */
1319 if (sbavail(sb) && !(flags & MSG_WAITALL) &&
1320 ((so->so_state & SS_NBIO) ||
1321 (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
1322 sbavail(sb) >= sb->sb_lowat ||
1323 sbavail(sb) >= uio->uio_resid ||
1324 sbavail(sb) >= sb->sb_hiwat) ) {
1325 goto deliver;
1326 }
1327
1328 /* On MSG_WAITALL we must wait until all data or error arrives. */
1329 if ((flags & MSG_WAITALL) &&
1330 (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
1331 goto deliver;
1332
1333 /*
1334 * Wait and block until (more) data comes in.
1335 * NB: Drops the sockbuf lock during wait.
1336 */
1337 error = sbwait(sb);
1338 if (error)
1339 goto out;
1340 goto restart;
1341
1342 deliver:
1343 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1344 KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
1345 KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
1346
1347 /* Statistics. */
1348 if (uio->uio_td)
1349 uio->uio_td->td_ru.ru_msgrcv++;
1350
1351 /* Fill uio until full or current end of socket buffer is reached. */
1352 len = min(uio->uio_resid, sbavail(sb));
1353 if (mp0 != NULL) {
1354 /* Dequeue as many mbufs as possible. */
1355 if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
1356 for (*mp0 = m = sb->sb_mb;
1357 m != NULL && m->m_len <= len;
1358 m = m->m_next) {
1359 len -= m->m_len;
1360 uio->uio_resid -= m->m_len;
1361 sbfree(sb, m);
1362 n = m;
1363 }
1364 sb->sb_mb = m;
1365 if (sb->sb_mb == NULL)
1366 SB_EMPTY_FIXUP(sb);
1367 n->m_next = NULL;
1368 }
1369 /* Copy the remainder. */
1370 if (len > 0) {
1371 KASSERT(sb->sb_mb != NULL,
1372 ("%s: len > 0 && sb->sb_mb empty", __func__));
1373
1374 m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
1375 if (m == NULL)
1376 len = 0; /* Don't flush data from sockbuf. */
1377 else
1378 uio->uio_resid -= m->m_len;
1379 if (*mp0 != NULL)
1380 n->m_next = m;
1381 else
1382 *mp0 = m;
1383 if (*mp0 == NULL) {
1384 error = ENOBUFS;
1385 goto out;
1386 }
1387 }
1388 } else {
1389 /* NB: Must unlock socket buffer as uiomove may sleep. */
1390 SOCKBUF_UNLOCK(sb);
1391 error = m_mbuftouio(uio, sb->sb_mb, len);
1392 SOCKBUF_LOCK(sb);
1393 if (error)
1394 goto out;
1395 }
1396 SBLASTRECORDCHK(sb);
1397 SBLASTMBUFCHK(sb);
1398
1399 /*
1400 * Remove the delivered data from the socket buffer unless we
1401 * were only peeking.
1402 */
1403 if (!(flags & MSG_PEEK)) {
1404 if (len > 0)
1405 sbdrop_locked(sb, len);
1406
1407 /* Notify protocol that we drained some data. */
1408 SOCKBUF_UNLOCK(sb);
1409 SDP_WLOCK(ssk);
1410 sdp_do_posts(ssk);
1411 SDP_WUNLOCK(ssk);
1412 SOCKBUF_LOCK(sb);
1413 }
1414
1415 /*
1416 * For MSG_WAITALL we may have to loop again and wait for
1417 * more data to come in.
1418 */
1419 if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
1420 goto restart;
1421 out:
1422 SBLASTRECORDCHK(sb);
1423 SBLASTMBUFCHK(sb);
1424 SOCKBUF_UNLOCK(sb);
1425 SOCK_IO_RECV_UNLOCK(so);
1426 return (error);
1427 }
1428
1429 /*
1430 * Abort is used to teardown a connection typically while sitting in
1431 * the accept queue.
1432 */
1433 void
sdp_abort(struct socket * so)1434 sdp_abort(struct socket *so)
1435 {
1436 struct sdp_sock *ssk;
1437
1438 ssk = sdp_sk(so);
1439 SDP_WLOCK(ssk);
1440 /*
1441 * If we have not yet dropped, do it now.
1442 */
1443 if (!(ssk->flags & SDP_TIMEWAIT) &&
1444 !(ssk->flags & SDP_DROPPED))
1445 sdp_drop(ssk, ECONNABORTED);
1446 KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
1447 ssk, ssk->flags));
1448 SDP_WUNLOCK(ssk);
1449 }
1450
1451 /*
1452 * Close a SDP socket and initiate a friendly disconnect.
1453 */
1454 static void
sdp_close(struct socket * so)1455 sdp_close(struct socket *so)
1456 {
1457 struct sdp_sock *ssk;
1458
1459 ssk = sdp_sk(so);
1460 SDP_WLOCK(ssk);
1461 /*
1462 * If we have not yet dropped, do it now.
1463 */
1464 if (!(ssk->flags & SDP_TIMEWAIT) &&
1465 !(ssk->flags & SDP_DROPPED))
1466 sdp_start_disconnect(ssk);
1467
1468 /*
1469 * If we've still not dropped let the socket layer know we're
1470 * holding on to the socket and pcb for a while.
1471 */
1472 if (!(ssk->flags & SDP_DROPPED)) {
1473 SOCK_LOCK(so);
1474 so->so_state |= SS_PROTOREF;
1475 SOCK_UNLOCK(so);
1476 ssk->flags |= SDP_SOCKREF;
1477 }
1478 SDP_WUNLOCK(ssk);
1479 }
1480
1481 /*
1482 * User requests out-of-band data.
1483 */
1484 static int
sdp_rcvoob(struct socket * so,struct mbuf * m,int flags)1485 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1486 {
1487 int error = 0;
1488 struct sdp_sock *ssk;
1489
1490 ssk = sdp_sk(so);
1491 SDP_WLOCK(ssk);
1492 if (!rx_ring_trylock(&ssk->rx_ring)) {
1493 SDP_WUNLOCK(ssk);
1494 return (ECONNRESET);
1495 }
1496 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1497 error = ECONNRESET;
1498 goto out;
1499 }
1500 if ((so->so_oobmark == 0 &&
1501 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1502 so->so_options & SO_OOBINLINE ||
1503 ssk->oobflags & SDP_HADOOB) {
1504 error = EINVAL;
1505 goto out;
1506 }
1507 if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
1508 error = EWOULDBLOCK;
1509 goto out;
1510 }
1511 m->m_len = 1;
1512 *mtod(m, caddr_t) = ssk->iobc;
1513 if ((flags & MSG_PEEK) == 0)
1514 ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
1515 out:
1516 rx_ring_unlock(&ssk->rx_ring);
1517 SDP_WUNLOCK(ssk);
1518 return (error);
1519 }
1520
1521 void
sdp_urg(struct sdp_sock * ssk,struct mbuf * mb)1522 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
1523 {
1524 struct mbuf *m;
1525 struct socket *so;
1526
1527 so = ssk->socket;
1528 if (so == NULL)
1529 return;
1530
1531 so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
1532 sohasoutofband(so);
1533 ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
1534 if (!(so->so_options & SO_OOBINLINE)) {
1535 for (m = mb; m->m_next != NULL; m = m->m_next);
1536 ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
1537 ssk->oobflags |= SDP_HAVEOOB;
1538 m->m_len--;
1539 mb->m_pkthdr.len--;
1540 }
1541 }
1542
1543 /*
1544 * Notify a sdp socket of an asynchronous error.
1545 *
1546 * Do not wake up user since there currently is no mechanism for
1547 * reporting soft errors (yet - a kqueue filter may be added).
1548 */
1549 struct sdp_sock *
sdp_notify(struct sdp_sock * ssk,int error)1550 sdp_notify(struct sdp_sock *ssk, int error)
1551 {
1552
1553 SDP_WLOCK_ASSERT(ssk);
1554
1555 if ((ssk->flags & SDP_TIMEWAIT) ||
1556 (ssk->flags & SDP_DROPPED))
1557 return (ssk);
1558
1559 /*
1560 * Ignore some errors if we are hooked up.
1561 */
1562 if (ssk->state == TCPS_ESTABLISHED &&
1563 (error == EHOSTUNREACH || error == ENETUNREACH ||
1564 error == EHOSTDOWN))
1565 return (ssk);
1566 ssk->softerror = error;
1567 return sdp_drop(ssk, error);
1568 }
1569
1570 static void
sdp_ctlinput(int cmd,struct sockaddr * sa,void * vip)1571 sdp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
1572 {
1573 struct in_addr faddr;
1574
1575 faddr = ((struct sockaddr_in *)sa)->sin_addr;
1576 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
1577 return;
1578
1579 sdp_pcbnotifyall(faddr, inetctlerrmap[cmd], sdp_notify);
1580 }
1581
1582 static int
sdp_control(struct socket * so,u_long cmd,caddr_t data,struct ifnet * ifp,struct thread * td)1583 sdp_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
1584 struct thread *td)
1585 {
1586 return (EOPNOTSUPP);
1587 }
1588
1589 static void
sdp_keepalive_timeout(void * data)1590 sdp_keepalive_timeout(void *data)
1591 {
1592 struct sdp_sock *ssk;
1593
1594 ssk = data;
1595 /* Callout canceled. */
1596 if (!callout_active(&ssk->keep2msl))
1597 return;
1598 /* Callout rescheduled as a different kind of timer. */
1599 if (callout_pending(&ssk->keep2msl))
1600 goto out;
1601 callout_deactivate(&ssk->keep2msl);
1602 if (ssk->flags & SDP_DROPPED ||
1603 (ssk->socket->so_options & SO_KEEPALIVE) == 0)
1604 goto out;
1605 sdp_post_keepalive(ssk);
1606 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1607 sdp_keepalive_timeout, ssk);
1608 out:
1609 SDP_WUNLOCK(ssk);
1610 }
1611
1612
1613 void
sdp_start_keepalive_timer(struct socket * so)1614 sdp_start_keepalive_timer(struct socket *so)
1615 {
1616 struct sdp_sock *ssk;
1617
1618 ssk = sdp_sk(so);
1619 if (!callout_pending(&ssk->keep2msl))
1620 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1621 sdp_keepalive_timeout, ssk);
1622 }
1623
1624 static void
sdp_stop_keepalive_timer(struct socket * so)1625 sdp_stop_keepalive_timer(struct socket *so)
1626 {
1627 struct sdp_sock *ssk;
1628
1629 ssk = sdp_sk(so);
1630 callout_stop(&ssk->keep2msl);
1631 }
1632
1633 /*
1634 * sdp_ctloutput() must drop the inpcb lock before performing copyin on
1635 * socket option arguments. When it re-acquires the lock after the copy, it
1636 * has to revalidate that the connection is still valid for the socket
1637 * option.
1638 */
1639 #define SDP_WLOCK_RECHECK(inp) do { \
1640 SDP_WLOCK(ssk); \
1641 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { \
1642 SDP_WUNLOCK(ssk); \
1643 return (ECONNRESET); \
1644 } \
1645 } while(0)
1646
1647 static int
sdp_ctloutput(struct socket * so,struct sockopt * sopt)1648 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
1649 {
1650 int error, opt, optval;
1651 struct sdp_sock *ssk;
1652
1653 error = 0;
1654 ssk = sdp_sk(so);
1655 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
1656 SDP_WLOCK(ssk);
1657 if (so->so_options & SO_KEEPALIVE)
1658 sdp_start_keepalive_timer(so);
1659 else
1660 sdp_stop_keepalive_timer(so);
1661 SDP_WUNLOCK(ssk);
1662 }
1663 if (sopt->sopt_level != IPPROTO_TCP)
1664 return (error);
1665
1666 SDP_WLOCK(ssk);
1667 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1668 SDP_WUNLOCK(ssk);
1669 return (ECONNRESET);
1670 }
1671
1672 switch (sopt->sopt_dir) {
1673 case SOPT_SET:
1674 switch (sopt->sopt_name) {
1675 case TCP_NODELAY:
1676 SDP_WUNLOCK(ssk);
1677 error = sooptcopyin(sopt, &optval, sizeof optval,
1678 sizeof optval);
1679 if (error)
1680 return (error);
1681
1682 SDP_WLOCK_RECHECK(ssk);
1683 opt = SDP_NODELAY;
1684 if (optval)
1685 ssk->flags |= opt;
1686 else
1687 ssk->flags &= ~opt;
1688 sdp_do_posts(ssk);
1689 SDP_WUNLOCK(ssk);
1690 break;
1691
1692 default:
1693 SDP_WUNLOCK(ssk);
1694 error = ENOPROTOOPT;
1695 break;
1696 }
1697 break;
1698
1699 case SOPT_GET:
1700 switch (sopt->sopt_name) {
1701 case TCP_NODELAY:
1702 optval = ssk->flags & SDP_NODELAY;
1703 SDP_WUNLOCK(ssk);
1704 error = sooptcopyout(sopt, &optval, sizeof optval);
1705 break;
1706 default:
1707 SDP_WUNLOCK(ssk);
1708 error = ENOPROTOOPT;
1709 break;
1710 }
1711 break;
1712 }
1713 return (error);
1714 }
1715 #undef SDP_WLOCK_RECHECK
1716
1717 int sdp_mod_count = 0;
1718 int sdp_mod_usec = 0;
1719
1720 void
sdp_set_default_moderation(struct sdp_sock * ssk)1721 sdp_set_default_moderation(struct sdp_sock *ssk)
1722 {
1723 if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
1724 return;
1725 ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
1726 }
1727
1728 static void
sdp_dev_add(struct ib_device * device)1729 sdp_dev_add(struct ib_device *device)
1730 {
1731 struct ib_fmr_pool_param param;
1732 struct sdp_device *sdp_dev;
1733
1734 sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
1735 sdp_dev->pd = ib_alloc_pd(device, 0);
1736 if (IS_ERR(sdp_dev->pd))
1737 goto out_pd;
1738 memset(¶m, 0, sizeof param);
1739 param.max_pages_per_fmr = SDP_FMR_SIZE;
1740 param.page_shift = PAGE_SHIFT;
1741 param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
1742 param.pool_size = SDP_FMR_POOL_SIZE;
1743 param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
1744 param.cache = 1;
1745 sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, ¶m);
1746 if (IS_ERR(sdp_dev->fmr_pool))
1747 goto out_fmr;
1748 ib_set_client_data(device, &sdp_client, sdp_dev);
1749 return;
1750
1751 out_fmr:
1752 ib_dealloc_pd(sdp_dev->pd);
1753 out_pd:
1754 free(sdp_dev, M_SDP);
1755 }
1756
1757 static void
sdp_dev_rem(struct ib_device * device,void * client_data)1758 sdp_dev_rem(struct ib_device *device, void *client_data)
1759 {
1760 struct sdp_device *sdp_dev;
1761 struct sdp_sock *ssk;
1762
1763 SDP_LIST_WLOCK();
1764 LIST_FOREACH(ssk, &sdp_list, list) {
1765 if (ssk->ib_device != device)
1766 continue;
1767 SDP_WLOCK(ssk);
1768 if ((ssk->flags & SDP_DESTROY) == 0)
1769 ssk = sdp_notify(ssk, ECONNRESET);
1770 if (ssk)
1771 SDP_WUNLOCK(ssk);
1772 }
1773 SDP_LIST_WUNLOCK();
1774 /*
1775 * XXX Do I need to wait between these two?
1776 */
1777 sdp_dev = ib_get_client_data(device, &sdp_client);
1778 if (!sdp_dev)
1779 return;
1780 ib_flush_fmr_pool(sdp_dev->fmr_pool);
1781 ib_destroy_fmr_pool(sdp_dev->fmr_pool);
1782 ib_dealloc_pd(sdp_dev->pd);
1783 free(sdp_dev, M_SDP);
1784 }
1785
1786 struct ib_client sdp_client =
1787 { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
1788
1789
1790 static int
sdp_pcblist(SYSCTL_HANDLER_ARGS)1791 sdp_pcblist(SYSCTL_HANDLER_ARGS)
1792 {
1793 int error, n, i;
1794 struct sdp_sock *ssk;
1795 struct xinpgen xig;
1796
1797 /*
1798 * The process of preparing the TCB list is too time-consuming and
1799 * resource-intensive to repeat twice on every request.
1800 */
1801 if (req->oldptr == NULL) {
1802 n = sdp_count;
1803 n += imax(n / 8, 10);
1804 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
1805 return (0);
1806 }
1807
1808 if (req->newptr != NULL)
1809 return (EPERM);
1810
1811 /*
1812 * OK, now we're committed to doing something.
1813 */
1814 SDP_LIST_RLOCK();
1815 n = sdp_count;
1816 SDP_LIST_RUNLOCK();
1817
1818 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
1819 + n * sizeof(struct xtcpcb));
1820 if (error != 0)
1821 return (error);
1822
1823 bzero(&xig, sizeof(xig));
1824 xig.xig_len = sizeof xig;
1825 xig.xig_count = n;
1826 xig.xig_gen = 0;
1827 xig.xig_sogen = so_gencnt;
1828 error = SYSCTL_OUT(req, &xig, sizeof xig);
1829 if (error)
1830 return (error);
1831
1832 SDP_LIST_RLOCK();
1833 for (ssk = LIST_FIRST(&sdp_list), i = 0;
1834 ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
1835 struct xtcpcb xt;
1836
1837 SDP_RLOCK(ssk);
1838 if (ssk->flags & SDP_TIMEWAIT) {
1839 if (ssk->cred != NULL)
1840 error = cr_cansee(req->td->td_ucred,
1841 ssk->cred);
1842 else
1843 error = EINVAL; /* Skip this inp. */
1844 } else if (ssk->socket)
1845 error = cr_canseesocket(req->td->td_ucred,
1846 ssk->socket);
1847 else
1848 error = EINVAL;
1849 if (error) {
1850 error = 0;
1851 goto next;
1852 }
1853
1854 bzero(&xt, sizeof(xt));
1855 xt.xt_len = sizeof xt;
1856 xt.xt_inp.inp_gencnt = 0;
1857 xt.xt_inp.inp_vflag = INP_IPV4;
1858 memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
1859 xt.xt_inp.inp_lport = ssk->lport;
1860 memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
1861 xt.xt_inp.inp_fport = ssk->fport;
1862 xt.t_state = ssk->state;
1863 if (ssk->socket != NULL)
1864 sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
1865 xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
1866 SDP_RUNLOCK(ssk);
1867 error = SYSCTL_OUT(req, &xt, sizeof xt);
1868 if (error)
1869 break;
1870 i++;
1871 continue;
1872 next:
1873 SDP_RUNLOCK(ssk);
1874 }
1875 if (!error) {
1876 /*
1877 * Give the user an updated idea of our state.
1878 * If the generation differs from what we told
1879 * her before, she knows that something happened
1880 * while we were processing this request, and it
1881 * might be necessary to retry.
1882 */
1883 xig.xig_gen = 0;
1884 xig.xig_sogen = so_gencnt;
1885 xig.xig_count = sdp_count;
1886 error = SYSCTL_OUT(req, &xig, sizeof xig);
1887 }
1888 SDP_LIST_RUNLOCK();
1889 return (error);
1890 }
1891
1892 SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1893 "SDP");
1894
1895 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
1896 CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
1897 0, 0, sdp_pcblist, "S,xtcpcb",
1898 "List of active SDP connections");
1899
1900 static void
sdp_zone_change(void * tag)1901 sdp_zone_change(void *tag)
1902 {
1903
1904 uma_zone_set_max(sdp_zone, maxsockets);
1905 }
1906
1907 static void
sdp_init(void)1908 sdp_init(void)
1909 {
1910
1911 LIST_INIT(&sdp_list);
1912 sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
1913 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1914 uma_zone_set_max(sdp_zone, maxsockets);
1915 EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
1916 EVENTHANDLER_PRI_ANY);
1917 rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
1918 ib_register_client(&sdp_client);
1919 }
1920
1921 extern struct domain sdpdomain;
1922
1923 struct pr_usrreqs sdp_usrreqs = {
1924 .pru_abort = sdp_abort,
1925 .pru_accept = sdp_accept,
1926 .pru_attach = sdp_attach,
1927 .pru_bind = sdp_bind,
1928 .pru_connect = sdp_connect,
1929 .pru_control = sdp_control,
1930 .pru_detach = sdp_detach,
1931 .pru_disconnect = sdp_disconnect,
1932 .pru_listen = sdp_listen,
1933 .pru_peeraddr = sdp_getpeeraddr,
1934 .pru_rcvoob = sdp_rcvoob,
1935 .pru_send = sdp_send,
1936 .pru_sosend = sdp_sosend,
1937 .pru_soreceive = sdp_sorecv,
1938 .pru_shutdown = sdp_shutdown,
1939 .pru_sockaddr = sdp_getsockaddr,
1940 .pru_close = sdp_close,
1941 };
1942
1943 struct protosw sdpsw[] = {
1944 {
1945 .pr_type = SOCK_STREAM,
1946 .pr_domain = &sdpdomain,
1947 .pr_protocol = IPPROTO_IP,
1948 .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
1949 .pr_ctlinput = sdp_ctlinput,
1950 .pr_ctloutput = sdp_ctloutput,
1951 .pr_usrreqs = &sdp_usrreqs
1952 },
1953 {
1954 .pr_type = SOCK_STREAM,
1955 .pr_domain = &sdpdomain,
1956 .pr_protocol = IPPROTO_TCP,
1957 .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
1958 .pr_ctlinput = sdp_ctlinput,
1959 .pr_ctloutput = sdp_ctloutput,
1960 .pr_usrreqs = &sdp_usrreqs
1961 },
1962 };
1963
1964 struct domain sdpdomain = {
1965 .dom_family = AF_INET_SDP,
1966 .dom_name = "SDP",
1967 .dom_init = sdp_init,
1968 .dom_protosw = sdpsw,
1969 .dom_protoswNPROTOSW = &sdpsw[sizeof(sdpsw)/sizeof(sdpsw[0])],
1970 };
1971
/* Register the domain declared above as "sdpdomain". */
DOMAIN_SET(sdp);

/* Debug verbosity knobs: general messages on (1), data-path off (0). */
int sdp_debug_level = 1;
int sdp_data_debug_level = 0;
1976