1
2 /*-
3 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 2004 The FreeBSD Foundation. All rights reserved.
6 * Copyright (c) 2004-2008 Robert N. M. Watson. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
33 */
34
35 /*
36 *
37 * Copyright (c) 2010 Isilon Systems, Inc.
38 * Copyright (c) 2010 iX Systems, Inc.
39 * Copyright (c) 2010 Panasas, Inc.
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice unmodified, this list of conditions, and the following
47 * disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
53 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
56 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
57 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
61 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62 *
63 */
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66
67 #include "sdp.h"
68
69 #include <net/if.h>
70 #include <net/route.h>
71 #include <net/vnet.h>
72
/* UMA zone that sdp_sock structures are allocated from and freed to. */
uma_zone_t sdp_zone;
/* Read/write lock taken around updates of sdp_list and sdp_count. */
struct rwlock sdp_lock;
/* List of pcbs; entries are added in sdp_attach(), removed in sdp_pcbfree(). */
LIST_HEAD(, sdp_sock) sdp_list;

/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * workqueue used for RX completions -- confirm against the users.
 */
struct workqueue_struct *rx_comp_wq;

RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
/* Shorthands for acquiring, releasing and asserting sdp_lock. */
#define SDP_LIST_WLOCK() rw_wlock(&sdp_lock)
#define SDP_LIST_RLOCK() rw_rlock(&sdp_lock)
#define SDP_LIST_WUNLOCK() rw_wunlock(&sdp_lock)
#define SDP_LIST_RUNLOCK() rw_runlock(&sdp_lock)
#define SDP_LIST_WLOCK_ASSERT() rw_assert(&sdp_lock, RW_WLOCKED)
#define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED)
#define SDP_LIST_LOCK_ASSERT() rw_assert(&sdp_lock, RW_LOCKED)

/* malloc(9) type for SDP allocations. */
static MALLOC_DEFINE(M_SDP, "sdp", "Socket Direct Protocol");

/* Forward declaration; used by sdp_start_disconnect() before its definition. */
static void sdp_stop_keepalive_timer(struct socket *so);

/*
 * SDP protocol interface to socket abstraction.
 */
/*
 * sdp_sendspace and sdp_recvspace are the default send and receive window
 * sizes, respectively.  sdp_attach() passes them to soreserve() when the
 * socket has no buffer space reserved yet.
 */
u_long sdp_sendspace = 1024*32;
u_long sdp_recvspace = 1024*64;

/* Number of pcbs on sdp_list; modified with the list write lock held. */
static int sdp_count;
103
104 /*
105 * Disable async. CMA events for sockets which are being torn down.
106 */
107 static void
sdp_destroy_cma(struct sdp_sock * ssk)108 sdp_destroy_cma(struct sdp_sock *ssk)
109 {
110
111 if (ssk->id == NULL)
112 return;
113 rdma_destroy_id(ssk->id);
114 ssk->id = NULL;
115 }
116
117 static int
sdp_pcbbind(struct sdp_sock * ssk,struct sockaddr * nam,struct ucred * cred)118 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
119 {
120 struct sockaddr_in *sin;
121 struct sockaddr_in null;
122 int error;
123
124 SDP_WLOCK_ASSERT(ssk);
125
126 if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
127 return (EINVAL);
128 /* rdma_bind_addr handles bind races. */
129 SDP_WUNLOCK(ssk);
130 if (ssk->id == NULL)
131 ssk->id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP);
132 if (ssk->id == NULL) {
133 SDP_WLOCK(ssk);
134 return (ENOMEM);
135 }
136 if (nam == NULL) {
137 null.sin_family = AF_INET;
138 null.sin_len = sizeof(null);
139 null.sin_addr.s_addr = INADDR_ANY;
140 null.sin_port = 0;
141 bzero(&null.sin_zero, sizeof(null.sin_zero));
142 nam = (struct sockaddr *)&null;
143 }
144 error = -rdma_bind_addr(ssk->id, nam);
145 SDP_WLOCK(ssk);
146 if (error == 0) {
147 sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
148 ssk->laddr = sin->sin_addr.s_addr;
149 ssk->lport = sin->sin_port;
150 } else
151 sdp_destroy_cma(ssk);
152 return (error);
153 }
154
155 static void
sdp_pcbfree(struct sdp_sock * ssk)156 sdp_pcbfree(struct sdp_sock *ssk)
157 {
158 KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
159
160 sdp_dbg(ssk->socket, "Freeing pcb");
161 SDP_WLOCK_ASSERT(ssk);
162 ssk->flags |= SDP_DESTROY;
163 SDP_WUNLOCK(ssk);
164 SDP_LIST_WLOCK();
165 sdp_count--;
166 LIST_REMOVE(ssk, list);
167 SDP_LIST_WUNLOCK();
168 crfree(ssk->cred);
169 sdp_destroy_cma(ssk);
170 ssk->qp_active = 0;
171 if (ssk->qp) {
172 ib_destroy_qp(ssk->qp);
173 ssk->qp = NULL;
174 }
175 sdp_tx_ring_destroy(ssk);
176 sdp_rx_ring_destroy(ssk);
177 rw_destroy(&ssk->rx_ring.destroyed_lock);
178 uma_zfree(sdp_zone, ssk);
179 rw_destroy(&ssk->lock);
180 }
181
182 /*
183 * Common routines to return a socket address.
184 */
185 static struct sockaddr *
sdp_sockaddr(in_port_t port,struct in_addr * addr_p)186 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
187 {
188 struct sockaddr_in *sin;
189
190 sin = malloc(sizeof *sin, M_SONAME,
191 M_WAITOK | M_ZERO);
192 sin->sin_family = AF_INET;
193 sin->sin_len = sizeof(*sin);
194 sin->sin_addr = *addr_p;
195 sin->sin_port = port;
196
197 return (struct sockaddr *)sin;
198 }
199
200 static int
sdp_getsockaddr(struct socket * so,struct sockaddr ** nam)201 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
202 {
203 struct sdp_sock *ssk;
204 struct in_addr addr;
205 in_port_t port;
206
207 ssk = sdp_sk(so);
208 SDP_RLOCK(ssk);
209 port = ssk->lport;
210 addr.s_addr = ssk->laddr;
211 SDP_RUNLOCK(ssk);
212
213 *nam = sdp_sockaddr(port, &addr);
214 return 0;
215 }
216
217 static int
sdp_getpeeraddr(struct socket * so,struct sockaddr ** nam)218 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
219 {
220 struct sdp_sock *ssk;
221 struct in_addr addr;
222 in_port_t port;
223
224 ssk = sdp_sk(so);
225 SDP_RLOCK(ssk);
226 port = ssk->fport;
227 addr.s_addr = ssk->faddr;
228 SDP_RUNLOCK(ssk);
229
230 *nam = sdp_sockaddr(port, &addr);
231 return 0;
232 }
233
234 static void
sdp_pcbnotifyall(struct in_addr faddr,int errno,struct sdp_sock * (* notify)(struct sdp_sock *,int))235 sdp_pcbnotifyall(struct in_addr faddr, int errno,
236 struct sdp_sock *(*notify)(struct sdp_sock *, int))
237 {
238 struct sdp_sock *ssk, *ssk_temp;
239
240 SDP_LIST_WLOCK();
241 LIST_FOREACH_SAFE(ssk, &sdp_list, list, ssk_temp) {
242 SDP_WLOCK(ssk);
243 if (ssk->faddr != faddr.s_addr || ssk->socket == NULL) {
244 SDP_WUNLOCK(ssk);
245 continue;
246 }
247 if ((ssk->flags & SDP_DESTROY) == 0)
248 if ((*notify)(ssk, errno))
249 SDP_WUNLOCK(ssk);
250 }
251 SDP_LIST_WUNLOCK();
252 }
253
#if 0
/*
 * Currently compiled out.  Calls func(ssk, arg) for every pcb on
 * sdp_list, holding the list read lock for the whole walk and each pcb's
 * write lock around its callback.
 */
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
	struct sdp_sock *ssk;

	SDP_LIST_RLOCK();
	LIST_FOREACH(ssk, &sdp_list, list) {
		SDP_WLOCK(ssk);
		func(ssk, arg);
		SDP_WUNLOCK(ssk);
	}
	SDP_LIST_RUNLOCK();
}
#endif
269
270 static void
sdp_output_reset(struct sdp_sock * ssk)271 sdp_output_reset(struct sdp_sock *ssk)
272 {
273 struct rdma_cm_id *id;
274
275 SDP_WLOCK_ASSERT(ssk);
276 if (ssk->id) {
277 id = ssk->id;
278 ssk->qp_active = 0;
279 SDP_WUNLOCK(ssk);
280 rdma_disconnect(id);
281 SDP_WLOCK(ssk);
282 }
283 ssk->state = TCPS_CLOSED;
284 }
285
286 /*
287 * Attempt to close a SDP socket, marking it as dropped, and freeing
288 * the socket if we hold the only reference.
289 */
290 static struct sdp_sock *
sdp_closed(struct sdp_sock * ssk)291 sdp_closed(struct sdp_sock *ssk)
292 {
293 struct socket *so;
294
295 SDP_WLOCK_ASSERT(ssk);
296
297 ssk->flags |= SDP_DROPPED;
298 so = ssk->socket;
299 soisdisconnected(so);
300 if (ssk->flags & SDP_SOCKREF) {
301 KASSERT(so->so_state & SS_PROTOREF,
302 ("sdp_closed: !SS_PROTOREF"));
303 ssk->flags &= ~SDP_SOCKREF;
304 SDP_WUNLOCK(ssk);
305 ACCEPT_LOCK();
306 SOCK_LOCK(so);
307 so->so_state &= ~SS_PROTOREF;
308 sofree(so);
309 return (NULL);
310 }
311 return (ssk);
312 }
313
314 /*
315 * Perform timer based shutdowns which can not operate in
316 * callout context.
317 */
318 static void
sdp_shutdown_task(void * data,int pending)319 sdp_shutdown_task(void *data, int pending)
320 {
321 struct sdp_sock *ssk;
322
323 ssk = data;
324 SDP_WLOCK(ssk);
325 /*
326 * I don't think this can race with another call to pcbfree()
327 * because SDP_TIMEWAIT protects it. SDP_DESTROY may be redundant.
328 */
329 if (ssk->flags & SDP_DESTROY)
330 panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
331 ssk);
332 if (ssk->flags & SDP_DISCON)
333 sdp_output_reset(ssk);
334 /* We have to clear this so sdp_detach() will call pcbfree(). */
335 ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
336 if ((ssk->flags & SDP_DROPPED) == 0 &&
337 sdp_closed(ssk) == NULL)
338 return;
339 if (ssk->socket == NULL) {
340 sdp_pcbfree(ssk);
341 return;
342 }
343 SDP_WUNLOCK(ssk);
344 }
345
346 /*
347 * 2msl has expired, schedule the shutdown task.
348 */
349 static void
sdp_2msl_timeout(void * data)350 sdp_2msl_timeout(void *data)
351 {
352 struct sdp_sock *ssk;
353
354 ssk = data;
355 /* Callout canceled. */
356 if (!callout_active(&ssk->keep2msl))
357 goto out;
358 callout_deactivate(&ssk->keep2msl);
359 /* Should be impossible, defensive programming. */
360 if ((ssk->flags & SDP_TIMEWAIT) == 0)
361 goto out;
362 taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
363 out:
364 SDP_WUNLOCK(ssk);
365 return;
366 }
367
368 /*
369 * Schedule the 2msl wait timer.
370 */
371 static void
sdp_2msl_wait(struct sdp_sock * ssk)372 sdp_2msl_wait(struct sdp_sock *ssk)
373 {
374
375 SDP_WLOCK_ASSERT(ssk);
376 ssk->flags |= SDP_TIMEWAIT;
377 ssk->state = TCPS_TIME_WAIT;
378 soisdisconnected(ssk->socket);
379 callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
380 }
381
382 /*
383 * Timed out waiting for the final fin/ack from rdma_disconnect().
384 */
385 static void
sdp_dreq_timeout(void * data)386 sdp_dreq_timeout(void *data)
387 {
388 struct sdp_sock *ssk;
389
390 ssk = data;
391 /* Callout canceled. */
392 if (!callout_active(&ssk->keep2msl))
393 goto out;
394 /* Callout rescheduled, probably as a different timer. */
395 if (callout_pending(&ssk->keep2msl))
396 goto out;
397 callout_deactivate(&ssk->keep2msl);
398 if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
399 goto out;
400 if ((ssk->flags & SDP_DREQWAIT) == 0)
401 goto out;
402 ssk->flags &= ~SDP_DREQWAIT;
403 ssk->flags |= SDP_DISCON;
404 sdp_2msl_wait(ssk);
405 ssk->qp_active = 0;
406 out:
407 SDP_WUNLOCK(ssk);
408 }
409
410 /*
411 * Received the final fin/ack. Cancel the 2msl.
412 */
413 void
sdp_cancel_dreq_wait_timeout(struct sdp_sock * ssk)414 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
415 {
416 sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
417 ssk->flags &= ~SDP_DREQWAIT;
418 sdp_2msl_wait(ssk);
419 }
420
421 static int
sdp_init_sock(struct socket * sk)422 sdp_init_sock(struct socket *sk)
423 {
424 struct sdp_sock *ssk = sdp_sk(sk);
425
426 sdp_dbg(sk, "%s\n", __func__);
427
428 callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
429 TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
430 #ifdef SDP_ZCOPY
431 INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
432 ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
433 ssk->tx_ring.rdma_inflight = NULL;
434 #endif
435 atomic_set(&ssk->mseq_ack, 0);
436 sdp_rx_ring_init(ssk);
437 ssk->tx_ring.buffer = NULL;
438
439 return 0;
440 }
441
442 /*
443 * Allocate an sdp_sock for the socket and reserve socket buffer space.
444 */
445 static int
sdp_attach(struct socket * so,int proto,struct thread * td)446 sdp_attach(struct socket *so, int proto, struct thread *td)
447 {
448 struct sdp_sock *ssk;
449 int error;
450
451 ssk = sdp_sk(so);
452 KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
453 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
454 error = soreserve(so, sdp_sendspace, sdp_recvspace);
455 if (error)
456 return (error);
457 }
458 so->so_rcv.sb_flags |= SB_AUTOSIZE;
459 so->so_snd.sb_flags |= SB_AUTOSIZE;
460 ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
461 if (ssk == NULL)
462 return (ENOBUFS);
463 rw_init(&ssk->lock, "sdpsock");
464 ssk->socket = so;
465 ssk->cred = crhold(so->so_cred);
466 so->so_pcb = (caddr_t)ssk;
467 sdp_init_sock(so);
468 ssk->flags = 0;
469 ssk->qp_active = 0;
470 ssk->state = TCPS_CLOSED;
471 SDP_LIST_WLOCK();
472 LIST_INSERT_HEAD(&sdp_list, ssk, list);
473 sdp_count++;
474 SDP_LIST_WUNLOCK();
475 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
476 so->so_linger = TCP_LINGERTIME;
477
478 return (0);
479 }
480
481 /*
482 * Detach SDP from the socket, potentially leaving it around for the
483 * timewait to expire.
484 */
485 static void
sdp_detach(struct socket * so)486 sdp_detach(struct socket *so)
487 {
488 struct sdp_sock *ssk;
489
490 ssk = sdp_sk(so);
491 SDP_WLOCK(ssk);
492 KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
493 ssk->socket->so_pcb = NULL;
494 ssk->socket = NULL;
495 if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
496 SDP_WUNLOCK(ssk);
497 else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
498 sdp_pcbfree(ssk);
499 else
500 panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
501 }
502
503 /*
504 * Allocate a local address for the socket.
505 */
506 static int
sdp_bind(struct socket * so,struct sockaddr * nam,struct thread * td)507 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
508 {
509 int error = 0;
510 struct sdp_sock *ssk;
511 struct sockaddr_in *sin;
512
513 sin = (struct sockaddr_in *)nam;
514 if (nam->sa_len != sizeof (*sin))
515 return (EINVAL);
516 if (sin->sin_family != AF_INET)
517 return (EINVAL);
518 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
519 return (EAFNOSUPPORT);
520
521 ssk = sdp_sk(so);
522 SDP_WLOCK(ssk);
523 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
524 error = EINVAL;
525 goto out;
526 }
527 error = sdp_pcbbind(ssk, nam, td->td_ucred);
528 out:
529 SDP_WUNLOCK(ssk);
530
531 return (error);
532 }
533
534 /*
535 * Prepare to accept connections.
536 */
537 static int
sdp_listen(struct socket * so,int backlog,struct thread * td)538 sdp_listen(struct socket *so, int backlog, struct thread *td)
539 {
540 int error = 0;
541 struct sdp_sock *ssk;
542
543 ssk = sdp_sk(so);
544 SDP_WLOCK(ssk);
545 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
546 error = EINVAL;
547 goto out;
548 }
549 if (error == 0 && ssk->lport == 0)
550 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
551 SOCK_LOCK(so);
552 if (error == 0)
553 error = solisten_proto_check(so);
554 if (error == 0) {
555 solisten_proto(so, backlog);
556 ssk->state = TCPS_LISTEN;
557 }
558 SOCK_UNLOCK(so);
559
560 out:
561 SDP_WUNLOCK(ssk);
562 if (error == 0)
563 error = -rdma_listen(ssk->id, backlog);
564 return (error);
565 }
566
567 /*
568 * Initiate a SDP connection to nam.
569 */
570 static int
sdp_start_connect(struct sdp_sock * ssk,struct sockaddr * nam,struct thread * td)571 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
572 {
573 struct sockaddr_in src;
574 struct socket *so;
575 int error;
576
577 so = ssk->socket;
578
579 SDP_WLOCK_ASSERT(ssk);
580 if (ssk->lport == 0) {
581 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
582 if (error)
583 return error;
584 }
585 src.sin_family = AF_INET;
586 src.sin_len = sizeof(src);
587 bzero(&src.sin_zero, sizeof(src.sin_zero));
588 src.sin_port = ssk->lport;
589 src.sin_addr.s_addr = ssk->laddr;
590 soisconnecting(so);
591 SDP_WUNLOCK(ssk);
592 error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
593 SDP_RESOLVE_TIMEOUT);
594 SDP_WLOCK(ssk);
595 if (error == 0)
596 ssk->state = TCPS_SYN_SENT;
597
598 return 0;
599 }
600
601 /*
602 * Initiate SDP connection.
603 */
604 static int
sdp_connect(struct socket * so,struct sockaddr * nam,struct thread * td)605 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
606 {
607 int error = 0;
608 struct sdp_sock *ssk;
609 struct sockaddr_in *sin;
610
611 sin = (struct sockaddr_in *)nam;
612 if (nam->sa_len != sizeof (*sin))
613 return (EINVAL);
614 if (sin->sin_family != AF_INET)
615 return (EINVAL);
616 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
617 return (EAFNOSUPPORT);
618 if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
619 return (error);
620 ssk = sdp_sk(so);
621 SDP_WLOCK(ssk);
622 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
623 error = EINVAL;
624 else
625 error = sdp_start_connect(ssk, nam, td);
626 SDP_WUNLOCK(ssk);
627 return (error);
628 }
629
630 /*
631 * Drop a SDP socket, reporting
632 * the specified error. If connection is synchronized,
633 * then send a RST to peer.
634 */
635 static struct sdp_sock *
sdp_drop(struct sdp_sock * ssk,int errno)636 sdp_drop(struct sdp_sock *ssk, int errno)
637 {
638 struct socket *so;
639
640 SDP_WLOCK_ASSERT(ssk);
641 so = ssk->socket;
642 if (TCPS_HAVERCVDSYN(ssk->state))
643 sdp_output_reset(ssk);
644 if (errno == ETIMEDOUT && ssk->softerror)
645 errno = ssk->softerror;
646 so->so_error = errno;
647 return (sdp_closed(ssk));
648 }
649
650 /*
651 * User issued close, and wish to trail through shutdown states:
652 * if never received SYN, just forget it. If got a SYN from peer,
653 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
654 * If already got a FIN from peer, then almost done; go to LAST_ACK
655 * state. In all other cases, have already sent FIN to peer (e.g.
656 * after PRU_SHUTDOWN), and just have to play tedious game waiting
657 * for peer to send FIN or not respond to keep-alives, etc.
658 * We can let the user exit from the close as soon as the FIN is acked.
659 */
660 static void
sdp_usrclosed(struct sdp_sock * ssk)661 sdp_usrclosed(struct sdp_sock *ssk)
662 {
663
664 SDP_WLOCK_ASSERT(ssk);
665
666 switch (ssk->state) {
667 case TCPS_LISTEN:
668 ssk->state = TCPS_CLOSED;
669 SDP_WUNLOCK(ssk);
670 sdp_destroy_cma(ssk);
671 SDP_WLOCK(ssk);
672 /* FALLTHROUGH */
673 case TCPS_CLOSED:
674 ssk = sdp_closed(ssk);
675 /*
676 * sdp_closed() should never return NULL here as the socket is
677 * still open.
678 */
679 KASSERT(ssk != NULL,
680 ("sdp_usrclosed: sdp_closed() returned NULL"));
681 break;
682
683 case TCPS_SYN_SENT:
684 /* FALLTHROUGH */
685 case TCPS_SYN_RECEIVED:
686 ssk->flags |= SDP_NEEDFIN;
687 break;
688
689 case TCPS_ESTABLISHED:
690 ssk->flags |= SDP_NEEDFIN;
691 ssk->state = TCPS_FIN_WAIT_1;
692 break;
693
694 case TCPS_CLOSE_WAIT:
695 ssk->state = TCPS_LAST_ACK;
696 break;
697 }
698 if (ssk->state >= TCPS_FIN_WAIT_2) {
699 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
700 if (ssk->state == TCPS_FIN_WAIT_2)
701 sdp_2msl_wait(ssk);
702 else
703 soisdisconnected(ssk->socket);
704 }
705 }
706
707 static void
sdp_output_disconnect(struct sdp_sock * ssk)708 sdp_output_disconnect(struct sdp_sock *ssk)
709 {
710
711 SDP_WLOCK_ASSERT(ssk);
712 callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
713 sdp_dreq_timeout, ssk);
714 ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
715 sdp_post_sends(ssk, M_NOWAIT);
716 }
717
718 /*
719 * Initiate or continue a disconnect.
720 * If embryonic state, just send reset (once).
721 * If in ``let data drain'' option and linger null, just drop.
722 * Otherwise (hard), mark socket disconnecting and drop
723 * current input data; switch states based on user close, and
724 * send segment to peer (with FIN).
725 */
726 static void
sdp_start_disconnect(struct sdp_sock * ssk)727 sdp_start_disconnect(struct sdp_sock *ssk)
728 {
729 struct socket *so;
730 int unread;
731
732 so = ssk->socket;
733 SDP_WLOCK_ASSERT(ssk);
734 sdp_stop_keepalive_timer(so);
735 /*
736 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
737 * socket is still open.
738 */
739 if (ssk->state < TCPS_ESTABLISHED) {
740 ssk = sdp_closed(ssk);
741 KASSERT(ssk != NULL,
742 ("sdp_start_disconnect: sdp_close() returned NULL"));
743 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
744 ssk = sdp_drop(ssk, 0);
745 KASSERT(ssk != NULL,
746 ("sdp_start_disconnect: sdp_drop() returned NULL"));
747 } else {
748 soisdisconnecting(so);
749 unread = so->so_rcv.sb_cc;
750 sbflush(&so->so_rcv);
751 sdp_usrclosed(ssk);
752 if (!(ssk->flags & SDP_DROPPED)) {
753 if (unread)
754 sdp_output_reset(ssk);
755 else
756 sdp_output_disconnect(ssk);
757 }
758 }
759 }
760
761 /*
762 * User initiated disconnect.
763 */
764 static int
sdp_disconnect(struct socket * so)765 sdp_disconnect(struct socket *so)
766 {
767 struct sdp_sock *ssk;
768 int error = 0;
769
770 ssk = sdp_sk(so);
771 SDP_WLOCK(ssk);
772 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
773 error = ECONNRESET;
774 goto out;
775 }
776 sdp_start_disconnect(ssk);
777 out:
778 SDP_WUNLOCK(ssk);
779 return (error);
780 }
781
782 /*
783 * Accept a connection. Essentially all the work is done at higher levels;
784 * just return the address of the peer, storing through addr.
785 *
786 *
787 * XXX This is broken XXX
788 *
789 * The rationale for acquiring the sdp lock here is somewhat complicated,
790 * and is described in detail in the commit log entry for r175612. Acquiring
791 * it delays an accept(2) racing with sonewconn(), which inserts the socket
792 * before the address/port fields are initialized. A better fix would
793 * prevent the socket from being placed in the listen queue until all fields
794 * are fully initialized.
795 */
796 static int
sdp_accept(struct socket * so,struct sockaddr ** nam)797 sdp_accept(struct socket *so, struct sockaddr **nam)
798 {
799 struct sdp_sock *ssk = NULL;
800 struct in_addr addr;
801 in_port_t port;
802 int error;
803
804 if (so->so_state & SS_ISDISCONNECTED)
805 return (ECONNABORTED);
806
807 port = 0;
808 addr.s_addr = 0;
809 error = 0;
810 ssk = sdp_sk(so);
811 SDP_WLOCK(ssk);
812 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
813 error = ECONNABORTED;
814 goto out;
815 }
816 port = ssk->fport;
817 addr.s_addr = ssk->faddr;
818 out:
819 SDP_WUNLOCK(ssk);
820 if (error == 0)
821 *nam = sdp_sockaddr(port, &addr);
822 return error;
823 }
824
825 /*
826 * Mark the connection as being incapable of further output.
827 */
828 static int
sdp_shutdown(struct socket * so)829 sdp_shutdown(struct socket *so)
830 {
831 int error = 0;
832 struct sdp_sock *ssk;
833
834 ssk = sdp_sk(so);
835 SDP_WLOCK(ssk);
836 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
837 error = ECONNRESET;
838 goto out;
839 }
840 socantsendmore(so);
841 sdp_usrclosed(ssk);
842 if (!(ssk->flags & SDP_DROPPED))
843 sdp_output_disconnect(ssk);
844
845 out:
846 SDP_WUNLOCK(ssk);
847
848 return (error);
849 }
850
851 static void
sdp_append(struct sdp_sock * ssk,struct sockbuf * sb,struct mbuf * mb,int cnt)852 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
853 {
854 struct mbuf *n;
855 int ncnt;
856
857 SOCKBUF_LOCK_ASSERT(sb);
858 SBLASTRECORDCHK(sb);
859 KASSERT(mb->m_flags & M_PKTHDR,
860 ("sdp_append: %p Missing packet header.\n", mb));
861 n = sb->sb_lastrecord;
862 /*
863 * If the queue is empty just set all pointers and proceed.
864 */
865 if (n == NULL) {
866 sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
867 for (; mb; mb = mb->m_next) {
868 sb->sb_mbtail = mb;
869 sballoc(sb, mb);
870 }
871 return;
872 }
873 /*
874 * Count the number of mbufs in the current tail.
875 */
876 for (ncnt = 0; n->m_next; n = n->m_next)
877 ncnt++;
878 n = sb->sb_lastrecord;
879 /*
880 * If the two chains can fit in a single sdp packet and
881 * the last record has not been sent yet (WRITABLE) coalesce
882 * them. The lastrecord remains the same but we must strip the
883 * packet header and then let sbcompress do the hard part.
884 */
885 if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
886 n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
887 ssk->xmit_size_goal) {
888 m_adj(mb, SDP_HEAD_SIZE);
889 n->m_pkthdr.len += mb->m_pkthdr.len;
890 n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
891 m_demote(mb, 1);
892 sbcompress(sb, mb, sb->sb_mbtail);
893 return;
894 }
895 /*
896 * Not compressible, just append to the end and adjust counters.
897 */
898 sb->sb_lastrecord->m_flags |= M_PUSH;
899 sb->sb_lastrecord->m_nextpkt = mb;
900 sb->sb_lastrecord = mb;
901 if (sb->sb_sndptr == NULL)
902 sb->sb_sndptr = mb;
903 for (; mb; mb = mb->m_next) {
904 sb->sb_mbtail = mb;
905 sballoc(sb, mb);
906 }
907 }
908
909 /*
910 * Do a send by putting data in output queue and updating urgent
911 * marker if URG set. Possibly send more data. Unlike the other
912 * pru_*() routines, the mbuf chains are our responsibility. We
913 * must either enqueue them or free them. The other pru_* routines
914 * generally are caller-frees.
915 *
916 * This comes from sendfile, normal sends will come from sdp_sosend().
917 */
918 static int
sdp_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)919 sdp_send(struct socket *so, int flags, struct mbuf *m,
920 struct sockaddr *nam, struct mbuf *control, struct thread *td)
921 {
922 struct sdp_sock *ssk;
923 struct mbuf *n;
924 int error;
925 int cnt;
926
927 error = 0;
928 ssk = sdp_sk(so);
929 KASSERT(m->m_flags & M_PKTHDR,
930 ("sdp_send: %p no packet header", m));
931 M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
932 mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
933 for (n = m, cnt = 0; n->m_next; n = n->m_next)
934 cnt++;
935 if (cnt > SDP_MAX_SEND_SGES) {
936 n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
937 if (n == NULL) {
938 m_freem(m);
939 return (EMSGSIZE);
940 }
941 m = n;
942 for (cnt = 0; n->m_next; n = n->m_next)
943 cnt++;
944 }
945 SDP_WLOCK(ssk);
946 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
947 if (control)
948 m_freem(control);
949 if (m)
950 m_freem(m);
951 error = ECONNRESET;
952 goto out;
953 }
954 if (control) {
955 /* SDP doesn't support control messages. */
956 if (control->m_len) {
957 m_freem(control);
958 if (m)
959 m_freem(m);
960 error = EINVAL;
961 goto out;
962 }
963 m_freem(control); /* empty control, just free it */
964 }
965 if (!(flags & PRUS_OOB)) {
966 SOCKBUF_LOCK(&so->so_snd);
967 sdp_append(ssk, &so->so_snd, m, cnt);
968 SOCKBUF_UNLOCK(&so->so_snd);
969 if (nam && ssk->state < TCPS_SYN_SENT) {
970 /*
971 * Do implied connect if not yet connected.
972 */
973 error = sdp_start_connect(ssk, nam, td);
974 if (error)
975 goto out;
976 }
977 if (flags & PRUS_EOF) {
978 /*
979 * Close the send side of the connection after
980 * the data is sent.
981 */
982 socantsendmore(so);
983 sdp_usrclosed(ssk);
984 if (!(ssk->flags & SDP_DROPPED))
985 sdp_output_disconnect(ssk);
986 } else if (!(ssk->flags & SDP_DROPPED) &&
987 !(flags & PRUS_MORETOCOME))
988 sdp_post_sends(ssk, M_NOWAIT);
989 SDP_WUNLOCK(ssk);
990 return (0);
991 } else {
992 SOCKBUF_LOCK(&so->so_snd);
993 if (sbspace(&so->so_snd) < -512) {
994 SOCKBUF_UNLOCK(&so->so_snd);
995 m_freem(m);
996 error = ENOBUFS;
997 goto out;
998 }
999 /*
1000 * According to RFC961 (Assigned Protocols),
1001 * the urgent pointer points to the last octet
1002 * of urgent data. We continue, however,
1003 * to consider it to indicate the first octet
1004 * of data past the urgent section.
1005 * Otherwise, snd_up should be one lower.
1006 */
1007 m->m_flags |= M_URG | M_PUSH;
1008 sdp_append(ssk, &so->so_snd, m, cnt);
1009 SOCKBUF_UNLOCK(&so->so_snd);
1010 if (nam && ssk->state < TCPS_SYN_SENT) {
1011 /*
1012 * Do implied connect if not yet connected.
1013 */
1014 error = sdp_start_connect(ssk, nam, td);
1015 if (error)
1016 goto out;
1017 }
1018 sdp_post_sends(ssk, M_NOWAIT);
1019 SDP_WUNLOCK(ssk);
1020 return (0);
1021 }
1022 out:
1023 SDP_WUNLOCK(ssk);
1024 return (error);
1025 }
1026
/* Map send/receive flags to sblock() flags: wait unless MSG_DONTWAIT is set. */
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
1028
1029 /*
1030 * Send on a socket. If send must go all at once and message is larger than
1031 * send buffering, then hard error. Lock against other senders. If must go
1032 * all at once and not enough room now, then inform user that this would
1033 * block and do nothing. Otherwise, if nonblocking, send as much as
1034 * possible. The data to be sent is described by "uio" if nonzero, otherwise
1035 * by the mbuf chain "top" (which must be null if uio is not). Data provided
1036 * in mbuf chain must be small enough to send all at once.
1037 *
1038 * Returns nonzero on error, timeout or signal; callers must check for short
1039 * counts if EINTR/ERESTART are returned. Data and control buffers are freed
1040 * on return.
1041 */
1042 static int
sdp_sosend(struct socket * so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags,struct thread * td)1043 sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
1044 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
1045 {
1046 struct sdp_sock *ssk;
1047 long space, resid;
1048 int atomic;
1049 int error;
1050 int copy;
1051
1052 if (uio != NULL)
1053 resid = uio->uio_resid;
1054 else
1055 resid = top->m_pkthdr.len;
1056 atomic = top != NULL;
1057 if (control != NULL) {
1058 if (control->m_len) {
1059 m_freem(control);
1060 if (top)
1061 m_freem(top);
1062 return (EINVAL);
1063 }
1064 m_freem(control);
1065 control = NULL;
1066 }
1067 /*
1068 * In theory resid should be unsigned. However, space must be
1069 * signed, as it might be less than 0 if we over-committed, and we
1070 * must use a signed comparison of space and resid. On the other
1071 * hand, a negative resid causes us to loop sending 0-length
1072 * segments to the protocol.
1073 *
1074 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1075 * type sockets since that's an error.
1076 */
1077 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1078 error = EINVAL;
1079 goto out;
1080 }
1081 if (td != NULL)
1082 td->td_ru.ru_msgsnd++;
1083
1084 ssk = sdp_sk(so);
1085 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1086 if (error)
1087 goto out;
1088
1089 restart:
1090 do {
1091 SOCKBUF_LOCK(&so->so_snd);
1092 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1093 SOCKBUF_UNLOCK(&so->so_snd);
1094 error = EPIPE;
1095 goto release;
1096 }
1097 if (so->so_error) {
1098 error = so->so_error;
1099 so->so_error = 0;
1100 SOCKBUF_UNLOCK(&so->so_snd);
1101 goto release;
1102 }
1103 if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
1104 SOCKBUF_UNLOCK(&so->so_snd);
1105 error = ENOTCONN;
1106 goto release;
1107 }
1108 space = sbspace(&so->so_snd);
1109 if (flags & MSG_OOB)
1110 space += 1024;
1111 if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
1112 SOCKBUF_UNLOCK(&so->so_snd);
1113 error = EMSGSIZE;
1114 goto release;
1115 }
1116 if (space < resid &&
1117 (atomic || space < so->so_snd.sb_lowat)) {
1118 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
1119 SOCKBUF_UNLOCK(&so->so_snd);
1120 error = EWOULDBLOCK;
1121 goto release;
1122 }
1123 error = sbwait(&so->so_snd);
1124 SOCKBUF_UNLOCK(&so->so_snd);
1125 if (error)
1126 goto release;
1127 goto restart;
1128 }
1129 SOCKBUF_UNLOCK(&so->so_snd);
1130 do {
1131 if (uio == NULL) {
1132 resid = 0;
1133 if (flags & MSG_EOR)
1134 top->m_flags |= M_EOR;
1135 } else {
1136 /*
1137 * Copy the data from userland into a mbuf
1138 * chain. If no data is to be copied in,
1139 * a single empty mbuf is returned.
1140 */
1141 copy = min(space,
1142 ssk->xmit_size_goal - SDP_HEAD_SIZE);
1143 top = m_uiotombuf(uio, M_WAITOK, copy,
1144 0, M_PKTHDR |
1145 ((flags & MSG_EOR) ? M_EOR : 0));
1146 if (top == NULL) {
1147 /* only possible error */
1148 error = EFAULT;
1149 goto release;
1150 }
1151 space -= resid - uio->uio_resid;
1152 resid = uio->uio_resid;
1153 }
1154 /*
1155 * XXX all the SBS_CANTSENDMORE checks previously
1156 * done could be out of date after dropping the
1157 * socket lock.
1158 */
1159 error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
1160 /*
1161 * Set EOF on the last send if the user specified
1162 * MSG_EOF.
1163 */
1164 ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
1165 /* If there is more to send set PRUS_MORETOCOME. */
1166 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
1167 top, addr, NULL, td);
1168 top = NULL;
1169 if (error)
1170 goto release;
1171 } while (resid && space > 0);
1172 } while (resid);
1173
1174 release:
1175 sbunlock(&so->so_snd);
1176 out:
1177 if (top != NULL)
1178 m_freem(top);
1179 return (error);
1180 }
1181
1182 /*
1183 * The part of soreceive() that implements reading non-inline out-of-band
1184 * data from a socket. For more complete comments, see soreceive(), from
1185 * which this code originated.
1186 *
1187 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
1188 * unable to return an mbuf chain to the caller.
1189 */
1190 static int
soreceive_rcvoob(struct socket * so,struct uio * uio,int flags)1191 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
1192 {
1193 struct protosw *pr = so->so_proto;
1194 struct mbuf *m;
1195 int error;
1196
1197 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1198
1199 m = m_get(M_WAITOK, MT_DATA);
1200 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1201 if (error)
1202 goto bad;
1203 do {
1204 error = uiomove(mtod(m, void *),
1205 (int) min(uio->uio_resid, m->m_len), uio);
1206 m = m_free(m);
1207 } while (uio->uio_resid && error == 0 && m);
1208 bad:
1209 if (m != NULL)
1210 m_freem(m);
1211 return (error);
1212 }
1213
1214 /*
1215 * Optimized version of soreceive() for stream (TCP) sockets.
1216 */
1217 static int
sdp_sorecv(struct socket * so,struct sockaddr ** psa,struct uio * uio,struct mbuf ** mp0,struct mbuf ** controlp,int * flagsp)1218 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
1219 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1220 {
1221 int len = 0, error = 0, flags, oresid;
1222 struct sockbuf *sb;
1223 struct mbuf *m, *n = NULL;
1224 struct sdp_sock *ssk;
1225
1226 /* We only do stream sockets. */
1227 if (so->so_type != SOCK_STREAM)
1228 return (EINVAL);
1229 if (psa != NULL)
1230 *psa = NULL;
1231 if (controlp != NULL)
1232 return (EINVAL);
1233 if (flagsp != NULL)
1234 flags = *flagsp &~ MSG_EOR;
1235 else
1236 flags = 0;
1237 if (flags & MSG_OOB)
1238 return (soreceive_rcvoob(so, uio, flags));
1239 if (mp0 != NULL)
1240 *mp0 = NULL;
1241
1242 sb = &so->so_rcv;
1243 ssk = sdp_sk(so);
1244
1245 /* Prevent other readers from entering the socket. */
1246 error = sblock(sb, SBLOCKWAIT(flags));
1247 if (error)
1248 goto out;
1249 SOCKBUF_LOCK(sb);
1250
1251 /* Easy one, no space to copyout anything. */
1252 if (uio->uio_resid == 0) {
1253 error = EINVAL;
1254 goto out;
1255 }
1256 oresid = uio->uio_resid;
1257
1258 /* We will never ever get anything unless we are connected. */
1259 if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
1260 /* When disconnecting there may be still some data left. */
1261 if (sb->sb_cc > 0)
1262 goto deliver;
1263 if (!(so->so_state & SS_ISDISCONNECTED))
1264 error = ENOTCONN;
1265 goto out;
1266 }
1267
1268 /* Socket buffer is empty and we shall not block. */
1269 if (sb->sb_cc == 0 &&
1270 ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
1271 error = EAGAIN;
1272 goto out;
1273 }
1274
1275 restart:
1276 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1277
1278 /* Abort if socket has reported problems. */
1279 if (so->so_error) {
1280 if (sb->sb_cc > 0)
1281 goto deliver;
1282 if (oresid > uio->uio_resid)
1283 goto out;
1284 error = so->so_error;
1285 if (!(flags & MSG_PEEK))
1286 so->so_error = 0;
1287 goto out;
1288 }
1289
1290 /* Door is closed. Deliver what is left, if any. */
1291 if (sb->sb_state & SBS_CANTRCVMORE) {
1292 if (sb->sb_cc > 0)
1293 goto deliver;
1294 else
1295 goto out;
1296 }
1297
1298 /* Socket buffer got some data that we shall deliver now. */
1299 if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
1300 ((so->so_state & SS_NBIO) ||
1301 (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
1302 sb->sb_cc >= sb->sb_lowat ||
1303 sb->sb_cc >= uio->uio_resid ||
1304 sb->sb_cc >= sb->sb_hiwat) ) {
1305 goto deliver;
1306 }
1307
1308 /* On MSG_WAITALL we must wait until all data or error arrives. */
1309 if ((flags & MSG_WAITALL) &&
1310 (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
1311 goto deliver;
1312
1313 /*
1314 * Wait and block until (more) data comes in.
1315 * NB: Drops the sockbuf lock during wait.
1316 */
1317 error = sbwait(sb);
1318 if (error)
1319 goto out;
1320 goto restart;
1321
1322 deliver:
1323 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1324 KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
1325 KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
1326
1327 /* Statistics. */
1328 if (uio->uio_td)
1329 uio->uio_td->td_ru.ru_msgrcv++;
1330
1331 /* Fill uio until full or current end of socket buffer is reached. */
1332 len = min(uio->uio_resid, sb->sb_cc);
1333 if (mp0 != NULL) {
1334 /* Dequeue as many mbufs as possible. */
1335 if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
1336 for (*mp0 = m = sb->sb_mb;
1337 m != NULL && m->m_len <= len;
1338 m = m->m_next) {
1339 len -= m->m_len;
1340 uio->uio_resid -= m->m_len;
1341 sbfree(sb, m);
1342 n = m;
1343 }
1344 sb->sb_mb = m;
1345 if (sb->sb_mb == NULL)
1346 SB_EMPTY_FIXUP(sb);
1347 n->m_next = NULL;
1348 }
1349 /* Copy the remainder. */
1350 if (len > 0) {
1351 KASSERT(sb->sb_mb != NULL,
1352 ("%s: len > 0 && sb->sb_mb empty", __func__));
1353
1354 m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
1355 if (m == NULL)
1356 len = 0; /* Don't flush data from sockbuf. */
1357 else
1358 uio->uio_resid -= m->m_len;
1359 if (*mp0 != NULL)
1360 n->m_next = m;
1361 else
1362 *mp0 = m;
1363 if (*mp0 == NULL) {
1364 error = ENOBUFS;
1365 goto out;
1366 }
1367 }
1368 } else {
1369 /* NB: Must unlock socket buffer as uiomove may sleep. */
1370 SOCKBUF_UNLOCK(sb);
1371 error = m_mbuftouio(uio, sb->sb_mb, len);
1372 SOCKBUF_LOCK(sb);
1373 if (error)
1374 goto out;
1375 }
1376 SBLASTRECORDCHK(sb);
1377 SBLASTMBUFCHK(sb);
1378
1379 /*
1380 * Remove the delivered data from the socket buffer unless we
1381 * were only peeking.
1382 */
1383 if (!(flags & MSG_PEEK)) {
1384 if (len > 0)
1385 sbdrop_locked(sb, len);
1386
1387 /* Notify protocol that we drained some data. */
1388 SOCKBUF_UNLOCK(sb);
1389 SDP_WLOCK(ssk);
1390 sdp_do_posts(ssk);
1391 SDP_WUNLOCK(ssk);
1392 SOCKBUF_LOCK(sb);
1393 }
1394
1395 /*
1396 * For MSG_WAITALL we may have to loop again and wait for
1397 * more data to come in.
1398 */
1399 if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
1400 goto restart;
1401 out:
1402 SOCKBUF_LOCK_ASSERT(sb);
1403 SBLASTRECORDCHK(sb);
1404 SBLASTMBUFCHK(sb);
1405 SOCKBUF_UNLOCK(sb);
1406 sbunlock(sb);
1407 return (error);
1408 }
1409
1410 /*
1411 * Abort is used to teardown a connection typically while sitting in
1412 * the accept queue.
1413 */
1414 void
sdp_abort(struct socket * so)1415 sdp_abort(struct socket *so)
1416 {
1417 struct sdp_sock *ssk;
1418
1419 ssk = sdp_sk(so);
1420 SDP_WLOCK(ssk);
1421 /*
1422 * If we have not yet dropped, do it now.
1423 */
1424 if (!(ssk->flags & SDP_TIMEWAIT) &&
1425 !(ssk->flags & SDP_DROPPED))
1426 sdp_drop(ssk, ECONNABORTED);
1427 KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
1428 ssk, ssk->flags));
1429 SDP_WUNLOCK(ssk);
1430 }
1431
1432 /*
1433 * Close a SDP socket and initiate a friendly disconnect.
1434 */
1435 static void
sdp_close(struct socket * so)1436 sdp_close(struct socket *so)
1437 {
1438 struct sdp_sock *ssk;
1439
1440 ssk = sdp_sk(so);
1441 SDP_WLOCK(ssk);
1442 /*
1443 * If we have not yet dropped, do it now.
1444 */
1445 if (!(ssk->flags & SDP_TIMEWAIT) &&
1446 !(ssk->flags & SDP_DROPPED))
1447 sdp_start_disconnect(ssk);
1448
1449 /*
1450 * If we've still not dropped let the socket layer know we're
1451 * holding on to the socket and pcb for a while.
1452 */
1453 if (!(ssk->flags & SDP_DROPPED)) {
1454 SOCK_LOCK(so);
1455 so->so_state |= SS_PROTOREF;
1456 SOCK_UNLOCK(so);
1457 ssk->flags |= SDP_SOCKREF;
1458 }
1459 SDP_WUNLOCK(ssk);
1460 }
1461
1462 /*
1463 * User requests out-of-band data.
1464 */
1465 static int
sdp_rcvoob(struct socket * so,struct mbuf * m,int flags)1466 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1467 {
1468 int error = 0;
1469 struct sdp_sock *ssk;
1470
1471 ssk = sdp_sk(so);
1472 SDP_WLOCK(ssk);
1473 if (!rx_ring_trylock(&ssk->rx_ring)) {
1474 SDP_WUNLOCK(ssk);
1475 return (ECONNRESET);
1476 }
1477 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1478 error = ECONNRESET;
1479 goto out;
1480 }
1481 if ((so->so_oobmark == 0 &&
1482 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1483 so->so_options & SO_OOBINLINE ||
1484 ssk->oobflags & SDP_HADOOB) {
1485 error = EINVAL;
1486 goto out;
1487 }
1488 if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
1489 error = EWOULDBLOCK;
1490 goto out;
1491 }
1492 m->m_len = 1;
1493 *mtod(m, caddr_t) = ssk->iobc;
1494 if ((flags & MSG_PEEK) == 0)
1495 ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
1496 out:
1497 rx_ring_unlock(&ssk->rx_ring);
1498 SDP_WUNLOCK(ssk);
1499 return (error);
1500 }
1501
1502 void
sdp_urg(struct sdp_sock * ssk,struct mbuf * mb)1503 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
1504 {
1505 struct mbuf *m;
1506 struct socket *so;
1507
1508 so = ssk->socket;
1509 if (so == NULL)
1510 return;
1511
1512 so->so_oobmark = so->so_rcv.sb_cc + mb->m_pkthdr.len - 1;
1513 sohasoutofband(so);
1514 ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
1515 if (!(so->so_options & SO_OOBINLINE)) {
1516 for (m = mb; m->m_next != NULL; m = m->m_next);
1517 ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
1518 ssk->oobflags |= SDP_HAVEOOB;
1519 m->m_len--;
1520 mb->m_pkthdr.len--;
1521 }
1522 }
1523
1524 /*
1525 * Notify a sdp socket of an asynchronous error.
1526 *
1527 * Do not wake up user since there currently is no mechanism for
1528 * reporting soft errors (yet - a kqueue filter may be added).
1529 */
1530 struct sdp_sock *
sdp_notify(struct sdp_sock * ssk,int error)1531 sdp_notify(struct sdp_sock *ssk, int error)
1532 {
1533
1534 SDP_WLOCK_ASSERT(ssk);
1535
1536 if ((ssk->flags & SDP_TIMEWAIT) ||
1537 (ssk->flags & SDP_DROPPED))
1538 return (ssk);
1539
1540 /*
1541 * Ignore some errors if we are hooked up.
1542 */
1543 if (ssk->state == TCPS_ESTABLISHED &&
1544 (error == EHOSTUNREACH || error == ENETUNREACH ||
1545 error == EHOSTDOWN))
1546 return (ssk);
1547 ssk->softerror = error;
1548 return sdp_drop(ssk, error);
1549 }
1550
1551 static void
sdp_ctlinput(int cmd,struct sockaddr * sa,void * vip)1552 sdp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
1553 {
1554 struct in_addr faddr;
1555
1556 faddr = ((struct sockaddr_in *)sa)->sin_addr;
1557 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
1558 return;
1559
1560 sdp_pcbnotifyall(faddr, inetctlerrmap[cmd], sdp_notify);
1561 }
1562
1563 static int
sdp_control(struct socket * so,u_long cmd,caddr_t data,struct ifnet * ifp,struct thread * td)1564 sdp_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
1565 struct thread *td)
1566 {
1567 return (EOPNOTSUPP);
1568 }
1569
1570 static void
sdp_keepalive_timeout(void * data)1571 sdp_keepalive_timeout(void *data)
1572 {
1573 struct sdp_sock *ssk;
1574
1575 ssk = data;
1576 /* Callout canceled. */
1577 if (!callout_active(&ssk->keep2msl))
1578 return;
1579 /* Callout rescheduled as a different kind of timer. */
1580 if (callout_pending(&ssk->keep2msl))
1581 goto out;
1582 callout_deactivate(&ssk->keep2msl);
1583 if (ssk->flags & SDP_DROPPED ||
1584 (ssk->socket->so_options & SO_KEEPALIVE) == 0)
1585 goto out;
1586 sdp_post_keepalive(ssk);
1587 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1588 sdp_keepalive_timeout, ssk);
1589 out:
1590 SDP_WUNLOCK(ssk);
1591 }
1592
1593
1594 void
sdp_start_keepalive_timer(struct socket * so)1595 sdp_start_keepalive_timer(struct socket *so)
1596 {
1597 struct sdp_sock *ssk;
1598
1599 ssk = sdp_sk(so);
1600 if (!callout_pending(&ssk->keep2msl))
1601 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1602 sdp_keepalive_timeout, ssk);
1603 }
1604
1605 static void
sdp_stop_keepalive_timer(struct socket * so)1606 sdp_stop_keepalive_timer(struct socket *so)
1607 {
1608 struct sdp_sock *ssk;
1609
1610 ssk = sdp_sk(so);
1611 callout_stop(&ssk->keep2msl);
1612 }
1613
1614 /*
1615 * sdp_ctloutput() must drop the inpcb lock before performing copyin on
1616 * socket option arguments. When it re-acquires the lock after the copy, it
1617 * has to revalidate that the connection is still valid for the socket
1618 * option.
1619 */
1620 #define SDP_WLOCK_RECHECK(inp) do { \
1621 SDP_WLOCK(ssk); \
1622 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { \
1623 SDP_WUNLOCK(ssk); \
1624 return (ECONNRESET); \
1625 } \
1626 } while(0)
1627
1628 static int
sdp_ctloutput(struct socket * so,struct sockopt * sopt)1629 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
1630 {
1631 int error, opt, optval;
1632 struct sdp_sock *ssk;
1633
1634 error = 0;
1635 ssk = sdp_sk(so);
1636 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
1637 SDP_WLOCK(ssk);
1638 if (so->so_options & SO_KEEPALIVE)
1639 sdp_start_keepalive_timer(so);
1640 else
1641 sdp_stop_keepalive_timer(so);
1642 SDP_WUNLOCK(ssk);
1643 }
1644 if (sopt->sopt_level != IPPROTO_TCP)
1645 return (error);
1646
1647 SDP_WLOCK(ssk);
1648 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1649 SDP_WUNLOCK(ssk);
1650 return (ECONNRESET);
1651 }
1652
1653 switch (sopt->sopt_dir) {
1654 case SOPT_SET:
1655 switch (sopt->sopt_name) {
1656 case TCP_NODELAY:
1657 SDP_WUNLOCK(ssk);
1658 error = sooptcopyin(sopt, &optval, sizeof optval,
1659 sizeof optval);
1660 if (error)
1661 return (error);
1662
1663 SDP_WLOCK_RECHECK(ssk);
1664 opt = SDP_NODELAY;
1665 if (optval)
1666 ssk->flags |= opt;
1667 else
1668 ssk->flags &= ~opt;
1669 sdp_do_posts(ssk);
1670 SDP_WUNLOCK(ssk);
1671 break;
1672
1673 default:
1674 SDP_WUNLOCK(ssk);
1675 error = ENOPROTOOPT;
1676 break;
1677 }
1678 break;
1679
1680 case SOPT_GET:
1681 switch (sopt->sopt_name) {
1682 case TCP_NODELAY:
1683 optval = ssk->flags & SDP_NODELAY;
1684 SDP_WUNLOCK(ssk);
1685 error = sooptcopyout(sopt, &optval, sizeof optval);
1686 break;
1687 default:
1688 SDP_WUNLOCK(ssk);
1689 error = ENOPROTOOPT;
1690 break;
1691 }
1692 break;
1693 }
1694 return (error);
1695 }
1696 #undef SDP_WLOCK_RECHECK
1697
1698 int sdp_mod_count = 0;
1699 int sdp_mod_usec = 0;
1700
1701 void
sdp_set_default_moderation(struct sdp_sock * ssk)1702 sdp_set_default_moderation(struct sdp_sock *ssk)
1703 {
1704 if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
1705 return;
1706 ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
1707 }
1708
1709
1710 static void
sdp_dev_add(struct ib_device * device)1711 sdp_dev_add(struct ib_device *device)
1712 {
1713 struct ib_fmr_pool_param param;
1714 struct sdp_device *sdp_dev;
1715
1716 sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
1717 sdp_dev->pd = ib_alloc_pd(device);
1718 if (IS_ERR(sdp_dev->pd))
1719 goto out_pd;
1720 sdp_dev->mr = ib_get_dma_mr(sdp_dev->pd, IB_ACCESS_LOCAL_WRITE);
1721 if (IS_ERR(sdp_dev->mr))
1722 goto out_mr;
1723 memset(¶m, 0, sizeof param);
1724 param.max_pages_per_fmr = SDP_FMR_SIZE;
1725 param.page_shift = PAGE_SHIFT;
1726 param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
1727 param.pool_size = SDP_FMR_POOL_SIZE;
1728 param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
1729 param.cache = 1;
1730 sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, ¶m);
1731 if (IS_ERR(sdp_dev->fmr_pool))
1732 goto out_fmr;
1733 ib_set_client_data(device, &sdp_client, sdp_dev);
1734 return;
1735
1736 out_fmr:
1737 ib_dereg_mr(sdp_dev->mr);
1738 out_mr:
1739 ib_dealloc_pd(sdp_dev->pd);
1740 out_pd:
1741 free(sdp_dev, M_SDP);
1742 }
1743
1744 static void
sdp_dev_rem(struct ib_device * device)1745 sdp_dev_rem(struct ib_device *device)
1746 {
1747 struct sdp_device *sdp_dev;
1748 struct sdp_sock *ssk;
1749
1750 SDP_LIST_WLOCK();
1751 LIST_FOREACH(ssk, &sdp_list, list) {
1752 if (ssk->ib_device != device)
1753 continue;
1754 SDP_WLOCK(ssk);
1755 if ((ssk->flags & SDP_DESTROY) == 0)
1756 ssk = sdp_notify(ssk, ECONNRESET);
1757 if (ssk)
1758 SDP_WUNLOCK(ssk);
1759 }
1760 SDP_LIST_WUNLOCK();
1761 /*
1762 * XXX Do I need to wait between these two?
1763 */
1764 sdp_dev = ib_get_client_data(device, &sdp_client);
1765 if (!sdp_dev)
1766 return;
1767 ib_flush_fmr_pool(sdp_dev->fmr_pool);
1768 ib_destroy_fmr_pool(sdp_dev->fmr_pool);
1769 ib_dereg_mr(sdp_dev->mr);
1770 ib_dealloc_pd(sdp_dev->pd);
1771 free(sdp_dev, M_SDP);
1772 }
1773
1774 struct ib_client sdp_client =
1775 { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
1776
1777
1778 static int
sdp_pcblist(SYSCTL_HANDLER_ARGS)1779 sdp_pcblist(SYSCTL_HANDLER_ARGS)
1780 {
1781 int error, n, i;
1782 struct sdp_sock *ssk;
1783 struct xinpgen xig;
1784
1785 /*
1786 * The process of preparing the TCB list is too time-consuming and
1787 * resource-intensive to repeat twice on every request.
1788 */
1789 if (req->oldptr == NULL) {
1790 n = sdp_count;
1791 n += imax(n / 8, 10);
1792 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
1793 return (0);
1794 }
1795
1796 if (req->newptr != NULL)
1797 return (EPERM);
1798
1799 /*
1800 * OK, now we're committed to doing something.
1801 */
1802 SDP_LIST_RLOCK();
1803 n = sdp_count;
1804 SDP_LIST_RUNLOCK();
1805
1806 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
1807 + n * sizeof(struct xtcpcb));
1808 if (error != 0)
1809 return (error);
1810
1811 xig.xig_len = sizeof xig;
1812 xig.xig_count = n;
1813 xig.xig_gen = 0;
1814 xig.xig_sogen = so_gencnt;
1815 error = SYSCTL_OUT(req, &xig, sizeof xig);
1816 if (error)
1817 return (error);
1818
1819 SDP_LIST_RLOCK();
1820 for (ssk = LIST_FIRST(&sdp_list), i = 0;
1821 ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
1822 struct xtcpcb xt;
1823
1824 SDP_RLOCK(ssk);
1825 if (ssk->flags & SDP_TIMEWAIT) {
1826 if (ssk->cred != NULL)
1827 error = cr_cansee(req->td->td_ucred,
1828 ssk->cred);
1829 else
1830 error = EINVAL; /* Skip this inp. */
1831 } else if (ssk->socket)
1832 error = cr_canseesocket(req->td->td_ucred,
1833 ssk->socket);
1834 else
1835 error = EINVAL;
1836 if (error) {
1837 error = 0;
1838 goto next;
1839 }
1840
1841 bzero(&xt, sizeof(xt));
1842 xt.xt_len = sizeof xt;
1843 xt.xt_inp.inp_gencnt = 0;
1844 xt.xt_inp.inp_vflag = INP_IPV4;
1845 memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
1846 xt.xt_inp.inp_lport = ssk->lport;
1847 memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
1848 xt.xt_inp.inp_fport = ssk->fport;
1849 xt.xt_tp.t_state = ssk->state;
1850 if (ssk->socket != NULL)
1851 sotoxsocket(ssk->socket, &xt.xt_socket);
1852 else
1853 bzero(&xt.xt_socket, sizeof xt.xt_socket);
1854 xt.xt_socket.xso_protocol = IPPROTO_TCP;
1855 SDP_RUNLOCK(ssk);
1856 error = SYSCTL_OUT(req, &xt, sizeof xt);
1857 if (error)
1858 break;
1859 i++;
1860 continue;
1861 next:
1862 SDP_RUNLOCK(ssk);
1863 }
1864 if (!error) {
1865 /*
1866 * Give the user an updated idea of our state.
1867 * If the generation differs from what we told
1868 * her before, she knows that something happened
1869 * while we were processing this request, and it
1870 * might be necessary to retry.
1871 */
1872 xig.xig_gen = 0;
1873 xig.xig_sogen = so_gencnt;
1874 xig.xig_count = sdp_count;
1875 error = SYSCTL_OUT(req, &xig, sizeof xig);
1876 }
1877 SDP_LIST_RUNLOCK();
1878 return (error);
1879 }
1880
1881 static SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW, 0, "SDP");
1882
1883 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
1884 CTLFLAG_RD | CTLTYPE_STRUCT, 0, 0, sdp_pcblist, "S,xtcpcb",
1885 "List of active SDP connections");
1886
1887 static void
sdp_zone_change(void * tag)1888 sdp_zone_change(void *tag)
1889 {
1890
1891 uma_zone_set_max(sdp_zone, maxsockets);
1892 }
1893
1894 static void
sdp_init(void)1895 sdp_init(void)
1896 {
1897
1898 LIST_INIT(&sdp_list);
1899 sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
1900 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1901 uma_zone_set_max(sdp_zone, maxsockets);
1902 EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
1903 EVENTHANDLER_PRI_ANY);
1904 rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
1905 ib_register_client(&sdp_client);
1906 }
1907
1908 extern struct domain sdpdomain;
1909
1910 struct pr_usrreqs sdp_usrreqs = {
1911 .pru_abort = sdp_abort,
1912 .pru_accept = sdp_accept,
1913 .pru_attach = sdp_attach,
1914 .pru_bind = sdp_bind,
1915 .pru_connect = sdp_connect,
1916 .pru_control = sdp_control,
1917 .pru_detach = sdp_detach,
1918 .pru_disconnect = sdp_disconnect,
1919 .pru_listen = sdp_listen,
1920 .pru_peeraddr = sdp_getpeeraddr,
1921 .pru_rcvoob = sdp_rcvoob,
1922 .pru_send = sdp_send,
1923 .pru_sosend = sdp_sosend,
1924 .pru_soreceive = sdp_sorecv,
1925 .pru_shutdown = sdp_shutdown,
1926 .pru_sockaddr = sdp_getsockaddr,
1927 .pru_close = sdp_close,
1928 };
1929
1930 struct protosw sdpsw[] = {
1931 {
1932 .pr_type = SOCK_STREAM,
1933 .pr_domain = &sdpdomain,
1934 .pr_protocol = IPPROTO_IP,
1935 .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
1936 .pr_ctlinput = sdp_ctlinput,
1937 .pr_ctloutput = sdp_ctloutput,
1938 .pr_usrreqs = &sdp_usrreqs
1939 },
1940 {
1941 .pr_type = SOCK_STREAM,
1942 .pr_domain = &sdpdomain,
1943 .pr_protocol = IPPROTO_TCP,
1944 .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
1945 .pr_ctlinput = sdp_ctlinput,
1946 .pr_ctloutput = sdp_ctloutput,
1947 .pr_usrreqs = &sdp_usrreqs
1948 },
1949 };
1950
1951 struct domain sdpdomain = {
1952 .dom_family = AF_INET_SDP,
1953 .dom_name = "SDP",
1954 .dom_init = sdp_init,
1955 .dom_protosw = sdpsw,
1956 .dom_protoswNPROTOSW = &sdpsw[sizeof(sdpsw)/sizeof(sdpsw[0])],
1957 };
1958
/* Register the SDP domain. */
DOMAIN_SET(sdp);

/* Debug verbosity knobs: general messages and data-path messages. */
int sdp_debug_level = 1;
int sdp_data_debug_level = 0;
1963