1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * This software was developed by Edward Tomasz Napierala under sponsorship
8 * from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 */
32
33 /*
34 * Software implementation of iSCSI Common Layer kobj(9) interface.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: stable/12/sys/dev/iscsi/icl_soft.c 372306 2022-07-29 17:11:04Z dim $");
39
40 #include <sys/param.h>
41 #include <sys/capsicum.h>
42 #include <sys/condvar.h>
43 #include <sys/conf.h>
44 #include <sys/gsb_crc32.h>
45 #include <sys/file.h>
46 #include <sys/kernel.h>
47 #include <sys/kthread.h>
48 #include <sys/lock.h>
49 #include <sys/mbuf.h>
50 #include <sys/mutex.h>
51 #include <sys/module.h>
52 #include <sys/protosw.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #include <sys/sx.h>
58 #include <sys/uio.h>
59 #include <vm/uma.h>
60 #include <netinet/in.h>
61 #include <netinet/tcp.h>
62
63 #include <dev/iscsi/icl.h>
64 #include <dev/iscsi/iscsi_proto.h>
65 #include <icl_conn_if.h>
66
67 SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
68 "Software iSCSI");
69 static int coalesce = 1;
70 SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN,
71 &coalesce, 0, "Try to coalesce PDUs before sending");
72 static int partial_receive_len = 256 * 1024;
73 SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
74 &partial_receive_len, 0, "Minimum read size for partially received "
75 "data segment");
76 static int max_data_segment_length = 256 * 1024;
77 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN,
78 &max_data_segment_length, 0, "Maximum data segment length");
79 static int first_burst_length = 1024 * 1024;
80 SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
81 &first_burst_length, 0, "First burst length");
82 static int max_burst_length = 1024 * 1024;
83 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
84 &max_burst_length, 0, "Maximum burst length");
85 static int sendspace = 1536 * 1024;
86 SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN,
87 &sendspace, 0, "Default send socket buffer size");
88 static int recvspace = 1536 * 1024;
89 SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN,
90 &recvspace, 0, "Default receive socket buffer size");
91
92 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
93 static uma_zone_t icl_pdu_zone;
94
95 static volatile u_int icl_ncons;
96
/*
 * Helpers for the connection mutex referenced by ic_lock.  The argument is
 * parenthesized so that any pointer expression, not just a plain identifier,
 * expands correctly.
 */
#define	ICL_CONN_LOCK(X)		mtx_lock((X)->ic_lock)
#define	ICL_CONN_UNLOCK(X)		mtx_unlock((X)->ic_lock)
#define	ICL_CONN_LOCK_ASSERT(X)		mtx_assert((X)->ic_lock, MA_OWNED)
#define	ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert((X)->ic_lock, MA_NOTOWNED)
101
102 STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
103
104 static icl_conn_new_pdu_t icl_soft_conn_new_pdu;
105 static icl_conn_pdu_free_t icl_soft_conn_pdu_free;
106 static icl_conn_pdu_data_segment_length_t
107 icl_soft_conn_pdu_data_segment_length;
108 static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data;
109 static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data;
110 static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue;
111 static icl_conn_handoff_t icl_soft_conn_handoff;
112 static icl_conn_free_t icl_soft_conn_free;
113 static icl_conn_close_t icl_soft_conn_close;
114 static icl_conn_task_setup_t icl_soft_conn_task_setup;
115 static icl_conn_task_done_t icl_soft_conn_task_done;
116 static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup;
117 static icl_conn_transfer_done_t icl_soft_conn_transfer_done;
118 #ifdef ICL_KERNEL_PROXY
119 static icl_conn_connect_t icl_soft_conn_connect;
120 #endif
121
122 static kobj_method_t icl_soft_methods[] = {
123 KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
124 KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
125 KOBJMETHOD(icl_conn_pdu_data_segment_length,
126 icl_soft_conn_pdu_data_segment_length),
127 KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
128 KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
129 KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
130 KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
131 KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
132 KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
133 KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
134 KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
135 KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
136 KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
137 #ifdef ICL_KERNEL_PROXY
138 KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
139 #endif
140 { 0, 0 }
141 };
142
143 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));
144
145 static void
icl_conn_fail(struct icl_conn * ic)146 icl_conn_fail(struct icl_conn *ic)
147 {
148 if (ic->ic_socket == NULL)
149 return;
150
151 /*
152 * XXX
153 */
154 ic->ic_socket->so_error = EDOOFUS;
155 (ic->ic_error)(ic);
156 }
157
158 static void
icl_soft_conn_pdu_free(struct icl_conn * ic,struct icl_pdu * ip)159 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
160 {
161
162 m_freem(ip->ip_bhs_mbuf);
163 m_freem(ip->ip_ahs_mbuf);
164 m_freem(ip->ip_data_mbuf);
165 uma_zfree(icl_pdu_zone, ip);
166 #ifdef DIAGNOSTIC
167 refcount_release(&ic->ic_outstanding_pdus);
168 #endif
169 }
170
171 /*
172 * Allocate icl_pdu with empty BHS to fill up by the caller.
173 */
174 struct icl_pdu *
icl_soft_conn_new_pdu(struct icl_conn * ic,int flags)175 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
176 {
177 struct icl_pdu *ip;
178
179 #ifdef DIAGNOSTIC
180 refcount_acquire(&ic->ic_outstanding_pdus);
181 #endif
182 ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
183 if (ip == NULL) {
184 ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
185 #ifdef DIAGNOSTIC
186 refcount_release(&ic->ic_outstanding_pdus);
187 #endif
188 return (NULL);
189 }
190 ip->ip_conn = ic;
191
192 CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
193 ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
194 if (ip->ip_bhs_mbuf == NULL) {
195 ICL_WARN("failed to allocate BHS mbuf");
196 icl_soft_conn_pdu_free(ic, ip);
197 return (NULL);
198 }
199 ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
200 memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
201 ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
202
203 return (ip);
204 }
205
206 static int
icl_pdu_ahs_length(const struct icl_pdu * request)207 icl_pdu_ahs_length(const struct icl_pdu *request)
208 {
209
210 return (request->ip_bhs->bhs_total_ahs_len * 4);
211 }
212
213 static size_t
icl_pdu_data_segment_length(const struct icl_pdu * request)214 icl_pdu_data_segment_length(const struct icl_pdu *request)
215 {
216 uint32_t len = 0;
217
218 len += request->ip_bhs->bhs_data_segment_len[0];
219 len <<= 8;
220 len += request->ip_bhs->bhs_data_segment_len[1];
221 len <<= 8;
222 len += request->ip_bhs->bhs_data_segment_len[2];
223
224 return (len);
225 }
226
227 size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn * ic,const struct icl_pdu * request)228 icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
229 const struct icl_pdu *request)
230 {
231
232 return (icl_pdu_data_segment_length(request));
233 }
234
235 static void
icl_pdu_set_data_segment_length(struct icl_pdu * response,uint32_t len)236 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
237 {
238
239 response->ip_bhs->bhs_data_segment_len[2] = len;
240 response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
241 response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
242 }
243
244 static size_t
icl_pdu_padding(const struct icl_pdu * ip)245 icl_pdu_padding(const struct icl_pdu *ip)
246 {
247
248 if ((ip->ip_data_len % 4) != 0)
249 return (4 - (ip->ip_data_len % 4));
250
251 return (0);
252 }
253
254 static size_t
icl_pdu_size(const struct icl_pdu * response)255 icl_pdu_size(const struct icl_pdu *response)
256 {
257 size_t len;
258
259 KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
260
261 len = sizeof(struct iscsi_bhs) + response->ip_data_len +
262 icl_pdu_padding(response);
263 if (response->ip_conn->ic_header_crc32c)
264 len += ISCSI_HEADER_DIGEST_SIZE;
265 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
266 len += ISCSI_DATA_DIGEST_SIZE;
267
268 return (len);
269 }
270
271 static void
icl_soft_receive_buf(struct mbuf ** r,size_t * rs,void * buf,size_t s)272 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s)
273 {
274
275 m_copydata(*r, 0, s, buf);
276 m_adj(*r, s);
277 while ((*r) != NULL && (*r)->m_len == 0)
278 *r = m_free(*r);
279 *rs -= s;
280 }
281
282 static void
icl_pdu_receive_ahs(struct icl_pdu * request,struct mbuf ** r,size_t * rs)283 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs)
284 {
285
286 request->ip_ahs_len = icl_pdu_ahs_length(request);
287 if (request->ip_ahs_len == 0)
288 return;
289
290 request->ip_ahs_mbuf = *r;
291 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK);
292 *rs -= request->ip_ahs_len;
293 }
294
295 static uint32_t
icl_mbuf_to_crc32c(const struct mbuf * m0)296 icl_mbuf_to_crc32c(const struct mbuf *m0)
297 {
298 uint32_t digest = 0xffffffff;
299 const struct mbuf *m;
300
301 for (m = m0; m != NULL; m = m->m_next)
302 digest = calculate_crc32c(digest,
303 mtod(m, const void *), m->m_len);
304
305 digest = digest ^ 0xffffffff;
306
307 return (digest);
308 }
309
310 static int
icl_pdu_check_header_digest(struct icl_pdu * request,struct mbuf ** r,size_t * rs)311 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
312 {
313 uint32_t received_digest, valid_digest;
314
315 if (request->ip_conn->ic_header_crc32c == false)
316 return (0);
317
318 CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
319 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE);
320
321 /* Temporary attach AHS to BHS to calculate header digest. */
322 request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
323 valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
324 request->ip_bhs_mbuf->m_next = NULL;
325 if (received_digest != valid_digest) {
326 ICL_WARN("header digest check failed; got 0x%x, "
327 "should be 0x%x", received_digest, valid_digest);
328 return (-1);
329 }
330
331 return (0);
332 }
333
334 /*
335 * Return the number of bytes that should be waiting in the receive socket
336 * before icl_pdu_receive_data_segment() gets called.
337 */
338 static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu * request)339 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
340 {
341 size_t len;
342
343 len = icl_pdu_data_segment_length(request);
344 if (len == 0)
345 return (0);
346
347 /*
348 * Account for the parts of data segment already read from
349 * the socket buffer.
350 */
351 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
352 len -= request->ip_data_len;
353
354 /*
355 * Don't always wait for the full data segment to be delivered
356 * to the socket; this might badly affect performance due to
357 * TCP window scaling.
358 */
359 if (len > partial_receive_len) {
360 #if 0
361 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
362 len, partial_receive_len));
363 #endif
364 len = partial_receive_len;
365
366 return (len);
367 }
368
369 /*
370 * Account for padding. Note that due to the way code is written,
371 * the icl_pdu_receive_data_segment() must always receive padding
372 * along with the last part of data segment, because it would be
373 * impossible to tell whether we've already received the full data
374 * segment including padding, or without it.
375 */
376 if ((len % 4) != 0)
377 len += 4 - (len % 4);
378
379 #if 0
380 ICL_DEBUG("need %zd bytes of data", len));
381 #endif
382
383 return (len);
384 }
385
386 static int
icl_pdu_receive_data_segment(struct icl_pdu * request,struct mbuf ** r,size_t * rs,bool * more_neededp)387 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r,
388 size_t *rs, bool *more_neededp)
389 {
390 struct icl_conn *ic;
391 size_t len, padding = 0;
392 struct mbuf *m;
393
394 ic = request->ip_conn;
395
396 *more_neededp = false;
397 ic->ic_receive_len = 0;
398
399 len = icl_pdu_data_segment_length(request);
400 if (len == 0)
401 return (0);
402
403 if ((len % 4) != 0)
404 padding = 4 - (len % 4);
405
406 /*
407 * Account for already received parts of data segment.
408 */
409 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
410 len -= request->ip_data_len;
411
412 if (len + padding > *rs) {
413 /*
414 * Not enough data in the socket buffer. Receive as much
415 * as we can. Don't receive padding, since, obviously, it's
416 * not the end of data segment yet.
417 */
418 #if 0
419 ICL_DEBUG("limited from %zd to %zd",
420 len + padding, *rs - padding));
421 #endif
422 len = *rs - padding;
423 *more_neededp = true;
424 padding = 0;
425 }
426
427 /*
428 * Must not try to receive padding without at least one byte
429 * of actual data segment.
430 */
431 if (len > 0) {
432 m = *r;
433 *r = m_split(m, len + padding, M_WAITOK);
434 *rs -= len + padding;
435
436 if (request->ip_data_mbuf == NULL)
437 request->ip_data_mbuf = m;
438 else
439 m_cat(request->ip_data_mbuf, m);
440
441 request->ip_data_len += len;
442 } else
443 ICL_DEBUG("len 0");
444
445 if (*more_neededp)
446 ic->ic_receive_len =
447 icl_pdu_data_segment_receive_len(request);
448
449 return (0);
450 }
451
452 static int
icl_pdu_check_data_digest(struct icl_pdu * request,struct mbuf ** r,size_t * rs)453 icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
454 {
455 uint32_t received_digest, valid_digest;
456
457 if (request->ip_conn->ic_data_crc32c == false)
458 return (0);
459
460 if (request->ip_data_len == 0)
461 return (0);
462
463 CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
464 icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE);
465
466 /*
467 * Note that ip_data_mbuf also contains padding; since digest
468 * calculation is supposed to include that, we iterate over
469 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
470 */
471 valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
472 if (received_digest != valid_digest) {
473 ICL_WARN("data digest check failed; got 0x%x, "
474 "should be 0x%x", received_digest, valid_digest);
475 return (-1);
476 }
477
478 return (0);
479 }
480
481 /*
482 * Somewhat contrary to the name, this attempts to receive only one
483 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
484 */
485 static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn * ic,struct mbuf ** r,size_t * rs)486 icl_conn_receive_pdu(struct icl_conn *ic, struct mbuf **r, size_t *rs)
487 {
488 struct icl_pdu *request;
489 size_t len;
490 int error = 0;
491 bool more_needed;
492
493 if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
494 KASSERT(ic->ic_receive_pdu == NULL,
495 ("ic->ic_receive_pdu != NULL"));
496 request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
497 if (request == NULL) {
498 ICL_DEBUG("failed to allocate PDU; "
499 "dropping connection");
500 icl_conn_fail(ic);
501 return (NULL);
502 }
503 ic->ic_receive_pdu = request;
504 } else {
505 KASSERT(ic->ic_receive_pdu != NULL,
506 ("ic->ic_receive_pdu == NULL"));
507 request = ic->ic_receive_pdu;
508 }
509
510 switch (ic->ic_receive_state) {
511 case ICL_CONN_STATE_BHS:
512 //ICL_DEBUG("receiving BHS");
513 icl_soft_receive_buf(r, rs, request->ip_bhs,
514 sizeof(struct iscsi_bhs));
515
516 /*
517 * We don't enforce any limit for AHS length;
518 * its length is stored in 8 bit field.
519 */
520
521 len = icl_pdu_data_segment_length(request);
522 if (len > ic->ic_max_data_segment_length) {
523 ICL_WARN("received data segment "
524 "length %zd is larger than negotiated; "
525 "dropping connection", len);
526 error = EINVAL;
527 break;
528 }
529
530 ic->ic_receive_state = ICL_CONN_STATE_AHS;
531 ic->ic_receive_len = icl_pdu_ahs_length(request);
532 break;
533
534 case ICL_CONN_STATE_AHS:
535 //ICL_DEBUG("receiving AHS");
536 icl_pdu_receive_ahs(request, r, rs);
537 ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
538 if (ic->ic_header_crc32c == false)
539 ic->ic_receive_len = 0;
540 else
541 ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
542 break;
543
544 case ICL_CONN_STATE_HEADER_DIGEST:
545 //ICL_DEBUG("receiving header digest");
546 error = icl_pdu_check_header_digest(request, r, rs);
547 if (error != 0) {
548 ICL_DEBUG("header digest failed; "
549 "dropping connection");
550 break;
551 }
552
553 ic->ic_receive_state = ICL_CONN_STATE_DATA;
554 ic->ic_receive_len =
555 icl_pdu_data_segment_receive_len(request);
556 break;
557
558 case ICL_CONN_STATE_DATA:
559 //ICL_DEBUG("receiving data segment");
560 error = icl_pdu_receive_data_segment(request, r, rs,
561 &more_needed);
562 if (error != 0) {
563 ICL_DEBUG("failed to receive data segment;"
564 "dropping connection");
565 break;
566 }
567
568 if (more_needed)
569 break;
570
571 ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
572 if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
573 ic->ic_receive_len = 0;
574 else
575 ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
576 break;
577
578 case ICL_CONN_STATE_DATA_DIGEST:
579 //ICL_DEBUG("receiving data digest");
580 error = icl_pdu_check_data_digest(request, r, rs);
581 if (error != 0) {
582 ICL_DEBUG("data digest failed; "
583 "dropping connection");
584 break;
585 }
586
587 /*
588 * We've received complete PDU; reset the receive state machine
589 * and return the PDU.
590 */
591 ic->ic_receive_state = ICL_CONN_STATE_BHS;
592 ic->ic_receive_len = sizeof(struct iscsi_bhs);
593 ic->ic_receive_pdu = NULL;
594 return (request);
595
596 default:
597 panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
598 }
599
600 if (error != 0) {
601 /*
602 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
603 * and will get freed in icl_soft_conn_close().
604 */
605 icl_conn_fail(ic);
606 }
607
608 return (NULL);
609 }
610
611 static void
icl_conn_receive_pdus(struct icl_conn * ic,struct mbuf ** r,size_t * rs)612 icl_conn_receive_pdus(struct icl_conn *ic, struct mbuf **r, size_t *rs)
613 {
614 struct icl_pdu *response;
615
616 for (;;) {
617 if (ic->ic_disconnecting)
618 return;
619
620 /*
621 * Loop until we have a complete PDU or there is not enough
622 * data in the socket buffer.
623 */
624 if (*rs < ic->ic_receive_len) {
625 #if 0
626 ICL_DEBUG("not enough data; have %zd, need %zd",
627 *rs, ic->ic_receive_len);
628 #endif
629 return;
630 }
631
632 response = icl_conn_receive_pdu(ic, r, rs);
633 if (response == NULL)
634 continue;
635
636 if (response->ip_ahs_len > 0) {
637 ICL_WARN("received PDU with unsupported "
638 "AHS; opcode 0x%x; dropping connection",
639 response->ip_bhs->bhs_opcode);
640 icl_soft_conn_pdu_free(ic, response);
641 icl_conn_fail(ic);
642 return;
643 }
644
645 (ic->ic_receive)(response);
646 }
647 }
648
649 static void
icl_receive_thread(void * arg)650 icl_receive_thread(void *arg)
651 {
652 struct icl_conn *ic;
653 size_t available, read = 0;
654 struct socket *so;
655 struct mbuf *m, *r = NULL;
656 struct uio uio;
657 int error, flags;
658
659 ic = arg;
660 so = ic->ic_socket;
661
662 for (;;) {
663 SOCKBUF_LOCK(&so->so_rcv);
664 if (ic->ic_disconnecting) {
665 SOCKBUF_UNLOCK(&so->so_rcv);
666 break;
667 }
668
669 /*
670 * Set the low watermark, to be checked by
671 * soreadable() in icl_soupcall_receive()
672 * to avoid unnecessary wakeups until there
673 * is enough data received to read the PDU.
674 */
675 available = sbavail(&so->so_rcv);
676 if (read + available < ic->ic_receive_len) {
677 so->so_rcv.sb_lowat = ic->ic_receive_len - read;
678 cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
679 so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
680 available = sbavail(&so->so_rcv);
681 }
682 SOCKBUF_UNLOCK(&so->so_rcv);
683
684 if (available == 0) {
685 if (so->so_error != 0) {
686 ICL_DEBUG("connection error %d; "
687 "dropping connection", so->so_error);
688 icl_conn_fail(ic);
689 break;
690 }
691 continue;
692 }
693
694 memset(&uio, 0, sizeof(uio));
695 uio.uio_resid = available;
696 flags = MSG_DONTWAIT;
697 error = soreceive(so, NULL, &uio, &m, NULL, &flags);
698 if (error != 0) {
699 ICL_DEBUG("soreceive error %d", error);
700 break;
701 }
702 if (uio.uio_resid != 0) {
703 m_freem(m);
704 ICL_DEBUG("short read");
705 break;
706 }
707 if (r)
708 m_cat(r, m);
709 else
710 r = m;
711 read += available;
712
713 icl_conn_receive_pdus(ic, &r, &read);
714 }
715
716 if (r)
717 m_freem(r);
718
719 ICL_CONN_LOCK(ic);
720 ic->ic_receive_running = false;
721 cv_signal(&ic->ic_send_cv);
722 ICL_CONN_UNLOCK(ic);
723 kthread_exit();
724 }
725
726 static int
icl_soupcall_receive(struct socket * so,void * arg,int waitflag)727 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
728 {
729 struct icl_conn *ic;
730
731 if (!soreadable(so))
732 return (SU_OK);
733
734 ic = arg;
735 cv_signal(&ic->ic_receive_cv);
736 return (SU_OK);
737 }
738
739 static int
icl_pdu_finalize(struct icl_pdu * request)740 icl_pdu_finalize(struct icl_pdu *request)
741 {
742 size_t padding, pdu_len;
743 uint32_t digest, zero = 0;
744 int ok;
745 struct icl_conn *ic;
746
747 ic = request->ip_conn;
748
749 icl_pdu_set_data_segment_length(request, request->ip_data_len);
750
751 pdu_len = icl_pdu_size(request);
752
753 if (ic->ic_header_crc32c) {
754 digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
755 ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
756 (void *)&digest);
757 if (ok != 1) {
758 ICL_WARN("failed to append header digest");
759 return (1);
760 }
761 }
762
763 if (request->ip_data_len != 0) {
764 padding = icl_pdu_padding(request);
765 if (padding > 0) {
766 ok = m_append(request->ip_data_mbuf, padding,
767 (void *)&zero);
768 if (ok != 1) {
769 ICL_WARN("failed to append padding");
770 return (1);
771 }
772 }
773
774 if (ic->ic_data_crc32c) {
775 digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
776
777 ok = m_append(request->ip_data_mbuf, sizeof(digest),
778 (void *)&digest);
779 if (ok != 1) {
780 ICL_WARN("failed to append data digest");
781 return (1);
782 }
783 }
784
785 m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
786 request->ip_data_mbuf = NULL;
787 }
788
789 request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
790
791 return (0);
792 }
793
794 static void
icl_conn_send_pdus(struct icl_conn * ic,struct icl_pdu_stailq * queue)795 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
796 {
797 struct icl_pdu *request, *request2;
798 struct mbuf *m;
799 struct socket *so;
800 long available, size, size2;
801 #ifdef DEBUG_COALESCED
802 int coalesced;
803 #endif
804 int error;
805
806 ICL_CONN_LOCK_ASSERT_NOT(ic);
807
808 so = ic->ic_socket;
809
810 SOCKBUF_LOCK(&so->so_snd);
811 /*
812 * Check how much space do we have for transmit. We can't just
813 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
814 * as it always frees the mbuf chain passed to it, even in case
815 * of error.
816 */
817 available = sbspace(&so->so_snd);
818 ic->ic_check_send_space = false;
819
820 /*
821 * Notify the socket upcall that we don't need wakeups
822 * for the time being.
823 */
824 so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
825 SOCKBUF_UNLOCK(&so->so_snd);
826
827 while (!STAILQ_EMPTY(queue)) {
828 request = STAILQ_FIRST(queue);
829 size = icl_pdu_size(request);
830 if (available < size) {
831
832 /*
833 * Set the low watermark, to be checked by
834 * sowriteable() in icl_soupcall_send()
835 * to avoid unnecessary wakeups until there
836 * is enough space for the PDU to fit.
837 */
838 SOCKBUF_LOCK(&so->so_snd);
839 available = sbspace(&so->so_snd);
840 if (available < size) {
841 #if 1
842 ICL_DEBUG("no space to send; "
843 "have %ld, need %ld",
844 available, size);
845 #endif
846 so->so_snd.sb_lowat = max(size,
847 so->so_snd.sb_hiwat / 8);
848 SOCKBUF_UNLOCK(&so->so_snd);
849 return;
850 }
851 SOCKBUF_UNLOCK(&so->so_snd);
852 }
853 STAILQ_REMOVE_HEAD(queue, ip_next);
854 error = icl_pdu_finalize(request);
855 if (error != 0) {
856 ICL_DEBUG("failed to finalize PDU; "
857 "dropping connection");
858 icl_soft_conn_pdu_free(ic, request);
859 icl_conn_fail(ic);
860 return;
861 }
862 if (coalesce) {
863 m = request->ip_bhs_mbuf;
864 for (
865 #ifdef DEBUG_COALESCED
866 coalesced = 1
867 #endif
868 ; ;
869 #ifdef DEBUG_COALESCED
870 coalesced++
871 #endif
872 ) {
873 request2 = STAILQ_FIRST(queue);
874 if (request2 == NULL)
875 break;
876 size2 = icl_pdu_size(request2);
877 if (available < size + size2)
878 break;
879 STAILQ_REMOVE_HEAD(queue, ip_next);
880 error = icl_pdu_finalize(request2);
881 if (error != 0) {
882 ICL_DEBUG("failed to finalize PDU; "
883 "dropping connection");
884 icl_soft_conn_pdu_free(ic, request);
885 icl_soft_conn_pdu_free(ic, request2);
886 icl_conn_fail(ic);
887 return;
888 }
889 while (m->m_next)
890 m = m->m_next;
891 m_cat(m, request2->ip_bhs_mbuf);
892 request2->ip_bhs_mbuf = NULL;
893 request->ip_bhs_mbuf->m_pkthdr.len += size2;
894 size += size2;
895 icl_soft_conn_pdu_free(ic, request2);
896 }
897 #ifdef DEBUG_COALESCED
898 if (coalesced > 1) {
899 ICL_DEBUG("coalesced %d PDUs into %ld bytes",
900 coalesced, size);
901 }
902 #endif
903 }
904 available -= size;
905 error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
906 NULL, MSG_DONTWAIT, curthread);
907 request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
908 if (error != 0) {
909 ICL_DEBUG("failed to send PDU, error %d; "
910 "dropping connection", error);
911 icl_soft_conn_pdu_free(ic, request);
912 icl_conn_fail(ic);
913 return;
914 }
915 icl_soft_conn_pdu_free(ic, request);
916 }
917 }
918
919 static void
icl_send_thread(void * arg)920 icl_send_thread(void *arg)
921 {
922 struct icl_conn *ic;
923 struct icl_pdu_stailq queue;
924
925 ic = arg;
926
927 STAILQ_INIT(&queue);
928
929 ICL_CONN_LOCK(ic);
930 for (;;) {
931 for (;;) {
932 /*
933 * Populate the local queue from the main one.
934 * This way the icl_conn_send_pdus() can go through
935 * all the queued PDUs without holding any locks.
936 */
937 if (STAILQ_EMPTY(&queue) || ic->ic_check_send_space)
938 STAILQ_CONCAT(&queue, &ic->ic_to_send);
939
940 ICL_CONN_UNLOCK(ic);
941 icl_conn_send_pdus(ic, &queue);
942 ICL_CONN_LOCK(ic);
943
944 /*
945 * The icl_soupcall_send() was called since the last
946 * call to sbspace(); go around;
947 */
948 if (ic->ic_check_send_space)
949 continue;
950
951 /*
952 * Local queue is empty, but we still have PDUs
953 * in the main one; go around.
954 */
955 if (STAILQ_EMPTY(&queue) &&
956 !STAILQ_EMPTY(&ic->ic_to_send))
957 continue;
958
959 /*
960 * There might be some stuff in the local queue,
961 * which didn't get sent due to not having enough send
962 * space. Wait for socket upcall.
963 */
964 break;
965 }
966
967 if (ic->ic_disconnecting) {
968 //ICL_DEBUG("terminating");
969 break;
970 }
971
972 cv_wait(&ic->ic_send_cv, ic->ic_lock);
973 }
974
975 /*
976 * We're exiting; move PDUs back to the main queue, so they can
977 * get freed properly. At this point ordering doesn't matter.
978 */
979 STAILQ_CONCAT(&ic->ic_to_send, &queue);
980
981 ic->ic_send_running = false;
982 cv_signal(&ic->ic_send_cv);
983 ICL_CONN_UNLOCK(ic);
984 kthread_exit();
985 }
986
987 static int
icl_soupcall_send(struct socket * so,void * arg,int waitflag)988 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
989 {
990 struct icl_conn *ic;
991
992 if (!sowriteable(so))
993 return (SU_OK);
994
995 ic = arg;
996
997 ICL_CONN_LOCK(ic);
998 ic->ic_check_send_space = true;
999 ICL_CONN_UNLOCK(ic);
1000
1001 cv_signal(&ic->ic_send_cv);
1002
1003 return (SU_OK);
1004 }
1005
1006 static int
icl_soft_conn_pdu_append_data(struct icl_conn * ic,struct icl_pdu * request,const void * addr,size_t len,int flags)1007 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
1008 const void *addr, size_t len, int flags)
1009 {
1010 struct mbuf *mb, *newmb;
1011 size_t copylen, off = 0;
1012
1013 KASSERT(len > 0, ("len == 0"));
1014
1015 newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
1016 if (newmb == NULL) {
1017 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1018 return (ENOMEM);
1019 }
1020
1021 for (mb = newmb; mb != NULL; mb = mb->m_next) {
1022 copylen = min(M_TRAILINGSPACE(mb), len - off);
1023 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1024 mb->m_len = copylen;
1025 off += copylen;
1026 }
1027 KASSERT(off == len, ("%s: off != len", __func__));
1028
1029 if (request->ip_data_mbuf == NULL) {
1030 request->ip_data_mbuf = newmb;
1031 request->ip_data_len = len;
1032 } else {
1033 m_cat(request->ip_data_mbuf, newmb);
1034 request->ip_data_len += len;
1035 }
1036
1037 return (0);
1038 }
1039
1040 void
icl_soft_conn_pdu_get_data(struct icl_conn * ic,struct icl_pdu * ip,size_t off,void * addr,size_t len)1041 icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
1042 size_t off, void *addr, size_t len)
1043 {
1044
1045 m_copydata(ip->ip_data_mbuf, off, len, addr);
1046 }
1047
1048 static void
icl_pdu_queue(struct icl_pdu * ip)1049 icl_pdu_queue(struct icl_pdu *ip)
1050 {
1051 struct icl_conn *ic;
1052
1053 ic = ip->ip_conn;
1054
1055 ICL_CONN_LOCK_ASSERT(ic);
1056
1057 if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1058 ICL_DEBUG("icl_pdu_queue on closed connection");
1059 icl_soft_conn_pdu_free(ic, ip);
1060 return;
1061 }
1062
1063 if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1064 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1065 /*
1066 * If the queue is not empty, someone else had already
1067 * signaled the send thread; no need to do that again,
1068 * just return.
1069 */
1070 return;
1071 }
1072
1073 STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1074 cv_signal(&ic->ic_send_cv);
1075 }
1076
/*
 * icl_conn_pdu_queue method: thin wrapper around icl_pdu_queue(), which
 * takes the connection from the PDU itself.
 */
void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	/* "ic" is not consulted; icl_pdu_queue() uses ip->ip_conn. */
	icl_pdu_queue(ip);
}
1083
1084 static struct icl_conn *
icl_soft_new_conn(const char * name,struct mtx * lock)1085 icl_soft_new_conn(const char *name, struct mtx *lock)
1086 {
1087 struct icl_conn *ic;
1088
1089 refcount_acquire(&icl_ncons);
1090
1091 ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);
1092
1093 STAILQ_INIT(&ic->ic_to_send);
1094 ic->ic_lock = lock;
1095 cv_init(&ic->ic_send_cv, "icl_tx");
1096 cv_init(&ic->ic_receive_cv, "icl_rx");
1097 #ifdef DIAGNOSTIC
1098 refcount_init(&ic->ic_outstanding_pdus, 0);
1099 #endif
1100 ic->ic_max_data_segment_length = max_data_segment_length;
1101 ic->ic_name = name;
1102 ic->ic_offload = "None";
1103 ic->ic_unmapped = false;
1104
1105 return (ic);
1106 }
1107
1108 void
icl_soft_conn_free(struct icl_conn * ic)1109 icl_soft_conn_free(struct icl_conn *ic)
1110 {
1111
1112 #ifdef DIAGNOSTIC
1113 KASSERT(ic->ic_outstanding_pdus == 0,
1114 ("destroying session with %d outstanding PDUs",
1115 ic->ic_outstanding_pdus));
1116 #endif
1117 cv_destroy(&ic->ic_send_cv);
1118 cv_destroy(&ic->ic_receive_cv);
1119 kobj_delete((struct kobj *)ic, M_ICL_SOFT);
1120 refcount_release(&icl_ncons);
1121 }
1122
1123 static int
icl_conn_start(struct icl_conn * ic)1124 icl_conn_start(struct icl_conn *ic)
1125 {
1126 size_t minspace;
1127 struct sockopt opt;
1128 int error, one = 1;
1129
1130 ICL_CONN_LOCK(ic);
1131
1132 /*
1133 * XXX: Ugly hack.
1134 */
1135 if (ic->ic_socket == NULL) {
1136 ICL_CONN_UNLOCK(ic);
1137 return (EINVAL);
1138 }
1139
1140 ic->ic_receive_state = ICL_CONN_STATE_BHS;
1141 ic->ic_receive_len = sizeof(struct iscsi_bhs);
1142 ic->ic_disconnecting = false;
1143
1144 ICL_CONN_UNLOCK(ic);
1145
1146 /*
1147 * For sendspace, this is required because the current code cannot
1148 * send a PDU in pieces; thus, the minimum buffer size is equal
1149 * to the maximum PDU size. "+4" is to account for possible padding.
1150 */
1151 minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1152 ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1153 if (sendspace < minspace) {
1154 ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1155 minspace);
1156 sendspace = minspace;
1157 }
1158 if (recvspace < minspace) {
1159 ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1160 minspace);
1161 recvspace = minspace;
1162 }
1163
1164 error = soreserve(ic->ic_socket, sendspace, recvspace);
1165 if (error != 0) {
1166 ICL_WARN("soreserve failed with error %d", error);
1167 icl_soft_conn_close(ic);
1168 return (error);
1169 }
1170 ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1171 ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1172
1173 /*
1174 * Disable Nagle.
1175 */
1176 bzero(&opt, sizeof(opt));
1177 opt.sopt_dir = SOPT_SET;
1178 opt.sopt_level = IPPROTO_TCP;
1179 opt.sopt_name = TCP_NODELAY;
1180 opt.sopt_val = &one;
1181 opt.sopt_valsize = sizeof(one);
1182 error = sosetopt(ic->ic_socket, &opt);
1183 if (error != 0) {
1184 ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1185 icl_soft_conn_close(ic);
1186 return (error);
1187 }
1188
1189 /*
1190 * Register socket upcall, to get notified about incoming PDUs
1191 * and free space to send outgoing ones.
1192 */
1193 SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1194 soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1195 SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1196 SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1197 soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1198 SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1199
1200 /*
1201 * Start threads.
1202 */
1203 ICL_CONN_LOCK(ic);
1204 ic->ic_send_running = ic->ic_receive_running = true;
1205 ICL_CONN_UNLOCK(ic);
1206 error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1207 ic->ic_name);
1208 if (error != 0) {
1209 ICL_WARN("kthread_add(9) failed with error %d", error);
1210 ICL_CONN_LOCK(ic);
1211 ic->ic_send_running = ic->ic_receive_running = false;
1212 cv_signal(&ic->ic_send_cv);
1213 ICL_CONN_UNLOCK(ic);
1214 icl_soft_conn_close(ic);
1215 return (error);
1216 }
1217 error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1218 ic->ic_name);
1219 if (error != 0) {
1220 ICL_WARN("kthread_add(9) failed with error %d", error);
1221 ICL_CONN_LOCK(ic);
1222 ic->ic_receive_running = false;
1223 cv_signal(&ic->ic_send_cv);
1224 ICL_CONN_UNLOCK(ic);
1225 icl_soft_conn_close(ic);
1226 return (error);
1227 }
1228
1229 return (0);
1230 }
1231
1232 int
icl_soft_conn_handoff(struct icl_conn * ic,int fd)1233 icl_soft_conn_handoff(struct icl_conn *ic, int fd)
1234 {
1235 struct file *fp;
1236 struct socket *so;
1237 cap_rights_t rights;
1238 int error;
1239
1240 ICL_CONN_LOCK_ASSERT_NOT(ic);
1241
1242 #ifdef ICL_KERNEL_PROXY
1243 /*
1244 * We're transitioning to Full Feature phase, and we don't
1245 * really care.
1246 */
1247 if (fd == 0) {
1248 ICL_CONN_LOCK(ic);
1249 if (ic->ic_socket == NULL) {
1250 ICL_CONN_UNLOCK(ic);
1251 ICL_WARN("proxy handoff without connect");
1252 return (EINVAL);
1253 }
1254 ICL_CONN_UNLOCK(ic);
1255 return (0);
1256 }
1257 #endif
1258
1259 /*
1260 * Steal the socket from userland.
1261 */
1262 error = fget(curthread, fd,
1263 cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1264 if (error != 0)
1265 return (error);
1266 if (fp->f_type != DTYPE_SOCKET) {
1267 fdrop(fp, curthread);
1268 return (EINVAL);
1269 }
1270 so = fp->f_data;
1271 if (so->so_type != SOCK_STREAM) {
1272 fdrop(fp, curthread);
1273 return (EINVAL);
1274 }
1275
1276 ICL_CONN_LOCK(ic);
1277
1278 if (ic->ic_socket != NULL) {
1279 ICL_CONN_UNLOCK(ic);
1280 fdrop(fp, curthread);
1281 return (EBUSY);
1282 }
1283
1284 ic->ic_socket = fp->f_data;
1285 fp->f_ops = &badfileops;
1286 fp->f_data = NULL;
1287 fdrop(fp, curthread);
1288 ICL_CONN_UNLOCK(ic);
1289
1290 error = icl_conn_start(ic);
1291
1292 return (error);
1293 }
1294
1295 void
icl_soft_conn_close(struct icl_conn * ic)1296 icl_soft_conn_close(struct icl_conn *ic)
1297 {
1298 struct icl_pdu *pdu;
1299 struct socket *so;
1300
1301 /*
1302 * Wake up the threads, so they can properly terminate.
1303 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock.
1304 */
1305 ICL_CONN_LOCK(ic);
1306 if (!ic->ic_disconnecting) {
1307 so = ic->ic_socket;
1308 if (so)
1309 SOCKBUF_LOCK(&so->so_rcv);
1310 ic->ic_disconnecting = true;
1311 if (so)
1312 SOCKBUF_UNLOCK(&so->so_rcv);
1313 }
1314 while (ic->ic_receive_running || ic->ic_send_running) {
1315 cv_signal(&ic->ic_receive_cv);
1316 cv_signal(&ic->ic_send_cv);
1317 cv_wait(&ic->ic_send_cv, ic->ic_lock);
1318 }
1319
1320 /* Some other thread could close the connection same time. */
1321 so = ic->ic_socket;
1322 if (so == NULL) {
1323 ICL_CONN_UNLOCK(ic);
1324 return;
1325 }
1326 ic->ic_socket = NULL;
1327
1328 /*
1329 * Deregister socket upcalls.
1330 */
1331 ICL_CONN_UNLOCK(ic);
1332 SOCKBUF_LOCK(&so->so_snd);
1333 if (so->so_snd.sb_upcall != NULL)
1334 soupcall_clear(so, SO_SND);
1335 SOCKBUF_UNLOCK(&so->so_snd);
1336 SOCKBUF_LOCK(&so->so_rcv);
1337 if (so->so_rcv.sb_upcall != NULL)
1338 soupcall_clear(so, SO_RCV);
1339 SOCKBUF_UNLOCK(&so->so_rcv);
1340 soclose(so);
1341 ICL_CONN_LOCK(ic);
1342
1343 if (ic->ic_receive_pdu != NULL) {
1344 //ICL_DEBUG("freeing partially received PDU");
1345 icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu);
1346 ic->ic_receive_pdu = NULL;
1347 }
1348
1349 /*
1350 * Remove any outstanding PDUs from the send queue.
1351 */
1352 while (!STAILQ_EMPTY(&ic->ic_to_send)) {
1353 pdu = STAILQ_FIRST(&ic->ic_to_send);
1354 STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
1355 icl_soft_conn_pdu_free(ic, pdu);
1356 }
1357
1358 KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
1359 ("destroying session with non-empty send queue"));
1360 ICL_CONN_UNLOCK(ic);
1361 }
1362
1363 int
icl_soft_conn_task_setup(struct icl_conn * ic,struct icl_pdu * ip,struct ccb_scsiio * csio,uint32_t * task_tagp,void ** prvp)1364 icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
1365 struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
1366 {
1367
1368 return (0);
1369 }
1370
/*
 * Per-task completion hook.  Intentionally empty in this
 * implementation.
 */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}
1375
1376 int
icl_soft_conn_transfer_setup(struct icl_conn * ic,union ctl_io * io,uint32_t * transfer_tag,void ** prvp)1377 icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
1378 uint32_t *transfer_tag, void **prvp)
1379 {
1380
1381 return (0);
1382 }
1383
/*
 * Per-transfer completion hook.  Intentionally empty in this
 * implementation.
 */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}
1388
1389 static int
icl_soft_limits(struct icl_drv_limits * idl)1390 icl_soft_limits(struct icl_drv_limits *idl)
1391 {
1392
1393 idl->idl_max_recv_data_segment_length = max_data_segment_length;
1394 idl->idl_max_send_data_segment_length = max_data_segment_length;
1395 idl->idl_max_burst_length = max_burst_length;
1396 idl->idl_first_burst_length = first_burst_length;
1397
1398 return (0);
1399 }
1400
1401 #ifdef ICL_KERNEL_PROXY
/*
 * Kernel-proxy connect entry point; hands all of its arguments to
 * icl_soft_proxy_connect() and returns that function's result.
 */
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	int error;

	error = icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa);

	return (error);
}
1410
1411 int
icl_soft_handoff_sock(struct icl_conn * ic,struct socket * so)1412 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
1413 {
1414 int error;
1415
1416 ICL_CONN_LOCK_ASSERT_NOT(ic);
1417
1418 if (so->so_type != SOCK_STREAM)
1419 return (EINVAL);
1420
1421 ICL_CONN_LOCK(ic);
1422 if (ic->ic_socket != NULL) {
1423 ICL_CONN_UNLOCK(ic);
1424 return (EBUSY);
1425 }
1426 ic->ic_socket = so;
1427 ICL_CONN_UNLOCK(ic);
1428
1429 error = icl_conn_start(ic);
1430
1431 return (error);
1432 }
1433 #endif /* ICL_KERNEL_PROXY */
1434
1435 static int
icl_soft_load(void)1436 icl_soft_load(void)
1437 {
1438 int error;
1439
1440 icl_pdu_zone = uma_zcreate("icl_pdu",
1441 sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1442 UMA_ALIGN_PTR, 0);
1443 refcount_init(&icl_ncons, 0);
1444
1445 /*
1446 * The reason we call this "none" is that to the user,
1447 * it's known as "offload driver"; "offload driver: soft"
1448 * doesn't make much sense.
1449 */
1450 error = icl_register("none", false, 0,
1451 icl_soft_limits, icl_soft_new_conn);
1452 KASSERT(error == 0, ("failed to register"));
1453
1454 #if defined(ICL_KERNEL_PROXY) && 0
1455 /*
1456 * Debugging aid for kernel proxy functionality.
1457 */
1458 error = icl_register("proxytest", true, 0,
1459 icl_soft_limits, icl_soft_new_conn);
1460 KASSERT(error == 0, ("failed to register"));
1461 #endif
1462
1463 return (error);
1464 }
1465
1466 static int
icl_soft_unload(void)1467 icl_soft_unload(void)
1468 {
1469
1470 if (icl_ncons != 0)
1471 return (EBUSY);
1472
1473 icl_unregister("none", false);
1474 #if defined(ICL_KERNEL_PROXY) && 0
1475 icl_unregister("proxytest", true);
1476 #endif
1477
1478 uma_zdestroy(icl_pdu_zone);
1479
1480 return (0);
1481 }
1482
1483 static int
icl_soft_modevent(module_t mod,int what,void * arg)1484 icl_soft_modevent(module_t mod, int what, void *arg)
1485 {
1486
1487 switch (what) {
1488 case MOD_LOAD:
1489 return (icl_soft_load());
1490 case MOD_UNLOAD:
1491 return (icl_soft_unload());
1492 default:
1493 return (EINVAL);
1494 }
1495 }
1496
1497 moduledata_t icl_soft_data = {
1498 "icl_soft",
1499 icl_soft_modevent,
1500 0
1501 };
1502
1503 DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
1504 MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
1505 MODULE_VERSION(icl_soft, 1);
1506