1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2012 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * This software was developed by Edward Tomasz Napierala under sponsorship
8  * from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  */
32 
33 /*
34  * Software implementation of iSCSI Common Layer kobj(9) interface.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: stable/12/sys/dev/iscsi/icl_soft.c 372306 2022-07-29 17:11:04Z dim $");
39 
40 #include <sys/param.h>
41 #include <sys/capsicum.h>
42 #include <sys/condvar.h>
43 #include <sys/conf.h>
44 #include <sys/gsb_crc32.h>
45 #include <sys/file.h>
46 #include <sys/kernel.h>
47 #include <sys/kthread.h>
48 #include <sys/lock.h>
49 #include <sys/mbuf.h>
50 #include <sys/mutex.h>
51 #include <sys/module.h>
52 #include <sys/protosw.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #include <sys/sx.h>
58 #include <sys/uio.h>
59 #include <vm/uma.h>
60 #include <netinet/in.h>
61 #include <netinet/tcp.h>
62 
63 #include <dev/iscsi/icl.h>
64 #include <dev/iscsi/iscsi_proto.h>
65 #include <icl_conn_if.h>
66 
67 SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
68     "Software iSCSI");
69 static int coalesce = 1;
70 SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN,
71     &coalesce, 0, "Try to coalesce PDUs before sending");
72 static int partial_receive_len = 256 * 1024;
73 SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
74     &partial_receive_len, 0, "Minimum read size for partially received "
75     "data segment");
76 static int max_data_segment_length = 256 * 1024;
77 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN,
78     &max_data_segment_length, 0, "Maximum data segment length");
79 static int first_burst_length = 1024 * 1024;
80 SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
81     &first_burst_length, 0, "First burst length");
82 static int max_burst_length = 1024 * 1024;
83 SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
84     &max_burst_length, 0, "Maximum burst length");
85 static int sendspace = 1536 * 1024;
86 SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN,
87     &sendspace, 0, "Default send socket buffer size");
88 static int recvspace = 1536 * 1024;
89 SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN,
90     &recvspace, 0, "Default receive socket buffer size");
91 
92 static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
93 static uma_zone_t icl_pdu_zone;
94 
95 static volatile u_int	icl_ncons;
96 
/*
 * Wrappers around the connection mutex, ic_lock.  X is a pointer to
 * struct icl_conn; it is parenthesized in the expansion so that any
 * pointer-valued expression (not only a plain identifier) binds correctly
 * to the "->" operator.
 */
#define ICL_CONN_LOCK(X)		mtx_lock((X)->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock((X)->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert((X)->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert((X)->ic_lock, MA_NOTOWNED)
101 
102 STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
103 
104 static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
105 static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
106 static icl_conn_pdu_data_segment_length_t
107 				    icl_soft_conn_pdu_data_segment_length;
108 static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
109 static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
110 static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
111 static icl_conn_handoff_t	icl_soft_conn_handoff;
112 static icl_conn_free_t		icl_soft_conn_free;
113 static icl_conn_close_t		icl_soft_conn_close;
114 static icl_conn_task_setup_t	icl_soft_conn_task_setup;
115 static icl_conn_task_done_t	icl_soft_conn_task_done;
116 static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
117 static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
118 #ifdef ICL_KERNEL_PROXY
119 static icl_conn_connect_t	icl_soft_conn_connect;
120 #endif
121 
122 static kobj_method_t icl_soft_methods[] = {
123 	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
124 	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
125 	KOBJMETHOD(icl_conn_pdu_data_segment_length,
126 	    icl_soft_conn_pdu_data_segment_length),
127 	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
128 	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
129 	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
130 	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
131 	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
132 	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
133 	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
134 	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
135 	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
136 	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
137 #ifdef ICL_KERNEL_PROXY
138 	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
139 #endif
140 	{ 0, 0 }
141 };
142 
143 DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn));
144 
145 static void
icl_conn_fail(struct icl_conn * ic)146 icl_conn_fail(struct icl_conn *ic)
147 {
148 	if (ic->ic_socket == NULL)
149 		return;
150 
151 	/*
152 	 * XXX
153 	 */
154 	ic->ic_socket->so_error = EDOOFUS;
155 	(ic->ic_error)(ic);
156 }
157 
158 static void
icl_soft_conn_pdu_free(struct icl_conn * ic,struct icl_pdu * ip)159 icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
160 {
161 
162 	m_freem(ip->ip_bhs_mbuf);
163 	m_freem(ip->ip_ahs_mbuf);
164 	m_freem(ip->ip_data_mbuf);
165 	uma_zfree(icl_pdu_zone, ip);
166 #ifdef DIAGNOSTIC
167 	refcount_release(&ic->ic_outstanding_pdus);
168 #endif
169 }
170 
171 /*
172  * Allocate icl_pdu with empty BHS to fill up by the caller.
173  */
174 struct icl_pdu *
icl_soft_conn_new_pdu(struct icl_conn * ic,int flags)175 icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
176 {
177 	struct icl_pdu *ip;
178 
179 #ifdef DIAGNOSTIC
180 	refcount_acquire(&ic->ic_outstanding_pdus);
181 #endif
182 	ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
183 	if (ip == NULL) {
184 		ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
185 #ifdef DIAGNOSTIC
186 		refcount_release(&ic->ic_outstanding_pdus);
187 #endif
188 		return (NULL);
189 	}
190 	ip->ip_conn = ic;
191 
192 	CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
193 	ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
194 	if (ip->ip_bhs_mbuf == NULL) {
195 		ICL_WARN("failed to allocate BHS mbuf");
196 		icl_soft_conn_pdu_free(ic, ip);
197 		return (NULL);
198 	}
199 	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
200 	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
201 	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
202 
203 	return (ip);
204 }
205 
206 static int
icl_pdu_ahs_length(const struct icl_pdu * request)207 icl_pdu_ahs_length(const struct icl_pdu *request)
208 {
209 
210 	return (request->ip_bhs->bhs_total_ahs_len * 4);
211 }
212 
213 static size_t
icl_pdu_data_segment_length(const struct icl_pdu * request)214 icl_pdu_data_segment_length(const struct icl_pdu *request)
215 {
216 	uint32_t len = 0;
217 
218 	len += request->ip_bhs->bhs_data_segment_len[0];
219 	len <<= 8;
220 	len += request->ip_bhs->bhs_data_segment_len[1];
221 	len <<= 8;
222 	len += request->ip_bhs->bhs_data_segment_len[2];
223 
224 	return (len);
225 }
226 
/*
 * icl_conn_pdu_data_segment_length method; "ic" is not consulted, the
 * length comes from the PDU's BHS alone.
 */
size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
	size_t dsl;

	dsl = icl_pdu_data_segment_length(request);

	return (dsl);
}
234 
235 static void
icl_pdu_set_data_segment_length(struct icl_pdu * response,uint32_t len)236 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
237 {
238 
239 	response->ip_bhs->bhs_data_segment_len[2] = len;
240 	response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
241 	response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
242 }
243 
244 static size_t
icl_pdu_padding(const struct icl_pdu * ip)245 icl_pdu_padding(const struct icl_pdu *ip)
246 {
247 
248 	if ((ip->ip_data_len % 4) != 0)
249 		return (4 - (ip->ip_data_len % 4));
250 
251 	return (0);
252 }
253 
254 static size_t
icl_pdu_size(const struct icl_pdu * response)255 icl_pdu_size(const struct icl_pdu *response)
256 {
257 	size_t len;
258 
259 	KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
260 
261 	len = sizeof(struct iscsi_bhs) + response->ip_data_len +
262 	    icl_pdu_padding(response);
263 	if (response->ip_conn->ic_header_crc32c)
264 		len += ISCSI_HEADER_DIGEST_SIZE;
265 	if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
266 		len += ISCSI_DATA_DIGEST_SIZE;
267 
268 	return (len);
269 }
270 
271 static void
icl_soft_receive_buf(struct mbuf ** r,size_t * rs,void * buf,size_t s)272 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s)
273 {
274 
275 	m_copydata(*r, 0, s, buf);
276 	m_adj(*r, s);
277 	while ((*r) != NULL && (*r)->m_len == 0)
278 		*r = m_free(*r);
279 	*rs -= s;
280 }
281 
282 static void
icl_pdu_receive_ahs(struct icl_pdu * request,struct mbuf ** r,size_t * rs)283 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs)
284 {
285 
286 	request->ip_ahs_len = icl_pdu_ahs_length(request);
287 	if (request->ip_ahs_len == 0)
288 		return;
289 
290 	request->ip_ahs_mbuf = *r;
291 	*r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK);
292 	*rs -= request->ip_ahs_len;
293 }
294 
295 static uint32_t
icl_mbuf_to_crc32c(const struct mbuf * m0)296 icl_mbuf_to_crc32c(const struct mbuf *m0)
297 {
298 	uint32_t digest = 0xffffffff;
299 	const struct mbuf *m;
300 
301 	for (m = m0; m != NULL; m = m->m_next)
302 		digest = calculate_crc32c(digest,
303 		    mtod(m, const void *), m->m_len);
304 
305 	digest = digest ^ 0xffffffff;
306 
307 	return (digest);
308 }
309 
310 static int
icl_pdu_check_header_digest(struct icl_pdu * request,struct mbuf ** r,size_t * rs)311 icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
312 {
313 	uint32_t received_digest, valid_digest;
314 
315 	if (request->ip_conn->ic_header_crc32c == false)
316 		return (0);
317 
318 	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
319 	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE);
320 
321 	/* Temporary attach AHS to BHS to calculate header digest. */
322 	request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
323 	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
324 	request->ip_bhs_mbuf->m_next = NULL;
325 	if (received_digest != valid_digest) {
326 		ICL_WARN("header digest check failed; got 0x%x, "
327 		    "should be 0x%x", received_digest, valid_digest);
328 		return (-1);
329 	}
330 
331 	return (0);
332 }
333 
334 /*
335  * Return the number of bytes that should be waiting in the receive socket
336  * before icl_pdu_receive_data_segment() gets called.
337  */
338 static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu * request)339 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
340 {
341 	size_t len;
342 
343 	len = icl_pdu_data_segment_length(request);
344 	if (len == 0)
345 		return (0);
346 
347 	/*
348 	 * Account for the parts of data segment already read from
349 	 * the socket buffer.
350 	 */
351 	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
352 	len -= request->ip_data_len;
353 
354 	/*
355 	 * Don't always wait for the full data segment to be delivered
356 	 * to the socket; this might badly affect performance due to
357 	 * TCP window scaling.
358 	 */
359 	if (len > partial_receive_len) {
360 #if 0
361 		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
362 		    len, partial_receive_len));
363 #endif
364 		len = partial_receive_len;
365 
366 		return (len);
367 	}
368 
369 	/*
370 	 * Account for padding.  Note that due to the way code is written,
371 	 * the icl_pdu_receive_data_segment() must always receive padding
372 	 * along with the last part of data segment, because it would be
373 	 * impossible to tell whether we've already received the full data
374 	 * segment including padding, or without it.
375 	 */
376 	if ((len % 4) != 0)
377 		len += 4 - (len % 4);
378 
379 #if 0
380 	ICL_DEBUG("need %zd bytes of data", len));
381 #endif
382 
383 	return (len);
384 }
385 
386 static int
icl_pdu_receive_data_segment(struct icl_pdu * request,struct mbuf ** r,size_t * rs,bool * more_neededp)387 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r,
388     size_t *rs, bool *more_neededp)
389 {
390 	struct icl_conn *ic;
391 	size_t len, padding = 0;
392 	struct mbuf *m;
393 
394 	ic = request->ip_conn;
395 
396 	*more_neededp = false;
397 	ic->ic_receive_len = 0;
398 
399 	len = icl_pdu_data_segment_length(request);
400 	if (len == 0)
401 		return (0);
402 
403 	if ((len % 4) != 0)
404 		padding = 4 - (len % 4);
405 
406 	/*
407 	 * Account for already received parts of data segment.
408 	 */
409 	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
410 	len -= request->ip_data_len;
411 
412 	if (len + padding > *rs) {
413 		/*
414 		 * Not enough data in the socket buffer.  Receive as much
415 		 * as we can.  Don't receive padding, since, obviously, it's
416 		 * not the end of data segment yet.
417 		 */
418 #if 0
419 		ICL_DEBUG("limited from %zd to %zd",
420 		    len + padding, *rs - padding));
421 #endif
422 		len = *rs - padding;
423 		*more_neededp = true;
424 		padding = 0;
425 	}
426 
427 	/*
428 	 * Must not try to receive padding without at least one byte
429 	 * of actual data segment.
430 	 */
431 	if (len > 0) {
432 		m = *r;
433 		*r = m_split(m, len + padding, M_WAITOK);
434 		*rs -= len + padding;
435 
436 		if (request->ip_data_mbuf == NULL)
437 			request->ip_data_mbuf = m;
438 		else
439 			m_cat(request->ip_data_mbuf, m);
440 
441 		request->ip_data_len += len;
442 	} else
443 		ICL_DEBUG("len 0");
444 
445 	if (*more_neededp)
446 		ic->ic_receive_len =
447 		    icl_pdu_data_segment_receive_len(request);
448 
449 	return (0);
450 }
451 
452 static int
icl_pdu_check_data_digest(struct icl_pdu * request,struct mbuf ** r,size_t * rs)453 icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
454 {
455 	uint32_t received_digest, valid_digest;
456 
457 	if (request->ip_conn->ic_data_crc32c == false)
458 		return (0);
459 
460 	if (request->ip_data_len == 0)
461 		return (0);
462 
463 	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
464 	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE);
465 
466 	/*
467 	 * Note that ip_data_mbuf also contains padding; since digest
468 	 * calculation is supposed to include that, we iterate over
469 	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
470 	 */
471 	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
472 	if (received_digest != valid_digest) {
473 		ICL_WARN("data digest check failed; got 0x%x, "
474 		    "should be 0x%x", received_digest, valid_digest);
475 		return (-1);
476 	}
477 
478 	return (0);
479 }
480 
481 /*
482  * Somewhat contrary to the name, this attempts to receive only one
483  * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
484  */
485 static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn * ic,struct mbuf ** r,size_t * rs)486 icl_conn_receive_pdu(struct icl_conn *ic, struct mbuf **r, size_t *rs)
487 {
488 	struct icl_pdu *request;
489 	size_t len;
490 	int error = 0;
491 	bool more_needed;
492 
493 	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
494 		KASSERT(ic->ic_receive_pdu == NULL,
495 		    ("ic->ic_receive_pdu != NULL"));
496 		request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
497 		if (request == NULL) {
498 			ICL_DEBUG("failed to allocate PDU; "
499 			    "dropping connection");
500 			icl_conn_fail(ic);
501 			return (NULL);
502 		}
503 		ic->ic_receive_pdu = request;
504 	} else {
505 		KASSERT(ic->ic_receive_pdu != NULL,
506 		    ("ic->ic_receive_pdu == NULL"));
507 		request = ic->ic_receive_pdu;
508 	}
509 
510 	switch (ic->ic_receive_state) {
511 	case ICL_CONN_STATE_BHS:
512 		//ICL_DEBUG("receiving BHS");
513 		icl_soft_receive_buf(r, rs, request->ip_bhs,
514 		    sizeof(struct iscsi_bhs));
515 
516 		/*
517 		 * We don't enforce any limit for AHS length;
518 		 * its length is stored in 8 bit field.
519 		 */
520 
521 		len = icl_pdu_data_segment_length(request);
522 		if (len > ic->ic_max_data_segment_length) {
523 			ICL_WARN("received data segment "
524 			    "length %zd is larger than negotiated; "
525 			    "dropping connection", len);
526 			error = EINVAL;
527 			break;
528 		}
529 
530 		ic->ic_receive_state = ICL_CONN_STATE_AHS;
531 		ic->ic_receive_len = icl_pdu_ahs_length(request);
532 		break;
533 
534 	case ICL_CONN_STATE_AHS:
535 		//ICL_DEBUG("receiving AHS");
536 		icl_pdu_receive_ahs(request, r, rs);
537 		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
538 		if (ic->ic_header_crc32c == false)
539 			ic->ic_receive_len = 0;
540 		else
541 			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
542 		break;
543 
544 	case ICL_CONN_STATE_HEADER_DIGEST:
545 		//ICL_DEBUG("receiving header digest");
546 		error = icl_pdu_check_header_digest(request, r, rs);
547 		if (error != 0) {
548 			ICL_DEBUG("header digest failed; "
549 			    "dropping connection");
550 			break;
551 		}
552 
553 		ic->ic_receive_state = ICL_CONN_STATE_DATA;
554 		ic->ic_receive_len =
555 		    icl_pdu_data_segment_receive_len(request);
556 		break;
557 
558 	case ICL_CONN_STATE_DATA:
559 		//ICL_DEBUG("receiving data segment");
560 		error = icl_pdu_receive_data_segment(request, r, rs,
561 		    &more_needed);
562 		if (error != 0) {
563 			ICL_DEBUG("failed to receive data segment;"
564 			    "dropping connection");
565 			break;
566 		}
567 
568 		if (more_needed)
569 			break;
570 
571 		ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST;
572 		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
573 			ic->ic_receive_len = 0;
574 		else
575 			ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE;
576 		break;
577 
578 	case ICL_CONN_STATE_DATA_DIGEST:
579 		//ICL_DEBUG("receiving data digest");
580 		error = icl_pdu_check_data_digest(request, r, rs);
581 		if (error != 0) {
582 			ICL_DEBUG("data digest failed; "
583 			    "dropping connection");
584 			break;
585 		}
586 
587 		/*
588 		 * We've received complete PDU; reset the receive state machine
589 		 * and return the PDU.
590 		 */
591 		ic->ic_receive_state = ICL_CONN_STATE_BHS;
592 		ic->ic_receive_len = sizeof(struct iscsi_bhs);
593 		ic->ic_receive_pdu = NULL;
594 		return (request);
595 
596 	default:
597 		panic("invalid ic_receive_state %d\n", ic->ic_receive_state);
598 	}
599 
600 	if (error != 0) {
601 		/*
602 		 * Don't free the PDU; it's pointed to by ic->ic_receive_pdu
603 		 * and will get freed in icl_soft_conn_close().
604 		 */
605 		icl_conn_fail(ic);
606 	}
607 
608 	return (NULL);
609 }
610 
611 static void
icl_conn_receive_pdus(struct icl_conn * ic,struct mbuf ** r,size_t * rs)612 icl_conn_receive_pdus(struct icl_conn *ic, struct mbuf **r, size_t *rs)
613 {
614 	struct icl_pdu *response;
615 
616 	for (;;) {
617 		if (ic->ic_disconnecting)
618 			return;
619 
620 		/*
621 		 * Loop until we have a complete PDU or there is not enough
622 		 * data in the socket buffer.
623 		 */
624 		if (*rs < ic->ic_receive_len) {
625 #if 0
626 			ICL_DEBUG("not enough data; have %zd, need %zd",
627 			    *rs, ic->ic_receive_len);
628 #endif
629 			return;
630 		}
631 
632 		response = icl_conn_receive_pdu(ic, r, rs);
633 		if (response == NULL)
634 			continue;
635 
636 		if (response->ip_ahs_len > 0) {
637 			ICL_WARN("received PDU with unsupported "
638 			    "AHS; opcode 0x%x; dropping connection",
639 			    response->ip_bhs->bhs_opcode);
640 			icl_soft_conn_pdu_free(ic, response);
641 			icl_conn_fail(ic);
642 			return;
643 		}
644 
645 		(ic->ic_receive)(response);
646 	}
647 }
648 
649 static void
icl_receive_thread(void * arg)650 icl_receive_thread(void *arg)
651 {
652 	struct icl_conn *ic;
653 	size_t available, read = 0;
654 	struct socket *so;
655 	struct mbuf *m, *r = NULL;
656 	struct uio uio;
657 	int error, flags;
658 
659 	ic = arg;
660 	so = ic->ic_socket;
661 
662 	for (;;) {
663 		SOCKBUF_LOCK(&so->so_rcv);
664 		if (ic->ic_disconnecting) {
665 			SOCKBUF_UNLOCK(&so->so_rcv);
666 			break;
667 		}
668 
669 		/*
670 		 * Set the low watermark, to be checked by
671 		 * soreadable() in icl_soupcall_receive()
672 		 * to avoid unnecessary wakeups until there
673 		 * is enough data received to read the PDU.
674 		 */
675 		available = sbavail(&so->so_rcv);
676 		if (read + available < ic->ic_receive_len) {
677 			so->so_rcv.sb_lowat = ic->ic_receive_len - read;
678 			cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx);
679 			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
680 			available = sbavail(&so->so_rcv);
681 		}
682 		SOCKBUF_UNLOCK(&so->so_rcv);
683 
684 		if (available == 0) {
685 			if (so->so_error != 0) {
686 				ICL_DEBUG("connection error %d; "
687 				    "dropping connection", so->so_error);
688 				icl_conn_fail(ic);
689 				break;
690 			}
691 			continue;
692 		}
693 
694 		memset(&uio, 0, sizeof(uio));
695 		uio.uio_resid = available;
696 		flags = MSG_DONTWAIT;
697 		error = soreceive(so, NULL, &uio, &m, NULL, &flags);
698 		if (error != 0) {
699 			ICL_DEBUG("soreceive error %d", error);
700 			break;
701 		}
702 		if (uio.uio_resid != 0) {
703 			m_freem(m);
704 			ICL_DEBUG("short read");
705 			break;
706 		}
707 		if (r)
708 			m_cat(r, m);
709 		else
710 			r = m;
711 		read += available;
712 
713 		icl_conn_receive_pdus(ic, &r, &read);
714 	}
715 
716 	if (r)
717 		m_freem(r);
718 
719 	ICL_CONN_LOCK(ic);
720 	ic->ic_receive_running = false;
721 	cv_signal(&ic->ic_send_cv);
722 	ICL_CONN_UNLOCK(ic);
723 	kthread_exit();
724 }
725 
726 static int
icl_soupcall_receive(struct socket * so,void * arg,int waitflag)727 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
728 {
729 	struct icl_conn *ic;
730 
731 	if (!soreadable(so))
732 		return (SU_OK);
733 
734 	ic = arg;
735 	cv_signal(&ic->ic_receive_cv);
736 	return (SU_OK);
737 }
738 
739 static int
icl_pdu_finalize(struct icl_pdu * request)740 icl_pdu_finalize(struct icl_pdu *request)
741 {
742 	size_t padding, pdu_len;
743 	uint32_t digest, zero = 0;
744 	int ok;
745 	struct icl_conn *ic;
746 
747 	ic = request->ip_conn;
748 
749 	icl_pdu_set_data_segment_length(request, request->ip_data_len);
750 
751 	pdu_len = icl_pdu_size(request);
752 
753 	if (ic->ic_header_crc32c) {
754 		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
755 		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
756 		    (void *)&digest);
757 		if (ok != 1) {
758 			ICL_WARN("failed to append header digest");
759 			return (1);
760 		}
761 	}
762 
763 	if (request->ip_data_len != 0) {
764 		padding = icl_pdu_padding(request);
765 		if (padding > 0) {
766 			ok = m_append(request->ip_data_mbuf, padding,
767 			    (void *)&zero);
768 			if (ok != 1) {
769 				ICL_WARN("failed to append padding");
770 				return (1);
771 			}
772 		}
773 
774 		if (ic->ic_data_crc32c) {
775 			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
776 
777 			ok = m_append(request->ip_data_mbuf, sizeof(digest),
778 			    (void *)&digest);
779 			if (ok != 1) {
780 				ICL_WARN("failed to append data digest");
781 				return (1);
782 			}
783 		}
784 
785 		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
786 		request->ip_data_mbuf = NULL;
787 	}
788 
789 	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
790 
791 	return (0);
792 }
793 
794 static void
icl_conn_send_pdus(struct icl_conn * ic,struct icl_pdu_stailq * queue)795 icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
796 {
797 	struct icl_pdu *request, *request2;
798 	struct mbuf *m;
799 	struct socket *so;
800 	long available, size, size2;
801 #ifdef DEBUG_COALESCED
802 	int coalesced;
803 #endif
804 	int error;
805 
806 	ICL_CONN_LOCK_ASSERT_NOT(ic);
807 
808 	so = ic->ic_socket;
809 
810 	SOCKBUF_LOCK(&so->so_snd);
811 	/*
812 	 * Check how much space do we have for transmit.  We can't just
813 	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
814 	 * as it always frees the mbuf chain passed to it, even in case
815 	 * of error.
816 	 */
817 	available = sbspace(&so->so_snd);
818 	ic->ic_check_send_space = false;
819 
820 	/*
821 	 * Notify the socket upcall that we don't need wakeups
822 	 * for the time being.
823 	 */
824 	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
825 	SOCKBUF_UNLOCK(&so->so_snd);
826 
827 	while (!STAILQ_EMPTY(queue)) {
828 		request = STAILQ_FIRST(queue);
829 		size = icl_pdu_size(request);
830 		if (available < size) {
831 
832 			/*
833 			 * Set the low watermark, to be checked by
834 			 * sowriteable() in icl_soupcall_send()
835 			 * to avoid unnecessary wakeups until there
836 			 * is enough space for the PDU to fit.
837 			 */
838 			SOCKBUF_LOCK(&so->so_snd);
839 			available = sbspace(&so->so_snd);
840 			if (available < size) {
841 #if 1
842 				ICL_DEBUG("no space to send; "
843 				    "have %ld, need %ld",
844 				    available, size);
845 #endif
846 				so->so_snd.sb_lowat = max(size,
847 				    so->so_snd.sb_hiwat / 8);
848 				SOCKBUF_UNLOCK(&so->so_snd);
849 				return;
850 			}
851 			SOCKBUF_UNLOCK(&so->so_snd);
852 		}
853 		STAILQ_REMOVE_HEAD(queue, ip_next);
854 		error = icl_pdu_finalize(request);
855 		if (error != 0) {
856 			ICL_DEBUG("failed to finalize PDU; "
857 			    "dropping connection");
858 			icl_soft_conn_pdu_free(ic, request);
859 			icl_conn_fail(ic);
860 			return;
861 		}
862 		if (coalesce) {
863 			m = request->ip_bhs_mbuf;
864 			for (
865 #ifdef DEBUG_COALESCED
866 			    coalesced = 1
867 #endif
868 			    ; ;
869 #ifdef DEBUG_COALESCED
870 			    coalesced++
871 #endif
872 			    ) {
873 				request2 = STAILQ_FIRST(queue);
874 				if (request2 == NULL)
875 					break;
876 				size2 = icl_pdu_size(request2);
877 				if (available < size + size2)
878 					break;
879 				STAILQ_REMOVE_HEAD(queue, ip_next);
880 				error = icl_pdu_finalize(request2);
881 				if (error != 0) {
882 					ICL_DEBUG("failed to finalize PDU; "
883 					    "dropping connection");
884 					icl_soft_conn_pdu_free(ic, request);
885 					icl_soft_conn_pdu_free(ic, request2);
886 					icl_conn_fail(ic);
887 					return;
888 				}
889 				while (m->m_next)
890 					m = m->m_next;
891 				m_cat(m, request2->ip_bhs_mbuf);
892 				request2->ip_bhs_mbuf = NULL;
893 				request->ip_bhs_mbuf->m_pkthdr.len += size2;
894 				size += size2;
895 				icl_soft_conn_pdu_free(ic, request2);
896 			}
897 #ifdef DEBUG_COALESCED
898 			if (coalesced > 1) {
899 				ICL_DEBUG("coalesced %d PDUs into %ld bytes",
900 				    coalesced, size);
901 			}
902 #endif
903 		}
904 		available -= size;
905 		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
906 		    NULL, MSG_DONTWAIT, curthread);
907 		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
908 		if (error != 0) {
909 			ICL_DEBUG("failed to send PDU, error %d; "
910 			    "dropping connection", error);
911 			icl_soft_conn_pdu_free(ic, request);
912 			icl_conn_fail(ic);
913 			return;
914 		}
915 		icl_soft_conn_pdu_free(ic, request);
916 	}
917 }
918 
919 static void
icl_send_thread(void * arg)920 icl_send_thread(void *arg)
921 {
922 	struct icl_conn *ic;
923 	struct icl_pdu_stailq queue;
924 
925 	ic = arg;
926 
927 	STAILQ_INIT(&queue);
928 
929 	ICL_CONN_LOCK(ic);
930 	for (;;) {
931 		for (;;) {
932 			/*
933 			 * Populate the local queue from the main one.
934 			 * This way the icl_conn_send_pdus() can go through
935 			 * all the queued PDUs without holding any locks.
936 			 */
937 			if (STAILQ_EMPTY(&queue) || ic->ic_check_send_space)
938 				STAILQ_CONCAT(&queue, &ic->ic_to_send);
939 
940 			ICL_CONN_UNLOCK(ic);
941 			icl_conn_send_pdus(ic, &queue);
942 			ICL_CONN_LOCK(ic);
943 
944 			/*
945 			 * The icl_soupcall_send() was called since the last
946 			 * call to sbspace(); go around;
947 			 */
948 			if (ic->ic_check_send_space)
949 				continue;
950 
951 			/*
952 			 * Local queue is empty, but we still have PDUs
953 			 * in the main one; go around.
954 			 */
955 			if (STAILQ_EMPTY(&queue) &&
956 			    !STAILQ_EMPTY(&ic->ic_to_send))
957 				continue;
958 
959 			/*
960 			 * There might be some stuff in the local queue,
961 			 * which didn't get sent due to not having enough send
962 			 * space.  Wait for socket upcall.
963 			 */
964 			break;
965 		}
966 
967 		if (ic->ic_disconnecting) {
968 			//ICL_DEBUG("terminating");
969 			break;
970 		}
971 
972 		cv_wait(&ic->ic_send_cv, ic->ic_lock);
973 	}
974 
975 	/*
976 	 * We're exiting; move PDUs back to the main queue, so they can
977 	 * get freed properly.  At this point ordering doesn't matter.
978 	 */
979 	STAILQ_CONCAT(&ic->ic_to_send, &queue);
980 
981 	ic->ic_send_running = false;
982 	cv_signal(&ic->ic_send_cv);
983 	ICL_CONN_UNLOCK(ic);
984 	kthread_exit();
985 }
986 
987 static int
icl_soupcall_send(struct socket * so,void * arg,int waitflag)988 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
989 {
990 	struct icl_conn *ic;
991 
992 	if (!sowriteable(so))
993 		return (SU_OK);
994 
995 	ic = arg;
996 
997 	ICL_CONN_LOCK(ic);
998 	ic->ic_check_send_space = true;
999 	ICL_CONN_UNLOCK(ic);
1000 
1001 	cv_signal(&ic->ic_send_cv);
1002 
1003 	return (SU_OK);
1004 }
1005 
1006 static int
icl_soft_conn_pdu_append_data(struct icl_conn * ic,struct icl_pdu * request,const void * addr,size_t len,int flags)1007 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
1008     const void *addr, size_t len, int flags)
1009 {
1010 	struct mbuf *mb, *newmb;
1011 	size_t copylen, off = 0;
1012 
1013 	KASSERT(len > 0, ("len == 0"));
1014 
1015 	newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
1016 	if (newmb == NULL) {
1017 		ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1018 		return (ENOMEM);
1019 	}
1020 
1021 	for (mb = newmb; mb != NULL; mb = mb->m_next) {
1022 		copylen = min(M_TRAILINGSPACE(mb), len - off);
1023 		memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1024 		mb->m_len = copylen;
1025 		off += copylen;
1026 	}
1027 	KASSERT(off == len, ("%s: off != len", __func__));
1028 
1029 	if (request->ip_data_mbuf == NULL) {
1030 		request->ip_data_mbuf = newmb;
1031 		request->ip_data_len = len;
1032 	} else {
1033 		m_cat(request->ip_data_mbuf, newmb);
1034 		request->ip_data_len += len;
1035 	}
1036 
1037 	return (0);
1038 }
1039 
/*
 * Copy "len" bytes of the data segment of "ip", starting "off" bytes
 * into it, to the buffer at "addr".
 *
 * This is a direct call to m_copydata() on ip->ip_data_mbuf; no range
 * checking is performed here, and "ic" is not used.
 */
void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}
1047 
1048 static void
icl_pdu_queue(struct icl_pdu * ip)1049 icl_pdu_queue(struct icl_pdu *ip)
1050 {
1051 	struct icl_conn *ic;
1052 
1053 	ic = ip->ip_conn;
1054 
1055 	ICL_CONN_LOCK_ASSERT(ic);
1056 
1057 	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1058 		ICL_DEBUG("icl_pdu_queue on closed connection");
1059 		icl_soft_conn_pdu_free(ic, ip);
1060 		return;
1061 	}
1062 
1063 	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
1064 		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1065 		/*
1066 		 * If the queue is not empty, someone else had already
1067 		 * signaled the send thread; no need to do that again,
1068 		 * just return.
1069 		 */
1070 		return;
1071 	}
1072 
1073 	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
1074 	cv_signal(&ic->ic_send_cv);
1075 }
1076 
/*
 * Queue "ip" for sending.  Thin wrapper around icl_pdu_queue(), which
 * takes the connection from ip->ip_conn; the "ic" argument itself is
 * not used.  icl_pdu_queue() asserts that the connection lock is held.
 */
void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_pdu_queue(ip);
}
1083 
1084 static struct icl_conn *
icl_soft_new_conn(const char * name,struct mtx * lock)1085 icl_soft_new_conn(const char *name, struct mtx *lock)
1086 {
1087 	struct icl_conn *ic;
1088 
1089 	refcount_acquire(&icl_ncons);
1090 
1091 	ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO);
1092 
1093 	STAILQ_INIT(&ic->ic_to_send);
1094 	ic->ic_lock = lock;
1095 	cv_init(&ic->ic_send_cv, "icl_tx");
1096 	cv_init(&ic->ic_receive_cv, "icl_rx");
1097 #ifdef DIAGNOSTIC
1098 	refcount_init(&ic->ic_outstanding_pdus, 0);
1099 #endif
1100 	ic->ic_max_data_segment_length = max_data_segment_length;
1101 	ic->ic_name = name;
1102 	ic->ic_offload = "None";
1103 	ic->ic_unmapped = false;
1104 
1105 	return (ic);
1106 }
1107 
1108 void
icl_soft_conn_free(struct icl_conn * ic)1109 icl_soft_conn_free(struct icl_conn *ic)
1110 {
1111 
1112 #ifdef DIAGNOSTIC
1113 	KASSERT(ic->ic_outstanding_pdus == 0,
1114 	    ("destroying session with %d outstanding PDUs",
1115 	     ic->ic_outstanding_pdus));
1116 #endif
1117 	cv_destroy(&ic->ic_send_cv);
1118 	cv_destroy(&ic->ic_receive_cv);
1119 	kobj_delete((struct kobj *)ic, M_ICL_SOFT);
1120 	refcount_release(&icl_ncons);
1121 }
1122 
1123 static int
icl_conn_start(struct icl_conn * ic)1124 icl_conn_start(struct icl_conn *ic)
1125 {
1126 	size_t minspace;
1127 	struct sockopt opt;
1128 	int error, one = 1;
1129 
1130 	ICL_CONN_LOCK(ic);
1131 
1132 	/*
1133 	 * XXX: Ugly hack.
1134 	 */
1135 	if (ic->ic_socket == NULL) {
1136 		ICL_CONN_UNLOCK(ic);
1137 		return (EINVAL);
1138 	}
1139 
1140 	ic->ic_receive_state = ICL_CONN_STATE_BHS;
1141 	ic->ic_receive_len = sizeof(struct iscsi_bhs);
1142 	ic->ic_disconnecting = false;
1143 
1144 	ICL_CONN_UNLOCK(ic);
1145 
1146 	/*
1147 	 * For sendspace, this is required because the current code cannot
1148 	 * send a PDU in pieces; thus, the minimum buffer size is equal
1149 	 * to the maximum PDU size.  "+4" is to account for possible padding.
1150 	 */
1151 	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
1152 	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
1153 	if (sendspace < minspace) {
1154 		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
1155 		    minspace);
1156 		sendspace = minspace;
1157 	}
1158 	if (recvspace < minspace) {
1159 		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
1160 		    minspace);
1161 		recvspace = minspace;
1162 	}
1163 
1164 	error = soreserve(ic->ic_socket, sendspace, recvspace);
1165 	if (error != 0) {
1166 		ICL_WARN("soreserve failed with error %d", error);
1167 		icl_soft_conn_close(ic);
1168 		return (error);
1169 	}
1170 	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
1171 	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
1172 
1173 	/*
1174 	 * Disable Nagle.
1175 	 */
1176 	bzero(&opt, sizeof(opt));
1177 	opt.sopt_dir = SOPT_SET;
1178 	opt.sopt_level = IPPROTO_TCP;
1179 	opt.sopt_name = TCP_NODELAY;
1180 	opt.sopt_val = &one;
1181 	opt.sopt_valsize = sizeof(one);
1182 	error = sosetopt(ic->ic_socket, &opt);
1183 	if (error != 0) {
1184 		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
1185 		icl_soft_conn_close(ic);
1186 		return (error);
1187 	}
1188 
1189 	/*
1190 	 * Register socket upcall, to get notified about incoming PDUs
1191 	 * and free space to send outgoing ones.
1192 	 */
1193 	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
1194 	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
1195 	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
1196 	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
1197 	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
1198 	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
1199 
1200 	/*
1201 	 * Start threads.
1202 	 */
1203 	ICL_CONN_LOCK(ic);
1204 	ic->ic_send_running = ic->ic_receive_running = true;
1205 	ICL_CONN_UNLOCK(ic);
1206 	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
1207 	    ic->ic_name);
1208 	if (error != 0) {
1209 		ICL_WARN("kthread_add(9) failed with error %d", error);
1210 		ICL_CONN_LOCK(ic);
1211 		ic->ic_send_running = ic->ic_receive_running = false;
1212 		cv_signal(&ic->ic_send_cv);
1213 		ICL_CONN_UNLOCK(ic);
1214 		icl_soft_conn_close(ic);
1215 		return (error);
1216 	}
1217 	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
1218 	    ic->ic_name);
1219 	if (error != 0) {
1220 		ICL_WARN("kthread_add(9) failed with error %d", error);
1221 		ICL_CONN_LOCK(ic);
1222 		ic->ic_receive_running = false;
1223 		cv_signal(&ic->ic_send_cv);
1224 		ICL_CONN_UNLOCK(ic);
1225 		icl_soft_conn_close(ic);
1226 		return (error);
1227 	}
1228 
1229 	return (0);
1230 }
1231 
1232 int
icl_soft_conn_handoff(struct icl_conn * ic,int fd)1233 icl_soft_conn_handoff(struct icl_conn *ic, int fd)
1234 {
1235 	struct file *fp;
1236 	struct socket *so;
1237 	cap_rights_t rights;
1238 	int error;
1239 
1240 	ICL_CONN_LOCK_ASSERT_NOT(ic);
1241 
1242 #ifdef ICL_KERNEL_PROXY
1243 	/*
1244 	 * We're transitioning to Full Feature phase, and we don't
1245 	 * really care.
1246 	 */
1247 	if (fd == 0) {
1248 		ICL_CONN_LOCK(ic);
1249 		if (ic->ic_socket == NULL) {
1250 			ICL_CONN_UNLOCK(ic);
1251 			ICL_WARN("proxy handoff without connect");
1252 			return (EINVAL);
1253 		}
1254 		ICL_CONN_UNLOCK(ic);
1255 		return (0);
1256 	}
1257 #endif
1258 
1259 	/*
1260 	 * Steal the socket from userland.
1261 	 */
1262 	error = fget(curthread, fd,
1263 	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
1264 	if (error != 0)
1265 		return (error);
1266 	if (fp->f_type != DTYPE_SOCKET) {
1267 		fdrop(fp, curthread);
1268 		return (EINVAL);
1269 	}
1270 	so = fp->f_data;
1271 	if (so->so_type != SOCK_STREAM) {
1272 		fdrop(fp, curthread);
1273 		return (EINVAL);
1274 	}
1275 
1276 	ICL_CONN_LOCK(ic);
1277 
1278 	if (ic->ic_socket != NULL) {
1279 		ICL_CONN_UNLOCK(ic);
1280 		fdrop(fp, curthread);
1281 		return (EBUSY);
1282 	}
1283 
1284 	ic->ic_socket = fp->f_data;
1285 	fp->f_ops = &badfileops;
1286 	fp->f_data = NULL;
1287 	fdrop(fp, curthread);
1288 	ICL_CONN_UNLOCK(ic);
1289 
1290 	error = icl_conn_start(ic);
1291 
1292 	return (error);
1293 }
1294 
/*
 * Shut the connection down.
 *
 * Marks the connection as disconnecting, waits until neither the send
 * nor the receive thread is flagged as running, then deregisters the
 * socket upcalls, closes the socket, and frees the partially received
 * PDU (if any) along with everything still on the send queue.
 *
 * The connection lock is acquired here and is dropped around the socket
 * teardown.  If ic_socket is already NULL once the threads have
 * stopped, the function returns without doing anything further.
 */
void
icl_soft_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;
	struct socket *so;

	/*
	 * Wake up the threads, so they can properly terminate.
	 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock.
	 */
	ICL_CONN_LOCK(ic);
	if (!ic->ic_disconnecting) {
		so = ic->ic_socket;
		/*
		 * The flag is flipped with the receive sockbuf lock held
		 * whenever there is a socket.
		 */
		if (so)
			SOCKBUF_LOCK(&so->so_rcv);
		ic->ic_disconnecting = true;
		if (so)
			SOCKBUF_UNLOCK(&so->so_rcv);
	}
	/*
	 * Keep signaling both threads and sleeping on ic_send_cv until
	 * both "running" flags have been cleared.
	 */
	while (ic->ic_receive_running || ic->ic_send_running) {
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	/* Claim the socket: from here on the connection has none. */
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	/*
	 * Free the partially received PDU, if there is one.
	 */
	if (ic->ic_receive_pdu != NULL) {
		icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_soft_conn_pdu_free(ic, pdu);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
	ICL_CONN_UNLOCK(ic);
}
1362 
/*
 * Per-task setup hook.  This implementation does nothing: none of the
 * arguments are examined, "*task_tagp" and "*prvp" are not written,
 * and the function always returns 0.
 */
int
icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{

	return (0);
}
1370 
/*
 * Per-task completion hook.  This implementation does nothing; "prv"
 * is not examined.
 */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}
1375 
/*
 * Per-transfer setup hook.  This implementation does nothing: none of
 * the arguments are examined, "*transfer_tag" and "*prvp" are not
 * written, and the function always returns 0.
 */
int
icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{

	return (0);
}
1383 
/*
 * Per-transfer completion hook.  This implementation does nothing;
 * "prv" is not examined.
 */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}
1388 
1389 static int
icl_soft_limits(struct icl_drv_limits * idl)1390 icl_soft_limits(struct icl_drv_limits *idl)
1391 {
1392 
1393 	idl->idl_max_recv_data_segment_length = max_data_segment_length;
1394 	idl->idl_max_send_data_segment_length = max_data_segment_length;
1395 	idl->idl_max_burst_length = max_burst_length;
1396 	idl->idl_first_burst_length = first_burst_length;
1397 
1398 	return (0);
1399 }
1400 
1401 #ifdef ICL_KERNEL_PROXY
/*
 * Kernel-proxy connect.  Thin wrapper: all arguments are passed on
 * unchanged to icl_soft_proxy_connect() and its return value is
 * returned as-is.
 */
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	return (icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}
1410 
1411 int
icl_soft_handoff_sock(struct icl_conn * ic,struct socket * so)1412 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
1413 {
1414 	int error;
1415 
1416 	ICL_CONN_LOCK_ASSERT_NOT(ic);
1417 
1418 	if (so->so_type != SOCK_STREAM)
1419 		return (EINVAL);
1420 
1421 	ICL_CONN_LOCK(ic);
1422 	if (ic->ic_socket != NULL) {
1423 		ICL_CONN_UNLOCK(ic);
1424 		return (EBUSY);
1425 	}
1426 	ic->ic_socket = so;
1427 	ICL_CONN_UNLOCK(ic);
1428 
1429 	error = icl_conn_start(ic);
1430 
1431 	return (error);
1432 }
1433 #endif /* ICL_KERNEL_PROXY */
1434 
1435 static int
icl_soft_load(void)1436 icl_soft_load(void)
1437 {
1438 	int error;
1439 
1440 	icl_pdu_zone = uma_zcreate("icl_pdu",
1441 	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
1442 	    UMA_ALIGN_PTR, 0);
1443 	refcount_init(&icl_ncons, 0);
1444 
1445 	/*
1446 	 * The reason we call this "none" is that to the user,
1447 	 * it's known as "offload driver"; "offload driver: soft"
1448 	 * doesn't make much sense.
1449 	 */
1450 	error = icl_register("none", false, 0,
1451 	    icl_soft_limits, icl_soft_new_conn);
1452 	KASSERT(error == 0, ("failed to register"));
1453 
1454 #if defined(ICL_KERNEL_PROXY) && 0
1455 	/*
1456 	 * Debugging aid for kernel proxy functionality.
1457 	 */
1458 	error = icl_register("proxytest", true, 0,
1459 	    icl_soft_limits, icl_soft_new_conn);
1460 	KASSERT(error == 0, ("failed to register"));
1461 #endif
1462 
1463 	return (error);
1464 }
1465 
1466 static int
icl_soft_unload(void)1467 icl_soft_unload(void)
1468 {
1469 
1470 	if (icl_ncons != 0)
1471 		return (EBUSY);
1472 
1473 	icl_unregister("none", false);
1474 #if defined(ICL_KERNEL_PROXY) && 0
1475 	icl_unregister("proxytest", true);
1476 #endif
1477 
1478 	uma_zdestroy(icl_pdu_zone);
1479 
1480 	return (0);
1481 }
1482 
1483 static int
icl_soft_modevent(module_t mod,int what,void * arg)1484 icl_soft_modevent(module_t mod, int what, void *arg)
1485 {
1486 
1487 	switch (what) {
1488 	case MOD_LOAD:
1489 		return (icl_soft_load());
1490 	case MOD_UNLOAD:
1491 		return (icl_soft_unload());
1492 	default:
1493 		return (EINVAL);
1494 	}
1495 }
1496 
1497 moduledata_t icl_soft_data = {
1498 	"icl_soft",
1499 	icl_soft_modevent,
1500 	0
1501 };
1502 
/*
 * Declare the module described by icl_soft_data, with initialization
 * order SI_SUB_DRIVERS / SI_ORDER_MIDDLE; record its dependency on the
 * "icl" module (the three version arguments are all 1) and give this
 * module version 1.
 */
DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
MODULE_VERSION(icl_soft, 1);
1506