1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD: stable/10/sys/dev/mlx5/mlx5_en/tcp_tlro.c 291184 2015-11-23 09:32:32Z hselasky $");
28 
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 
32 #include <sys/param.h>
33 #include <sys/libkern.h>
34 #include <sys/mbuf.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/sysctl.h>
38 #include <sys/malloc.h>
39 #include <sys/kernel.h>
40 #include <sys/endian.h>
41 #include <sys/socket.h>
42 #include <sys/sockopt.h>
43 #include <sys/smp.h>
44 
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/ethernet.h>
48 
49 #if defined(INET) || defined(INET6)
50 #include <netinet/in.h>
51 #endif
52 
53 #ifdef INET
54 #include <netinet/ip.h>
55 #endif
56 
57 #ifdef INET6
58 #include <netinet/ip6.h>
59 #endif
60 
61 #include <netinet/tcp_var.h>
62 
63 #include "tcp_tlro.h"
64 
/*
 * Fallback definition of M_HASHTYPE_LRO_TCP for systems whose headers do
 * not provide it. A build warning is emitted in that case, except when
 * KLD_MODULE is defined.
 */
#ifndef M_HASHTYPE_LRO_TCP
#ifndef KLD_MODULE
#warning "M_HASHTYPE_LRO_TCP is not defined"
#endif
#define	M_HASHTYPE_LRO_TCP 254
#endif
71 
/* Sysctl node "tlro" below _net_inet_tcp; the TLRO tunables attach to it */
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, tlro,
    CTLFLAG_RW, 0, "TCP turbo LRO parameters");

/* Malloc type used for the allocation made by tcp_tlro_init() */
static MALLOC_DEFINE(M_TLRO, "TLRO", "Turbo LRO");
76 
/*
 * Minimum serving rate. tcp_tlro_get_ticks() converts this value into
 * the number of ticks after which tcp_tlro_combine() forwards a packet
 * that is still queued.
 */
static int tlro_min_rate = 20;		/* Hz */

SYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, min_rate, CTLFLAG_RWTUN,
    &tlro_min_rate, 0, "Minimum serving rate in Hz");

/*
 * Upper limit for the IP length of a combined packet.
 * tcp_tlro_combine() stops appending segments once this limit would be
 * exceeded.
 */
static int tlro_max_packet = IP_MAXPACKET;

SYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, max_packet, CTLFLAG_RWTUN,
    &tlro_max_packet, 0, "Maximum packet size in bytes");
86 
/*
 * A 32-bit word wrapped in a packed structure; this is the element type
 * consumed by tcp_tlro_csum().
 * NOTE(review): presumably packed so that words can be loaded from
 * packet headers without assuming 4-byte alignment - confirm.
 */
typedef struct {
	uint32_t value;
} __packed uint32_p_t;
90 
91 static uint16_t
tcp_tlro_csum(const uint32_p_t * p,size_t l)92 tcp_tlro_csum(const uint32_p_t *p, size_t l)
93 {
94 	const uint32_p_t *pend = p + (l / 4);
95 	uint64_t cs;
96 
97 	for (cs = 0; p != pend; p++)
98 		cs += le32toh(p->value);
99 	while (cs > 0xffff)
100 		cs = (cs >> 16) + (cs & 0xffff);
101 	return (cs);
102 }
103 
104 static void *
tcp_tlro_get_header(const struct mbuf * m,const u_int off,const u_int len)105 tcp_tlro_get_header(const struct mbuf *m, const u_int off,
106     const u_int len)
107 {
108 	if (m->m_len < (off + len))
109 		return (NULL);
110 	return (mtod(m, char *) + off);
111 }
112 
113 static uint8_t
tcp_tlro_info_save_timestamp(struct tlro_mbuf_data * pinfo)114 tcp_tlro_info_save_timestamp(struct tlro_mbuf_data *pinfo)
115 {
116 	struct tcphdr *tcp = pinfo->tcp;
117 	uint32_t *ts_ptr;
118 
119 	if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
120 		return (0);
121 
122 	ts_ptr = (uint32_t *)(tcp + 1);
123 	if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
124 	    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
125 		return (0);
126 
127 	/* Save timestamps */
128 	pinfo->tcp_ts = ts_ptr[1];
129 	pinfo->tcp_ts_reply = ts_ptr[2];
130 	return (1);
131 }
132 
133 static void
tcp_tlro_info_restore_timestamp(struct tlro_mbuf_data * pinfoa,struct tlro_mbuf_data * pinfob)134 tcp_tlro_info_restore_timestamp(struct tlro_mbuf_data *pinfoa,
135     struct tlro_mbuf_data *pinfob)
136 {
137 	struct tcphdr *tcp = pinfoa->tcp;
138 	uint32_t *ts_ptr;
139 
140 	if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
141 		return;
142 
143 	ts_ptr = (uint32_t *)(tcp + 1);
144 	if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
145 	    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
146 		return;
147 
148 	/* Restore timestamps */
149 	ts_ptr[1] = pinfob->tcp_ts;
150 	ts_ptr[2] = pinfob->tcp_ts_reply;
151 }
152 
153 static void
tcp_tlro_extract_header(struct tlro_mbuf_data * pinfo,struct mbuf * m,int seq)154 tcp_tlro_extract_header(struct tlro_mbuf_data *pinfo, struct mbuf *m, int seq)
155 {
156 	uint8_t *phdr = (uint8_t *)pinfo->buf;
157 	struct ether_header *eh;
158 	struct ether_vlan_header *vlan;
159 #ifdef INET
160 	struct ip *ip;
161 #endif
162 #ifdef INET6
163 	struct ip6_hdr *ip6;
164 #endif
165 	struct tcphdr *tcp;
166 	uint16_t etype;
167 	int diff;
168 	int off;
169 
170 	/* Fill in information */
171 	pinfo->head = m;
172 	pinfo->last_tick = ticks;
173 	pinfo->sequence = seq;
174 	pinfo->pprev = &m_last(m)->m_next;
175 
176 	off = sizeof(*eh);
177 	if (m->m_len < off)
178 		goto error;
179 	eh = tcp_tlro_get_header(m, 0, sizeof(*eh));
180 	if (eh == NULL)
181 		goto error;
182 	memcpy(phdr, &eh->ether_dhost, ETHER_ADDR_LEN);
183 	phdr += ETHER_ADDR_LEN;
184 	memcpy(phdr, &eh->ether_type, sizeof(eh->ether_type));
185 	phdr += sizeof(eh->ether_type);
186 	etype = ntohs(eh->ether_type);
187 
188 	if (etype == ETHERTYPE_VLAN) {
189 		vlan = tcp_tlro_get_header(m, off, sizeof(*vlan));
190 		if (vlan == NULL)
191 			goto error;
192 		memcpy(phdr, &vlan->evl_tag, sizeof(vlan->evl_tag) +
193 		    sizeof(vlan->evl_proto));
194 		phdr += sizeof(vlan->evl_tag) + sizeof(vlan->evl_proto);
195 		etype = ntohs(vlan->evl_proto);
196 		off += sizeof(*vlan) - sizeof(*eh);
197 	}
198 	switch (etype) {
199 #ifdef INET
200 	case ETHERTYPE_IP:
201 		/*
202 		 * Cannot LRO:
203 		 * - Non-IP packets
204 		 * - Fragmented packets
205 		 * - Packets with IPv4 options
206 		 * - Non-TCP packets
207 		 */
208 		ip = tcp_tlro_get_header(m, off, sizeof(*ip));
209 		if (ip == NULL ||
210 		    (ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 ||
211 		    (ip->ip_p != IPPROTO_TCP) ||
212 		    (ip->ip_hl << 2) != sizeof(*ip))
213 			goto error;
214 
215 		/* Legacy IP has a header checksum that needs to be correct */
216 		if (!(m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)) {
217 			/* Verify IP header */
218 			if (tcp_tlro_csum((uint32_p_t *)ip, sizeof(*ip)) != 0xFFFF)
219 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
220 			else
221 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
222 				    CSUM_IP_VALID;
223 		}
224 		/* Only accept valid checksums */
225 		if (!(m->m_pkthdr.csum_flags & CSUM_IP_VALID) ||
226 		    !(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
227 			goto error;
228 		memcpy(phdr, &ip->ip_src, sizeof(ip->ip_src) +
229 		    sizeof(ip->ip_dst));
230 		phdr += sizeof(ip->ip_src) + sizeof(ip->ip_dst);
231 		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
232 			pinfo->ip_len = m->m_pkthdr.len - off;
233 		else
234 			pinfo->ip_len = ntohs(ip->ip_len);
235 		pinfo->ip_hdrlen = sizeof(*ip);
236 		pinfo->ip.v4 = ip;
237 		pinfo->ip_version = 4;
238 		off += sizeof(*ip);
239 		break;
240 #endif
241 #ifdef INET6
242 	case ETHERTYPE_IPV6:
243 		/*
244 		 * Cannot LRO:
245 		 * - Non-IP packets
246 		 * - Packets with IPv6 options
247 		 * - Non-TCP packets
248 		 */
249 		ip6 = tcp_tlro_get_header(m, off, sizeof(*ip6));
250 		if (ip6 == NULL || ip6->ip6_nxt != IPPROTO_TCP)
251 			goto error;
252 		if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
253 			goto error;
254 		memcpy(phdr, &ip6->ip6_src, sizeof(struct in6_addr) +
255 		    sizeof(struct in6_addr));
256 		phdr += sizeof(struct in6_addr) + sizeof(struct in6_addr);
257 		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
258 			pinfo->ip_len = m->m_pkthdr.len - off;
259 		else
260 			pinfo->ip_len = ntohs(ip6->ip6_plen) + sizeof(*ip6);
261 		pinfo->ip_hdrlen = sizeof(*ip6);
262 		pinfo->ip.v6 = ip6;
263 		pinfo->ip_version = 6;
264 		off += sizeof(*ip6);
265 		break;
266 #endif
267 	default:
268 		goto error;
269 	}
270 	tcp = tcp_tlro_get_header(m, off, sizeof(*tcp));
271 	if (tcp == NULL)
272 		goto error;
273 	memcpy(phdr, &tcp->th_sport, sizeof(tcp->th_sport) +
274 	    sizeof(tcp->th_dport));
275 	phdr += sizeof(tcp->th_sport) +
276 	    sizeof(tcp->th_dport);
277 	/* Store TCP header length */
278 	*phdr++ = tcp->th_off;
279 	if (tcp->th_off < (sizeof(*tcp) >> 2))
280 		goto error;
281 
282 	/* Compute offset to data payload */
283 	pinfo->tcp_len = (tcp->th_off << 2);
284 	off += pinfo->tcp_len;
285 
286 	/* Store more info */
287 	pinfo->data_off = off;
288 	pinfo->tcp = tcp;
289 
290 	/* Try to save timestamp, if any */
291 	*phdr++ = tcp_tlro_info_save_timestamp(pinfo);
292 
293 	/* Verify offset and IP/TCP length */
294 	if (off > m->m_pkthdr.len ||
295 	    pinfo->ip_len < pinfo->tcp_len)
296 		goto error;
297 
298 	/* Compute data payload length */
299 	pinfo->data_len = (pinfo->ip_len - pinfo->tcp_len - pinfo->ip_hdrlen);
300 
301 	/* Trim any padded data */
302 	diff = (m->m_pkthdr.len - off) - pinfo->data_len;
303 	if (diff != 0) {
304 		if (diff < 0)
305 			goto error;
306 		else
307 			m_adj(m, -diff);
308 	}
309 	/* Compute header length */
310 	pinfo->buf_length = phdr - (uint8_t *)pinfo->buf;
311 	/* Zero-pad rest of buffer */
312 	memset(phdr, 0, TLRO_MAX_HEADER - pinfo->buf_length);
313 	return;
314 error:
315 	pinfo->buf_length = 0;
316 }
317 
318 static int
tcp_tlro_cmp64(const uint64_t * pa,const uint64_t * pb)319 tcp_tlro_cmp64(const uint64_t *pa, const uint64_t *pb)
320 {
321 	int64_t diff = 0;
322 	unsigned x;
323 
324 	for (x = 0; x != TLRO_MAX_HEADER / 8; x++) {
325 		/*
326 		 * NOTE: Endianness does not matter in this
327 		 * comparisation:
328 		 */
329 		diff = pa[x] - pb[x];
330 		if (diff != 0)
331 			goto done;
332 	}
333 done:
334 	if (diff < 0)
335 		return (-1);
336 	else if (diff > 0)
337 		return (1);
338 	return (0);
339 }
340 
341 static int
tcp_tlro_compare_header(const void * _ppa,const void * _ppb)342 tcp_tlro_compare_header(const void *_ppa, const void *_ppb)
343 {
344 	const struct tlro_mbuf_ptr *ppa = _ppa;
345 	const struct tlro_mbuf_ptr *ppb = _ppb;
346 	struct tlro_mbuf_data *pinfoa = ppa->data;
347 	struct tlro_mbuf_data *pinfob = ppb->data;
348 	int ret;
349 
350 	ret = (pinfoa->head == NULL) - (pinfob->head == NULL);
351 	if (ret != 0)
352 		goto done;
353 
354 	ret = pinfoa->buf_length - pinfob->buf_length;
355 	if (ret != 0)
356 		goto done;
357 	if (pinfoa->buf_length != 0) {
358 		ret = tcp_tlro_cmp64(pinfoa->buf, pinfob->buf);
359 		if (ret != 0)
360 			goto done;
361 		ret = ntohl(pinfoa->tcp->th_seq) - ntohl(pinfob->tcp->th_seq);
362 		if (ret != 0)
363 			goto done;
364 		ret = ntohl(pinfoa->tcp->th_ack) - ntohl(pinfob->tcp->th_ack);
365 		if (ret != 0)
366 			goto done;
367 		ret = pinfoa->sequence - pinfob->sequence;
368 		if (ret != 0)
369 			goto done;
370 	}
371 done:
372 	return (ret);
373 }
374 
375 static void
tcp_tlro_sort(struct tlro_ctrl * tlro)376 tcp_tlro_sort(struct tlro_ctrl *tlro)
377 {
378 	if (tlro->curr == 0)
379 		return;
380 
381 	qsort(tlro->mbuf, tlro->curr, sizeof(struct tlro_mbuf_ptr),
382 	    &tcp_tlro_compare_header);
383 }
384 
385 static int
tcp_tlro_get_ticks(void)386 tcp_tlro_get_ticks(void)
387 {
388 	int to = tlro_min_rate;
389 
390 	if (to < 1)
391 		to = 1;
392 	to = hz / to;
393 	if (to < 1)
394 		to = 1;
395 	return (to);
396 }
397 
398 static void
tcp_tlro_combine(struct tlro_ctrl * tlro,int force)399 tcp_tlro_combine(struct tlro_ctrl *tlro, int force)
400 {
401 	struct tlro_mbuf_data *pinfoa;
402 	struct tlro_mbuf_data *pinfob;
403 	uint32_t cs;
404 	int curr_ticks = ticks;
405 	int ticks_limit = tcp_tlro_get_ticks();
406 	unsigned x;
407 	unsigned y;
408 	unsigned z;
409 	int temp;
410 
411 	if (tlro->curr == 0)
412 		return;
413 
414 	for (y = 0; y != tlro->curr;) {
415 		struct mbuf *m;
416 
417 		pinfoa = tlro->mbuf[y].data;
418 		for (x = y + 1; x != tlro->curr; x++) {
419 			pinfob = tlro->mbuf[x].data;
420 			if (pinfoa->buf_length != pinfob->buf_length ||
421 			    tcp_tlro_cmp64(pinfoa->buf, pinfob->buf) != 0)
422 				break;
423 		}
424 		if (pinfoa->buf_length == 0) {
425 			/* Forward traffic which cannot be combined */
426 			for (z = y; z != x; z++) {
427 				/* Just forward packets */
428 				pinfob = tlro->mbuf[z].data;
429 
430 				m = pinfob->head;
431 
432 				/* Reset info structure */
433 				pinfob->head = NULL;
434 				pinfob->buf_length = 0;
435 
436 				/* Do stats */
437 				tlro->lro_flushed++;
438 
439 				/* Input packet to network layer */
440 				(*tlro->ifp->if_input) (tlro->ifp, m);
441 			}
442 			y = z;
443 			continue;
444 		}
445 
446 		/* Compute current checksum subtracted some header parts */
447 		temp = (pinfoa->ip_len - pinfoa->ip_hdrlen);
448 		cs = ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
449 		    tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len);
450 
451 		/* Append all fragments into one block */
452 		for (z = y + 1; z != x; z++) {
453 
454 			pinfob = tlro->mbuf[z].data;
455 
456 			/* Check for command packets */
457 			if ((pinfoa->tcp->th_flags & ~(TH_ACK | TH_PUSH)) ||
458 			    (pinfob->tcp->th_flags & ~(TH_ACK | TH_PUSH)))
459 				break;
460 
461 			/* Check if there is enough space */
462 			if ((pinfoa->ip_len + pinfob->data_len) > tlro_max_packet)
463 				break;
464 
465 			/* Try to append the new segment */
466 			temp = ntohl(pinfoa->tcp->th_seq) + pinfoa->data_len;
467 			if (temp != (int)ntohl(pinfob->tcp->th_seq))
468 				break;
469 
470 			temp = pinfob->ip_len - pinfob->ip_hdrlen;
471 			cs += ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
472 			    tcp_tlro_csum((uint32_p_t *)pinfob->tcp, pinfob->tcp_len);
473 			/* Remove fields which appear twice */
474 			cs += (IPPROTO_TCP << 8);
475 			if (pinfob->ip_version == 4) {
476 				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_src, 4);
477 				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_dst, 4);
478 			} else {
479 				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_src, 16);
480 				cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_dst, 16);
481 			}
482 			/* Remainder computation */
483 			while (cs > 0xffff)
484 				cs = (cs >> 16) + (cs & 0xffff);
485 
486 			/* Update window and ack sequence number */
487 			pinfoa->tcp->th_ack = pinfob->tcp->th_ack;
488 			pinfoa->tcp->th_win = pinfob->tcp->th_win;
489 
490 			/* Check if we should restore the timestamp */
491 			tcp_tlro_info_restore_timestamp(pinfoa, pinfob);
492 
493 			/* Accumulate TCP flags */
494 			pinfoa->tcp->th_flags |= pinfob->tcp->th_flags;
495 
496 			/* update lengths */
497 			pinfoa->ip_len += pinfob->data_len;
498 			pinfoa->data_len += pinfob->data_len;
499 
500 			/* Clear mbuf pointer - packet is accumulated */
501 			m = pinfob->head;
502 
503 			/* Reset info structure */
504 			pinfob->head = NULL;
505 			pinfob->buf_length = 0;
506 
507 			/* Append data to mbuf [y] */
508 			m_adj(m, pinfob->data_off);
509 			/* Delete mbuf tags, if any */
510 			m_tag_delete_chain(m, NULL);
511 			/* Clear packet header flag */
512 			m->m_flags &= ~M_PKTHDR;
513 
514 			/* Concat mbuf(s) to end of list */
515 			pinfoa->pprev[0] = m;
516 			m = m_last(m);
517 			pinfoa->pprev = &m->m_next;
518 			pinfoa->head->m_pkthdr.len += pinfob->data_len;
519 		}
520 		/* Compute new TCP header checksum */
521 		pinfoa->tcp->th_sum = 0;
522 
523 		temp = pinfoa->ip_len - pinfoa->ip_hdrlen;
524 		cs = (cs ^ 0xFFFF) +
525 		    tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len) +
526 		    ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8);
527 
528 		/* Remainder computation */
529 		while (cs > 0xffff)
530 			cs = (cs >> 16) + (cs & 0xffff);
531 
532 		/* Update new checksum */
533 		pinfoa->tcp->th_sum = ~htole16(cs);
534 
535 		/* Update IP length, if any */
536 		if (pinfoa->ip_version == 4) {
537 			if (pinfoa->ip_len > IP_MAXPACKET) {
538 				M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
539 				pinfoa->ip.v4->ip_len = htons(IP_MAXPACKET);
540 			} else {
541 				pinfoa->ip.v4->ip_len = htons(pinfoa->ip_len);
542 			}
543 		} else {
544 			if (pinfoa->ip_len > (IP_MAXPACKET + sizeof(*pinfoa->ip.v6))) {
545 				M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
546 				pinfoa->ip.v6->ip6_plen = htons(IP_MAXPACKET);
547 			} else {
548 				temp = pinfoa->ip_len - sizeof(*pinfoa->ip.v6);
549 				pinfoa->ip.v6->ip6_plen = htons(temp);
550 			}
551 		}
552 
553 		temp = curr_ticks - pinfoa->last_tick;
554 		/* Check if packet should be forwarded */
555 		if (force != 0 || z != x || temp >= ticks_limit ||
556 		    pinfoa->data_len == 0) {
557 
558 			/* Compute new IPv4 header checksum */
559 			if (pinfoa->ip_version == 4) {
560 				pinfoa->ip.v4->ip_sum = 0;
561 				cs = tcp_tlro_csum((uint32_p_t *)pinfoa->ip.v4,
562 				    sizeof(*pinfoa->ip.v4));
563 				pinfoa->ip.v4->ip_sum = ~htole16(cs);
564 			}
565 			/* Forward packet */
566 			m = pinfoa->head;
567 
568 			/* Reset info structure */
569 			pinfoa->head = NULL;
570 			pinfoa->buf_length = 0;
571 
572 			/* Do stats */
573 			tlro->lro_flushed++;
574 
575 			/* Input packet to network layer */
576 			(*tlro->ifp->if_input) (tlro->ifp, m);
577 		}
578 		y = z;
579 	}
580 
581 	/* Cleanup all NULL heads */
582 	for (y = 0; y != tlro->curr; y++) {
583 		if (tlro->mbuf[y].data->head == NULL) {
584 			for (z = y + 1; z != tlro->curr; z++) {
585 				struct tlro_mbuf_ptr ptemp;
586 				if (tlro->mbuf[z].data->head == NULL)
587 					continue;
588 				ptemp = tlro->mbuf[y];
589 				tlro->mbuf[y] = tlro->mbuf[z];
590 				tlro->mbuf[z] = ptemp;
591 				y++;
592 			}
593 			break;
594 		}
595 	}
596 	tlro->curr = y;
597 }
598 
599 static void
tcp_tlro_cleanup(struct tlro_ctrl * tlro)600 tcp_tlro_cleanup(struct tlro_ctrl *tlro)
601 {
602 	while (tlro->curr != 0 &&
603 	    tlro->mbuf[tlro->curr - 1].data->head == NULL)
604 		tlro->curr--;
605 }
606 
607 void
tcp_tlro_flush(struct tlro_ctrl * tlro,int force)608 tcp_tlro_flush(struct tlro_ctrl *tlro, int force)
609 {
610 	if (tlro->curr == 0)
611 		return;
612 
613 	tcp_tlro_sort(tlro);
614 	tcp_tlro_cleanup(tlro);
615 	tcp_tlro_combine(tlro, force);
616 }
617 
618 int
tcp_tlro_init(struct tlro_ctrl * tlro,struct ifnet * ifp,int max_mbufs)619 tcp_tlro_init(struct tlro_ctrl *tlro, struct ifnet *ifp,
620     int max_mbufs)
621 {
622 	ssize_t size;
623 	uint32_t x;
624 
625 	/* Set zero defaults */
626 	memset(tlro, 0, sizeof(*tlro));
627 
628 	/* Compute size needed for data */
629 	size = (sizeof(struct tlro_mbuf_ptr) * max_mbufs) +
630 	    (sizeof(struct tlro_mbuf_data) * max_mbufs);
631 
632 	/* Range check */
633 	if (max_mbufs <= 0 || size <= 0 || ifp == NULL)
634 		return (EINVAL);
635 
636 	/* Setup tlro control structure */
637 	tlro->mbuf = malloc(size, M_TLRO, M_WAITOK | M_ZERO);
638 	tlro->max = max_mbufs;
639 	tlro->ifp = ifp;
640 
641 	/* Setup pointer array */
642 	for (x = 0; x != tlro->max; x++) {
643 		tlro->mbuf[x].data = ((struct tlro_mbuf_data *)
644 		    &tlro->mbuf[max_mbufs]) + x;
645 	}
646 	return (0);
647 }
648 
649 void
tcp_tlro_free(struct tlro_ctrl * tlro)650 tcp_tlro_free(struct tlro_ctrl *tlro)
651 {
652 	struct tlro_mbuf_data *pinfo;
653 	struct mbuf *m;
654 	uint32_t y;
655 
656 	/* Check if not setup */
657 	if (tlro->mbuf == NULL)
658 		return;
659 	/* Free MBUF array and any leftover MBUFs */
660 	for (y = 0; y != tlro->max; y++) {
661 
662 		pinfo = tlro->mbuf[y].data;
663 
664 		m = pinfo->head;
665 
666 		/* Reset info structure */
667 		pinfo->head = NULL;
668 		pinfo->buf_length = 0;
669 
670 		m_freem(m);
671 	}
672 	free(tlro->mbuf, M_TLRO);
673 	/* Reset buffer */
674 	memset(tlro, 0, sizeof(*tlro));
675 }
676 
677 void
tcp_tlro_rx(struct tlro_ctrl * tlro,struct mbuf * m)678 tcp_tlro_rx(struct tlro_ctrl *tlro, struct mbuf *m)
679 {
680 	if (m->m_len > 0 && tlro->curr < tlro->max) {
681 		/* do stats */
682 		tlro->lro_queued++;
683 
684 		/* extract header */
685 		tcp_tlro_extract_header(tlro->mbuf[tlro->curr++].data,
686 		    m, tlro->sequence++);
687 	} else if (tlro->ifp != NULL) {
688 		/* do stats */
689 		tlro->lro_flushed++;
690 
691 		/* input packet to network layer */
692 		(*tlro->ifp->if_input) (tlro->ifp, m);
693 	} else {
694 		/* packet drop */
695 		m_freem(m);
696 	}
697 }
698