xref: /trueos/sys/dev/ixgbe/ix_txrx.c (revision 5868f7205430cd67aa3b655419d3f15f83b70119)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40 
41 #include "ixgbe.h"
42 
43 #ifdef DEV_NETMAP
44 #include <net/netmap.h>
45 #include <sys/selinfo.h>
46 #include <dev/netmap/netmap_kern.h>
47 
48 extern int ix_crcstrip;
49 #endif
50 
51 /*
52 ** HW RSC control:
53 **  this feature only works with
54 **  IPv4, and only on 82599 and later.
55 **  Also this will cause IP forwarding to
56 **  fail and that can't be controlled by
57 **  the stack as LRO can. For all these
58 **  reasons I've deemed it best to leave
59 **  this off and not bother with a tuneable
60 **  interface, this would need to be compiled
61 **  to enable.
62 */
63 static bool ixgbe_rsc_enable = FALSE;
64 
65 #ifdef IXGBE_FDIR
66 /*
67 ** For Flow Director: this is the
68 ** number of TX packets we sample
69 ** for the filter pool, this means
70 ** every 20th packet will be probed.
71 **
72 ** This feature can be disabled by
73 ** setting this to 0.
74 */
75 static int atr_sample_rate = 20;
76 #endif
77 
78 /* Shared PCI config read/write */
79 inline u16
ixgbe_read_pci_cfg(struct ixgbe_hw * hw,u32 reg)80 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
81 {
82 	u16 value;
83 
84 	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
85 	    reg, 2);
86 
87 	return (value);
88 }
89 
90 inline void
ixgbe_write_pci_cfg(struct ixgbe_hw * hw,u32 reg,u16 value)91 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
92 {
93 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
94 	    reg, value, 2);
95 
96 	return;
97 }
98 
99 /*********************************************************************
100  *  Local Function prototypes
101  *********************************************************************/
102 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
103 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
104 static int	ixgbe_setup_receive_ring(struct rx_ring *);
105 static void     ixgbe_free_receive_buffers(struct rx_ring *);
106 
107 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
108 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
109 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
110 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
111 		    struct mbuf *, u32 *, u32 *);
112 static int	ixgbe_tso_setup(struct tx_ring *,
113 		    struct mbuf *, u32 *, u32 *);
114 #ifdef IXGBE_FDIR
115 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
116 #endif
117 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
118 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
119 		    struct mbuf *, u32);
120 
121 #ifdef IXGBE_LEGACY_TX
122 /*********************************************************************
123  *  Transmit entry point
124  *
125  *  ixgbe_start is called by the stack to initiate a transmit.
126  *  The driver will remain in this routine as long as there are
127  *  packets to transmit and transmit resources are available.
128  *  In case resources are not available stack is notified and
129  *  the packet is requeued.
130  **********************************************************************/
131 
132 void
ixgbe_start_locked(struct tx_ring * txr,struct ifnet * ifp)133 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
134 {
135 	struct mbuf    *m_head;
136 	struct adapter *adapter = txr->adapter;
137 
138 	IXGBE_TX_LOCK_ASSERT(txr);
139 
140 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
141 		return;
142 	if (!adapter->link_active)
143 		return;
144 
145 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
146 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
147 			break;
148 
149 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
150 		if (m_head == NULL)
151 			break;
152 
153 		if (ixgbe_xmit(txr, &m_head)) {
154 			if (m_head != NULL)
155 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
156 			break;
157 		}
158 		/* Send a copy of the frame to the BPF listener */
159 		ETHER_BPF_MTAP(ifp, m_head);
160 	}
161 	return;
162 }
163 
164 /*
165  * Legacy TX start - called by the stack, this
166  * always uses the first tx ring, and should
167  * not be used with multiqueue tx enabled.
168  */
169 void
ixgbe_start(struct ifnet * ifp)170 ixgbe_start(struct ifnet *ifp)
171 {
172 	struct adapter *adapter = ifp->if_softc;
173 	struct tx_ring	*txr = adapter->tx_rings;
174 
175 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
176 		IXGBE_TX_LOCK(txr);
177 		ixgbe_start_locked(txr, ifp);
178 		IXGBE_TX_UNLOCK(txr);
179 	}
180 	return;
181 }
182 
183 #else /* ! IXGBE_LEGACY_TX */
184 
185 /*
186 ** Multiqueue Transmit driver
187 **
188 */
189 int
ixgbe_mq_start(struct ifnet * ifp,struct mbuf * m)190 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
191 {
192 	struct adapter	*adapter = ifp->if_softc;
193 	struct ix_queue	*que;
194 	struct tx_ring	*txr;
195 	int 		i, err = 0;
196 
197 	/*
198 	 * When doing RSS, map it to the same outbound queue
199 	 * as the incoming flow would be mapped to.
200 	 *
201 	 * If everything is setup correctly, it should be the
202 	 * same bucket that the current CPU we're on is.
203 	 */
204 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
205 		i = m->m_pkthdr.flowid % adapter->num_queues;
206 	else
207 		i = curcpu % adapter->num_queues;
208 
209 	/* Check for a hung queue and pick alternative */
210 	if (((1 << i) & adapter->active_queues) == 0)
211 		i = ffsl(adapter->active_queues);
212 
213 	txr = &adapter->tx_rings[i];
214 	que = &adapter->queues[i];
215 
216 	err = drbr_enqueue(ifp, txr->br, m);
217 	if (err)
218 		return (err);
219 	if (IXGBE_TX_TRYLOCK(txr)) {
220 		ixgbe_mq_start_locked(ifp, txr);
221 		IXGBE_TX_UNLOCK(txr);
222 	} else
223 		taskqueue_enqueue(que->tq, &txr->txq_task);
224 
225 	return (0);
226 }
227 
228 int
ixgbe_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)229 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230 {
231 	struct adapter  *adapter = txr->adapter;
232         struct mbuf     *next;
233         int             enqueued = 0, err = 0;
234 
235 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
236 	    adapter->link_active == 0)
237 		return (ENETDOWN);
238 
239 	/* Process the queue */
240 #if __FreeBSD_version < 901504
241 	next = drbr_dequeue(ifp, txr->br);
242 	while (next != NULL) {
243 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
244 			if (next != NULL)
245 				err = drbr_enqueue(ifp, txr->br, next);
246 #else
247 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
248 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
249 			if (next == NULL) {
250 				drbr_advance(ifp, txr->br);
251 			} else {
252 				drbr_putback(ifp, txr->br, next);
253 			}
254 #endif
255 			break;
256 		}
257 #if __FreeBSD_version >= 901504
258 		drbr_advance(ifp, txr->br);
259 #endif
260 		enqueued++;
261 #if 0 // this is VF-only
262 #if __FreeBSD_version >= 1100036
263 		/*
264 		 * Since we're looking at the tx ring, we can check
265 		 * to see if we're a VF by examing our tail register
266 		 * address.
267 		 */
268 		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
269 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
270 #endif
271 #endif
272 		/* Send a copy of the frame to the BPF listener */
273 		ETHER_BPF_MTAP(ifp, next);
274 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275 			break;
276 #if __FreeBSD_version < 901504
277 		next = drbr_dequeue(ifp, txr->br);
278 #endif
279 	}
280 
281 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
282 		ixgbe_txeof(txr);
283 
284 	return (err);
285 }
286 
287 /*
288  * Called from a taskqueue to drain queued transmit packets.
289  */
290 void
291 ixgbe_deferred_mq_start(void *arg, int pending)
292 {
293 	struct tx_ring *txr = arg;
294 	struct adapter *adapter = txr->adapter;
295 	struct ifnet *ifp = adapter->ifp;
296 
297 	IXGBE_TX_LOCK(txr);
298 	if (!drbr_empty(ifp, txr->br))
299 		ixgbe_mq_start_locked(ifp, txr);
300 	IXGBE_TX_UNLOCK(txr);
301 }
302 
303 /*
304  * Flush all ring buffers
305  */
306 void
307 ixgbe_qflush(struct ifnet *ifp)
308 {
309 	struct adapter	*adapter = ifp->if_softc;
310 	struct tx_ring	*txr = adapter->tx_rings;
311 	struct mbuf	*m;
312 
313 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
314 		IXGBE_TX_LOCK(txr);
315 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316 			m_freem(m);
317 		IXGBE_TX_UNLOCK(txr);
318 	}
319 	if_qflush(ifp);
320 }
321 #endif /* IXGBE_LEGACY_TX */
322 
323 
324 /*********************************************************************
325  *
326  *  This routine maps the mbufs to tx descriptors, allowing the
327  *  TX engine to transmit the packets.
328  *  	- return 0 on success, positive on failure
329  *
330  **********************************************************************/
331 
332 static int
333 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334 {
335 	struct adapter  *adapter = txr->adapter;
336 	u32		olinfo_status = 0, cmd_type_len;
337 	int             i, j, error, nsegs;
338 	int		first;
339 	bool		remap = TRUE;
340 	struct mbuf	*m_head;
341 	bus_dma_segment_t segs[adapter->num_segs];
342 	bus_dmamap_t	map;
343 	struct ixgbe_tx_buf *txbuf;
344 	union ixgbe_adv_tx_desc *txd = NULL;
345 
346 	m_head = *m_headp;
347 
348 	/* Basic descriptor defines */
349         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
350 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351 
352 	if (m_head->m_flags & M_VLANTAG)
353         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
354 
355         /*
356          * Important to capture the first descriptor
357          * used because it will contain the index of
358          * the one we tell the hardware to report back
359          */
360         first = txr->next_avail_desc;
361 	txbuf = &txr->tx_buffers[first];
362 	map = txbuf->map;
363 
364 	/*
365 	 * Map the packet for DMA.
366 	 */
367 retry:
368 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
369 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
370 
371 	if (__predict_false(error)) {
372 		struct mbuf *m;
373 
374 		switch (error) {
375 		case EFBIG:
376 			/* Try it again? - one try */
377 			if (remap == TRUE) {
378 				remap = FALSE;
379 				/*
380 				 * XXX: m_defrag will choke on
381 				 * non-MCLBYTES-sized clusters
382 				 */
383 				m = m_defrag(*m_headp, M_NOWAIT);
384 				if (m == NULL) {
385 					adapter->mbuf_defrag_failed++;
386 					m_freem(*m_headp);
387 					*m_headp = NULL;
388 					return (ENOBUFS);
389 				}
390 				*m_headp = m;
391 				goto retry;
392 			} else
393 				return (error);
394 		case ENOMEM:
395 			txr->no_tx_dma_setup++;
396 			return (error);
397 		default:
398 			txr->no_tx_dma_setup++;
399 			m_freem(*m_headp);
400 			*m_headp = NULL;
401 			return (error);
402 		}
403 	}
404 
405 	/* Make certain there are enough descriptors */
406 	if (nsegs > txr->tx_avail - 2) {
407 		txr->no_desc_avail++;
408 		bus_dmamap_unload(txr->txtag, map);
409 		return (ENOBUFS);
410 	}
411 	m_head = *m_headp;
412 
413 	/*
414 	 * Set up the appropriate offload context
415 	 * this will consume the first descriptor
416 	 */
417 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
418 	if (__predict_false(error)) {
419 		if (error == ENOBUFS)
420 			*m_headp = NULL;
421 		return (error);
422 	}
423 
424 #ifdef IXGBE_FDIR
425 	/* Do the flow director magic */
426 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427 		++txr->atr_count;
428 		if (txr->atr_count >= atr_sample_rate) {
429 			ixgbe_atr(txr, m_head);
430 			txr->atr_count = 0;
431 		}
432 	}
433 #endif
434 
435 	i = txr->next_avail_desc;
436 	for (j = 0; j < nsegs; j++) {
437 		bus_size_t seglen;
438 		bus_addr_t segaddr;
439 
440 		txbuf = &txr->tx_buffers[i];
441 		txd = &txr->tx_base[i];
442 		seglen = segs[j].ds_len;
443 		segaddr = htole64(segs[j].ds_addr);
444 
445 		txd->read.buffer_addr = segaddr;
446 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 		    cmd_type_len |seglen);
448 		txd->read.olinfo_status = htole32(olinfo_status);
449 
450 		if (++i == txr->num_desc)
451 			i = 0;
452 	}
453 
454 	txd->read.cmd_type_len |=
455 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 	txr->tx_avail -= nsegs;
457 	txr->next_avail_desc = i;
458 
459 	txbuf->m_head = m_head;
460 	/*
461 	 * Here we swap the map so the last descriptor,
462 	 * which gets the completion interrupt has the
463 	 * real map, and the first descriptor gets the
464 	 * unused map from this descriptor.
465 	 */
466 	txr->tx_buffers[first].map = txbuf->map;
467 	txbuf->map = map;
468 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
469 
470         /* Set the EOP descriptor that will be marked done */
471         txbuf = &txr->tx_buffers[first];
472 	txbuf->eop = txd;
473 
474         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
476 	/*
477 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
478 	 * hardware that this frame is available to transmit.
479 	 */
480 	++txr->total_packets;
481 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
482 
483 	/* Mark queue as having work */
484 	if (txr->busy == 0)
485 		txr->busy = 1;
486 
487 	return (0);
488 }
489 
490 
491 /*********************************************************************
492  *
493  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
494  *  the information needed to transmit a packet on the wire. This is
495  *  called only once at attach, setup is done every reset.
496  *
497  **********************************************************************/
498 int
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
500 {
501 	struct adapter *adapter = txr->adapter;
502 	device_t dev = adapter->dev;
503 	struct ixgbe_tx_buf *txbuf;
504 	int error, i;
505 
506 	/*
507 	 * Setup DMA descriptor areas.
508 	 */
509 	if ((error = bus_dma_tag_create(
510 			       bus_get_dma_tag(adapter->dev),	/* parent */
511 			       1, 0,		/* alignment, bounds */
512 			       BUS_SPACE_MAXADDR,	/* lowaddr */
513 			       BUS_SPACE_MAXADDR,	/* highaddr */
514 			       NULL, NULL,		/* filter, filterarg */
515 			       IXGBE_TSO_SIZE,		/* maxsize */
516 			       adapter->num_segs,	/* nsegments */
517 			       PAGE_SIZE,		/* maxsegsize */
518 			       0,			/* flags */
519 			       NULL,			/* lockfunc */
520 			       NULL,			/* lockfuncarg */
521 			       &txr->txtag))) {
522 		device_printf(dev,"Unable to allocate TX DMA tag\n");
523 		goto fail;
524 	}
525 
526 	if (!(txr->tx_buffers =
527 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
530 		error = ENOMEM;
531 		goto fail;
532 	}
533 
534         /* Create the descriptor buffer dma maps */
535 	txbuf = txr->tx_buffers;
536 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
538 		if (error != 0) {
539 			device_printf(dev, "Unable to create TX DMA map\n");
540 			goto fail;
541 		}
542 	}
543 
544 	return 0;
545 fail:
546 	/* We free all, it handles case where we are in the middle */
547 	ixgbe_free_transmit_structures(adapter);
548 	return (error);
549 }
550 
551 /*********************************************************************
552  *
553  *  Initialize a transmit ring.
554  *
555  **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559 	struct adapter *adapter = txr->adapter;
560 	struct ixgbe_tx_buf *txbuf;
561 	int i;
562 #ifdef DEV_NETMAP
563 	struct netmap_adapter *na = NA(adapter->ifp);
564 	struct netmap_slot *slot;
565 #endif /* DEV_NETMAP */
566 
567 	/* Clear the old ring contents */
568 	IXGBE_TX_LOCK(txr);
569 #ifdef DEV_NETMAP
570 	/*
571 	 * (under lock): if in netmap mode, do some consistency
572 	 * checks and set slot to entry 0 of the netmap ring.
573 	 */
574 	slot = netmap_reset(na, NR_TX, txr->me, 0);
575 #endif /* DEV_NETMAP */
576 	bzero((void *)txr->tx_base,
577 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578 	/* Reset indices */
579 	txr->next_avail_desc = 0;
580 	txr->next_to_clean = 0;
581 
582 	/* Free any existing tx buffers. */
583         txbuf = txr->tx_buffers;
584 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
585 		if (txbuf->m_head != NULL) {
586 			bus_dmamap_sync(txr->txtag, txbuf->map,
587 			    BUS_DMASYNC_POSTWRITE);
588 			bus_dmamap_unload(txr->txtag, txbuf->map);
589 			m_freem(txbuf->m_head);
590 			txbuf->m_head = NULL;
591 		}
592 #ifdef DEV_NETMAP
593 		/*
594 		 * In netmap mode, set the map for the packet buffer.
595 		 * NOTE: Some drivers (not this one) also need to set
596 		 * the physical buffer address in the NIC ring.
597 		 * Slots in the netmap ring (indexed by "si") are
598 		 * kring->nkr_hwofs positions "ahead" wrt the
599 		 * corresponding slot in the NIC ring. In some drivers
600 		 * (not here) nkr_hwofs can be negative. Function
601 		 * netmap_idx_n2k() handles wraparounds properly.
602 		 */
603 		if (slot) {
604 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
606 		}
607 #endif /* DEV_NETMAP */
608 		/* Clear the EOP descriptor pointer */
609 		txbuf->eop = NULL;
610         }
611 
612 #ifdef IXGBE_FDIR
613 	/* Set the rate at which we sample packets */
614 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 		txr->atr_sample = atr_sample_rate;
616 #endif
617 
618 	/* Set number of descriptors available */
619 	txr->tx_avail = adapter->num_tx_desc;
620 
621 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 	IXGBE_TX_UNLOCK(txr);
624 }
625 
626 /*********************************************************************
627  *
628  *  Initialize all transmit rings.
629  *
630  **********************************************************************/
631 int
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
633 {
634 	struct tx_ring *txr = adapter->tx_rings;
635 
636 	for (int i = 0; i < adapter->num_queues; i++, txr++)
637 		ixgbe_setup_transmit_ring(txr);
638 
639 	return (0);
640 }
641 
642 /*********************************************************************
643  *
644  *  Free all transmit rings.
645  *
646  **********************************************************************/
647 void
648 ixgbe_free_transmit_structures(struct adapter *adapter)
649 {
650 	struct tx_ring *txr = adapter->tx_rings;
651 
652 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
653 		IXGBE_TX_LOCK(txr);
654 		ixgbe_free_transmit_buffers(txr);
655 		ixgbe_dma_free(adapter, &txr->txdma);
656 		IXGBE_TX_UNLOCK(txr);
657 		IXGBE_TX_LOCK_DESTROY(txr);
658 	}
659 	free(adapter->tx_rings, M_DEVBUF);
660 }
661 
662 /*********************************************************************
663  *
664  *  Free transmit ring related data structures.
665  *
666  **********************************************************************/
667 static void
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
669 {
670 	struct adapter *adapter = txr->adapter;
671 	struct ixgbe_tx_buf *tx_buffer;
672 	int             i;
673 
674 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
675 
676 	if (txr->tx_buffers == NULL)
677 		return;
678 
679 	tx_buffer = txr->tx_buffers;
680 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 		if (tx_buffer->m_head != NULL) {
682 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 			    BUS_DMASYNC_POSTWRITE);
684 			bus_dmamap_unload(txr->txtag,
685 			    tx_buffer->map);
686 			m_freem(tx_buffer->m_head);
687 			tx_buffer->m_head = NULL;
688 			if (tx_buffer->map != NULL) {
689 				bus_dmamap_destroy(txr->txtag,
690 				    tx_buffer->map);
691 				tx_buffer->map = NULL;
692 			}
693 		} else if (tx_buffer->map != NULL) {
694 			bus_dmamap_unload(txr->txtag,
695 			    tx_buffer->map);
696 			bus_dmamap_destroy(txr->txtag,
697 			    tx_buffer->map);
698 			tx_buffer->map = NULL;
699 		}
700 	}
701 #ifdef IXGBE_LEGACY_TX
702 	if (txr->br != NULL)
703 		buf_ring_free(txr->br, M_DEVBUF);
704 #endif
705 	if (txr->tx_buffers != NULL) {
706 		free(txr->tx_buffers, M_DEVBUF);
707 		txr->tx_buffers = NULL;
708 	}
709 	if (txr->txtag != NULL) {
710 		bus_dma_tag_destroy(txr->txtag);
711 		txr->txtag = NULL;
712 	}
713 	return;
714 }
715 
716 /*********************************************************************
717  *
718  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
719  *
720  **********************************************************************/
721 
722 static int
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724     u32 *cmd_type_len, u32 *olinfo_status)
725 {
726 	struct adapter *adapter = txr->adapter;
727 	struct ixgbe_adv_tx_context_desc *TXD;
728 	struct ether_vlan_header *eh;
729 	struct ip *ip;
730 	struct ip6_hdr *ip6;
731 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
732 	int	ehdrlen, ip_hlen = 0;
733 	u16	etype;
734 	u8	ipproto = 0;
735 	int	offload = TRUE;
736 	int	ctxd = txr->next_avail_desc;
737 	u16	vtag = 0;
738 
739 	/* First check if TSO is to be used */
740 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
741 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
742 
743 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
744 		offload = FALSE;
745 
746 	/* Indicate the whole packet as payload when not doing TSO */
747        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
748 
749 	/* Now ready a context descriptor */
750 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
751 
752 	/*
753 	** In advanced descriptors the vlan tag must
754 	** be placed into the context descriptor. Hence
755 	** we need to make one even if not doing offloads.
756 	*/
757 	if (mp->m_flags & M_VLANTAG) {
758 		vtag = htole16(mp->m_pkthdr.ether_vtag);
759 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
760 	}
761 	else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
762 		return (0);
763 
764 	/*
765 	 * Determine where frame payload starts.
766 	 * Jump over vlan headers if already present,
767 	 * helpful for QinQ too.
768 	 */
769 	eh = mtod(mp, struct ether_vlan_header *);
770 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
771 		etype = ntohs(eh->evl_proto);
772 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
773 	} else {
774 		etype = ntohs(eh->evl_encap_proto);
775 		ehdrlen = ETHER_HDR_LEN;
776 	}
777 
778 	/* Set the ether header length */
779 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
780 
781 	if (offload == FALSE)
782 		goto no_offloads;
783 
784 	switch (etype) {
785 		case ETHERTYPE_IP:
786 			ip = (struct ip *)(mp->m_data + ehdrlen);
787 			ip_hlen = ip->ip_hl << 2;
788 			ipproto = ip->ip_p;
789 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
790 			break;
791 		case ETHERTYPE_IPV6:
792 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
793 			ip_hlen = sizeof(struct ip6_hdr);
794 			/* XXX-BZ this will go badly in case of ext hdrs. */
795 			ipproto = ip6->ip6_nxt;
796 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
797 			break;
798 		default:
799 			offload = FALSE;
800 			break;
801 	}
802 
803 	vlan_macip_lens |= ip_hlen;
804 
805 	switch (ipproto) {
806 		case IPPROTO_TCP:
807 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
808 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
809 			break;
810 
811 		case IPPROTO_UDP:
812 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
813 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
814 			break;
815 
816 #if __FreeBSD_version >= 800000
817 		case IPPROTO_SCTP:
818 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
819 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
820 			break;
821 #endif
822 		default:
823 			offload = FALSE;
824 			break;
825 	}
826 
827 	if (offload) /* For the TX descriptor setup */
828 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
829 
830 no_offloads:
831 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
832 
833 	/* Now copy bits into descriptor */
834 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
835 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
836 	TXD->seqnum_seed = htole32(0);
837 	TXD->mss_l4len_idx = htole32(0);
838 
839 	/* We've consumed the first desc, adjust counters */
840 	if (++ctxd == txr->num_desc)
841 		ctxd = 0;
842 	txr->next_avail_desc = ctxd;
843 	--txr->tx_avail;
844 
845         return (0);
846 }
847 
848 /**********************************************************************
849  *
850  *  Setup work for hardware segmentation offload (TSO) on
851  *  adapters using advanced tx descriptors
852  *
853  **********************************************************************/
854 static int
855 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
856     u32 *cmd_type_len, u32 *olinfo_status)
857 {
858 	struct ixgbe_adv_tx_context_desc *TXD;
859 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
860 	u32 mss_l4len_idx = 0, paylen;
861 	u16 vtag = 0, eh_type;
862 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
863 	struct ether_vlan_header *eh;
864 #ifdef INET6
865 	struct ip6_hdr *ip6;
866 #endif
867 #ifdef INET
868 	struct ip *ip;
869 #endif
870 	struct tcphdr *th;
871 
872 
873 	/*
874 	 * Determine where frame payload starts.
875 	 * Jump over vlan headers if already present
876 	 */
877 	eh = mtod(mp, struct ether_vlan_header *);
878 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
879 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
880 		eh_type = eh->evl_proto;
881 	} else {
882 		ehdrlen = ETHER_HDR_LEN;
883 		eh_type = eh->evl_encap_proto;
884 	}
885 
886 	switch (ntohs(eh_type)) {
887 #ifdef INET6
888 	case ETHERTYPE_IPV6:
889 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
890 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
891 		if (ip6->ip6_nxt != IPPROTO_TCP)
892 			return (ENXIO);
893 		ip_hlen = sizeof(struct ip6_hdr);
894 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
895 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
896 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
897 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
898 		break;
899 #endif
900 #ifdef INET
901 	case ETHERTYPE_IP:
902 		ip = (struct ip *)(mp->m_data + ehdrlen);
903 		if (ip->ip_p != IPPROTO_TCP)
904 			return (ENXIO);
905 		ip->ip_sum = 0;
906 		ip_hlen = ip->ip_hl << 2;
907 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
908 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
909 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
910 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
911 		/* Tell transmit desc to also do IPv4 checksum. */
912 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
913 		break;
914 #endif
915 	default:
916 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
917 		    __func__, ntohs(eh_type));
918 		break;
919 	}
920 
921 	ctxd = txr->next_avail_desc;
922 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
923 
924 	tcp_hlen = th->th_off << 2;
925 
926 	/* This is used in the transmit desc in encap */
927 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
928 
929 	/* VLAN MACLEN IPLEN */
930 	if (mp->m_flags & M_VLANTAG) {
931 		vtag = htole16(mp->m_pkthdr.ether_vtag);
932                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
933 	}
934 
935 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
936 	vlan_macip_lens |= ip_hlen;
937 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
938 
939 	/* ADV DTYPE TUCMD */
940 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
941 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
942 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
943 
944 	/* MSS L4LEN IDX */
945 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
946 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
947 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
948 
949 	TXD->seqnum_seed = htole32(0);
950 
951 	if (++ctxd == txr->num_desc)
952 		ctxd = 0;
953 
954 	txr->tx_avail--;
955 	txr->next_avail_desc = ctxd;
956 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
957 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
958 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
959 	++txr->tso_tx;
960 	return (0);
961 }
962 
963 
964 /**********************************************************************
965  *
966  *  Examine each tx_buffer in the used queue. If the hardware is done
967  *  processing the packet then free associated resources. The
968  *  tx_buffer is put back on the free queue.
969  *
970  **********************************************************************/
971 void
972 ixgbe_txeof(struct tx_ring *txr)
973 {
974 #ifdef DEV_NETMAP
975 	struct adapter		*adapter = txr->adapter;
976 	struct ifnet		*ifp = adapter->ifp;
977 #endif
978 	u32			work, processed = 0;
979 	u16			limit = txr->process_limit;
980 	struct ixgbe_tx_buf	*buf;
981 	union ixgbe_adv_tx_desc *txd;
982 
983 	mtx_assert(&txr->tx_mtx, MA_OWNED);
984 
985 #ifdef DEV_NETMAP
986 	if (ifp->if_capenable & IFCAP_NETMAP) {
987 		struct netmap_adapter *na = NA(ifp);
988 		struct netmap_kring *kring = &na->tx_rings[txr->me];
989 		txd = txr->tx_base;
990 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
991 		    BUS_DMASYNC_POSTREAD);
992 		/*
993 		 * In netmap mode, all the work is done in the context
994 		 * of the client thread. Interrupt handlers only wake up
995 		 * clients, which may be sleeping on individual rings
996 		 * or on a global resource for all rings.
997 		 * To implement tx interrupt mitigation, we wake up the client
998 		 * thread roughly every half ring, even if the NIC interrupts
999 		 * more frequently. This is implemented as follows:
1000 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1001 		 *   the slot that should wake up the thread (nkr_num_slots
1002 		 *   means the user thread should not be woken up);
1003 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1004 		 *   or the slot has the DD bit set.
1005 		 */
1006 		if (!netmap_mitigate ||
1007 		    (kring->nr_kflags < kring->nkr_num_slots &&
1008 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1009 			netmap_tx_irq(ifp, txr->me);
1010 		}
1011 		return;
1012 	}
1013 #endif /* DEV_NETMAP */
1014 
1015 	if (txr->tx_avail == txr->num_desc) {
1016 		txr->busy = 0;
1017 		return;
1018 	}
1019 
1020 	/* Get work starting point */
1021 	work = txr->next_to_clean;
1022 	buf = &txr->tx_buffers[work];
1023 	txd = &txr->tx_base[work];
1024 	work -= txr->num_desc; /* The distance to ring end */
1025         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1026             BUS_DMASYNC_POSTREAD);
1027 
1028 	do {
1029 		union ixgbe_adv_tx_desc *eop= buf->eop;
1030 		if (eop == NULL) /* No work */
1031 			break;
1032 
1033 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1034 			break;	/* I/O not complete */
1035 
1036 		if (buf->m_head) {
1037 			txr->bytes +=
1038 			    buf->m_head->m_pkthdr.len;
1039 			bus_dmamap_sync(txr->txtag,
1040 			    buf->map,
1041 			    BUS_DMASYNC_POSTWRITE);
1042 			bus_dmamap_unload(txr->txtag,
1043 			    buf->map);
1044 			m_freem(buf->m_head);
1045 			buf->m_head = NULL;
1046 		}
1047 		buf->eop = NULL;
1048 		++txr->tx_avail;
1049 
1050 		/* We clean the range if multi segment */
1051 		while (txd != eop) {
1052 			++txd;
1053 			++buf;
1054 			++work;
1055 			/* wrap the ring? */
1056 			if (__predict_false(!work)) {
1057 				work -= txr->num_desc;
1058 				buf = txr->tx_buffers;
1059 				txd = txr->tx_base;
1060 			}
1061 			if (buf->m_head) {
1062 				txr->bytes +=
1063 				    buf->m_head->m_pkthdr.len;
1064 				bus_dmamap_sync(txr->txtag,
1065 				    buf->map,
1066 				    BUS_DMASYNC_POSTWRITE);
1067 				bus_dmamap_unload(txr->txtag,
1068 				    buf->map);
1069 				m_freem(buf->m_head);
1070 				buf->m_head = NULL;
1071 			}
1072 			++txr->tx_avail;
1073 			buf->eop = NULL;
1074 
1075 		}
1076 		++txr->packets;
1077 		++processed;
1078 
1079 		/* Try the next packet */
1080 		++txd;
1081 		++buf;
1082 		++work;
1083 		/* reset with a wrap */
1084 		if (__predict_false(!work)) {
1085 			work -= txr->num_desc;
1086 			buf = txr->tx_buffers;
1087 			txd = txr->tx_base;
1088 		}
1089 		prefetch(txd);
1090 	} while (__predict_true(--limit));
1091 
1092 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094 
1095 	work += txr->num_desc;
1096 	txr->next_to_clean = work;
1097 
1098 	/*
1099 	** Queue Hang detection, we know there's
1100 	** work outstanding or the first return
1101 	** would have been taken, so increment busy
1102 	** if nothing managed to get cleaned, then
1103 	** in local_timer it will be checked and
1104 	** marked as HUNG if it exceeds a MAX attempt.
1105 	*/
1106 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107 		++txr->busy;
1108 	/*
1109 	** If anything gets cleaned we reset state to 1,
1110 	** note this will turn off HUNG if its set.
1111 	*/
1112 	if (processed)
1113 		txr->busy = 1;
1114 
1115 	if (txr->tx_avail == txr->num_desc)
1116 		txr->busy = 0;
1117 
1118 	return;
1119 }
1120 
1121 
#ifdef IXGBE_FDIR
/*
** Build a Flow Director signature filter from the headers of an
** outgoing packet so the flow can be identified and kept on the same
** CPU.  Only IPv4 TCP and UDP packets are considered; anything else
** returns without programming a filter.  Header parsing costs
** performance, so it is only done at IXGBE_FDIR_RATE of packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ether_vlan_header	*evh;
	struct ip			*iph;
	caddr_t				l4hdr;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				l2len;
	u16				proto;

	/* Locate the L3 header, skipping an in-line VLAN tag if present */
	evh = mtod(mp, struct ether_vlan_header *);
	if (evh->evl_encap_proto != htons(ETHERTYPE_VLAN)) {
		l2len = ETHER_HDR_LEN;
		proto = evh->evl_encap_proto;
	} else {
		l2len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		proto = evh->evl_proto;
	}

	/* Only handling IPv4 */
	if (proto != htons(ETHERTYPE_IP))
		return;

	iph = (struct ip *)(mp->m_data + l2len);
	l4hdr = (caddr_t)iph + (iph->ip_hl << 2);

	/* TCP or UDP only; source and destination ports are swapped */
	if (iph->ip_p == IPPROTO_TCP) {
		struct tcphdr *tcp = (struct tcphdr *)l4hdr;

		common.port.dst ^= tcp->th_sport;
		common.port.src ^= tcp->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
	} else if (iph->ip_p == IPPROTO_UDP) {
		struct udphdr *udp = (struct udphdr *)l4hdr;

		common.port.dst ^= udp->uh_sport;
		common.port.src ^= udp->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
	} else
		return;

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	common.flex_bytes ^= mp->m_pkthdr.ether_vtag ?
	    htons(ETHERTYPE_VLAN) : proto;
	common.ip ^= iph->ip_src.s_addr ^ iph->ip_dst.s_addr;

	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, adapter->queues[txr->me].msix);
}
#endif /* IXGBE_FDIR */
1197 
1198 /*
1199 ** Used to detect a descriptor that has
1200 ** been merged by Hardware RSC.
1201 */
1202 static inline u32
1203 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1204 {
1205 	return (le32toh(rx->wb.lower.lo_dword.data) &
1206 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1207 }
1208 
1209 /*********************************************************************
1210  *
1211  *  Initialize Hardware RSC (LRO) feature on 82599
1212  *  for an RX ring, this is toggled by the LRO capability
1213  *  even though it is transparent to the stack.
1214  *
1215  *  NOTE: since this HW feature only works with IPV4 and
1216  *        our testing has shown soft LRO to be as effective
1217  *        I have decided to disable this by default.
1218  *
1219  **********************************************************************/
1220 static void
1221 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1222 {
1223 	struct	adapter 	*adapter = rxr->adapter;
1224 	struct	ixgbe_hw	*hw = &adapter->hw;
1225 	u32			rscctrl, rdrxctl;
1226 
1227 	/* If turning LRO/RSC off we need to disable it */
1228 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1229 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1230 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1231 		return;
1232 	}
1233 
1234 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1235 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1236 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1237 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1238 #endif /* DEV_NETMAP */
1239 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1240 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1241 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1242 
1243 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1244 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1245 	/*
1246 	** Limit the total number of descriptors that
1247 	** can be combined, so it does not exceed 64K
1248 	*/
1249 	if (rxr->mbuf_sz == MCLBYTES)
1250 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1251 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1252 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1253 	else if (rxr->mbuf_sz == MJUM9BYTES)
1254 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1255 	else  /* Using 16K cluster */
1256 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1257 
1258 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1259 
1260 	/* Enable TCP header recognition */
1261 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1262 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1263 	    IXGBE_PSRTYPE_TCPHDR));
1264 
1265 	/* Disable RSC for ACK packets */
1266 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1267 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1268 
1269 	rxr->hw_rsc = TRUE;
1270 }
1271 /*********************************************************************
1272  *
1273  *  Refresh mbuf buffers for RX descriptor rings
1274  *   - now keeps its own state so discards due to resource
1275  *     exhaustion are unnecessary, if an mbuf cannot be obtained
1276  *     it just returns, keeping its placeholder, thus it can simply
1277  *     be recalled to try again.
1278  *
1279  **********************************************************************/
1280 static void
1281 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1282 {
1283 	struct adapter		*adapter = rxr->adapter;
1284 	bus_dma_segment_t	seg[1];
1285 	struct ixgbe_rx_buf	*rxbuf;
1286 	struct mbuf		*mp;
1287 	int			i, j, nsegs, error;
1288 	bool			refreshed = FALSE;
1289 
1290 	i = j = rxr->next_to_refresh;
1291 	/* Control the loop with one beyond */
1292 	if (++j == rxr->num_desc)
1293 		j = 0;
1294 
1295 	while (j != limit) {
1296 		rxbuf = &rxr->rx_buffers[i];
1297 		if (rxbuf->buf == NULL) {
1298 			mp = m_getjcl(M_NOWAIT, MT_DATA,
1299 			    M_PKTHDR, rxr->mbuf_sz);
1300 			if (mp == NULL)
1301 				goto update;
1302 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1303 				m_adj(mp, ETHER_ALIGN);
1304 		} else
1305 			mp = rxbuf->buf;
1306 
1307 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1308 
1309 		/* If we're dealing with an mbuf that was copied rather
1310 		 * than replaced, there's no need to go through busdma.
1311 		 */
1312 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1313 			/* Get the memory mapping */
1314 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1315 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1316 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1317 			if (error != 0) {
1318 				printf("Refresh mbufs: payload dmamap load"
1319 				    " failure - %d\n", error);
1320 				m_free(mp);
1321 				rxbuf->buf = NULL;
1322 				goto update;
1323 			}
1324 			rxbuf->buf = mp;
1325 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1326 			    BUS_DMASYNC_PREREAD);
1327 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1328 			    htole64(seg[0].ds_addr);
1329 		} else {
1330 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1331 			rxbuf->flags &= ~IXGBE_RX_COPY;
1332 		}
1333 
1334 		refreshed = TRUE;
1335 		/* Next is precalculated */
1336 		i = j;
1337 		rxr->next_to_refresh = i;
1338 		if (++j == rxr->num_desc)
1339 			j = 0;
1340 	}
1341 update:
1342 	if (refreshed) /* Update hardware tail index */
1343 		IXGBE_WRITE_REG(&adapter->hw,
1344 		    rxr->tail, rxr->next_to_refresh);
1345 	return;
1346 }
1347 
1348 /*********************************************************************
1349  *
1350  *  Allocate memory for rx_buffer structures. Since we use one
1351  *  rx_buffer per received packet, the maximum number of rx_buffer's
1352  *  that we'll need is equal to the number of receive descriptors
1353  *  that we've allocated.
1354  *
1355  **********************************************************************/
1356 int
1357 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1358 {
1359 	struct	adapter 	*adapter = rxr->adapter;
1360 	device_t 		dev = adapter->dev;
1361 	struct ixgbe_rx_buf 	*rxbuf;
1362 	int             	i, bsize, error;
1363 
1364 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1365 	if (!(rxr->rx_buffers =
1366 	    (struct ixgbe_rx_buf *) malloc(bsize,
1367 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1368 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1369 		error = ENOMEM;
1370 		goto fail;
1371 	}
1372 
1373 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
1374 				   1, 0,	/* alignment, bounds */
1375 				   BUS_SPACE_MAXADDR,	/* lowaddr */
1376 				   BUS_SPACE_MAXADDR,	/* highaddr */
1377 				   NULL, NULL,		/* filter, filterarg */
1378 				   MJUM16BYTES,		/* maxsize */
1379 				   1,			/* nsegments */
1380 				   MJUM16BYTES,		/* maxsegsize */
1381 				   0,			/* flags */
1382 				   NULL,		/* lockfunc */
1383 				   NULL,		/* lockfuncarg */
1384 				   &rxr->ptag))) {
1385 		device_printf(dev, "Unable to create RX DMA tag\n");
1386 		goto fail;
1387 	}
1388 
1389 	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1390 		rxbuf = &rxr->rx_buffers[i];
1391 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1392 		if (error) {
1393 			device_printf(dev, "Unable to create RX dma map\n");
1394 			goto fail;
1395 		}
1396 	}
1397 
1398 	return (0);
1399 
1400 fail:
1401 	/* Frees all, but can handle partial completion */
1402 	ixgbe_free_receive_structures(adapter);
1403 	return (error);
1404 }
1405 
1406 
1407 static void
1408 ixgbe_free_receive_ring(struct rx_ring *rxr)
1409 {
1410 	struct ixgbe_rx_buf       *rxbuf;
1411 	int i;
1412 
1413 	for (i = 0; i < rxr->num_desc; i++) {
1414 		rxbuf = &rxr->rx_buffers[i];
1415 		if (rxbuf->buf != NULL) {
1416 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1417 			    BUS_DMASYNC_POSTREAD);
1418 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1419 			rxbuf->buf->m_flags |= M_PKTHDR;
1420 			m_freem(rxbuf->buf);
1421 			rxbuf->buf = NULL;
1422 			rxbuf->flags = 0;
1423 		}
1424 	}
1425 }
1426 
1427 
1428 /*********************************************************************
1429  *
1430  *  Initialize a receive ring and its buffers.
1431  *
1432  **********************************************************************/
1433 static int
1434 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1435 {
1436 	struct	adapter 	*adapter;
1437 	struct ifnet		*ifp;
1438 	device_t		dev;
1439 	struct ixgbe_rx_buf	*rxbuf;
1440 	bus_dma_segment_t	seg[1];
1441 	struct lro_ctrl		*lro = &rxr->lro;
1442 	int			rsize, nsegs, error = 0;
1443 #ifdef DEV_NETMAP
1444 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1445 	struct netmap_slot *slot;
1446 #endif /* DEV_NETMAP */
1447 
1448 	adapter = rxr->adapter;
1449 	ifp = adapter->ifp;
1450 	dev = adapter->dev;
1451 
1452 	/* Clear the ring contents */
1453 	IXGBE_RX_LOCK(rxr);
1454 #ifdef DEV_NETMAP
1455 	/* same as in ixgbe_setup_transmit_ring() */
1456 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
1457 #endif /* DEV_NETMAP */
1458 	rsize = roundup2(adapter->num_rx_desc *
1459 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1460 	bzero((void *)rxr->rx_base, rsize);
1461 	/* Cache the size */
1462 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1463 
1464 	/* Free current RX buffer structs and their mbufs */
1465 	ixgbe_free_receive_ring(rxr);
1466 
1467 	/* Now replenish the mbufs */
1468 	for (int j = 0; j != rxr->num_desc; ++j) {
1469 		struct mbuf	*mp;
1470 
1471 		rxbuf = &rxr->rx_buffers[j];
1472 #ifdef DEV_NETMAP
1473 		/*
1474 		 * In netmap mode, fill the map and set the buffer
1475 		 * address in the NIC ring, considering the offset
1476 		 * between the netmap and NIC rings (see comment in
1477 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1478 		 * an mbuf, so end the block with a continue;
1479 		 */
1480 		if (slot) {
1481 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1482 			uint64_t paddr;
1483 			void *addr;
1484 
1485 			addr = PNMB(na, slot + sj, &paddr);
1486 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1487 			/* Update descriptor and the cached value */
1488 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1489 			rxbuf->addr = htole64(paddr);
1490 			continue;
1491 		}
1492 #endif /* DEV_NETMAP */
1493 		rxbuf->flags = 0;
1494 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1495 		    M_PKTHDR, adapter->rx_mbuf_sz);
1496 		if (rxbuf->buf == NULL) {
1497 			error = ENOBUFS;
1498                         goto fail;
1499 		}
1500 		mp = rxbuf->buf;
1501 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1502 		/* Get the memory mapping */
1503 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1504 		    rxbuf->pmap, mp, seg,
1505 		    &nsegs, BUS_DMA_NOWAIT);
1506 		if (error != 0)
1507                         goto fail;
1508 		bus_dmamap_sync(rxr->ptag,
1509 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
1510 		/* Update the descriptor and the cached value */
1511 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1512 		rxbuf->addr = htole64(seg[0].ds_addr);
1513 	}
1514 
1515 
1516 	/* Setup our descriptor indices */
1517 	rxr->next_to_check = 0;
1518 	rxr->next_to_refresh = 0;
1519 	rxr->lro_enabled = FALSE;
1520 	rxr->rx_copies = 0;
1521 	rxr->rx_bytes = 0;
1522 	rxr->vtag_strip = FALSE;
1523 
1524 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1525 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1526 
1527 	/*
1528 	** Now set up the LRO interface:
1529 	*/
1530 	if (ixgbe_rsc_enable)
1531 		ixgbe_setup_hw_rsc(rxr);
1532 	else if (ifp->if_capenable & IFCAP_LRO) {
1533 		int err = tcp_lro_init(lro);
1534 		if (err) {
1535 			device_printf(dev, "LRO Initialization failed!\n");
1536 			goto fail;
1537 		}
1538 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1539 		rxr->lro_enabled = TRUE;
1540 		lro->ifp = adapter->ifp;
1541 	}
1542 
1543 	IXGBE_RX_UNLOCK(rxr);
1544 	return (0);
1545 
1546 fail:
1547 	ixgbe_free_receive_ring(rxr);
1548 	IXGBE_RX_UNLOCK(rxr);
1549 	return (error);
1550 }
1551 
1552 /*********************************************************************
1553  *
1554  *  Initialize all receive rings.
1555  *
1556  **********************************************************************/
1557 int
1558 ixgbe_setup_receive_structures(struct adapter *adapter)
1559 {
1560 	struct rx_ring *rxr = adapter->rx_rings;
1561 	int j;
1562 
1563 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1564 		if (ixgbe_setup_receive_ring(rxr))
1565 			goto fail;
1566 
1567 	return (0);
1568 fail:
1569 	/*
1570 	 * Free RX buffers allocated so far, we will only handle
1571 	 * the rings that completed, the failing case will have
1572 	 * cleaned up for itself. 'j' failed, so its the terminus.
1573 	 */
1574 	for (int i = 0; i < j; ++i) {
1575 		rxr = &adapter->rx_rings[i];
1576 		ixgbe_free_receive_ring(rxr);
1577 	}
1578 
1579 	return (ENOBUFS);
1580 }
1581 
1582 
1583 /*********************************************************************
1584  *
1585  *  Free all receive rings.
1586  *
1587  **********************************************************************/
1588 void
1589 ixgbe_free_receive_structures(struct adapter *adapter)
1590 {
1591 	struct rx_ring *rxr = adapter->rx_rings;
1592 
1593 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1594 
1595 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1596 		struct lro_ctrl		*lro = &rxr->lro;
1597 		ixgbe_free_receive_buffers(rxr);
1598 		/* Free LRO memory */
1599 		tcp_lro_free(lro);
1600 		/* Free the ring memory as well */
1601 		ixgbe_dma_free(adapter, &rxr->rxdma);
1602 	}
1603 
1604 	free(adapter->rx_rings, M_DEVBUF);
1605 }
1606 
1607 
1608 /*********************************************************************
1609  *
1610  *  Free receive ring data structures
1611  *
1612  **********************************************************************/
1613 void
1614 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1615 {
1616 	struct adapter		*adapter = rxr->adapter;
1617 	struct ixgbe_rx_buf	*rxbuf;
1618 
1619 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1620 
1621 	/* Cleanup any existing buffers */
1622 	if (rxr->rx_buffers != NULL) {
1623 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1624 			rxbuf = &rxr->rx_buffers[i];
1625 			if (rxbuf->buf != NULL) {
1626 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1627 				    BUS_DMASYNC_POSTREAD);
1628 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1629 				rxbuf->buf->m_flags |= M_PKTHDR;
1630 				m_freem(rxbuf->buf);
1631 			}
1632 			rxbuf->buf = NULL;
1633 			if (rxbuf->pmap != NULL) {
1634 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1635 				rxbuf->pmap = NULL;
1636 			}
1637 		}
1638 		if (rxr->rx_buffers != NULL) {
1639 			free(rxr->rx_buffers, M_DEVBUF);
1640 			rxr->rx_buffers = NULL;
1641 		}
1642 	}
1643 
1644 	if (rxr->ptag != NULL) {
1645 		bus_dma_tag_destroy(rxr->ptag);
1646 		rxr->ptag = NULL;
1647 	}
1648 
1649 	return;
1650 }
1651 
1652 static __inline void
1653 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1654 {
1655 
1656         /*
1657          * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1658          * should be computed by hardware. Also it should not have VLAN tag in
1659          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1660          */
1661         if (rxr->lro_enabled &&
1662             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1663             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1664             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1665             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1666             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1667             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1668             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1669             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1670                 /*
1671                  * Send to the stack if:
1672                  **  - LRO not enabled, or
1673                  **  - no LRO resources, or
1674                  **  - lro enqueue fails
1675                  */
1676                 if (rxr->lro.lro_cnt != 0)
1677                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1678                                 return;
1679         }
1680 	IXGBE_RX_UNLOCK(rxr);
1681         (*ifp->if_input)(ifp, m);
1682 	IXGBE_RX_LOCK(rxr);
1683 }
1684 
1685 static __inline void
1686 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1687 {
1688 	struct ixgbe_rx_buf	*rbuf;
1689 
1690 	rbuf = &rxr->rx_buffers[i];
1691 
1692 
1693 	/*
1694 	** With advanced descriptors the writeback
1695 	** clobbers the buffer addrs, so its easier
1696 	** to just free the existing mbufs and take
1697 	** the normal refresh path to get new buffers
1698 	** and mapping.
1699 	*/
1700 
1701 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1702 		rbuf->fmp->m_flags |= M_PKTHDR;
1703 		m_freem(rbuf->fmp);
1704 		rbuf->fmp = NULL;
1705 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1706 	} else if (rbuf->buf) {
1707 		m_free(rbuf->buf);
1708 		rbuf->buf = NULL;
1709 	}
1710 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1711 
1712 	rbuf->flags = 0;
1713 
1714 	return;
1715 }
1716 
1717 
1718 /*********************************************************************
1719  *
1720  *  This routine executes in interrupt context. It replenishes
1721  *  the mbufs in the descriptor and sends data which has been
1722  *  dma'ed into host memory to upper layer.
1723  *
1724  *  Return TRUE for more work, FALSE for all clean.
1725  *********************************************************************/
1726 bool
1727 ixgbe_rxeof(struct ix_queue *que)
1728 {
1729 	struct adapter		*adapter = que->adapter;
1730 	struct rx_ring		*rxr = que->rxr;
1731 	struct ifnet		*ifp = adapter->ifp;
1732 	struct lro_ctrl		*lro = &rxr->lro;
1733 	struct lro_entry	*queued;
1734 	int			i, nextp, processed = 0;
1735 	u32			staterr = 0;
1736 	u16			count = rxr->process_limit;
1737 	union ixgbe_adv_rx_desc	*cur;
1738 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1739 	u16			pkt_info;
1740 
1741 	IXGBE_RX_LOCK(rxr);
1742 
1743 #ifdef DEV_NETMAP
1744 	/* Same as the txeof routine: wakeup clients on intr. */
1745 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1746 		IXGBE_RX_UNLOCK(rxr);
1747 		return (FALSE);
1748 	}
1749 #endif /* DEV_NETMAP */
1750 
1751 	for (i = rxr->next_to_check; count != 0;) {
1752 		struct mbuf	*sendmp, *mp;
1753 		u32		rsc, ptype;
1754 		u16		len;
1755 		u16		vtag = 0;
1756 		bool		eop;
1757 
1758 		/* Sync the ring. */
1759 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1760 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1761 
1762 		cur = &rxr->rx_base[i];
1763 		staterr = le32toh(cur->wb.upper.status_error);
1764 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1765 
1766 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1767 			break;
1768 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1769 			break;
1770 
1771 		count--;
1772 		sendmp = NULL;
1773 		nbuf = NULL;
1774 		rsc = 0;
1775 		cur->wb.upper.status_error = 0;
1776 		rbuf = &rxr->rx_buffers[i];
1777 		mp = rbuf->buf;
1778 
1779 		len = le16toh(cur->wb.upper.length);
1780 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1781 		    IXGBE_RXDADV_PKTTYPE_MASK;
1782 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1783 
1784 		/* Make sure bad packets are discarded */
1785 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1786 #if __FreeBSD_version >= 1100036
1787 			if (IXGBE_IS_VF(adapter))
1788 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1789 #endif
1790 			rxr->rx_discarded++;
1791 			ixgbe_rx_discard(rxr, i);
1792 			goto next_desc;
1793 		}
1794 
1795 		/*
1796 		** On 82599 which supports a hardware
1797 		** LRO (called HW RSC), packets need
1798 		** not be fragmented across sequential
1799 		** descriptors, rather the next descriptor
1800 		** is indicated in bits of the descriptor.
1801 		** This also means that we might proceses
1802 		** more than one packet at a time, something
1803 		** that has never been true before, it
1804 		** required eliminating global chain pointers
1805 		** in favor of what we are doing here.  -jfv
1806 		*/
1807 		if (!eop) {
1808 			/*
1809 			** Figure out the next descriptor
1810 			** of this frame.
1811 			*/
1812 			if (rxr->hw_rsc == TRUE) {
1813 				rsc = ixgbe_rsc_count(cur);
1814 				rxr->rsc_num += (rsc - 1);
1815 			}
1816 			if (rsc) { /* Get hardware index */
1817 				nextp = ((staterr &
1818 				    IXGBE_RXDADV_NEXTP_MASK) >>
1819 				    IXGBE_RXDADV_NEXTP_SHIFT);
1820 			} else { /* Just sequential */
1821 				nextp = i + 1;
1822 				if (nextp == adapter->num_rx_desc)
1823 					nextp = 0;
1824 			}
1825 			nbuf = &rxr->rx_buffers[nextp];
1826 			prefetch(nbuf);
1827 		}
1828 		/*
1829 		** Rather than using the fmp/lmp global pointers
1830 		** we now keep the head of a packet chain in the
1831 		** buffer struct and pass this along from one
1832 		** descriptor to the next, until we get EOP.
1833 		*/
1834 		mp->m_len = len;
1835 		/*
1836 		** See if there is a stored head
1837 		** that determines what we are
1838 		*/
1839 		sendmp = rbuf->fmp;
1840 		if (sendmp != NULL) {  /* secondary frag */
1841 			rbuf->buf = rbuf->fmp = NULL;
1842 			mp->m_flags &= ~M_PKTHDR;
1843 			sendmp->m_pkthdr.len += mp->m_len;
1844 		} else {
1845 			/*
1846 			 * Optimize.  This might be a small packet,
1847 			 * maybe just a TCP ACK.  Do a fast copy that
1848 			 * is cache aligned into a new mbuf, and
1849 			 * leave the old mbuf+cluster for re-use.
1850 			 */
1851 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1852 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1853 				if (sendmp != NULL) {
1854 					sendmp->m_data +=
1855 					    IXGBE_RX_COPY_ALIGN;
1856 					ixgbe_bcopy(mp->m_data,
1857 					    sendmp->m_data, len);
1858 					sendmp->m_len = len;
1859 					rxr->rx_copies++;
1860 					rbuf->flags |= IXGBE_RX_COPY;
1861 				}
1862 			}
1863 			if (sendmp == NULL) {
1864 				rbuf->buf = rbuf->fmp = NULL;
1865 				sendmp = mp;
1866 			}
1867 
1868 			/* first desc of a non-ps chain */
1869 			sendmp->m_flags |= M_PKTHDR;
1870 			sendmp->m_pkthdr.len = mp->m_len;
1871 		}
1872 		++processed;
1873 
1874 		/* Pass the head pointer on */
1875 		if (eop == 0) {
1876 			nbuf->fmp = sendmp;
1877 			sendmp = NULL;
1878 			mp->m_next = nbuf->buf;
1879 		} else { /* Sending this frame */
1880 			sendmp->m_pkthdr.rcvif = ifp;
1881 			rxr->rx_packets++;
1882 			/* capture data for AIM */
1883 			rxr->bytes += sendmp->m_pkthdr.len;
1884 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1885 			/* Process vlan info */
1886 			if ((rxr->vtag_strip) &&
1887 			    (staterr & IXGBE_RXD_STAT_VP))
1888 				vtag = le16toh(cur->wb.upper.vlan);
1889 			if (vtag) {
1890 				sendmp->m_pkthdr.ether_vtag = vtag;
1891 				sendmp->m_flags |= M_VLANTAG;
1892 			}
1893 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1894 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1895 #if __FreeBSD_version >= 800000
1896 			sendmp->m_pkthdr.flowid = que->msix;
1897 #endif /* FreeBSD_version */
1898 		}
1899 next_desc:
1900 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1901 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1902 
1903 		/* Advance our pointers to the next descriptor. */
1904 		if (++i == rxr->num_desc)
1905 			i = 0;
1906 
1907 		/* Now send to the stack or do LRO */
1908 		if (sendmp != NULL) {
1909 			rxr->next_to_check = i;
1910 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1911 			i = rxr->next_to_check;
1912 		}
1913 
1914                /* Every 8 descriptors we go to refresh mbufs */
1915 		if (processed == 8) {
1916 			ixgbe_refresh_mbufs(rxr, i);
1917 			processed = 0;
1918 		}
1919 	}
1920 
1921 	/* Refresh any remaining buf structs */
1922 	if (ixgbe_rx_unrefreshed(rxr))
1923 		ixgbe_refresh_mbufs(rxr, i);
1924 
1925 	rxr->next_to_check = i;
1926 
1927 	/*
1928 	 * Flush any outstanding LRO work
1929 	 */
1930 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1931 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1932 		tcp_lro_flush(lro, queued);
1933 	}
1934 
1935 	IXGBE_RX_UNLOCK(rxr);
1936 
1937 	/*
1938 	** Still have cleaning to do?
1939 	*/
1940 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1941 		return (TRUE);
1942 	else
1943 		return (FALSE);
1944 }
1945 
1946 
1947 /*********************************************************************
1948  *
1949  *  Verify that the hardware indicated that the checksum is valid.
1950  *  Inform the stack about the status of checksum so that stack
1951  *  doesn't spend time verifying the checksum.
1952  *
1953  *********************************************************************/
1954 static void
1955 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1956 {
1957 	u16	status = (u16) staterr;
1958 	u8	errors = (u8) (staterr >> 24);
1959 	bool	sctp = FALSE;
1960 
1961 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1962 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1963 		sctp = TRUE;
1964 
1965 	if (status & IXGBE_RXD_STAT_IPCS) {
1966 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
1967 			/* IP Checksum Good */
1968 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1969 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1970 
1971 		} else
1972 			mp->m_pkthdr.csum_flags = 0;
1973 	}
1974 	if (status & IXGBE_RXD_STAT_L4CS) {
1975 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1976 #if __FreeBSD_version >= 800000
1977 		if (sctp)
1978 			type = CSUM_SCTP_VALID;
1979 #endif
1980 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1981 			mp->m_pkthdr.csum_flags |= type;
1982 			if (!sctp)
1983 				mp->m_pkthdr.csum_data = htons(0xffff);
1984 		}
1985 	}
1986 	return;
1987 }
1988 
1989 /********************************************************************
1990  * Manage DMA'able memory.
1991  *******************************************************************/
1992 static void
1993 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1994 {
1995 	if (error)
1996 		return;
1997 	*(bus_addr_t *) arg = segs->ds_addr;
1998 	return;
1999 }
2000 
2001 int
2002 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2003 		struct ixgbe_dma_alloc *dma, int mapflags)
2004 {
2005 	device_t dev = adapter->dev;
2006 	int             r;
2007 
2008 	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2009 			       DBA_ALIGN, 0,	/* alignment, bounds */
2010 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2011 			       BUS_SPACE_MAXADDR,	/* highaddr */
2012 			       NULL, NULL,	/* filter, filterarg */
2013 			       size,	/* maxsize */
2014 			       1,	/* nsegments */
2015 			       size,	/* maxsegsize */
2016 			       BUS_DMA_ALLOCNOW,	/* flags */
2017 			       NULL,	/* lockfunc */
2018 			       NULL,	/* lockfuncarg */
2019 			       &dma->dma_tag);
2020 	if (r != 0) {
2021 		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2022 		       "error %u\n", r);
2023 		goto fail_0;
2024 	}
2025 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2026 			     BUS_DMA_NOWAIT, &dma->dma_map);
2027 	if (r != 0) {
2028 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2029 		       "error %u\n", r);
2030 		goto fail_1;
2031 	}
2032 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2033 			    size,
2034 			    ixgbe_dmamap_cb,
2035 			    &dma->dma_paddr,
2036 			    mapflags | BUS_DMA_NOWAIT);
2037 	if (r != 0) {
2038 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2039 		       "error %u\n", r);
2040 		goto fail_2;
2041 	}
2042 	dma->dma_size = size;
2043 	return (0);
2044 fail_2:
2045 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2046 fail_1:
2047 	bus_dma_tag_destroy(dma->dma_tag);
2048 fail_0:
2049 	dma->dma_tag = NULL;
2050 	return (r);
2051 }
2052 
2053 void
2054 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2055 {
2056 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2057 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2058 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2059 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2060 	bus_dma_tag_destroy(dma->dma_tag);
2061 }
2062 
2063 
2064 /*********************************************************************
2065  *
2066  *  Allocate memory for the transmit and receive rings, and then
2067  *  the descriptors associated with each, called only once at attach.
2068  *
2069  **********************************************************************/
2070 int
2071 ixgbe_allocate_queues(struct adapter *adapter)
2072 {
2073 	device_t	dev = adapter->dev;
2074 	struct ix_queue	*que;
2075 	struct tx_ring	*txr;
2076 	struct rx_ring	*rxr;
2077 	int rsize, tsize, error = IXGBE_SUCCESS;
2078 	int txconf = 0, rxconf = 0;
2079 
2080         /* First allocate the top level queue structs */
2081         if (!(adapter->queues =
2082             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2083             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2084                 device_printf(dev, "Unable to allocate queue memory\n");
2085                 error = ENOMEM;
2086                 goto fail;
2087         }
2088 
2089 	/* First allocate the TX ring struct memory */
2090 	if (!(adapter->tx_rings =
2091 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2092 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2093 		device_printf(dev, "Unable to allocate TX ring memory\n");
2094 		error = ENOMEM;
2095 		goto tx_fail;
2096 	}
2097 
2098 	/* Next allocate the RX */
2099 	if (!(adapter->rx_rings =
2100 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2101 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2102 		device_printf(dev, "Unable to allocate RX ring memory\n");
2103 		error = ENOMEM;
2104 		goto rx_fail;
2105 	}
2106 
2107 	/* For the ring itself */
2108 	tsize = roundup2(adapter->num_tx_desc *
2109 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2110 
2111 	/*
2112 	 * Now set up the TX queues, txconf is needed to handle the
2113 	 * possibility that things fail midcourse and we need to
2114 	 * undo memory gracefully
2115 	 */
2116 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2117 		/* Set up some basics */
2118 		txr = &adapter->tx_rings[i];
2119 		txr->adapter = adapter;
2120 		txr->me = i;
2121 		txr->num_desc = adapter->num_tx_desc;
2122 
2123 		/* Initialize the TX side lock */
2124 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2125 		    device_get_nameunit(dev), txr->me);
2126 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2127 
2128 		if (ixgbe_dma_malloc(adapter, tsize,
2129 			&txr->txdma, BUS_DMA_NOWAIT)) {
2130 			device_printf(dev,
2131 			    "Unable to allocate TX Descriptor memory\n");
2132 			error = ENOMEM;
2133 			goto err_tx_desc;
2134 		}
2135 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2136 		bzero((void *)txr->tx_base, tsize);
2137 
2138         	/* Now allocate transmit buffers for the ring */
2139         	if (ixgbe_allocate_transmit_buffers(txr)) {
2140 			device_printf(dev,
2141 			    "Critical Failure setting up transmit buffers\n");
2142 			error = ENOMEM;
2143 			goto err_tx_desc;
2144         	}
2145 #ifndef IXGBE_LEGACY_TX
2146 		/* Allocate a buf ring */
2147 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2148 		    M_WAITOK, &txr->tx_mtx);
2149 		if (txr->br == NULL) {
2150 			device_printf(dev,
2151 			    "Critical Failure setting up buf ring\n");
2152 			error = ENOMEM;
2153 			goto err_tx_desc;
2154         	}
2155 #endif
2156 	}
2157 
2158 	/*
2159 	 * Next the RX queues...
2160 	 */
2161 	rsize = roundup2(adapter->num_rx_desc *
2162 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2163 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2164 		rxr = &adapter->rx_rings[i];
2165 		/* Set up some basics */
2166 		rxr->adapter = adapter;
2167 		rxr->me = i;
2168 		rxr->num_desc = adapter->num_rx_desc;
2169 
2170 		/* Initialize the RX side lock */
2171 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2172 		    device_get_nameunit(dev), rxr->me);
2173 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2174 
2175 		if (ixgbe_dma_malloc(adapter, rsize,
2176 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2177 			device_printf(dev,
2178 			    "Unable to allocate RxDescriptor memory\n");
2179 			error = ENOMEM;
2180 			goto err_rx_desc;
2181 		}
2182 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2183 		bzero((void *)rxr->rx_base, rsize);
2184 
2185         	/* Allocate receive buffers for the ring*/
2186 		if (ixgbe_allocate_receive_buffers(rxr)) {
2187 			device_printf(dev,
2188 			    "Critical Failure setting up receive buffers\n");
2189 			error = ENOMEM;
2190 			goto err_rx_desc;
2191 		}
2192 	}
2193 
2194 	/*
2195 	** Finally set up the queue holding structs
2196 	*/
2197 	for (int i = 0; i < adapter->num_queues; i++) {
2198 		que = &adapter->queues[i];
2199 		que->adapter = adapter;
2200 		que->me = i;
2201 		que->txr = &adapter->tx_rings[i];
2202 		que->rxr = &adapter->rx_rings[i];
2203 	}
2204 
2205 	return (0);
2206 
2207 err_rx_desc:
2208 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2209 		ixgbe_dma_free(adapter, &rxr->rxdma);
2210 err_tx_desc:
2211 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2212 		ixgbe_dma_free(adapter, &txr->txdma);
2213 	free(adapter->rx_rings, M_DEVBUF);
2214 rx_fail:
2215 	free(adapter->tx_rings, M_DEVBUF);
2216 tx_fail:
2217 	free(adapter->queues, M_DEVBUF);
2218 fail:
2219 	return (error);
2220 }
2221