1 /******************************************************************************
2 
3   Copyright (c) 2001-2017, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD: stable/10/sys/dev/ixgbe/ixv_txrx.c 315333 2017-03-15 21:20:17Z erj $*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40 
41 #include "ixv.h"
42 
43 extern int ix_crcstrip;
44 
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later.
 *  It also causes IP forwarding to fail, and unlike LRO that cannot
 *  be controlled by the stack. For all these reasons it is left off
 *  with no tuneable interface; turning it on requires changing this
 *  initializer and recompiling.
 */
static bool ixgbe_rsc_enable = FALSE;
58 
/************************************************************************
 *  Local Function prototypes
 *
 *  Forward declarations of the file-local (static) helpers so they can
 *  be referenced before their definitions.
 ************************************************************************/
static void          ixgbe_setup_transmit_ring(struct tx_ring *);
static void          ixgbe_free_transmit_buffers(struct tx_ring *);
static int           ixgbe_setup_receive_ring(struct rx_ring *);
static void          ixgbe_free_receive_buffers(struct rx_ring *);
static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int           ixgbe_tx_ctx_setup(struct tx_ring *,
                                        struct mbuf *, u32 *, u32 *);
static int           ixgbe_tso_setup(struct tx_ring *,
                                     struct mbuf *, u32 *, u32 *);
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                                    struct mbuf *, u32);
static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
                                      struct ixgbe_dma_alloc *, int);
static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);

/* Malloc type passed to the malloc()/free()/buf_ring_free() calls below. */
MALLOC_DECLARE(M_IXV);
81 
82 /************************************************************************
83  * ixv_legacy_start_locked - Transmit entry point
84  *
85  *   Called by the stack to initiate a transmit.
86  *   The driver will remain in this routine as long as there are
87  *   packets to transmit and transmit resources are available.
88  *   In case resources are not available, the stack is notified
89  *   and the packet is requeued.
90  ************************************************************************/
91 int
ixv_legacy_start_locked(struct ifnet * ifp,struct tx_ring * txr)92 ixv_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
93 {
94 	struct mbuf    *m_head;
95 	struct adapter *adapter = txr->adapter;
96 
97 	IXGBE_TX_LOCK_ASSERT(txr);
98 
99 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
100 		return (ENETDOWN);
101 	if (!adapter->link_active)
102 		return (ENETDOWN);
103 
104 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
105 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
106 			break;
107 
108 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
109 		if (m_head == NULL)
110 			break;
111 
112 		if (ixgbe_xmit(txr, &m_head)) {
113 			if (m_head != NULL)
114 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
115 			break;
116 		}
117 		/* Send a copy of the frame to the BPF listener */
118 		ETHER_BPF_MTAP(ifp, m_head);
119 	}
120 
121 	return IXGBE_SUCCESS;
122 } /* ixv_legacy_start_locked */
123 
124 /************************************************************************
125  * ixv_legacy_start
126  *
127  *   Called by the stack, this always uses the first tx ring,
128  *   and should not be used with multiqueue tx enabled.
129  ************************************************************************/
130 void
ixv_legacy_start(struct ifnet * ifp)131 ixv_legacy_start(struct ifnet *ifp)
132 {
133 	struct adapter *adapter = ifp->if_softc;
134 	struct tx_ring *txr = adapter->tx_rings;
135 
136 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
137 		IXGBE_TX_LOCK(txr);
138 		ixv_legacy_start_locked(ifp, txr);
139 		IXGBE_TX_UNLOCK(txr);
140 	}
141 } /* ixv_legacy_start */
142 
143 /************************************************************************
144  * ixv_mq_start - Multiqueue Transmit Entry Point
145  *
146  *   (if_transmit function)
147  ************************************************************************/
148 int
ixv_mq_start(struct ifnet * ifp,struct mbuf * m)149 ixv_mq_start(struct ifnet *ifp, struct mbuf *m)
150 {
151 	struct adapter  *adapter = ifp->if_softc;
152 	struct ix_queue *que;
153 	struct tx_ring  *txr;
154 	int             i, err = 0;
155 	uint32_t        bucket_id;
156 
157 	/*
158 	 * When doing RSS, map it to the same outbound queue
159 	 * as the incoming flow would be mapped to.
160 	 *
161 	 * If everything is setup correctly, it should be the
162 	 * same bucket that the current CPU we're on is.
163 	 */
164 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
165 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
166 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
167 		    &bucket_id) == 0)) {
168 			i = bucket_id % adapter->num_queues;
169 #ifdef IXGBE_DEBUG
170 			if (bucket_id > adapter->num_queues)
171 				if_printf(ifp,
172 				    "bucket_id (%d) > num_queues (%d)\n",
173 				    bucket_id, adapter->num_queues);
174 #endif
175 		} else
176 			i = m->m_pkthdr.flowid % adapter->num_queues;
177 	} else
178 		i = curcpu % adapter->num_queues;
179 
180 	/* Check for a hung queue and pick alternative */
181 	if (((1 << i) & adapter->active_queues) == 0)
182 		i = ffsl(adapter->active_queues);
183 
184 	txr = &adapter->tx_rings[i];
185 	que = &adapter->queues[i];
186 
187 	err = drbr_enqueue(ifp, txr->br, m);
188 	if (err)
189 		return (err);
190 	if (IXGBE_TX_TRYLOCK(txr)) {
191 		ixv_mq_start_locked(ifp, txr);
192 		IXGBE_TX_UNLOCK(txr);
193 	} else
194 		taskqueue_enqueue(que->tq, &txr->txq_task);
195 
196 	return (0);
197 } /* ixv_mq_start */
198 
199 /************************************************************************
200  * ixv_mq_start_locked
201  ************************************************************************/
202 int
ixv_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)203 ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
204 {
205 	struct mbuf    *next;
206 	int            enqueued = 0, err = 0;
207 
208 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
209 		return (ENETDOWN);
210 	if (!txr->adapter->link_active)
211 		return (ENETDOWN);
212 
213 	/* Process the queue */
214 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
215 		err = ixgbe_xmit(txr, &next);
216 		if (err != 0) {
217 			if (next == NULL)
218 				drbr_advance(ifp, txr->br);
219 			else
220 				drbr_putback(ifp, txr->br, next);
221 			break;
222 		}
223 		drbr_advance(ifp, txr->br);
224 		enqueued++;
225 #if __FreeBSD_version >= 1100036
226 		/*
227 		 * Since we're looking at the tx ring, we can check
228 		 * to see if we're a VF by examing our tail register
229 		 * address.
230 		 */
231 		if (next->m_flags & M_MCAST)
232 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
233 #endif
234 		/* Send a copy of the frame to the BPF listener */
235 		ETHER_BPF_MTAP(ifp, next);
236 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
237 			break;
238 	}
239 
240 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
241 		ixv_txeof(txr);
242 
243 	return (err);
244 } /* ixv_mq_start_locked */
245 
246 /************************************************************************
247  * ixv_deferred_mq_start
248  *
249  *   Called from a taskqueue to drain queued transmit packets.
250  ************************************************************************/
251 void
ixv_deferred_mq_start(void * arg,int pending)252 ixv_deferred_mq_start(void *arg, int pending)
253 {
254 	struct tx_ring *txr = arg;
255 	struct adapter *adapter = txr->adapter;
256 	struct ifnet   *ifp = adapter->ifp;
257 
258 	IXGBE_TX_LOCK(txr);
259 	if (!drbr_empty(ifp, txr->br))
260 		ixv_mq_start_locked(ifp, txr);
261 	IXGBE_TX_UNLOCK(txr);
262 } /* ixv_deferred_mq_start */
263 
264 /************************************************************************
265  * ixv_qflush - Flush all ring buffers
266  ************************************************************************/
267 void
ixv_qflush(struct ifnet * ifp)268 ixv_qflush(struct ifnet *ifp)
269 {
270 	struct adapter *adapter = ifp->if_softc;
271 	struct tx_ring *txr = adapter->tx_rings;
272 	struct mbuf    *m;
273 
274 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
275 		IXGBE_TX_LOCK(txr);
276 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
277 			m_freem(m);
278 		IXGBE_TX_UNLOCK(txr);
279 	}
280 	if_qflush(ifp);
281 } /* ixv_qflush */
282 
283 
284 /************************************************************************
285  * ixgbe_xmit
286  *
287  *   This routine maps the mbufs to tx descriptors, allowing the
288  *   TX engine to transmit the packets.
289  *
290  *   Return 0 on success, positive on failure
291  ************************************************************************/
292 static int
ixgbe_xmit(struct tx_ring * txr,struct mbuf ** m_headp)293 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
294 {
295 	struct adapter          *adapter = txr->adapter;
296 	struct ixgbe_tx_buf     *txbuf;
297 	union ixgbe_adv_tx_desc *txd = NULL;
298 	struct mbuf             *m_head;
299 	int                     i, j, error, nsegs;
300 	int                     first;
301 	u32                     olinfo_status = 0, cmd_type_len;
302 	bool                    remap = TRUE;
303 	bus_dma_segment_t       segs[adapter->num_segs];
304 	bus_dmamap_t            map;
305 
306 	m_head = *m_headp;
307 
308 	/* Basic descriptor defines */
309 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
310 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
311 
312 	if (m_head->m_flags & M_VLANTAG)
313 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
314 
315 	/*
316 	 * Important to capture the first descriptor
317 	 * used because it will contain the index of
318 	 * the one we tell the hardware to report back
319 	 */
320 	first = txr->next_avail_desc;
321 	txbuf = &txr->tx_buffers[first];
322 	map = txbuf->map;
323 
324 	/*
325 	 * Map the packet for DMA.
326 	 */
327 retry:
328 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
329 	    &nsegs, BUS_DMA_NOWAIT);
330 
331 	if (__predict_false(error)) {
332 		struct mbuf *m;
333 
334 		switch (error) {
335 		case EFBIG:
336 			/* Try it again? - one try */
337 			if (remap == TRUE) {
338 				remap = FALSE;
339 				/*
340 				 * XXX: m_defrag will choke on
341 				 * non-MCLBYTES-sized clusters
342 				 */
343 				m = m_defrag(*m_headp, M_NOWAIT);
344 				if (m == NULL) {
345 					adapter->mbuf_defrag_failed++;
346 					m_freem(*m_headp);
347 					*m_headp = NULL;
348 					return (ENOBUFS);
349 				}
350 				*m_headp = m;
351 				goto retry;
352 			} else
353 				return (error);
354 		case ENOMEM:
355 			txr->no_tx_dma_setup++;
356 			return (error);
357 		default:
358 			txr->no_tx_dma_setup++;
359 			m_freem(*m_headp);
360 			*m_headp = NULL;
361 			return (error);
362 		}
363 	}
364 
365 	/* Make certain there are enough descriptors */
366 	if (txr->tx_avail < (nsegs + 2)) {
367 		txr->no_desc_avail++;
368 		bus_dmamap_unload(txr->txtag, map);
369 		return (ENOBUFS);
370 	}
371 	m_head = *m_headp;
372 
373 	/*
374 	 * Set up the appropriate offload context
375 	 * this will consume the first descriptor
376 	 */
377 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
378 	if (__predict_false(error)) {
379 		if (error == ENOBUFS)
380 			*m_headp = NULL;
381 		return (error);
382 	}
383 
384 	olinfo_status |= IXGBE_ADVTXD_CC;
385 	i = txr->next_avail_desc;
386 	for (j = 0; j < nsegs; j++) {
387 		bus_size_t seglen;
388 		bus_addr_t segaddr;
389 
390 		txbuf = &txr->tx_buffers[i];
391 		txd = &txr->tx_base[i];
392 		seglen = segs[j].ds_len;
393 		segaddr = htole64(segs[j].ds_addr);
394 
395 		txd->read.buffer_addr = segaddr;
396 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
397 		    cmd_type_len | seglen);
398 		txd->read.olinfo_status = htole32(olinfo_status);
399 
400 		if (++i == txr->num_desc)
401 			i = 0;
402 	}
403 
404 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
405 	txr->tx_avail -= nsegs;
406 	txr->next_avail_desc = i;
407 
408 	txbuf->m_head = m_head;
409 	/*
410 	 * Here we swap the map so the last descriptor,
411 	 * which gets the completion interrupt has the
412 	 * real map, and the first descriptor gets the
413 	 * unused map from this descriptor.
414 	 */
415 	txr->tx_buffers[first].map = txbuf->map;
416 	txbuf->map = map;
417 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
418 
419 	/* Set the EOP descriptor that will be marked done */
420 	txbuf = &txr->tx_buffers[first];
421 	txbuf->eop = txd;
422 
423 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
424 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
425 	/*
426 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
427 	 * hardware that this frame is available to transmit.
428 	 */
429 	++txr->total_packets;
430 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
431 
432 	/* Mark queue as having work */
433 	if (txr->busy == 0)
434 		txr->busy = 1;
435 
436 	return (0);
437 } /* ixgbe_xmit */
438 
439 
440 /************************************************************************
441  * ixgbe_allocate_transmit_buffers
442  *
443  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
444  *   the information needed to transmit a packet on the wire. This is
445  *   called only once at attach, setup is done every reset.
446  ************************************************************************/
447 static int
ixgbe_allocate_transmit_buffers(struct tx_ring * txr)448 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
449 {
450 	struct adapter      *adapter = txr->adapter;
451 	device_t            dev = adapter->dev;
452 	struct ixgbe_tx_buf *txbuf;
453 	int                 error, i;
454 
455 	/*
456 	 * Setup DMA descriptor areas.
457 	 */
458 	error = bus_dma_tag_create(
459 	         /*      parent */ bus_get_dma_tag(adapter->dev),
460 	         /*   alignment */ 1,
461 	         /*      bounds */ 0,
462 	         /*     lowaddr */ BUS_SPACE_MAXADDR,
463 	         /*    highaddr */ BUS_SPACE_MAXADDR,
464 	         /*      filter */ NULL,
465 	         /*   filterarg */ NULL,
466 	         /*     maxsize */ IXGBE_TSO_SIZE,
467 	         /*   nsegments */ adapter->num_segs,
468 	         /*  maxsegsize */ PAGE_SIZE,
469 	         /*       flags */ 0,
470 	         /*    lockfunc */ NULL,
471 	         /* lockfuncarg */ NULL,
472 	                           &txr->txtag);
473 	if (error) {
474 		device_printf(dev, "Unable to allocate TX DMA tag\n");
475 		goto fail;
476 	}
477 
478 	txr->tx_buffers =
479 	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
480 	    adapter->num_tx_desc, M_IXV, M_NOWAIT | M_ZERO);
481 	if (!txr->tx_buffers) {
482 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
483 		error = ENOMEM;
484 		goto fail;
485 	}
486 
487 	/* Create the descriptor buffer dma maps */
488 	txbuf = txr->tx_buffers;
489 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
490 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
491 		if (error != 0) {
492 			device_printf(dev, "Unable to create TX DMA map\n");
493 			goto fail;
494 		}
495 	}
496 
497 	return 0;
498 fail:
499 	/* We free all, it handles case where we are in the middle */
500 	ixv_free_transmit_structures(adapter);
501 
502 	return (error);
503 } /* ixgbe_allocate_transmit_buffers */
504 
505 /************************************************************************
506  *
507  *  Initialize a transmit ring.
508  *
509  ************************************************************************/
510 static void
ixgbe_setup_transmit_ring(struct tx_ring * txr)511 ixgbe_setup_transmit_ring(struct tx_ring *txr)
512 {
513 	struct adapter        *adapter = txr->adapter;
514 	struct ixgbe_tx_buf   *txbuf;
515 #ifdef DEV_NETMAP
516 	struct netmap_adapter *na = NA(adapter->ifp);
517 	struct netmap_slot    *slot;
518 #endif /* DEV_NETMAP */
519 
520 	/* Clear the old ring contents */
521 	IXGBE_TX_LOCK(txr);
522 
523 #ifdef DEV_NETMAP
524 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
525 		/*
526 		 * (under lock): if in netmap mode, do some consistency
527 		 * checks and set slot to entry 0 of the netmap ring.
528 		 */
529 		slot = netmap_reset(na, NR_TX, txr->me, 0);
530 	}
531 #endif /* DEV_NETMAP */
532 
533 	bzero((void *)txr->tx_base,
534 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
535 	/* Reset indices */
536 	txr->next_avail_desc = 0;
537 	txr->next_to_clean = 0;
538 
539 	/* Free any existing tx buffers. */
540 	txbuf = txr->tx_buffers;
541 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
542 		if (txbuf->m_head != NULL) {
543 			bus_dmamap_sync(txr->txtag, txbuf->map,
544 			    BUS_DMASYNC_POSTWRITE);
545 			bus_dmamap_unload(txr->txtag, txbuf->map);
546 			m_freem(txbuf->m_head);
547 			txbuf->m_head = NULL;
548 		}
549 
550 #ifdef DEV_NETMAP
551 		/*
552 		 * In netmap mode, set the map for the packet buffer.
553 		 * NOTE: Some drivers (not this one) also need to set
554 		 * the physical buffer address in the NIC ring.
555 		 * Slots in the netmap ring (indexed by "si") are
556 		 * kring->nkr_hwofs positions "ahead" wrt the
557 		 * corresponding slot in the NIC ring. In some drivers
558 		 * (not here) nkr_hwofs can be negative. Function
559 		 * netmap_idx_n2k() handles wraparounds properly.
560 		 */
561 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
562 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
563 			netmap_load_map(na, txr->txtag,
564 			    txbuf->map, NMB(na, slot + si));
565 		}
566 #endif /* DEV_NETMAP */
567 
568 		/* Clear the EOP descriptor pointer */
569 		txbuf->eop = NULL;
570 	}
571 
572 	/* Set number of descriptors available */
573 	txr->tx_avail = adapter->num_tx_desc;
574 
575 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
576 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
577 	IXGBE_TX_UNLOCK(txr);
578 } /* ixgbe_setup_transmit_ring */
579 
580 /************************************************************************
581  * ixv_setup_transmit_structures - Initialize all transmit rings.
582  ************************************************************************/
583 int
ixv_setup_transmit_structures(struct adapter * adapter)584 ixv_setup_transmit_structures(struct adapter *adapter)
585 {
586 	struct tx_ring *txr = adapter->tx_rings;
587 
588 	for (int i = 0; i < adapter->num_queues; i++, txr++)
589 		ixgbe_setup_transmit_ring(txr);
590 
591 	return (0);
592 } /* ixv_setup_transmit_structures */
593 
594 /************************************************************************
595  * ixv_free_transmit_structures - Free all transmit rings.
596  ************************************************************************/
597 void
ixv_free_transmit_structures(struct adapter * adapter)598 ixv_free_transmit_structures(struct adapter *adapter)
599 {
600 	struct tx_ring *txr = adapter->tx_rings;
601 
602 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
603 		IXGBE_TX_LOCK(txr);
604 		ixgbe_free_transmit_buffers(txr);
605 		ixgbe_dma_free(adapter, &txr->txdma);
606 		IXGBE_TX_UNLOCK(txr);
607 		IXGBE_TX_LOCK_DESTROY(txr);
608 	}
609 	free(adapter->tx_rings, M_IXV);
610 } /* ixv_free_transmit_structures */
611 
612 /************************************************************************
613  * ixgbe_free_transmit_buffers
614  *
615  *   Free transmit ring related data structures.
616  ************************************************************************/
617 static void
ixgbe_free_transmit_buffers(struct tx_ring * txr)618 ixgbe_free_transmit_buffers(struct tx_ring *txr)
619 {
620 	struct adapter      *adapter = txr->adapter;
621 	struct ixgbe_tx_buf *tx_buffer;
622 	int                 i;
623 
624 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
625 
626 	if (txr->tx_buffers == NULL)
627 		return;
628 
629 	tx_buffer = txr->tx_buffers;
630 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
631 		if (tx_buffer->m_head != NULL) {
632 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
633 			    BUS_DMASYNC_POSTWRITE);
634 			bus_dmamap_unload(txr->txtag, tx_buffer->map);
635 			m_freem(tx_buffer->m_head);
636 			tx_buffer->m_head = NULL;
637 			if (tx_buffer->map != NULL) {
638 				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
639 				tx_buffer->map = NULL;
640 			}
641 		} else if (tx_buffer->map != NULL) {
642 			bus_dmamap_unload(txr->txtag, tx_buffer->map);
643 			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
644 			tx_buffer->map = NULL;
645 		}
646 	}
647 	if (txr->br != NULL)
648 		buf_ring_free(txr->br, M_IXV);
649 	if (txr->tx_buffers != NULL) {
650 		free(txr->tx_buffers, M_IXV);
651 		txr->tx_buffers = NULL;
652 	}
653 	if (txr->txtag != NULL) {
654 		bus_dma_tag_destroy(txr->txtag);
655 		txr->txtag = NULL;
656 	}
657 } /* ixgbe_free_transmit_buffers */
658 
659 /************************************************************************
660  * ixgbe_tx_ctx_setup
661  *
662  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
663  ************************************************************************/
664 static int
ixgbe_tx_ctx_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)665 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
666     u32 *cmd_type_len, u32 *olinfo_status)
667 {
668 	struct ixgbe_adv_tx_context_desc *TXD;
669 	struct ether_vlan_header         *eh;
670 #ifdef INET
671 	struct ip                        *ip;
672 #endif
673 #ifdef INET6
674 	struct ip6_hdr                   *ip6;
675 #endif
676 	int                              ehdrlen, ip_hlen = 0;
677 	int                              offload = TRUE;
678 	int                              ctxd = txr->next_avail_desc;
679 	u32                              vlan_macip_lens = 0;
680 	u32                              type_tucmd_mlhl = 0;
681 	u16                              vtag = 0;
682 	u16                              etype;
683 	u8                               ipproto = 0;
684 	caddr_t                          l3d;
685 
686 
687 	/* First check if TSO is to be used */
688 	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
689 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
690 
691 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
692 		offload = FALSE;
693 
694 	/* Indicate the whole packet as payload when not doing TSO */
695 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
696 
697 	/* Now ready a context descriptor */
698 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
699 
700 	/*
701 	 * In advanced descriptors the vlan tag must
702 	 * be placed into the context descriptor. Hence
703 	 * we need to make one even if not doing offloads.
704 	 */
705 	if (mp->m_flags & M_VLANTAG) {
706 		vtag = htole16(mp->m_pkthdr.ether_vtag);
707 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
708 	} else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
709 		return (0);
710 
711 	/*
712 	 * Determine where frame payload starts.
713 	 * Jump over vlan headers if already present,
714 	 * helpful for QinQ too.
715 	 */
716 	eh = mtod(mp, struct ether_vlan_header *);
717 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
718 		etype = ntohs(eh->evl_proto);
719 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
720 	} else {
721 		etype = ntohs(eh->evl_encap_proto);
722 		ehdrlen = ETHER_HDR_LEN;
723 	}
724 
725 	/* Set the ether header length */
726 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
727 
728 	if (offload == FALSE)
729 		goto no_offloads;
730 
731 	/*
732 	 * If the first mbuf only includes the ethernet header,
733 	 * jump to the next one
734 	 * XXX: This assumes the stack splits mbufs containing headers
735 	 *      on header boundaries
736 	 * XXX: And assumes the entire IP header is contained in one mbuf
737 	 */
738 	if (mp->m_len == ehdrlen && mp->m_next)
739 		l3d = mtod(mp->m_next, caddr_t);
740 	else
741 		l3d = mtod(mp, caddr_t) + ehdrlen;
742 
743 	switch (etype) {
744 #ifdef INET
745 		case ETHERTYPE_IP:
746 			ip = (struct ip *)(l3d);
747 			ip_hlen = ip->ip_hl << 2;
748 			ipproto = ip->ip_p;
749 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
750 			/* Insert IPv4 checksum into data descriptors */
751 			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
752 				ip->ip_sum = 0;
753 				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
754 			}
755 			break;
756 #endif
757 #ifdef INET6
758 		case ETHERTYPE_IPV6:
759 			ip6 = (struct ip6_hdr *)(l3d);
760 			ip_hlen = sizeof(struct ip6_hdr);
761 			ipproto = ip6->ip6_nxt;
762 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
763 			break;
764 #endif
765 		default:
766 			offload = FALSE;
767 			break;
768 	}
769 
770 	vlan_macip_lens |= ip_hlen;
771 
772 	/* No support for offloads for non-L4 next headers */
773 	switch (ipproto) {
774 		case IPPROTO_TCP:
775 			if (mp->m_pkthdr.csum_flags &
776 			    (CSUM_IP_TCP | CSUM_IP6_TCP))
777 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
778 			else
779 				offload = false;
780 			break;
781 		case IPPROTO_UDP:
782 			if (mp->m_pkthdr.csum_flags &
783 			    (CSUM_IP_UDP | CSUM_IP6_UDP))
784 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
785 			else
786 				offload = false;
787 			break;
788 		case IPPROTO_SCTP:
789 			if (mp->m_pkthdr.csum_flags &
790 			    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
791 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
792 			else
793 				offload = false;
794 			break;
795 		default:
796 			offload = false;
797 			break;
798 	}
799 
800 	if (offload) /* Insert L4 checksum into data descriptors */
801 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
802 
803 no_offloads:
804 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
805 
806 	/* Now copy bits into descriptor */
807 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
808 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
809 	TXD->seqnum_seed = htole32(0);
810 	TXD->mss_l4len_idx = htole32(0);
811 
812 	/* We've consumed the first desc, adjust counters */
813 	if (++ctxd == txr->num_desc)
814 		ctxd = 0;
815 	txr->next_avail_desc = ctxd;
816 	--txr->tx_avail;
817 
818 	return (0);
819 } /* ixgbe_tx_ctx_setup */
820 
821 /************************************************************************
822  * ixgbe_tso_setup
823  *
824  *   Setup work for hardware segmentation offload (TSO) on
825  *   adapters using advanced tx descriptors
826  ************************************************************************/
827 static int
ixgbe_tso_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)828 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
829     u32 *olinfo_status)
830 {
831 	struct ixgbe_adv_tx_context_desc *TXD;
832 	struct ether_vlan_header         *eh;
833 #ifdef INET6
834 	struct ip6_hdr                   *ip6;
835 #endif
836 #ifdef INET
837 	struct ip                        *ip;
838 #endif
839 	struct tcphdr                    *th;
840 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
841 	u32                              vlan_macip_lens = 0;
842 	u32                              type_tucmd_mlhl = 0;
843 	u32                              mss_l4len_idx = 0, paylen;
844 	u16                              vtag = 0, eh_type;
845 
846 	/*
847 	 * Determine where frame payload starts.
848 	 * Jump over vlan headers if already present
849 	 */
850 	eh = mtod(mp, struct ether_vlan_header *);
851 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
852 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
853 		eh_type = eh->evl_proto;
854 	} else {
855 		ehdrlen = ETHER_HDR_LEN;
856 		eh_type = eh->evl_encap_proto;
857 	}
858 
859 	switch (ntohs(eh_type)) {
860 #ifdef INET
861 	case ETHERTYPE_IP:
862 		ip = (struct ip *)(mp->m_data + ehdrlen);
863 		if (ip->ip_p != IPPROTO_TCP)
864 			return (ENXIO);
865 		ip->ip_sum = 0;
866 		ip_hlen = ip->ip_hl << 2;
867 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
868 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
869 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
870 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
871 		/* Tell transmit desc to also do IPv4 checksum. */
872 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
873 		break;
874 #endif
875 #ifdef INET6
876 	case ETHERTYPE_IPV6:
877 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
878 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
879 		if (ip6->ip6_nxt != IPPROTO_TCP)
880 			return (ENXIO);
881 		ip_hlen = sizeof(struct ip6_hdr);
882 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
883 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
884 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
885 		break;
886 #endif
887 	default:
888 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
889 		    __func__, ntohs(eh_type));
890 		break;
891 	}
892 
893 	ctxd = txr->next_avail_desc;
894 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
895 
896 	tcp_hlen = th->th_off << 2;
897 
898 	/* This is used in the transmit desc in encap */
899 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
900 
901 	/* VLAN MACLEN IPLEN */
902 	if (mp->m_flags & M_VLANTAG) {
903 		vtag = htole16(mp->m_pkthdr.ether_vtag);
904 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
905 	}
906 
907 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
908 	vlan_macip_lens |= ip_hlen;
909 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
910 
911 	/* ADV DTYPE TUCMD */
912 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
913 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
914 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
915 
916 	/* MSS L4LEN IDX */
917 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
918 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
919 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
920 
921 	TXD->seqnum_seed = htole32(0);
922 
923 	if (++ctxd == txr->num_desc)
924 		ctxd = 0;
925 
926 	txr->tx_avail--;
927 	txr->next_avail_desc = ctxd;
928 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
929 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
930 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
931 	++txr->tso_tx;
932 
933 	return (0);
934 } /* ixgbe_tso_setup */
935 
936 
937 /************************************************************************
938  * ixv_txeof
939  *
940  *   Examine each tx_buffer in the used queue. If the hardware is done
941  *   processing the packet then free associated resources. The
942  *   tx_buffer is put back on the free queue.
943  ************************************************************************/
944 void
ixv_txeof(struct tx_ring * txr)945 ixv_txeof(struct tx_ring *txr)
946 {
947 	struct adapter          *adapter = txr->adapter;
948 	struct ixgbe_tx_buf     *buf;
949 	union ixgbe_adv_tx_desc *txd;
950 	u32                     work, processed = 0;
951 	u32                     limit = adapter->tx_process_limit;
952 
953 	mtx_assert(&txr->tx_mtx, MA_OWNED);
954 
955 #ifdef DEV_NETMAP
956 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
957 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
958 		struct netmap_adapter *na = NA(adapter->ifp);
959 		struct netmap_kring *kring = &na->tx_rings[txr->me];
960 		txd = txr->tx_base;
961 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
962 		    BUS_DMASYNC_POSTREAD);
963 		/*
964 		 * In netmap mode, all the work is done in the context
965 		 * of the client thread. Interrupt handlers only wake up
966 		 * clients, which may be sleeping on individual rings
967 		 * or on a global resource for all rings.
968 		 * To implement tx interrupt mitigation, we wake up the client
969 		 * thread roughly every half ring, even if the NIC interrupts
970 		 * more frequently. This is implemented as follows:
971 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
972 		 *   the slot that should wake up the thread (nkr_num_slots
973 		 *   means the user thread should not be woken up);
974 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
975 		 *   or the slot has the DD bit set.
976 		 */
977 		if (!netmap_mitigate ||
978 		    (kring->nr_kflags < kring->nkr_num_slots &&
979 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
980 			netmap_tx_irq(adapter->ifp, txr->me);
981 		}
982 		return;
983 	}
984 #endif /* DEV_NETMAP */
985 
986 	if (txr->tx_avail == txr->num_desc) {
987 		txr->busy = 0;
988 		return;
989 	}
990 
991 	/* Get work starting point */
992 	work = txr->next_to_clean;
993 	buf = &txr->tx_buffers[work];
994 	txd = &txr->tx_base[work];
995 	work -= txr->num_desc; /* The distance to ring end */
996 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997 	    BUS_DMASYNC_POSTREAD);
998 
999 	do {
1000 		union ixgbe_adv_tx_desc *eop = buf->eop;
1001 		if (eop == NULL) /* No work */
1002 			break;
1003 
1004 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1005 			break;	/* I/O not complete */
1006 
1007 		if (buf->m_head) {
1008 			txr->bytes += buf->m_head->m_pkthdr.len;
1009 			bus_dmamap_sync(txr->txtag, buf->map,
1010 			    BUS_DMASYNC_POSTWRITE);
1011 			bus_dmamap_unload(txr->txtag, buf->map);
1012 			m_freem(buf->m_head);
1013 			buf->m_head = NULL;
1014 		}
1015 		buf->eop = NULL;
1016 		++txr->tx_avail;
1017 
1018 		/* We clean the range if multi segment */
1019 		while (txd != eop) {
1020 			++txd;
1021 			++buf;
1022 			++work;
1023 			/* wrap the ring? */
1024 			if (__predict_false(!work)) {
1025 				work -= txr->num_desc;
1026 				buf = txr->tx_buffers;
1027 				txd = txr->tx_base;
1028 			}
1029 			if (buf->m_head) {
1030 				txr->bytes += buf->m_head->m_pkthdr.len;
1031 				bus_dmamap_sync(txr->txtag, buf->map,
1032 				    BUS_DMASYNC_POSTWRITE);
1033 				bus_dmamap_unload(txr->txtag, buf->map);
1034 				m_freem(buf->m_head);
1035 				buf->m_head = NULL;
1036 			}
1037 			++txr->tx_avail;
1038 			buf->eop = NULL;
1039 
1040 		}
1041 		++txr->packets;
1042 		++processed;
1043 
1044 		/* Try the next packet */
1045 		++txd;
1046 		++buf;
1047 		++work;
1048 		/* reset with a wrap */
1049 		if (__predict_false(!work)) {
1050 			work -= txr->num_desc;
1051 			buf = txr->tx_buffers;
1052 			txd = txr->tx_base;
1053 		}
1054 		prefetch(txd);
1055 	} while (__predict_true(--limit));
1056 
1057 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1058 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1059 
1060 	work += txr->num_desc;
1061 	txr->next_to_clean = work;
1062 
1063 	/*
1064 	 * Queue Hang detection, we know there's
1065 	 * work outstanding or the first return
1066 	 * would have been taken, so increment busy
1067 	 * if nothing managed to get cleaned, then
1068 	 * in local_timer it will be checked and
1069 	 * marked as HUNG if it exceeds a MAX attempt.
1070 	 */
1071 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1072 		++txr->busy;
1073 	/*
1074 	 * If anything gets cleaned we reset state to 1,
1075 	 * note this will turn off HUNG if its set.
1076 	 */
1077 	if (processed)
1078 		txr->busy = 1;
1079 
1080 	if (txr->tx_avail == txr->num_desc)
1081 		txr->busy = 0;
1082 
1083 	return;
1084 } /* ixv_txeof */
1085 
1086 /************************************************************************
1087  * ixgbe_rsc_count
1088  *
1089  *   Used to detect a descriptor that has been merged by Hardware RSC.
1090  ************************************************************************/
1091 static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc * rx)1092 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1093 {
1094 	return (le32toh(rx->wb.lower.lo_dword.data) &
1095 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1096 } /* ixgbe_rsc_count */
1097 
1098 /************************************************************************
1099  * ixgbe_setup_hw_rsc
1100  *
1101  *   Initialize Hardware RSC (LRO) feature on 82599
1102  *   for an RX ring, this is toggled by the LRO capability
1103  *   even though it is transparent to the stack.
1104  *
1105  *   NOTE: Since this HW feature only works with IPv4 and
1106  *         testing has shown soft LRO to be as effective,
1107  *         this feature will be disabled by default.
1108  ************************************************************************/
1109 static void
ixgbe_setup_hw_rsc(struct rx_ring * rxr)1110 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1111 {
1112 	struct adapter  *adapter = rxr->adapter;
1113 	struct ixgbe_hw *hw = &adapter->hw;
1114 	u32             rscctrl, rdrxctl;
1115 
1116 	/* If turning LRO/RSC off we need to disable it */
1117 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1118 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1119 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1120 		return;
1121 	}
1122 
1123 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1124 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1125 #ifdef DEV_NETMAP
1126 	/* Always strip CRC unless Netmap disabled it */
1127 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1128 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1129 	    ix_crcstrip)
1130 #endif /* DEV_NETMAP */
1131 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1132 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1133 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1134 
1135 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1136 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1137 	/*
1138 	 * Limit the total number of descriptors that
1139 	 * can be combined, so it does not exceed 64K
1140 	 */
1141 	if (rxr->mbuf_sz == MCLBYTES)
1142 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1143 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1144 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1145 	else if (rxr->mbuf_sz == MJUM9BYTES)
1146 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1147 	else  /* Using 16K cluster */
1148 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1149 
1150 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1151 
1152 	/* Enable TCP header recognition */
1153 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1154 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1155 
1156 	/* Disable RSC for ACK packets */
1157 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1158 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1159 
1160 	rxr->hw_rsc = TRUE;
1161 } /* ixgbe_setup_hw_rsc */
1162 
1163 /************************************************************************
1164  * ixgbe_refresh_mbufs
1165  *
1166  *   Refresh mbuf buffers for RX descriptor rings
1167  *    - now keeps its own state so discards due to resource
1168  *      exhaustion are unnecessary, if an mbuf cannot be obtained
1169  *      it just returns, keeping its placeholder, thus it can simply
1170  *      be recalled to try again.
1171  ************************************************************************/
1172 static void
ixgbe_refresh_mbufs(struct rx_ring * rxr,int limit)1173 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1174 {
1175 	struct adapter      *adapter = rxr->adapter;
1176 	struct ixgbe_rx_buf *rxbuf;
1177 	struct mbuf         *mp;
1178 	bus_dma_segment_t   seg[1];
1179 	int                 i, j, nsegs, error;
1180 	bool                refreshed = FALSE;
1181 
1182 	i = j = rxr->next_to_refresh;
1183 	/* Control the loop with one beyond */
1184 	if (++j == rxr->num_desc)
1185 		j = 0;
1186 
1187 	while (j != limit) {
1188 		rxbuf = &rxr->rx_buffers[i];
1189 		if (rxbuf->buf == NULL) {
1190 			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1191 			    rxr->mbuf_sz);
1192 			if (mp == NULL)
1193 				goto update;
1194 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1195 				m_adj(mp, ETHER_ALIGN);
1196 		} else
1197 			mp = rxbuf->buf;
1198 
1199 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1200 
1201 		/* If we're dealing with an mbuf that was copied rather
1202 		 * than replaced, there's no need to go through busdma.
1203 		 */
1204 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1205 			/* Get the memory mapping */
1206 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1207 			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1208 			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
1209 			if (error != 0) {
1210 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1211 				m_free(mp);
1212 				rxbuf->buf = NULL;
1213 				goto update;
1214 			}
1215 			rxbuf->buf = mp;
1216 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1217 			    BUS_DMASYNC_PREREAD);
1218 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1219 			    htole64(seg[0].ds_addr);
1220 		} else {
1221 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1222 			rxbuf->flags &= ~IXGBE_RX_COPY;
1223 		}
1224 
1225 		refreshed = TRUE;
1226 		/* Next is precalculated */
1227 		i = j;
1228 		rxr->next_to_refresh = i;
1229 		if (++j == rxr->num_desc)
1230 			j = 0;
1231 	}
1232 
1233 update:
1234 	if (refreshed) /* Update hardware tail index */
1235 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1236 
1237 	return;
1238 } /* ixgbe_refresh_mbufs */
1239 
1240 /************************************************************************
1241  * ixgbe_allocate_receive_buffers
1242  *
1243  *   Allocate memory for rx_buffer structures. Since we use one
1244  *   rx_buffer per received packet, the maximum number of rx_buffer's
1245  *   that we'll need is equal to the number of receive descriptors
1246  *   that we've allocated.
1247  ************************************************************************/
1248 static int
ixgbe_allocate_receive_buffers(struct rx_ring * rxr)1249 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1250 {
1251 	struct adapter      *adapter = rxr->adapter;
1252 	device_t            dev = adapter->dev;
1253 	struct ixgbe_rx_buf *rxbuf;
1254 	int                 bsize, error;
1255 
1256 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1257 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXV,
1258 	    M_NOWAIT | M_ZERO);
1259 	if (!rxr->rx_buffers) {
1260 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1261 		error = ENOMEM;
1262 		goto fail;
1263 	}
1264 
1265 	error = bus_dma_tag_create(
1266 	         /*      parent */ bus_get_dma_tag(dev),
1267 	         /*   alignment */ 1,
1268 	         /*      bounds */ 0,
1269 	         /*     lowaddr */ BUS_SPACE_MAXADDR,
1270 	         /*    highaddr */ BUS_SPACE_MAXADDR,
1271 	         /*      filter */ NULL,
1272 	         /*   filterarg */ NULL,
1273 	         /*     maxsize */ MJUM16BYTES,
1274 	         /*   nsegments */ 1,
1275 	         /*  maxsegsize */ MJUM16BYTES,
1276 	         /*       flags */ 0,
1277 	         /*    lockfunc */ NULL,
1278 	         /* lockfuncarg */ NULL,
1279 	                           &rxr->ptag);
1280 	if (error) {
1281 		device_printf(dev, "Unable to create RX DMA tag\n");
1282 		goto fail;
1283 	}
1284 
1285 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1286 		rxbuf = &rxr->rx_buffers[i];
1287 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1288 		if (error) {
1289 			device_printf(dev, "Unable to create RX dma map\n");
1290 			goto fail;
1291 		}
1292 	}
1293 
1294 	return (0);
1295 
1296 fail:
1297 	/* Frees all, but can handle partial completion */
1298 	ixv_free_receive_structures(adapter);
1299 
1300 	return (error);
1301 } /* ixgbe_allocate_receive_buffers */
1302 
1303 /************************************************************************
1304  * ixgbe_free_receive_ring
1305  ************************************************************************/
1306 static void
ixgbe_free_receive_ring(struct rx_ring * rxr)1307 ixgbe_free_receive_ring(struct rx_ring *rxr)
1308 {
1309 	struct ixgbe_rx_buf *rxbuf;
1310 
1311 	for (int i = 0; i < rxr->num_desc; i++) {
1312 		rxbuf = &rxr->rx_buffers[i];
1313 		if (rxbuf->buf != NULL) {
1314 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1315 			    BUS_DMASYNC_POSTREAD);
1316 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1317 			rxbuf->buf->m_flags |= M_PKTHDR;
1318 			m_freem(rxbuf->buf);
1319 			rxbuf->buf = NULL;
1320 			rxbuf->flags = 0;
1321 		}
1322 	}
1323 } /* ixgbe_free_receive_ring */
1324 
1325 /************************************************************************
1326  * ixgbe_setup_receive_ring
1327  *
1328  *   Initialize a receive ring and its buffers.
1329  ************************************************************************/
1330 static int
ixgbe_setup_receive_ring(struct rx_ring * rxr)1331 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1332 {
1333 	struct adapter        *adapter;
1334 	struct ifnet          *ifp;
1335 	device_t              dev;
1336 	struct ixgbe_rx_buf   *rxbuf;
1337 	struct lro_ctrl       *lro = &rxr->lro;
1338 #ifdef DEV_NETMAP
1339 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1340 	struct netmap_slot    *slot;
1341 #endif /* DEV_NETMAP */
1342 	bus_dma_segment_t     seg[1];
1343 	int                   rsize, nsegs, error = 0;
1344 
1345 	adapter = rxr->adapter;
1346 	ifp = adapter->ifp;
1347 	dev = adapter->dev;
1348 
1349 	/* Clear the ring contents */
1350 	IXGBE_RX_LOCK(rxr);
1351 
1352 #ifdef DEV_NETMAP
1353 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1354 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1355 #endif /* DEV_NETMAP */
1356 
1357 	rsize = roundup2(adapter->num_rx_desc *
1358 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1359 	bzero((void *)rxr->rx_base, rsize);
1360 	/* Cache the size */
1361 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1362 
1363 	/* Free current RX buffer structs and their mbufs */
1364 	ixgbe_free_receive_ring(rxr);
1365 
1366 	/* Now replenish the mbufs */
1367 	for (int j = 0; j != rxr->num_desc; ++j) {
1368 		struct mbuf *mp;
1369 
1370 		rxbuf = &rxr->rx_buffers[j];
1371 
1372 #ifdef DEV_NETMAP
1373 		/*
1374 		 * In netmap mode, fill the map and set the buffer
1375 		 * address in the NIC ring, considering the offset
1376 		 * between the netmap and NIC rings (see comment in
1377 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1378 		 * an mbuf, so end the block with a continue;
1379 		 */
1380 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1381 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1382 			uint64_t paddr;
1383 			void *addr;
1384 
1385 			addr = PNMB(na, slot + sj, &paddr);
1386 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1387 			/* Update descriptor and the cached value */
1388 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1389 			rxbuf->addr = htole64(paddr);
1390 			continue;
1391 		}
1392 #endif /* DEV_NETMAP */
1393 
1394 		rxbuf->flags = 0;
1395 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1396 		    adapter->rx_mbuf_sz);
1397 		if (rxbuf->buf == NULL) {
1398 			error = ENOBUFS;
1399 			goto fail;
1400 		}
1401 		mp = rxbuf->buf;
1402 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1403 		/* Get the memory mapping */
1404 		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1405 		    &nsegs, BUS_DMA_NOWAIT);
1406 		if (error != 0)
1407 			goto fail;
1408 		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1409 		/* Update the descriptor and the cached value */
1410 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1411 		rxbuf->addr = htole64(seg[0].ds_addr);
1412 	}
1413 
1414 
1415 	/* Setup our descriptor indices */
1416 	rxr->next_to_check = 0;
1417 	rxr->next_to_refresh = 0;
1418 	rxr->lro_enabled = FALSE;
1419 	rxr->rx_copies = 0;
1420 	rxr->rx_bytes = 0;
1421 	rxr->vtag_strip = FALSE;
1422 
1423 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1424 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1425 
1426 	/*
1427 	 * Now set up the LRO interface
1428 	 */
1429 	if (ixgbe_rsc_enable)
1430 		ixgbe_setup_hw_rsc(rxr);
1431 	else if (ifp->if_capenable & IFCAP_LRO) {
1432 		int err = tcp_lro_init(lro);
1433 		if (err) {
1434 			device_printf(dev, "LRO Initialization failed!\n");
1435 			goto fail;
1436 		}
1437 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1438 		rxr->lro_enabled = TRUE;
1439 		lro->ifp = adapter->ifp;
1440 	}
1441 
1442 	IXGBE_RX_UNLOCK(rxr);
1443 
1444 	return (0);
1445 
1446 fail:
1447 	ixgbe_free_receive_ring(rxr);
1448 	IXGBE_RX_UNLOCK(rxr);
1449 
1450 	return (error);
1451 } /* ixgbe_setup_receive_ring */
1452 
1453 /************************************************************************
1454  * ixv_setup_receive_structures - Initialize all receive rings.
1455  ************************************************************************/
1456 int
ixv_setup_receive_structures(struct adapter * adapter)1457 ixv_setup_receive_structures(struct adapter *adapter)
1458 {
1459 	struct rx_ring *rxr = adapter->rx_rings;
1460 	int            j;
1461 
1462 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1463 		if (ixgbe_setup_receive_ring(rxr))
1464 			goto fail;
1465 
1466 	return (0);
1467 fail:
1468 	/*
1469 	 * Free RX buffers allocated so far, we will only handle
1470 	 * the rings that completed, the failing case will have
1471 	 * cleaned up for itself. 'j' failed, so its the terminus.
1472 	 */
1473 	for (int i = 0; i < j; ++i) {
1474 		rxr = &adapter->rx_rings[i];
1475 		ixgbe_free_receive_ring(rxr);
1476 	}
1477 
1478 	return (ENOBUFS);
1479 } /* ixv_setup_receive_structures */
1480 
1481 
1482 /************************************************************************
1483  * ixv_free_receive_structures - Free all receive rings.
1484  ************************************************************************/
1485 void
ixv_free_receive_structures(struct adapter * adapter)1486 ixv_free_receive_structures(struct adapter *adapter)
1487 {
1488 	struct rx_ring *rxr = adapter->rx_rings;
1489 	struct lro_ctrl *lro;
1490 
1491 	INIT_DEBUGOUT("ixv_free_receive_structures: begin");
1492 
1493 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1494 		lro = &rxr->lro;
1495 		ixgbe_free_receive_buffers(rxr);
1496 		/* Free LRO memory */
1497 		tcp_lro_free(lro);
1498 		/* Free the ring memory as well */
1499 		ixgbe_dma_free(adapter, &rxr->rxdma);
1500 	}
1501 
1502 	free(adapter->rx_rings, M_IXV);
1503 } /* ixv_free_receive_structures */
1504 
1505 
1506 /************************************************************************
1507  * ixgbe_free_receive_buffers - Free receive ring data structures
1508  ************************************************************************/
1509 static void
ixgbe_free_receive_buffers(struct rx_ring * rxr)1510 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1511 {
1512 	struct adapter      *adapter = rxr->adapter;
1513 	struct ixgbe_rx_buf *rxbuf;
1514 
1515 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1516 
1517 	/* Cleanup any existing buffers */
1518 	if (rxr->rx_buffers != NULL) {
1519 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1520 			rxbuf = &rxr->rx_buffers[i];
1521 			if (rxbuf->buf != NULL) {
1522 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1523 				    BUS_DMASYNC_POSTREAD);
1524 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1525 				rxbuf->buf->m_flags |= M_PKTHDR;
1526 				m_freem(rxbuf->buf);
1527 			}
1528 			rxbuf->buf = NULL;
1529 			if (rxbuf->pmap != NULL) {
1530 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1531 				rxbuf->pmap = NULL;
1532 			}
1533 		}
1534 		if (rxr->rx_buffers != NULL) {
1535 			free(rxr->rx_buffers, M_IXV);
1536 			rxr->rx_buffers = NULL;
1537 		}
1538 	}
1539 
1540 	if (rxr->ptag != NULL) {
1541 		bus_dma_tag_destroy(rxr->ptag);
1542 		rxr->ptag = NULL;
1543 	}
1544 
1545 	return;
1546 } /* ixgbe_free_receive_buffers */
1547 
1548 /************************************************************************
1549  * ixgbe_rx_input
1550  ************************************************************************/
1551 static __inline void
ixgbe_rx_input(struct rx_ring * rxr,struct ifnet * ifp,struct mbuf * m,u32 ptype)1552 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1553     u32 ptype)
1554 {
1555 	/*
1556 	 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1557 	 * should be computed by hardware. Also it should not have VLAN tag in
1558 	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1559 	 */
1560 	if (rxr->lro_enabled &&
1561 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1562 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1563 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1564 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1565 	     (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1566 	     (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1567 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1568 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1569 		/*
1570 		 * Send to the stack if:
1571 		 *  - LRO not enabled, or
1572 		 *  - no LRO resources, or
1573 		 *  - lro enqueue fails
1574 		 */
1575 		if (rxr->lro.lro_cnt != 0)
1576 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1577 				return;
1578 	}
1579 	IXGBE_RX_UNLOCK(rxr);
1580 	(*ifp->if_input)(ifp, m);
1581 	IXGBE_RX_LOCK(rxr);
1582 } /* ixgbe_rx_input */
1583 
1584 /************************************************************************
1585  * ixgbe_rx_discard
1586  ************************************************************************/
1587 static __inline void
ixgbe_rx_discard(struct rx_ring * rxr,int i)1588 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1589 {
1590 	struct ixgbe_rx_buf *rbuf;
1591 
1592 	rbuf = &rxr->rx_buffers[i];
1593 
1594 	/*
1595 	 * With advanced descriptors the writeback
1596 	 * clobbers the buffer addrs, so its easier
1597 	 * to just free the existing mbufs and take
1598 	 * the normal refresh path to get new buffers
1599 	 * and mapping.
1600 	 */
1601 
1602 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1603 		rbuf->fmp->m_flags |= M_PKTHDR;
1604 		m_freem(rbuf->fmp);
1605 		rbuf->fmp = NULL;
1606 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1607 	} else if (rbuf->buf) {
1608 		m_free(rbuf->buf);
1609 		rbuf->buf = NULL;
1610 	}
1611 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1612 
1613 	rbuf->flags = 0;
1614 
1615 	return;
1616 } /* ixgbe_rx_discard */
1617 
1618 
/************************************************************************
 * ixv_rxeof
 *
 *   This routine executes in interrupt context. It replenishes
 *   the mbufs in the descriptor and sends data which has been
 *   dma'ed into host memory to upper layer.
 *
 *   que: the queue whose RX ring (que->rxr) is processed.  At most
 *   adapter->rx_process_limit descriptors are consumed per call.
 *
 *   Return TRUE for more work, FALSE for all clean.  The decision is
 *   based on the DD bit of the last descriptor status that was read.
 ************************************************************************/
bool
ixv_rxeof(struct ix_queue *que)
{
	struct adapter          *adapter = que->adapter;
	struct rx_ring          *rxr = que->rxr;
	struct ifnet            *ifp = adapter->ifp;
	struct lro_ctrl         *lro = &rxr->lro;
#if __FreeBSD_version < 1100105
	struct lro_entry        *queued;
#endif
	union ixgbe_adv_rx_desc *cur;
	struct ixgbe_rx_buf     *rbuf, *nbuf;
	int                     i, nextp, processed = 0;
	u32                     staterr = 0;
	u32                     count = adapter->rx_process_limit;
	u16                     pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
		/* Same as the txeof routine: wakeup clients on intr. */
		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
			IXGBE_RX_UNLOCK(rxr);
			return (FALSE);
		}
	}
#endif /* DEV_NETMAP */

	/* 'i' is advanced inside the body, at next_desc. */
	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf *sendmp, *mp;
		u32         rsc, ptype;
		u16         len;
		u16         vtag = 0;
		bool        eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		/* Stop at the first descriptor not yet marked done. */
		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		/* Stop as well when the interface is no longer running. */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		/* Clear the status so a stale DD bit is not seen again. */
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		 * On 82599 which supports a hardware
		 * LRO (called HW RSC), packets need
		 * not be fragmented across sequential
		 * descriptors, rather the next descriptor
		 * is indicated in bits of the descriptor.
		 * This also means that we might process
		 * more than one packet at a time, something
		 * that has never been true before, it
		 * required eliminating global chain pointers
		 * in favor of what we are doing here.  -jfv
		 */
		if (!eop) {
			/*
			 * Figure out the next descriptor
			 * of this frame.
			 */
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		 * Rather than using the fmp/lmp global pointers
		 * we now keep the head of a packet chain in the
		 * buffer struct and pass this along from one
		 * descriptor to the next, until we get EOP.
		 */
		mp->m_len = len;
		/*
		 * See if there is a stored head
		 * that determines what we are
		 */
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			/* The slot gives up its mbuf to the chain. */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK.  Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
					ixv_bcopy(mp->m_data, sendmp->m_data,
					    len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					/*
					 * Tells ixgbe_refresh_mbufs() to
					 * reuse the cached buffer address
					 * instead of reloading the DMA map.
					 */
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			/* No copy was made: hand the cluster itself upward. */
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			/* Not complete yet, so nothing is sent this round. */
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);

			/*
			 * In case of multiqueue, we have RXCSUM.PCSD bit set
			 * and never cleared. This means we have RSS hash
			 * available to be used.
			 */
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				/* Map the descriptor's RSS type to an mbuf hash type. */
				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
				case IXGBE_RXDADV_RSSTYPE_IPV4:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6_EX);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6_EX);
					break;
#if __FreeBSD_version > 1100000
				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6_EX);
					break;
#endif
				default:
#if __FreeBSD_version < 1100116
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_OPAQUE);
#else
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_OPAQUE_HASH);
#endif
				}
			} else {
				/* Single queue: the MSI-X vector serves as flow id. */
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			/*
			 * ixgbe_rx_input() releases and re-acquires the RX
			 * lock around if_input, so the index is stored
			 * before the call and read back afterwards.
			 * NOTE(review): presumably next_to_check may be
			 * changed by another context while unlocked - confirm.
			 */
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
#if __FreeBSD_version < 1100105
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#else
	tcp_lro_flush_all(lro);
#endif

	IXGBE_RX_UNLOCK(rxr);

	/*
	 * Still have cleaning to do?
	 */
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);

	return (FALSE);
} /* ixv_rxeof */
1909 
1910 
1911 /************************************************************************
1912  * ixgbe_rx_checksum
1913  *
1914  *   Verify that the hardware indicated that the checksum is valid.
1915  *   Inform the stack about the status of checksum so that stack
1916  *   doesn't spend time verifying the checksum.
1917  ************************************************************************/
1918 static void
ixgbe_rx_checksum(u32 staterr,struct mbuf * mp,u32 ptype)1919 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1920 {
1921 	u16  status = (u16)staterr;
1922 	u8   errors = (u8)(staterr >> 24);
1923 	bool sctp = false;
1924 
1925 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1926 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1927 		sctp = true;
1928 
1929 	/* IPv4 checksum */
1930 	if (status & IXGBE_RXD_STAT_IPCS) {
1931 		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1932 		/* IP Checksum Good */
1933 		if (!(errors & IXGBE_RXD_ERR_IPE))
1934 			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1935 	}
1936 	/* TCP/UDP/SCTP checksum */
1937 	if (status & IXGBE_RXD_STAT_L4CS) {
1938 		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1939 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1940 			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1941 			if (!sctp)
1942 				mp->m_pkthdr.csum_data = htons(0xffff);
1943 		}
1944 	}
1945 } /* ixgbe_rx_checksum */
1946 
1947 /************************************************************************
1948  * ixgbe_dmamap_cb - Manage DMA'able memory.
1949  ************************************************************************/
1950 static void
ixgbe_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)1951 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1952 {
1953 	if (error)
1954 		return;
1955 	*(bus_addr_t *)arg = segs->ds_addr;
1956 
1957 	return;
1958 } /* ixgbe_dmamap_cb */
1959 
1960 /************************************************************************
1961  * ixgbe_dma_malloc
1962  ************************************************************************/
1963 static int
ixgbe_dma_malloc(struct adapter * adapter,bus_size_t size,struct ixgbe_dma_alloc * dma,int mapflags)1964 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1965                  struct ixgbe_dma_alloc *dma, int mapflags)
1966 {
1967 	device_t dev = adapter->dev;
1968 	int      r;
1969 
1970 	r = bus_dma_tag_create(
1971 	     /*      parent */ bus_get_dma_tag(adapter->dev),
1972 	     /*   alignment */ DBA_ALIGN,
1973 	     /*      bounds */ 0,
1974 	     /*     lowaddr */ BUS_SPACE_MAXADDR,
1975 	     /*    highaddr */ BUS_SPACE_MAXADDR,
1976 	     /*      filter */ NULL,
1977 	     /*   filterarg */ NULL,
1978 	     /*     maxsize */ size,
1979 	     /*   nsegments */ 1,
1980 	     /*  maxsegsize */ size,
1981 	     /*       flags */ BUS_DMA_ALLOCNOW,
1982 	     /*    lockfunc */ NULL,
1983 	     /* lockfuncarg */ NULL,
1984 	                       &dma->dma_tag);
1985 	if (r != 0) {
1986 		device_printf(dev,
1987 		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1988 		    r);
1989 		goto fail_0;
1990 	}
1991 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1992 	    BUS_DMA_NOWAIT, &dma->dma_map);
1993 	if (r != 0) {
1994 		device_printf(dev,
1995 		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
1996 		goto fail_1;
1997 	}
1998 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
1999 	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2000 	if (r != 0) {
2001 		device_printf(dev,
2002 		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2003 		goto fail_2;
2004 	}
2005 	dma->dma_size = size;
2006 
2007 	return (0);
2008 fail_2:
2009 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2010 fail_1:
2011 	bus_dma_tag_destroy(dma->dma_tag);
2012 fail_0:
2013 	dma->dma_tag = NULL;
2014 
2015 	return (r);
2016 } /* ixgbe_dma_malloc */
2017 
2018 static void
ixgbe_dma_free(struct adapter * adapter,struct ixgbe_dma_alloc * dma)2019 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2020 {
2021 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2022 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2023 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2024 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2025 	bus_dma_tag_destroy(dma->dma_tag);
2026 } /* ixgbe_dma_free */
2027 
2028 
2029 /************************************************************************
2030  * ixv_allocate_queues
2031  *
2032  *   Allocate memory for the transmit and receive rings, and then
2033  *   the descriptors associated with each, called only once at attach.
2034  ************************************************************************/
2035 int
ixv_allocate_queues(struct adapter * adapter)2036 ixv_allocate_queues(struct adapter *adapter)
2037 {
2038 	device_t        dev = adapter->dev;
2039 	struct ix_queue *que;
2040 	struct tx_ring  *txr;
2041 	struct rx_ring  *rxr;
2042 	int             rsize, tsize, error = IXGBE_SUCCESS;
2043 	int             txconf = 0, rxconf = 0;
2044 
2045 	/* First, allocate the top level queue structs */
2046 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2047 	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2048 	if (!adapter->queues) {
2049 		device_printf(dev, "Unable to allocate queue memory\n");
2050 		error = ENOMEM;
2051 		goto fail;
2052 	}
2053 
2054 	/* Second, allocate the TX ring struct memory */
2055 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2056 	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2057 	if (!adapter->tx_rings) {
2058 		device_printf(dev, "Unable to allocate TX ring memory\n");
2059 		error = ENOMEM;
2060 		goto tx_fail;
2061 	}
2062 
2063 	/* Third, allocate the RX ring */
2064 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2065 	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2066 	if (!adapter->rx_rings) {
2067 		device_printf(dev, "Unable to allocate RX ring memory\n");
2068 		error = ENOMEM;
2069 		goto rx_fail;
2070 	}
2071 
2072 	/* For the ring itself */
2073 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2074 	    DBA_ALIGN);
2075 
2076 	/*
2077 	 * Now set up the TX queues, txconf is needed to handle the
2078 	 * possibility that things fail midcourse and we need to
2079 	 * undo memory gracefully
2080 	 */
2081 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2082 		/* Set up some basics */
2083 		txr = &adapter->tx_rings[i];
2084 		txr->adapter = adapter;
2085 		txr->br = NULL;
2086 		txr->me = i;
2087 		txr->num_desc = adapter->num_tx_desc;
2088 
2089 		/* Initialize the TX side lock */
2090 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2091 		    device_get_nameunit(dev), txr->me);
2092 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2093 
2094 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2095 		    BUS_DMA_NOWAIT)) {
2096 			device_printf(dev,
2097 			    "Unable to allocate TX Descriptor memory\n");
2098 			error = ENOMEM;
2099 			goto err_tx_desc;
2100 		}
2101 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2102 		bzero((void *)txr->tx_base, tsize);
2103 
2104 		/* Now allocate transmit buffers for the ring */
2105 		if (ixgbe_allocate_transmit_buffers(txr)) {
2106 			device_printf(dev,
2107 			    "Critical Failure setting up transmit buffers\n");
2108 			error = ENOMEM;
2109 			goto err_tx_desc;
2110 		}
2111 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2112 			/* Allocate a buf ring */
2113 			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXV,
2114 			    M_WAITOK, &txr->tx_mtx);
2115 			if (txr->br == NULL) {
2116 				device_printf(dev,
2117 				    "Critical Failure setting up buf ring\n");
2118 				error = ENOMEM;
2119 				goto err_tx_desc;
2120 			}
2121 		}
2122 	}
2123 
2124 	/*
2125 	 * Next the RX queues...
2126 	 */
2127 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2128 	    DBA_ALIGN);
2129 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2130 		rxr = &adapter->rx_rings[i];
2131 		/* Set up some basics */
2132 		rxr->adapter = adapter;
2133 		rxr->me = i;
2134 		rxr->num_desc = adapter->num_rx_desc;
2135 
2136 		/* Initialize the RX side lock */
2137 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2138 		    device_get_nameunit(dev), rxr->me);
2139 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2140 
2141 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2142 		    BUS_DMA_NOWAIT)) {
2143 			device_printf(dev,
2144 			    "Unable to allocate RxDescriptor memory\n");
2145 			error = ENOMEM;
2146 			goto err_rx_desc;
2147 		}
2148 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2149 		bzero((void *)rxr->rx_base, rsize);
2150 
2151 		/* Allocate receive buffers for the ring */
2152 		if (ixgbe_allocate_receive_buffers(rxr)) {
2153 			device_printf(dev,
2154 			    "Critical Failure setting up receive buffers\n");
2155 			error = ENOMEM;
2156 			goto err_rx_desc;
2157 		}
2158 	}
2159 
2160 	/*
2161 	 * Finally set up the queue holding structs
2162 	 */
2163 	for (int i = 0; i < adapter->num_queues; i++) {
2164 		que = &adapter->queues[i];
2165 		que->adapter = adapter;
2166 		que->me = i;
2167 		que->txr = &adapter->tx_rings[i];
2168 		que->rxr = &adapter->rx_rings[i];
2169 	}
2170 
2171 	return (0);
2172 
2173 err_rx_desc:
2174 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2175 		ixgbe_dma_free(adapter, &rxr->rxdma);
2176 err_tx_desc:
2177 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2178 		ixgbe_dma_free(adapter, &txr->txdma);
2179 	free(adapter->rx_rings, M_IXV);
2180 rx_fail:
2181 	free(adapter->tx_rings, M_IXV);
2182 tx_fail:
2183 	free(adapter->queues, M_IXV);
2184 fail:
2185 	return (error);
2186 } /* ixv_allocate_queues */
2187