1 /******************************************************************************
2
3 Copyright (c) 2001-2017, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: stable/10/sys/dev/ixgbe/ixv_txrx.c 315333 2017-03-15 21:20:17Z erj $*/
34
35
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include "ixv.h"
42
43 extern int ix_crcstrip;
44
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later.
 *  It also causes IP forwarding to fail, and unlike LRO that cannot
 *  be controlled by the stack.  For all these reasons it is best left
 *  off, with no tunable interface; enabling it requires changing the
 *  value below and recompiling.
 */
57 static bool ixgbe_rsc_enable = FALSE;
58
59 /************************************************************************
60 * Local Function prototypes
61 ************************************************************************/
62 static void ixgbe_setup_transmit_ring(struct tx_ring *);
63 static void ixgbe_free_transmit_buffers(struct tx_ring *);
64 static int ixgbe_setup_receive_ring(struct rx_ring *);
65 static void ixgbe_free_receive_buffers(struct rx_ring *);
66 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
67 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
68 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
69 static int ixgbe_tx_ctx_setup(struct tx_ring *,
70 struct mbuf *, u32 *, u32 *);
71 static int ixgbe_tso_setup(struct tx_ring *,
72 struct mbuf *, u32 *, u32 *);
73 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
74 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
75 struct mbuf *, u32);
76 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
77 struct ixgbe_dma_alloc *, int);
78 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
79
80 MALLOC_DECLARE(M_IXV);
81
82 /************************************************************************
83 * ixv_legacy_start_locked - Transmit entry point
84 *
85 * Called by the stack to initiate a transmit.
86 * The driver will remain in this routine as long as there are
87 * packets to transmit and transmit resources are available.
88 * In case resources are not available, the stack is notified
89 * and the packet is requeued.
90 ************************************************************************/
91 int
ixv_legacy_start_locked(struct ifnet * ifp,struct tx_ring * txr)92 ixv_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
93 {
94 struct mbuf *m_head;
95 struct adapter *adapter = txr->adapter;
96
97 IXGBE_TX_LOCK_ASSERT(txr);
98
99 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
100 return (ENETDOWN);
101 if (!adapter->link_active)
102 return (ENETDOWN);
103
104 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
105 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
106 break;
107
108 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
109 if (m_head == NULL)
110 break;
111
112 if (ixgbe_xmit(txr, &m_head)) {
113 if (m_head != NULL)
114 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
115 break;
116 }
117 /* Send a copy of the frame to the BPF listener */
118 ETHER_BPF_MTAP(ifp, m_head);
119 }
120
121 return IXGBE_SUCCESS;
122 } /* ixv_legacy_start_locked */
123
124 /************************************************************************
125 * ixv_legacy_start
126 *
127 * Called by the stack, this always uses the first tx ring,
128 * and should not be used with multiqueue tx enabled.
129 ************************************************************************/
130 void
ixv_legacy_start(struct ifnet * ifp)131 ixv_legacy_start(struct ifnet *ifp)
132 {
133 struct adapter *adapter = ifp->if_softc;
134 struct tx_ring *txr = adapter->tx_rings;
135
136 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
137 IXGBE_TX_LOCK(txr);
138 ixv_legacy_start_locked(ifp, txr);
139 IXGBE_TX_UNLOCK(txr);
140 }
141 } /* ixv_legacy_start */
142
143 /************************************************************************
144 * ixv_mq_start - Multiqueue Transmit Entry Point
145 *
146 * (if_transmit function)
147 ************************************************************************/
148 int
ixv_mq_start(struct ifnet * ifp,struct mbuf * m)149 ixv_mq_start(struct ifnet *ifp, struct mbuf *m)
150 {
151 struct adapter *adapter = ifp->if_softc;
152 struct ix_queue *que;
153 struct tx_ring *txr;
154 int i, err = 0;
155 uint32_t bucket_id;
156
157 /*
158 * When doing RSS, map it to the same outbound queue
159 * as the incoming flow would be mapped to.
160 *
161 * If everything is setup correctly, it should be the
162 * same bucket that the current CPU we're on is.
163 */
164 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
165 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
166 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
167 &bucket_id) == 0)) {
168 i = bucket_id % adapter->num_queues;
169 #ifdef IXGBE_DEBUG
170 if (bucket_id > adapter->num_queues)
171 if_printf(ifp,
172 "bucket_id (%d) > num_queues (%d)\n",
173 bucket_id, adapter->num_queues);
174 #endif
175 } else
176 i = m->m_pkthdr.flowid % adapter->num_queues;
177 } else
178 i = curcpu % adapter->num_queues;
179
180 /* Check for a hung queue and pick alternative */
181 if (((1 << i) & adapter->active_queues) == 0)
182 i = ffsl(adapter->active_queues);
183
184 txr = &adapter->tx_rings[i];
185 que = &adapter->queues[i];
186
187 err = drbr_enqueue(ifp, txr->br, m);
188 if (err)
189 return (err);
190 if (IXGBE_TX_TRYLOCK(txr)) {
191 ixv_mq_start_locked(ifp, txr);
192 IXGBE_TX_UNLOCK(txr);
193 } else
194 taskqueue_enqueue(que->tq, &txr->txq_task);
195
196 return (0);
197 } /* ixv_mq_start */
198
199 /************************************************************************
200 * ixv_mq_start_locked
201 ************************************************************************/
202 int
ixv_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)203 ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
204 {
205 struct mbuf *next;
206 int enqueued = 0, err = 0;
207
208 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
209 return (ENETDOWN);
210 if (!txr->adapter->link_active)
211 return (ENETDOWN);
212
213 /* Process the queue */
214 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
215 err = ixgbe_xmit(txr, &next);
216 if (err != 0) {
217 if (next == NULL)
218 drbr_advance(ifp, txr->br);
219 else
220 drbr_putback(ifp, txr->br, next);
221 break;
222 }
223 drbr_advance(ifp, txr->br);
224 enqueued++;
225 #if __FreeBSD_version >= 1100036
226 /*
227 * Since we're looking at the tx ring, we can check
228 * to see if we're a VF by examing our tail register
229 * address.
230 */
231 if (next->m_flags & M_MCAST)
232 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
233 #endif
234 /* Send a copy of the frame to the BPF listener */
235 ETHER_BPF_MTAP(ifp, next);
236 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
237 break;
238 }
239
240 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
241 ixv_txeof(txr);
242
243 return (err);
244 } /* ixv_mq_start_locked */
245
246 /************************************************************************
247 * ixv_deferred_mq_start
248 *
249 * Called from a taskqueue to drain queued transmit packets.
250 ************************************************************************/
251 void
ixv_deferred_mq_start(void * arg,int pending)252 ixv_deferred_mq_start(void *arg, int pending)
253 {
254 struct tx_ring *txr = arg;
255 struct adapter *adapter = txr->adapter;
256 struct ifnet *ifp = adapter->ifp;
257
258 IXGBE_TX_LOCK(txr);
259 if (!drbr_empty(ifp, txr->br))
260 ixv_mq_start_locked(ifp, txr);
261 IXGBE_TX_UNLOCK(txr);
262 } /* ixv_deferred_mq_start */
263
264 /************************************************************************
265 * ixv_qflush - Flush all ring buffers
266 ************************************************************************/
267 void
ixv_qflush(struct ifnet * ifp)268 ixv_qflush(struct ifnet *ifp)
269 {
270 struct adapter *adapter = ifp->if_softc;
271 struct tx_ring *txr = adapter->tx_rings;
272 struct mbuf *m;
273
274 for (int i = 0; i < adapter->num_queues; i++, txr++) {
275 IXGBE_TX_LOCK(txr);
276 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
277 m_freem(m);
278 IXGBE_TX_UNLOCK(txr);
279 }
280 if_qflush(ifp);
281 } /* ixv_qflush */
282
283
284 /************************************************************************
285 * ixgbe_xmit
286 *
287 * This routine maps the mbufs to tx descriptors, allowing the
288 * TX engine to transmit the packets.
289 *
290 * Return 0 on success, positive on failure
291 ************************************************************************/
292 static int
ixgbe_xmit(struct tx_ring * txr,struct mbuf ** m_headp)293 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
294 {
295 struct adapter *adapter = txr->adapter;
296 struct ixgbe_tx_buf *txbuf;
297 union ixgbe_adv_tx_desc *txd = NULL;
298 struct mbuf *m_head;
299 int i, j, error, nsegs;
300 int first;
301 u32 olinfo_status = 0, cmd_type_len;
302 bool remap = TRUE;
303 bus_dma_segment_t segs[adapter->num_segs];
304 bus_dmamap_t map;
305
306 m_head = *m_headp;
307
308 /* Basic descriptor defines */
309 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
310 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
311
312 if (m_head->m_flags & M_VLANTAG)
313 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
314
315 /*
316 * Important to capture the first descriptor
317 * used because it will contain the index of
318 * the one we tell the hardware to report back
319 */
320 first = txr->next_avail_desc;
321 txbuf = &txr->tx_buffers[first];
322 map = txbuf->map;
323
324 /*
325 * Map the packet for DMA.
326 */
327 retry:
328 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
329 &nsegs, BUS_DMA_NOWAIT);
330
331 if (__predict_false(error)) {
332 struct mbuf *m;
333
334 switch (error) {
335 case EFBIG:
336 /* Try it again? - one try */
337 if (remap == TRUE) {
338 remap = FALSE;
339 /*
340 * XXX: m_defrag will choke on
341 * non-MCLBYTES-sized clusters
342 */
343 m = m_defrag(*m_headp, M_NOWAIT);
344 if (m == NULL) {
345 adapter->mbuf_defrag_failed++;
346 m_freem(*m_headp);
347 *m_headp = NULL;
348 return (ENOBUFS);
349 }
350 *m_headp = m;
351 goto retry;
352 } else
353 return (error);
354 case ENOMEM:
355 txr->no_tx_dma_setup++;
356 return (error);
357 default:
358 txr->no_tx_dma_setup++;
359 m_freem(*m_headp);
360 *m_headp = NULL;
361 return (error);
362 }
363 }
364
365 /* Make certain there are enough descriptors */
366 if (txr->tx_avail < (nsegs + 2)) {
367 txr->no_desc_avail++;
368 bus_dmamap_unload(txr->txtag, map);
369 return (ENOBUFS);
370 }
371 m_head = *m_headp;
372
373 /*
374 * Set up the appropriate offload context
375 * this will consume the first descriptor
376 */
377 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
378 if (__predict_false(error)) {
379 if (error == ENOBUFS)
380 *m_headp = NULL;
381 return (error);
382 }
383
384 olinfo_status |= IXGBE_ADVTXD_CC;
385 i = txr->next_avail_desc;
386 for (j = 0; j < nsegs; j++) {
387 bus_size_t seglen;
388 bus_addr_t segaddr;
389
390 txbuf = &txr->tx_buffers[i];
391 txd = &txr->tx_base[i];
392 seglen = segs[j].ds_len;
393 segaddr = htole64(segs[j].ds_addr);
394
395 txd->read.buffer_addr = segaddr;
396 txd->read.cmd_type_len = htole32(txr->txd_cmd |
397 cmd_type_len | seglen);
398 txd->read.olinfo_status = htole32(olinfo_status);
399
400 if (++i == txr->num_desc)
401 i = 0;
402 }
403
404 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
405 txr->tx_avail -= nsegs;
406 txr->next_avail_desc = i;
407
408 txbuf->m_head = m_head;
409 /*
410 * Here we swap the map so the last descriptor,
411 * which gets the completion interrupt has the
412 * real map, and the first descriptor gets the
413 * unused map from this descriptor.
414 */
415 txr->tx_buffers[first].map = txbuf->map;
416 txbuf->map = map;
417 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
418
419 /* Set the EOP descriptor that will be marked done */
420 txbuf = &txr->tx_buffers[first];
421 txbuf->eop = txd;
422
423 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
424 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
425 /*
426 * Advance the Transmit Descriptor Tail (Tdt), this tells the
427 * hardware that this frame is available to transmit.
428 */
429 ++txr->total_packets;
430 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
431
432 /* Mark queue as having work */
433 if (txr->busy == 0)
434 txr->busy = 1;
435
436 return (0);
437 } /* ixgbe_xmit */
438
439
440 /************************************************************************
441 * ixgbe_allocate_transmit_buffers
442 *
443 * Allocate memory for tx_buffer structures. The tx_buffer stores all
444 * the information needed to transmit a packet on the wire. This is
445 * called only once at attach, setup is done every reset.
446 ************************************************************************/
447 static int
ixgbe_allocate_transmit_buffers(struct tx_ring * txr)448 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
449 {
450 struct adapter *adapter = txr->adapter;
451 device_t dev = adapter->dev;
452 struct ixgbe_tx_buf *txbuf;
453 int error, i;
454
455 /*
456 * Setup DMA descriptor areas.
457 */
458 error = bus_dma_tag_create(
459 /* parent */ bus_get_dma_tag(adapter->dev),
460 /* alignment */ 1,
461 /* bounds */ 0,
462 /* lowaddr */ BUS_SPACE_MAXADDR,
463 /* highaddr */ BUS_SPACE_MAXADDR,
464 /* filter */ NULL,
465 /* filterarg */ NULL,
466 /* maxsize */ IXGBE_TSO_SIZE,
467 /* nsegments */ adapter->num_segs,
468 /* maxsegsize */ PAGE_SIZE,
469 /* flags */ 0,
470 /* lockfunc */ NULL,
471 /* lockfuncarg */ NULL,
472 &txr->txtag);
473 if (error) {
474 device_printf(dev, "Unable to allocate TX DMA tag\n");
475 goto fail;
476 }
477
478 txr->tx_buffers =
479 (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
480 adapter->num_tx_desc, M_IXV, M_NOWAIT | M_ZERO);
481 if (!txr->tx_buffers) {
482 device_printf(dev, "Unable to allocate tx_buffer memory\n");
483 error = ENOMEM;
484 goto fail;
485 }
486
487 /* Create the descriptor buffer dma maps */
488 txbuf = txr->tx_buffers;
489 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
490 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
491 if (error != 0) {
492 device_printf(dev, "Unable to create TX DMA map\n");
493 goto fail;
494 }
495 }
496
497 return 0;
498 fail:
499 /* We free all, it handles case where we are in the middle */
500 ixv_free_transmit_structures(adapter);
501
502 return (error);
503 } /* ixgbe_allocate_transmit_buffers */
504
505 /************************************************************************
506 *
507 * Initialize a transmit ring.
508 *
509 ************************************************************************/
510 static void
ixgbe_setup_transmit_ring(struct tx_ring * txr)511 ixgbe_setup_transmit_ring(struct tx_ring *txr)
512 {
513 struct adapter *adapter = txr->adapter;
514 struct ixgbe_tx_buf *txbuf;
515 #ifdef DEV_NETMAP
516 struct netmap_adapter *na = NA(adapter->ifp);
517 struct netmap_slot *slot;
518 #endif /* DEV_NETMAP */
519
520 /* Clear the old ring contents */
521 IXGBE_TX_LOCK(txr);
522
523 #ifdef DEV_NETMAP
524 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
525 /*
526 * (under lock): if in netmap mode, do some consistency
527 * checks and set slot to entry 0 of the netmap ring.
528 */
529 slot = netmap_reset(na, NR_TX, txr->me, 0);
530 }
531 #endif /* DEV_NETMAP */
532
533 bzero((void *)txr->tx_base,
534 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
535 /* Reset indices */
536 txr->next_avail_desc = 0;
537 txr->next_to_clean = 0;
538
539 /* Free any existing tx buffers. */
540 txbuf = txr->tx_buffers;
541 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
542 if (txbuf->m_head != NULL) {
543 bus_dmamap_sync(txr->txtag, txbuf->map,
544 BUS_DMASYNC_POSTWRITE);
545 bus_dmamap_unload(txr->txtag, txbuf->map);
546 m_freem(txbuf->m_head);
547 txbuf->m_head = NULL;
548 }
549
550 #ifdef DEV_NETMAP
551 /*
552 * In netmap mode, set the map for the packet buffer.
553 * NOTE: Some drivers (not this one) also need to set
554 * the physical buffer address in the NIC ring.
555 * Slots in the netmap ring (indexed by "si") are
556 * kring->nkr_hwofs positions "ahead" wrt the
557 * corresponding slot in the NIC ring. In some drivers
558 * (not here) nkr_hwofs can be negative. Function
559 * netmap_idx_n2k() handles wraparounds properly.
560 */
561 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
562 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
563 netmap_load_map(na, txr->txtag,
564 txbuf->map, NMB(na, slot + si));
565 }
566 #endif /* DEV_NETMAP */
567
568 /* Clear the EOP descriptor pointer */
569 txbuf->eop = NULL;
570 }
571
572 /* Set number of descriptors available */
573 txr->tx_avail = adapter->num_tx_desc;
574
575 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
576 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
577 IXGBE_TX_UNLOCK(txr);
578 } /* ixgbe_setup_transmit_ring */
579
580 /************************************************************************
581 * ixv_setup_transmit_structures - Initialize all transmit rings.
582 ************************************************************************/
583 int
ixv_setup_transmit_structures(struct adapter * adapter)584 ixv_setup_transmit_structures(struct adapter *adapter)
585 {
586 struct tx_ring *txr = adapter->tx_rings;
587
588 for (int i = 0; i < adapter->num_queues; i++, txr++)
589 ixgbe_setup_transmit_ring(txr);
590
591 return (0);
592 } /* ixv_setup_transmit_structures */
593
594 /************************************************************************
595 * ixv_free_transmit_structures - Free all transmit rings.
596 ************************************************************************/
597 void
ixv_free_transmit_structures(struct adapter * adapter)598 ixv_free_transmit_structures(struct adapter *adapter)
599 {
600 struct tx_ring *txr = adapter->tx_rings;
601
602 for (int i = 0; i < adapter->num_queues; i++, txr++) {
603 IXGBE_TX_LOCK(txr);
604 ixgbe_free_transmit_buffers(txr);
605 ixgbe_dma_free(adapter, &txr->txdma);
606 IXGBE_TX_UNLOCK(txr);
607 IXGBE_TX_LOCK_DESTROY(txr);
608 }
609 free(adapter->tx_rings, M_IXV);
610 } /* ixv_free_transmit_structures */
611
612 /************************************************************************
613 * ixgbe_free_transmit_buffers
614 *
615 * Free transmit ring related data structures.
616 ************************************************************************/
617 static void
ixgbe_free_transmit_buffers(struct tx_ring * txr)618 ixgbe_free_transmit_buffers(struct tx_ring *txr)
619 {
620 struct adapter *adapter = txr->adapter;
621 struct ixgbe_tx_buf *tx_buffer;
622 int i;
623
624 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
625
626 if (txr->tx_buffers == NULL)
627 return;
628
629 tx_buffer = txr->tx_buffers;
630 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
631 if (tx_buffer->m_head != NULL) {
632 bus_dmamap_sync(txr->txtag, tx_buffer->map,
633 BUS_DMASYNC_POSTWRITE);
634 bus_dmamap_unload(txr->txtag, tx_buffer->map);
635 m_freem(tx_buffer->m_head);
636 tx_buffer->m_head = NULL;
637 if (tx_buffer->map != NULL) {
638 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
639 tx_buffer->map = NULL;
640 }
641 } else if (tx_buffer->map != NULL) {
642 bus_dmamap_unload(txr->txtag, tx_buffer->map);
643 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
644 tx_buffer->map = NULL;
645 }
646 }
647 if (txr->br != NULL)
648 buf_ring_free(txr->br, M_IXV);
649 if (txr->tx_buffers != NULL) {
650 free(txr->tx_buffers, M_IXV);
651 txr->tx_buffers = NULL;
652 }
653 if (txr->txtag != NULL) {
654 bus_dma_tag_destroy(txr->txtag);
655 txr->txtag = NULL;
656 }
657 } /* ixgbe_free_transmit_buffers */
658
659 /************************************************************************
660 * ixgbe_tx_ctx_setup
661 *
662 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
663 ************************************************************************/
664 static int
ixgbe_tx_ctx_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)665 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
666 u32 *cmd_type_len, u32 *olinfo_status)
667 {
668 struct ixgbe_adv_tx_context_desc *TXD;
669 struct ether_vlan_header *eh;
670 #ifdef INET
671 struct ip *ip;
672 #endif
673 #ifdef INET6
674 struct ip6_hdr *ip6;
675 #endif
676 int ehdrlen, ip_hlen = 0;
677 int offload = TRUE;
678 int ctxd = txr->next_avail_desc;
679 u32 vlan_macip_lens = 0;
680 u32 type_tucmd_mlhl = 0;
681 u16 vtag = 0;
682 u16 etype;
683 u8 ipproto = 0;
684 caddr_t l3d;
685
686
687 /* First check if TSO is to be used */
688 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
689 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
690
691 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
692 offload = FALSE;
693
694 /* Indicate the whole packet as payload when not doing TSO */
695 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
696
697 /* Now ready a context descriptor */
698 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
699
700 /*
701 * In advanced descriptors the vlan tag must
702 * be placed into the context descriptor. Hence
703 * we need to make one even if not doing offloads.
704 */
705 if (mp->m_flags & M_VLANTAG) {
706 vtag = htole16(mp->m_pkthdr.ether_vtag);
707 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
708 } else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
709 return (0);
710
711 /*
712 * Determine where frame payload starts.
713 * Jump over vlan headers if already present,
714 * helpful for QinQ too.
715 */
716 eh = mtod(mp, struct ether_vlan_header *);
717 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
718 etype = ntohs(eh->evl_proto);
719 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
720 } else {
721 etype = ntohs(eh->evl_encap_proto);
722 ehdrlen = ETHER_HDR_LEN;
723 }
724
725 /* Set the ether header length */
726 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
727
728 if (offload == FALSE)
729 goto no_offloads;
730
731 /*
732 * If the first mbuf only includes the ethernet header,
733 * jump to the next one
734 * XXX: This assumes the stack splits mbufs containing headers
735 * on header boundaries
736 * XXX: And assumes the entire IP header is contained in one mbuf
737 */
738 if (mp->m_len == ehdrlen && mp->m_next)
739 l3d = mtod(mp->m_next, caddr_t);
740 else
741 l3d = mtod(mp, caddr_t) + ehdrlen;
742
743 switch (etype) {
744 #ifdef INET
745 case ETHERTYPE_IP:
746 ip = (struct ip *)(l3d);
747 ip_hlen = ip->ip_hl << 2;
748 ipproto = ip->ip_p;
749 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
750 /* Insert IPv4 checksum into data descriptors */
751 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
752 ip->ip_sum = 0;
753 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
754 }
755 break;
756 #endif
757 #ifdef INET6
758 case ETHERTYPE_IPV6:
759 ip6 = (struct ip6_hdr *)(l3d);
760 ip_hlen = sizeof(struct ip6_hdr);
761 ipproto = ip6->ip6_nxt;
762 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
763 break;
764 #endif
765 default:
766 offload = FALSE;
767 break;
768 }
769
770 vlan_macip_lens |= ip_hlen;
771
772 /* No support for offloads for non-L4 next headers */
773 switch (ipproto) {
774 case IPPROTO_TCP:
775 if (mp->m_pkthdr.csum_flags &
776 (CSUM_IP_TCP | CSUM_IP6_TCP))
777 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
778 else
779 offload = false;
780 break;
781 case IPPROTO_UDP:
782 if (mp->m_pkthdr.csum_flags &
783 (CSUM_IP_UDP | CSUM_IP6_UDP))
784 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
785 else
786 offload = false;
787 break;
788 case IPPROTO_SCTP:
789 if (mp->m_pkthdr.csum_flags &
790 (CSUM_IP_SCTP | CSUM_IP6_SCTP))
791 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
792 else
793 offload = false;
794 break;
795 default:
796 offload = false;
797 break;
798 }
799
800 if (offload) /* Insert L4 checksum into data descriptors */
801 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
802
803 no_offloads:
804 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
805
806 /* Now copy bits into descriptor */
807 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
808 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
809 TXD->seqnum_seed = htole32(0);
810 TXD->mss_l4len_idx = htole32(0);
811
812 /* We've consumed the first desc, adjust counters */
813 if (++ctxd == txr->num_desc)
814 ctxd = 0;
815 txr->next_avail_desc = ctxd;
816 --txr->tx_avail;
817
818 return (0);
819 } /* ixgbe_tx_ctx_setup */
820
821 /************************************************************************
822 * ixgbe_tso_setup
823 *
824 * Setup work for hardware segmentation offload (TSO) on
825 * adapters using advanced tx descriptors
826 ************************************************************************/
827 static int
ixgbe_tso_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)828 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
829 u32 *olinfo_status)
830 {
831 struct ixgbe_adv_tx_context_desc *TXD;
832 struct ether_vlan_header *eh;
833 #ifdef INET6
834 struct ip6_hdr *ip6;
835 #endif
836 #ifdef INET
837 struct ip *ip;
838 #endif
839 struct tcphdr *th;
840 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
841 u32 vlan_macip_lens = 0;
842 u32 type_tucmd_mlhl = 0;
843 u32 mss_l4len_idx = 0, paylen;
844 u16 vtag = 0, eh_type;
845
846 /*
847 * Determine where frame payload starts.
848 * Jump over vlan headers if already present
849 */
850 eh = mtod(mp, struct ether_vlan_header *);
851 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
852 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
853 eh_type = eh->evl_proto;
854 } else {
855 ehdrlen = ETHER_HDR_LEN;
856 eh_type = eh->evl_encap_proto;
857 }
858
859 switch (ntohs(eh_type)) {
860 #ifdef INET
861 case ETHERTYPE_IP:
862 ip = (struct ip *)(mp->m_data + ehdrlen);
863 if (ip->ip_p != IPPROTO_TCP)
864 return (ENXIO);
865 ip->ip_sum = 0;
866 ip_hlen = ip->ip_hl << 2;
867 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
868 th->th_sum = in_pseudo(ip->ip_src.s_addr,
869 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
870 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
871 /* Tell transmit desc to also do IPv4 checksum. */
872 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
873 break;
874 #endif
875 #ifdef INET6
876 case ETHERTYPE_IPV6:
877 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
878 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
879 if (ip6->ip6_nxt != IPPROTO_TCP)
880 return (ENXIO);
881 ip_hlen = sizeof(struct ip6_hdr);
882 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
883 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
884 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
885 break;
886 #endif
887 default:
888 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
889 __func__, ntohs(eh_type));
890 break;
891 }
892
893 ctxd = txr->next_avail_desc;
894 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
895
896 tcp_hlen = th->th_off << 2;
897
898 /* This is used in the transmit desc in encap */
899 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
900
901 /* VLAN MACLEN IPLEN */
902 if (mp->m_flags & M_VLANTAG) {
903 vtag = htole16(mp->m_pkthdr.ether_vtag);
904 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
905 }
906
907 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
908 vlan_macip_lens |= ip_hlen;
909 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
910
911 /* ADV DTYPE TUCMD */
912 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
913 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
914 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
915
916 /* MSS L4LEN IDX */
917 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
918 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
919 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
920
921 TXD->seqnum_seed = htole32(0);
922
923 if (++ctxd == txr->num_desc)
924 ctxd = 0;
925
926 txr->tx_avail--;
927 txr->next_avail_desc = ctxd;
928 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
929 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
930 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
931 ++txr->tso_tx;
932
933 return (0);
934 } /* ixgbe_tso_setup */
935
936
937 /************************************************************************
938 * ixv_txeof
939 *
940 * Examine each tx_buffer in the used queue. If the hardware is done
941 * processing the packet then free associated resources. The
942 * tx_buffer is put back on the free queue.
943 ************************************************************************/
944 void
ixv_txeof(struct tx_ring * txr)945 ixv_txeof(struct tx_ring *txr)
946 {
947 struct adapter *adapter = txr->adapter;
948 struct ixgbe_tx_buf *buf;
949 union ixgbe_adv_tx_desc *txd;
950 u32 work, processed = 0;
951 u32 limit = adapter->tx_process_limit;
952
953 mtx_assert(&txr->tx_mtx, MA_OWNED);
954
955 #ifdef DEV_NETMAP
956 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
957 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
958 struct netmap_adapter *na = NA(adapter->ifp);
959 struct netmap_kring *kring = &na->tx_rings[txr->me];
960 txd = txr->tx_base;
961 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
962 BUS_DMASYNC_POSTREAD);
963 /*
964 * In netmap mode, all the work is done in the context
965 * of the client thread. Interrupt handlers only wake up
966 * clients, which may be sleeping on individual rings
967 * or on a global resource for all rings.
968 * To implement tx interrupt mitigation, we wake up the client
969 * thread roughly every half ring, even if the NIC interrupts
970 * more frequently. This is implemented as follows:
971 * - ixgbe_txsync() sets kring->nr_kflags with the index of
972 * the slot that should wake up the thread (nkr_num_slots
973 * means the user thread should not be woken up);
974 * - the driver ignores tx interrupts unless netmap_mitigate=0
975 * or the slot has the DD bit set.
976 */
977 if (!netmap_mitigate ||
978 (kring->nr_kflags < kring->nkr_num_slots &&
979 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
980 netmap_tx_irq(adapter->ifp, txr->me);
981 }
982 return;
983 }
984 #endif /* DEV_NETMAP */
985
986 if (txr->tx_avail == txr->num_desc) {
987 txr->busy = 0;
988 return;
989 }
990
991 /* Get work starting point */
992 work = txr->next_to_clean;
993 buf = &txr->tx_buffers[work];
994 txd = &txr->tx_base[work];
995 work -= txr->num_desc; /* The distance to ring end */
996 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997 BUS_DMASYNC_POSTREAD);
998
999 do {
1000 union ixgbe_adv_tx_desc *eop = buf->eop;
1001 if (eop == NULL) /* No work */
1002 break;
1003
1004 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1005 break; /* I/O not complete */
1006
1007 if (buf->m_head) {
1008 txr->bytes += buf->m_head->m_pkthdr.len;
1009 bus_dmamap_sync(txr->txtag, buf->map,
1010 BUS_DMASYNC_POSTWRITE);
1011 bus_dmamap_unload(txr->txtag, buf->map);
1012 m_freem(buf->m_head);
1013 buf->m_head = NULL;
1014 }
1015 buf->eop = NULL;
1016 ++txr->tx_avail;
1017
1018 /* We clean the range if multi segment */
1019 while (txd != eop) {
1020 ++txd;
1021 ++buf;
1022 ++work;
1023 /* wrap the ring? */
1024 if (__predict_false(!work)) {
1025 work -= txr->num_desc;
1026 buf = txr->tx_buffers;
1027 txd = txr->tx_base;
1028 }
1029 if (buf->m_head) {
1030 txr->bytes += buf->m_head->m_pkthdr.len;
1031 bus_dmamap_sync(txr->txtag, buf->map,
1032 BUS_DMASYNC_POSTWRITE);
1033 bus_dmamap_unload(txr->txtag, buf->map);
1034 m_freem(buf->m_head);
1035 buf->m_head = NULL;
1036 }
1037 ++txr->tx_avail;
1038 buf->eop = NULL;
1039
1040 }
1041 ++txr->packets;
1042 ++processed;
1043
1044 /* Try the next packet */
1045 ++txd;
1046 ++buf;
1047 ++work;
1048 /* reset with a wrap */
1049 if (__predict_false(!work)) {
1050 work -= txr->num_desc;
1051 buf = txr->tx_buffers;
1052 txd = txr->tx_base;
1053 }
1054 prefetch(txd);
1055 } while (__predict_true(--limit));
1056
1057 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1058 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1059
1060 work += txr->num_desc;
1061 txr->next_to_clean = work;
1062
1063 /*
1064 * Queue Hang detection, we know there's
1065 * work outstanding or the first return
1066 * would have been taken, so increment busy
1067 * if nothing managed to get cleaned, then
1068 * in local_timer it will be checked and
1069 * marked as HUNG if it exceeds a MAX attempt.
1070 */
1071 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1072 ++txr->busy;
1073 /*
1074 * If anything gets cleaned we reset state to 1,
1075 * note this will turn off HUNG if its set.
1076 */
1077 if (processed)
1078 txr->busy = 1;
1079
1080 if (txr->tx_avail == txr->num_desc)
1081 txr->busy = 0;
1082
1083 return;
1084 } /* ixv_txeof */
1085
1086 /************************************************************************
1087 * ixgbe_rsc_count
1088 *
1089 * Used to detect a descriptor that has been merged by Hardware RSC.
1090 ************************************************************************/
1091 static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc * rx)1092 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1093 {
1094 return (le32toh(rx->wb.lower.lo_dword.data) &
1095 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1096 } /* ixgbe_rsc_count */
1097
1098 /************************************************************************
1099 * ixgbe_setup_hw_rsc
1100 *
1101 * Initialize Hardware RSC (LRO) feature on 82599
1102 * for an RX ring, this is toggled by the LRO capability
1103 * even though it is transparent to the stack.
1104 *
1105 * NOTE: Since this HW feature only works with IPv4 and
1106 * testing has shown soft LRO to be as effective,
1107 * this feature will be disabled by default.
1108 ************************************************************************/
1109 static void
ixgbe_setup_hw_rsc(struct rx_ring * rxr)1110 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1111 {
1112 struct adapter *adapter = rxr->adapter;
1113 struct ixgbe_hw *hw = &adapter->hw;
1114 u32 rscctrl, rdrxctl;
1115
1116 /* If turning LRO/RSC off we need to disable it */
1117 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1118 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1119 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1120 return;
1121 }
1122
1123 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1124 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1125 #ifdef DEV_NETMAP
1126 /* Always strip CRC unless Netmap disabled it */
1127 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1128 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1129 ix_crcstrip)
1130 #endif /* DEV_NETMAP */
1131 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1132 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1133 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1134
1135 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1136 rscctrl |= IXGBE_RSCCTL_RSCEN;
1137 /*
1138 * Limit the total number of descriptors that
1139 * can be combined, so it does not exceed 64K
1140 */
1141 if (rxr->mbuf_sz == MCLBYTES)
1142 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1143 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1144 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1145 else if (rxr->mbuf_sz == MJUM9BYTES)
1146 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1147 else /* Using 16K cluster */
1148 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1149
1150 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1151
1152 /* Enable TCP header recognition */
1153 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1154 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1155
1156 /* Disable RSC for ACK packets */
1157 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1158 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1159
1160 rxr->hw_rsc = TRUE;
1161 } /* ixgbe_setup_hw_rsc */
1162
1163 /************************************************************************
1164 * ixgbe_refresh_mbufs
1165 *
1166 * Refresh mbuf buffers for RX descriptor rings
1167 * - now keeps its own state so discards due to resource
1168 * exhaustion are unnecessary, if an mbuf cannot be obtained
1169 * it just returns, keeping its placeholder, thus it can simply
1170 * be recalled to try again.
1171 ************************************************************************/
1172 static void
ixgbe_refresh_mbufs(struct rx_ring * rxr,int limit)1173 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1174 {
1175 struct adapter *adapter = rxr->adapter;
1176 struct ixgbe_rx_buf *rxbuf;
1177 struct mbuf *mp;
1178 bus_dma_segment_t seg[1];
1179 int i, j, nsegs, error;
1180 bool refreshed = FALSE;
1181
1182 i = j = rxr->next_to_refresh;
1183 /* Control the loop with one beyond */
1184 if (++j == rxr->num_desc)
1185 j = 0;
1186
1187 while (j != limit) {
1188 rxbuf = &rxr->rx_buffers[i];
1189 if (rxbuf->buf == NULL) {
1190 mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1191 rxr->mbuf_sz);
1192 if (mp == NULL)
1193 goto update;
1194 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1195 m_adj(mp, ETHER_ALIGN);
1196 } else
1197 mp = rxbuf->buf;
1198
1199 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1200
1201 /* If we're dealing with an mbuf that was copied rather
1202 * than replaced, there's no need to go through busdma.
1203 */
1204 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1205 /* Get the memory mapping */
1206 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1207 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1208 mp, seg, &nsegs, BUS_DMA_NOWAIT);
1209 if (error != 0) {
1210 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1211 m_free(mp);
1212 rxbuf->buf = NULL;
1213 goto update;
1214 }
1215 rxbuf->buf = mp;
1216 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1217 BUS_DMASYNC_PREREAD);
1218 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1219 htole64(seg[0].ds_addr);
1220 } else {
1221 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1222 rxbuf->flags &= ~IXGBE_RX_COPY;
1223 }
1224
1225 refreshed = TRUE;
1226 /* Next is precalculated */
1227 i = j;
1228 rxr->next_to_refresh = i;
1229 if (++j == rxr->num_desc)
1230 j = 0;
1231 }
1232
1233 update:
1234 if (refreshed) /* Update hardware tail index */
1235 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1236
1237 return;
1238 } /* ixgbe_refresh_mbufs */
1239
1240 /************************************************************************
1241 * ixgbe_allocate_receive_buffers
1242 *
1243 * Allocate memory for rx_buffer structures. Since we use one
1244 * rx_buffer per received packet, the maximum number of rx_buffer's
1245 * that we'll need is equal to the number of receive descriptors
1246 * that we've allocated.
1247 ************************************************************************/
1248 static int
ixgbe_allocate_receive_buffers(struct rx_ring * rxr)1249 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1250 {
1251 struct adapter *adapter = rxr->adapter;
1252 device_t dev = adapter->dev;
1253 struct ixgbe_rx_buf *rxbuf;
1254 int bsize, error;
1255
1256 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1257 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXV,
1258 M_NOWAIT | M_ZERO);
1259 if (!rxr->rx_buffers) {
1260 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1261 error = ENOMEM;
1262 goto fail;
1263 }
1264
1265 error = bus_dma_tag_create(
1266 /* parent */ bus_get_dma_tag(dev),
1267 /* alignment */ 1,
1268 /* bounds */ 0,
1269 /* lowaddr */ BUS_SPACE_MAXADDR,
1270 /* highaddr */ BUS_SPACE_MAXADDR,
1271 /* filter */ NULL,
1272 /* filterarg */ NULL,
1273 /* maxsize */ MJUM16BYTES,
1274 /* nsegments */ 1,
1275 /* maxsegsize */ MJUM16BYTES,
1276 /* flags */ 0,
1277 /* lockfunc */ NULL,
1278 /* lockfuncarg */ NULL,
1279 &rxr->ptag);
1280 if (error) {
1281 device_printf(dev, "Unable to create RX DMA tag\n");
1282 goto fail;
1283 }
1284
1285 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1286 rxbuf = &rxr->rx_buffers[i];
1287 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1288 if (error) {
1289 device_printf(dev, "Unable to create RX dma map\n");
1290 goto fail;
1291 }
1292 }
1293
1294 return (0);
1295
1296 fail:
1297 /* Frees all, but can handle partial completion */
1298 ixv_free_receive_structures(adapter);
1299
1300 return (error);
1301 } /* ixgbe_allocate_receive_buffers */
1302
1303 /************************************************************************
1304 * ixgbe_free_receive_ring
1305 ************************************************************************/
1306 static void
ixgbe_free_receive_ring(struct rx_ring * rxr)1307 ixgbe_free_receive_ring(struct rx_ring *rxr)
1308 {
1309 struct ixgbe_rx_buf *rxbuf;
1310
1311 for (int i = 0; i < rxr->num_desc; i++) {
1312 rxbuf = &rxr->rx_buffers[i];
1313 if (rxbuf->buf != NULL) {
1314 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1315 BUS_DMASYNC_POSTREAD);
1316 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1317 rxbuf->buf->m_flags |= M_PKTHDR;
1318 m_freem(rxbuf->buf);
1319 rxbuf->buf = NULL;
1320 rxbuf->flags = 0;
1321 }
1322 }
1323 } /* ixgbe_free_receive_ring */
1324
1325 /************************************************************************
1326 * ixgbe_setup_receive_ring
1327 *
1328 * Initialize a receive ring and its buffers.
1329 ************************************************************************/
1330 static int
ixgbe_setup_receive_ring(struct rx_ring * rxr)1331 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1332 {
1333 struct adapter *adapter;
1334 struct ifnet *ifp;
1335 device_t dev;
1336 struct ixgbe_rx_buf *rxbuf;
1337 struct lro_ctrl *lro = &rxr->lro;
1338 #ifdef DEV_NETMAP
1339 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1340 struct netmap_slot *slot;
1341 #endif /* DEV_NETMAP */
1342 bus_dma_segment_t seg[1];
1343 int rsize, nsegs, error = 0;
1344
1345 adapter = rxr->adapter;
1346 ifp = adapter->ifp;
1347 dev = adapter->dev;
1348
1349 /* Clear the ring contents */
1350 IXGBE_RX_LOCK(rxr);
1351
1352 #ifdef DEV_NETMAP
1353 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1354 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1355 #endif /* DEV_NETMAP */
1356
1357 rsize = roundup2(adapter->num_rx_desc *
1358 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1359 bzero((void *)rxr->rx_base, rsize);
1360 /* Cache the size */
1361 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1362
1363 /* Free current RX buffer structs and their mbufs */
1364 ixgbe_free_receive_ring(rxr);
1365
1366 /* Now replenish the mbufs */
1367 for (int j = 0; j != rxr->num_desc; ++j) {
1368 struct mbuf *mp;
1369
1370 rxbuf = &rxr->rx_buffers[j];
1371
1372 #ifdef DEV_NETMAP
1373 /*
1374 * In netmap mode, fill the map and set the buffer
1375 * address in the NIC ring, considering the offset
1376 * between the netmap and NIC rings (see comment in
1377 * ixgbe_setup_transmit_ring() ). No need to allocate
1378 * an mbuf, so end the block with a continue;
1379 */
1380 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1381 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1382 uint64_t paddr;
1383 void *addr;
1384
1385 addr = PNMB(na, slot + sj, &paddr);
1386 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1387 /* Update descriptor and the cached value */
1388 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1389 rxbuf->addr = htole64(paddr);
1390 continue;
1391 }
1392 #endif /* DEV_NETMAP */
1393
1394 rxbuf->flags = 0;
1395 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1396 adapter->rx_mbuf_sz);
1397 if (rxbuf->buf == NULL) {
1398 error = ENOBUFS;
1399 goto fail;
1400 }
1401 mp = rxbuf->buf;
1402 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1403 /* Get the memory mapping */
1404 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1405 &nsegs, BUS_DMA_NOWAIT);
1406 if (error != 0)
1407 goto fail;
1408 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1409 /* Update the descriptor and the cached value */
1410 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1411 rxbuf->addr = htole64(seg[0].ds_addr);
1412 }
1413
1414
1415 /* Setup our descriptor indices */
1416 rxr->next_to_check = 0;
1417 rxr->next_to_refresh = 0;
1418 rxr->lro_enabled = FALSE;
1419 rxr->rx_copies = 0;
1420 rxr->rx_bytes = 0;
1421 rxr->vtag_strip = FALSE;
1422
1423 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1424 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1425
1426 /*
1427 * Now set up the LRO interface
1428 */
1429 if (ixgbe_rsc_enable)
1430 ixgbe_setup_hw_rsc(rxr);
1431 else if (ifp->if_capenable & IFCAP_LRO) {
1432 int err = tcp_lro_init(lro);
1433 if (err) {
1434 device_printf(dev, "LRO Initialization failed!\n");
1435 goto fail;
1436 }
1437 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1438 rxr->lro_enabled = TRUE;
1439 lro->ifp = adapter->ifp;
1440 }
1441
1442 IXGBE_RX_UNLOCK(rxr);
1443
1444 return (0);
1445
1446 fail:
1447 ixgbe_free_receive_ring(rxr);
1448 IXGBE_RX_UNLOCK(rxr);
1449
1450 return (error);
1451 } /* ixgbe_setup_receive_ring */
1452
1453 /************************************************************************
1454 * ixv_setup_receive_structures - Initialize all receive rings.
1455 ************************************************************************/
1456 int
ixv_setup_receive_structures(struct adapter * adapter)1457 ixv_setup_receive_structures(struct adapter *adapter)
1458 {
1459 struct rx_ring *rxr = adapter->rx_rings;
1460 int j;
1461
1462 for (j = 0; j < adapter->num_queues; j++, rxr++)
1463 if (ixgbe_setup_receive_ring(rxr))
1464 goto fail;
1465
1466 return (0);
1467 fail:
1468 /*
1469 * Free RX buffers allocated so far, we will only handle
1470 * the rings that completed, the failing case will have
1471 * cleaned up for itself. 'j' failed, so its the terminus.
1472 */
1473 for (int i = 0; i < j; ++i) {
1474 rxr = &adapter->rx_rings[i];
1475 ixgbe_free_receive_ring(rxr);
1476 }
1477
1478 return (ENOBUFS);
1479 } /* ixv_setup_receive_structures */
1480
1481
1482 /************************************************************************
1483 * ixv_free_receive_structures - Free all receive rings.
1484 ************************************************************************/
1485 void
ixv_free_receive_structures(struct adapter * adapter)1486 ixv_free_receive_structures(struct adapter *adapter)
1487 {
1488 struct rx_ring *rxr = adapter->rx_rings;
1489 struct lro_ctrl *lro;
1490
1491 INIT_DEBUGOUT("ixv_free_receive_structures: begin");
1492
1493 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1494 lro = &rxr->lro;
1495 ixgbe_free_receive_buffers(rxr);
1496 /* Free LRO memory */
1497 tcp_lro_free(lro);
1498 /* Free the ring memory as well */
1499 ixgbe_dma_free(adapter, &rxr->rxdma);
1500 }
1501
1502 free(adapter->rx_rings, M_IXV);
1503 } /* ixv_free_receive_structures */
1504
1505
1506 /************************************************************************
1507 * ixgbe_free_receive_buffers - Free receive ring data structures
1508 ************************************************************************/
1509 static void
ixgbe_free_receive_buffers(struct rx_ring * rxr)1510 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1511 {
1512 struct adapter *adapter = rxr->adapter;
1513 struct ixgbe_rx_buf *rxbuf;
1514
1515 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1516
1517 /* Cleanup any existing buffers */
1518 if (rxr->rx_buffers != NULL) {
1519 for (int i = 0; i < adapter->num_rx_desc; i++) {
1520 rxbuf = &rxr->rx_buffers[i];
1521 if (rxbuf->buf != NULL) {
1522 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1523 BUS_DMASYNC_POSTREAD);
1524 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1525 rxbuf->buf->m_flags |= M_PKTHDR;
1526 m_freem(rxbuf->buf);
1527 }
1528 rxbuf->buf = NULL;
1529 if (rxbuf->pmap != NULL) {
1530 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1531 rxbuf->pmap = NULL;
1532 }
1533 }
1534 if (rxr->rx_buffers != NULL) {
1535 free(rxr->rx_buffers, M_IXV);
1536 rxr->rx_buffers = NULL;
1537 }
1538 }
1539
1540 if (rxr->ptag != NULL) {
1541 bus_dma_tag_destroy(rxr->ptag);
1542 rxr->ptag = NULL;
1543 }
1544
1545 return;
1546 } /* ixgbe_free_receive_buffers */
1547
1548 /************************************************************************
1549 * ixgbe_rx_input
1550 ************************************************************************/
1551 static __inline void
ixgbe_rx_input(struct rx_ring * rxr,struct ifnet * ifp,struct mbuf * m,u32 ptype)1552 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1553 u32 ptype)
1554 {
1555 /*
1556 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1557 * should be computed by hardware. Also it should not have VLAN tag in
1558 * ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1559 */
1560 if (rxr->lro_enabled &&
1561 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1562 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1563 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1564 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1565 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1566 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1567 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1568 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1569 /*
1570 * Send to the stack if:
1571 * - LRO not enabled, or
1572 * - no LRO resources, or
1573 * - lro enqueue fails
1574 */
1575 if (rxr->lro.lro_cnt != 0)
1576 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1577 return;
1578 }
1579 IXGBE_RX_UNLOCK(rxr);
1580 (*ifp->if_input)(ifp, m);
1581 IXGBE_RX_LOCK(rxr);
1582 } /* ixgbe_rx_input */
1583
1584 /************************************************************************
1585 * ixgbe_rx_discard
1586 ************************************************************************/
1587 static __inline void
ixgbe_rx_discard(struct rx_ring * rxr,int i)1588 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1589 {
1590 struct ixgbe_rx_buf *rbuf;
1591
1592 rbuf = &rxr->rx_buffers[i];
1593
1594 /*
1595 * With advanced descriptors the writeback
1596 * clobbers the buffer addrs, so its easier
1597 * to just free the existing mbufs and take
1598 * the normal refresh path to get new buffers
1599 * and mapping.
1600 */
1601
1602 if (rbuf->fmp != NULL) {/* Partial chain ? */
1603 rbuf->fmp->m_flags |= M_PKTHDR;
1604 m_freem(rbuf->fmp);
1605 rbuf->fmp = NULL;
1606 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1607 } else if (rbuf->buf) {
1608 m_free(rbuf->buf);
1609 rbuf->buf = NULL;
1610 }
1611 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1612
1613 rbuf->flags = 0;
1614
1615 return;
1616 } /* ixgbe_rx_discard */
1617
1618
1619 /************************************************************************
1620 * ixv_rxeof
1621 *
1622 * This routine executes in interrupt context. It replenishes
1623 * the mbufs in the descriptor and sends data which has been
1624 * dma'ed into host memory to upper layer.
1625 *
1626 * Return TRUE for more work, FALSE for all clean.
1627 ************************************************************************/
1628 bool
ixv_rxeof(struct ix_queue * que)1629 ixv_rxeof(struct ix_queue *que)
1630 {
1631 struct adapter *adapter = que->adapter;
1632 struct rx_ring *rxr = que->rxr;
1633 struct ifnet *ifp = adapter->ifp;
1634 struct lro_ctrl *lro = &rxr->lro;
1635 #if __FreeBSD_version < 1100105
1636 struct lro_entry *queued;
1637 #endif
1638 union ixgbe_adv_rx_desc *cur;
1639 struct ixgbe_rx_buf *rbuf, *nbuf;
1640 int i, nextp, processed = 0;
1641 u32 staterr = 0;
1642 u32 count = adapter->rx_process_limit;
1643 u16 pkt_info;
1644
1645 IXGBE_RX_LOCK(rxr);
1646
1647 #ifdef DEV_NETMAP
1648 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1649 /* Same as the txeof routine: wakeup clients on intr. */
1650 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1651 IXGBE_RX_UNLOCK(rxr);
1652 return (FALSE);
1653 }
1654 }
1655 #endif /* DEV_NETMAP */
1656
1657 for (i = rxr->next_to_check; count != 0;) {
1658 struct mbuf *sendmp, *mp;
1659 u32 rsc, ptype;
1660 u16 len;
1661 u16 vtag = 0;
1662 bool eop;
1663
1664 /* Sync the ring. */
1665 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1666 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1667
1668 cur = &rxr->rx_base[i];
1669 staterr = le32toh(cur->wb.upper.status_error);
1670 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1671
1672 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1673 break;
1674 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1675 break;
1676
1677 count--;
1678 sendmp = NULL;
1679 nbuf = NULL;
1680 rsc = 0;
1681 cur->wb.upper.status_error = 0;
1682 rbuf = &rxr->rx_buffers[i];
1683 mp = rbuf->buf;
1684
1685 len = le16toh(cur->wb.upper.length);
1686 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1687 IXGBE_RXDADV_PKTTYPE_MASK;
1688 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1689
1690 /* Make sure bad packets are discarded */
1691 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1692 #if __FreeBSD_version >= 1100036
1693 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1694 #endif
1695 rxr->rx_discarded++;
1696 ixgbe_rx_discard(rxr, i);
1697 goto next_desc;
1698 }
1699
1700 /*
1701 * On 82599 which supports a hardware
1702 * LRO (called HW RSC), packets need
1703 * not be fragmented across sequential
1704 * descriptors, rather the next descriptor
1705 * is indicated in bits of the descriptor.
1706 * This also means that we might proceses
1707 * more than one packet at a time, something
1708 * that has never been true before, it
1709 * required eliminating global chain pointers
1710 * in favor of what we are doing here. -jfv
1711 */
1712 if (!eop) {
1713 /*
1714 * Figure out the next descriptor
1715 * of this frame.
1716 */
1717 if (rxr->hw_rsc == TRUE) {
1718 rsc = ixgbe_rsc_count(cur);
1719 rxr->rsc_num += (rsc - 1);
1720 }
1721 if (rsc) { /* Get hardware index */
1722 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1723 IXGBE_RXDADV_NEXTP_SHIFT);
1724 } else { /* Just sequential */
1725 nextp = i + 1;
1726 if (nextp == adapter->num_rx_desc)
1727 nextp = 0;
1728 }
1729 nbuf = &rxr->rx_buffers[nextp];
1730 prefetch(nbuf);
1731 }
1732 /*
1733 * Rather than using the fmp/lmp global pointers
1734 * we now keep the head of a packet chain in the
1735 * buffer struct and pass this along from one
1736 * descriptor to the next, until we get EOP.
1737 */
1738 mp->m_len = len;
1739 /*
1740 * See if there is a stored head
1741 * that determines what we are
1742 */
1743 sendmp = rbuf->fmp;
1744 if (sendmp != NULL) { /* secondary frag */
1745 rbuf->buf = rbuf->fmp = NULL;
1746 mp->m_flags &= ~M_PKTHDR;
1747 sendmp->m_pkthdr.len += mp->m_len;
1748 } else {
1749 /*
1750 * Optimize. This might be a small packet,
1751 * maybe just a TCP ACK. Do a fast copy that
1752 * is cache aligned into a new mbuf, and
1753 * leave the old mbuf+cluster for re-use.
1754 */
1755 if (eop && len <= IXGBE_RX_COPY_LEN) {
1756 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1757 if (sendmp != NULL) {
1758 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1759 ixv_bcopy(mp->m_data, sendmp->m_data,
1760 len);
1761 sendmp->m_len = len;
1762 rxr->rx_copies++;
1763 rbuf->flags |= IXGBE_RX_COPY;
1764 }
1765 }
1766 if (sendmp == NULL) {
1767 rbuf->buf = rbuf->fmp = NULL;
1768 sendmp = mp;
1769 }
1770
1771 /* first desc of a non-ps chain */
1772 sendmp->m_flags |= M_PKTHDR;
1773 sendmp->m_pkthdr.len = mp->m_len;
1774 }
1775 ++processed;
1776
1777 /* Pass the head pointer on */
1778 if (eop == 0) {
1779 nbuf->fmp = sendmp;
1780 sendmp = NULL;
1781 mp->m_next = nbuf->buf;
1782 } else { /* Sending this frame */
1783 sendmp->m_pkthdr.rcvif = ifp;
1784 rxr->rx_packets++;
1785 /* capture data for AIM */
1786 rxr->bytes += sendmp->m_pkthdr.len;
1787 rxr->rx_bytes += sendmp->m_pkthdr.len;
1788 /* Process vlan info */
1789 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1790 vtag = le16toh(cur->wb.upper.vlan);
1791 if (vtag) {
1792 sendmp->m_pkthdr.ether_vtag = vtag;
1793 sendmp->m_flags |= M_VLANTAG;
1794 }
1795 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1796 ixgbe_rx_checksum(staterr, sendmp, ptype);
1797
1798 /*
1799 * In case of multiqueue, we have RXCSUM.PCSD bit set
1800 * and never cleared. This means we have RSS hash
1801 * available to be used.
1802 */
1803 if (adapter->num_queues > 1) {
1804 sendmp->m_pkthdr.flowid =
1805 le32toh(cur->wb.lower.hi_dword.rss);
1806 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1807 case IXGBE_RXDADV_RSSTYPE_IPV4:
1808 M_HASHTYPE_SET(sendmp,
1809 M_HASHTYPE_RSS_IPV4);
1810 break;
1811 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1812 M_HASHTYPE_SET(sendmp,
1813 M_HASHTYPE_RSS_TCP_IPV4);
1814 break;
1815 case IXGBE_RXDADV_RSSTYPE_IPV6:
1816 M_HASHTYPE_SET(sendmp,
1817 M_HASHTYPE_RSS_IPV6);
1818 break;
1819 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1820 M_HASHTYPE_SET(sendmp,
1821 M_HASHTYPE_RSS_TCP_IPV6);
1822 break;
1823 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1824 M_HASHTYPE_SET(sendmp,
1825 M_HASHTYPE_RSS_IPV6_EX);
1826 break;
1827 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1828 M_HASHTYPE_SET(sendmp,
1829 M_HASHTYPE_RSS_TCP_IPV6_EX);
1830 break;
1831 #if __FreeBSD_version > 1100000
1832 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1833 M_HASHTYPE_SET(sendmp,
1834 M_HASHTYPE_RSS_UDP_IPV4);
1835 break;
1836 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1837 M_HASHTYPE_SET(sendmp,
1838 M_HASHTYPE_RSS_UDP_IPV6);
1839 break;
1840 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1841 M_HASHTYPE_SET(sendmp,
1842 M_HASHTYPE_RSS_UDP_IPV6_EX);
1843 break;
1844 #endif
1845 default:
1846 #if __FreeBSD_version < 1100116
1847 M_HASHTYPE_SET(sendmp,
1848 M_HASHTYPE_OPAQUE);
1849 #else
1850 M_HASHTYPE_SET(sendmp,
1851 M_HASHTYPE_OPAQUE_HASH);
1852 #endif
1853 }
1854 } else {
1855 sendmp->m_pkthdr.flowid = que->msix;
1856 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1857 }
1858 }
1859 next_desc:
1860 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1861 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1862
1863 /* Advance our pointers to the next descriptor. */
1864 if (++i == rxr->num_desc)
1865 i = 0;
1866
1867 /* Now send to the stack or do LRO */
1868 if (sendmp != NULL) {
1869 rxr->next_to_check = i;
1870 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1871 i = rxr->next_to_check;
1872 }
1873
1874 /* Every 8 descriptors we go to refresh mbufs */
1875 if (processed == 8) {
1876 ixgbe_refresh_mbufs(rxr, i);
1877 processed = 0;
1878 }
1879 }
1880
1881 /* Refresh any remaining buf structs */
1882 if (ixgbe_rx_unrefreshed(rxr))
1883 ixgbe_refresh_mbufs(rxr, i);
1884
1885 rxr->next_to_check = i;
1886
1887 /*
1888 * Flush any outstanding LRO work
1889 */
1890 #if __FreeBSD_version < 1100105
1891 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1892 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1893 tcp_lro_flush(lro, queued);
1894 }
1895 #else
1896 tcp_lro_flush_all(lro);
1897 #endif
1898
1899 IXGBE_RX_UNLOCK(rxr);
1900
1901 /*
1902 * Still have cleaning to do?
1903 */
1904 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1905 return (TRUE);
1906
1907 return (FALSE);
1908 } /* ixv_rxeof */
1909
1910
1911 /************************************************************************
1912 * ixgbe_rx_checksum
1913 *
1914 * Verify that the hardware indicated that the checksum is valid.
1915 * Inform the stack about the status of checksum so that stack
1916 * doesn't spend time verifying the checksum.
1917 ************************************************************************/
1918 static void
ixgbe_rx_checksum(u32 staterr,struct mbuf * mp,u32 ptype)1919 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1920 {
1921 u16 status = (u16)staterr;
1922 u8 errors = (u8)(staterr >> 24);
1923 bool sctp = false;
1924
1925 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1926 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1927 sctp = true;
1928
1929 /* IPv4 checksum */
1930 if (status & IXGBE_RXD_STAT_IPCS) {
1931 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1932 /* IP Checksum Good */
1933 if (!(errors & IXGBE_RXD_ERR_IPE))
1934 mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1935 }
1936 /* TCP/UDP/SCTP checksum */
1937 if (status & IXGBE_RXD_STAT_L4CS) {
1938 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1939 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1940 mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1941 if (!sctp)
1942 mp->m_pkthdr.csum_data = htons(0xffff);
1943 }
1944 }
1945 } /* ixgbe_rx_checksum */
1946
1947 /************************************************************************
1948 * ixgbe_dmamap_cb - Manage DMA'able memory.
1949 ************************************************************************/
1950 static void
ixgbe_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)1951 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1952 {
1953 if (error)
1954 return;
1955 *(bus_addr_t *)arg = segs->ds_addr;
1956
1957 return;
1958 } /* ixgbe_dmamap_cb */
1959
1960 /************************************************************************
1961 * ixgbe_dma_malloc
1962 ************************************************************************/
1963 static int
ixgbe_dma_malloc(struct adapter * adapter,bus_size_t size,struct ixgbe_dma_alloc * dma,int mapflags)1964 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1965 struct ixgbe_dma_alloc *dma, int mapflags)
1966 {
1967 device_t dev = adapter->dev;
1968 int r;
1969
1970 r = bus_dma_tag_create(
1971 /* parent */ bus_get_dma_tag(adapter->dev),
1972 /* alignment */ DBA_ALIGN,
1973 /* bounds */ 0,
1974 /* lowaddr */ BUS_SPACE_MAXADDR,
1975 /* highaddr */ BUS_SPACE_MAXADDR,
1976 /* filter */ NULL,
1977 /* filterarg */ NULL,
1978 /* maxsize */ size,
1979 /* nsegments */ 1,
1980 /* maxsegsize */ size,
1981 /* flags */ BUS_DMA_ALLOCNOW,
1982 /* lockfunc */ NULL,
1983 /* lockfuncarg */ NULL,
1984 &dma->dma_tag);
1985 if (r != 0) {
1986 device_printf(dev,
1987 "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1988 r);
1989 goto fail_0;
1990 }
1991 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1992 BUS_DMA_NOWAIT, &dma->dma_map);
1993 if (r != 0) {
1994 device_printf(dev,
1995 "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
1996 goto fail_1;
1997 }
1998 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
1999 ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2000 if (r != 0) {
2001 device_printf(dev,
2002 "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2003 goto fail_2;
2004 }
2005 dma->dma_size = size;
2006
2007 return (0);
2008 fail_2:
2009 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2010 fail_1:
2011 bus_dma_tag_destroy(dma->dma_tag);
2012 fail_0:
2013 dma->dma_tag = NULL;
2014
2015 return (r);
2016 } /* ixgbe_dma_malloc */
2017
2018 static void
ixgbe_dma_free(struct adapter * adapter,struct ixgbe_dma_alloc * dma)2019 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2020 {
2021 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2022 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2023 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2024 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2025 bus_dma_tag_destroy(dma->dma_tag);
2026 } /* ixgbe_dma_free */
2027
2028
2029 /************************************************************************
2030 * ixv_allocate_queues
2031 *
2032 * Allocate memory for the transmit and receive rings, and then
2033 * the descriptors associated with each, called only once at attach.
2034 ************************************************************************/
2035 int
ixv_allocate_queues(struct adapter * adapter)2036 ixv_allocate_queues(struct adapter *adapter)
2037 {
2038 device_t dev = adapter->dev;
2039 struct ix_queue *que;
2040 struct tx_ring *txr;
2041 struct rx_ring *rxr;
2042 int rsize, tsize, error = IXGBE_SUCCESS;
2043 int txconf = 0, rxconf = 0;
2044
2045 /* First, allocate the top level queue structs */
2046 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2047 adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2048 if (!adapter->queues) {
2049 device_printf(dev, "Unable to allocate queue memory\n");
2050 error = ENOMEM;
2051 goto fail;
2052 }
2053
2054 /* Second, allocate the TX ring struct memory */
2055 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2056 adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2057 if (!adapter->tx_rings) {
2058 device_printf(dev, "Unable to allocate TX ring memory\n");
2059 error = ENOMEM;
2060 goto tx_fail;
2061 }
2062
2063 /* Third, allocate the RX ring */
2064 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2065 adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
2066 if (!adapter->rx_rings) {
2067 device_printf(dev, "Unable to allocate RX ring memory\n");
2068 error = ENOMEM;
2069 goto rx_fail;
2070 }
2071
2072 /* For the ring itself */
2073 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2074 DBA_ALIGN);
2075
2076 /*
2077 * Now set up the TX queues, txconf is needed to handle the
2078 * possibility that things fail midcourse and we need to
2079 * undo memory gracefully
2080 */
2081 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2082 /* Set up some basics */
2083 txr = &adapter->tx_rings[i];
2084 txr->adapter = adapter;
2085 txr->br = NULL;
2086 txr->me = i;
2087 txr->num_desc = adapter->num_tx_desc;
2088
2089 /* Initialize the TX side lock */
2090 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2091 device_get_nameunit(dev), txr->me);
2092 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2093
2094 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2095 BUS_DMA_NOWAIT)) {
2096 device_printf(dev,
2097 "Unable to allocate TX Descriptor memory\n");
2098 error = ENOMEM;
2099 goto err_tx_desc;
2100 }
2101 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2102 bzero((void *)txr->tx_base, tsize);
2103
2104 /* Now allocate transmit buffers for the ring */
2105 if (ixgbe_allocate_transmit_buffers(txr)) {
2106 device_printf(dev,
2107 "Critical Failure setting up transmit buffers\n");
2108 error = ENOMEM;
2109 goto err_tx_desc;
2110 }
2111 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2112 /* Allocate a buf ring */
2113 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXV,
2114 M_WAITOK, &txr->tx_mtx);
2115 if (txr->br == NULL) {
2116 device_printf(dev,
2117 "Critical Failure setting up buf ring\n");
2118 error = ENOMEM;
2119 goto err_tx_desc;
2120 }
2121 }
2122 }
2123
2124 /*
2125 * Next the RX queues...
2126 */
2127 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2128 DBA_ALIGN);
2129 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2130 rxr = &adapter->rx_rings[i];
2131 /* Set up some basics */
2132 rxr->adapter = adapter;
2133 rxr->me = i;
2134 rxr->num_desc = adapter->num_rx_desc;
2135
2136 /* Initialize the RX side lock */
2137 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2138 device_get_nameunit(dev), rxr->me);
2139 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2140
2141 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2142 BUS_DMA_NOWAIT)) {
2143 device_printf(dev,
2144 "Unable to allocate RxDescriptor memory\n");
2145 error = ENOMEM;
2146 goto err_rx_desc;
2147 }
2148 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2149 bzero((void *)rxr->rx_base, rsize);
2150
2151 /* Allocate receive buffers for the ring */
2152 if (ixgbe_allocate_receive_buffers(rxr)) {
2153 device_printf(dev,
2154 "Critical Failure setting up receive buffers\n");
2155 error = ENOMEM;
2156 goto err_rx_desc;
2157 }
2158 }
2159
2160 /*
2161 * Finally set up the queue holding structs
2162 */
2163 for (int i = 0; i < adapter->num_queues; i++) {
2164 que = &adapter->queues[i];
2165 que->adapter = adapter;
2166 que->me = i;
2167 que->txr = &adapter->tx_rings[i];
2168 que->rxr = &adapter->rx_rings[i];
2169 }
2170
2171 return (0);
2172
2173 err_rx_desc:
2174 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2175 ixgbe_dma_free(adapter, &rxr->rxdma);
2176 err_tx_desc:
2177 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2178 ixgbe_dma_free(adapter, &txr->txdma);
2179 free(adapter->rx_rings, M_IXV);
2180 rx_fail:
2181 free(adapter->tx_rings, M_IXV);
2182 tx_fail:
2183 free(adapter->queues, M_IXV);
2184 fail:
2185 return (error);
2186 } /* ixv_allocate_queues */
2187