1 /******************************************************************************
2
3 Copyright (c) 2013-2019, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 ** IXL driver TX/RX Routines:
37 ** This was separated to allow usage by
38 ** both the PF and VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int ixl_tx_setup_offload(struct ixl_queue *,
58 struct mbuf *, u32 *, u32 *);
59 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60 static void ixl_queue_sw_irq(struct ixl_vsi *, int);
61
62 static inline void ixl_rx_discard(struct rx_ring *, int);
63 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
64 struct mbuf *, u8);
65
66 static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
67 static inline u32 ixl_get_tx_head(struct ixl_queue *que);
68
69 #ifdef DEV_NETMAP
70 #include <net/netmap.h>
71 #include <sys/selinfo.h>
72 #include <dev/netmap/netmap_kern.h>
73 #endif /* DEV_NETMAP */
74
#if defined(IXL_DEBUG)
/* Sysctl handlers that are only declared in debug builds. */
static int	ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS);
static int	ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS);
#endif /* IXL_DEBUG */
79
80 /*
81 * @key key is saved into this parameter
82 */
83 void
ixl_get_default_rss_key(u32 * key)84 ixl_get_default_rss_key(u32 *key)
85 {
86 MPASS(key != NULL);
87
88 u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
89 0x183cfd8c, 0xce880440, 0x580cbc3c,
90 0x35897377, 0x328b25e1, 0x4fa98922,
91 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
92 0x0, 0x0, 0x0};
93
94 bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
95 }
96
97 /**
98 * i40e_vc_stat_str - convert virtchnl status err code to a string
99 * @hw: pointer to the HW structure
100 * @stat_err: the status error code to convert
101 **/
102 const char *
i40e_vc_stat_str(struct i40e_hw * hw,enum virtchnl_status_code stat_err)103 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
104 {
105 switch (stat_err) {
106 case VIRTCHNL_STATUS_SUCCESS:
107 return "OK";
108 case VIRTCHNL_ERR_PARAM:
109 return "VIRTCHNL_ERR_PARAM";
110 case VIRTCHNL_STATUS_ERR_NO_MEMORY:
111 return "VIRTCHNL_STATUS_ERR_NO_MEMORY";
112 case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
113 return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
114 case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
115 return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
116 case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
117 return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
118 case VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR:
119 return "VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR";
120 case VIRTCHNL_STATUS_ERR_NOT_SUPPORTED:
121 return "VIRTCHNL_STATUS_ERR_NOT_SUPPORTED";
122 }
123
124 snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
125 return hw->err_str;
126 }
127
128 /*
129 * PCI BUSMASTER needs to be set for proper operation.
130 */
131 void
ixl_set_busmaster(device_t dev)132 ixl_set_busmaster(device_t dev)
133 {
134 u16 pci_cmd_word;
135
136 pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
137 pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
138 pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
139 }
140
141 /*
142 * Rewrite the ENABLE bit in the MSIX control register
143 */
144 void
ixl_set_msix_enable(device_t dev)145 ixl_set_msix_enable(device_t dev)
146 {
147 int msix_ctrl, rid;
148
149 pci_find_cap(dev, PCIY_MSIX, &rid);
150 rid += PCIR_MSIX_CTRL;
151 msix_ctrl = pci_read_config(dev, rid, 2);
152 msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
153 pci_write_config(dev, rid, msix_ctrl, 2);
154 }
155
156
157 /*
158 ** Multiqueue Transmit driver
159 */
160 int
ixl_mq_start(struct ifnet * ifp,struct mbuf * m)161 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
162 {
163 struct ixl_vsi *vsi = ifp->if_softc;
164 struct ixl_queue *que;
165 struct tx_ring *txr;
166 int err, i;
167 #ifdef RSS
168 u32 bucket_id;
169 #endif
170
171 /*
172 * Which queue to use:
173 *
174 * When doing RSS, map it to the same outbound
175 * queue as the incoming flow would be mapped to.
176 * If everything is setup correctly, it should be
177 * the same bucket that the current CPU we're on is.
178 */
179 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
180 #ifdef RSS
181 if (rss_hash2bucket(m->m_pkthdr.flowid,
182 M_HASHTYPE_GET(m), &bucket_id) == 0) {
183 i = bucket_id % vsi->num_queues;
184 } else
185 #endif
186 i = m->m_pkthdr.flowid % vsi->num_queues;
187 } else
188 i = curcpu % vsi->num_queues;
189
190 que = &vsi->queues[i];
191 txr = &que->txr;
192
193 err = drbr_enqueue(ifp, txr->br, m);
194 if (err)
195 return (err);
196 if (IXL_TX_TRYLOCK(txr)) {
197 ixl_mq_start_locked(ifp, txr);
198 IXL_TX_UNLOCK(txr);
199 } else
200 taskqueue_enqueue(que->tq, &que->tx_task);
201
202 return (0);
203 }
204
205 int
ixl_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)206 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
207 {
208 struct ixl_queue *que = txr->que;
209 struct ixl_vsi *vsi = que->vsi;
210 struct mbuf *next;
211 int err = 0;
212
213
214 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
215 vsi->link_active == 0)
216 return (ENETDOWN);
217
218 /* Process the transmit queue */
219 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
220 if ((err = ixl_xmit(que, &next)) != 0) {
221 if (next == NULL)
222 drbr_advance(ifp, txr->br);
223 else
224 drbr_putback(ifp, txr->br, next);
225 break;
226 }
227 drbr_advance(ifp, txr->br);
228 /* Send a copy of the frame to the BPF listener */
229 ETHER_BPF_MTAP(ifp, next);
230 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
231 break;
232 }
233
234 if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
235 ixl_txeof(que);
236
237 return (err);
238 }
239
240 /*
241 * Called from a taskqueue to drain queued transmit packets.
242 */
243 void
ixl_deferred_mq_start(void * arg,int pending)244 ixl_deferred_mq_start(void *arg, int pending)
245 {
246 struct ixl_queue *que = arg;
247 struct tx_ring *txr = &que->txr;
248 struct ixl_vsi *vsi = que->vsi;
249 struct ifnet *ifp = vsi->ifp;
250
251 IXL_TX_LOCK(txr);
252 if (!drbr_empty(ifp, txr->br))
253 ixl_mq_start_locked(ifp, txr);
254 IXL_TX_UNLOCK(txr);
255 }
256
257 /*
258 ** Flush all queue ring buffers
259 */
260 void
ixl_qflush(struct ifnet * ifp)261 ixl_qflush(struct ifnet *ifp)
262 {
263 struct ixl_vsi *vsi = ifp->if_softc;
264
265 for (int i = 0; i < vsi->num_queues; i++) {
266 struct ixl_queue *que = &vsi->queues[i];
267 struct tx_ring *txr = &que->txr;
268 struct mbuf *m;
269 IXL_TX_LOCK(txr);
270 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
271 m_freem(m);
272 IXL_TX_UNLOCK(txr);
273 }
274 if_qflush(ifp);
275 }
276
277 static inline bool
ixl_tso_detect_sparse(struct mbuf * mp)278 ixl_tso_detect_sparse(struct mbuf *mp)
279 {
280 struct mbuf *m;
281 int num, mss;
282
283 num = 0;
284 mss = mp->m_pkthdr.tso_segsz;
285
286 /* Exclude first mbuf; assume it contains all headers */
287 for (m = mp->m_next; m != NULL; m = m->m_next) {
288 if (m == NULL)
289 break;
290 num++;
291 mss -= m->m_len % mp->m_pkthdr.tso_segsz;
292
293 if (num > IXL_SPARSE_CHAIN)
294 return (true);
295 if (mss < 1) {
296 num = (mss == 0) ? 0 : 1;
297 mss += mp->m_pkthdr.tso_segsz;
298 }
299 }
300
301 return (false);
302 }
303
304
305 /*********************************************************************
306 *
307 * This routine maps the mbufs to tx descriptors, allowing the
308 * TX engine to transmit the packets.
309 * - return 0 on success, positive on failure
310 *
311 **********************************************************************/
312 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
313
314 static int
ixl_xmit(struct ixl_queue * que,struct mbuf ** m_headp)315 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
316 {
317 struct ixl_vsi *vsi = que->vsi;
318 struct i40e_hw *hw = vsi->hw;
319 struct tx_ring *txr = &que->txr;
320 struct ixl_tx_buf *buf;
321 struct i40e_tx_desc *txd = NULL;
322 struct mbuf *m_head, *m;
323 int i, j, error, nsegs;
324 int first, last = 0;
325 u16 vtag = 0;
326 u32 cmd, off;
327 bus_dmamap_t map;
328 bus_dma_tag_t tag;
329 bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];
330
331 cmd = off = 0;
332 m_head = *m_headp;
333
334 /*
335 * Important to capture the first descriptor
336 * used because it will contain the index of
337 * the one we tell the hardware to report back
338 */
339 first = txr->next_avail;
340 buf = &txr->buffers[first];
341 map = buf->map;
342 tag = txr->tx_tag;
343
344 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
345 /* Use larger mapping for TSO */
346 tag = txr->tso_tag;
347 if (ixl_tso_detect_sparse(m_head)) {
348 m = m_defrag(m_head, M_NOWAIT);
349 if (m == NULL) {
350 m_freem(*m_headp);
351 *m_headp = NULL;
352 return (ENOBUFS);
353 }
354 *m_headp = m;
355 }
356 }
357
358 /*
359 * Map the packet for DMA.
360 */
361 error = bus_dmamap_load_mbuf_sg(tag, map,
362 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
363
364 if (error == EFBIG) {
365 struct mbuf *m;
366
367 m = m_defrag(*m_headp, M_NOWAIT);
368 if (m == NULL) {
369 que->mbuf_defrag_failed++;
370 m_freem(*m_headp);
371 *m_headp = NULL;
372 return (ENOBUFS);
373 }
374 *m_headp = m;
375
376 /* Try it again */
377 error = bus_dmamap_load_mbuf_sg(tag, map,
378 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
379
380 if (error != 0) {
381 que->tx_dmamap_failed++;
382 m_freem(*m_headp);
383 *m_headp = NULL;
384 return (error);
385 }
386 } else if (error != 0) {
387 que->tx_dmamap_failed++;
388 m_freem(*m_headp);
389 *m_headp = NULL;
390 return (error);
391 }
392
393 /* Make certain there are enough descriptors */
394 if (nsegs > txr->avail - 2) {
395 txr->no_desc++;
396 error = ENOBUFS;
397 goto xmit_fail;
398 }
399 m_head = *m_headp;
400
401 /* Set up the TSO/CSUM offload */
402 if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
403 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
404 if (error)
405 goto xmit_fail;
406 }
407
408 cmd |= I40E_TX_DESC_CMD_ICRC;
409 /* Grab the VLAN tag */
410 if (m_head->m_flags & M_VLANTAG) {
411 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
412 vtag = htole16(m_head->m_pkthdr.ether_vtag);
413 }
414
415 i = txr->next_avail;
416 for (j = 0; j < nsegs; j++) {
417 bus_size_t seglen;
418
419 buf = &txr->buffers[i];
420 buf->tag = tag; /* Keep track of the type tag */
421 txd = &txr->base[i];
422 seglen = segs[j].ds_len;
423
424 txd->buffer_addr = htole64(segs[j].ds_addr);
425 txd->cmd_type_offset_bsz =
426 htole64(I40E_TX_DESC_DTYPE_DATA
427 | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
428 | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
429 | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
430 | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));
431
432 last = i; /* descriptor that will get completion IRQ */
433
434 if (++i == que->num_tx_desc)
435 i = 0;
436
437 buf->m_head = NULL;
438 buf->eop_index = -1;
439 }
440 /* Set the last descriptor for report */
441 txd->cmd_type_offset_bsz |=
442 htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
443 txr->avail -= nsegs;
444 txr->next_avail = i;
445
446 buf->m_head = m_head;
447 /* Swap the dma map between the first and last descriptor.
448 * The descriptor that gets checked on completion will now
449 * have the real map from the first descriptor.
450 */
451 txr->buffers[first].map = buf->map;
452 buf->map = map;
453 bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
454
455 /* Set the index of the descriptor that will be marked done */
456 buf = &txr->buffers[first];
457 buf->eop_index = last;
458
459 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
460 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
461 /*
462 * Advance the Transmit Descriptor Tail (Tdt), this tells the
463 * hardware that this frame is available to transmit.
464 */
465 ++txr->total_packets;
466 wr32(hw, txr->tail, i);
467
468 /* Mark outstanding work */
469 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
470 return (0);
471
472 xmit_fail:
473 bus_dmamap_unload(tag, buf->map);
474 return (error);
475 }
476
477
478 /*********************************************************************
479 *
480 * Allocate memory for tx_buffer structures. The tx_buffer stores all
481 * the information needed to transmit a packet on the wire. This is
482 * called only once at attach, setup is done every reset.
483 *
484 **********************************************************************/
485 int
ixl_allocate_tx_data(struct ixl_queue * que)486 ixl_allocate_tx_data(struct ixl_queue *que)
487 {
488 struct tx_ring *txr = &que->txr;
489 struct ixl_vsi *vsi = que->vsi;
490 device_t dev = vsi->dev;
491 struct ixl_tx_buf *buf;
492 int i, error = 0;
493
494 /*
495 * Setup DMA descriptor areas.
496 */
497 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
498 1, 0, /* alignment, bounds */
499 BUS_SPACE_MAXADDR, /* lowaddr */
500 BUS_SPACE_MAXADDR, /* highaddr */
501 NULL, NULL, /* filter, filterarg */
502 IXL_TSO_SIZE, /* maxsize */
503 IXL_MAX_TX_SEGS, /* nsegments */
504 IXL_MAX_DMA_SEG_SIZE, /* maxsegsize */
505 0, /* flags */
506 NULL, /* lockfunc */
507 NULL, /* lockfuncarg */
508 &txr->tx_tag))) {
509 device_printf(dev,"Unable to allocate TX DMA tag\n");
510 return (error);
511 }
512
513 /* Make a special tag for TSO */
514 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
515 1, 0, /* alignment, bounds */
516 BUS_SPACE_MAXADDR, /* lowaddr */
517 BUS_SPACE_MAXADDR, /* highaddr */
518 NULL, NULL, /* filter, filterarg */
519 IXL_TSO_SIZE, /* maxsize */
520 IXL_MAX_TSO_SEGS, /* nsegments */
521 IXL_MAX_DMA_SEG_SIZE, /* maxsegsize */
522 0, /* flags */
523 NULL, /* lockfunc */
524 NULL, /* lockfuncarg */
525 &txr->tso_tag))) {
526 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
527 goto free_tx_dma;
528 }
529
530 if (!(txr->buffers =
531 (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
532 que->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
533 device_printf(dev, "Unable to allocate tx_buffer memory\n");
534 error = ENOMEM;
535 goto free_tx_tso_dma;
536 }
537
538 /* Create the descriptor buffer default dma maps */
539 buf = txr->buffers;
540 for (i = 0; i < que->num_tx_desc; i++, buf++) {
541 buf->tag = txr->tx_tag;
542 error = bus_dmamap_create(buf->tag, 0, &buf->map);
543 if (error != 0) {
544 device_printf(dev, "Unable to create TX DMA map\n");
545 goto free_buffers;
546 }
547 }
548
549 return 0;
550
551 free_buffers:
552 while (i--) {
553 buf--;
554 bus_dmamap_destroy(buf->tag, buf->map);
555 }
556
557 free(txr->buffers, M_DEVBUF);
558 txr->buffers = NULL;
559 free_tx_tso_dma:
560 bus_dma_tag_destroy(txr->tso_tag);
561 txr->tso_tag = NULL;
562 free_tx_dma:
563 bus_dma_tag_destroy(txr->tx_tag);
564 txr->tx_tag = NULL;
565
566 return (error);
567 }
568
569
570 /*********************************************************************
571 *
572 * (Re)Initialize a queue transmit ring.
573 * - called by init, it clears the descriptor ring,
574 * and frees any stale mbufs
575 *
576 **********************************************************************/
577 void
ixl_init_tx_ring(struct ixl_queue * que)578 ixl_init_tx_ring(struct ixl_queue *que)
579 {
580 #ifdef DEV_NETMAP
581 struct netmap_adapter *na = NA(que->vsi->ifp);
582 struct netmap_slot *slot;
583 #endif /* DEV_NETMAP */
584 struct tx_ring *txr = &que->txr;
585 struct ixl_tx_buf *buf;
586
587 /* Clear the old ring contents */
588 IXL_TX_LOCK(txr);
589
590 #ifdef DEV_NETMAP
591 /*
592 * (under lock): if in netmap mode, do some consistency
593 * checks and set slot to entry 0 of the netmap ring.
594 */
595 slot = netmap_reset(na, NR_TX, que->me, 0);
596 #endif /* DEV_NETMAP */
597
598 bzero((void *)txr->base,
599 (sizeof(struct i40e_tx_desc)) * que->num_tx_desc);
600
601 /* Reset indices */
602 txr->next_avail = 0;
603 txr->next_to_clean = 0;
604
605 /* Reset watchdog status */
606 txr->watchdog_timer = 0;
607
608 /* Free any existing tx mbufs. */
609 buf = txr->buffers;
610 for (int i = 0; i < que->num_tx_desc; i++, buf++) {
611 if (buf->m_head != NULL) {
612 bus_dmamap_sync(buf->tag, buf->map,
613 BUS_DMASYNC_POSTWRITE);
614 bus_dmamap_unload(buf->tag, buf->map);
615 m_freem(buf->m_head);
616 buf->m_head = NULL;
617 }
618 #ifdef DEV_NETMAP
619 /*
620 * In netmap mode, set the map for the packet buffer.
621 * NOTE: Some drivers (not this one) also need to set
622 * the physical buffer address in the NIC ring.
623 * netmap_idx_n2k() maps a nic index, i, into the corresponding
624 * netmap slot index, si
625 */
626 if (slot) {
627 int si = netmap_idx_n2k(na->tx_rings[que->me], i);
628 netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
629 }
630 #endif /* DEV_NETMAP */
631 /* Clear the EOP index */
632 buf->eop_index = -1;
633 }
634
635 /* Set number of descriptors available */
636 txr->avail = que->num_tx_desc;
637
638 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
639 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
640 IXL_TX_UNLOCK(txr);
641 }
642
643
644 /*********************************************************************
645 *
646 * Free transmit ring related data structures.
647 *
648 **********************************************************************/
649 void
ixl_free_que_tx(struct ixl_queue * que)650 ixl_free_que_tx(struct ixl_queue *que)
651 {
652 struct tx_ring *txr = &que->txr;
653 struct ixl_tx_buf *buf;
654
655 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
656
657 for (int i = 0; i < que->num_tx_desc; i++) {
658 buf = &txr->buffers[i];
659 if (buf->m_head != NULL) {
660 bus_dmamap_sync(buf->tag, buf->map,
661 BUS_DMASYNC_POSTWRITE);
662 m_freem(buf->m_head);
663 buf->m_head = NULL;
664 }
665 bus_dmamap_unload(buf->tag, buf->map);
666 bus_dmamap_destroy(buf->tag, buf->map);
667 }
668 if (txr->buffers != NULL) {
669 free(txr->buffers, M_DEVBUF);
670 txr->buffers = NULL;
671 }
672 if (txr->tx_tag != NULL) {
673 bus_dma_tag_destroy(txr->tx_tag);
674 txr->tx_tag = NULL;
675 }
676 if (txr->tso_tag != NULL) {
677 bus_dma_tag_destroy(txr->tso_tag);
678 txr->tso_tag = NULL;
679 }
680
681 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
682 return;
683 }
684
685 /*********************************************************************
686 *
687 * Setup descriptor for hw offloads
688 *
689 **********************************************************************/
690
691 static int
ixl_tx_setup_offload(struct ixl_queue * que,struct mbuf * mp,u32 * cmd,u32 * off)692 ixl_tx_setup_offload(struct ixl_queue *que,
693 struct mbuf *mp, u32 *cmd, u32 *off)
694 {
695 struct ether_vlan_header *eh;
696 #ifdef INET
697 struct ip *ip = NULL;
698 #endif
699 struct tcphdr *th = NULL;
700 #ifdef INET6
701 struct ip6_hdr *ip6;
702 #endif
703 int elen, ip_hlen = 0, tcp_hlen;
704 u16 etype;
705 u8 ipproto = 0;
706 bool tso = FALSE;
707
708 /* Set up the TSO context descriptor if required */
709 if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
710 tso = ixl_tso_setup(que, mp);
711 if (tso)
712 ++que->tso;
713 else
714 return (ENXIO);
715 }
716
717 /*
718 * Determine where frame payload starts.
719 * Jump over vlan headers if already present,
720 * helpful for QinQ too.
721 */
722 eh = mtod(mp, struct ether_vlan_header *);
723 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
724 etype = ntohs(eh->evl_proto);
725 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
726 } else {
727 etype = ntohs(eh->evl_encap_proto);
728 elen = ETHER_HDR_LEN;
729 }
730
731 switch (etype) {
732 #ifdef INET
733 case ETHERTYPE_IP:
734 ip = (struct ip *)(mp->m_data + elen);
735 ip_hlen = ip->ip_hl << 2;
736 ipproto = ip->ip_p;
737 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
738 /* The IP checksum must be recalculated with TSO */
739 if (tso)
740 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
741 else
742 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
743 break;
744 #endif
745 #ifdef INET6
746 case ETHERTYPE_IPV6:
747 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
748 ip_hlen = sizeof(struct ip6_hdr);
749 ipproto = ip6->ip6_nxt;
750 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
751 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
752 break;
753 #endif
754 default:
755 break;
756 }
757
758 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
759 *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
760
761 switch (ipproto) {
762 case IPPROTO_TCP:
763 tcp_hlen = th->th_off << 2;
764 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
765 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
766 *off |= (tcp_hlen >> 2) <<
767 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
768 }
769 break;
770 case IPPROTO_UDP:
771 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
772 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
773 *off |= (sizeof(struct udphdr) >> 2) <<
774 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
775 }
776 break;
777 case IPPROTO_SCTP:
778 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
779 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
780 *off |= (sizeof(struct sctphdr) >> 2) <<
781 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
782 }
783 /* Fall Thru */
784 default:
785 break;
786 }
787
788 return (0);
789 }
790
791
792 /**********************************************************************
793 *
794 * Setup context for hardware segmentation offload (TSO)
795 *
796 **********************************************************************/
797 static bool
ixl_tso_setup(struct ixl_queue * que,struct mbuf * mp)798 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
799 {
800 struct tx_ring *txr = &que->txr;
801 struct i40e_tx_context_desc *TXD;
802 struct ixl_tx_buf *buf;
803 u32 cmd, mss, type, tsolen;
804 u16 etype;
805 int idx, elen, ip_hlen, tcp_hlen;
806 struct ether_vlan_header *eh;
807 #ifdef INET
808 struct ip *ip;
809 #endif
810 #ifdef INET6
811 struct ip6_hdr *ip6;
812 #endif
813 #if defined(INET6) || defined(INET)
814 struct tcphdr *th;
815 #endif
816 u64 type_cmd_tso_mss;
817
818 /*
819 * Determine where frame payload starts.
820 * Jump over vlan headers if already present
821 */
822 eh = mtod(mp, struct ether_vlan_header *);
823 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
824 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
825 etype = eh->evl_proto;
826 } else {
827 elen = ETHER_HDR_LEN;
828 etype = eh->evl_encap_proto;
829 }
830
831 switch (ntohs(etype)) {
832 #ifdef INET6
833 case ETHERTYPE_IPV6:
834 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
835 if (ip6->ip6_nxt != IPPROTO_TCP)
836 return (ENXIO);
837 ip_hlen = sizeof(struct ip6_hdr);
838 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
839 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
840 tcp_hlen = th->th_off << 2;
841 /*
842 * The corresponding flag is set by the stack in the IPv4
843 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
844 * So, set it here because the rest of the flow requires it.
845 */
846 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
847 break;
848 #endif
849 #ifdef INET
850 case ETHERTYPE_IP:
851 ip = (struct ip *)(mp->m_data + elen);
852 if (ip->ip_p != IPPROTO_TCP)
853 return (ENXIO);
854 ip->ip_sum = 0;
855 ip_hlen = ip->ip_hl << 2;
856 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
857 th->th_sum = in_pseudo(ip->ip_src.s_addr,
858 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
859 tcp_hlen = th->th_off << 2;
860 break;
861 #endif
862 default:
863 printf("%s: CSUM_TSO but no supported IP version (0x%04x)",
864 __func__, ntohs(etype));
865 return FALSE;
866 }
867
868 /* Ensure we have at least the IP+TCP header in the first mbuf. */
869 if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
870 return FALSE;
871
872 idx = txr->next_avail;
873 buf = &txr->buffers[idx];
874 TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
875 tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
876
877 type = I40E_TX_DESC_DTYPE_CONTEXT;
878 cmd = I40E_TX_CTX_DESC_TSO;
879 /* TSO MSS must not be less than 64 */
880 if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
881 que->mss_too_small++;
882 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
883 }
884 mss = mp->m_pkthdr.tso_segsz;
885
886 type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
887 ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
888 ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
889 ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
890 TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
891
892 TXD->tunneling_params = htole32(0);
893 buf->m_head = NULL;
894 buf->eop_index = -1;
895
896 if (++idx == que->num_tx_desc)
897 idx = 0;
898
899 txr->avail--;
900 txr->next_avail = idx;
901
902 return TRUE;
903 }
904
905 /*
906 * ixl_get_tx_head - Retrieve the value from the
907 * location the HW records its HEAD index
908 */
909 static inline u32
ixl_get_tx_head(struct ixl_queue * que)910 ixl_get_tx_head(struct ixl_queue *que)
911 {
912 struct tx_ring *txr = &que->txr;
913 void *head = &txr->base[que->num_tx_desc];
914 return LE32_TO_CPU(*(volatile __le32 *)head);
915 }
916
917 /**********************************************************************
918 *
919 * Get index of last used descriptor/buffer from hardware, and clean
920 * the descriptors/buffers up to that index.
921 *
922 **********************************************************************/
923 static bool
ixl_txeof_hwb(struct ixl_queue * que)924 ixl_txeof_hwb(struct ixl_queue *que)
925 {
926 struct tx_ring *txr = &que->txr;
927 u32 first, last, head, done;
928 struct ixl_tx_buf *buf;
929 struct i40e_tx_desc *tx_desc, *eop_desc;
930
931 mtx_assert(&txr->mtx, MA_OWNED);
932
933 #ifdef DEV_NETMAP
934 // XXX todo: implement moderation
935 if (netmap_tx_irq(que->vsi->ifp, que->me))
936 return FALSE;
937 #endif /* DEF_NETMAP */
938
939 /* These are not the descriptors you seek, move along :) */
940 if (txr->avail == que->num_tx_desc) {
941 atomic_store_rel_32(&txr->watchdog_timer, 0);
942 return FALSE;
943 }
944
945 first = txr->next_to_clean;
946 buf = &txr->buffers[first];
947 tx_desc = (struct i40e_tx_desc *)&txr->base[first];
948 last = buf->eop_index;
949 if (last == -1)
950 return FALSE;
951 eop_desc = (struct i40e_tx_desc *)&txr->base[last];
952
953 /* Sync DMA before reading head index from ring */
954 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
955 BUS_DMASYNC_POSTREAD);
956
957 /* Get the Head WB value */
958 head = ixl_get_tx_head(que);
959
960 /*
961 ** Get the index of the first descriptor
962 ** BEYOND the EOP and call that 'done'.
963 ** I do this so the comparison in the
964 ** inner while loop below can be simple
965 */
966 if (++last == que->num_tx_desc) last = 0;
967 done = last;
968
969 /*
970 ** The HEAD index of the ring is written in a
971 ** defined location, this rather than a done bit
972 ** is what is used to keep track of what must be
973 ** 'cleaned'.
974 */
975 while (first != head) {
976 /* We clean the range of the packet */
977 while (first != done) {
978 ++txr->avail;
979
980 if (buf->m_head) {
981 txr->bytes += /* for ITR adjustment */
982 buf->m_head->m_pkthdr.len;
983 txr->tx_bytes += /* for TX stats */
984 buf->m_head->m_pkthdr.len;
985 bus_dmamap_sync(buf->tag,
986 buf->map,
987 BUS_DMASYNC_POSTWRITE);
988 bus_dmamap_unload(buf->tag,
989 buf->map);
990 m_freem(buf->m_head);
991 buf->m_head = NULL;
992 }
993 buf->eop_index = -1;
994
995 if (++first == que->num_tx_desc)
996 first = 0;
997
998 buf = &txr->buffers[first];
999 tx_desc = &txr->base[first];
1000 }
1001 ++txr->packets;
1002 /* If a packet was successfully cleaned, reset the watchdog timer */
1003 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
1004 /* See if there is more work now */
1005 last = buf->eop_index;
1006 if (last != -1) {
1007 eop_desc = &txr->base[last];
1008 /* Get next done point */
1009 if (++last == que->num_tx_desc) last = 0;
1010 done = last;
1011 } else
1012 break;
1013 }
1014 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
1015 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1016
1017 txr->next_to_clean = first;
1018
1019 /*
1020 * If there are no pending descriptors, clear the timeout.
1021 */
1022 if (txr->avail == que->num_tx_desc) {
1023 atomic_store_rel_32(&txr->watchdog_timer, 0);
1024 return FALSE;
1025 }
1026
1027 return TRUE;
1028 }
1029
1030 /**********************************************************************
1031 *
1032 * Use index kept by driver and the flag on each descriptor to find used
1033 * descriptor/buffers and clean them up for re-use.
1034 *
1035 * This method of reclaiming descriptors is current incompatible with
1036 * DEV_NETMAP.
1037 *
1038 * Returns TRUE if there are more descriptors to be cleaned after this
1039 * function exits.
1040 *
1041 **********************************************************************/
1042 static bool
ixl_txeof_dwb(struct ixl_queue * que)1043 ixl_txeof_dwb(struct ixl_queue *que)
1044 {
1045 struct tx_ring *txr = &que->txr;
1046 u32 first, last, done;
1047 u32 limit = 256;
1048 struct ixl_tx_buf *buf;
1049 struct i40e_tx_desc *tx_desc, *eop_desc;
1050
1051 mtx_assert(&txr->mtx, MA_OWNED);
1052
1053 /* There are no descriptors to clean */
1054 if (txr->avail == que->num_tx_desc) {
1055 atomic_store_rel_32(&txr->watchdog_timer, 0);
1056 return FALSE;
1057 }
1058
1059 /* Set starting index/descriptor/buffer */
1060 first = txr->next_to_clean;
1061 buf = &txr->buffers[first];
1062 tx_desc = &txr->base[first];
1063
1064 /*
1065 * This function operates per-packet -- identifies the start of the
1066 * packet and gets the index of the last descriptor of the packet from
1067 * it, from eop_index.
1068 *
1069 * If the last descriptor is marked "done" by the hardware, then all
1070 * of the descriptors for the packet are cleaned.
1071 */
1072 last = buf->eop_index;
1073 if (last == -1)
1074 return FALSE;
1075 eop_desc = &txr->base[last];
1076
1077 /* Sync DMA before reading from ring */
1078 bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD);
1079
1080 /*
1081 * Get the index of the first descriptor beyond the EOP and call that
1082 * 'done'. Simplifies the comparison for the inner loop below.
1083 */
1084 if (++last == que->num_tx_desc)
1085 last = 0;
1086 done = last;
1087
1088 /*
1089 * We find the last completed descriptor by examining each
1090 * descriptor's status bits to see if it's done.
1091 */
1092 do {
1093 /* Break if last descriptor in packet isn't marked done */
1094 if ((eop_desc->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK)
1095 != I40E_TX_DESC_DTYPE_DESC_DONE)
1096 break;
1097
1098 /* Clean the descriptors that make up the processed packet */
1099 while (first != done) {
1100 /*
1101 * If there was a buffer attached to this descriptor,
1102 * prevent the adapter from accessing it, and add its
1103 * length to the queue's TX stats.
1104 */
1105 if (buf->m_head) {
1106 txr->bytes += buf->m_head->m_pkthdr.len;
1107 txr->tx_bytes += buf->m_head->m_pkthdr.len;
1108 bus_dmamap_sync(buf->tag, buf->map,
1109 BUS_DMASYNC_POSTWRITE);
1110 bus_dmamap_unload(buf->tag, buf->map);
1111 m_freem(buf->m_head);
1112 buf->m_head = NULL;
1113 }
1114 buf->eop_index = -1;
1115 ++txr->avail;
1116
1117 if (++first == que->num_tx_desc)
1118 first = 0;
1119 buf = &txr->buffers[first];
1120 tx_desc = &txr->base[first];
1121 }
1122 ++txr->packets;
1123 /* If a packet was successfully cleaned, reset the watchdog timer */
1124 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
1125
1126 /*
1127 * Since buf is the first buffer after the one that was just
1128 * cleaned, check if the packet it starts is done, too.
1129 */
1130 last = buf->eop_index;
1131 if (last != -1) {
1132 eop_desc = &txr->base[last];
1133 /* Get next done point */
1134 if (++last == que->num_tx_desc) last = 0;
1135 done = last;
1136 } else
1137 break;
1138 } while (--limit);
1139
1140 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
1141 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1142
1143 txr->next_to_clean = first;
1144
1145 /*
1146 * If there are no pending descriptors, clear the watchdog timer.
1147 */
1148 if (txr->avail == que->num_tx_desc) {
1149 atomic_store_rel_32(&txr->watchdog_timer, 0);
1150 return FALSE;
1151 }
1152
1153 return TRUE;
1154 }
1155
1156 bool
ixl_txeof(struct ixl_queue * que)1157 ixl_txeof(struct ixl_queue *que)
1158 {
1159 struct ixl_vsi *vsi = que->vsi;
1160
1161 return (vsi->enable_head_writeback) ? ixl_txeof_hwb(que)
1162 : ixl_txeof_dwb(que);
1163 }
1164
1165
1166 /*********************************************************************
1167 *
1168 * Refresh mbuf buffers for RX descriptor rings
1169 * - now keeps its own state so discards due to resource
1170 * exhaustion are unnecessary, if an mbuf cannot be obtained
1171 * it just returns, keeping its placeholder, thus it can simply
1172 * be recalled to try again.
1173 *
1174 **********************************************************************/
1175 static void
ixl_refresh_mbufs(struct ixl_queue * que,int limit)1176 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
1177 {
1178 struct ixl_vsi *vsi = que->vsi;
1179 struct rx_ring *rxr = &que->rxr;
1180 bus_dma_segment_t hseg[1];
1181 bus_dma_segment_t pseg[1];
1182 struct ixl_rx_buf *buf;
1183 struct mbuf *mh, *mp;
1184 int i, j, nsegs, error;
1185 bool refreshed = FALSE;
1186
1187 i = j = rxr->next_refresh;
1188 /* Control the loop with one beyond */
1189 if (++j == que->num_rx_desc)
1190 j = 0;
1191
1192 while (j != limit) {
1193 buf = &rxr->buffers[i];
1194 if (rxr->hdr_split == FALSE)
1195 goto no_split;
1196
1197 if (buf->m_head == NULL) {
1198 mh = m_gethdr(M_NOWAIT, MT_DATA);
1199 if (mh == NULL)
1200 goto update;
1201 } else
1202 mh = buf->m_head;
1203
1204 mh->m_pkthdr.len = mh->m_len = MHLEN;
1205 mh->m_len = MHLEN;
1206 mh->m_flags |= M_PKTHDR;
1207 /* Get the memory mapping */
1208 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1209 buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
1210 if (error != 0) {
1211 printf("Refresh mbufs: hdr dmamap load"
1212 " failure - %d\n", error);
1213 m_free(mh);
1214 buf->m_head = NULL;
1215 goto update;
1216 }
1217 buf->m_head = mh;
1218 bus_dmamap_sync(rxr->htag, buf->hmap,
1219 BUS_DMASYNC_PREREAD);
1220 rxr->base[i].read.hdr_addr =
1221 htole64(hseg[0].ds_addr);
1222
1223 no_split:
1224 if (buf->m_pack == NULL) {
1225 mp = m_getjcl(M_NOWAIT, MT_DATA,
1226 M_PKTHDR, rxr->mbuf_sz);
1227 if (mp == NULL)
1228 goto update;
1229 } else
1230 mp = buf->m_pack;
1231
1232 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1233 /* Get the memory mapping */
1234 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1235 buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
1236 if (error != 0) {
1237 printf("Refresh mbufs: payload dmamap load"
1238 " failure - %d\n", error);
1239 m_free(mp);
1240 buf->m_pack = NULL;
1241 goto update;
1242 }
1243 buf->m_pack = mp;
1244 bus_dmamap_sync(rxr->ptag, buf->pmap,
1245 BUS_DMASYNC_PREREAD);
1246 rxr->base[i].read.pkt_addr =
1247 htole64(pseg[0].ds_addr);
1248 /* Used only when doing header split */
1249 rxr->base[i].read.hdr_addr = 0;
1250
1251 refreshed = TRUE;
1252 /* Next is precalculated */
1253 i = j;
1254 rxr->next_refresh = i;
1255 if (++j == que->num_rx_desc)
1256 j = 0;
1257 }
1258 update:
1259 if (refreshed) /* Update hardware tail index */
1260 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1261 return;
1262 }
1263
1264
1265 /*********************************************************************
1266 *
1267 * Allocate memory for rx_buffer structures. Since we use one
1268 * rx_buffer per descriptor, the maximum number of rx_buffer's
1269 * that we'll need is equal to the number of receive descriptors
1270 * that we've defined.
1271 *
1272 **********************************************************************/
1273 int
ixl_allocate_rx_data(struct ixl_queue * que)1274 ixl_allocate_rx_data(struct ixl_queue *que)
1275 {
1276 struct rx_ring *rxr = &que->rxr;
1277 struct ixl_vsi *vsi = que->vsi;
1278 device_t dev = vsi->dev;
1279 struct ixl_rx_buf *buf;
1280 int i, bsize, error;
1281
1282 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1283 1, 0, /* alignment, bounds */
1284 BUS_SPACE_MAXADDR, /* lowaddr */
1285 BUS_SPACE_MAXADDR, /* highaddr */
1286 NULL, NULL, /* filter, filterarg */
1287 MSIZE, /* maxsize */
1288 1, /* nsegments */
1289 MSIZE, /* maxsegsize */
1290 0, /* flags */
1291 NULL, /* lockfunc */
1292 NULL, /* lockfuncarg */
1293 &rxr->htag))) {
1294 device_printf(dev, "Unable to create RX DMA htag\n");
1295 return (error);
1296 }
1297
1298 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1299 1, 0, /* alignment, bounds */
1300 BUS_SPACE_MAXADDR, /* lowaddr */
1301 BUS_SPACE_MAXADDR, /* highaddr */
1302 NULL, NULL, /* filter, filterarg */
1303 MJUM16BYTES, /* maxsize */
1304 1, /* nsegments */
1305 MJUM16BYTES, /* maxsegsize */
1306 0, /* flags */
1307 NULL, /* lockfunc */
1308 NULL, /* lockfuncarg */
1309 &rxr->ptag))) {
1310 device_printf(dev, "Unable to create RX DMA ptag\n");
1311 goto free_rx_htag;
1312 }
1313
1314 bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc;
1315 if (!(rxr->buffers =
1316 (struct ixl_rx_buf *) malloc(bsize,
1317 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1318 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1319 error = ENOMEM;
1320 goto free_rx_ptag;
1321 }
1322
1323 for (i = 0; i < que->num_rx_desc; i++) {
1324 buf = &rxr->buffers[i];
1325 error = bus_dmamap_create(rxr->htag,
1326 BUS_DMA_NOWAIT, &buf->hmap);
1327 if (error) {
1328 device_printf(dev, "Unable to create RX head map\n");
1329 goto free_buffers;
1330 }
1331 error = bus_dmamap_create(rxr->ptag,
1332 BUS_DMA_NOWAIT, &buf->pmap);
1333 if (error) {
1334 bus_dmamap_destroy(rxr->htag, buf->hmap);
1335 device_printf(dev, "Unable to create RX pkt map\n");
1336 goto free_buffers;
1337 }
1338 }
1339
1340 return 0;
1341 free_buffers:
1342 while (i--) {
1343 buf = &rxr->buffers[i];
1344 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1345 bus_dmamap_destroy(rxr->htag, buf->hmap);
1346 }
1347 free(rxr->buffers, M_DEVBUF);
1348 rxr->buffers = NULL;
1349 free_rx_ptag:
1350 bus_dma_tag_destroy(rxr->ptag);
1351 rxr->ptag = NULL;
1352 free_rx_htag:
1353 bus_dma_tag_destroy(rxr->htag);
1354 rxr->htag = NULL;
1355 return (error);
1356 }
1357
1358
1359 /*********************************************************************
1360 *
1361 * (Re)Initialize the queue receive ring and its buffers.
1362 *
1363 **********************************************************************/
1364 int
ixl_init_rx_ring(struct ixl_queue * que)1365 ixl_init_rx_ring(struct ixl_queue *que)
1366 {
1367 struct rx_ring *rxr = &que->rxr;
1368 struct ixl_vsi *vsi = que->vsi;
1369 #if defined(INET6) || defined(INET)
1370 struct ifnet *ifp = vsi->ifp;
1371 struct lro_ctrl *lro = &rxr->lro;
1372 #endif
1373 struct ixl_rx_buf *buf;
1374 bus_dma_segment_t pseg[1], hseg[1];
1375 int rsize, nsegs, error = 0;
1376 #ifdef DEV_NETMAP
1377 struct netmap_adapter *na = NA(que->vsi->ifp);
1378 struct netmap_slot *slot;
1379 #endif /* DEV_NETMAP */
1380
1381 IXL_RX_LOCK(rxr);
1382 #ifdef DEV_NETMAP
1383 /* same as in ixl_init_tx_ring() */
1384 slot = netmap_reset(na, NR_RX, que->me, 0);
1385 #endif /* DEV_NETMAP */
1386 /* Clear the ring contents */
1387 rsize = roundup2(que->num_rx_desc *
1388 sizeof(union i40e_rx_desc), DBA_ALIGN);
1389 bzero((void *)rxr->base, rsize);
1390 /* Cleanup any existing buffers */
1391 for (int i = 0; i < que->num_rx_desc; i++) {
1392 buf = &rxr->buffers[i];
1393 if (buf->m_head != NULL) {
1394 bus_dmamap_sync(rxr->htag, buf->hmap,
1395 BUS_DMASYNC_POSTREAD);
1396 bus_dmamap_unload(rxr->htag, buf->hmap);
1397 buf->m_head->m_flags |= M_PKTHDR;
1398 m_freem(buf->m_head);
1399 }
1400 if (buf->m_pack != NULL) {
1401 bus_dmamap_sync(rxr->ptag, buf->pmap,
1402 BUS_DMASYNC_POSTREAD);
1403 bus_dmamap_unload(rxr->ptag, buf->pmap);
1404 buf->m_pack->m_flags |= M_PKTHDR;
1405 m_freem(buf->m_pack);
1406 }
1407 buf->m_head = NULL;
1408 buf->m_pack = NULL;
1409 }
1410
1411 /* header split is off */
1412 rxr->hdr_split = FALSE;
1413
1414 /* Now replenish the mbufs */
1415 for (int j = 0; j != que->num_rx_desc; ++j) {
1416 struct mbuf *mh, *mp;
1417
1418 buf = &rxr->buffers[j];
1419 #ifdef DEV_NETMAP
1420 /*
1421 * In netmap mode, fill the map and set the buffer
1422 * address in the NIC ring, considering the offset
1423 * between the netmap and NIC rings (see comment in
1424 * ixgbe_setup_transmit_ring() ). No need to allocate
1425 * an mbuf, so end the block with a continue;
1426 */
1427 if (slot) {
1428 int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
1429 uint64_t paddr;
1430 void *addr;
1431
1432 addr = PNMB(na, slot + sj, &paddr);
1433 netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
1434 /* Update descriptor and the cached value */
1435 rxr->base[j].read.pkt_addr = htole64(paddr);
1436 rxr->base[j].read.hdr_addr = 0;
1437 continue;
1438 }
1439 #endif /* DEV_NETMAP */
1440 /*
1441 ** Don't allocate mbufs if not
1442 ** doing header split, its wasteful
1443 */
1444 if (rxr->hdr_split == FALSE)
1445 goto skip_head;
1446
1447 /* First the header */
1448 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1449 if (buf->m_head == NULL) {
1450 error = ENOBUFS;
1451 goto fail;
1452 }
1453 m_adj(buf->m_head, ETHER_ALIGN);
1454 mh = buf->m_head;
1455 mh->m_len = mh->m_pkthdr.len = MHLEN;
1456 mh->m_flags |= M_PKTHDR;
1457 /* Get the memory mapping */
1458 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1459 buf->hmap, buf->m_head, hseg,
1460 &nsegs, BUS_DMA_NOWAIT);
1461 if (error != 0) /* Nothing elegant to do here */
1462 goto fail;
1463 bus_dmamap_sync(rxr->htag,
1464 buf->hmap, BUS_DMASYNC_PREREAD);
1465 /* Update descriptor */
1466 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1467
1468 skip_head:
1469 /* Now the payload cluster */
1470 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1471 M_PKTHDR, rxr->mbuf_sz);
1472 if (buf->m_pack == NULL) {
1473 error = ENOBUFS;
1474 goto fail;
1475 }
1476 mp = buf->m_pack;
1477 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1478 /* Get the memory mapping */
1479 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1480 buf->pmap, mp, pseg,
1481 &nsegs, BUS_DMA_NOWAIT);
1482 if (error != 0)
1483 goto fail;
1484 bus_dmamap_sync(rxr->ptag,
1485 buf->pmap, BUS_DMASYNC_PREREAD);
1486 /* Update descriptor */
1487 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1488 rxr->base[j].read.hdr_addr = 0;
1489 }
1490
1491
1492 /* Setup our descriptor indices */
1493 rxr->next_check = 0;
1494 rxr->next_refresh = 0;
1495 rxr->lro_enabled = FALSE;
1496 rxr->split = 0;
1497 rxr->bytes = 0;
1498 rxr->discard = FALSE;
1499
1500 wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1);
1501 ixl_flush(vsi->hw);
1502
1503 #if defined(INET6) || defined(INET)
1504 /*
1505 ** Now set up the LRO interface:
1506 */
1507 if (ifp->if_capenable & IFCAP_LRO) {
1508 int err = tcp_lro_init(lro);
1509 if (err) {
1510 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1511 goto fail;
1512 }
1513 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1514 rxr->lro_enabled = TRUE;
1515 lro->ifp = vsi->ifp;
1516 }
1517 #endif
1518
1519 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1520 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1521
1522 fail:
1523 IXL_RX_UNLOCK(rxr);
1524 return (error);
1525 }
1526
1527
1528 /*********************************************************************
1529 *
1530 * Free station receive ring data structures
1531 *
1532 **********************************************************************/
1533 void
ixl_free_que_rx(struct ixl_queue * que)1534 ixl_free_que_rx(struct ixl_queue *que)
1535 {
1536 struct rx_ring *rxr = &que->rxr;
1537 struct ixl_rx_buf *buf;
1538
1539 /* Cleanup any existing buffers */
1540 if (rxr->buffers != NULL) {
1541 for (int i = 0; i < que->num_rx_desc; i++) {
1542 buf = &rxr->buffers[i];
1543
1544 /* Free buffers and unload dma maps */
1545 ixl_rx_discard(rxr, i);
1546
1547 bus_dmamap_destroy(rxr->htag, buf->hmap);
1548 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1549 }
1550 free(rxr->buffers, M_DEVBUF);
1551 rxr->buffers = NULL;
1552 }
1553
1554 if (rxr->htag != NULL) {
1555 bus_dma_tag_destroy(rxr->htag);
1556 rxr->htag = NULL;
1557 }
1558 if (rxr->ptag != NULL) {
1559 bus_dma_tag_destroy(rxr->ptag);
1560 rxr->ptag = NULL;
1561 }
1562 }
1563
1564 static inline void
ixl_rx_input(struct rx_ring * rxr,struct ifnet * ifp,struct mbuf * m,u8 ptype)1565 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1566 {
1567
1568 #if defined(INET6) || defined(INET)
1569 /*
1570 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
1571 * should be computed by hardware. Also it should not have VLAN tag in
1572 * ethernet header.
1573 */
1574 if (rxr->lro_enabled &&
1575 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1576 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1577 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1578 /*
1579 * Send to the stack if:
1580 ** - LRO not enabled, or
1581 ** - no LRO resources, or
1582 ** - lro enqueue fails
1583 */
1584 if (rxr->lro.lro_cnt != 0)
1585 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1586 return;
1587 }
1588 #endif
1589 IXL_RX_UNLOCK(rxr);
1590 (*ifp->if_input)(ifp, m);
1591 IXL_RX_LOCK(rxr);
1592 }
1593
1594
1595 static inline void
ixl_rx_discard(struct rx_ring * rxr,int i)1596 ixl_rx_discard(struct rx_ring *rxr, int i)
1597 {
1598 struct ixl_rx_buf *rbuf;
1599
1600 KASSERT(rxr != NULL, ("Receive ring pointer cannot be null"));
1601 KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_rx_desc"));
1602
1603 rbuf = &rxr->buffers[i];
1604
1605 /* Free the mbufs in the current chain for the packet */
1606 if (rbuf->fmp != NULL) {
1607 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1608 m_freem(rbuf->fmp);
1609 rbuf->fmp = NULL;
1610 }
1611
1612 /*
1613 * Free the mbufs for the current descriptor; and let ixl_refresh_mbufs()
1614 * assign new mbufs to these.
1615 */
1616 if (rbuf->m_head) {
1617 bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD);
1618 bus_dmamap_unload(rxr->htag, rbuf->hmap);
1619 m_free(rbuf->m_head);
1620 rbuf->m_head = NULL;
1621 }
1622
1623 if (rbuf->m_pack) {
1624 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1625 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1626 m_free(rbuf->m_pack);
1627 rbuf->m_pack = NULL;
1628 }
1629 }
1630
1631 #ifdef RSS
1632 /*
1633 ** i40e_ptype_to_hash: parse the packet type
1634 ** to determine the appropriate hash.
1635 */
1636 static inline int
ixl_ptype_to_hash(u8 ptype)1637 ixl_ptype_to_hash(u8 ptype)
1638 {
1639 struct i40e_rx_ptype_decoded decoded;
1640 u8 ex = 0;
1641
1642 decoded = decode_rx_desc_ptype(ptype);
1643 ex = decoded.outer_frag;
1644
1645 if (!decoded.known)
1646 return M_HASHTYPE_OPAQUE_HASH;
1647
1648 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
1649 return M_HASHTYPE_OPAQUE_HASH;
1650
1651 /* Note: anything that gets to this point is IP */
1652 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
1653 switch (decoded.inner_prot) {
1654 case I40E_RX_PTYPE_INNER_PROT_TCP:
1655 if (ex)
1656 return M_HASHTYPE_RSS_TCP_IPV6_EX;
1657 else
1658 return M_HASHTYPE_RSS_TCP_IPV6;
1659 case I40E_RX_PTYPE_INNER_PROT_UDP:
1660 if (ex)
1661 return M_HASHTYPE_RSS_UDP_IPV6_EX;
1662 else
1663 return M_HASHTYPE_RSS_UDP_IPV6;
1664 default:
1665 if (ex)
1666 return M_HASHTYPE_RSS_IPV6_EX;
1667 else
1668 return M_HASHTYPE_RSS_IPV6;
1669 }
1670 }
1671 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
1672 switch (decoded.inner_prot) {
1673 case I40E_RX_PTYPE_INNER_PROT_TCP:
1674 return M_HASHTYPE_RSS_TCP_IPV4;
1675 case I40E_RX_PTYPE_INNER_PROT_UDP:
1676 if (ex)
1677 return M_HASHTYPE_RSS_UDP_IPV4_EX;
1678 else
1679 return M_HASHTYPE_RSS_UDP_IPV4;
1680 default:
1681 return M_HASHTYPE_RSS_IPV4;
1682 }
1683 }
1684 /* We should never get here!! */
1685 return M_HASHTYPE_OPAQUE_HASH;
1686 }
1687 #endif /* RSS */
1688
1689 /*********************************************************************
1690 *
1691 * This routine executes in interrupt context. It replenishes
1692 * the mbufs in the descriptor and sends data which has been
1693 * dma'ed into host memory to upper layer.
1694 *
1695 * We loop at most count times if count is > 0, or until done if
1696 * count < 0.
1697 *
1698 * Return TRUE for more work, FALSE for all clean.
1699 *********************************************************************/
1700 bool
ixl_rxeof(struct ixl_queue * que,int count)1701 ixl_rxeof(struct ixl_queue *que, int count)
1702 {
1703 struct ixl_vsi *vsi = que->vsi;
1704 struct rx_ring *rxr = &que->rxr;
1705 struct ifnet *ifp = vsi->ifp;
1706 #if defined(INET6) || defined(INET)
1707 struct lro_ctrl *lro = &rxr->lro;
1708 #endif
1709 int i, nextp, processed = 0;
1710 union i40e_rx_desc *cur;
1711 struct ixl_rx_buf *rbuf, *nbuf;
1712
1713 IXL_RX_LOCK(rxr);
1714
1715 #ifdef DEV_NETMAP
1716 if (netmap_rx_irq(ifp, que->me, &count)) {
1717 IXL_RX_UNLOCK(rxr);
1718 return (FALSE);
1719 }
1720 #endif /* DEV_NETMAP */
1721
1722 for (i = rxr->next_check; count != 0;) {
1723 struct mbuf *sendmp, *mh, *mp;
1724 u32 status, error;
1725 u16 hlen, plen, vtag;
1726 u64 qword;
1727 u8 ptype;
1728 bool eop;
1729
1730 /* Sync the ring. */
1731 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1732 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1733
1734 cur = &rxr->base[i];
1735 qword = le64toh(cur->wb.qword1.status_error_len);
1736 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1737 >> I40E_RXD_QW1_STATUS_SHIFT;
1738 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1739 >> I40E_RXD_QW1_ERROR_SHIFT;
1740 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1741 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1742 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1743 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1744 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1745 >> I40E_RXD_QW1_PTYPE_SHIFT;
1746
1747 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1748 ++rxr->not_done;
1749 break;
1750 }
1751 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1752 break;
1753
1754 count--;
1755 sendmp = NULL;
1756 nbuf = NULL;
1757 cur->wb.qword1.status_error_len = 0;
1758 rbuf = &rxr->buffers[i];
1759 mh = rbuf->m_head;
1760 mp = rbuf->m_pack;
1761 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1762 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1763 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1764 else
1765 vtag = 0;
1766
1767 /* Remove device access to the rx buffers. */
1768 if (rbuf->m_head != NULL) {
1769 bus_dmamap_sync(rxr->htag, rbuf->hmap,
1770 BUS_DMASYNC_POSTREAD);
1771 bus_dmamap_unload(rxr->htag, rbuf->hmap);
1772 }
1773 if (rbuf->m_pack != NULL) {
1774 bus_dmamap_sync(rxr->ptag, rbuf->pmap,
1775 BUS_DMASYNC_POSTREAD);
1776 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1777 }
1778
1779 /*
1780 ** Make sure bad packets are discarded,
1781 ** note that only EOP descriptor has valid
1782 ** error results.
1783 */
1784 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1785 rxr->desc_errs++;
1786 ixl_rx_discard(rxr, i);
1787 goto next_desc;
1788 }
1789
1790 /* Prefetch the next buffer */
1791 if (!eop) {
1792 nextp = i + 1;
1793 if (nextp == que->num_rx_desc)
1794 nextp = 0;
1795 nbuf = &rxr->buffers[nextp];
1796 prefetch(nbuf);
1797 }
1798
1799 /*
1800 ** The header mbuf is ONLY used when header
1801 ** split is enabled, otherwise we get normal
1802 ** behavior, ie, both header and payload
1803 ** are DMA'd into the payload buffer.
1804 **
1805 ** Rather than using the fmp/lmp global pointers
1806 ** we now keep the head of a packet chain in the
1807 ** buffer struct and pass this along from one
1808 ** descriptor to the next, until we get EOP.
1809 */
1810 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1811 if (hlen > IXL_RX_HDR)
1812 hlen = IXL_RX_HDR;
1813 mh->m_len = hlen;
1814 mh->m_flags |= M_PKTHDR;
1815 mh->m_next = NULL;
1816 mh->m_pkthdr.len = mh->m_len;
1817 /* Null buf pointer so it is refreshed */
1818 rbuf->m_head = NULL;
1819 /*
1820 ** Check the payload length, this
1821 ** could be zero if its a small
1822 ** packet.
1823 */
1824 if (plen > 0) {
1825 mp->m_len = plen;
1826 mp->m_next = NULL;
1827 mp->m_flags &= ~M_PKTHDR;
1828 mh->m_next = mp;
1829 mh->m_pkthdr.len += mp->m_len;
1830 /* Null buf pointer so it is refreshed */
1831 rbuf->m_pack = NULL;
1832 rxr->split++;
1833 }
1834 /*
1835 ** Now create the forward
1836 ** chain so when complete
1837 ** we wont have to.
1838 */
1839 if (eop == 0) {
1840 /* stash the chain head */
1841 nbuf->fmp = mh;
1842 /* Make forward chain */
1843 if (plen)
1844 mp->m_next = nbuf->m_pack;
1845 else
1846 mh->m_next = nbuf->m_pack;
1847 } else {
1848 /* Singlet, prepare to send */
1849 sendmp = mh;
1850 if (vtag) {
1851 sendmp->m_pkthdr.ether_vtag = vtag;
1852 sendmp->m_flags |= M_VLANTAG;
1853 }
1854 }
1855 } else {
1856 /*
1857 ** Either no header split, or a
1858 ** secondary piece of a fragmented
1859 ** split packet.
1860 */
1861 mp->m_len = plen;
1862 /*
1863 ** See if there is a stored head
1864 ** that determines what we are
1865 */
1866 sendmp = rbuf->fmp;
1867 rbuf->m_pack = rbuf->fmp = NULL;
1868
1869 if (sendmp != NULL) /* secondary frag */
1870 sendmp->m_pkthdr.len += mp->m_len;
1871 else {
1872 /* first desc of a non-ps chain */
1873 sendmp = mp;
1874 sendmp->m_flags |= M_PKTHDR;
1875 sendmp->m_pkthdr.len = mp->m_len;
1876 }
1877 /* Pass the head pointer on */
1878 if (eop == 0) {
1879 nbuf->fmp = sendmp;
1880 sendmp = NULL;
1881 mp->m_next = nbuf->m_pack;
1882 }
1883 }
1884 ++processed;
1885 /* Sending this frame? */
1886 if (eop) {
1887 sendmp->m_pkthdr.rcvif = ifp;
1888 /* gather stats */
1889 rxr->rx_packets++;
1890 rxr->rx_bytes += sendmp->m_pkthdr.len;
1891 /* capture data for dynamic ITR adjustment */
1892 rxr->packets++;
1893 rxr->bytes += sendmp->m_pkthdr.len;
1894 /* Set VLAN tag (field only valid in eop desc) */
1895 if (vtag) {
1896 sendmp->m_pkthdr.ether_vtag = vtag;
1897 sendmp->m_flags |= M_VLANTAG;
1898 }
1899 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1900 ixl_rx_checksum(sendmp, status, error, ptype);
1901 #ifdef RSS
1902 sendmp->m_pkthdr.flowid =
1903 le32toh(cur->wb.qword0.hi_dword.rss);
1904 M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
1905 #else
1906 sendmp->m_pkthdr.flowid = que->msix;
1907 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1908 #endif
1909 }
1910 next_desc:
1911 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1912 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1913
1914 /* Advance our pointers to the next descriptor. */
1915 if (++i == que->num_rx_desc)
1916 i = 0;
1917
1918 /* Now send to the stack or do LRO */
1919 if (sendmp != NULL) {
1920 rxr->next_check = i;
1921 ixl_rx_input(rxr, ifp, sendmp, ptype);
1922 /*
1923 * Update index used in loop in case another
1924 * ixl_rxeof() call executes when lock is released
1925 */
1926 i = rxr->next_check;
1927 }
1928
1929 /* Every 8 descriptors we go to refresh mbufs */
1930 if (processed == 8) {
1931 ixl_refresh_mbufs(que, i);
1932 processed = 0;
1933 }
1934 }
1935
1936 /* Refresh any remaining buf structs */
1937 if (ixl_rx_unrefreshed(que))
1938 ixl_refresh_mbufs(que, i);
1939
1940 rxr->next_check = i;
1941
1942 #if defined(INET6) || defined(INET)
1943 /*
1944 * Flush any outstanding LRO work
1945 */
1946 #if __FreeBSD_version >= 1100105
1947 tcp_lro_flush_all(lro);
1948 #else
1949 struct lro_entry *queued;
1950 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1951 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1952 tcp_lro_flush(lro, queued);
1953 }
1954 #endif
1955 #endif /* defined(INET6) || defined(INET) */
1956
1957 IXL_RX_UNLOCK(rxr);
1958 return (FALSE);
1959 }
1960
1961
1962 /*********************************************************************
1963 *
1964 * Verify that the hardware indicated that the checksum is valid.
1965 * Inform the stack about the status of checksum so that stack
1966 * doesn't spend time verifying the checksum.
1967 *
1968 *********************************************************************/
1969 static void
ixl_rx_checksum(struct mbuf * mp,u32 status,u32 error,u8 ptype)1970 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1971 {
1972 struct i40e_rx_ptype_decoded decoded;
1973
1974 decoded = decode_rx_desc_ptype(ptype);
1975
1976 /* Errors? */
1977 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1978 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1979 mp->m_pkthdr.csum_flags = 0;
1980 return;
1981 }
1982
1983 /* IPv6 with extension headers likely have bad csum */
1984 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1985 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1986 if (status &
1987 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1988 mp->m_pkthdr.csum_flags = 0;
1989 return;
1990 }
1991
1992
1993 /* IP Checksum Good */
1994 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1995 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1996
1997 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1998 mp->m_pkthdr.csum_flags |=
1999 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2000 mp->m_pkthdr.csum_data |= htons(0xffff);
2001 }
2002 return;
2003 }
2004
2005 #if __FreeBSD_version >= 1100000
2006 uint64_t
ixl_get_counter(if_t ifp,ift_counter cnt)2007 ixl_get_counter(if_t ifp, ift_counter cnt)
2008 {
2009 struct ixl_vsi *vsi;
2010
2011 vsi = if_getsoftc(ifp);
2012
2013 switch (cnt) {
2014 case IFCOUNTER_IPACKETS:
2015 return (vsi->ipackets);
2016 case IFCOUNTER_IERRORS:
2017 return (vsi->ierrors);
2018 case IFCOUNTER_OPACKETS:
2019 return (vsi->opackets);
2020 case IFCOUNTER_OERRORS:
2021 return (vsi->oerrors);
2022 case IFCOUNTER_COLLISIONS:
2023 /* Collisions are by standard impossible in 40G/10G Ethernet */
2024 return (0);
2025 case IFCOUNTER_IBYTES:
2026 return (vsi->ibytes);
2027 case IFCOUNTER_OBYTES:
2028 return (vsi->obytes);
2029 case IFCOUNTER_IMCASTS:
2030 return (vsi->imcasts);
2031 case IFCOUNTER_OMCASTS:
2032 return (vsi->omcasts);
2033 case IFCOUNTER_IQDROPS:
2034 return (vsi->iqdrops);
2035 case IFCOUNTER_OQDROPS:
2036 return (vsi->oqdrops);
2037 case IFCOUNTER_NOPROTO:
2038 return (vsi->noproto);
2039 default:
2040 return (if_get_counter_default(ifp, cnt));
2041 }
2042 }
2043 #endif
2044
2045 /*
2046 * Set TX and RX ring size adjusting value to supported range
2047 */
2048 void
ixl_vsi_setup_rings_size(struct ixl_vsi * vsi,int tx_ring_size,int rx_ring_size)2049 ixl_vsi_setup_rings_size(struct ixl_vsi * vsi, int tx_ring_size, int rx_ring_size)
2050 {
2051 struct device * dev = vsi->dev;
2052
2053 if (tx_ring_size < IXL_MIN_RING
2054 || tx_ring_size > IXL_MAX_RING
2055 || tx_ring_size % IXL_RING_INCREMENT != 0) {
2056 device_printf(dev, "Invalid tx_ring_size value of %d set!\n",
2057 tx_ring_size);
2058 device_printf(dev, "tx_ring_size must be between %d and %d, "
2059 "inclusive, and must be a multiple of %d\n",
2060 IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
2061 device_printf(dev, "Using default value of %d instead\n",
2062 IXL_DEFAULT_RING);
2063 vsi->num_tx_desc = IXL_DEFAULT_RING;
2064 } else
2065 vsi->num_tx_desc = tx_ring_size;
2066
2067 if (rx_ring_size < IXL_MIN_RING
2068 || rx_ring_size > IXL_MAX_RING
2069 || rx_ring_size % IXL_RING_INCREMENT != 0) {
2070 device_printf(dev, "Invalid rx_ring_size value of %d set!\n",
2071 rx_ring_size);
2072 device_printf(dev, "rx_ring_size must be between %d and %d, "
2073 "inclusive, and must be a multiple of %d\n",
2074 IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
2075 device_printf(dev, "Using default value of %d instead\n",
2076 IXL_DEFAULT_RING);
2077 vsi->num_rx_desc = IXL_DEFAULT_RING;
2078 } else
2079 vsi->num_rx_desc = rx_ring_size;
2080
2081 device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
2082 vsi->num_tx_desc, vsi->num_rx_desc);
2083
2084 }
2085
2086 void
ixl_vsi_add_queues_stats(struct ixl_vsi * vsi)2087 ixl_vsi_add_queues_stats(struct ixl_vsi * vsi)
2088 {
2089 char queue_namebuf[IXL_QUEUE_NAME_LEN];
2090 struct sysctl_oid_list *vsi_list, *queue_list;
2091 struct ixl_queue *queues = vsi->queues;
2092 struct sysctl_oid *queue_node;
2093 struct sysctl_ctx_list *ctx;
2094 struct tx_ring *txr;
2095 struct rx_ring *rxr;
2096
2097 vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
2098 ctx = &vsi->sysctl_ctx;
2099
2100 /* Queue statistics */
2101 for (int q = 0; q < vsi->num_queues; q++) {
2102 snprintf(queue_namebuf, IXL_QUEUE_NAME_LEN, "que%d", q);
2103 queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
2104 OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #");
2105 queue_list = SYSCTL_CHILDREN(queue_node);
2106
2107 txr = &(queues[q].txr);
2108 rxr = &(queues[q].rxr);
2109
2110 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed",
2111 CTLFLAG_RD, &(queues[q].mbuf_defrag_failed),
2112 "m_defrag() failed");
2113 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
2114 CTLFLAG_RD, &(queues[q].irqs),
2115 "irqs on this queue");
2116 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx",
2117 CTLFLAG_RD, &(queues[q].tso),
2118 "TSO");
2119 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dmamap_failed",
2120 CTLFLAG_RD, &(queues[q].tx_dmamap_failed),
2121 "Driver tx dma failure in xmit");
2122 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
2123 CTLFLAG_RD, &(queues[q].mss_too_small),
2124 "TSO sends with an MSS less than 64");
2125 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
2126 CTLFLAG_RD, &(txr->no_desc),
2127 "Queue No Descriptor Available");
2128 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
2129 CTLFLAG_RD, &(txr->total_packets),
2130 "Queue Packets Transmitted");
2131 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes",
2132 CTLFLAG_RD, &(txr->tx_bytes),
2133 "Queue Bytes Transmitted");
2134 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
2135 CTLFLAG_RD, &(rxr->rx_packets),
2136 "Queue Packets Received");
2137 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
2138 CTLFLAG_RD, &(rxr->rx_bytes),
2139 "Queue Bytes Received");
2140 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_desc_err",
2141 CTLFLAG_RD, &(rxr->desc_errs),
2142 "Queue Rx Descriptor Errors");
2143 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr",
2144 CTLFLAG_RD, &(rxr->itr), 0,
2145 "Queue Rx ITR Interval");
2146 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr",
2147 CTLFLAG_RD, &(txr->itr), 0,
2148 "Queue Tx ITR Interval");
2149 #ifdef IXL_DEBUG
2150 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "txr_watchdog",
2151 CTLFLAG_RD, &(txr->watchdog_timer), 0,
2152 "Ticks before watchdog timer causes interface reinit");
2153 SYSCTL_ADD_U16(ctx, queue_list, OID_AUTO, "tx_next_avail",
2154 CTLFLAG_RD, &(txr->next_avail), 0,
2155 "Next TX descriptor to be used");
2156 SYSCTL_ADD_U16(ctx, queue_list, OID_AUTO, "tx_next_to_clean",
2157 CTLFLAG_RD, &(txr->next_to_clean), 0,
2158 "Next TX descriptor to be cleaned");
2159 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_not_done",
2160 CTLFLAG_RD, &(rxr->not_done),
2161 "Queue Rx Descriptors not Done");
2162 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_refresh",
2163 CTLFLAG_RD, &(rxr->next_refresh), 0,
2164 "Queue Rx Descriptors not Done");
2165 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_check",
2166 CTLFLAG_RD, &(rxr->next_check), 0,
2167 "Queue Rx Descriptors not Done");
2168 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail",
2169 CTLTYPE_UINT | CTLFLAG_RD, &queues[q],
2170 sizeof(struct ixl_queue),
2171 ixl_sysctl_qrx_tail_handler, "IU",
2172 "Queue Receive Descriptor Tail");
2173 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail",
2174 CTLTYPE_UINT | CTLFLAG_RD, &queues[q],
2175 sizeof(struct ixl_queue),
2176 ixl_sysctl_qtx_tail_handler, "IU",
2177 "Queue Transmit Descriptor Tail");
2178 #endif
2179 }
2180
2181 }
2182
2183 void
ixl_add_sysctls_eth_stats(struct sysctl_ctx_list * ctx,struct sysctl_oid_list * child,struct i40e_eth_stats * eth_stats)2184 ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
2185 struct sysctl_oid_list *child,
2186 struct i40e_eth_stats *eth_stats)
2187 {
2188 struct ixl_sysctl_info ctls[] =
2189 {
2190 {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"},
2191 {ð_stats->rx_unicast, "ucast_pkts_rcvd",
2192 "Unicast Packets Received"},
2193 {ð_stats->rx_multicast, "mcast_pkts_rcvd",
2194 "Multicast Packets Received"},
2195 {ð_stats->rx_broadcast, "bcast_pkts_rcvd",
2196 "Broadcast Packets Received"},
2197 {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"},
2198 {ð_stats->rx_unknown_protocol, "rx_unknown_proto",
2199 "RX unknown protocol packets"},
2200 {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"},
2201 {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"},
2202 {ð_stats->tx_multicast, "mcast_pkts_txd",
2203 "Multicast Packets Transmitted"},
2204 {ð_stats->tx_broadcast, "bcast_pkts_txd",
2205 "Broadcast Packets Transmitted"},
2206 {ð_stats->tx_errors, "tx_errors", "TX packet errors"},
2207 // end
2208 {0,0,0}
2209 };
2210
2211 struct ixl_sysctl_info *entry = ctls;
2212
2213 while (entry->stat != 0)
2214 {
2215 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name,
2216 CTLFLAG_RD, entry->stat,
2217 entry->description);
2218 entry++;
2219 }
2220 }
2221
2222 #ifdef IXL_DEBUG
2223 /**
2224 * ixl_sysctl_qtx_tail_handler
2225 * Retrieves I40E_QTX_TAIL value from hardware
2226 * for a sysctl.
2227 */
2228 static int
ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS)2229 ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS)
2230 {
2231 struct ixl_queue *que;
2232 int error;
2233 u32 val;
2234
2235 que = ((struct ixl_queue *)oidp->oid_arg1);
2236 if (!que) return 0;
2237
2238 val = rd32(que->vsi->hw, que->txr.tail);
2239 error = sysctl_handle_int(oidp, &val, 0, req);
2240 if (error || !req->newptr)
2241 return error;
2242 return (0);
2243 }
2244
2245 /**
2246 * ixl_sysctl_qrx_tail_handler
2247 * Retrieves I40E_QRX_TAIL value from hardware
2248 * for a sysctl.
2249 */
2250 static int
ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS)2251 ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS)
2252 {
2253 struct ixl_queue *que;
2254 int error;
2255 u32 val;
2256
2257 que = ((struct ixl_queue *)oidp->oid_arg1);
2258 if (!que) return 0;
2259
2260 val = rd32(que->vsi->hw, que->rxr.tail);
2261 error = sysctl_handle_int(oidp, &val, 0, req);
2262 if (error || !req->newptr)
2263 return error;
2264 return (0);
2265 }
2266 #endif
2267
2268 static void
ixl_queue_sw_irq(struct ixl_vsi * vsi,int qidx)2269 ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx)
2270 {
2271 struct i40e_hw *hw = vsi->hw;
2272 u32 reg, mask;
2273
2274 if (IXL_VSI_IS_PF(vsi)) {
2275 mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
2276 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
2277 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);
2278
2279 reg = ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0) ?
2280 I40E_PFINT_DYN_CTLN(qidx) : I40E_PFINT_DYN_CTL0;
2281 } else {
2282 mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK |
2283 I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
2284 I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK);
2285
2286 reg = I40E_VFINT_DYN_CTLN1(qidx);
2287 }
2288
2289 wr32(hw, reg, mask);
2290 }
2291
2292 int
ixl_queue_hang_check(struct ixl_vsi * vsi)2293 ixl_queue_hang_check(struct ixl_vsi *vsi)
2294 {
2295 struct ixl_queue *que = vsi->queues;
2296 device_t dev = vsi->dev;
2297 struct tx_ring *txr;
2298 s32 timer, new_timer;
2299 int hung = 0;
2300
2301 for (int i = 0; i < vsi->num_queues; i++, que++) {
2302 txr = &que->txr;
2303 /*
2304 * If watchdog_timer is equal to defualt value set by ixl_txeof
2305 * just substract hz and move on - the queue is most probably
2306 * running. Otherwise check the value.
2307 */
2308 if (atomic_cmpset_rel_32(&txr->watchdog_timer,
2309 IXL_WATCHDOG, (IXL_WATCHDOG) - hz) == 0) {
2310 timer = atomic_load_acq_32(&txr->watchdog_timer);
2311 /*
2312 * Again - if the timer was reset to default value
2313 * then queue is running. Otherwise check if watchdog
2314 * expired and act accrdingly.
2315 */
2316
2317 if (timer > 0 && timer != IXL_WATCHDOG) {
2318 new_timer = timer - hz;
2319 if (new_timer <= 0) {
2320 atomic_store_rel_32(&txr->watchdog_timer, -1);
2321 device_printf(dev, "WARNING: queue %d "
2322 "appears to be hung!\n", que->me);
2323 ++hung;
2324 /* Try to unblock the queue with SW IRQ */
2325 ixl_queue_sw_irq(vsi, i);
2326 } else {
2327 /*
2328 * If this fails, that means something in the TX path
2329 * has updated the watchdog, so it means the TX path
2330 * is still working and the watchdog doesn't need
2331 * to countdown.
2332 */
2333 atomic_cmpset_rel_32(&txr->watchdog_timer,
2334 timer, new_timer);
2335 }
2336 }
2337 }
2338 }
2339
2340 return (hung);
2341 }
2342
2343