1 /* $NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $");
68 
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71 
72 #include "ixgbe.h"
73 
74 #ifdef RSC
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later.
 *  It also causes IP forwarding to fail, and, unlike LRO, that
 *  cannot be controlled by the stack.  For all these reasons I've
 *  deemed it best to leave this off and not bother with a tuneable
 *  interface; enabling it requires changing this value and
 *  recompiling.
 */
87 static bool ixgbe_rsc_enable = FALSE;
88 #endif
89 
90 #ifdef IXGBE_FDIR
/*
 * For Flow Director: this is the number of TX packets we sample
 * for the filter pool.  With the value of 20 used here, every
 * 20th packet will be probed.
 *
 * This feature can be disabled by setting this to 0.
 */
100 static int atr_sample_rate = 20;
101 #endif
102 
/*
 * IXGBE_M_ADJ - call m_adj(mp, ETHER_ALIGN) on an mbuf, but only when the
 * softc's max_frame_size still fits in the ring's mbuf_sz once ETHER_ALIGN
 * bytes have been given up.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: used as the body of an outer "if ... else", the else
 * binds to the outer if rather than to the test hidden inside the macro.
 * Every argument is parenthesized so that expressions may be passed safely.
 */
#define IXGBE_M_ADJ(sc, rxr, mp)					\
	do {								\
		if ((sc)->max_frame_size <=				\
		    ((rxr)->mbuf_sz - ETHER_ALIGN))			\
			m_adj((mp), ETHER_ALIGN);			\
	} while (/*CONSTCOND*/ 0)
106 
107 /************************************************************************
108  *  Local Function prototypes
109  ************************************************************************/
110 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
111 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
112 static int           ixgbe_setup_receive_ring(struct rx_ring *);
113 static void          ixgbe_free_receive_buffers(struct rx_ring *);
114 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
115                                        struct ixgbe_hw_stats *);
116 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
117 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
118 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
119 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
120                                         struct mbuf *, u32 *, u32 *);
121 static int           ixgbe_tso_setup(struct tx_ring *,
122                                      struct mbuf *, u32 *, u32 *);
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125                                     struct mbuf *, u32);
126 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
127                                       struct ixgbe_dma_alloc *, int);
128 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
129 #ifdef RSC
130 static void              ixgbe_setup_hw_rsc(struct rx_ring *);
131 #endif
132 
133 /************************************************************************
134  * ixgbe_legacy_start_locked - Transmit entry point
135  *
136  *   Called by the stack to initiate a transmit.
137  *   The driver will remain in this routine as long as there are
138  *   packets to transmit and transmit resources are available.
139  *   In case resources are not available, the stack is notified
140  *   and the packet is requeued.
141  ************************************************************************/
142 int
ixgbe_legacy_start_locked(struct ifnet * ifp,struct tx_ring * txr)143 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
144 {
145           int rc;
146           struct mbuf    *m_head;
147           struct ixgbe_softc *sc = txr->sc;
148 
149           IXGBE_TX_LOCK_ASSERT(txr);
150 
151           if (sc->link_active != LINK_STATE_UP) {
152                     /*
153                      * discard all packets buffered in IFQ to avoid
154                      * sending old packets at next link up timing.
155                      */
156                     ixgbe_drain(ifp, txr);
157                     return (ENETDOWN);
158           }
159           if ((ifp->if_flags & IFF_RUNNING) == 0)
160                     return (ENETDOWN);
161           if (txr->txr_no_space)
162                     return (ENETDOWN);
163 
164           while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
165                     if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
166                               break;
167 
168                     IFQ_POLL(&ifp->if_snd, m_head);
169                     if (m_head == NULL)
170                               break;
171 
172                     if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
173                               break;
174                     }
175                     IFQ_DEQUEUE(&ifp->if_snd, m_head);
176                     if (rc != 0) {
177                               m_freem(m_head);
178                               continue;
179                     }
180 
181                     /* Send a copy of the frame to the BPF listener */
182                     bpf_mtap(ifp, m_head, BPF_D_OUT);
183           }
184 
185           return IXGBE_SUCCESS;
186 } /* ixgbe_legacy_start_locked */
187 
188 /************************************************************************
189  * ixgbe_legacy_start
190  *
191  *   Called by the stack, this always uses the first tx ring,
192  *   and should not be used with multiqueue tx enabled.
193  ************************************************************************/
194 void
ixgbe_legacy_start(struct ifnet * ifp)195 ixgbe_legacy_start(struct ifnet *ifp)
196 {
197           struct ixgbe_softc *sc = ifp->if_softc;
198           struct tx_ring *txr = sc->tx_rings;
199 
200           if (ifp->if_flags & IFF_RUNNING) {
201                     IXGBE_TX_LOCK(txr);
202                     ixgbe_legacy_start_locked(ifp, txr);
203                     IXGBE_TX_UNLOCK(txr);
204           }
205 } /* ixgbe_legacy_start */
206 
207 /************************************************************************
208  * ixgbe_mq_start - Multiqueue Transmit Entry Point
209  *
210  *   (if_transmit function)
211  ************************************************************************/
212 int
ixgbe_mq_start(struct ifnet * ifp,struct mbuf * m)213 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
214 {
215           struct ixgbe_softc *sc = ifp->if_softc;
216           struct tx_ring      *txr;
217           int                 i;
218 #ifdef RSS
219           uint32_t bucket_id;
220 #endif
221 
222           /*
223            * When doing RSS, map it to the same outbound queue
224            * as the incoming flow would be mapped to.
225            *
226            * If everything is setup correctly, it should be the
227            * same bucket that the current CPU we're on is.
228            */
229 #ifdef RSS
230           if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
231                     if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
232                         (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
233                         &bucket_id) == 0)) {
234                               i = bucket_id % sc->num_queues;
235 #ifdef IXGBE_DEBUG
236                               if (bucket_id > sc->num_queues)
237                                         if_printf(ifp,
238                                             "bucket_id (%d) > num_queues (%d)\n",
239                                             bucket_id, sc->num_queues);
240 #endif
241                     } else
242                               i = m->m_pkthdr.flowid % sc->num_queues;
243           } else
244 #endif /* 0 */
245                     i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
246 
247           /* Check for a hung queue and pick alternative */
248           if (((1ULL << i) & sc->active_queues) == 0)
249                     i = ffs64(sc->active_queues);
250 
251           txr = &sc->tx_rings[i];
252 
253           if (__predict_false(!pcq_put(txr->txr_interq, m))) {
254                     m_freem(m);
255                     IXGBE_EVC_ADD(&txr->pcq_drops, 1);
256                     return ENOBUFS;
257           }
258 #ifdef IXGBE_ALWAYS_TXDEFER
259           kpreempt_disable();
260           softint_schedule(txr->txr_si);
261           kpreempt_enable();
262 #else
263           if (IXGBE_TX_TRYLOCK(txr)) {
264                     ixgbe_mq_start_locked(ifp, txr);
265                     IXGBE_TX_UNLOCK(txr);
266           } else {
267                     if (sc->txrx_use_workqueue) {
268                               u_int *enqueued;
269 
270                               /*
271                                * This function itself is not called in interrupt
272                                * context, however it can be called in fast softint
273                                * context right after receiving forwarding packets.
274                                * So, it is required to protect workqueue from twice
275                                * enqueuing when the machine uses both spontaneous
276                                * packets and forwarding packets.
277                                */
278                               enqueued = percpu_getref(sc->txr_wq_enqueued);
279                               if (*enqueued == 0) {
280                                         *enqueued = 1;
281                                         percpu_putref(sc->txr_wq_enqueued);
282                                         workqueue_enqueue(sc->txr_wq,
283                                             &txr->wq_cookie, curcpu());
284                               } else
285                                         percpu_putref(sc->txr_wq_enqueued);
286                     } else {
287                               kpreempt_disable();
288                               softint_schedule(txr->txr_si);
289                               kpreempt_enable();
290                     }
291           }
292 #endif
293 
294           return (0);
295 } /* ixgbe_mq_start */
296 
297 /************************************************************************
298  * ixgbe_mq_start_locked
299  ************************************************************************/
300 int
ixgbe_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)301 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
302 {
303           struct mbuf    *next;
304           int            enqueued = 0, err = 0;
305 
306           if (txr->sc->link_active != LINK_STATE_UP) {
307                     /*
308                      * discard all packets buffered in txr_interq to avoid
309                      * sending old packets at next link up timing.
310                      */
311                     ixgbe_drain(ifp, txr);
312                     return (ENETDOWN);
313           }
314           if ((ifp->if_flags & IFF_RUNNING) == 0)
315                     return (ENETDOWN);
316           if (txr->txr_no_space)
317                     return (ENETDOWN);
318 
319           /* Process the queue */
320           while ((next = pcq_get(txr->txr_interq)) != NULL) {
321                     if ((err = ixgbe_xmit(txr, next)) != 0) {
322                               m_freem(next);
323                               /* All errors are counted in ixgbe_xmit() */
324                               break;
325                     }
326                     enqueued++;
327 #if __FreeBSD_version >= 1100036
328                     /*
329                      * Since we're looking at the tx ring, we can check
330                      * to see if we're a VF by examining our tail register
331                      * address.
332                      */
333                     if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
334                         (next->m_flags & M_MCAST))
335                               if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
336 #endif
337                     /* Send a copy of the frame to the BPF listener */
338                     bpf_mtap(ifp, next, BPF_D_OUT);
339                     if ((ifp->if_flags & IFF_RUNNING) == 0)
340                               break;
341           }
342 
343           if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
344                     ixgbe_txeof(txr);
345 
346           return (err);
347 } /* ixgbe_mq_start_locked */
348 
349 /************************************************************************
350  * ixgbe_deferred_mq_start
351  *
352  *   Called from a softint and workqueue (indirectly) to drain queued
353  *   transmit packets.
354  ************************************************************************/
355 void
ixgbe_deferred_mq_start(void * arg)356 ixgbe_deferred_mq_start(void *arg)
357 {
358           struct tx_ring *txr = arg;
359           struct ixgbe_softc *sc = txr->sc;
360           struct ifnet   *ifp = sc->ifp;
361 
362           IXGBE_TX_LOCK(txr);
363           if (pcq_peek(txr->txr_interq) != NULL)
364                     ixgbe_mq_start_locked(ifp, txr);
365           IXGBE_TX_UNLOCK(txr);
366 } /* ixgbe_deferred_mq_start */
367 
368 /************************************************************************
369  * ixgbe_deferred_mq_start_work
370  *
371  *   Called from a workqueue to drain queued transmit packets.
372  ************************************************************************/
373 void
ixgbe_deferred_mq_start_work(struct work * wk,void * arg)374 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
375 {
376           struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
377           struct ixgbe_softc *sc = txr->sc;
378           u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
379           *enqueued = 0;
380           percpu_putref(sc->txr_wq_enqueued);
381 
382           ixgbe_deferred_mq_start(txr);
383 } /* ixgbe_deferred_mq_start */
384 
385 /************************************************************************
386  * ixgbe_drain_all
387  ************************************************************************/
388 void
ixgbe_drain_all(struct ixgbe_softc * sc)389 ixgbe_drain_all(struct ixgbe_softc *sc)
390 {
391           struct ifnet *ifp = sc->ifp;
392           struct ix_queue *que = sc->queues;
393 
394           for (int i = 0; i < sc->num_queues; i++, que++) {
395                     struct tx_ring  *txr = que->txr;
396 
397                     IXGBE_TX_LOCK(txr);
398                     ixgbe_drain(ifp, txr);
399                     IXGBE_TX_UNLOCK(txr);
400           }
401 }
402 
403 /************************************************************************
404  * ixgbe_xmit
405  *
406  *   Maps the mbufs to tx descriptors, allowing the
407  *   TX engine to transmit the packets.
408  *
409  *   Return 0 on success, positive on failure
410  ************************************************************************/
411 static int
ixgbe_xmit(struct tx_ring * txr,struct mbuf * m_head)412 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
413 {
414           struct ixgbe_softc      *sc = txr->sc;
415           struct ixgbe_tx_buf     *txbuf;
416           union ixgbe_adv_tx_desc *txd = NULL;
417           struct ifnet                *ifp = sc->ifp;
418           int                     i, j, error;
419           int                     first;
420           u32                     olinfo_status = 0, cmd_type_len;
421           bool                    remap = TRUE;
422           bus_dmamap_t            map;
423 
424           /* Basic descriptor defines */
425           cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
426               IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
427 
428           if (vlan_has_tag(m_head))
429                     cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
430 
431           /*
432            * Important to capture the first descriptor
433            * used because it will contain the index of
434            * the one we tell the hardware to report back
435            */
436           first = txr->next_avail_desc;
437           txbuf = &txr->tx_buffers[first];
438           map = txbuf->map;
439 
440           /*
441            * Map the packet for DMA.
442            */
443 retry:
444           error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
445               BUS_DMA_NOWAIT);
446 
447           if (__predict_false(error)) {
448                     struct mbuf *m;
449 
450                     switch (error) {
451                     case EAGAIN:
452                               txr->q_eagain_tx_dma_setup++;
453                               return EAGAIN;
454                     case ENOMEM:
455                               txr->q_enomem_tx_dma_setup++;
456                               return EAGAIN;
457                     case EFBIG:
458                               /* Try it again? - one try */
459                               if (remap == TRUE) {
460                                         remap = FALSE;
461                                         /*
462                                          * XXX: m_defrag will choke on
463                                          * non-MCLBYTES-sized clusters
464                                          */
465                                         txr->q_efbig_tx_dma_setup++;
466                                         m = m_defrag(m_head, M_NOWAIT);
467                                         if (m == NULL) {
468                                                   txr->q_mbuf_defrag_failed++;
469                                                   return ENOBUFS;
470                                         }
471                                         m_head = m;
472                                         goto retry;
473                               } else {
474                                         txr->q_efbig2_tx_dma_setup++;
475                                         return error;
476                               }
477                     case EINVAL:
478                               txr->q_einval_tx_dma_setup++;
479                               return error;
480                     default:
481                               txr->q_other_tx_dma_setup++;
482                               return error;
483                     }
484           }
485 
486           /* Make certain there are enough descriptors */
487           if (txr->tx_avail < (map->dm_nsegs + 2)) {
488                     txr->txr_no_space = true;
489                     IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
490                     ixgbe_dmamap_unload(txr->txtag, txbuf->map);
491                     return EAGAIN;
492           }
493 
494           /*
495            * Set up the appropriate offload context if requested,
496            * this may consume one TX descriptor.
497            */
498           error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
499           if (__predict_false(error)) {
500                     return (error);
501           }
502 
503 #ifdef IXGBE_FDIR
504           /* Do the flow director magic */
505           if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
506               (txr->atr_sample) && (!sc->fdir_reinit)) {
507                     ++txr->atr_count;
508                     if (txr->atr_count >= atr_sample_rate) {
509                               ixgbe_atr(txr, m_head);
510                               txr->atr_count = 0;
511                     }
512           }
513 #endif
514 
515           olinfo_status |= IXGBE_ADVTXD_CC;
516           i = txr->next_avail_desc;
517           for (j = 0; j < map->dm_nsegs; j++) {
518                     bus_size_t seglen;
519                     uint64_t segaddr;
520 
521                     txbuf = &txr->tx_buffers[i];
522                     txd = &txr->tx_base[i];
523                     seglen = map->dm_segs[j].ds_len;
524                     segaddr = htole64(map->dm_segs[j].ds_addr);
525 
526                     txd->read.buffer_addr = segaddr;
527                     txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
528                     txd->read.olinfo_status = htole32(olinfo_status);
529 
530                     if (++i == txr->num_desc)
531                               i = 0;
532           }
533 
534           txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
535           txr->tx_avail -= map->dm_nsegs;
536           txr->next_avail_desc = i;
537 
538           txbuf->m_head = m_head;
539           /*
540            * Here we swap the map so the last descriptor,
541            * which gets the completion interrupt has the
542            * real map, and the first descriptor gets the
543            * unused map from this descriptor.
544            */
545           txr->tx_buffers[first].map = txbuf->map;
546           txbuf->map = map;
547           bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
548               BUS_DMASYNC_PREWRITE);
549 
550           /* Set the EOP descriptor that will be marked done */
551           txbuf = &txr->tx_buffers[first];
552           txbuf->eop = txd;
553 
554           ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
555               BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
556           /*
557            * Advance the Transmit Descriptor Tail (Tdt), this tells the
558            * hardware that this frame is available to transmit.
559            */
560           IXGBE_EVC_ADD(&txr->total_packets, 1);
561           IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
562 
563           net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
564           if_statadd_ref(ifp, nsr, if_obytes, m_head->m_pkthdr.len);
565           if (m_head->m_flags & M_MCAST)
566                     if_statinc_ref(ifp, nsr, if_omcasts);
567           IF_STAT_PUTREF(ifp);
568 
569           /* Mark queue as having work */
570           if (txr->busy == 0)
571                     txr->busy = 1;
572 
573           return (0);
574 } /* ixgbe_xmit */
575 
576 /************************************************************************
577  * ixgbe_drain
578  ************************************************************************/
579 static void
ixgbe_drain(struct ifnet * ifp,struct tx_ring * txr)580 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
581 {
582           struct mbuf *m;
583 
584           IXGBE_TX_LOCK_ASSERT(txr);
585 
586           if (txr->me == 0) {
587                     while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
588                               IFQ_DEQUEUE(&ifp->if_snd, m);
589                               m_freem(m);
590                               IF_DROP(&ifp->if_snd);
591                     }
592           }
593 
594           while ((m = pcq_get(txr->txr_interq)) != NULL) {
595                     m_freem(m);
596                     IXGBE_EVC_ADD(&txr->pcq_drops, 1);
597           }
598 }
599 
600 /************************************************************************
601  * ixgbe_allocate_transmit_buffers
602  *
603  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
604  *   the information needed to transmit a packet on the wire. This is
605  *   called only once at attach, setup is done every reset.
606  ************************************************************************/
607 static int
ixgbe_allocate_transmit_buffers(struct tx_ring * txr)608 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
609 {
610           struct ixgbe_softc  *sc = txr->sc;
611           device_t            dev = sc->dev;
612           struct ixgbe_tx_buf *txbuf;
613           int                 error, i;
614 
615           /*
616            * Setup DMA descriptor areas.
617            */
618           error = ixgbe_dma_tag_create(
619                    /*      parent */ sc->osdep.dmat,
620                    /*   alignment */ 1,
621                    /*      bounds */ 0,
622                    /*     maxsize */ IXGBE_TSO_SIZE,
623                    /*   nsegments */ sc->num_segs,
624                    /*  maxsegsize */ PAGE_SIZE,
625                    /*       flags */ 0,
626                                      &txr->txtag);
627           if (error != 0) {
628                     aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
629                     goto fail;
630           }
631 
632           txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
633               sc->num_tx_desc, KM_SLEEP);
634 
635           /* Create the descriptor buffer dma maps */
636           txbuf = txr->tx_buffers;
637           for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
638                     error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
639                     if (error != 0) {
640                               aprint_error_dev(dev,
641                                   "Unable to create TX DMA map (%d)\n", error);
642                               goto fail;
643                     }
644           }
645 
646           return 0;
647 fail:
648           /* We free all, it handles case where we are in the middle */
649 #if 0 /* XXX was FreeBSD */
650           ixgbe_free_transmit_structures(sc);
651 #else
652           ixgbe_free_transmit_buffers(txr);
653 #endif
654           return (error);
655 } /* ixgbe_allocate_transmit_buffers */
656 
657 /************************************************************************
658  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
659  ************************************************************************/
660 static void
ixgbe_setup_transmit_ring(struct tx_ring * txr)661 ixgbe_setup_transmit_ring(struct tx_ring *txr)
662 {
663           struct ixgbe_softc    *sc = txr->sc;
664           struct ixgbe_tx_buf   *txbuf;
665 #ifdef DEV_NETMAP
666           struct netmap_sc      *na = NA(sc->ifp);
667           struct netmap_slot    *slot;
668 #endif /* DEV_NETMAP */
669 
670           /* Clear the old ring contents */
671           IXGBE_TX_LOCK(txr);
672 
673 #ifdef DEV_NETMAP
674           if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
675                     /*
676                      * (under lock): if in netmap mode, do some consistency
677                      * checks and set slot to entry 0 of the netmap ring.
678                      */
679                     slot = netmap_reset(na, NR_TX, txr->me, 0);
680           }
681 #endif /* DEV_NETMAP */
682 
683           bzero((void *)txr->tx_base,
684               (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
685           /* Reset indices */
686           txr->next_avail_desc = 0;
687           txr->next_to_clean = 0;
688 
689           /* Free any existing tx buffers. */
690           txbuf = txr->tx_buffers;
691           for (int i = 0; i < txr->num_desc; i++, txbuf++) {
692                     if (txbuf->m_head != NULL) {
693                               bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
694                                   0, txbuf->m_head->m_pkthdr.len,
695                                   BUS_DMASYNC_POSTWRITE);
696                               ixgbe_dmamap_unload(txr->txtag, txbuf->map);
697                               m_freem(txbuf->m_head);
698                               txbuf->m_head = NULL;
699                     }
700 
701 #ifdef DEV_NETMAP
702                     /*
703                      * In netmap mode, set the map for the packet buffer.
704                      * NOTE: Some drivers (not this one) also need to set
705                      * the physical buffer address in the NIC ring.
706                      * Slots in the netmap ring (indexed by "si") are
707                      * kring->nkr_hwofs positions "ahead" wrt the
708                      * corresponding slot in the NIC ring. In some drivers
709                      * (not here) nkr_hwofs can be negative. Function
710                      * netmap_idx_n2k() handles wraparounds properly.
711                      */
712                     if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
713                               int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
714                               netmap_load_map(na, txr->txtag,
715                                   txbuf->map, NMB(na, slot + si));
716                     }
717 #endif /* DEV_NETMAP */
718 
719                     /* Clear the EOP descriptor pointer */
720                     txbuf->eop = NULL;
721           }
722 
723 #ifdef IXGBE_FDIR
724           /* Set the rate at which we sample packets */
725           if (sc->feat_en & IXGBE_FEATURE_FDIR)
726                     txr->atr_sample = atr_sample_rate;
727 #endif
728 
729           /* Set number of descriptors available */
730           txr->tx_avail = sc->num_tx_desc;
731 
732           ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
733               BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
734           IXGBE_TX_UNLOCK(txr);
735 } /* ixgbe_setup_transmit_ring */
736 
737 /************************************************************************
738  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
739  ************************************************************************/
740 int
ixgbe_setup_transmit_structures(struct ixgbe_softc * sc)741 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
742 {
743           struct tx_ring *txr = sc->tx_rings;
744 
745           for (int i = 0; i < sc->num_queues; i++, txr++)
746                     ixgbe_setup_transmit_ring(txr);
747 
748           return (0);
749 } /* ixgbe_setup_transmit_structures */
750 
751 /************************************************************************
752  * ixgbe_free_transmit_structures - Free all transmit rings.
753  ************************************************************************/
754 void
ixgbe_free_transmit_structures(struct ixgbe_softc * sc)755 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
756 {
757           struct tx_ring *txr = sc->tx_rings;
758 
759           for (int i = 0; i < sc->num_queues; i++, txr++) {
760                     ixgbe_free_transmit_buffers(txr);
761                     ixgbe_dma_free(sc, &txr->txdma);
762                     IXGBE_TX_LOCK_DESTROY(txr);
763           }
764           kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
765 } /* ixgbe_free_transmit_structures */
766 
767 /************************************************************************
768  * ixgbe_free_transmit_buffers
769  *
770  *   Free transmit ring related data structures.
771  ************************************************************************/
772 static void
ixgbe_free_transmit_buffers(struct tx_ring * txr)773 ixgbe_free_transmit_buffers(struct tx_ring *txr)
774 {
775           struct ixgbe_softc  *sc = txr->sc;
776           struct ixgbe_tx_buf *tx_buffer;
777           int                 i;
778 
779           INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
780 
781           if (txr->tx_buffers == NULL)
782                     return;
783 
784           tx_buffer = txr->tx_buffers;
785           for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
786                     if (tx_buffer->m_head != NULL) {
787                               bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
788                                   0, tx_buffer->m_head->m_pkthdr.len,
789                                   BUS_DMASYNC_POSTWRITE);
790                               ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
791                               m_freem(tx_buffer->m_head);
792                               tx_buffer->m_head = NULL;
793                               if (tx_buffer->map != NULL) {
794                                         ixgbe_dmamap_destroy(txr->txtag,
795                                             tx_buffer->map);
796                                         tx_buffer->map = NULL;
797                               }
798                     } else if (tx_buffer->map != NULL) {
799                               ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
800                               ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
801                               tx_buffer->map = NULL;
802                     }
803           }
804           if (txr->txr_interq != NULL) {
805                     struct mbuf *m;
806 
807                     while ((m = pcq_get(txr->txr_interq)) != NULL)
808                               m_freem(m);
809                     pcq_destroy(txr->txr_interq);
810           }
811           if (txr->tx_buffers != NULL) {
812                     kmem_free(txr->tx_buffers,
813                         sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
814                     txr->tx_buffers = NULL;
815           }
816           if (txr->txtag != NULL) {
817                     ixgbe_dma_tag_destroy(txr->txtag);
818                     txr->txtag = NULL;
819           }
820 } /* ixgbe_free_transmit_buffers */
821 
822 /************************************************************************
823  * ixgbe_tx_ctx_setup
824  *
825  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
826  ************************************************************************/
827 static int
ixgbe_tx_ctx_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)828 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
829     u32 *cmd_type_len, u32 *olinfo_status)
830 {
831           struct ixgbe_softc               *sc = txr->sc;
832           struct ixgbe_adv_tx_context_desc *TXD;
833           struct ether_vlan_header         *eh;
834 #ifdef INET
835           struct ip                        *ip;
836 #endif
837 #ifdef INET6
838           struct ip6_hdr                   *ip6;
839 #endif
840           int                              ehdrlen, ip_hlen = 0;
841           int                              offload = TRUE;
842           int                              ctxd = txr->next_avail_desc;
843           u32                              vlan_macip_lens = 0;
844           u32                              type_tucmd_mlhl = 0;
845           u16                              vtag = 0;
846           u16                              etype;
847           u8                               ipproto = 0;
848           char                             *l3d;
849 
850           /* First check if TSO is to be used */
851           if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
852                     int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
853 
854                     if (rv != 0)
855                               IXGBE_EVC_ADD(&sc->tso_err, 1);
856                     return rv;
857           }
858 
859           if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
860                     offload = FALSE;
861 
862           /* Indicate the whole packet as payload when not doing TSO */
863           *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
864 
865           /*
866            * In advanced descriptors the vlan tag must
867            * be placed into the context descriptor. Hence
868            * we need to make one even if not doing offloads.
869            */
870           if (vlan_has_tag(mp)) {
871                     vtag = htole16(vlan_get_tag(mp));
872                     vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
873           } else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
874                      (offload == FALSE))
875                     return (0);
876 
877           /*
878            * Determine where frame payload starts.
879            * Jump over vlan headers if already present,
880            * helpful for QinQ too.
881            */
882           KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
883           eh = mtod(mp, struct ether_vlan_header *);
884           if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
885                     KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
886                     etype = ntohs(eh->evl_proto);
887                     ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888           } else {
889                     etype = ntohs(eh->evl_encap_proto);
890                     ehdrlen = ETHER_HDR_LEN;
891           }
892 
893           /* Set the ether header length */
894           vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
895 
896           if (offload == FALSE)
897                     goto no_offloads;
898 
899           /*
900            * If the first mbuf only includes the ethernet header,
901            * jump to the next one
902            * XXX: This assumes the stack splits mbufs containing headers
903            *      on header boundaries
904            * XXX: And assumes the entire IP header is contained in one mbuf
905            */
906           if (mp->m_len == ehdrlen && mp->m_next)
907                     l3d = mtod(mp->m_next, char *);
908           else
909                     l3d = mtod(mp, char *) + ehdrlen;
910 
911           switch (etype) {
912 #ifdef INET
913           case ETHERTYPE_IP:
914                     ip = (struct ip *)(l3d);
915                     ip_hlen = ip->ip_hl << 2;
916                     ipproto = ip->ip_p;
917                     type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
918                     KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
919                         ip->ip_sum == 0);
920                     break;
921 #endif
922 #ifdef INET6
923           case ETHERTYPE_IPV6:
924                     ip6 = (struct ip6_hdr *)(l3d);
925                     ip_hlen = sizeof(struct ip6_hdr);
926                     ipproto = ip6->ip6_nxt;
927                     type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
928                     break;
929 #endif
930           default:
931                     offload = false;
932                     break;
933           }
934 
935           if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
936                     *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
937 
938           vlan_macip_lens |= ip_hlen;
939 
940           /* No support for offloads for non-L4 next headers */
941           switch (ipproto) {
942           case IPPROTO_TCP:
943                     if (mp->m_pkthdr.csum_flags &
944                         (M_CSUM_TCPv4 | M_CSUM_TCPv6))
945                               type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
946                     else
947                               offload = false;
948                     break;
949           case IPPROTO_UDP:
950                     if (mp->m_pkthdr.csum_flags &
951                         (M_CSUM_UDPv4 | M_CSUM_UDPv6))
952                               type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
953                     else
954                               offload = false;
955                     break;
956           default:
957                     offload = false;
958                     break;
959           }
960 
961           if (offload) /* Insert L4 checksum into data descriptors */
962                     *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
963 
964 no_offloads:
965           type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966 
967           /* Now ready a context descriptor */
968           TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
969 
970           /* Now copy bits into descriptor */
971           TXD->vlan_macip_lens = htole32(vlan_macip_lens);
972           TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
973           TXD->seqnum_seed = htole32(0);
974           TXD->mss_l4len_idx = htole32(0);
975 
976           /* We've consumed the first desc, adjust counters */
977           if (++ctxd == txr->num_desc)
978                     ctxd = 0;
979           txr->next_avail_desc = ctxd;
980           --txr->tx_avail;
981 
982           return (0);
983 } /* ixgbe_tx_ctx_setup */
984 
985 /************************************************************************
986  * ixgbe_tso_setup
987  *
988  *   Setup work for hardware segmentation offload (TSO) on
989  *   adapters using advanced tx descriptors
990  ************************************************************************/
991 static int
ixgbe_tso_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)992 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
993     u32 *olinfo_status)
994 {
995           struct ixgbe_adv_tx_context_desc *TXD;
996           struct ether_vlan_header         *eh;
997 #ifdef INET6
998           struct ip6_hdr                   *ip6;
999 #endif
1000 #ifdef INET
1001           struct ip                        *ip;
1002 #endif
1003           struct tcphdr                    *th;
1004           int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
1005           u32                              vlan_macip_lens = 0;
1006           u32                              type_tucmd_mlhl = 0;
1007           u32                              mss_l4len_idx = 0, paylen;
1008           u16                              vtag = 0, eh_type;
1009 
1010           /*
1011            * Determine where frame payload starts.
1012            * Jump over vlan headers if already present
1013            */
1014           eh = mtod(mp, struct ether_vlan_header *);
1015           if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1016                     ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1017                     eh_type = eh->evl_proto;
1018           } else {
1019                     ehdrlen = ETHER_HDR_LEN;
1020                     eh_type = eh->evl_encap_proto;
1021           }
1022 
1023           switch (ntohs(eh_type)) {
1024 #ifdef INET
1025           case ETHERTYPE_IP:
1026                     ip = (struct ip *)(mp->m_data + ehdrlen);
1027                     if (ip->ip_p != IPPROTO_TCP)
1028                               return (ENXIO);
1029                     ip->ip_sum = 0;
1030                     ip_hlen = ip->ip_hl << 2;
1031                     th = (struct tcphdr *)((char *)ip + ip_hlen);
1032                     th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1033                         ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1034                     type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1035                     /* Tell transmit desc to also do IPv4 checksum. */
1036                     *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1037                     break;
1038 #endif
1039 #ifdef INET6
1040           case ETHERTYPE_IPV6:
1041                     ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1042                     /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1043                     if (ip6->ip6_nxt != IPPROTO_TCP)
1044                               return (ENXIO);
1045                     ip_hlen = sizeof(struct ip6_hdr);
1046                     ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1047                     th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1048                     th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1049                         &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1050                     type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1051                     break;
1052 #endif
1053           default:
1054                     panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1055                         __func__, ntohs(eh_type));
1056                     break;
1057           }
1058 
1059           ctxd = txr->next_avail_desc;
1060           TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1061 
1062           tcp_hlen = th->th_off << 2;
1063 
1064           /* This is used in the transmit desc in encap */
1065           paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1066 
1067           /* VLAN MACLEN IPLEN */
1068           if (vlan_has_tag(mp)) {
1069                     vtag = htole16(vlan_get_tag(mp));
1070                     vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1071           }
1072 
1073           vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1074           vlan_macip_lens |= ip_hlen;
1075           TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1076 
1077           /* ADV DTYPE TUCMD */
1078           type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1079           type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1080           TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1081 
1082           /* MSS L4LEN IDX */
1083           mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1084           mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1085           TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1086 
1087           TXD->seqnum_seed = htole32(0);
1088 
1089           if (++ctxd == txr->num_desc)
1090                     ctxd = 0;
1091 
1092           txr->tx_avail--;
1093           txr->next_avail_desc = ctxd;
1094           *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1095           *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1096           *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1097           IXGBE_EVC_ADD(&txr->tso_tx, 1);
1098 
1099           return (0);
1100 } /* ixgbe_tso_setup */
1101 
1102 
1103 /************************************************************************
1104  * ixgbe_txeof
1105  *
1106  *   Examine each tx_buffer in the used queue. If the hardware is done
1107  *   processing the packet then free associated resources. The
1108  *   tx_buffer is put back on the free queue.
1109  ************************************************************************/
1110 bool
ixgbe_txeof(struct tx_ring * txr)1111 ixgbe_txeof(struct tx_ring *txr)
1112 {
1113           struct ixgbe_softc  *sc = txr->sc;
1114           struct ifnet                  *ifp = sc->ifp;
1115           struct ixgbe_tx_buf *buf;
1116           union ixgbe_adv_tx_desc *txd;
1117           u32                           work, processed = 0;
1118           u32                           limit = sc->tx_process_limit;
1119           u16                           avail;
1120 
1121           KASSERT(mutex_owned(&txr->tx_mtx));
1122 
1123 #ifdef DEV_NETMAP
1124           if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
1125               (sc->ifp->if_capenable & IFCAP_NETMAP)) {
1126                     struct netmap_sc *na = NA(sc->ifp);
1127                     struct netmap_kring *kring = na->tx_rings[txr->me];
1128                     txd = txr->tx_base;
1129                     bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1130                         BUS_DMASYNC_POSTREAD);
1131                     /*
1132                      * In netmap mode, all the work is done in the context
1133                      * of the client thread. Interrupt handlers only wake up
1134                      * clients, which may be sleeping on individual rings
1135                      * or on a global resource for all rings.
1136                      * To implement tx interrupt mitigation, we wake up the client
1137                      * thread roughly every half ring, even if the NIC interrupts
1138                      * more frequently. This is implemented as follows:
1139                      * - ixgbe_txsync() sets kring->nr_kflags with the index of
1140                      *   the slot that should wake up the thread (nkr_num_slots
1141                      *   means the user thread should not be woken up);
1142                      * - the driver ignores tx interrupts unless netmap_mitigate=0
1143                      *   or the slot has the DD bit set.
1144                      */
1145                     if (kring->nr_kflags < kring->nkr_num_slots &&
1146                         le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1147                               netmap_tx_irq(ifp, txr->me);
1148                     }
1149                     return false;
1150           }
1151 #endif /* DEV_NETMAP */
1152 
1153           if (txr->tx_avail == txr->num_desc) {
1154                     txr->busy = 0;
1155                     return false;
1156           }
1157 
1158           /* Get work starting point */
1159           work = txr->next_to_clean;
1160           buf = &txr->tx_buffers[work];
1161           txd = &txr->tx_base[work];
1162           work -= txr->num_desc; /* The distance to ring end */
1163           avail = txr->tx_avail;
1164           ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1165               BUS_DMASYNC_POSTREAD);
1166 
1167           do {
1168                     union ixgbe_adv_tx_desc *eop = buf->eop;
1169                     if (eop == NULL) /* No work */
1170                               break;
1171 
1172                     if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1173                               break;    /* I/O not complete */
1174 
1175                     if (buf->m_head) {
1176                               txr->bytes += buf->m_head->m_pkthdr.len;
1177                               bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1178                                   0, buf->m_head->m_pkthdr.len,
1179                                   BUS_DMASYNC_POSTWRITE);
1180                               ixgbe_dmamap_unload(txr->txtag, buf->map);
1181                               m_freem(buf->m_head);
1182                               buf->m_head = NULL;
1183                     }
1184                     buf->eop = NULL;
1185                     ++avail;
1186 
1187                     /* We clean the range if multi segment */
1188                     while (txd != eop) {
1189                               ++txd;
1190                               ++buf;
1191                               ++work;
1192                               /* wrap the ring? */
1193                               if (__predict_false(!work)) {
1194                                         work -= txr->num_desc;
1195                                         buf = txr->tx_buffers;
1196                                         txd = txr->tx_base;
1197                               }
1198                               if (buf->m_head) {
1199                                         txr->bytes +=
1200                                             buf->m_head->m_pkthdr.len;
1201                                         bus_dmamap_sync(txr->txtag->dt_dmat,
1202                                             buf->map,
1203                                             0, buf->m_head->m_pkthdr.len,
1204                                             BUS_DMASYNC_POSTWRITE);
1205                                         ixgbe_dmamap_unload(txr->txtag,
1206                                             buf->map);
1207                                         m_freem(buf->m_head);
1208                                         buf->m_head = NULL;
1209                               }
1210                               ++avail;
1211                               buf->eop = NULL;
1212 
1213                     }
1214                     ++processed;
1215 
1216                     /* Try the next packet */
1217                     ++txd;
1218                     ++buf;
1219                     ++work;
1220                     /* reset with a wrap */
1221                     if (__predict_false(!work)) {
1222                               work -= txr->num_desc;
1223                               buf = txr->tx_buffers;
1224                               txd = txr->tx_base;
1225                     }
1226                     prefetch(txd);
1227           } while (__predict_true(--limit));
1228 
1229           ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1230               BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1231 
1232           work += txr->num_desc;
1233           txr->next_to_clean = work;
1234           if (processed) {
1235                     txr->tx_avail = avail;
1236                     txr->txr_no_space = false;
1237                     txr->packets += processed;
1238                     if_statadd(ifp, if_opackets, processed);
1239           }
1240 
1241           /*
1242            * Queue Hang detection, we know there's
1243            * work outstanding or the first return
1244            * would have been taken, so increment busy
1245            * if nothing managed to get cleaned, then
1246            * in local_timer it will be checked and
1247            * marked as HUNG if it exceeds a MAX attempt.
1248            */
1249           if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1250                     ++txr->busy;
1251           /*
1252            * If anything gets cleaned we reset state to 1,
1253            * note this will turn off HUNG if its set.
1254            */
1255           if (processed)
1256                     txr->busy = 1;
1257 
1258           if (txr->tx_avail == txr->num_desc)
1259                     txr->busy = 0;
1260 
1261           return ((limit > 0) ? false : true);
1262 } /* ixgbe_txeof */
1263 
1264 #ifdef RSC
1265 /************************************************************************
1266  * ixgbe_rsc_count
1267  *
1268  *   Used to detect a descriptor that has been merged by Hardware RSC.
1269  ************************************************************************/
1270 static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc * rx)1271 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1272 {
1273           return (le32toh(rx->wb.lower.lo_dword.data) &
1274               IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1275 } /* ixgbe_rsc_count */
1276 
1277 /************************************************************************
1278  * ixgbe_setup_hw_rsc
1279  *
1280  *   Initialize Hardware RSC (LRO) feature on 82599
1281  *   for an RX ring, this is toggled by the LRO capability
1282  *   even though it is transparent to the stack.
1283  *
1284  *   NOTE: Since this HW feature only works with IPv4 and
1285  *         testing has shown soft LRO to be as effective,
1286  *         this feature will be disabled by default.
1287  ************************************************************************/
1288 static void
ixgbe_setup_hw_rsc(struct rx_ring * rxr)1289 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1290 {
1291           struct ixgbe_softc *sc = rxr->sc;
1292           struct ixgbe_hw     *hw = &sc->hw;
1293           u32                 rscctrl, rdrxctl;
1294 
1295           /* If turning LRO/RSC off we need to disable it */
1296           if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
1297                     rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1298                     rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1299                     return;
1300           }
1301 
1302           rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1303           rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1304 #ifdef DEV_NETMAP
1305           /* Always strip CRC unless Netmap disabled it */
1306           if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
1307               !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
1308               ix_crcstrip)
1309 #endif /* DEV_NETMAP */
1310                     rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1311           rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1312           IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1313 
1314           rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1315           rscctrl |= IXGBE_RSCCTL_RSCEN;
1316           /*
1317            * Limit the total number of descriptors that
1318            * can be combined, so it does not exceed 64K
1319            */
1320           if (rxr->mbuf_sz == MCLBYTES)
1321                     rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1322           else if (rxr->mbuf_sz == MJUMPAGESIZE)
1323                     rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1324           else if (rxr->mbuf_sz == MJUM9BYTES)
1325                     rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1326           else  /* Using 16K cluster */
1327                     rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1328 
1329           IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1330 
1331           /* Enable TCP header recognition */
1332           IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1333               (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1334 
1335           /* Disable RSC for ACK packets */
1336           IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1337               (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1338 
1339           rxr->hw_rsc = TRUE;
1340 } /* ixgbe_setup_hw_rsc */
1341 #endif
1342 
1343 /************************************************************************
1344  * ixgbe_refresh_mbufs
1345  *
1346  *   Refresh mbuf buffers for RX descriptor rings
1347  *    - now keeps its own state so discards due to resource
1348  *      exhaustion are unnecessary, if an mbuf cannot be obtained
1349  *      it just returns, keeping its placeholder, thus it can simply
1350  *      be recalled to try again.
1351  ************************************************************************/
1352 static void
ixgbe_refresh_mbufs(struct rx_ring * rxr,int limit)1353 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1354 {
1355           struct ixgbe_softc  *sc = rxr->sc;
1356           struct ixgbe_rx_buf *rxbuf;
1357           struct mbuf         *mp;
1358           int                 i, error;
1359           bool                refreshed = false;
1360 
1361           i = rxr->next_to_refresh;
1362           /* next_to_refresh points to the previous one */
1363           if (++i == rxr->num_desc)
1364                     i = 0;
1365 
1366           while (i != limit) {
1367                     rxbuf = &rxr->rx_buffers[i];
1368                     if (__predict_false(rxbuf->buf == NULL)) {
1369                               mp = ixgbe_getcl();
1370                               if (mp == NULL) {
1371                                         IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1372                                         goto update;
1373                               }
1374                               mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1375                               IXGBE_M_ADJ(sc, rxr, mp);
1376                     } else
1377                               mp = rxbuf->buf;
1378 
1379                     /* If we're dealing with an mbuf that was copied rather
1380                      * than replaced, there's no need to go through busdma.
1381                      */
1382                     if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1383                               /* Get the memory mapping */
1384                               ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1385                               error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1386                                   rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1387                               if (__predict_false(error != 0)) {
1388                                         device_printf(sc->dev, "Refresh mbufs: "
1389                                             "payload dmamap load failure - %d\n",
1390                                             error);
1391                                         m_free(mp);
1392                                         rxbuf->buf = NULL;
1393                                         goto update;
1394                               }
1395                               rxbuf->buf = mp;
1396                               bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1397                                   0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1398                               rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1399                                   htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1400                     } else {
1401                               rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1402                               rxbuf->flags &= ~IXGBE_RX_COPY;
1403                     }
1404 
1405                     refreshed = true;
1406                     /* next_to_refresh points to the previous one */
1407                     rxr->next_to_refresh = i;
1408                     if (++i == rxr->num_desc)
1409                               i = 0;
1410           }
1411 
1412 update:
1413           if (refreshed) /* Update hardware tail index */
1414                     IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
1415 
1416           return;
1417 } /* ixgbe_refresh_mbufs */
1418 
1419 /************************************************************************
1420  * ixgbe_allocate_receive_buffers
1421  *
1422  *   Allocate memory for rx_buffer structures. Since we use one
1423  *   rx_buffer per received packet, the maximum number of rx_buffer's
1424  *   that we'll need is equal to the number of receive descriptors
1425  *   that we've allocated.
1426  ************************************************************************/
1427 static int
ixgbe_allocate_receive_buffers(struct rx_ring * rxr)1428 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1429 {
1430           struct ixgbe_softc  *sc = rxr->sc;
1431           device_t            dev = sc->dev;
1432           struct ixgbe_rx_buf *rxbuf;
1433           int                 bsize, error;
1434 
1435           bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1436           rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
1437 
1438           error = ixgbe_dma_tag_create(
1439                    /*      parent */ sc->osdep.dmat,
1440                    /*   alignment */ 1,
1441                    /*      bounds */ 0,
1442                    /*     maxsize */ MJUM16BYTES,
1443                    /*   nsegments */ 1,
1444                    /*  maxsegsize */ MJUM16BYTES,
1445                    /*       flags */ 0,
1446                                      &rxr->ptag);
1447           if (error != 0) {
1448                     aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1449                     goto fail;
1450           }
1451 
1452           for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1453                     rxbuf = &rxr->rx_buffers[i];
1454                     error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1455                     if (error) {
1456                               aprint_error_dev(dev, "Unable to create RX dma map\n");
1457                               goto fail;
1458                     }
1459           }
1460 
1461           return (0);
1462 
1463 fail:
1464           /* Frees all, but can handle partial completion */
1465           ixgbe_free_receive_structures(sc);
1466 
1467           return (error);
1468 } /* ixgbe_allocate_receive_buffers */
1469 
1470 /************************************************************************
1471  * ixgbe_free_receive_ring
1472  ************************************************************************/
1473 static void
ixgbe_free_receive_ring(struct rx_ring * rxr)1474 ixgbe_free_receive_ring(struct rx_ring *rxr)
1475 {
1476           for (int i = 0; i < rxr->num_desc; i++) {
1477                     ixgbe_rx_discard(rxr, i);
1478           }
1479 } /* ixgbe_free_receive_ring */
1480 
1481 /************************************************************************
1482  * ixgbe_setup_receive_ring
1483  *
1484  *   Initialize a receive ring and its buffers.
1485  ************************************************************************/
1486 static int
ixgbe_setup_receive_ring(struct rx_ring * rxr)1487 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1488 {
1489           struct ixgbe_softc    *sc;
1490           struct ixgbe_rx_buf   *rxbuf;
1491 #ifdef LRO
1492           struct ifnet          *ifp;
1493           struct lro_ctrl       *lro = &rxr->lro;
1494 #endif /* LRO */
1495 #ifdef DEV_NETMAP
1496           struct netmap_sc      *na = NA(rxr->sc->ifp);
1497           struct netmap_slot    *slot;
1498 #endif /* DEV_NETMAP */
1499           int                   rsize, error = 0;
1500 
1501           sc = rxr->sc;
1502 #ifdef LRO
1503           ifp = sc->ifp;
1504 #endif /* LRO */
1505 
1506           /* Clear the ring contents */
1507           IXGBE_RX_LOCK(rxr);
1508 
1509 #ifdef DEV_NETMAP
1510           if (sc->feat_en & IXGBE_FEATURE_NETMAP)
1511                     slot = netmap_reset(na, NR_RX, rxr->me, 0);
1512 #endif /* DEV_NETMAP */
1513 
1514           rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
1515           KASSERT((rsize % DBA_ALIGN) == 0);
1516           bzero((void *)rxr->rx_base, rsize);
1517           /* Cache the size */
1518           rxr->mbuf_sz = sc->rx_mbuf_sz;
1519 
1520           /* Free current RX buffer structs and their mbufs */
1521           ixgbe_free_receive_ring(rxr);
1522 
1523           /* Now replenish the mbufs */
1524           for (int i = 0; i < rxr->num_desc; i++) {
1525                     struct mbuf *mp;
1526 
1527                     rxbuf = &rxr->rx_buffers[i];
1528 
1529 #ifdef DEV_NETMAP
1530                     /*
1531                      * In netmap mode, fill the map and set the buffer
1532                      * address in the NIC ring, considering the offset
1533                      * between the netmap and NIC rings (see comment in
1534                      * ixgbe_setup_transmit_ring() ). No need to allocate
1535                      * an mbuf, so end the block with a continue;
1536                      */
1537                     if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1538                               int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
1539                               uint64_t paddr;
1540                               void *addr;
1541 
1542                               addr = PNMB(na, slot + sj, &paddr);
1543                               netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1544                               /* Update descriptor and the cached value */
1545                               rxr->rx_base[i].read.pkt_addr = htole64(paddr);
1546                               rxbuf->addr = htole64(paddr);
1547                               continue;
1548                     }
1549 #endif /* DEV_NETMAP */
1550 
1551                     rxbuf->flags = 0;
1552                     rxbuf->buf = ixgbe_getcl();
1553                     if (rxbuf->buf == NULL) {
1554                               IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1555                               error = ENOBUFS;
1556                               goto fail;
1557                     }
1558                     mp = rxbuf->buf;
1559                     mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1560                     IXGBE_M_ADJ(sc, rxr, mp);
1561                     /* Get the memory mapping */
1562                     error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1563                         mp, BUS_DMA_NOWAIT);
1564                     if (error != 0) {
1565                               /*
1566                                * Clear this entry for later cleanup in
1567                                * ixgbe_discard() which is called via
1568                                * ixgbe_free_receive_ring().
1569                                */
1570                               m_freem(mp);
1571                               rxbuf->buf = NULL;
1572                               goto fail;
1573                     }
1574                     bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1575                         0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1576                     /* Update the descriptor and the cached value */
1577                     rxr->rx_base[i].read.pkt_addr =
1578                         htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1579                     rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1580           }
1581 
1582           /* Setup our descriptor indices */
1583           rxr->next_to_check = 0;
1584           rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
1585 #ifdef LRO
1586           rxr->lro_enabled = FALSE;
1587 #endif
1588           rxr->discard_multidesc = false;
1589           IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1590 #if 0 /* NetBSD */
1591           IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1592 #if 1     /* Fix inconsistency */
1593           IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1594 #endif
1595 #endif
1596           rxr->vtag_strip = FALSE;
1597 
1598           ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1599               BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1600 
1601           /*
1602            * Now set up the LRO interface
1603            */
1604 #ifdef RSC
1605           if (ixgbe_rsc_enable)
1606                     ixgbe_setup_hw_rsc(rxr);
1607 #endif
1608 #ifdef LRO
1609 #ifdef RSC
1610           else
1611 #endif
1612           if (ifp->if_capenable & IFCAP_LRO) {
1613                     device_t dev = sc->dev;
1614                     int err = tcp_lro_init(lro);
1615                     if (err) {
1616                               device_printf(dev, "LRO Initialization failed!\n");
1617                               goto fail;
1618                     }
1619                     INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1620                     rxr->lro_enabled = TRUE;
1621                     lro->ifp = sc->ifp;
1622           }
1623 #endif /* LRO */
1624 
1625           IXGBE_RX_UNLOCK(rxr);
1626 
1627           return (0);
1628 
1629 fail:
1630           ixgbe_free_receive_ring(rxr);
1631           IXGBE_RX_UNLOCK(rxr);
1632 
1633           return (error);
1634 } /* ixgbe_setup_receive_ring */
1635 
1636 /************************************************************************
1637  * ixgbe_setup_receive_structures - Initialize all receive rings.
1638  ************************************************************************/
1639 int
ixgbe_setup_receive_structures(struct ixgbe_softc * sc)1640 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
1641 {
1642           struct rx_ring *rxr = sc->rx_rings;
1643           int            j;
1644 
1645           INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1646           for (j = 0; j < sc->num_queues; j++, rxr++)
1647                     if (ixgbe_setup_receive_ring(rxr))
1648                               goto fail;
1649 
1650           return (0);
1651 fail:
1652           /*
1653            * Free RX buffers allocated so far, we will only handle
1654            * the rings that completed, the failing case will have
1655            * cleaned up for itself. 'j' failed, so its the terminus.
1656            */
1657           for (int i = 0; i < j; ++i) {
1658                     rxr = &sc->rx_rings[i];
1659                     IXGBE_RX_LOCK(rxr);
1660                     ixgbe_free_receive_ring(rxr);
1661                     IXGBE_RX_UNLOCK(rxr);
1662           }
1663 
1664           return (ENOBUFS);
1665 } /* ixgbe_setup_receive_structures */
1666 
1667 
1668 /************************************************************************
1669  * ixgbe_free_receive_structures - Free all receive rings.
1670  ************************************************************************/
1671 void
ixgbe_free_receive_structures(struct ixgbe_softc * sc)1672 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
1673 {
1674           struct rx_ring *rxr = sc->rx_rings;
1675 
1676           INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1677 
1678           for (int i = 0; i < sc->num_queues; i++, rxr++) {
1679                     ixgbe_free_receive_buffers(rxr);
1680 #ifdef LRO
1681                     /* Free LRO memory */
1682                     tcp_lro_free(&rxr->lro);
1683 #endif /* LRO */
1684                     /* Free the ring memory as well */
1685                     ixgbe_dma_free(sc, &rxr->rxdma);
1686                     IXGBE_RX_LOCK_DESTROY(rxr);
1687           }
1688 
1689           kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
1690 } /* ixgbe_free_receive_structures */
1691 
1692 
1693 /************************************************************************
1694  * ixgbe_free_receive_buffers - Free receive ring data structures
1695  ************************************************************************/
1696 static void
ixgbe_free_receive_buffers(struct rx_ring * rxr)1697 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1698 {
1699           struct ixgbe_softc  *sc = rxr->sc;
1700           struct ixgbe_rx_buf *rxbuf;
1701 
1702           INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1703 
1704           /* Cleanup any existing buffers */
1705           if (rxr->rx_buffers != NULL) {
1706                     for (int i = 0; i < sc->num_rx_desc; i++) {
1707                               rxbuf = &rxr->rx_buffers[i];
1708                               ixgbe_rx_discard(rxr, i);
1709                               if (rxbuf->pmap != NULL) {
1710                                         ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1711                                         rxbuf->pmap = NULL;
1712                               }
1713                     }
1714 
1715                     if (rxr->rx_buffers != NULL) {
1716                               kmem_free(rxr->rx_buffers,
1717                                   sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
1718                               rxr->rx_buffers = NULL;
1719                     }
1720           }
1721 
1722           if (rxr->ptag != NULL) {
1723                     ixgbe_dma_tag_destroy(rxr->ptag);
1724                     rxr->ptag = NULL;
1725           }
1726 
1727           return;
1728 } /* ixgbe_free_receive_buffers */
1729 
1730 /************************************************************************
1731  * ixgbe_rx_input
1732  ************************************************************************/
1733 static __inline void
ixgbe_rx_input(struct rx_ring * rxr,struct ifnet * ifp,struct mbuf * m,u32 ptype)1734 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1735     u32 ptype)
1736 {
1737           struct ixgbe_softc *sc = ifp->if_softc;
1738 
1739 #ifdef LRO
1740           struct ethercom *ec = &sc->osdep.ec;
1741 
1742           /*
1743            * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1744            * should be computed by hardware. Also it should not have VLAN tag in
1745            * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
1746            */
1747         if (rxr->lro_enabled &&
1748             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1749             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1750             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1751             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1752             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1753             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1754             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1755             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1756                 /*
1757                  * Send to the stack if:
1758                  *  - LRO not enabled, or
1759                  *  - no LRO resources, or
1760                  *  - lro enqueue fails
1761                  */
1762                 if (rxr->lro.lro_cnt != 0)
1763                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1764                                 return;
1765         }
1766 #endif /* LRO */
1767 
1768           if_percpuq_enqueue(sc->ipq, m);
1769 } /* ixgbe_rx_input */
1770 
1771 /************************************************************************
1772  * ixgbe_rx_discard
1773  ************************************************************************/
1774 static __inline void
ixgbe_rx_discard(struct rx_ring * rxr,int i)1775 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1776 {
1777           struct ixgbe_rx_buf *rbuf;
1778 
1779           rbuf = &rxr->rx_buffers[i];
1780 
1781           /*
1782            * With advanced descriptors the writeback clobbers the buffer addrs,
1783            * so its easier to just free the existing mbufs and take the normal
1784            * refresh path to get new buffers and mapping.
1785            */
1786 
1787           if (rbuf->fmp != NULL) {/* Partial chain ? */
1788                     bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1789                         rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1790                     ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1791                     m_freem(rbuf->fmp);
1792                     rbuf->fmp = NULL;
1793                     rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1794           } else if (rbuf->buf) {
1795                     bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1796                         rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1797                     ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1798                     m_free(rbuf->buf);
1799                     rbuf->buf = NULL;
1800           }
1801 
1802           rbuf->flags = 0;
1803 
1804           return;
1805 } /* ixgbe_rx_discard */
1806 
1807 
1808 /************************************************************************
1809  * ixgbe_rxeof
1810  *
1811  *   Executes in interrupt context. It replenishes the
1812  *   mbufs in the descriptor and sends data which has
1813  *   been dma'ed into host memory to upper layer.
1814  *
1815  *   Return TRUE for more work, FALSE for all clean.
1816  ************************************************************************/
1817 bool
ixgbe_rxeof(struct ix_queue * que)1818 ixgbe_rxeof(struct ix_queue *que)
1819 {
1820           struct ixgbe_softc  *sc = que->sc;
1821           struct rx_ring                *rxr = que->rxr;
1822           struct ifnet                  *ifp = sc->ifp;
1823 #ifdef LRO
1824           struct lro_ctrl               *lro = &rxr->lro;
1825 #endif /* LRO */
1826           union ixgbe_adv_rx_desc       *cur;
1827           struct ixgbe_rx_buf *rbuf, *nbuf;
1828           int                           i, nextp, processed = 0;
1829           u32                           staterr = 0;
1830           u32                           loopcount = 0, numdesc;
1831           u32                           limit = sc->rx_process_limit;
1832           u32                           rx_copy_len = sc->rx_copy_len;
1833           bool                          discard_multidesc = rxr->discard_multidesc;
1834           bool                          wraparound = false;
1835           unsigned int                  syncremain;
1836 #ifdef RSS
1837           u16                           pkt_info;
1838 #endif
1839 
1840           IXGBE_RX_LOCK(rxr);
1841 
1842 #ifdef DEV_NETMAP
1843           if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
1844                     /* Same as the txeof routine: wakeup clients on intr. */
1845                     if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1846                               IXGBE_RX_UNLOCK(rxr);
1847                               return (FALSE);
1848                     }
1849           }
1850 #endif /* DEV_NETMAP */
1851 
1852           /* Sync the ring. The size is rx_process_limit or the first half */
1853           if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1854                     /* Non-wraparound */
1855                     numdesc = limit;
1856                     syncremain = 0;
1857           } else {
1858                     /* Wraparound. Sync the first half. */
1859                     numdesc = rxr->num_desc - rxr->next_to_check;
1860 
1861                     /* Set the size of the last half */
1862                     syncremain = limit - numdesc;
1863           }
1864           bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1865               rxr->rxdma.dma_map,
1866               sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1867               sizeof(union ixgbe_adv_rx_desc) * numdesc,
1868               BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1869 
1870           /*
1871            * The max number of loop is rx_process_limit. If discard_multidesc is
1872            * true, continue processing to not to send broken packet to the upper
1873            * layer.
1874            */
1875           for (i = rxr->next_to_check;
1876                (loopcount < limit) || (discard_multidesc == true);) {
1877 
1878                     struct mbuf *sendmp, *mp;
1879                     struct mbuf *newmp;
1880 #ifdef RSC
1881                     u32         rsc;
1882 #endif
1883                     u32         ptype;
1884                     u16         len;
1885                     u16         vtag = 0;
1886                     bool        eop;
1887                     bool        discard = false;
1888 
1889                     if (wraparound) {
1890                               /* Sync the last half. */
1891                               KASSERT(syncremain != 0);
1892                               numdesc = syncremain;
1893                               wraparound = false;
1894                     } else if (__predict_false(loopcount >= limit)) {
1895                               KASSERT(discard_multidesc == true);
1896                               numdesc = 1;
1897                     } else
1898                               numdesc = 0;
1899 
1900                     if (numdesc != 0)
1901                               bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1902                                   rxr->rxdma.dma_map, 0,
1903                                   sizeof(union ixgbe_adv_rx_desc) * numdesc,
1904                                   BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1905 
1906                     cur = &rxr->rx_base[i];
1907                     staterr = le32toh(cur->wb.upper.status_error);
1908 #ifdef RSS
1909                     pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1910 #endif
1911 
1912                     if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1913                               break;
1914 
1915                     loopcount++;
1916                     sendmp = newmp = NULL;
1917                     nbuf = NULL;
1918 #ifdef RSC
1919                     rsc = 0;
1920 #endif
1921                     cur->wb.upper.status_error = 0;
1922                     rbuf = &rxr->rx_buffers[i];
1923                     mp = rbuf->buf;
1924 
1925                     len = le16toh(cur->wb.upper.length);
1926                     ptype = le32toh(cur->wb.lower.lo_dword.data) &
1927                         IXGBE_RXDADV_PKTTYPE_MASK;
1928                     eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1929 
1930                     /* Make sure bad packets are discarded */
1931                     if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1932 #if __FreeBSD_version >= 1100036
1933                               if (sc->feat_en & IXGBE_FEATURE_VF)
1934                                         if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1935 #endif
1936                               IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1937                               ixgbe_rx_discard(rxr, i);
1938                               discard_multidesc = false;
1939                               goto next_desc;
1940                     }
1941 
1942                     if (__predict_false(discard_multidesc))
1943                               discard = true;
1944                     else {
1945                               /* Pre-alloc new mbuf. */
1946 
1947                               if ((rbuf->fmp == NULL) &&
1948                                   eop && (len <= rx_copy_len)) {
1949                                         /* For short packet. See below. */
1950                                         sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1951                                         if (__predict_false(sendmp == NULL)) {
1952                                                   IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1953                                                   discard = true;
1954                                         }
1955                               } else {
1956                                         /* For long packet. */
1957                                         newmp = ixgbe_getcl();
1958                                         if (__predict_false(newmp == NULL)) {
1959                                                   IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1960                                                   discard = true;
1961                                         }
1962                               }
1963                     }
1964 
1965                     if (__predict_false(discard)) {
1966                               /*
1967                                * Descriptor initialization is already done by the
1968                                * above code (cur->wb.upper.status_error = 0).
1969                                * So, we can reuse current rbuf->buf for new packet.
1970                                *
1971                                * Rewrite the buffer addr, see comment in
1972                                * ixgbe_rx_discard().
1973                                */
1974                               cur->read.pkt_addr = rbuf->addr;
1975                               m_freem(rbuf->fmp);
1976                               rbuf->fmp = NULL;
1977                               if (!eop) {
1978                                         /* Discard the entire packet. */
1979                                         discard_multidesc = true;
1980                               } else
1981                                         discard_multidesc = false;
1982                               goto next_desc;
1983                     }
1984                     discard_multidesc = false;
1985 
1986                     bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1987                         rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1988 
1989                     /*
1990                      * On 82599 which supports a hardware
1991                      * LRO (called HW RSC), packets need
1992                      * not be fragmented across sequential
1993                      * descriptors, rather the next descriptor
1994                      * is indicated in bits of the descriptor.
1995                      * This also means that we might process
1996                      * more than one packet at a time, something
1997                      * that has never been true before, it
1998                      * required eliminating global chain pointers
1999                      * in favor of what we are doing here.  -jfv
2000                      */
2001                     if (!eop) {
2002                               /*
2003                                * Figure out the next descriptor
2004                                * of this frame.
2005                                */
2006 #ifdef RSC
2007                               if (rxr->hw_rsc == TRUE) {
2008                                         rsc = ixgbe_rsc_count(cur);
2009                                         rxr->rsc_num += (rsc - 1);
2010                               }
2011                               if (rsc) { /* Get hardware index */
2012                                         nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2013                                             IXGBE_RXDADV_NEXTP_SHIFT);
2014                               } else
2015 #endif
2016                               { /* Just sequential */
2017                                         nextp = i + 1;
2018                                         if (nextp == sc->num_rx_desc)
2019                                                   nextp = 0;
2020                               }
2021                               nbuf = &rxr->rx_buffers[nextp];
2022                               prefetch(nbuf);
2023                     }
2024                     /*
2025                      * Rather than using the fmp/lmp global pointers
2026                      * we now keep the head of a packet chain in the
2027                      * buffer struct and pass this along from one
2028                      * descriptor to the next, until we get EOP.
2029                      */
2030                     /*
2031                      * See if there is a stored head
2032                      * that determines what we are
2033                      */
2034                     if (rbuf->fmp != NULL) {
2035                               /* Secondary frag */
2036                               sendmp = rbuf->fmp;
2037 
2038                               /* Update new (used in future) mbuf */
2039                               newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2040                               IXGBE_M_ADJ(sc, rxr, newmp);
2041                               rbuf->buf = newmp;
2042                               rbuf->fmp = NULL;
2043 
2044                               /* For secondary frag */
2045                               mp->m_len = len;
2046                               mp->m_flags &= ~M_PKTHDR;
2047 
2048                               /* For sendmp */
2049                               sendmp->m_pkthdr.len += mp->m_len;
2050                     } else {
2051                               /*
2052                                * It's the first segment of a multi descriptor
2053                                * packet or a single segment which contains a full
2054                                * packet.
2055                                */
2056 
2057                               if (eop && (len <= rx_copy_len)) {
2058                                         /*
2059                                          * Optimize.  This might be a small packet, may
2060                                          * be just a TCP ACK. Copy into a new mbuf, and
2061                                          * Leave the old mbuf+cluster for re-use.
2062                                          */
2063                                         sendmp->m_data += ETHER_ALIGN;
2064                                         memcpy(mtod(sendmp, void *),
2065                                             mtod(mp, void *), len);
2066                                         IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2067                                         rbuf->flags |= IXGBE_RX_COPY;
2068                               } else {
2069                                         /* For long packet */
2070 
2071                                         /* Update new (used in future) mbuf */
2072                                         newmp->m_pkthdr.len = newmp->m_len
2073                                             = rxr->mbuf_sz;
2074                                         IXGBE_M_ADJ(sc, rxr, newmp);
2075                                         rbuf->buf = newmp;
2076                                         rbuf->fmp = NULL;
2077 
2078                                         /* For sendmp */
2079                                         sendmp = mp;
2080                               }
2081 
2082                               /* first desc of a non-ps chain */
2083                               sendmp->m_pkthdr.len = sendmp->m_len = len;
2084                     }
2085                     ++processed;
2086 
2087                     /* Pass the head pointer on */
2088                     if (eop == 0) {
2089                               nbuf->fmp = sendmp;
2090                               sendmp = NULL;
2091                               mp->m_next = nbuf->buf;
2092                     } else { /* Sending this frame */
2093                               m_set_rcvif(sendmp, ifp);
2094                               ++rxr->packets;
2095                               IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2096                               /* capture data for AIM */
2097                               rxr->bytes += sendmp->m_pkthdr.len;
2098                               IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2099                               /* Process vlan info */
2100                               if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2101                                         vtag = le16toh(cur->wb.upper.vlan);
2102                               if (vtag) {
2103                                         vlan_set_tag(sendmp, vtag);
2104                               }
2105                               if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2106                                         ixgbe_rx_checksum(staterr, sendmp, ptype,
2107                                            &sc->stats.pf);
2108                               }
2109 
2110 #if 0 /* FreeBSD */
2111                               /*
2112                                * In case of multiqueue, we have RXCSUM.PCSD bit set
2113                                * and never cleared. This means we have RSS hash
2114                                * available to be used.
2115                                */
2116                               if (sc->num_queues > 1) {
2117                                         sendmp->m_pkthdr.flowid =
2118                                             le32toh(cur->wb.lower.hi_dword.rss);
2119                                         switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2120                                         case IXGBE_RXDADV_RSSTYPE_IPV4:
2121                                                   M_HASHTYPE_SET(sendmp,
2122                                                       M_HASHTYPE_RSS_IPV4);
2123                                                   break;
2124                                         case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2125                                                   M_HASHTYPE_SET(sendmp,
2126                                                       M_HASHTYPE_RSS_TCP_IPV4);
2127                                                   break;
2128                                         case IXGBE_RXDADV_RSSTYPE_IPV6:
2129                                                   M_HASHTYPE_SET(sendmp,
2130                                                       M_HASHTYPE_RSS_IPV6);
2131                                                   break;
2132                                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2133                                                   M_HASHTYPE_SET(sendmp,
2134                                                       M_HASHTYPE_RSS_TCP_IPV6);
2135                                                   break;
2136                                         case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2137                                                   M_HASHTYPE_SET(sendmp,
2138                                                       M_HASHTYPE_RSS_IPV6_EX);
2139                                                   break;
2140                                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2141                                                   M_HASHTYPE_SET(sendmp,
2142                                                       M_HASHTYPE_RSS_TCP_IPV6_EX);
2143                                                   break;
2144 #if __FreeBSD_version > 1100000
2145                                         case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2146                                                   M_HASHTYPE_SET(sendmp,
2147                                                       M_HASHTYPE_RSS_UDP_IPV4);
2148                                                   break;
2149                                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2150                                                   M_HASHTYPE_SET(sendmp,
2151                                                       M_HASHTYPE_RSS_UDP_IPV6);
2152                                                   break;
2153                                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2154                                                   M_HASHTYPE_SET(sendmp,
2155                                                       M_HASHTYPE_RSS_UDP_IPV6_EX);
2156                                                   break;
2157 #endif
2158                                         default:
2159                                                   M_HASHTYPE_SET(sendmp,
2160                                                       M_HASHTYPE_OPAQUE_HASH);
2161                                         }
2162                               } else {
2163                                         sendmp->m_pkthdr.flowid = que->msix;
2164                                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2165                               }
2166 #endif
2167                     }
2168 next_desc:
2169                     ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2170                         BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2171 
2172                     /* Advance our pointers to the next descriptor. */
2173                     if (++i == rxr->num_desc) {
2174                               wraparound = true;
2175                               i = 0;
2176                     }
2177                     rxr->next_to_check = i;
2178 
2179                     /* Now send to the stack or do LRO */
2180                     if (sendmp != NULL)
2181                               ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2182 
2183                     /* Every 8 descriptors we go to refresh mbufs */
2184                     if (processed == 8) {
2185                               ixgbe_refresh_mbufs(rxr, i);
2186                               processed = 0;
2187                     }
2188           }
2189 
2190           /* Save the current status */
2191           rxr->discard_multidesc = discard_multidesc;
2192 
2193           /* Refresh any remaining buf structs */
2194           if (ixgbe_rx_unrefreshed(rxr))
2195                     ixgbe_refresh_mbufs(rxr, i);
2196 
2197           IXGBE_RX_UNLOCK(rxr);
2198 
2199 #ifdef LRO
2200           /*
2201            * Flush any outstanding LRO work
2202            */
2203           tcp_lro_flush_all(lro);
2204 #endif /* LRO */
2205 
2206           /*
2207            * Still have cleaning to do?
2208            */
2209           if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2210                     return (TRUE);
2211 
2212           return (FALSE);
2213 } /* ixgbe_rxeof */
2214 
2215 
2216 /************************************************************************
2217  * ixgbe_rx_checksum
2218  *
2219  *   Verify that the hardware indicated that the checksum is valid.
2220  *   Inform the stack about the status of checksum so that stack
2221  *   doesn't spend time verifying the checksum.
2222  ************************************************************************/
2223 static void
ixgbe_rx_checksum(u32 staterr,struct mbuf * mp,u32 ptype,struct ixgbe_hw_stats * stats)2224 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2225     struct ixgbe_hw_stats *stats)
2226 {
2227           u16  status = (u16)staterr;
2228           u8   errors = (u8)(staterr >> 24);
2229 #if 0
2230           bool sctp = false;
2231 
2232           if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2233               (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2234                     sctp = true;
2235 #endif
2236 
2237           /* IPv4 checksum */
2238           if (status & IXGBE_RXD_STAT_IPCS) {
2239                     IXGBE_EVC_ADD(&stats->ipcs, 1);
2240                     if (!(errors & IXGBE_RXD_ERR_IPE)) {
2241                               /* IP Checksum Good */
2242                               mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2243                     } else {
2244                               IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2245                               mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2246                     }
2247           }
2248           /* TCP/UDP/SCTP checksum */
2249           if (status & IXGBE_RXD_STAT_L4CS) {
2250                     IXGBE_EVC_ADD(&stats->l4cs, 1);
2251                     int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2252                     if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2253                               mp->m_pkthdr.csum_flags |= type;
2254                     } else {
2255                               IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2256                               mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2257                     }
2258           }
2259 } /* ixgbe_rx_checksum */
2260 
2261 /************************************************************************
2262  * ixgbe_dma_malloc
2263  ************************************************************************/
2264 int
ixgbe_dma_malloc(struct ixgbe_softc * sc,const bus_size_t size,struct ixgbe_dma_alloc * dma,const int mapflags)2265 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
2266                     struct ixgbe_dma_alloc *dma, const int mapflags)
2267 {
2268           device_t dev = sc->dev;
2269           int      r, rsegs;
2270 
2271           r = ixgbe_dma_tag_create(
2272                /*      parent */ sc->osdep.dmat,
2273                /*   alignment */ DBA_ALIGN,
2274                /*      bounds */ 0,
2275                /*     maxsize */ size,
2276                /*   nsegments */ 1,
2277                /*  maxsegsize */ size,
2278                /*       flags */ BUS_DMA_ALLOCNOW,
2279                                      &dma->dma_tag);
2280           if (r != 0) {
2281                     aprint_error_dev(dev,
2282                         "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2283                         r);
2284                     goto fail_0;
2285           }
2286 
2287           r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2288               dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2289               &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2290           if (r != 0) {
2291                     aprint_error_dev(dev,
2292                         "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2293                     goto fail_1;
2294           }
2295 
2296           r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2297               size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2298           if (r != 0) {
2299                     aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2300                         __func__, r);
2301                     goto fail_2;
2302           }
2303 
2304           r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2305           if (r != 0) {
2306                     aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2307                         __func__, r);
2308                     goto fail_3;
2309           }
2310 
2311           r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2312               dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2313           if (r != 0) {
2314                     aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2315                         __func__, r);
2316                     goto fail_4;
2317           }
2318           dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2319           dma->dma_size = size;
2320           return 0;
2321 fail_4:
2322           ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2323 fail_3:
2324           bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2325 fail_2:
2326           bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2327 fail_1:
2328           ixgbe_dma_tag_destroy(dma->dma_tag);
2329 fail_0:
2330 
2331           return (r);
2332 } /* ixgbe_dma_malloc */
2333 
2334 /************************************************************************
2335  * ixgbe_dma_free
2336  ************************************************************************/
2337 void
ixgbe_dma_free(struct ixgbe_softc * sc,struct ixgbe_dma_alloc * dma)2338 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
2339 {
2340           bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2341               BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2342           ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2343           bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2344           bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2345           ixgbe_dma_tag_destroy(dma->dma_tag);
2346 } /* ixgbe_dma_free */
2347 
2348 
2349 /************************************************************************
2350  * ixgbe_allocate_queues
2351  *
2352  *   Allocate memory for the transmit and receive rings, and then
2353  *   the descriptors associated with each, called only once at attach.
2354  ************************************************************************/
2355 int
ixgbe_allocate_queues(struct ixgbe_softc * sc)2356 ixgbe_allocate_queues(struct ixgbe_softc *sc)
2357 {
2358           device_t  dev = sc->dev;
2359           struct ix_queue     *que;
2360           struct tx_ring      *txr;
2361           struct rx_ring      *rxr;
2362           int             rsize, tsize, error = IXGBE_SUCCESS;
2363           int             txconf = 0, rxconf = 0;
2364 
2365           /* First, allocate the top level queue structs */
2366           sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
2367               KM_SLEEP);
2368 
2369           /* Second, allocate the TX ring struct memory */
2370           sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
2371               KM_SLEEP);
2372 
2373           /* Third, allocate the RX ring */
2374           sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
2375               KM_SLEEP);
2376 
2377           /* For the ring itself */
2378           tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
2379           KASSERT((tsize % DBA_ALIGN) == 0);
2380 
2381           /*
2382            * Now set up the TX queues, txconf is needed to handle the
2383            * possibility that things fail midcourse and we need to
2384            * undo memory gracefully
2385            */
2386           for (int i = 0; i < sc->num_queues; i++, txconf++) {
2387                     /* Set up some basics */
2388                     txr = &sc->tx_rings[i];
2389                     txr->sc = sc;
2390                     txr->txr_interq = NULL;
2391                     /* In case SR-IOV is enabled, align the index properly */
2392 #ifdef PCI_IOV
2393                     txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2394                         i);
2395 #else
2396                     txr->me = i;
2397 #endif
2398                     txr->num_desc = sc->num_tx_desc;
2399 
2400                     /* Initialize the TX side lock */
2401                     mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2402 
2403                     if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
2404                         BUS_DMA_NOWAIT)) {
2405                               aprint_error_dev(dev,
2406                                   "Unable to allocate TX Descriptor memory\n");
2407                               error = ENOMEM;
2408                               goto err_tx_desc;
2409                     }
2410                     txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2411                     bzero((void *)txr->tx_base, tsize);
2412 
2413                     /* Now allocate transmit buffers for the ring */
2414                     if (ixgbe_allocate_transmit_buffers(txr)) {
2415                               aprint_error_dev(dev,
2416                                   "Critical Failure setting up transmit buffers\n");
2417                               error = ENOMEM;
2418                               goto err_tx_desc;
2419                     }
2420                     if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2421                               /* Allocate a buf ring */
2422                               txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2423                               if (txr->txr_interq == NULL) {
2424                                         aprint_error_dev(dev,
2425                                             "Critical Failure setting up buf ring\n");
2426                                         error = ENOMEM;
2427                                         goto err_tx_desc;
2428                               }
2429                     }
2430           }
2431 
2432           /*
2433            * Next the RX queues...
2434            */
2435           rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
2436           KASSERT((rsize % DBA_ALIGN) == 0);
2437           for (int i = 0; i < sc->num_queues; i++, rxconf++) {
2438                     rxr = &sc->rx_rings[i];
2439                     /* Set up some basics */
2440                     rxr->sc = sc;
2441 #ifdef PCI_IOV
2442                     /* In case SR-IOV is enabled, align the index properly */
2443                     rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2444                         i);
2445 #else
2446                     rxr->me = i;
2447 #endif
2448                     rxr->num_desc = sc->num_rx_desc;
2449 
2450                     /* Initialize the RX side lock */
2451                     mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2452 
2453                     if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
2454                         BUS_DMA_NOWAIT)) {
2455                               aprint_error_dev(dev,
2456                                   "Unable to allocate RxDescriptor memory\n");
2457                               error = ENOMEM;
2458                               goto err_rx_desc;
2459                     }
2460                     rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2461                     bzero((void *)rxr->rx_base, rsize);
2462 
2463                     /* Allocate receive buffers for the ring */
2464                     if (ixgbe_allocate_receive_buffers(rxr)) {
2465                               aprint_error_dev(dev,
2466                                   "Critical Failure setting up receive buffers\n");
2467                               error = ENOMEM;
2468                               goto err_rx_desc;
2469                     }
2470           }
2471 
2472           /*
2473            * Finally set up the queue holding structs
2474            */
2475           for (int i = 0; i < sc->num_queues; i++) {
2476                     que = &sc->queues[i];
2477                     que->sc = sc;
2478                     que->me = i;
2479                     que->txr = &sc->tx_rings[i];
2480                     que->rxr = &sc->rx_rings[i];
2481 
2482                     mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2483                     que->disabled_count = 0;
2484           }
2485 
2486           return (0);
2487 
2488 err_rx_desc:
2489           for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2490                     ixgbe_dma_free(sc, &rxr->rxdma);
2491 err_tx_desc:
2492           for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2493                     ixgbe_dma_free(sc, &txr->txdma);
2494           kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
2495           kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
2496           kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2497           return (error);
2498 } /* ixgbe_allocate_queues */
2499 
2500 /************************************************************************
2501  * ixgbe_free_queues
2502  *
2503  *   Free descriptors for the transmit and receive rings, and then
2504  *   the memory associated with each.
2505  ************************************************************************/
2506 void
ixgbe_free_queues(struct ixgbe_softc * sc)2507 ixgbe_free_queues(struct ixgbe_softc *sc)
2508 {
2509           struct ix_queue *que;
2510           int i;
2511 
2512           ixgbe_free_transmit_structures(sc);
2513           ixgbe_free_receive_structures(sc);
2514           for (i = 0; i < sc->num_queues; i++) {
2515                     que = &sc->queues[i];
2516                     mutex_destroy(&que->dc_mtx);
2517           }
2518           kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2519 } /* ixgbe_free_queues */
2520