1 /* $NetBSD: ixgbe_netmap.c,v 1.6 2023/10/06 14:37:04 msaitoh Exp $ */
2 /******************************************************************************
3 
4   Copyright (c) 2001-2017, Intel Corporation
5   All rights reserved.
6 
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions are met:
9 
10    1. Redistributions of source code must retain the above copyright notice,
11       this list of conditions and the following disclaimer.
12 
13    2. Redistributions in binary form must reproduce the above copyright
14       notice, this list of conditions and the following disclaimer in the
15       documentation and/or other materials provided with the distribution.
16 
17    3. Neither the name of the Intel Corporation nor the names of its
18       contributors may be used to endorse or promote products derived from
19       this software without specific prior written permission.
20 
21   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31   POSSIBILITY OF SUCH DAMAGE.
32 
33 ******************************************************************************/
34 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe_netmap.c 320688 2017-07-05 17:27:03Z erj $*/
35 
36 /*
37  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 /*
62  * $FreeBSD: head/sys/dev/ixgbe/ixgbe_netmap.c 320688 2017-07-05 17:27:03Z erj $
63  *
64  * netmap support for: ixgbe
65  *
66  * This file is meant to be a reference on how to implement
67  * netmap support for a network driver.
68  * This file contains code but only static or inline functions used
69  * by a single driver. To avoid replication of code we just #include
70  * it near the beginning of the standard driver.
71  */
72 
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: ixgbe_netmap.c,v 1.6 2023/10/06 14:37:04 msaitoh Exp $");
75 
76 #ifdef DEV_NETMAP
77 /*
78  * Some drivers may need the following headers. Others
79  * already include them by default
80 
81 #include <vm/vm.h>
82 #include <vm/pmap.h>
83 
84  */
85 #include "ixgbe.h"
86 
87 /*
88  * device-specific sysctl variables:
89  *
90  * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
91  *        During regular operations the CRC is stripped, but on some
92  *        hardware reception of frames not multiple of 64 is slower,
93  *        so using crcstrip=0 helps in benchmarks.
94  *
95  * ix_rx_miss, ix_rx_miss_bufs:
96  *        count packets that might be missed due to lost interrupts.
97  */
98 SYSCTL_DECL(_dev_netmap);
99 static int ix_rx_miss, ix_rx_miss_bufs;
100 int ix_crcstrip;
101 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
102     CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
103 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
104     CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
105 SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
106     CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
107 
108 
109 static void
set_crcstrip(struct ixgbe_hw * hw,int onoff)110 set_crcstrip(struct ixgbe_hw *hw, int onoff)
111 {
112           /* crc stripping is set in two places:
113            * IXGBE_HLREG0 (modified on init_locked and hw reset)
114            * IXGBE_RDRXCTL (set by the original driver in
115            *        ixgbe_setup_hw_rsc() called in init_locked.
116            *        We disable the setting when netmap is compiled in).
117            * We update the values here, but also in ixgbe.c because
118            * init_locked sometimes is called outside our control.
119            */
120           uint32_t hl, rxc;
121 
122           hl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
123           rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
124 #ifdef D
125           if (netmap_verbose)
126                     D("%s read  HLREG 0x%x rxc 0x%x",
127                               onoff ? "enter" : "exit", hl, rxc);
128 #endif
129           /* hw requirements ... */
130           rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
131           rxc |= IXGBE_RDRXCTL_RSCACKC;
132           if (onoff && !ix_crcstrip) {
133                     /* keep the crc. Fast rx */
134                     hl &= ~IXGBE_HLREG0_RXCRCSTRP;
135                     rxc &= ~IXGBE_RDRXCTL_CRCSTRIP;
136           } else {
137                     /* reset default mode */
138                     hl |= IXGBE_HLREG0_RXCRCSTRP;
139                     rxc |= IXGBE_RDRXCTL_CRCSTRIP;
140           }
141 #ifdef D
142           if (netmap_verbose)
143                     D("%s write HLREG 0x%x rxc 0x%x",
144                               onoff ? "enter" : "exit", hl, rxc);
145 #endif
146           IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl);
147           IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
148 }
149 
150 
151 /*
152  * Register/unregister. We are already under netmap lock.
153  * Only called on the first register or the last unregister.
154  */
155 static int
ixgbe_netmap_reg(struct netmap_adapter * na,int onoff)156 ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
157 {
158           struct ifnet *ifp = na->ifp;
159           struct ixgbe_softc *sc = ifp->if_softc;
160 
161           IXGBE_CORE_LOCK(sc);
162           sc->stop_locked(sc);
163 
164           set_crcstrip(&sc->hw, onoff);
165           /* enable or disable flags and callbacks in na and ifp */
166           if (onoff) {
167                     nm_set_native_flags(na);
168           } else {
169                     nm_clear_native_flags(na);
170           }
171           sc->init_locked(sc);          /* also enables intr */
172           set_crcstrip(&sc->hw, onoff); // XXX why twice ?
173           IXGBE_CORE_UNLOCK(sc);
174           return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
175 }
176 
177 
178 /*
179  * Reconcile kernel and user view of the transmit ring.
180  *
181  * All information is in the kring.
182  * Userspace wants to send packets up to the one before kring->rhead,
183  * kernel knows kring->nr_hwcur is the first unsent packet.
184  *
185  * Here we push packets out (as many as possible), and possibly
186  * reclaim buffers from previously completed transmission.
187  *
188  * The caller (netmap) guarantees that there is only one instance
189  * running at any time. Any interference with other driver
190  * methods should be handled by the individual drivers.
191  */
192 static int
ixgbe_netmap_txsync(struct netmap_kring * kring,int flags)193 ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
194 {
195           struct netmap_adapter *na = kring->na;
196           struct ifnet *ifp = na->ifp;
197           struct netmap_ring *ring = kring->ring;
198           u_int nm_i;         /* index into the netmap ring */
199           u_int nic_i;        /* index into the NIC ring */
200           u_int n;
201           u_int const lim = kring->nkr_num_slots - 1;
202           u_int const head = kring->rhead;
203           /*
204            * interrupts on every tx packet are expensive so request
205            * them every half ring, or where NS_REPORT is set
206            */
207           u_int report_frequency = kring->nkr_num_slots >> 1;
208 
209           /* device-specific */
210           struct ixgbe_softc *sc = ifp->if_softc;
211           struct tx_ring *txr = &sc->tx_rings[kring->ring_id];
212           int reclaim_tx;
213 
214           bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
215                               BUS_DMASYNC_POSTREAD);
216 
217           /*
218            * First part: process new packets to send.
219            * nm_i is the current index in the netmap ring,
220            * nic_i is the corresponding index in the NIC ring.
221            * The two numbers differ because upon a *_init() we reset
222            * the NIC ring but leave the netmap ring unchanged.
223            * For the transmit ring, we have
224            *
225            *                  nm_i = kring->nr_hwcur
226            *                  nic_i = IXGBE_TDT (not tracked in the driver)
227            * and
228            *                  nm_i == (nic_i + kring->nkr_hwofs) % ring_size
229            *
230            * In this driver kring->nkr_hwofs >= 0, but for other
231            * drivers it might be negative as well.
232            */
233 
234           /*
235            * If we have packets to send (kring->nr_hwcur != kring->rhead)
236            * iterate over the netmap ring, fetch length and update
237            * the corresponding slot in the NIC ring. Some drivers also
238            * need to update the buffer's physical address in the NIC slot
239            * even NS_BUF_CHANGED is not set (PNMB computes the addresses).
240            *
241            * The netmap_reload_map() calls is especially expensive,
242            * even when (as in this case) the tag is 0, so do only
243            * when the buffer has actually changed.
244            *
245            * If possible do not set the report/intr bit on all slots,
246            * but only a few times per ring or when NS_REPORT is set.
247            *
248            * Finally, on 10G and faster drivers, it might be useful
249            * to prefetch the next slot and txr entry.
250            */
251 
252           nm_i = kring->nr_hwcur;
253           if (nm_i != head) { /* we have new packets to send */
254                     nic_i = netmap_idx_k2n(kring, nm_i);
255 
256                     __builtin_prefetch(&ring->slot[nm_i]);
257                     __builtin_prefetch(&txr->tx_buffers[nic_i]);
258 
259                     for (n = 0; nm_i != head; n++) {
260                               struct netmap_slot *slot = &ring->slot[nm_i];
261                               u_int len = slot->len;
262                               uint64_t paddr;
263                               void *addr = PNMB(na, slot, &paddr);
264 
265                               /* device-specific */
266                               union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
267                               struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
268                               int flags = (slot->flags & NS_REPORT ||
269                                         nic_i == 0 || nic_i == report_frequency) ?
270                                         IXGBE_TXD_CMD_RS : 0;
271 
272                               /* prefetch for next round */
273                               __builtin_prefetch(&ring->slot[nm_i + 1]);
274                               __builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
275 
276                               NM_CHECK_ADDR_LEN(na, addr, len);
277 
278                               if (slot->flags & NS_BUF_CHANGED) {
279                                         /* buffer has changed, reload map */
280                                         netmap_reload_map(na, txr->txtag, txbuf->map, addr);
281                               }
282                               slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
283 
284                               /* Fill the slot in the NIC ring. */
285                               /* Use legacy descriptor, they are faster? */
286                               curr->read.buffer_addr = htole64(paddr);
287                               curr->read.olinfo_status = 0;
288                               curr->read.cmd_type_len = htole32(len | flags |
289                                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
290 
291                               /* make sure changes to the buffer are synced */
292                               bus_dmamap_sync(txr->txtag, txbuf->map,
293                                         BUS_DMASYNC_PREWRITE);
294 
295                               nm_i = nm_next(nm_i, lim);
296                               nic_i = nm_next(nic_i, lim);
297                     }
298                     kring->nr_hwcur = head;
299 
300                     /* synchronize the NIC ring */
301                     bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
302                               BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
303 
304                     /* (re)start the tx unit up to slot nic_i (excluded) */
305                     IXGBE_WRITE_REG(&sc->hw, txr->tail, nic_i);
306           }
307 
308           /*
309            * Second part: reclaim buffers for completed transmissions.
310            * Because this is expensive (we read a NIC register etc.)
311            * we only do it in specific cases (see below).
312            */
313           if (flags & NAF_FORCE_RECLAIM) {
314                     reclaim_tx = 1; /* forced reclaim */
315           } else if (!nm_kr_txempty(kring)) {
316                     reclaim_tx = 0; /* have buffers, no reclaim */
317           } else {
318                     /*
319                      * No buffers available. Locate previous slot with
320                      * REPORT_STATUS set.
321                      * If the slot has DD set, we can reclaim space,
322                      * otherwise wait for the next interrupt.
323                      * This enables interrupt moderation on the tx
324                      * side though it might reduce throughput.
325                      */
326                     struct ixgbe_legacy_tx_desc *txd =
327                         (struct ixgbe_legacy_tx_desc *)txr->tx_base;
328 
329                     nic_i = txr->next_to_clean + report_frequency;
330                     if (nic_i > lim)
331                               nic_i -= lim + 1;
332                     // round to the closest with dd set
333                     nic_i = (nic_i < kring->nkr_num_slots / 4 ||
334                                nic_i >= kring->nkr_num_slots*3/4) ?
335                               0 : report_frequency;
336                     reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;      // XXX cpu_to_le32 ?
337           }
338           if (reclaim_tx) {
339                     /*
340                      * Record completed transmissions.
341                      * We (re)use the driver's txr->next_to_clean to keep
342                      * track of the most recently completed transmission.
343                      *
344                      * The datasheet discourages the use of TDH to find
345                      * out the number of sent packets, but we only set
346                      * REPORT_STATUS in a few slots so TDH is the only
347                      * good way.
348                      */
349                     nic_i = IXGBE_READ_REG(&sc->hw, IXGBE_TDH(kring->ring_id));
350                     if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
351 #ifdef D
352                               D("TDH wrap %d", nic_i);
353 #endif
354                               nic_i -= kring->nkr_num_slots;
355                     }
356                     if (nic_i != txr->next_to_clean) {
357                               /* some tx completed, increment avail */
358                               txr->next_to_clean = nic_i;
359                               kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
360                     }
361           }
362 
363           return 0;
364 }
365 
366 
367 /*
368  * Reconcile kernel and user view of the receive ring.
369  * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
371  * the rest of the driver should be handled here.
372  *
373  * On call, kring->rhead is the first packet that userspace wants
374  * to keep, and kring->rcur is the wakeup point.
375  * The kernel has previously reported packets up to kring->rtail.
376  *
377  * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
378  * of whether or not we received an interrupt.
379  */
380 static int
ixgbe_netmap_rxsync(struct netmap_kring * kring,int flags)381 ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
382 {
383           struct netmap_adapter *na = kring->na;
384           struct ifnet *ifp = na->ifp;
385           struct netmap_ring *ring = kring->ring;
386           u_int nm_i;         /* index into the netmap ring */
387           u_int nic_i;        /* index into the NIC ring */
388           u_int n;
389           u_int const lim = kring->nkr_num_slots - 1;
390           u_int const head = kring->rhead;
391           int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
392 
393           /* device-specific */
394           struct ixgbe_softc *sc = ifp->if_softc;
395           struct rx_ring *rxr = &sc->rx_rings[kring->ring_id];
396 
397           if (head > lim)
398                     return netmap_ring_reinit(kring);
399 
400           /* XXX check sync modes */
401           bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
402                               BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
403 
404           /*
405            * First part: import newly received packets.
406            *
407            * nm_i is the index of the next free slot in the netmap ring,
408            * nic_i is the index of the next received packet in the NIC ring,
409            * and they may differ in case if_init() has been called while
410            * in netmap mode. For the receive ring we have
411            *
412            *        nic_i = rxr->next_to_check;
413            *        nm_i = kring->nr_hwtail (previous)
414            * and
415            *        nm_i == (nic_i + kring->nkr_hwofs) % ring_size
416            *
417            * rxr->next_to_check is set to 0 on a ring reinit
418            */
419           if (netmap_no_pendintr || force_update) {
420                     int crclen = (ix_crcstrip) ? 0 : 4;
421 
422                     nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
423                     nm_i = netmap_idx_n2k(kring, nic_i);
424 
425                     for (n = 0; ; n++) {
426                               union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
427                               uint32_t staterr = le32toh(curr->wb.upper.status_error);
428 
429                               if ((staterr & IXGBE_RXD_STAT_DD) == 0)
430                                         break;
431                               ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
432                               ring->slot[nm_i].flags = 0;
433                               bus_dmamap_sync(rxr->ptag,
434                                   rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
435                               nm_i = nm_next(nm_i, lim);
436                               nic_i = nm_next(nic_i, lim);
437                     }
438                     if (n) { /* update the state variables */
439                               if (netmap_no_pendintr && !force_update) {
440                                         /* diagnostics */
441                                         ix_rx_miss ++;
442                                         ix_rx_miss_bufs += n;
443                               }
444                               rxr->next_to_check = nic_i;
445                               kring->nr_hwtail = nm_i;
446                     }
447                     kring->nr_kflags &= ~NKR_PENDINTR;
448           }
449 
450           /*
451            * Second part: skip past packets that userspace has released.
452            * (kring->nr_hwcur to kring->rhead excluded),
453            * and make the buffers available for reception.
454            * As usual nm_i is the index in the netmap ring,
455            * nic_i is the index in the NIC ring, and
456            * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
457            */
458           nm_i = kring->nr_hwcur;
459           if (nm_i != head) {
460                     nic_i = netmap_idx_k2n(kring, nm_i);
461                     for (n = 0; nm_i != head; n++) {
462                               struct netmap_slot *slot = &ring->slot[nm_i];
463                               uint64_t paddr;
464                               void *addr = PNMB(na, slot, &paddr);
465 
466                               union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
467                               struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
468 
469                               if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
470                                         goto ring_reset;
471 
472                               if (slot->flags & NS_BUF_CHANGED) {
473                                         /* buffer has changed, reload map */
474                                         netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
475                                         slot->flags &= ~NS_BUF_CHANGED;
476                               }
477                               curr->wb.upper.status_error = 0;
478                               curr->read.pkt_addr = htole64(paddr);
479                               bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
480                                   BUS_DMASYNC_PREREAD);
481                               nm_i = nm_next(nm_i, lim);
482                               nic_i = nm_next(nic_i, lim);
483                     }
484                     kring->nr_hwcur = head;
485 
486                     bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
487                         BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
488                     /*
489                      * IMPORTANT: we must leave one free slot in the ring,
490                      * so move nic_i back by one unit
491                      */
492                     nic_i = nm_prev(nic_i, lim);
493                     IXGBE_WRITE_REG(&sc->hw, rxr->tail, nic_i);
494           }
495 
496           return 0;
497 
498 ring_reset:
499           return netmap_ring_reinit(kring);
500 }
501 
502 
503 /*
504  * The attach routine, called near the end of ixgbe_attach(),
505  * fills the parameters for netmap_attach() and calls it.
506  * It cannot fail, in the worst case (such as no memory)
507  * netmap mode will be disabled and the driver will only
508  * operate in standard mode.
509  */
510 void
ixgbe_netmap_attach(struct ixgbe_softc * sc)511 ixgbe_netmap_attach(struct ixgbe_softc *sc)
512 {
513           struct netmap_adapter na;
514 
515           bzero(&na, sizeof(na));
516 
517           na.ifp = sc->ifp;
518           na.na_flags = NAF_BDG_MAYSLEEP;
519           na.num_tx_desc = sc->num_tx_desc;
520           na.num_rx_desc = sc->num_rx_desc;
521           na.nm_txsync = ixgbe_netmap_txsync;
522           na.nm_rxsync = ixgbe_netmap_rxsync;
523           na.nm_register = ixgbe_netmap_reg;
524           na.num_tx_rings = na.num_rx_rings = sc->num_queues;
525           netmap_attach(&na);
526 }
527 
528 #endif /* DEV_NETMAP */
529 
530 /* end of file */
531