1 /******************************************************************************
2 
3   Copyright (c) 2001-2017, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD: stable/10/sys/dev/ixgbe/ixv_netmap.c 323830 2017-09-20 21:22:20Z marius $*/
34 
35 /*
36  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  */
59 
60 /*
61  * $FreeBSD: stable/10/sys/dev/ixgbe/ixv_netmap.c 323830 2017-09-20 21:22:20Z marius $
62  *
63  * netmap support for: ixv
64  *
65  * This file is meant to be a reference on how to implement
66  * netmap support for a network driver.
67  * This file contains code but only static or inline functions used
68  * by a single driver. To avoid replication of code we just #include
69  * it near the beginning of the standard driver.
70  */
71 
72 #ifdef DEV_NETMAP
73 /*
74  * Some drivers may need the following headers. Others
75  * already include them by default
76 
77 #include <vm/vm.h>
78 #include <vm/pmap.h>
79 
80  */
81 #include "ixv.h"
82 
83 /*
84  * device-specific sysctl variables:
85  *
86  * ixv_rx_miss, ixv_rx_miss_bufs:
87  *	count packets that might be missed due to lost interrupts.
88  */
89 SYSCTL_DECL(_dev_netmap);
90 static int ixv_rx_miss, ixv_rx_miss_bufs;
91 SYSCTL_INT(_dev_netmap, OID_AUTO, ixv_rx_miss,
92     CTLFLAG_RW, &ixv_rx_miss, 0, "potentially missed rx intr");
93 SYSCTL_INT(_dev_netmap, OID_AUTO, ixv_rx_miss_bufs,
94     CTLFLAG_RW, &ixv_rx_miss_bufs, 0, "potentially missed rx intr bufs");
95 
96 
97 /*
98  * Register/unregister. We are already under netmap lock.
99  * Only called on the first register or the last unregister.
100  */
101 static int
ixv_netmap_reg(struct netmap_adapter * na,int onoff)102 ixv_netmap_reg(struct netmap_adapter *na, int onoff)
103 {
104 	struct ifnet *ifp = na->ifp;
105 	struct adapter *adapter = ifp->if_softc;
106 
107 	IXGBE_CORE_LOCK(adapter);
108 	adapter->stop_locked(adapter);
109 
110 	/* enable or disable flags and callbacks in na and ifp */
111 	if (onoff) {
112 		nm_set_native_flags(na);
113 	} else {
114 		nm_clear_native_flags(na);
115 	}
116 	adapter->init_locked(adapter);	/* also enables intr */
117 	IXGBE_CORE_UNLOCK(adapter);
118 	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
119 }
120 
121 
122 /*
123  * Reconcile kernel and user view of the transmit ring.
124  *
125  * All information is in the kring.
126  * Userspace wants to send packets up to the one before kring->rhead,
127  * kernel knows kring->nr_hwcur is the first unsent packet.
128  *
129  * Here we push packets out (as many as possible), and possibly
130  * reclaim buffers from previously completed transmission.
131  *
132  * The caller (netmap) guarantees that there is only one instance
133  * running at any time. Any interference with other driver
134  * methods should be handled by the individual drivers.
135  */
136 static int
ixv_netmap_txsync(struct netmap_kring * kring,int flags)137 ixv_netmap_txsync(struct netmap_kring *kring, int flags)
138 {
139 	struct netmap_adapter *na = kring->na;
140 	struct ifnet *ifp = na->ifp;
141 	struct netmap_ring *ring = kring->ring;
142 	u_int nm_i;	/* index into the netmap ring */
143 	u_int nic_i;	/* index into the NIC ring */
144 	u_int n;
145 	u_int const lim = kring->nkr_num_slots - 1;
146 	u_int const head = kring->rhead;
147 	/*
148 	 * interrupts on every tx packet are expensive so request
149 	 * them every half ring, or where NS_REPORT is set
150 	 */
151 	u_int report_frequency = kring->nkr_num_slots >> 1;
152 
153 	/* device-specific */
154 	struct adapter *adapter = ifp->if_softc;
155 	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
156 	int reclaim_tx;
157 
158 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
159 			BUS_DMASYNC_POSTREAD);
160 
161 	/*
162 	 * First part: process new packets to send.
163 	 * nm_i is the current index in the netmap ring,
164 	 * nic_i is the corresponding index in the NIC ring.
165 	 * The two numbers differ because upon a *_init() we reset
166 	 * the NIC ring but leave the netmap ring unchanged.
167 	 * For the transmit ring, we have
168 	 *
169 	 *		nm_i = kring->nr_hwcur
170 	 *		nic_i = IXGBE_TDT (not tracked in the driver)
171 	 * and
172 	 * 		nm_i == (nic_i + kring->nkr_hwofs) % ring_size
173 	 *
174 	 * In this driver kring->nkr_hwofs >= 0, but for other
175 	 * drivers it might be negative as well.
176 	 */
177 
178 	/*
179 	 * If we have packets to send (kring->nr_hwcur != kring->rhead)
180 	 * iterate over the netmap ring, fetch length and update
181 	 * the corresponding slot in the NIC ring. Some drivers also
182 	 * need to update the buffer's physical address in the NIC slot
183 	 * even NS_BUF_CHANGED is not set (PNMB computes the addresses).
184 	 *
185 	 * The netmap_reload_map() calls is especially expensive,
186 	 * even when (as in this case) the tag is 0, so do only
187 	 * when the buffer has actually changed.
188 	 *
189 	 * If possible do not set the report/intr bit on all slots,
190 	 * but only a few times per ring or when NS_REPORT is set.
191 	 *
192 	 * Finally, on 10G and faster drivers, it might be useful
193 	 * to prefetch the next slot and txr entry.
194 	 */
195 
196 	nm_i = kring->nr_hwcur;
197 	if (nm_i != head) {	/* we have new packets to send */
198 		nic_i = netmap_idx_k2n(kring, nm_i);
199 
200 		__builtin_prefetch(&ring->slot[nm_i]);
201 		__builtin_prefetch(&txr->tx_buffers[nic_i]);
202 
203 		for (n = 0; nm_i != head; n++) {
204 			struct netmap_slot *slot = &ring->slot[nm_i];
205 			u_int len = slot->len;
206 			uint64_t paddr;
207 			void *addr = PNMB(na, slot, &paddr);
208 
209 			/* device-specific */
210 			union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
211 			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
212 			int flags = (slot->flags & NS_REPORT ||
213 				nic_i == 0 || nic_i == report_frequency) ?
214 				IXGBE_TXD_CMD_RS : 0;
215 
216 			/* prefetch for next round */
217 			__builtin_prefetch(&ring->slot[nm_i + 1]);
218 			__builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
219 
220 			NM_CHECK_ADDR_LEN(na, addr, len);
221 
222 			if (slot->flags & NS_BUF_CHANGED) {
223 				/* buffer has changed, reload map */
224 				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
225 			}
226 			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
227 
228 			/* Fill the slot in the NIC ring. */
229 			/* Use legacy descriptor, they are faster? */
230 			curr->read.buffer_addr = htole64(paddr);
231 			curr->read.olinfo_status = 0;
232 			curr->read.cmd_type_len = htole32(len | flags |
233 				IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
234 
235 			/* make sure changes to the buffer are synced */
236 			bus_dmamap_sync(txr->txtag, txbuf->map,
237 				BUS_DMASYNC_PREWRITE);
238 
239 			nm_i = nm_next(nm_i, lim);
240 			nic_i = nm_next(nic_i, lim);
241 		}
242 		kring->nr_hwcur = head;
243 
244 		/* synchronize the NIC ring */
245 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
246 			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
247 
248 		/* (re)start the tx unit up to slot nic_i (excluded) */
249 		IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i);
250 	}
251 
252 	/*
253 	 * Second part: reclaim buffers for completed transmissions.
254 	 * Because this is expensive (we read a NIC register etc.)
255 	 * we only do it in specific cases (see below).
256 	 */
257 	if (flags & NAF_FORCE_RECLAIM) {
258 		reclaim_tx = 1; /* forced reclaim */
259 	} else if (!nm_kr_txempty(kring)) {
260 		reclaim_tx = 0; /* have buffers, no reclaim */
261 	} else {
262 		/*
263 		 * No buffers available. Locate previous slot with
264 		 * REPORT_STATUS set.
265 		 * If the slot has DD set, we can reclaim space,
266 		 * otherwise wait for the next interrupt.
267 		 * This enables interrupt moderation on the tx
268 		 * side though it might reduce throughput.
269 		 */
270 		struct ixgbe_legacy_tx_desc *txd =
271 		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;
272 
273 		nic_i = txr->next_to_clean + report_frequency;
274 		if (nic_i > lim)
275 			nic_i -= lim + 1;
276 		// round to the closest with dd set
277 		nic_i = (nic_i < kring->nkr_num_slots / 4 ||
278 			 nic_i >= kring->nkr_num_slots*3/4) ?
279 			0 : report_frequency;
280 		reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
281 	}
282 	if (reclaim_tx) {
283 		/*
284 		 * Record completed transmissions.
285 		 * We (re)use the driver's txr->next_to_clean to keep
286 		 * track of the most recently completed transmission.
287 		 *
288 		 * The datasheet discourages the use of TDH to find
289 		 * out the number of sent packets, but we only set
290 		 * REPORT_STATUS in a few slots so TDH is the only
291 		 * good way.
292 		 */
293 		nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_VFTDH(kring->ring_id));
294 		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
295 			D("TDH wrap %d", nic_i);
296 			nic_i -= kring->nkr_num_slots;
297 		}
298 		if (nic_i != txr->next_to_clean) {
299 			/* some tx completed, increment avail */
300 			txr->next_to_clean = nic_i;
301 			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
302 		}
303 	}
304 
305 	nm_txsync_finalize(kring);
306 
307 	return 0;
308 }
309 
310 
311 /*
312  * Reconcile kernel and user view of the receive ring.
313  * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
315  * the rest of the driver should be handled here.
316  *
317  * On call, kring->rhead is the first packet that userspace wants
318  * to keep, and kring->rcur is the wakeup point.
319  * The kernel has previously reported packets up to kring->rtail.
320  *
321  * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
322  * of whether or not we received an interrupt.
323  */
324 static int
ixv_netmap_rxsync(struct netmap_kring * kring,int flags)325 ixv_netmap_rxsync(struct netmap_kring *kring, int flags)
326 {
327 	struct netmap_adapter *na = kring->na;
328 	struct ifnet *ifp = na->ifp;
329 	struct netmap_ring *ring = kring->ring;
330 	u_int nm_i;	/* index into the netmap ring */
331 	u_int nic_i;	/* index into the NIC ring */
332 	u_int n;
333 	u_int const lim = kring->nkr_num_slots - 1;
334 	u_int const head = nm_rxsync_prologue(kring);
335 	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
336 
337 	/* device-specific */
338 	struct adapter *adapter = ifp->if_softc;
339 	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
340 
341 	if (head > lim)
342 		return netmap_ring_reinit(kring);
343 
344 	/* XXX check sync modes */
345 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
346 			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
347 
348 	/*
349 	 * First part: import newly received packets.
350 	 *
351 	 * nm_i is the index of the next free slot in the netmap ring,
352 	 * nic_i is the index of the next received packet in the NIC ring,
353 	 * and they may differ in case if_init() has been called while
354 	 * in netmap mode. For the receive ring we have
355 	 *
356 	 *	nic_i = rxr->next_to_check;
357 	 *	nm_i = kring->nr_hwtail (previous)
358 	 * and
359 	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
360 	 *
361 	 * rxr->next_to_check is set to 0 on a ring reinit
362 	 */
363 	if (netmap_no_pendintr || force_update) {
364 		int crclen = 0;
365 		uint16_t slot_flags = kring->nkr_slot_flags;
366 
367 		nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
368 		nm_i = netmap_idx_n2k(kring, nic_i);
369 
370 		for (n = 0; ; n++) {
371 			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
372 			uint32_t staterr = le32toh(curr->wb.upper.status_error);
373 
374 			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
375 				break;
376 			ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
377 			ring->slot[nm_i].flags = slot_flags;
378 			bus_dmamap_sync(rxr->ptag,
379 			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
380 			nm_i = nm_next(nm_i, lim);
381 			nic_i = nm_next(nic_i, lim);
382 		}
383 		if (n) { /* update the state variables */
384 			if (netmap_no_pendintr && !force_update) {
385 				/* diagnostics */
386 				ixv_rx_miss ++;
387 				ixv_rx_miss_bufs += n;
388 			}
389 			rxr->next_to_check = nic_i;
390 			kring->nr_hwtail = nm_i;
391 		}
392 		kring->nr_kflags &= ~NKR_PENDINTR;
393 	}
394 
395 	/*
396 	 * Second part: skip past packets that userspace has released.
397 	 * (kring->nr_hwcur to kring->rhead excluded),
398 	 * and make the buffers available for reception.
399 	 * As usual nm_i is the index in the netmap ring,
400 	 * nic_i is the index in the NIC ring, and
401 	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
402 	 */
403 	nm_i = kring->nr_hwcur;
404 	if (nm_i != head) {
405 		nic_i = netmap_idx_k2n(kring, nm_i);
406 		for (n = 0; nm_i != head; n++) {
407 			struct netmap_slot *slot = &ring->slot[nm_i];
408 			uint64_t paddr;
409 			void *addr = PNMB(na, slot, &paddr);
410 
411 			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
412 			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
413 
414 			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
415 				goto ring_reset;
416 
417 			if (slot->flags & NS_BUF_CHANGED) {
418 				/* buffer has changed, reload map */
419 				netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
420 				slot->flags &= ~NS_BUF_CHANGED;
421 			}
422 			curr->wb.upper.status_error = 0;
423 			curr->read.pkt_addr = htole64(paddr);
424 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
425 			    BUS_DMASYNC_PREREAD);
426 			nm_i = nm_next(nm_i, lim);
427 			nic_i = nm_next(nic_i, lim);
428 		}
429 		kring->nr_hwcur = head;
430 
431 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
432 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
433 		/*
434 		 * IMPORTANT: we must leave one free slot in the ring,
435 		 * so move nic_i back by one unit
436 		 */
437 		nic_i = nm_prev(nic_i, lim);
438 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i);
439 	}
440 
441 	/* tell userspace that there might be new packets */
442 	nm_rxsync_finalize(kring);
443 
444 	return 0;
445 
446 ring_reset:
447 	return netmap_ring_reinit(kring);
448 }
449 
450 
451 /*
452  * The attach routine, called near the end of ixgbe_attach(),
453  * fills the parameters for netmap_attach() and calls it.
454  * It cannot fail, in the worst case (such as no memory)
455  * netmap mode will be disabled and the driver will only
456  * operate in standard mode.
457  */
458 void
ixv_netmap_attach(struct adapter * adapter)459 ixv_netmap_attach(struct adapter *adapter)
460 {
461 	struct netmap_adapter na;
462 
463 	bzero(&na, sizeof(na));
464 
465 	na.ifp = adapter->ifp;
466 	na.na_flags = NAF_BDG_MAYSLEEP;
467 	na.num_tx_desc = adapter->num_tx_desc;
468 	na.num_rx_desc = adapter->num_rx_desc;
469 	na.nm_txsync = ixv_netmap_txsync;
470 	na.nm_rxsync = ixv_netmap_rxsync;
471 	na.nm_register = ixv_netmap_reg;
472 	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
473 	netmap_attach(&na);
474 }
475 
476 #endif /* DEV_NETMAP */
477 
478 /* end of file */
479