1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD: stable/12/sys/dev/vmware/vmxnet3/if_vmx.c 371456 2021-12-27 12:02:18Z avg $");
25 
26 #include "opt_rss.h"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/endian.h>
32 #include <sys/sockio.h>
33 #include <sys/mbuf.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/smp.h>
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_arp.h>
46 #include <net/if_dl.h>
47 #include <net/if_types.h>
48 #include <net/if_media.h>
49 #include <net/if_vlan_var.h>
50 #include <net/iflib.h>
51 #ifdef RSS
52 #include <net/rss_config.h>
53 #endif
54 
55 #include <netinet/in_systm.h>
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip6.h>
59 #include <netinet6/ip6_var.h>
60 #include <netinet/udp.h>
61 #include <netinet/tcp.h>
62 
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65 #include <sys/bus.h>
66 #include <sys/rman.h>
67 
68 #include <dev/pci/pcireg.h>
69 #include <dev/pci/pcivar.h>
70 
71 #include "ifdi_if.h"
72 
73 #include "if_vmxreg.h"
74 #include "if_vmxvar.h"
75 
76 #include "opt_inet.h"
77 #include "opt_inet6.h"
78 
79 
80 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
81 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
82 
83 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
84 {
85 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
86 	/* required last entry */
87 	PVID_END
88 };
89 
90 static void	*vmxnet3_register(device_t);
91 static int	vmxnet3_attach_pre(if_ctx_t);
92 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
93 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
94 static int	vmxnet3_attach_post(if_ctx_t);
95 static int	vmxnet3_detach(if_ctx_t);
96 static int	vmxnet3_shutdown(if_ctx_t);
97 static int	vmxnet3_suspend(if_ctx_t);
98 static int	vmxnet3_resume(if_ctx_t);
99 
100 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
101 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
102 static int	vmxnet3_check_version(struct vmxnet3_softc *);
103 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
104 
105 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
106 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
107 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
108 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
109 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
110 static void	vmxnet3_queues_free(if_ctx_t);
111 
112 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
113 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
114 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
115 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
116 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
117 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
118 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
119 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
120 static void	vmxnet3_free_data(struct vmxnet3_softc *);
121 
122 static void	vmxnet3_evintr(struct vmxnet3_softc *);
123 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
124 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
125 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
126 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
127 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
128 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
129 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
130 static int	vmxnet3_legacy_intr(void *);
131 static int	vmxnet3_rxq_intr(void *);
132 static int	vmxnet3_event_intr(void *);
133 
134 static void	vmxnet3_stop(if_ctx_t);
135 
136 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
137 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
138 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
139 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
140 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
141 static void	vmxnet3_init(if_ctx_t);
142 static void	vmxnet3_multi_set(if_ctx_t);
143 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
144 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
145 static int	vmxnet3_media_change(if_ctx_t);
146 static int	vmxnet3_promisc_set(if_ctx_t, int);
147 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
148 static void	vmxnet3_update_admin_status(if_ctx_t);
149 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
150 
151 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
152 		    uint16_t);
153 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
154 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
155 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
156 
157 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
158 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
159 static void	vmxnet3_link_status(struct vmxnet3_softc *);
160 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
161 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
162 
163 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
164 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
165 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
166 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
167 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
168 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
169 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
170 
171 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
172 		    uint32_t);
173 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
174 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
175 		    uint32_t);
176 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
177 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
178 
179 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
180 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
181 static void	vmxnet3_link_intr_enable(if_ctx_t);
182 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
183 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
184 static void	vmxnet3_intr_enable_all(if_ctx_t);
185 static void	vmxnet3_intr_disable_all(if_ctx_t);
186 
/*
 * Barrier selector passed to vmxnet3_barrier().
 * NOTE(review): the names suggest read, write and read+write barriers;
 * confirm against the vmxnet3_barrier() implementation.
 */
typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;
192 
193 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
194 
195 
196 static device_method_t vmxnet3_methods[] = {
197 	/* Device interface */
198 	DEVMETHOD(device_register, vmxnet3_register),
199 	DEVMETHOD(device_probe, iflib_device_probe),
200 	DEVMETHOD(device_attach, iflib_device_attach),
201 	DEVMETHOD(device_detach, iflib_device_detach),
202 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
203 	DEVMETHOD(device_suspend, iflib_device_suspend),
204 	DEVMETHOD(device_resume, iflib_device_resume),
205 	DEVMETHOD_END
206 };
207 
208 static driver_t vmxnet3_driver = {
209 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
210 };
211 
212 static devclass_t vmxnet3_devclass;
213 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
214 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
215 MODULE_VERSION(vmx, 2);
216 
217 MODULE_DEPEND(vmx, pci, 1, 1, 1);
218 MODULE_DEPEND(vmx, ether, 1, 1, 1);
219 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
220 
221 static device_method_t vmxnet3_iflib_methods[] = {
222 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
223 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
224 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
225 
226 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
227 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
228 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
229 
230 	DEVMETHOD(ifdi_init, vmxnet3_init),
231 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
232 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
233 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
234 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
235 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
236 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
237 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
238 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
239 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
240 
241 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
242 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
243 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
244 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
245 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
246 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
247 
248 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
249 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
250 
251 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
252 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
253 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
254 
255 	DEVMETHOD_END
256 };
257 
258 static driver_t vmxnet3_iflib_driver = {
259 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
260 };
261 
262 struct if_txrx vmxnet3_txrx = {
263 	.ift_txd_encap = vmxnet3_isc_txd_encap,
264 	.ift_txd_flush = vmxnet3_isc_txd_flush,
265 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
266 	.ift_rxd_available = vmxnet3_isc_rxd_available,
267 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
268 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
269 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
270 	.ift_legacy_intr = vmxnet3_legacy_intr
271 };
272 
273 static struct if_shared_ctx vmxnet3_sctx_init = {
274 	.isc_magic = IFLIB_MAGIC,
275 	.isc_q_align = 512,
276 
277 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
278 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
280 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
281 
282 	/*
283 	 * These values are used to configure the busdma tag used for
284 	 * receive descriptors.  Each receive descriptor only points to one
285 	 * buffer.
286 	 */
287 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
288 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
289 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
290 
291 	.isc_admin_intrcnt = 1,
292 	.isc_vendor_info = vmxnet3_vendor_info_array,
293 	.isc_driver_version = "2",
294 	.isc_driver = &vmxnet3_iflib_driver,
295 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
296 
297 	/*
298 	 * Number of receive queues per receive queue set, with associated
299 	 * descriptor settings for each.
300 	 */
301 	.isc_nrxqs = 3,
302 	.isc_nfl = 2, /* one free list for each receive command queue */
303 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
304 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
305 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
306 
307 	/*
308 	 * Number of transmit queues per transmit queue set, with associated
309 	 * descriptor settings for each.
310 	 */
311 	.isc_ntxqs = 2,
312 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
313 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
314 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
315 };
316 
317 static void *
vmxnet3_register(device_t dev)318 vmxnet3_register(device_t dev)
319 {
320 	return (&vmxnet3_sctx_init);
321 }
322 
/*
 * Round val down to the nearest power of two.
 *
 * Returns the largest power of two that is less than or equal to val,
 * or 0 when val is not positive.  The explicit non-positive check
 * avoids the undefined shift by -1 that an fls()-based expression
 * produces for 0, and the out-of-range int result it produces for
 * negative values.
 */
static int
trunc_powerof2(int val)
{
	unsigned int pow2;

	if (val <= 0)
		return (0);

	/* Double pow2 for as long as the doubled value still fits in val. */
	pow2 = 1;
	while (pow2 <= (unsigned int)val / 2)
		pow2 <<= 1;

	return ((int)pow2);
}
329 
330 static int
vmxnet3_attach_pre(if_ctx_t ctx)331 vmxnet3_attach_pre(if_ctx_t ctx)
332 {
333 	device_t dev;
334 	if_softc_ctx_t scctx;
335 	struct vmxnet3_softc *sc;
336 	uint32_t intr_config;
337 	int error;
338 
339 	dev = iflib_get_dev(ctx);
340 	sc = iflib_get_softc(ctx);
341 	sc->vmx_dev = dev;
342 	sc->vmx_ctx = ctx;
343 	sc->vmx_sctx = iflib_get_sctx(ctx);
344 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
345 	sc->vmx_ifp = iflib_get_ifp(ctx);
346 	sc->vmx_media = iflib_get_media(ctx);
347 	scctx = sc->vmx_scctx;
348 
349 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
350 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
351 	/* isc_tx_tso_size_max doesn't include possible vlan header */
352 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
353 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
354 	scctx->isc_txrx = &vmxnet3_txrx;
355 
356 	/* If 0, the iflib tunable was not set, so set to the default */
357 	if (scctx->isc_nrxqsets == 0)
358 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
359 	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
360 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
361 	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
362 
363 	/* If 0, the iflib tunable was not set, so set to the default */
364 	if (scctx->isc_ntxqsets == 0)
365 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
366 	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
367 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
368 	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
369 
370 	/*
371 	 * Enforce that the transmit completion queue descriptor count is
372 	 * the same as the transmit command queue descriptor count.
373 	 */
374 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
375 	scctx->isc_txqsizes[0] =
376 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
377 	scctx->isc_txqsizes[1] =
378 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
379 
380 	/*
381 	 * Enforce that the receive completion queue descriptor count is the
382 	 * sum of the receive command queue descriptor counts, and that the
383 	 * second receive command queue descriptor count is the same as the
384 	 * first one.
385 	 */
386 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
387 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
388 	scctx->isc_rxqsizes[0] =
389 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
390 	scctx->isc_rxqsizes[1] =
391 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
392 	scctx->isc_rxqsizes[2] =
393 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
394 
395 	/*
396 	 * Initialize the max frame size and descriptor queue buffer
397 	 * sizes.
398 	 */
399 	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
400 
401 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
402 
403 	/* Map PCI BARs */
404 	error = vmxnet3_alloc_resources(sc);
405 	if (error)
406 		goto fail;
407 
408 	/* Check device versions */
409 	error = vmxnet3_check_version(sc);
410 	if (error)
411 		goto fail;
412 
413 	/*
414 	 * The interrupt mode can be set in the hypervisor configuration via
415 	 * the parameter ethernet<N>.intrMode.
416 	 */
417 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
418 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
419 
420 	/*
421 	 * Configure the softc context to attempt to configure the interrupt
422 	 * mode now indicated by intr_config.  iflib will follow the usual
423 	 * fallback path MSIX -> MSI -> LEGACY, starting at the configured
424 	 * starting mode.
425 	 */
426 	switch (intr_config & 0x03) {
427 	case VMXNET3_IT_AUTO:
428 	case VMXNET3_IT_MSIX:
429 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
430 		break;
431 	case VMXNET3_IT_MSI:
432 		scctx->isc_msix_bar = -1;
433 		scctx->isc_disable_msix = 1;
434 		break;
435 	case VMXNET3_IT_LEGACY:
436 		scctx->isc_msix_bar = 0;
437 		break;
438 	}
439 
440 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
441 	scctx->isc_capabilities = scctx->isc_capenable =
442 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
443 	    IFCAP_TSO4 | IFCAP_TSO6 |
444 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
445 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
446 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
447 	    IFCAP_JUMBO_MTU;
448 
449 	/* These capabilities are not enabled by default. */
450 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
451 
452 	vmxnet3_get_lladdr(sc);
453 	iflib_set_mac(ctx, sc->vmx_lladdr);
454 
455 	return (0);
456 fail:
457 	/*
458 	 * We must completely clean up anything allocated above as iflib
459 	 * will not invoke any other driver entry points as a result of this
460 	 * failure.
461 	 */
462 	vmxnet3_free_resources(sc);
463 
464 	return (error);
465 }
466 
467 static int
vmxnet3_msix_intr_assign(if_ctx_t ctx,int msix)468 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
469 {
470 	struct vmxnet3_softc *sc;
471 	if_softc_ctx_t scctx;
472 	struct vmxnet3_rxqueue *rxq;
473 	int error;
474 	int i;
475 	char irq_name[16];
476 
477 	sc = iflib_get_softc(ctx);
478 	scctx = sc->vmx_scctx;
479 
480 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
481 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
482 
483 		rxq = &sc->vmx_rxq[i];
484 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
485 		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
486 		if (error) {
487 			device_printf(iflib_get_dev(ctx),
488 			    "Failed to register rxq %d interrupt handler\n", i);
489 			return (error);
490 		}
491 	}
492 
493 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
494 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
495 
496 		/*
497 		 * Don't provide the corresponding rxq irq for reference -
498 		 * we want the transmit task to be attached to a task queue
499 		 * that is different from the one used by the corresponding
500 		 * rxq irq.  That is because the TX doorbell writes are very
501 		 * expensive as virtualized MMIO operations, so we want to
502 		 * be able to defer them to another core when possible so
503 		 * that they don't steal receive processing cycles during
504 		 * stack turnarounds like TCP ACK generation.  The other
505 		 * piece to this approach is enabling the iflib abdicate
506 		 * option (currently via an interface-specific
507 		 * tunable/sysctl).
508 		 */
509 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
510 		    irq_name);
511 	}
512 
513 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
514 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
515 	    "event");
516 	if (error) {
517 		device_printf(iflib_get_dev(ctx),
518 		    "Failed to register event interrupt handler\n");
519 		return (error);
520 	}
521 
522 	return (0);
523 }
524 
525 static void
vmxnet3_free_irqs(struct vmxnet3_softc * sc)526 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
527 {
528 	if_softc_ctx_t scctx;
529 	struct vmxnet3_rxqueue *rxq;
530 	int i;
531 
532 	scctx = sc->vmx_scctx;
533 
534 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
535 		rxq = &sc->vmx_rxq[i];
536 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
537 	}
538 
539 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
540 }
541 
542 static int
vmxnet3_attach_post(if_ctx_t ctx)543 vmxnet3_attach_post(if_ctx_t ctx)
544 {
545 	device_t dev;
546 	if_softc_ctx_t scctx;
547 	struct vmxnet3_softc *sc;
548 	int error;
549 
550 	dev = iflib_get_dev(ctx);
551 	scctx = iflib_get_softc_ctx(ctx);
552 	sc = iflib_get_softc(ctx);
553 
554 	if (scctx->isc_nrxqsets > 1)
555 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
556 
557 	error = vmxnet3_alloc_data(sc);
558 	if (error)
559 		goto fail;
560 
561 	vmxnet3_set_interrupt_idx(sc);
562 	vmxnet3_setup_sysctl(sc);
563 
564 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
565 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
566 
567 fail:
568 	return (error);
569 }
570 
571 static int
vmxnet3_detach(if_ctx_t ctx)572 vmxnet3_detach(if_ctx_t ctx)
573 {
574 	struct vmxnet3_softc *sc;
575 
576 	sc = iflib_get_softc(ctx);
577 
578 	vmxnet3_free_irqs(sc);
579 	vmxnet3_free_data(sc);
580 	vmxnet3_free_resources(sc);
581 
582 	return (0);
583 }
584 
585 static int
vmxnet3_shutdown(if_ctx_t ctx)586 vmxnet3_shutdown(if_ctx_t ctx)
587 {
588 
589 	return (0);
590 }
591 
592 static int
vmxnet3_suspend(if_ctx_t ctx)593 vmxnet3_suspend(if_ctx_t ctx)
594 {
595 
596 	return (0);
597 }
598 
599 static int
vmxnet3_resume(if_ctx_t ctx)600 vmxnet3_resume(if_ctx_t ctx)
601 {
602 
603 	return (0);
604 }
605 
606 static int
vmxnet3_alloc_resources(struct vmxnet3_softc * sc)607 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
608 {
609 	device_t dev;
610 	int rid;
611 
612 	dev = sc->vmx_dev;
613 
614 	rid = PCIR_BAR(0);
615 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
616 	    RF_ACTIVE);
617 	if (sc->vmx_res0 == NULL) {
618 		device_printf(dev,
619 		    "could not map BAR0 memory\n");
620 		return (ENXIO);
621 	}
622 
623 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
624 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
625 
626 	rid = PCIR_BAR(1);
627 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
628 	    RF_ACTIVE);
629 	if (sc->vmx_res1 == NULL) {
630 		device_printf(dev,
631 		    "could not map BAR1 memory\n");
632 		return (ENXIO);
633 	}
634 
635 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
636 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
637 
638 	return (0);
639 }
640 
641 static void
vmxnet3_free_resources(struct vmxnet3_softc * sc)642 vmxnet3_free_resources(struct vmxnet3_softc *sc)
643 {
644 	device_t dev;
645 
646 	dev = sc->vmx_dev;
647 
648 	if (sc->vmx_res0 != NULL) {
649 		bus_release_resource(dev, SYS_RES_MEMORY,
650 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
651 		sc->vmx_res0 = NULL;
652 	}
653 
654 	if (sc->vmx_res1 != NULL) {
655 		bus_release_resource(dev, SYS_RES_MEMORY,
656 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
657 		sc->vmx_res1 = NULL;
658 	}
659 }
660 
661 static int
vmxnet3_check_version(struct vmxnet3_softc * sc)662 vmxnet3_check_version(struct vmxnet3_softc *sc)
663 {
664 	device_t dev;
665 	uint32_t version;
666 
667 	dev = sc->vmx_dev;
668 
669 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
670 	if ((version & 0x01) == 0) {
671 		device_printf(dev, "unsupported hardware version %#x\n",
672 		    version);
673 		return (ENOTSUP);
674 	}
675 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
676 
677 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
678 	if ((version & 0x01) == 0) {
679 		device_printf(dev, "unsupported UPT version %#x\n", version);
680 		return (ENOTSUP);
681 	}
682 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
683 
684 	return (0);
685 }
686 
687 static void
vmxnet3_set_interrupt_idx(struct vmxnet3_softc * sc)688 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
689 {
690 	if_softc_ctx_t scctx;
691 	struct vmxnet3_txqueue *txq;
692 	struct vmxnet3_txq_shared *txs;
693 	struct vmxnet3_rxqueue *rxq;
694 	struct vmxnet3_rxq_shared *rxs;
695 	int intr_idx;
696 	int i;
697 
698 	scctx = sc->vmx_scctx;
699 
700 	/*
701 	 * There is always one interrupt per receive queue, assigned
702 	 * starting with the first interrupt.  When there is only one
703 	 * interrupt available, the event interrupt shares the receive queue
704 	 * interrupt, otherwise it uses the interrupt following the last
705 	 * receive queue interrupt.  Transmit queues are not assigned
706 	 * interrupts, so they are given indexes beyond the indexes that
707 	 * correspond to the real interrupts.
708 	 */
709 
710 	/* The event interrupt is always the last vector. */
711 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
712 
713 	intr_idx = 0;
714 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
715 		rxq = &sc->vmx_rxq[i];
716 		rxs = rxq->vxrxq_rs;
717 		rxq->vxrxq_intr_idx = intr_idx;
718 		rxs->intr_idx = rxq->vxrxq_intr_idx;
719 	}
720 
721 	/*
722 	 * Assign the tx queues interrupt indexes above what we are actually
723 	 * using.  These interrupts will never be enabled.
724 	 */
725 	intr_idx = scctx->isc_vectors;
726 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
727 		txq = &sc->vmx_txq[i];
728 		txs = txq->vxtxq_ts;
729 		txq->vxtxq_intr_idx = intr_idx;
730 		txs->intr_idx = txq->vxtxq_intr_idx;
731 	}
732 }
733 
734 static int
vmxnet3_queues_shared_alloc(struct vmxnet3_softc * sc)735 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
736 {
737 	if_softc_ctx_t scctx;
738 	int size;
739 	int error;
740 
741 	scctx = sc->vmx_scctx;
742 
743 	/*
744 	 * The txq and rxq shared data areas must be allocated contiguously
745 	 * as vmxnet3_driver_shared contains only a single address member
746 	 * for the shared queue data area.
747 	 */
748 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
749 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
750 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
751 	if (error) {
752 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
753 		return (error);
754 	}
755 
756 	return (0);
757 }
758 
759 static void
vmxnet3_init_txq(struct vmxnet3_softc * sc,int q)760 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
761 {
762 	struct vmxnet3_txqueue *txq;
763 	struct vmxnet3_comp_ring *txc;
764 	struct vmxnet3_txring *txr;
765 	if_softc_ctx_t scctx;
766 
767 	txq = &sc->vmx_txq[q];
768 	txc = &txq->vxtxq_comp_ring;
769 	txr = &txq->vxtxq_cmd_ring;
770 	scctx = sc->vmx_scctx;
771 
772 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
773 	    device_get_nameunit(sc->vmx_dev), q);
774 
775 	txq->vxtxq_sc = sc;
776 	txq->vxtxq_id = q;
777 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
778 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
779 }
780 
781 static int
vmxnet3_tx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int ntxqs,int ntxqsets)782 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
783     int ntxqs, int ntxqsets)
784 {
785 	struct vmxnet3_softc *sc;
786 	int q;
787 	int error;
788 	caddr_t kva;
789 
790 	sc = iflib_get_softc(ctx);
791 
792 	/* Allocate the array of transmit queues */
793 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
794 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
795 	if (sc->vmx_txq == NULL)
796 		return (ENOMEM);
797 
798 	/* Initialize driver state for each transmit queue */
799 	for (q = 0; q < ntxqsets; q++)
800 		vmxnet3_init_txq(sc, q);
801 
802 	/*
803 	 * Allocate queue state that is shared with the device.  This check
804 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
805 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
806 	 * order iflib invokes those routines in.
807 	 */
808 	if (sc->vmx_qs_dma.idi_size == 0) {
809 		error = vmxnet3_queues_shared_alloc(sc);
810 		if (error)
811 			return (error);
812 	}
813 
814 	kva = sc->vmx_qs_dma.idi_vaddr;
815 	for (q = 0; q < ntxqsets; q++) {
816 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
817 		kva += sizeof(struct vmxnet3_txq_shared);
818 	}
819 
820 	/* Record descriptor ring vaddrs and paddrs */
821 	for (q = 0; q < ntxqsets; q++) {
822 		struct vmxnet3_txqueue *txq;
823 		struct vmxnet3_txring *txr;
824 		struct vmxnet3_comp_ring *txc;
825 
826 		txq = &sc->vmx_txq[q];
827 		txc = &txq->vxtxq_comp_ring;
828 		txr = &txq->vxtxq_cmd_ring;
829 
830 		/* Completion ring */
831 		txc->vxcr_u.txcd =
832 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
833 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
834 
835 		/* Command ring */
836 		txr->vxtxr_txd =
837 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
838 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
839 	}
840 
841 	return (0);
842 }
843 
844 static void
vmxnet3_init_rxq(struct vmxnet3_softc * sc,int q,int nrxqs)845 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
846 {
847 	struct vmxnet3_rxqueue *rxq;
848 	struct vmxnet3_comp_ring *rxc;
849 	struct vmxnet3_rxring *rxr;
850 	if_softc_ctx_t scctx;
851 	int i;
852 
853 	rxq = &sc->vmx_rxq[q];
854 	rxc = &rxq->vxrxq_comp_ring;
855 	scctx = sc->vmx_scctx;
856 
857 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
858 	    device_get_nameunit(sc->vmx_dev), q);
859 
860 	rxq->vxrxq_sc = sc;
861 	rxq->vxrxq_id = q;
862 
863 	/*
864 	 * First rxq is the completion queue, so there are nrxqs - 1 command
865 	 * rings starting at iflib queue id 1.
866 	 */
867 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
868 	for (i = 0; i < nrxqs - 1; i++) {
869 		rxr = &rxq->vxrxq_cmd_ring[i];
870 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
871 	}
872 }
873 
874 static int
vmxnet3_rx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int nrxqs,int nrxqsets)875 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
876     int nrxqs, int nrxqsets)
877 {
878 	struct vmxnet3_softc *sc;
879 	if_softc_ctx_t scctx;
880 	int q;
881 	int i;
882 	int error;
883 	caddr_t kva;
884 
885 	sc = iflib_get_softc(ctx);
886 	scctx = sc->vmx_scctx;
887 
888 	/* Allocate the array of receive queues */
889 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
890 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
891 	if (sc->vmx_rxq == NULL)
892 		return (ENOMEM);
893 
894 	/* Initialize driver state for each receive queue */
895 	for (q = 0; q < nrxqsets; q++)
896 		vmxnet3_init_rxq(sc, q, nrxqs);
897 
898 	/*
899 	 * Allocate queue state that is shared with the device.  This check
900 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
901 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
902 	 * order iflib invokes those routines in.
903 	 */
904 	if (sc->vmx_qs_dma.idi_size == 0) {
905 		error = vmxnet3_queues_shared_alloc(sc);
906 		if (error)
907 			return (error);
908 	}
909 
910 	kva = sc->vmx_qs_dma.idi_vaddr +
911 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
912 	for (q = 0; q < nrxqsets; q++) {
913 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
914 		kva += sizeof(struct vmxnet3_rxq_shared);
915 	}
916 
917 	/* Record descriptor ring vaddrs and paddrs */
918 	for (q = 0; q < nrxqsets; q++) {
919 		struct vmxnet3_rxqueue *rxq;
920 		struct vmxnet3_rxring *rxr;
921 		struct vmxnet3_comp_ring *rxc;
922 
923 		rxq = &sc->vmx_rxq[q];
924 		rxc = &rxq->vxrxq_comp_ring;
925 
926 		/* Completion ring */
927 		rxc->vxcr_u.rxcd =
928 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
929 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
930 
931 		/* Command ring(s) */
932 		for (i = 0; i < nrxqs - 1; i++) {
933 			rxr = &rxq->vxrxq_cmd_ring[i];
934 
935 			rxr->vxrxr_rxd =
936 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
937 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
938 		}
939 	}
940 
941 	return (0);
942 }
943 
944 static void
vmxnet3_queues_free(if_ctx_t ctx)945 vmxnet3_queues_free(if_ctx_t ctx)
946 {
947 	struct vmxnet3_softc *sc;
948 
949 	sc = iflib_get_softc(ctx);
950 
951 	/* Free queue state area that is shared with the device */
952 	if (sc->vmx_qs_dma.idi_size != 0) {
953 		iflib_dma_free(&sc->vmx_qs_dma);
954 		sc->vmx_qs_dma.idi_size = 0;
955 	}
956 
957 	/* Free array of receive queues */
958 	if (sc->vmx_rxq != NULL) {
959 		free(sc->vmx_rxq, M_DEVBUF);
960 		sc->vmx_rxq = NULL;
961 	}
962 
963 	/* Free array of transmit queues */
964 	if (sc->vmx_txq != NULL) {
965 		free(sc->vmx_txq, M_DEVBUF);
966 		sc->vmx_txq = NULL;
967 	}
968 }
969 
970 static int
vmxnet3_alloc_shared_data(struct vmxnet3_softc * sc)971 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
972 {
973 	device_t dev;
974 	size_t size;
975 	int error;
976 
977 	dev = sc->vmx_dev;
978 
979 	/* Top level state structure shared with the device */
980 	size = sizeof(struct vmxnet3_driver_shared);
981 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
982 	if (error) {
983 		device_printf(dev, "cannot alloc shared memory\n");
984 		return (error);
985 	}
986 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
987 
988 	/* RSS table state shared with the device */
989 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
990 		size = sizeof(struct vmxnet3_rss_shared);
991 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
992 		    &sc->vmx_rss_dma, 0);
993 		if (error) {
994 			device_printf(dev, "cannot alloc rss shared memory\n");
995 			return (error);
996 		}
997 		sc->vmx_rss =
998 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
999 	}
1000 
1001 	return (0);
1002 }
1003 
1004 static void
vmxnet3_free_shared_data(struct vmxnet3_softc * sc)1005 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1006 {
1007 
1008 	/* Free RSS table state shared with the device */
1009 	if (sc->vmx_rss != NULL) {
1010 		iflib_dma_free(&sc->vmx_rss_dma);
1011 		sc->vmx_rss = NULL;
1012 	}
1013 
1014 	/* Free top level state structure shared with the device */
1015 	if (sc->vmx_ds != NULL) {
1016 		iflib_dma_free(&sc->vmx_ds_dma);
1017 		sc->vmx_ds = NULL;
1018 	}
1019 }
1020 
1021 static int
vmxnet3_alloc_mcast_table(struct vmxnet3_softc * sc)1022 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1023 {
1024 	int error;
1025 
1026 	/* Multicast table state shared with the device */
1027 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1028 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1029 	if (error)
1030 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1031 	else
1032 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1033 
1034 	return (error);
1035 }
1036 
1037 static void
vmxnet3_free_mcast_table(struct vmxnet3_softc * sc)1038 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1039 {
1040 
1041 	/* Free multicast table state shared with the device */
1042 	if (sc->vmx_mcast != NULL) {
1043 		iflib_dma_free(&sc->vmx_mcast_dma);
1044 		sc->vmx_mcast = NULL;
1045 	}
1046 }
1047 
1048 static void
vmxnet3_init_shared_data(struct vmxnet3_softc * sc)1049 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1050 {
1051 	struct vmxnet3_driver_shared *ds;
1052 	if_shared_ctx_t sctx;
1053 	if_softc_ctx_t scctx;
1054 	struct vmxnet3_txqueue *txq;
1055 	struct vmxnet3_txq_shared *txs;
1056 	struct vmxnet3_rxqueue *rxq;
1057 	struct vmxnet3_rxq_shared *rxs;
1058 	int i;
1059 
1060 	ds = sc->vmx_ds;
1061 	sctx = sc->vmx_sctx;
1062 	scctx = sc->vmx_scctx;
1063 
1064 	/*
1065 	 * Initialize fields of the shared data that remains the same across
1066 	 * reinits. Note the shared data is zero'd when allocated.
1067 	 */
1068 
1069 	ds->magic = VMXNET3_REV1_MAGIC;
1070 
1071 	/* DriverInfo */
1072 	ds->version = VMXNET3_DRIVER_VERSION;
1073 	ds->guest = VMXNET3_GOS_FREEBSD |
1074 #ifdef __LP64__
1075 	    VMXNET3_GOS_64BIT;
1076 #else
1077 	    VMXNET3_GOS_32BIT;
1078 #endif
1079 	ds->vmxnet3_revision = 1;
1080 	ds->upt_version = 1;
1081 
1082 	/* Misc. conf */
1083 	ds->driver_data = vtophys(sc);
1084 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1085 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1086 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1087 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1088 
1089 	/* RSS conf */
1090 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1091 		ds->rss.version = 1;
1092 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1093 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1094 	}
1095 
1096 	/* Interrupt control. */
1097 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1098 	/*
1099 	 * Total number of interrupt indexes we are using in the shared
1100 	 * config data, even though we don't actually allocate interrupt
1101 	 * resources for the tx queues.  Some versions of the device will
1102 	 * fail to initialize successfully if interrupt indexes are used in
1103 	 * the shared config that exceed the number of interrupts configured
1104 	 * here.
1105 	 */
1106 	ds->nintr = (scctx->isc_vectors == 1) ?
1107 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1108 	ds->evintr = sc->vmx_event_intr_idx;
1109 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1110 
1111 	for (i = 0; i < ds->nintr; i++)
1112 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1113 
1114 	/* Receive filter. */
1115 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1116 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1117 
1118 	/* Tx queues */
1119 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1120 		txq = &sc->vmx_txq[i];
1121 		txs = txq->vxtxq_ts;
1122 
1123 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1124 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1125 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1126 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1127 		txs->driver_data = vtophys(txq);
1128 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1129 	}
1130 
1131 	/* Rx queues */
1132 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1133 		rxq = &sc->vmx_rxq[i];
1134 		rxs = rxq->vxrxq_rs;
1135 
1136 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1137 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1138 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1139 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1140 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1141 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1142 		rxs->driver_data = vtophys(rxq);
1143 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1144 	}
1145 }
1146 
1147 static void
vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc * sc)1148 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1149 {
1150 	/*
1151 	 * Use the same key as the Linux driver until FreeBSD can do
1152 	 * RSS (presumably Toeplitz) in software.
1153 	 */
1154 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1155 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1156 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1157 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1158 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1159 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1160 	};
1161 
1162 	struct vmxnet3_driver_shared *ds;
1163 	if_softc_ctx_t scctx;
1164 	struct vmxnet3_rss_shared *rss;
1165 #ifdef RSS
1166 	uint8_t rss_algo;
1167 #endif
1168 	int i;
1169 
1170 	ds = sc->vmx_ds;
1171 	scctx = sc->vmx_scctx;
1172 	rss = sc->vmx_rss;
1173 
1174 	rss->hash_type =
1175 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1176 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1177 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1178 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1179 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1180 #ifdef RSS
1181 	/*
1182 	 * If the software RSS is configured to anything else other than
1183 	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1184 	 * the packet distribution, but report the hash as opaque to
1185 	 * disengage from the software RSS.
1186 	 */
1187 	rss_algo = rss_gethashalgo();
1188 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1189 		rss_getkey(rss->hash_key);
1190 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1191 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1192 			    scctx->isc_nrxqsets;
1193 		}
1194 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1195 	} else
1196 #endif
1197 	{
1198 		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1199 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1200 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1201 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1202 	}
1203 }
1204 
1205 static void
vmxnet3_reinit_shared_data(struct vmxnet3_softc * sc)1206 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1207 {
1208 	struct ifnet *ifp;
1209 	struct vmxnet3_driver_shared *ds;
1210 	if_softc_ctx_t scctx;
1211 
1212 	ifp = sc->vmx_ifp;
1213 	ds = sc->vmx_ds;
1214 	scctx = sc->vmx_scctx;
1215 
1216 	ds->mtu = ifp->if_mtu;
1217 	ds->ntxqueue = scctx->isc_ntxqsets;
1218 	ds->nrxqueue = scctx->isc_nrxqsets;
1219 
1220 	ds->upt_features = 0;
1221 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1222 		ds->upt_features |= UPT1_F_CSUM;
1223 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1224 		ds->upt_features |= UPT1_F_VLAN;
1225 	if (ifp->if_capenable & IFCAP_LRO)
1226 		ds->upt_features |= UPT1_F_LRO;
1227 
1228 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1229 		ds->upt_features |= UPT1_F_RSS;
1230 		vmxnet3_reinit_rss_shared_data(sc);
1231 	}
1232 
1233 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1234 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1235 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1236 }
1237 
/*
 * Allocate the shared data areas and the multicast table, then initialize
 * the shared data.  Stops at the first failing step and returns its error;
 * returns 0 when everything succeeded.  Nothing is released here on
 * failure.
 */
static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_shared_data(sc);
	if (error == 0)
		error = vmxnet3_alloc_mcast_table(sc);
	if (error != 0)
		return (error);

	vmxnet3_init_shared_data(sc);

	return (0);
}
1255 
/*
 * Counterpart of vmxnet3_alloc_data(): release the multicast table first,
 * then the shared data areas.
 */
static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{

	vmxnet3_free_mcast_table(sc);
	vmxnet3_free_shared_data(sc);
}
1263 
1264 static void
vmxnet3_evintr(struct vmxnet3_softc * sc)1265 vmxnet3_evintr(struct vmxnet3_softc *sc)
1266 {
1267 	device_t dev;
1268 	struct vmxnet3_txq_shared *ts;
1269 	struct vmxnet3_rxq_shared *rs;
1270 	uint32_t event;
1271 
1272 	dev = sc->vmx_dev;
1273 
1274 	/* Clear events. */
1275 	event = sc->vmx_ds->event;
1276 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1277 
1278 	if (event & VMXNET3_EVENT_LINK)
1279 		vmxnet3_link_status(sc);
1280 
1281 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1282 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1283 		ts = sc->vmx_txq[0].vxtxq_ts;
1284 		if (ts->stopped != 0)
1285 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1286 		rs = sc->vmx_rxq[0].vxrxq_rs;
1287 		if (rs->stopped != 0)
1288 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1289 
1290 		/* XXX - rely on liflib watchdog to reset us? */
1291 		device_printf(dev, "Rx/Tx queue error event ... "
1292 		    "waiting for iflib watchdog reset\n");
1293 	}
1294 
1295 	if (event & VMXNET3_EVENT_DIC)
1296 		device_printf(dev, "device implementation change event\n");
1297 	if (event & VMXNET3_EVENT_DEBUG)
1298 		device_printf(dev, "debug event\n");
1299 }
1300 
1301 static int
vmxnet3_isc_txd_encap(void * vsc,if_pkt_info_t pi)1302 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1303 {
1304 	struct vmxnet3_softc *sc;
1305 	struct vmxnet3_txqueue *txq;
1306 	struct vmxnet3_txring *txr;
1307 	struct vmxnet3_txdesc *txd, *sop;
1308 	bus_dma_segment_t *segs;
1309 	int nsegs;
1310 	int pidx;
1311 	int hdrlen;
1312 	int i;
1313 	int gen;
1314 
1315 	sc = vsc;
1316 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1317 	txr = &txq->vxtxq_cmd_ring;
1318 	segs = pi->ipi_segs;
1319 	nsegs = pi->ipi_nsegs;
1320 	pidx = pi->ipi_pidx;
1321 
1322 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1323 	    ("%s: packet with too many segments %d", __func__, nsegs));
1324 
1325 	sop = &txr->vxtxr_txd[pidx];
1326 	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (yet) */
1327 
1328 	for (i = 0; i < nsegs; i++) {
1329 		txd = &txr->vxtxr_txd[pidx];
1330 
1331 		txd->addr = segs[i].ds_addr;
1332 		txd->len = segs[i].ds_len;
1333 		txd->gen = gen;
1334 		txd->dtype = 0;
1335 		txd->offload_mode = VMXNET3_OM_NONE;
1336 		txd->offload_pos = 0;
1337 		txd->hlen = 0;
1338 		txd->eop = 0;
1339 		txd->compreq = 0;
1340 		txd->vtag_mode = 0;
1341 		txd->vtag = 0;
1342 
1343 		if (++pidx == txr->vxtxr_ndesc) {
1344 			pidx = 0;
1345 			txr->vxtxr_gen ^= 1;
1346 		}
1347 		gen = txr->vxtxr_gen;
1348 	}
1349 	txd->eop = 1;
1350 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1351 	pi->ipi_new_pidx = pidx;
1352 
1353 	/*
1354 	 * VLAN
1355 	 */
1356 	if (pi->ipi_mflags & M_VLANTAG) {
1357 		sop->vtag_mode = 1;
1358 		sop->vtag = pi->ipi_vtag;
1359 	}
1360 
1361 	/*
1362 	 * TSO and checksum offloads
1363 	 */
1364 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1365 	if (pi->ipi_csum_flags & CSUM_TSO) {
1366 		sop->offload_mode = VMXNET3_OM_TSO;
1367 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1368 		sop->offload_pos = pi->ipi_tso_segsz;
1369 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1370 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1371 		sop->offload_mode = VMXNET3_OM_CSUM;
1372 		sop->hlen = hdrlen;
1373 		sop->offload_pos = hdrlen +
1374 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1375 			offsetof(struct tcphdr, th_sum) :
1376 			offsetof(struct udphdr, uh_sum));
1377 	}
1378 
1379 	/* Finally, change the ownership. */
1380 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1381 	sop->gen ^= 1;
1382 
1383 	return (0);
1384 }
1385 
1386 static void
vmxnet3_isc_txd_flush(void * vsc,uint16_t txqid,qidx_t pidx)1387 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1388 {
1389 	struct vmxnet3_softc *sc;
1390 	struct vmxnet3_txqueue *txq;
1391 
1392 	sc = vsc;
1393 	txq = &sc->vmx_txq[txqid];
1394 
1395 	/*
1396 	 * pidx is what we last set ipi_new_pidx to in
1397 	 * vmxnet3_isc_txd_encap()
1398 	 */
1399 
1400 	/*
1401 	 * Avoid expensive register updates if the flush request is
1402 	 * redundant.
1403 	 */
1404 	if (txq->vxtxq_last_flush == pidx)
1405 		return;
1406 	txq->vxtxq_last_flush = pidx;
1407 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1408 }
1409 
1410 static int
vmxnet3_isc_txd_credits_update(void * vsc,uint16_t txqid,bool clear)1411 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1412 {
1413 	struct vmxnet3_softc *sc;
1414 	struct vmxnet3_txqueue *txq;
1415 	struct vmxnet3_comp_ring *txc;
1416 	struct vmxnet3_txcompdesc *txcd;
1417 	struct vmxnet3_txring *txr;
1418 	int processed;
1419 
1420 	sc = vsc;
1421 	txq = &sc->vmx_txq[txqid];
1422 	txc = &txq->vxtxq_comp_ring;
1423 	txr = &txq->vxtxq_cmd_ring;
1424 
1425 	/*
1426 	 * If clear is true, we need to report the number of TX command ring
1427 	 * descriptors that have been processed by the device.  If clear is
1428 	 * false, we just need to report whether or not at least one TX
1429 	 * command ring descriptor has been processed by the device.
1430 	 */
1431 	processed = 0;
1432 	for (;;) {
1433 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1434 		if (txcd->gen != txc->vxcr_gen)
1435 			break;
1436 		else if (!clear)
1437 			return (1);
1438 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1439 
1440 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1441 			txc->vxcr_next = 0;
1442 			txc->vxcr_gen ^= 1;
1443 		}
1444 
1445 		if (txcd->eop_idx < txr->vxtxr_next)
1446 			processed += txr->vxtxr_ndesc -
1447 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1448 		else
1449 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1450 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1451 	}
1452 
1453 	return (processed);
1454 }
1455 
1456 static int
vmxnet3_isc_rxd_available(void * vsc,uint16_t rxqid,qidx_t idx,qidx_t budget)1457 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1458 {
1459 	struct vmxnet3_softc *sc;
1460 	struct vmxnet3_rxqueue *rxq;
1461 	struct vmxnet3_comp_ring *rxc;
1462 	struct vmxnet3_rxcompdesc *rxcd;
1463 	int avail;
1464 	int completed_gen;
1465 #ifdef INVARIANTS
1466 	int expect_sop = 1;
1467 #endif
1468 	sc = vsc;
1469 	rxq = &sc->vmx_rxq[rxqid];
1470 	rxc = &rxq->vxrxq_comp_ring;
1471 
1472 	avail = 0;
1473 	completed_gen = rxc->vxcr_gen;
1474 	for (;;) {
1475 		rxcd = &rxc->vxcr_u.rxcd[idx];
1476 		if (rxcd->gen != completed_gen)
1477 			break;
1478 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1479 
1480 #ifdef INVARIANTS
1481 		if (expect_sop)
1482 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1483 		else
1484 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1485 		expect_sop = rxcd->eop;
1486 #endif
1487 		if (rxcd->eop && (rxcd->len != 0))
1488 			avail++;
1489 		if (avail > budget)
1490 			break;
1491 		if (++idx == rxc->vxcr_ndesc) {
1492 			idx = 0;
1493 			completed_gen ^= 1;
1494 		}
1495 	}
1496 
1497 	return (avail);
1498 }
1499 
1500 static int
vmxnet3_isc_rxd_pkt_get(void * vsc,if_rxd_info_t ri)1501 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1502 {
1503 	struct vmxnet3_softc *sc;
1504 	if_softc_ctx_t scctx;
1505 	struct vmxnet3_rxqueue *rxq;
1506 	struct vmxnet3_comp_ring *rxc;
1507 	struct vmxnet3_rxcompdesc *rxcd;
1508 	if_rxd_frag_t frag;
1509 	int cqidx;
1510 	uint16_t total_len;
1511 	uint8_t nfrags;
1512 	uint8_t i;
1513 	uint8_t flid;
1514 
1515 	sc = vsc;
1516 	scctx = sc->vmx_scctx;
1517 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1518 	rxc = &rxq->vxrxq_comp_ring;
1519 
1520 	/*
1521 	 * Get a single packet starting at the given index in the completion
1522 	 * queue.  That we have been called indicates that
1523 	 * vmxnet3_isc_rxd_available() has already verified that either
1524 	 * there is a complete packet available starting at the given index,
1525 	 * or there are one or more zero length packets starting at the
1526 	 * given index followed by a complete packet, so no verification of
1527 	 * ownership of the descriptors (and no associated read barrier) is
1528 	 * required here.
1529 	 */
1530 	cqidx = ri->iri_cidx;
1531 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1532 	while (rxcd->len == 0) {
1533 		KASSERT(rxcd->sop && rxcd->eop,
1534 		    ("%s: zero-length packet without both sop and eop set",
1535 			__func__));
1536 		rxc->vxcr_zero_length++;
1537 		if (++cqidx == rxc->vxcr_ndesc) {
1538 			cqidx = 0;
1539 			rxc->vxcr_gen ^= 1;
1540 		}
1541 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1542 	}
1543 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1544 
1545 	/*
1546 	 * RSS and flow ID.
1547 	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1548 	 * be used only if the software RSS is enabled and it uses the same
1549 	 * algorithm and the hash key as the "hardware".  If the software RSS
1550 	 * is not enabled, then it's simply pointless to use those types.
1551 	 * If it's enabled but with different parameters, then hash values will
1552 	 * not match.
1553 	 */
1554 	ri->iri_flowid = rxcd->rss_hash;
1555 #ifdef RSS
1556 	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1557 		switch (rxcd->rss_type) {
1558 		case VMXNET3_RCD_RSS_TYPE_NONE:
1559 			ri->iri_flowid = ri->iri_qsidx;
1560 			ri->iri_rsstype = M_HASHTYPE_NONE;
1561 			break;
1562 		case VMXNET3_RCD_RSS_TYPE_IPV4:
1563 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1564 			break;
1565 		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1566 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1567 			break;
1568 		case VMXNET3_RCD_RSS_TYPE_IPV6:
1569 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1570 			break;
1571 		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1572 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1573 			break;
1574 		default:
1575 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1576 			break;
1577 		}
1578 	} else
1579 #endif
1580 	{
1581 		switch (rxcd->rss_type) {
1582 		case VMXNET3_RCD_RSS_TYPE_NONE:
1583 			ri->iri_flowid = ri->iri_qsidx;
1584 			ri->iri_rsstype = M_HASHTYPE_NONE;
1585 			break;
1586 		default:
1587 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1588 			break;
1589 		}
1590 	}
1591 
1592 	/*
1593 	 * The queue numbering scheme used for rxcd->qid is as follows:
1594 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1595 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1596 	 *
1597 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1598 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1599 	 * indicates command ring (and flid) 1.
1600 	 */
1601 	nfrags = 0;
1602 	total_len = 0;
1603 	do {
1604 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1605 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1606 		    ("%s: generation mismatch", __func__));
1607 		KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1608 		    ("%s: too many fragments", __func__));
1609 		if (__predict_true(rxcd->len != 0)) {
1610 			frag = &ri->iri_frags[nfrags];
1611 			flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1612 			frag->irf_flid = flid;
1613 			frag->irf_idx = rxcd->rxd_idx;
1614 			frag->irf_len = rxcd->len;
1615 			total_len += rxcd->len;
1616 			nfrags++;
1617 		} else {
1618 			rxc->vcxr_zero_length_frag++;
1619 		}
1620 		if (++cqidx == rxc->vxcr_ndesc) {
1621 			cqidx = 0;
1622 			rxc->vxcr_gen ^= 1;
1623 		}
1624 	} while (!rxcd->eop);
1625 
1626 	ri->iri_cidx = cqidx;
1627 	ri->iri_nfrags = nfrags;
1628 	ri->iri_len = total_len;
1629 
1630 	/*
1631 	 * If there's an error, the last descriptor in the packet will
1632 	 * have the error indicator set.  In this case, set all
1633 	 * fragment lengths to zero.  This will cause iflib to discard
1634 	 * the packet, but process all associated descriptors through
1635 	 * the refill mechanism.
1636 	 */
1637 	if (__predict_false(rxcd->error)) {
1638 		rxc->vxcr_pkt_errors++;
1639 		for (i = 0; i < nfrags; i++) {
1640 			frag = &ri->iri_frags[i];
1641 			frag->irf_len = 0;
1642 		}
1643 	} else {
1644 		/* Checksum offload information is in the last descriptor. */
1645 		if (!rxcd->no_csum) {
1646 			uint32_t csum_flags = 0;
1647 
1648 			if (rxcd->ipv4) {
1649 				csum_flags |= CSUM_IP_CHECKED;
1650 				if (rxcd->ipcsum_ok)
1651 					csum_flags |= CSUM_IP_VALID;
1652 			}
1653 			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1654 				csum_flags |= CSUM_L4_CALC;
1655 				if (rxcd->csum_ok) {
1656 					csum_flags |= CSUM_L4_VALID;
1657 					ri->iri_csum_data = 0xffff;
1658 				}
1659 			}
1660 			ri->iri_csum_flags = csum_flags;
1661 		}
1662 
1663 		/* VLAN information is in the last descriptor. */
1664 		if (rxcd->vlan) {
1665 			ri->iri_flags |= M_VLANTAG;
1666 			ri->iri_vtag = rxcd->vtag;
1667 		}
1668 	}
1669 
1670 	return (0);
1671 }
1672 
1673 static void
vmxnet3_isc_rxd_refill(void * vsc,if_rxd_update_t iru)1674 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1675 {
1676 	struct vmxnet3_softc *sc;
1677 	struct vmxnet3_rxqueue *rxq;
1678 	struct vmxnet3_rxring *rxr;
1679 	struct vmxnet3_rxdesc *rxd;
1680 	uint64_t *paddrs;
1681 	int count;
1682 	int len;
1683 	int idx;
1684 	int i;
1685 	uint8_t flid;
1686 	uint8_t btype;
1687 
1688 	count = iru->iru_count;
1689 	len = iru->iru_buf_size;
1690 	flid = iru->iru_flidx;
1691 	paddrs = iru->iru_paddrs;
1692 
1693 	sc = vsc;
1694 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1695 	rxr = &rxq->vxrxq_cmd_ring[flid];
1696 	rxd = rxr->vxrxr_rxd;
1697 
1698 	/*
1699 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1700 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1701 	 */
1702 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1703 	/*
1704 	 * The refill entries from iflib will advance monotonically,
1705 	 * but the refilled descriptors may not be contiguous due to
1706 	 * earlier skipping of descriptors by the device.  The refill
1707 	 * entries from iflib need an entire state update, while the
1708 	 * descriptors previously skipped by the device only need to
1709 	 * have their generation numbers updated.
1710 	 */
1711 	idx = rxr->vxrxr_refill_start;
1712 	i = 0;
1713 	do {
1714 		if (idx == iru->iru_idxs[i]) {
1715 			rxd[idx].addr = paddrs[i];
1716 			rxd[idx].len = len;
1717 			rxd[idx].btype = btype;
1718 			i++;
1719 		} else
1720 			rxr->vxrxr_desc_skips++;
1721 		rxd[idx].gen = rxr->vxrxr_gen;
1722 
1723 		if (++idx == rxr->vxrxr_ndesc) {
1724 			idx = 0;
1725 			rxr->vxrxr_gen ^= 1;
1726 		}
1727 	} while (i != count);
1728 	rxr->vxrxr_refill_start = idx;
1729 }
1730 
1731 static void
vmxnet3_isc_rxd_flush(void * vsc,uint16_t rxqid,uint8_t flid,qidx_t pidx)1732 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1733 {
1734 	struct vmxnet3_softc *sc;
1735 	struct vmxnet3_rxqueue *rxq;
1736 	struct vmxnet3_rxring *rxr;
1737 	bus_size_t r;
1738 
1739 	sc = vsc;
1740 	rxq = &sc->vmx_rxq[rxqid];
1741 	rxr = &rxq->vxrxq_cmd_ring[flid];
1742 
1743 	if (flid == 0)
1744 		r = VMXNET3_BAR0_RXH1(rxqid);
1745 	else
1746 		r = VMXNET3_BAR0_RXH2(rxqid);
1747 
1748 	vmxnet3_write_bar0(sc, r, pidx);
1749 }
1750 
1751 static int
vmxnet3_legacy_intr(void * xsc)1752 vmxnet3_legacy_intr(void *xsc)
1753 {
1754 	struct vmxnet3_softc *sc;
1755 	if_softc_ctx_t scctx;
1756 	if_ctx_t ctx;
1757 
1758 	sc = xsc;
1759 	scctx = sc->vmx_scctx;
1760 	ctx = sc->vmx_ctx;
1761 
1762 	/*
1763 	 * When there is only a single interrupt configured, this routine
1764 	 * runs in fast interrupt context, following which the rxq 0 task
1765 	 * will be enqueued.
1766 	 */
1767 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1768 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1769 			return (FILTER_HANDLED);
1770 	}
1771 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1772 		vmxnet3_intr_disable_all(ctx);
1773 
1774 	if (sc->vmx_ds->event != 0)
1775 		iflib_admin_intr_deferred(ctx);
1776 
1777 	/*
1778 	 * XXX - When there is both rxq and event activity, do we care
1779 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1780 	 * first?
1781 	 */
1782 	return (FILTER_SCHEDULE_THREAD);
1783 }
1784 
1785 static int
vmxnet3_rxq_intr(void * vrxq)1786 vmxnet3_rxq_intr(void *vrxq)
1787 {
1788 	struct vmxnet3_softc *sc;
1789 	struct vmxnet3_rxqueue *rxq;
1790 
1791 	rxq = vrxq;
1792 	sc = rxq->vxrxq_sc;
1793 
1794 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1795 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1796 
1797 	return (FILTER_SCHEDULE_THREAD);
1798 }
1799 
1800 static int
vmxnet3_event_intr(void * vsc)1801 vmxnet3_event_intr(void *vsc)
1802 {
1803 	struct vmxnet3_softc *sc;
1804 
1805 	sc = vsc;
1806 
1807 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1808 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1809 
1810 	/*
1811 	 * The work will be done via vmxnet3_update_admin_status(), and the
1812 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1813 	 *
1814 	 * The interrupt will be re-enabled by vmxnet3_link_intr_enable().
1815 	 */
1816 	return (FILTER_SCHEDULE_THREAD);
1817 }
1818 
1819 static void
vmxnet3_stop(if_ctx_t ctx)1820 vmxnet3_stop(if_ctx_t ctx)
1821 {
1822 	struct vmxnet3_softc *sc;
1823 
1824 	sc = iflib_get_softc(ctx);
1825 
1826 	sc->vmx_link_active = 0;
1827 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1828 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1829 }
1830 
1831 static void
vmxnet3_txinit(struct vmxnet3_softc * sc,struct vmxnet3_txqueue * txq)1832 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1833 {
1834 	struct vmxnet3_txring *txr;
1835 	struct vmxnet3_comp_ring *txc;
1836 
1837 	txq->vxtxq_last_flush = -1;
1838 
1839 	txr = &txq->vxtxq_cmd_ring;
1840 	txr->vxtxr_next = 0;
1841 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1842 	/*
1843 	 * iflib has zeroed out the descriptor array during the prior attach
1844 	 * or stop
1845 	 */
1846 
1847 	txc = &txq->vxtxq_comp_ring;
1848 	txc->vxcr_next = 0;
1849 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1850 	/*
1851 	 * iflib has zeroed out the descriptor array during the prior attach
1852 	 * or stop
1853 	 */
1854 }
1855 
1856 static void
vmxnet3_rxinit(struct vmxnet3_softc * sc,struct vmxnet3_rxqueue * rxq)1857 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1858 {
1859 	struct vmxnet3_rxring *rxr;
1860 	struct vmxnet3_comp_ring *rxc;
1861 	int i;
1862 
1863 	/*
1864 	 * The descriptors will be populated with buffers during a
1865 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1866 	 */
1867 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1868 		rxr = &rxq->vxrxq_cmd_ring[i];
1869 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1870 		rxr->vxrxr_desc_skips = 0;
1871 		rxr->vxrxr_refill_start = 0;
1872 		/*
1873 		 * iflib has zeroed out the descriptor array during the
1874 		 * prior attach or stop
1875 		 */
1876 	}
1877 
1878 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1879 		rxr = &rxq->vxrxq_cmd_ring[i];
1880 		rxr->vxrxr_gen = 0;
1881 		rxr->vxrxr_desc_skips = 0;
1882 		rxr->vxrxr_refill_start = 0;
1883 		bzero(rxr->vxrxr_rxd,
1884 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1885 	}
1886 
1887 	rxc = &rxq->vxrxq_comp_ring;
1888 	rxc->vxcr_next = 0;
1889 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1890 	rxc->vxcr_zero_length = 0;
1891 	rxc->vcxr_zero_length_frag = 0;
1892 	rxc->vxcr_pkt_errors = 0;
1893 	/*
1894 	 * iflib has zeroed out the descriptor array during the prior attach
1895 	 * or stop
1896 	 */
1897 }
1898 
1899 static void
vmxnet3_reinit_queues(struct vmxnet3_softc * sc)1900 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1901 {
1902 	if_softc_ctx_t scctx;
1903 	int q;
1904 
1905 	scctx = sc->vmx_scctx;
1906 
1907 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1908 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1909 
1910 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1911 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1912 }
1913 
1914 static int
vmxnet3_enable_device(struct vmxnet3_softc * sc)1915 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1916 {
1917 	if_softc_ctx_t scctx;
1918 	int q;
1919 
1920 	scctx = sc->vmx_scctx;
1921 
1922 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1923 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1924 		return (1);
1925 	}
1926 
1927 	/* Reset the Rx queue heads. */
1928 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1929 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1930 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1931 	}
1932 
1933 	return (0);
1934 }
1935 
1936 static void
vmxnet3_reinit_rxfilters(struct vmxnet3_softc * sc)1937 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1938 {
1939 	struct ifnet *ifp;
1940 
1941 	ifp = sc->vmx_ifp;
1942 
1943 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1944 
1945 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1946 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1947 		    sizeof(sc->vmx_ds->vlan_filter));
1948 	else
1949 		bzero(sc->vmx_ds->vlan_filter,
1950 		    sizeof(sc->vmx_ds->vlan_filter));
1951 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1952 }
1953 
1954 static void
vmxnet3_init(if_ctx_t ctx)1955 vmxnet3_init(if_ctx_t ctx)
1956 {
1957 	struct vmxnet3_softc *sc;
1958 
1959 	sc = iflib_get_softc(ctx);
1960 
1961 	/* Use the current MAC address. */
1962 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1963 	vmxnet3_set_lladdr(sc);
1964 
1965 	vmxnet3_reinit_shared_data(sc);
1966 	vmxnet3_reinit_queues(sc);
1967 
1968 	vmxnet3_enable_device(sc);
1969 
1970 	vmxnet3_reinit_rxfilters(sc);
1971 	vmxnet3_link_status(sc);
1972 }
1973 
1974 static void
vmxnet3_multi_set(if_ctx_t ctx)1975 vmxnet3_multi_set(if_ctx_t ctx)
1976 {
1977 
1978 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1979 	    if_getflags(iflib_get_ifp(ctx)));
1980 }
1981 
1982 static int
vmxnet3_mtu_set(if_ctx_t ctx,uint32_t mtu)1983 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1984 {
1985 	struct vmxnet3_softc *sc;
1986 	if_softc_ctx_t scctx;
1987 
1988 	sc = iflib_get_softc(ctx);
1989 	scctx = sc->vmx_scctx;
1990 
1991 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1992 		ETHER_CRC_LEN))
1993 		return (EINVAL);
1994 
1995 	/*
1996 	 * Update the max frame size so that the rx mbuf size is
1997 	 * chosen based on the new mtu during the interface init that
1998 	 * will occur after this routine returns.
1999 	 */
2000 	scctx->isc_max_frame_size = mtu +
2001 		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
2002 	/* RX completion queue - n/a */
2003 	scctx->isc_rxd_buf_size[0] = 0;
2004 	/*
2005 	 * For header-type descriptors (used for first segment of
2006 	 * packet), let iflib determine the buffer size based on the
2007 	 * max frame size.
2008 	 */
2009 	scctx->isc_rxd_buf_size[1] = 0;
2010 	/*
2011 	 * For body-type descriptors (used for jumbo frames and LRO),
2012 	 * always use page-sized buffers.
2013 	 */
2014 	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2015 
2016 	return (0);
2017 }
2018 
2019 static void
vmxnet3_media_status(if_ctx_t ctx,struct ifmediareq * ifmr)2020 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
2021 {
2022 	struct vmxnet3_softc *sc;
2023 
2024 	sc = iflib_get_softc(ctx);
2025 
2026 	ifmr->ifm_status = IFM_AVALID;
2027 	ifmr->ifm_active = IFM_ETHER;
2028 
2029 	if (vmxnet3_link_is_up(sc) != 0) {
2030 		ifmr->ifm_status |= IFM_ACTIVE;
2031 		ifmr->ifm_active |= IFM_AUTO;
2032 	} else
2033 		ifmr->ifm_active |= IFM_NONE;
2034 }
2035 
2036 static int
vmxnet3_media_change(if_ctx_t ctx)2037 vmxnet3_media_change(if_ctx_t ctx)
2038 {
2039 
2040 	/* Ignore. */
2041 	return (0);
2042 }
2043 
2044 static int
vmxnet3_promisc_set(if_ctx_t ctx,int flags)2045 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2046 {
2047 
2048 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2049 
2050 	return (0);
2051 }
2052 
2053 static uint64_t
vmxnet3_get_counter(if_ctx_t ctx,ift_counter cnt)2054 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2055 {
2056 	if_t ifp = iflib_get_ifp(ctx);
2057 
2058 	if (cnt < IFCOUNTERS)
2059 		return if_get_counter_default(ifp, cnt);
2060 
2061 	return (0);
2062 }
2063 
2064 static void
vmxnet3_update_admin_status(if_ctx_t ctx)2065 vmxnet3_update_admin_status(if_ctx_t ctx)
2066 {
2067 	struct vmxnet3_softc *sc;
2068 
2069 	sc = iflib_get_softc(ctx);
2070 	if (sc->vmx_ds->event != 0)
2071 		vmxnet3_evintr(sc);
2072 
2073 	vmxnet3_refresh_host_stats(sc);
2074 }
2075 
2076 static void
vmxnet3_txq_timer(if_ctx_t ctx,uint16_t qid)2077 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2078 {
2079 	/* Host stats refresh is global, so just trigger it on txq 0 */
2080 	if (qid == 0)
2081 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2082 }
2083 
2084 static void
vmxnet3_update_vlan_filter(struct vmxnet3_softc * sc,int add,uint16_t tag)2085 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2086 {
2087 	int idx, bit;
2088 
2089 	if (tag == 0 || tag > 4095)
2090 		return;
2091 
2092 	idx = (tag >> 5) & 0x7F;
2093 	bit = tag & 0x1F;
2094 
2095 	/* Update our private VLAN bitvector. */
2096 	if (add)
2097 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2098 	else
2099 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2100 }
2101 
2102 static void
vmxnet3_vlan_register(if_ctx_t ctx,uint16_t tag)2103 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2104 {
2105 
2106 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2107 }
2108 
2109 static void
vmxnet3_vlan_unregister(if_ctx_t ctx,uint16_t tag)2110 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2111 {
2112 
2113 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2114 }
2115 
2116 static void
vmxnet3_set_rxfilter(struct vmxnet3_softc * sc,int flags)2117 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2118 {
2119 	struct ifnet *ifp;
2120 	struct vmxnet3_driver_shared *ds;
2121 	struct ifmultiaddr *ifma;
2122 	u_int mode;
2123 
2124 	ifp = sc->vmx_ifp;
2125 	ds = sc->vmx_ds;
2126 
2127 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2128 	if (flags & IFF_PROMISC)
2129 		mode |= VMXNET3_RXMODE_PROMISC;
2130 	if (flags & IFF_ALLMULTI)
2131 		mode |= VMXNET3_RXMODE_ALLMULTI;
2132 	else {
2133 		int cnt = 0, overflow = 0;
2134 
2135 		if_maddr_rlock(ifp);
2136 		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2137 			if (ifma->ifma_addr->sa_family != AF_LINK)
2138 				continue;
2139 			else if (cnt == VMXNET3_MULTICAST_MAX) {
2140 				overflow = 1;
2141 				break;
2142 			}
2143 
2144 			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2145 			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
2146 			cnt++;
2147 		}
2148 		if_maddr_runlock(ifp);
2149 
2150 		if (overflow != 0) {
2151 			cnt = 0;
2152 			mode |= VMXNET3_RXMODE_ALLMULTI;
2153 		} else if (cnt > 0)
2154 			mode |= VMXNET3_RXMODE_MCAST;
2155 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2156 	}
2157 
2158 	ds->rxmode = mode;
2159 
2160 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2161 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2162 }
2163 
2164 static void
vmxnet3_refresh_host_stats(struct vmxnet3_softc * sc)2165 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2166 {
2167 
2168 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2169 }
2170 
2171 static int
vmxnet3_link_is_up(struct vmxnet3_softc * sc)2172 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2173 {
2174 	uint32_t status;
2175 
2176 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2177 	return !!(status & 0x1);
2178 }
2179 
2180 static void
vmxnet3_link_status(struct vmxnet3_softc * sc)2181 vmxnet3_link_status(struct vmxnet3_softc *sc)
2182 {
2183 	if_ctx_t ctx;
2184 	uint64_t speed;
2185 	int link;
2186 
2187 	ctx = sc->vmx_ctx;
2188 	link = vmxnet3_link_is_up(sc);
2189 	speed = IF_Gbps(10);
2190 
2191 	if (link != 0 && sc->vmx_link_active == 0) {
2192 		sc->vmx_link_active = 1;
2193 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2194 	} else if (link == 0 && sc->vmx_link_active != 0) {
2195 		sc->vmx_link_active = 0;
2196 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2197 	}
2198 }
2199 
2200 static void
vmxnet3_set_lladdr(struct vmxnet3_softc * sc)2201 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2202 {
2203 	uint32_t ml, mh;
2204 
2205 	ml  = sc->vmx_lladdr[0];
2206 	ml |= sc->vmx_lladdr[1] << 8;
2207 	ml |= sc->vmx_lladdr[2] << 16;
2208 	ml |= sc->vmx_lladdr[3] << 24;
2209 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2210 
2211 	mh  = sc->vmx_lladdr[4];
2212 	mh |= sc->vmx_lladdr[5] << 8;
2213 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2214 }
2215 
2216 static void
vmxnet3_get_lladdr(struct vmxnet3_softc * sc)2217 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2218 {
2219 	uint32_t ml, mh;
2220 
2221 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2222 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2223 
2224 	sc->vmx_lladdr[0] = ml;
2225 	sc->vmx_lladdr[1] = ml >> 8;
2226 	sc->vmx_lladdr[2] = ml >> 16;
2227 	sc->vmx_lladdr[3] = ml >> 24;
2228 	sc->vmx_lladdr[4] = mh;
2229 	sc->vmx_lladdr[5] = mh >> 8;
2230 }
2231 
2232 static void
vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue * txq,struct sysctl_ctx_list * ctx,struct sysctl_oid_list * child)2233 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2234     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2235 {
2236 	struct sysctl_oid *node, *txsnode;
2237 	struct sysctl_oid_list *list, *txslist;
2238 	struct UPT1_TxStats *txstats;
2239 	char namebuf[16];
2240 
2241 	txstats = &txq->vxtxq_ts->stats;
2242 
2243 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2244 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2245 	    NULL, "Transmit Queue");
2246 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2247 
2248 	/*
2249 	 * Add statistics reported by the host. These are updated by the
2250 	 * iflib txq timer on txq 0.
2251 	 */
2252 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2253 	    NULL, "Host Statistics");
2254 	txslist = SYSCTL_CHILDREN(txsnode);
2255 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2256 	    &txstats->TSO_packets, "TSO packets");
2257 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2258 	    &txstats->TSO_bytes, "TSO bytes");
2259 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2260 	    &txstats->ucast_packets, "Unicast packets");
2261 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2262 	    &txstats->ucast_bytes, "Unicast bytes");
2263 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2264 	    &txstats->mcast_packets, "Multicast packets");
2265 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2266 	    &txstats->mcast_bytes, "Multicast bytes");
2267 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2268 	    &txstats->error, "Errors");
2269 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2270 	    &txstats->discard, "Discards");
2271 }
2272 
2273 static void
vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue * rxq,struct sysctl_ctx_list * ctx,struct sysctl_oid_list * child)2274 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2275     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2276 {
2277 	struct sysctl_oid *node, *rxsnode;
2278 	struct sysctl_oid_list *list, *rxslist;
2279 	struct UPT1_RxStats *rxstats;
2280 	char namebuf[16];
2281 
2282 	rxstats = &rxq->vxrxq_rs->stats;
2283 
2284 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2285 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2286 	    NULL, "Receive Queue");
2287 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2288 
2289 	/*
2290 	 * Add statistics reported by the host. These are updated by the
2291 	 * iflib txq timer on txq 0.
2292 	 */
2293 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2294 	    NULL, "Host Statistics");
2295 	rxslist = SYSCTL_CHILDREN(rxsnode);
2296 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2297 	    &rxstats->LRO_packets, "LRO packets");
2298 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2299 	    &rxstats->LRO_bytes, "LRO bytes");
2300 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2301 	    &rxstats->ucast_packets, "Unicast packets");
2302 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2303 	    &rxstats->ucast_bytes, "Unicast bytes");
2304 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2305 	    &rxstats->mcast_packets, "Multicast packets");
2306 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2307 	    &rxstats->mcast_bytes, "Multicast bytes");
2308 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2309 	    &rxstats->bcast_packets, "Broadcast packets");
2310 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2311 	    &rxstats->bcast_bytes, "Broadcast bytes");
2312 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2313 	    &rxstats->nobuffer, "No buffer");
2314 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2315 	    &rxstats->error, "Errors");
2316 }
2317 
2318 static void
vmxnet3_setup_debug_sysctl(struct vmxnet3_softc * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid_list * child)2319 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2320     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2321 {
2322 	if_softc_ctx_t scctx;
2323 	struct sysctl_oid *node;
2324 	struct sysctl_oid_list *list;
2325 	int i;
2326 
2327 	scctx = sc->vmx_scctx;
2328 
2329 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2330 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2331 
2332 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2333 		    "debug", CTLFLAG_RD, NULL, "");
2334 		list = SYSCTL_CHILDREN(node);
2335 
2336 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2337 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2338 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2339 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2340 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2341 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2342 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2343 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2344 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2345 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2346 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2347 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2348 	}
2349 
2350 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2351 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2352 
2353 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2354 		    "debug", CTLFLAG_RD, NULL, "");
2355 		list = SYSCTL_CHILDREN(node);
2356 
2357 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2358 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2359 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2360 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2361 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2362 		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2363 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2364 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2365 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2366 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2367 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2368 		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2369 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2370 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2371 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2372 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2373 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2374 		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2375 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2376 		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2377 		    0, "");
2378 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2379 		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2380 	}
2381 }
2382 
2383 static void
vmxnet3_setup_queue_sysctl(struct vmxnet3_softc * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid_list * child)2384 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2385     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2386 {
2387 	if_softc_ctx_t scctx;
2388 	int i;
2389 
2390 	scctx = sc->vmx_scctx;
2391 
2392 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2393 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2394 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2395 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2396 
2397 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2398 }
2399 
2400 static void
vmxnet3_setup_sysctl(struct vmxnet3_softc * sc)2401 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2402 {
2403 	device_t dev;
2404 	struct sysctl_ctx_list *ctx;
2405 	struct sysctl_oid *tree;
2406 	struct sysctl_oid_list *child;
2407 
2408 	dev = sc->vmx_dev;
2409 	ctx = device_get_sysctl_ctx(dev);
2410 	tree = device_get_sysctl_tree(dev);
2411 	child = SYSCTL_CHILDREN(tree);
2412 
2413 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2414 }
2415 
2416 static void
vmxnet3_write_bar0(struct vmxnet3_softc * sc,bus_size_t r,uint32_t v)2417 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2418 {
2419 
2420 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2421 }
2422 
2423 static uint32_t
vmxnet3_read_bar1(struct vmxnet3_softc * sc,bus_size_t r)2424 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2425 {
2426 
2427 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2428 }
2429 
2430 static void
vmxnet3_write_bar1(struct vmxnet3_softc * sc,bus_size_t r,uint32_t v)2431 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2432 {
2433 
2434 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2435 }
2436 
2437 static void
vmxnet3_write_cmd(struct vmxnet3_softc * sc,uint32_t cmd)2438 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2439 {
2440 
2441 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2442 }
2443 
2444 static uint32_t
vmxnet3_read_cmd(struct vmxnet3_softc * sc,uint32_t cmd)2445 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2446 {
2447 
2448 	vmxnet3_write_cmd(sc, cmd);
2449 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2450 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2451 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2452 }
2453 
/*
 * Enable interrupt `irq' by writing 0 to its BAR0 IMASK register.
 */
static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}
2460 
/*
 * Disable interrupt `irq' by writing 1 to its BAR0 IMASK register.
 */
static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}
2467 
2468 static int
vmxnet3_tx_queue_intr_enable(if_ctx_t ctx,uint16_t qid)2469 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2470 {
2471 	/* Not using interrupts for TX */
2472 	return (0);
2473 }
2474 
2475 static int
vmxnet3_rx_queue_intr_enable(if_ctx_t ctx,uint16_t qid)2476 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2477 {
2478 	struct vmxnet3_softc *sc;
2479 
2480 	sc = iflib_get_softc(ctx);
2481 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2482 	return (0);
2483 }
2484 
2485 static void
vmxnet3_link_intr_enable(if_ctx_t ctx)2486 vmxnet3_link_intr_enable(if_ctx_t ctx)
2487 {
2488 	struct vmxnet3_softc *sc;
2489 
2490 	sc = iflib_get_softc(ctx);
2491 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2492 }
2493 
2494 static void
vmxnet3_intr_enable_all(if_ctx_t ctx)2495 vmxnet3_intr_enable_all(if_ctx_t ctx)
2496 {
2497 	struct vmxnet3_softc *sc;
2498 	if_softc_ctx_t scctx;
2499 	int i;
2500 
2501 	sc = iflib_get_softc(ctx);
2502 	scctx = sc->vmx_scctx;
2503 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2504 	for (i = 0; i < scctx->isc_vectors; i++)
2505 		vmxnet3_enable_intr(sc, i);
2506 }
2507 
2508 static void
vmxnet3_intr_disable_all(if_ctx_t ctx)2509 vmxnet3_intr_disable_all(if_ctx_t ctx)
2510 {
2511 	struct vmxnet3_softc *sc;
2512 	int i;
2513 
2514 	sc = iflib_get_softc(ctx);
2515 	/*
2516 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2517 	 * run, which is before the top level shared data area is
2518 	 * initialized and the device made aware of it.
2519 	 */
2520 	if (sc->vmx_ds != NULL)
2521 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2522 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2523 		vmxnet3_disable_intr(sc, i);
2524 }
2525 
2526 /*
2527  * Since this is a purely paravirtualized device, we do not have
2528  * to worry about DMA coherency. But at times, we must make sure
2529  * both the compiler and CPU do not reorder memory operations.
2530  */
2531 static inline void
vmxnet3_barrier(struct vmxnet3_softc * sc,vmxnet3_barrier_t type)2532 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2533 {
2534 
2535 	switch (type) {
2536 	case VMXNET3_BARRIER_RD:
2537 		rmb();
2538 		break;
2539 	case VMXNET3_BARRIER_WR:
2540 		wmb();
2541 		break;
2542 	case VMXNET3_BARRIER_RDWR:
2543 		mb();
2544 		break;
2545 	default:
2546 		panic("%s: bad barrier type %d", __func__, type);
2547 	}
2548 }
2549