xref: /freebsd-13-stable/sys/dev/ice/if_ice_iflib.c (revision 438e078d3166feafa4e7887471faa1da3e70a74a)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
87 static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
88 static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
89 static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
90 static void *ice_subif_register(device_t);
91 static void ice_subif_setup_scctx(struct ice_mirr_if *mif);
92 static int ice_subif_rebuild(struct ice_softc *sc);
93 static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);
94 
95 /* Iflib API */
96 static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
97     uint64_t *paddrs, int ntxqs, int ntxqsets);
98 static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
99     uint64_t *paddrs, int nrxqs, int nrxqsets);
100 static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
101 static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
102 static void ice_subif_if_intr_enable(if_ctx_t ctx);
103 static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
104 static void ice_subif_if_init(if_ctx_t ctx);
105 static void ice_subif_if_stop(if_ctx_t ctx);
106 static void ice_subif_if_queues_free(if_ctx_t ctx);
107 static int ice_subif_if_attach_pre(if_ctx_t);
108 static int ice_subif_if_attach_post(if_ctx_t);
109 static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
110 static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);
111 
112 static int ice_msix_que(void *arg);
113 static int ice_msix_admin(void *arg);
114 
115 /*
116  * Helper function prototypes
117  */
118 static int ice_pci_mapping(struct ice_softc *sc);
119 static void ice_free_pci_mapping(struct ice_softc *sc);
120 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
121 static void ice_init_device_features(struct ice_softc *sc);
122 static void ice_init_tx_tracking(struct ice_vsi *vsi);
123 static void ice_handle_reset_event(struct ice_softc *sc);
124 static void ice_handle_pf_reset_request(struct ice_softc *sc);
125 static void ice_prepare_for_reset(struct ice_softc *sc);
126 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
127 static void ice_rebuild(struct ice_softc *sc);
128 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
129 static void ice_free_irqvs(struct ice_softc *sc);
130 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
131 static void ice_poll_for_media_avail(struct ice_softc *sc);
132 static void ice_setup_scctx(struct ice_softc *sc);
133 static int ice_allocate_msix(struct ice_softc *sc);
134 static void ice_admin_timer(void *arg);
135 static void ice_transition_recovery_mode(struct ice_softc *sc);
136 static void ice_transition_safe_mode(struct ice_softc *sc);
137 static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
138 
139 /*
140  * Device Interface Declaration
141  */
142 
143 /**
144  * @var ice_methods
145  * @brief ice driver method entry points
146  *
147  * List of device methods implementing the generic device interface used by
148  * the device stack to interact with the ice driver. Since this is an iflib
149  * driver, most of the methods point to the generic iflib implementation.
150  */
151 static device_method_t ice_methods[] = {
152 	/* Device interface */
153 	DEVMETHOD(device_register, ice_register),
154 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
155 	DEVMETHOD(device_attach,   iflib_device_attach),
156 	DEVMETHOD(device_detach,   iflib_device_detach),
157 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
158 	DEVMETHOD(device_suspend,  iflib_device_suspend),
159 	DEVMETHOD(device_resume,   iflib_device_resume),
160 	DEVMETHOD_END
161 };
162 
163 /**
164  * @var ice_iflib_methods
165  * @brief iflib method entry points
166  *
167  * List of device methods used by the iflib stack to interact with this
168  * driver. These are the real main entry points used to interact with this
169  * driver.
170  */
171 static device_method_t ice_iflib_methods[] = {
172 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
173 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
174 	DEVMETHOD(ifdi_detach, ice_if_detach),
175 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
176 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
177 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
178 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
179 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
180 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
181 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
182 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
183 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
184 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
185 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
186 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
187 	DEVMETHOD(ifdi_init, ice_if_init),
188 	DEVMETHOD(ifdi_stop, ice_if_stop),
189 	DEVMETHOD(ifdi_timer, ice_if_timer),
190 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
191 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
192 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
193 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
194 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
195 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
196 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
197 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
198 	DEVMETHOD(ifdi_resume, ice_if_resume),
199 	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
200 	DEVMETHOD_END
201 };
202 
203 /**
204  * @var ice_driver
205  * @brief driver structure for the generic device stack
206  *
207  * driver_t definition used to setup the generic device methods.
208  */
209 static driver_t ice_driver = {
210 	.name = "ice",
211 	.methods = ice_methods,
212 	.size = sizeof(struct ice_softc),
213 };
214 
215 /**
216  * @var ice_iflib_driver
217  * @brief driver structure for the iflib stack
218  *
219  * driver_t definition used to setup the iflib device methods.
220  */
221 static driver_t ice_iflib_driver = {
222 	.name = "ice",
223 	.methods = ice_iflib_methods,
224 	.size = sizeof(struct ice_softc),
225 };
226 
227 extern struct if_txrx ice_txrx;
228 extern struct if_txrx ice_recovery_txrx;
229 
230 /**
231  * @var ice_sctx
232  * @brief ice driver shared context
233  *
234  * Structure defining shared values (context) that is used by all instances of
235  * the device. Primarily used to setup details about how the iflib stack
236  * should treat this driver. Also defines the default, minimum, and maximum
237  * number of descriptors in each ring.
238  */
239 static struct if_shared_ctx ice_sctx = {
240 	.isc_magic = IFLIB_MAGIC,
241 	.isc_q_align = PAGE_SIZE,
242 
243 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
244 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
245 	 * that doesn't make sense since that would be larger than the maximum
246 	 * size of a single packet.
247 	 */
248 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
249 
250 	/* XXX: This is only used by iflib to ensure that
251 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
252 	 */
253 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
254 	/* XXX: This is used by iflib to set the number of segments in the TSO
255 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
256 	 * related ifnet parameter.
257 	 */
258 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
259 
260 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
261 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
262 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
263 
264 	.isc_nfl = 1,
265 	.isc_ntxqs = 1,
266 	.isc_nrxqs = 1,
267 
268 	.isc_admin_intrcnt = 1,
269 	.isc_vendor_info = ice_vendor_info_array,
270 	.isc_driver_version = __DECONST(char *, ice_driver_version),
271 	.isc_driver = &ice_iflib_driver,
272 
273 	/*
274 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
275 	 * for hardware checksum offload
276 	 *
277 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
278 	 * IP sum field, required by our hardware to calculate valid TSO
279 	 * checksums.
280 	 *
281 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
282 	 * even when the interface is down.
283 	 *
284 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
285 	 * vectors manually instead of relying on iflib code to do this.
286 	 */
287 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
288 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
289 
290 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
291 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
292 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
293 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
294 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
295 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
296 };
297 
298 /**
299  * @var ice_devclass
300  * @brief ice driver device class
301  *
302  * device class used to setup the ice driver module kobject class.
303  */
304 devclass_t ice_devclass;
305 DRIVER_MODULE(ice, pci, ice_driver, ice_devclass, ice_module_event_handler, 0);
306 
307 MODULE_VERSION(ice, 1);
308 MODULE_DEPEND(ice, pci, 1, 1, 1);
309 MODULE_DEPEND(ice, ether, 1, 1, 1);
310 MODULE_DEPEND(ice, iflib, 1, 1, 1);
311 
312 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
313 
314 /* Static driver-wide sysctls */
315 #include "ice_iflib_sysctls.h"
316 
317 /**
318  * ice_pci_mapping - Map PCI BAR memory
319  * @sc: device private softc
320  *
321  * Map PCI BAR 0 for device operation.
322  */
323 static int
ice_pci_mapping(struct ice_softc * sc)324 ice_pci_mapping(struct ice_softc *sc)
325 {
326 	int rc;
327 
328 	/* Map BAR0 */
329 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
330 	if (rc)
331 		return rc;
332 
333 	return 0;
334 }
335 
336 /**
337  * ice_free_pci_mapping - Release PCI BAR memory
338  * @sc: device private softc
339  *
340  * Release PCI BARs which were previously mapped by ice_pci_mapping().
341  */
342 static void
ice_free_pci_mapping(struct ice_softc * sc)343 ice_free_pci_mapping(struct ice_softc *sc)
344 {
345 	/* Free BAR0 */
346 	ice_free_bar(sc->dev, &sc->bar0);
347 }
348 
349 /*
350  * Device methods
351  */
352 
353 /**
354  * ice_register - register device method callback
355  * @dev: the device being registered
356  *
357  * Returns a pointer to the shared context structure, which is used by iflib.
358  */
359 static void *
ice_register(device_t dev __unused)360 ice_register(device_t dev __unused)
361 {
362 	return &ice_sctx;
363 } /* ice_register */
364 
365 /**
366  * ice_setup_scctx - Setup the iflib softc context structure
367  * @sc: the device private structure
368  *
369  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
370  * when loading.
371  */
372 static void
ice_setup_scctx(struct ice_softc * sc)373 ice_setup_scctx(struct ice_softc *sc)
374 {
375 	if_softc_ctx_t scctx = sc->scctx;
376 	struct ice_hw *hw = &sc->hw;
377 	device_t dev = sc->dev;
378 	bool safe_mode, recovery_mode;
379 
380 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
381 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
382 
383 	/*
384 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
385 	 * a single queue pair.
386 	 */
387 	if (safe_mode || recovery_mode) {
388 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
389 		scctx->isc_ntxqsets_max = 1;
390 		scctx->isc_nrxqsets_max = 1;
391 	} else {
392 		/*
393 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
394 		 * the values of the override sysctls. Cache these initial
395 		 * values so that the driver can be aware of what the iflib
396 		 * sysctl value is when setting up MSI-X vectors.
397 		 */
398 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
399 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
400 
401 		if (scctx->isc_ntxqsets == 0)
402 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
403 		if (scctx->isc_nrxqsets == 0)
404 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
405 
406 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
407 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
408 
409 		/*
410 		 * Sanity check that the iflib sysctl values are within the
411 		 * maximum supported range.
412 		 */
413 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
414 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
415 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
416 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
417 	}
418 
419 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
420 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
421 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
422 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
423 
424 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
425 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
426 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
427 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
428 
429 	scctx->isc_msix_bar = pci_msix_table_bar(dev);
430 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
431 
432 	/*
433 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
434 	 */
435 	if (recovery_mode)
436 		scctx->isc_txrx = &ice_recovery_txrx;
437 	else
438 		scctx->isc_txrx = &ice_txrx;
439 
440 	/*
441 	 * If the driver loads in Safe mode or Recovery mode, disable
442 	 * advanced features including hardware offloads.
443 	 */
444 	if (safe_mode || recovery_mode) {
445 		scctx->isc_capenable = ICE_SAFE_CAPS;
446 		scctx->isc_tx_csum_flags = 0;
447 	} else {
448 		scctx->isc_capenable = ICE_FULL_CAPS;
449 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
450 	}
451 
452 	scctx->isc_capabilities = scctx->isc_capenable;
453 } /* ice_setup_scctx */
454 
455 /**
456  * ice_if_attach_pre - Early device attach logic
457  * @ctx: the iflib context structure
458  *
459  * Called by iflib during the attach process. Earliest main driver entry
460  * point which performs necessary hardware and driver initialization. Called
461  * before the Tx and Rx queues are allocated.
462  */
463 static int
ice_if_attach_pre(if_ctx_t ctx)464 ice_if_attach_pre(if_ctx_t ctx)
465 {
466 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
467 	enum ice_fw_modes fw_mode;
468 	enum ice_status status;
469 	if_softc_ctx_t scctx;
470 	struct ice_hw *hw;
471 	device_t dev;
472 	int err;
473 
474 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
475 
476 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
477 
478 	sc->ctx = ctx;
479 	sc->media = iflib_get_media(ctx);
480 	sc->sctx = iflib_get_sctx(ctx);
481 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
482 
483 	dev = sc->dev = iflib_get_dev(ctx);
484 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
485 
486 	hw = &sc->hw;
487 	hw->back = sc;
488 
489 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
490 		 "%s:admin", device_get_nameunit(dev));
491 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
492 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
493 
494 	ASSERT_CTX_LOCKED(sc);
495 
496 	if (ice_pci_mapping(sc)) {
497 		err = (ENXIO);
498 		goto destroy_admin_timer;
499 	}
500 
501 	/* Save off the PCI information */
502 	ice_save_pci_info(hw, dev);
503 
504 	/* create tunables as early as possible */
505 	ice_add_device_tunables(sc);
506 
507 	/* Setup ControlQ lengths */
508 	ice_set_ctrlq_len(hw);
509 
510 reinit_hw:
511 
512 	fw_mode = ice_get_fw_mode(hw);
513 	if (fw_mode == ICE_FW_MODE_REC) {
514 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
515 
516 		err = ice_attach_pre_recovery_mode(sc);
517 		if (err)
518 			goto free_pci_mapping;
519 
520 		return (0);
521 	}
522 
523 	/* Initialize the hw data structure */
524 	status = ice_init_hw(hw);
525 	if (status) {
526 		if (status == ICE_ERR_FW_API_VER) {
527 			/* Enter recovery mode, so that the driver remains
528 			 * loaded. This way, if the system administrator
529 			 * cannot update the driver, they may still attempt to
530 			 * downgrade the NVM.
531 			 */
532 			err = ice_attach_pre_recovery_mode(sc);
533 			if (err)
534 				goto free_pci_mapping;
535 
536 			return (0);
537 		} else {
538 			err = EIO;
539 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
540 				      ice_status_str(status),
541 				      ice_aq_str(hw->adminq.sq_last_status));
542 		}
543 		goto free_pci_mapping;
544 	}
545 
546 	ice_init_device_features(sc);
547 
548 	/* Keep flag set by default */
549 	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
550 
551 	/* Notify firmware of the device driver version */
552 	err = ice_send_version(sc);
553 	if (err)
554 		goto deinit_hw;
555 
556 	/*
557 	 * Success indicates a change was made that requires a reinitialization
558 	 * of the hardware
559 	 */
560 	err = ice_load_pkg_file(sc);
561 	if (err == ICE_SUCCESS) {
562 		ice_deinit_hw(hw);
563 		goto reinit_hw;
564 	}
565 
566 	err = ice_init_link_events(sc);
567 	if (err) {
568 		device_printf(dev, "ice_init_link_events failed: %s\n",
569 			      ice_err_str(err));
570 		goto deinit_hw;
571 	}
572 
573 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
574 	 * and firmware, this will force them to use single VLAN mode.
575 	 */
576 	status = ice_set_vlan_mode(hw);
577 	if (status) {
578 		err = EIO;
579 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
580 			      ice_status_str(status),
581 			      ice_aq_str(hw->adminq.sq_last_status));
582 		goto deinit_hw;
583 	}
584 
585 	ice_print_nvm_version(sc);
586 
587 	/* Setup the MAC address */
588 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
589 
590 	/* Setup the iflib softc context structure */
591 	ice_setup_scctx(sc);
592 
593 	/* Initialize the Tx queue manager */
594 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
595 	if (err) {
596 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
597 			      ice_err_str(err));
598 		goto deinit_hw;
599 	}
600 
601 	/* Initialize the Rx queue manager */
602 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
603 	if (err) {
604 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
605 			      ice_err_str(err));
606 		goto free_tx_qmgr;
607 	}
608 
609 	/* Initialize the PF device interrupt resource manager */
610 	err = ice_alloc_intr_tracking(sc);
611 	if (err)
612 		/* Errors are already printed */
613 		goto free_rx_qmgr;
614 
615 	/* Determine maximum number of VSIs we'll prepare for */
616 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
617 				    hw->func_caps.guar_num_vsi);
618 
619 	if (!sc->num_available_vsi) {
620 		err = EIO;
621 		device_printf(dev, "No VSIs allocated to host\n");
622 		goto free_intr_tracking;
623 	}
624 
625 	/* Allocate storage for the VSI pointers */
626 	sc->all_vsi = (struct ice_vsi **)
627 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
628 		       M_ICE, M_WAITOK | M_ZERO);
629 	if (!sc->all_vsi) {
630 		err = ENOMEM;
631 		device_printf(dev, "Unable to allocate VSI array\n");
632 		goto free_intr_tracking;
633 	}
634 
635 	/*
636 	 * Prepare the statically allocated primary PF VSI in the softc
637 	 * structure. Other VSIs will be dynamically allocated as needed.
638 	 */
639 	ice_setup_pf_vsi(sc);
640 
641 	ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
642 	    scctx->isc_nrxqsets_max);
643 
644 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
645 	err = ice_allocate_msix(sc);
646 	if (err)
647 		goto free_main_vsi;
648 
649 	return 0;
650 
651 free_main_vsi:
652 	/* ice_release_vsi will free the queue maps if they were allocated */
653 	ice_release_vsi(&sc->pf_vsi);
654 	free(sc->all_vsi, M_ICE);
655 	sc->all_vsi = NULL;
656 free_intr_tracking:
657 	ice_free_intr_tracking(sc);
658 free_rx_qmgr:
659 	ice_resmgr_destroy(&sc->rx_qmgr);
660 free_tx_qmgr:
661 	ice_resmgr_destroy(&sc->tx_qmgr);
662 deinit_hw:
663 	ice_deinit_hw(hw);
664 free_pci_mapping:
665 	ice_free_pci_mapping(sc);
666 destroy_admin_timer:
667 	mtx_lock(&sc->admin_mtx);
668 	callout_stop(&sc->admin_timer);
669 	mtx_unlock(&sc->admin_mtx);
670 	mtx_destroy(&sc->admin_mtx);
671 	return err;
672 } /* ice_if_attach_pre */
673 
674 /**
675  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
676  * @sc: the device private softc
677  *
678  * Loads the device driver in limited Firmware Recovery mode, intended to
679  * allow users to update the firmware to attempt to recover the device.
680  *
681  * @remark We may enter recovery mode in case either (a) the firmware is
682  * detected to be in an invalid state and must be re-programmed, or (b) the
683  * driver detects that the loaded firmware has a non-compatible API version
684  * that the driver cannot operate with.
685  */
686 static int
ice_attach_pre_recovery_mode(struct ice_softc * sc)687 ice_attach_pre_recovery_mode(struct ice_softc *sc)
688 {
689 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
690 
691 	/* Setup the iflib softc context */
692 	ice_setup_scctx(sc);
693 
694 	/* Setup the PF VSI back pointer */
695 	sc->pf_vsi.sc = sc;
696 
697 	/*
698 	 * We still need to allocate MSI-X vectors since we need one vector to
699 	 * run the administrative admin interrupt
700 	 */
701 	return ice_allocate_msix(sc);
702 }
703 
704 /**
705  * ice_update_link_status - notify OS of link state change
706  * @sc: device private softc structure
707  * @update_media: true if we should update media even if link didn't change
708  *
709  * Called to notify iflib core of link status changes. Should be called once
710  * during attach_post, and whenever link status changes during runtime.
711  *
712  * This call only updates the currently supported media types if the link
713  * status changed, or if update_media is set to true.
714  */
715 static void
ice_update_link_status(struct ice_softc * sc,bool update_media)716 ice_update_link_status(struct ice_softc *sc, bool update_media)
717 {
718 	struct ice_hw *hw = &sc->hw;
719 	enum ice_status status;
720 
721 	/* Never report link up when in recovery mode */
722 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
723 		return;
724 
725 	/* Report link status to iflib only once each time it changes */
726 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
727 		if (sc->link_up) { /* link is up */
728 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
729 
730 			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
731 				ice_set_default_local_lldp_mib(sc);
732 
733 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
734 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
735 
736 			ice_link_up_msg(sc);
737 		} else { /* link is down */
738 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
739 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
740 		}
741 		update_media = true;
742 	}
743 
744 	/* Update the supported media types */
745 	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
746 		status = ice_add_media_types(sc, sc->media);
747 		if (status)
748 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
749 				      ice_status_str(status),
750 				      ice_aq_str(hw->adminq.sq_last_status));
751 	}
752 }
753 
754 /**
755  * ice_if_attach_post - Late device attach logic
756  * @ctx: the iflib context structure
757  *
758  * Called by iflib to finish up attaching the device. Performs any attach
759  * logic which must wait until after the Tx and Rx queues have been
760  * allocated.
761  */
762 static int
ice_if_attach_post(if_ctx_t ctx)763 ice_if_attach_post(if_ctx_t ctx)
764 {
765 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
766 	if_t ifp = iflib_get_ifp(ctx);
767 	enum ice_status status;
768 	int err;
769 
770 	ASSERT_CTX_LOCKED(sc);
771 
772 	/* We don't yet support loading if MSI-X is not supported */
773 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
774 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
775 		return (ENOTSUP);
776 	}
777 
778 	/* The ifnet structure hasn't yet been initialized when the attach_pre
779 	 * handler is called, so wait until attach_post to setup the
780 	 * isc_max_frame_size.
781 	 */
782 
783 	sc->ifp = ifp;
784 	sc->scctx->isc_max_frame_size = ifp->if_mtu +
785 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
786 
787 	/*
788 	 * If we are in recovery mode, only perform a limited subset of
789 	 * initialization to support NVM recovery.
790 	 */
791 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
792 		ice_attach_post_recovery_mode(sc);
793 		return (0);
794 	}
795 
796 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
797 
798 	err = ice_initialize_vsi(&sc->pf_vsi);
799 	if (err) {
800 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
801 			      ice_err_str(err));
802 		return err;
803 	}
804 
805 	/* Enable FW health event reporting */
806 	ice_init_health_events(sc);
807 
808 	/* Configure the main PF VSI for RSS */
809 	err = ice_config_rss(&sc->pf_vsi);
810 	if (err) {
811 		device_printf(sc->dev,
812 			      "Unable to configure RSS for the main VSI, err %s\n",
813 			      ice_err_str(err));
814 		return err;
815 	}
816 
817 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
818 	err = ice_cfg_pf_ethertype_filters(sc);
819 	if (err)
820 		return err;
821 
822 	ice_get_and_print_bus_info(sc);
823 
824 	ice_set_link_management_mode(sc);
825 
826 	ice_init_saved_phy_cfg(sc);
827 
828 	ice_cfg_pba_num(sc);
829 
830 	/* Set a default value for PFC mode on attach since the FW state is unknown
831 	 * before sysctl tunables are executed and it can't be queried. This fixes an
832 	 * issue when loading the driver with the FW LLDP agent enabled but the FW
833 	 * was previously in DSCP PFC mode.
834 	 */
835 	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
836 	if (status != ICE_SUCCESS)
837 		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
838 
839 	ice_add_device_sysctls(sc);
840 
841 	/* Get DCBX/LLDP state and start DCBX agent */
842 	ice_init_dcb_setup(sc);
843 
844 	/* Setup link configuration parameters */
845 	ice_init_link_configuration(sc);
846 	ice_update_link_status(sc, true);
847 
848 	/* Configure interrupt causes for the administrative interrupt */
849 	ice_configure_misc_interrupts(sc);
850 
851 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
852 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
853 
854 	err = ice_rdma_pf_attach(sc);
855 	if (err)
856 		return (err);
857 
858 	/* Start the admin timer */
859 	mtx_lock(&sc->admin_mtx);
860 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
861 	mtx_unlock(&sc->admin_mtx);
862 
863 	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
864 		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
865 		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
866 
867 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
868 
869 	return 0;
870 } /* ice_if_attach_post */
871 
872 /**
873  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
874  * @sc: the device private softc
875  *
876  * Performs minimal work to prepare the driver to recover an NVM in case the
877  * firmware is in recovery mode.
878  */
879 static void
ice_attach_post_recovery_mode(struct ice_softc * sc)880 ice_attach_post_recovery_mode(struct ice_softc *sc)
881 {
882 	/* Configure interrupt causes for the administrative interrupt */
883 	ice_configure_misc_interrupts(sc);
884 
885 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
886 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
887 
888 	/* Start the admin timer */
889 	mtx_lock(&sc->admin_mtx);
890 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
891 	mtx_unlock(&sc->admin_mtx);
892 
893 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
894 }
895 
896 /**
897  * ice_free_irqvs - Free IRQ vector memory
898  * @sc: the device private softc structure
899  *
900  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
901  */
902 static void
ice_free_irqvs(struct ice_softc * sc)903 ice_free_irqvs(struct ice_softc *sc)
904 {
905 	struct ice_vsi *vsi = &sc->pf_vsi;
906 	if_ctx_t ctx = sc->ctx;
907 	int i;
908 
909 	/* If the irqvs array is NULL, then there are no vectors to free */
910 	if (sc->irqvs == NULL)
911 		return;
912 
913 	/* Free the IRQ vectors */
914 	for (i = 0; i < sc->num_irq_vectors; i++)
915 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
916 
917 	/* Clear the irqv pointers */
918 	for (i = 0; i < vsi->num_rx_queues; i++)
919 		vsi->rx_queues[i].irqv = NULL;
920 
921 	for (i = 0; i < vsi->num_tx_queues; i++)
922 		vsi->tx_queues[i].irqv = NULL;
923 
924 	/* Release the vector array memory */
925 	free(sc->irqvs, M_ICE);
926 	sc->irqvs = NULL;
927 	sc->num_irq_vectors = 0;
928 }
929 
930 /**
931  * ice_if_detach - Device driver detach logic
932  * @ctx: iflib context structure
933  *
934  * Perform device shutdown logic to detach the device driver.
935  *
936  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
937  * ice_if_detach(). It is possible for the functions to be called in either
938  * order, and they must not assume to have a strict ordering.
939  */
940 static int
ice_if_detach(if_ctx_t ctx)941 ice_if_detach(if_ctx_t ctx)
942 {
943 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
944 	struct ice_vsi *vsi = &sc->pf_vsi;
945 	enum ice_status status;
946 	int i;
947 
948 	ASSERT_CTX_LOCKED(sc);
949 
950 	/* Indicate that we're detaching */
951 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
952 
953 	/* Stop the admin timer */
954 	mtx_lock(&sc->admin_mtx);
955 	callout_stop(&sc->admin_timer);
956 	mtx_unlock(&sc->admin_mtx);
957 	mtx_destroy(&sc->admin_mtx);
958 
959 	/* Remove additional interfaces if they exist */
960 	if (sc->mirr_if)
961 		ice_destroy_mirror_interface(sc);
962 	ice_rdma_pf_detach(sc);
963 
964 	/* Free allocated media types */
965 	ifmedia_removeall(sc->media);
966 
967 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
968 	 * pointers. Note, the calls here and those in ice_if_queues_free()
969 	 * are *BOTH* necessary, as we cannot guarantee which path will be
970 	 * run first
971 	 */
972 	ice_vsi_del_txqs_ctx(vsi);
973 	ice_vsi_del_rxqs_ctx(vsi);
974 
975 	/* Release MSI-X resources */
976 	ice_free_irqvs(sc);
977 
978 	for (i = 0; i < sc->num_available_vsi; i++) {
979 		if (sc->all_vsi[i])
980 			ice_release_vsi(sc->all_vsi[i]);
981 	}
982 
983 	if (sc->all_vsi) {
984 		free(sc->all_vsi, M_ICE);
985 		sc->all_vsi = NULL;
986 	}
987 
988 	/* Release MSI-X memory */
989 	pci_release_msi(sc->dev);
990 
991 	if (sc->msix_table != NULL) {
992 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
993 				     rman_get_rid(sc->msix_table),
994 				     sc->msix_table);
995 		sc->msix_table = NULL;
996 	}
997 
998 	ice_free_intr_tracking(sc);
999 
1000 	/* Destroy the queue managers */
1001 	ice_resmgr_destroy(&sc->tx_qmgr);
1002 	ice_resmgr_destroy(&sc->rx_qmgr);
1003 
1004 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1005 		ice_deinit_hw(&sc->hw);
1006 
1007 	IFLIB_CTX_UNLOCK(sc);
1008 	status = ice_reset(&sc->hw, ICE_RESET_PFR);
1009 	IFLIB_CTX_LOCK(sc);
1010 	if (status) {
1011 		device_printf(sc->dev, "device PF reset failed, err %s\n",
1012 			      ice_status_str(status));
1013 	}
1014 
1015 	ice_free_pci_mapping(sc);
1016 
1017 	return 0;
1018 } /* ice_if_detach */
1019 
1020 /**
1021  * ice_if_tx_queues_alloc - Allocate Tx queue memory
1022  * @ctx: iflib context structure
1023  * @vaddrs: virtual addresses for the queue memory
1024  * @paddrs: physical addresses for the queue memory
1025  * @ntxqs: the number of Tx queues per set (should always be 1)
1026  * @ntxqsets: the number of Tx queue sets to allocate
1027  *
1028  * Called by iflib to allocate Tx queues for the device. Allocates driver
1029  * memory to track each queue, the status arrays used for descriptor
1030  * status reporting, and Tx queue sysctls.
1031  */
1032 static int
ice_if_tx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only ntxqs,int ntxqsets)1033 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1034 		       int __invariant_only ntxqs, int ntxqsets)
1035 {
1036 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1037 	struct ice_vsi *vsi = &sc->pf_vsi;
1038 	struct ice_tx_queue *txq;
1039 	int err, i, j;
1040 
1041 	MPASS(ntxqs == 1);
1042 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1043 	ASSERT_CTX_LOCKED(sc);
1044 
1045 	/* Do not bother allocating queues if we're in recovery mode */
1046 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1047 		return (0);
1048 
1049 	/* Allocate queue structure memory */
1050 	if (!(vsi->tx_queues =
1051 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1052 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1053 		return (ENOMEM);
1054 	}
1055 
1056 	/* Allocate report status arrays */
1057 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1058 		if (!(txq->tx_rsq =
1059 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1060 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1061 			err = ENOMEM;
1062 			goto free_tx_queues;
1063 		}
1064 		/* Initialize report status array */
1065 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1066 			txq->tx_rsq[j] = QIDX_INVALID;
1067 	}
1068 
1069 	/* Assign queues from PF space to the main VSI */
1070 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1071 	if (err) {
1072 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1073 			      ice_err_str(err));
1074 		goto free_tx_queues;
1075 	}
1076 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1077 
1078 	/* Add Tx queue sysctls context */
1079 	ice_vsi_add_txqs_ctx(vsi);
1080 
1081 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1082 		/* q_handle == me when only one TC */
1083 		txq->me = txq->q_handle = i;
1084 		txq->vsi = vsi;
1085 
1086 		/* store the queue size for easier access */
1087 		txq->desc_count = sc->scctx->isc_ntxd[0];
1088 
1089 		/* get the virtual and physical address of the hardware queues */
1090 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1091 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1092 		txq->tx_paddr = paddrs[i];
1093 
1094 		ice_add_txq_sysctls(txq);
1095 	}
1096 
1097 	vsi->num_tx_queues = ntxqsets;
1098 
1099 	return (0);
1100 
1101 free_tx_queues:
1102 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1103 		if (txq->tx_rsq != NULL) {
1104 			free(txq->tx_rsq, M_ICE);
1105 			txq->tx_rsq = NULL;
1106 		}
1107 	}
1108 	free(vsi->tx_queues, M_ICE);
1109 	vsi->tx_queues = NULL;
1110 	return err;
1111 }
1112 
1113 /**
1114  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1115  * @ctx: iflib context structure
1116  * @vaddrs: virtual addresses for the queue memory
1117  * @paddrs: physical addresses for the queue memory
1118  * @nrxqs: number of Rx queues per set (should always be 1)
1119  * @nrxqsets: number of Rx queue sets to allocate
1120  *
1121  * Called by iflib to allocate Rx queues for the device. Allocates driver
1122  * memory to track each queue, as well as sets up the Rx queue sysctls.
1123  */
1124 static int
ice_if_rx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only nrxqs,int nrxqsets)1125 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1126 		       int __invariant_only nrxqs, int nrxqsets)
1127 {
1128 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1129 	struct ice_vsi *vsi = &sc->pf_vsi;
1130 	struct ice_rx_queue *rxq;
1131 	int err, i;
1132 
1133 	MPASS(nrxqs == 1);
1134 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1135 	ASSERT_CTX_LOCKED(sc);
1136 
1137 	/* Do not bother allocating queues if we're in recovery mode */
1138 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1139 		return (0);
1140 
1141 	/* Allocate queue structure memory */
1142 	if (!(vsi->rx_queues =
1143 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1144 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1145 		return (ENOMEM);
1146 	}
1147 
1148 	/* Assign queues from PF space to the main VSI */
1149 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1150 	if (err) {
1151 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1152 			      ice_err_str(err));
1153 		goto free_rx_queues;
1154 	}
1155 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1156 
1157 	/* Add Rx queue sysctls context */
1158 	ice_vsi_add_rxqs_ctx(vsi);
1159 
1160 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1161 		rxq->me = i;
1162 		rxq->vsi = vsi;
1163 
1164 		/* store the queue size for easier access */
1165 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1166 
1167 		/* get the virtual and physical address of the hardware queues */
1168 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1169 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1170 		rxq->rx_paddr = paddrs[i];
1171 
1172 		ice_add_rxq_sysctls(rxq);
1173 	}
1174 
1175 	vsi->num_rx_queues = nrxqsets;
1176 
1177 	return (0);
1178 
1179 free_rx_queues:
1180 	free(vsi->rx_queues, M_ICE);
1181 	vsi->rx_queues = NULL;
1182 	return err;
1183 }
1184 
1185 /**
1186  * ice_if_queues_free - Free queue memory
1187  * @ctx: the iflib context structure
1188  *
1189  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1190  * ice_if_rx_queues_alloc().
1191  *
1192  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1193  * called in the same order. It's possible for ice_if_queues_free() to be
1194  * called prior to ice_if_detach(), and vice versa.
1195  *
1196  * For this reason, the main VSI is a static member of the ice_softc, which is
1197  * not free'd until after iflib finishes calling both of these functions.
1198  *
1199  * Thus, care must be taken in how we manage the memory being freed by this
1200  * function, and in what tasks it can and must perform.
1201  */
1202 static void
ice_if_queues_free(if_ctx_t ctx)1203 ice_if_queues_free(if_ctx_t ctx)
1204 {
1205 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1206 	struct ice_vsi *vsi = &sc->pf_vsi;
1207 	struct ice_tx_queue *txq;
1208 	int i;
1209 
1210 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1211 	 * pointers. Note, the calls here and those in ice_if_detach()
1212 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1213 	 * run first
1214 	 */
1215 	ice_vsi_del_txqs_ctx(vsi);
1216 	ice_vsi_del_rxqs_ctx(vsi);
1217 
1218 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1219 	ice_free_irqvs(sc);
1220 
1221 	if (vsi->tx_queues != NULL) {
1222 		/* free the tx_rsq arrays */
1223 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1224 			if (txq->tx_rsq != NULL) {
1225 				free(txq->tx_rsq, M_ICE);
1226 				txq->tx_rsq = NULL;
1227 			}
1228 		}
1229 		free(vsi->tx_queues, M_ICE);
1230 		vsi->tx_queues = NULL;
1231 		vsi->num_tx_queues = 0;
1232 	}
1233 	if (vsi->rx_queues != NULL) {
1234 		free(vsi->rx_queues, M_ICE);
1235 		vsi->rx_queues = NULL;
1236 		vsi->num_rx_queues = 0;
1237 	}
1238 }
1239 
1240 /**
1241  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1242  * @arg: The Rx queue memory
1243  *
1244  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1245  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1246  * iflib to schedule the main Rx thread.
1247  */
1248 static int
ice_msix_que(void * arg)1249 ice_msix_que(void *arg)
1250 {
1251 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1252 
1253 	/* TODO: dynamic ITR algorithm?? */
1254 
1255 	return (FILTER_SCHEDULE_THREAD);
1256 }
1257 
1258 /**
1259  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1260  * @arg: pointer to device softc memory
1261  *
1262  * Called by iflib when an administrative interrupt occurs. Should perform any
1263  * fast logic for handling the interrupt cause, and then indicate whether the
1264  * admin task needs to be queued.
1265  */
1266 static int
ice_msix_admin(void * arg)1267 ice_msix_admin(void *arg)
1268 {
1269 	struct ice_softc *sc = (struct ice_softc *)arg;
1270 	struct ice_hw *hw = &sc->hw;
1271 	device_t dev = sc->dev;
1272 	u32 oicr;
1273 
1274 	/* There is no safe way to modify the enabled miscellaneous causes of
1275 	 * the OICR vector at runtime, as doing so would be prone to race
1276 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1277 	 * causes and allow future interrupts to occur. The admin interrupt
1278 	 * vector will not be re-enabled until after we exit this function,
1279 	 * but any delayed tasks must be resilient against possible "late
1280 	 * arrival" interrupts that occur while we're already handling the
1281 	 * task. This is done by using state bits and serializing these
1282 	 * delayed tasks via the admin status task function.
1283 	 */
1284 	oicr = rd32(hw, PFINT_OICR);
1285 
1286 	/* Processing multiple controlq interrupts on a single vector does not
1287 	 * provide an indication of which controlq triggered the interrupt.
1288 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1289 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1290 	 * it gets automatically cleared when the hardware acknowledges the
1291 	 * interrupt.
1292 	 *
1293 	 * This means we don't really have a good indication of whether or
1294 	 * which controlq triggered this interrupt. We'll just notify the
1295 	 * admin task that it should check all the controlqs.
1296 	 */
1297 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1298 
1299 	if (oicr & PFINT_OICR_VFLR_M) {
1300 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1301 	}
1302 
1303 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1304 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1305 	}
1306 
1307 	if (oicr & PFINT_OICR_GRST_M) {
1308 		u32 reset;
1309 
1310 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1311 			GLGEN_RSTAT_RESET_TYPE_S;
1312 
1313 		if (reset == ICE_RESET_CORER)
1314 			sc->soft_stats.corer_count++;
1315 		else if (reset == ICE_RESET_GLOBR)
1316 			sc->soft_stats.globr_count++;
1317 		else
1318 			sc->soft_stats.empr_count++;
1319 
1320 		/* There are a couple of bits at play for handling resets.
1321 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1322 		 * indicate that the driver has received an OICR with a reset
1323 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1324 		 * happen. Second, we set hw->reset_ongoing to indicate that
1325 		 * the hardware is in reset. We will set this back to false as
1326 		 * soon as the driver has determined that the hardware is out
1327 		 * of reset.
1328 		 *
1329 		 * If the driver wishes to trigger a request, it can set one of
1330 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1331 		 * correct type of reset.
1332 		 */
1333 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
1334 			hw->reset_ongoing = true;
1335 			/*
1336 			 * During the NVM update process, there is a driver reset and link
1337 			 * goes down and then up. The below if-statement prevents a second
1338 			 * link flap from occurring in ice_if_init().
1339 			 */
1340 			if (if_getflags(sc->ifp) & IFF_UP)
1341 				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
1342 		}
1343 	}
1344 
1345 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1346 		device_printf(dev, "ECC Error detected!\n");
1347 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1348 	}
1349 
1350 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1351 		if (oicr & PFINT_OICR_HMC_ERR_M)
1352 			/* Log the HMC errors */
1353 			ice_log_hmc_error(hw, dev);
1354 		ice_rdma_notify_pe_intr(sc, oicr);
1355 	}
1356 
1357 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1358 		device_printf(dev, "PCI Exception detected!\n");
1359 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1360 	}
1361 
1362 	return (FILTER_SCHEDULE_THREAD);
1363 }
1364 
1365 /**
1366  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1367  * @sc: the device private softc
1368  *
1369  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1370  *
1371  * First, determine a suitable total number of vectors based on the number
1372  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1373  * RDMA.
1374  *
1375  * Request the desired amount of vectors, and see how many we obtain. If we
1376  * don't obtain as many as desired, reduce the demands by lowering the number
1377  * of requested queues or reducing the demand from other features such as
1378  * RDMA.
1379  *
1380  * @remark This function is required because the driver sets the
1381  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1382  * manually.
1383  *
1384  * @remark This driver will only use MSI-X vectors. If this is not possible,
1385  * neither MSI or legacy interrupts will be tried.
1386  *
1387  * @remark if it exists, os_imgr is initialized here for keeping track of
1388  * the assignments of extra MSIX vectors.
1389  *
1390  * @post on success this function must set the following scctx parameters:
1391  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1392  *
1393  * @returns zero on success or an error code on failure.
1394  */
1395 static int
ice_allocate_msix(struct ice_softc * sc)1396 ice_allocate_msix(struct ice_softc *sc)
1397 {
1398 	bool iflib_override_queue_count = false;
1399 	if_softc_ctx_t scctx = sc->scctx;
1400 	device_t dev = sc->dev;
1401 	cpuset_t cpus;
1402 	int bar, queues, vectors, requested;
1403 	int err = 0;
1404 	int rdma;
1405 
1406 	/* Allocate the MSI-X bar */
1407 	bar = scctx->isc_msix_bar;
1408 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1409 	if (!sc->msix_table) {
1410 		device_printf(dev, "Unable to map MSI-X table\n");
1411 		return (ENOMEM);
1412 	}
1413 
1414 	/* Check if the iflib queue count sysctls have been set */
1415 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1416 		iflib_override_queue_count = true;
1417 
1418 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1419 	if (err) {
1420 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1421 			      __func__, ice_err_str(err));
1422 		CPU_COPY(&all_cpus, &cpus);
1423 	}
1424 
1425 	/* Attempt to mimic behavior of iflib_msix_init */
1426 	if (iflib_override_queue_count) {
1427 		/*
1428 		 * If the override sysctls have been set, limit the queues to
1429 		 * the number of logical CPUs.
1430 		 */
1431 		queues = mp_ncpus;
1432 	} else {
1433 		/*
1434 		 * Otherwise, limit the queue count to the CPUs associated
1435 		 * with the NUMA node the device is associated with.
1436 		 */
1437 		queues = CPU_COUNT(&cpus);
1438 	}
1439 
1440 	/* Clamp to the number of RSS buckets */
1441 	queues = imin(queues, rss_getnumbuckets());
1442 
1443 	/*
1444 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1445 	 * and Rx queues.
1446 	 */
1447 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1448 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1449 
1450 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1451 		/*
1452 		 * Choose a number of RDMA vectors based on the number of CPUs
1453 		 * up to a maximum
1454 		 */
1455 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1456 
1457 		/* Further limit by the user configurable tunable */
1458 		rdma = min(rdma, ice_rdma_max_msix);
1459 	} else {
1460 		rdma = 0;
1461 	}
1462 
1463 	/*
1464 	 * Determine the number of vectors to request. Note that we also need
1465 	 * to allocate one vector for administrative tasks.
1466 	 */
1467 	requested = rdma + queues + 1;
1468 	/* Add extra vectors requested by the user for later subinterface
1469 	 * creation.
1470 	 */
1471 	if_ctx_t ctx = sc->ctx;
1472 	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
1473 	requested += extra_vectors;
1474 
1475 	vectors = requested;
1476 	err = pci_alloc_msix(dev, &vectors);
1477 	if (err) {
1478 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1479 			      vectors, ice_err_str(err));
1480 		goto err_free_msix_table;
1481 	}
1482 
1483 	/* If we don't receive enough vectors, reduce demands */
1484 	if (vectors < requested) {
1485 		int diff = requested - vectors;
1486 
1487 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1488 			      requested, vectors);
1489 
1490 		diff += extra_vectors;
1491 		extra_vectors = 0;
1492 		/*
1493 		 * The OS didn't grant us the requested number of vectors.
1494 		 * Check to see if we can reduce demands by limiting the
1495 		 * number of vectors allocated to certain features.
1496 		 */
1497 
1498 		if (rdma >= diff) {
1499 			/* Reduce the number of RDMA vectors we reserve */
1500 			rdma -= diff;
1501 			diff = 0;
1502 		} else {
1503 			/* Disable RDMA and reduce the difference */
1504 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1505 			diff -= rdma;
1506 			rdma = 0;
1507 		}
1508 
1509 		/*
1510 		 * If we still have a difference, we need to reduce the number
1511 		 * of queue pairs.
1512 		 *
1513 		 * However, we still need at least one vector for the admin
1514 		 * interrupt and one queue pair.
1515 		 */
1516 		if (queues <= diff) {
1517 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1518 			err = (ERANGE);
1519 			goto err_pci_release_msi;
1520 		}
1521 
1522 		queues -= diff;
1523 	}
1524 
1525 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1526 	if (rdma)
1527 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1528 			      rdma);
1529 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1530 		      vectors);
1531 
1532 	/* Split resulting vectors back into requested splits */
1533 	scctx->isc_vectors = vectors;
1534 	scctx->isc_nrxqsets = queues;
1535 	scctx->isc_ntxqsets = queues;
1536 	scctx->isc_intr = IFLIB_INTR_MSIX;
1537 
1538 	sc->irdma_vectors = rdma;
1539 
1540 	/* Interrupt allocation tracking isn't required in recovery mode,
1541 	 * since neither RDMA nor VFs are enabled.
1542 	 */
1543 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1544 		return (0);
1545 
1546 	/* Keep track of which interrupt indices are being used for what */
1547 	sc->lan_vectors = vectors - rdma;
1548 	sc->lan_vectors -= extra_vectors;
1549 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
1550 	if (err) {
1551 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1552 			      ice_err_str(err));
1553 		goto err_pci_release_msi;
1554 	}
1555 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
1556 	if (err) {
1557 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1558 			      ice_err_str(err));
1559 		goto err_release_pf_imap;
1560 	}
1561 	sc->extra_vectors = extra_vectors;
1562 	/* Setup another resource manager to track the assignments of extra OS
1563 	 * vectors. These OS interrupt allocations don't need to be contiguous,
1564 	 * unlike the ones that come from the device.
1565 	 */
1566 	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
1567 	if (err) {
1568 		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
1569 			      ice_err_str(err));
1570 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
1571 					    rdma);
1572 		goto err_release_pf_imap;
1573 	}
1574 	return (0);
1575 
1576 err_release_pf_imap:
1577 	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
1578 				    sc->lan_vectors);
1579 err_pci_release_msi:
1580 	pci_release_msi(dev);
1581 err_free_msix_table:
1582 	if (sc->msix_table != NULL) {
1583 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1584 				rman_get_rid(sc->msix_table),
1585 				sc->msix_table);
1586 		sc->msix_table = NULL;
1587 	}
1588 
1589 	return (err);
1590 }
1591 
1592 /**
1593  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1594  * @ctx: the iflib context structure
1595  * @msix: the number of vectors we were assigned
1596  *
1597  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1598  * we get at least the same number of vectors as we have queues, and that we
1599  * always have the same number of Tx and Rx queues.
1600  *
1601  * Tx queues use a softirq instead of using their own hardware interrupt.
1602  */
1603 static int
ice_if_msix_intr_assign(if_ctx_t ctx,int msix)1604 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1605 {
1606 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1607 	struct ice_vsi *vsi = &sc->pf_vsi;
1608 	int err, i, vector;
1609 
1610 	ASSERT_CTX_LOCKED(sc);
1611 
1612 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1613 		device_printf(sc->dev,
1614 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1615 			      vsi->num_tx_queues, vsi->num_rx_queues);
1616 		return (EOPNOTSUPP);
1617 	}
1618 
1619 	if (msix < (vsi->num_rx_queues + 1)) {
1620 		device_printf(sc->dev,
1621 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1622 		return (EOPNOTSUPP);
1623 	}
1624 
1625 	/* Save the number of vectors for future use */
1626 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1627 
1628 	/* Allocate space to store the IRQ vector data */
1629 	if (!(sc->irqvs =
1630 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1631 					       M_ICE, M_NOWAIT))) {
1632 		device_printf(sc->dev,
1633 			      "Unable to allocate irqv memory\n");
1634 		return (ENOMEM);
1635 	}
1636 
1637 	/* Administrative interrupt events will use vector 0 */
1638 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1639 				      ice_msix_admin, sc, 0, "admin");
1640 	if (err) {
1641 		device_printf(sc->dev,
1642 			      "Failed to register Admin queue handler: %s\n",
1643 			      ice_err_str(err));
1644 		goto free_irqvs;
1645 	}
1646 	sc->irqvs[0].me = 0;
1647 
1648 	/* Do not allocate queue interrupts when in recovery mode */
1649 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1650 		return (0);
1651 
1652 	int rid;
1653 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1654 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1655 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1656 		char irq_name[16];
1657 
1658 		rid = vector + 1;
1659 
1660 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1661 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1662 					      IFLIB_INTR_RXTX, ice_msix_que,
1663 					      rxq, rxq->me, irq_name);
1664 		if (err) {
1665 			device_printf(sc->dev,
1666 				      "Failed to allocate q int %d err: %s\n",
1667 				      i, ice_err_str(err));
1668 			vector--;
1669 			i--;
1670 			goto fail;
1671 		}
1672 		sc->irqvs[vector].me = vector;
1673 		rxq->irqv = &sc->irqvs[vector];
1674 
1675 		bzero(irq_name, sizeof(irq_name));
1676 
1677 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1678 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1679 					    IFLIB_INTR_TX, txq,
1680 					    txq->me, irq_name);
1681 		txq->irqv = &sc->irqvs[vector];
1682 	}
1683 
1684 	/* For future interrupt assignments */
1685 	sc->last_rid = rid + sc->irdma_vectors;
1686 
1687 	return (0);
1688 fail:
1689 	for (; i >= 0; i--, vector--)
1690 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1691 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1692 free_irqvs:
1693 	free(sc->irqvs, M_ICE);
1694 	sc->irqvs = NULL;
1695 	return err;
1696 }
1697 
1698 /**
1699  * ice_if_mtu_set - Set the device MTU
1700  * @ctx: iflib context structure
1701  * @mtu: the MTU requested
1702  *
1703  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1704  *
1705  * @pre assumes the caller holds the iflib CTX lock
1706  */
1707 static int
ice_if_mtu_set(if_ctx_t ctx,uint32_t mtu)1708 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1709 {
1710 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1711 
1712 	ASSERT_CTX_LOCKED(sc);
1713 
1714 	/* Do not support configuration when in recovery mode */
1715 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1716 		return (ENOSYS);
1717 
1718 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1719 		return (EINVAL);
1720 
1721 	sc->scctx->isc_max_frame_size = mtu +
1722 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1723 
1724 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1725 
1726 	return (0);
1727 }
1728 
1729 /**
1730  * ice_if_intr_enable - Enable device interrupts
1731  * @ctx: iflib context structure
1732  *
1733  * Called by iflib to request enabling device interrupts.
1734  */
1735 static void
ice_if_intr_enable(if_ctx_t ctx)1736 ice_if_intr_enable(if_ctx_t ctx)
1737 {
1738 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1739 	struct ice_vsi *vsi = &sc->pf_vsi;
1740 	struct ice_hw *hw = &sc->hw;
1741 
1742 	ASSERT_CTX_LOCKED(sc);
1743 
1744 	/* Enable ITR 0 */
1745 	ice_enable_intr(hw, sc->irqvs[0].me);
1746 
1747 	/* Do not enable queue interrupts in recovery mode */
1748 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1749 		return;
1750 
1751 	/* Enable all queue interrupts */
1752 	for (int i = 0; i < vsi->num_rx_queues; i++)
1753 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1754 }
1755 
1756 /**
1757  * ice_if_intr_disable - Disable device interrupts
1758  * @ctx: iflib context structure
1759  *
1760  * Called by iflib to request disabling device interrupts.
1761  */
1762 static void
ice_if_intr_disable(if_ctx_t ctx)1763 ice_if_intr_disable(if_ctx_t ctx)
1764 {
1765 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1766 	struct ice_hw *hw = &sc->hw;
1767 	unsigned int i;
1768 
1769 	ASSERT_CTX_LOCKED(sc);
1770 
1771 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1772 	 * assigned to queues. Instead of assuming that the interrupt
1773 	 * assignment in the rx_queues structure is valid, just disable all
1774 	 * possible interrupts
1775 	 *
1776 	 * Note that we choose not to disable ITR 0 because this handles the
1777 	 * AdminQ interrupts, and we want to keep processing these even when
1778 	 * the interface is offline.
1779 	 */
1780 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1781 		ice_disable_intr(hw, i);
1782 }
1783 
1784 /**
1785  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1786  * @ctx: iflib context structure
1787  * @rxqid: the Rx queue to enable
1788  *
1789  * Enable a specific Rx queue interrupt.
1790  *
1791  * This function is not protected by the iflib CTX lock.
1792  */
1793 static int
ice_if_rx_queue_intr_enable(if_ctx_t ctx,uint16_t rxqid)1794 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1795 {
1796 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1797 	struct ice_vsi *vsi = &sc->pf_vsi;
1798 	struct ice_hw *hw = &sc->hw;
1799 
1800 	/* Do not enable queue interrupts in recovery mode */
1801 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1802 		return (ENOSYS);
1803 
1804 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1805 	return (0);
1806 }
1807 
1808 /**
1809  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1810  * @ctx: iflib context structure
1811  * @txqid: the Tx queue to enable
1812  *
1813  * Enable a specific Tx queue interrupt.
1814  *
1815  * This function is not protected by the iflib CTX lock.
1816  */
1817 static int
ice_if_tx_queue_intr_enable(if_ctx_t ctx,uint16_t txqid)1818 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1819 {
1820 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1821 	struct ice_vsi *vsi = &sc->pf_vsi;
1822 	struct ice_hw *hw = &sc->hw;
1823 
1824 	/* Do not enable queue interrupts in recovery mode */
1825 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1826 		return (ENOSYS);
1827 
1828 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1829 	return (0);
1830 }
1831 
1832 /**
1833  * ice_set_default_promisc_mask - Set default config for promisc settings
1834  * @promisc_mask: bitmask to setup
1835  *
1836  * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1837  * modes to operate on. The mask used in here is the default one for the
1838  * driver, where promiscuous is enabled/disabled for all types of
1839  * non-VLAN-tagged/VLAN 0 traffic.
1840  */
1841 static void
ice_set_default_promisc_mask(ice_bitmap_t * promisc_mask)1842 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1843 {
1844 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1845 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1846 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1847 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1848 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1849 }
1850 
1851 /**
1852  * ice_if_promisc_set - Set device promiscuous mode
1853  * @ctx: iflib context structure
1854  * @flags: promiscuous flags to configure
1855  *
1856  * Called by iflib to configure device promiscuous mode.
1857  *
1858  * @remark Calls to this function will always overwrite the previous setting
1859  */
1860 static int
ice_if_promisc_set(if_ctx_t ctx,int flags)1861 ice_if_promisc_set(if_ctx_t ctx, int flags)
1862 {
1863 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1864 	struct ice_hw *hw = &sc->hw;
1865 	device_t dev = sc->dev;
1866 	enum ice_status status;
1867 	bool promisc_enable = flags & IFF_PROMISC;
1868 	bool multi_enable = flags & IFF_ALLMULTI;
1869 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1870 
1871 	/* Do not support configuration when in recovery mode */
1872 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1873 		return (ENOSYS);
1874 
1875 	ice_set_default_promisc_mask(promisc_mask);
1876 
1877 	if (multi_enable)
1878 		return (EOPNOTSUPP);
1879 
1880 	if (promisc_enable) {
1881 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1882 					     promisc_mask, 0);
1883 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1884 			device_printf(dev,
1885 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1886 				      ice_status_str(status),
1887 				      ice_aq_str(hw->adminq.sq_last_status));
1888 			return (EIO);
1889 		}
1890 	} else {
1891 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1892 					       promisc_mask, 0);
1893 		if (status) {
1894 			device_printf(dev,
1895 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1896 				      ice_status_str(status),
1897 				      ice_aq_str(hw->adminq.sq_last_status));
1898 			return (EIO);
1899 		}
1900 	}
1901 
1902 	return (0);
1903 }
1904 
1905 /**
1906  * ice_if_media_change - Change device media
1907  * @ctx: device ctx structure
1908  *
1909  * Called by iflib when a media change is requested. This operation is not
1910  * supported by the hardware, so we just return an error code.
1911  */
1912 static int
ice_if_media_change(if_ctx_t ctx)1913 ice_if_media_change(if_ctx_t ctx)
1914 {
1915 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1916 
1917 	device_printf(sc->dev, "Media change is not supported.\n");
1918 	return (ENODEV);
1919 }
1920 
1921 /**
1922  * ice_if_media_status - Report current device media
1923  * @ctx: iflib context structure
1924  * @ifmr: ifmedia request structure to update
1925  *
1926  * Updates the provided ifmr with current device media status, including link
1927  * status and media type.
1928  */
1929 static void
ice_if_media_status(if_ctx_t ctx,struct ifmediareq * ifmr)1930 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1931 {
1932 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1933 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1934 
1935 	ifmr->ifm_status = IFM_AVALID;
1936 	ifmr->ifm_active = IFM_ETHER;
1937 
1938 	/* Never report link up or media types when in recovery mode */
1939 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1940 		return;
1941 
1942 	if (!sc->link_up)
1943 		return;
1944 
1945 	ifmr->ifm_status |= IFM_ACTIVE;
1946 	ifmr->ifm_active |= IFM_FDX;
1947 
1948 	if (li->phy_type_low)
1949 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1950 	else if (li->phy_type_high)
1951 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1952 	else
1953 		ifmr->ifm_active |= IFM_UNKNOWN;
1954 
1955 	/* Report flow control status as well */
1956 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1957 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1958 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1959 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1960 }
1961 
1962 /**
1963  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1964  * @vsi: the VSI to initialize
1965  *
1966  * Initialize Tx queue software tracking values, including the Report Status
1967  * queue, and related software tracking values.
1968  */
1969 static void
ice_init_tx_tracking(struct ice_vsi * vsi)1970 ice_init_tx_tracking(struct ice_vsi *vsi)
1971 {
1972 	struct ice_tx_queue *txq;
1973 	size_t j;
1974 	int i;
1975 
1976 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1977 
1978 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1979 
1980 		/* Initialize the last processed descriptor to be the end of
1981 		 * the ring, rather than the start, so that we avoid an
1982 		 * off-by-one error in ice_ift_txd_credits_update for the
1983 		 * first packet.
1984 		 */
1985 		txq->tx_cidx_processed = txq->desc_count - 1;
1986 
1987 		for (j = 0; j < txq->desc_count; j++)
1988 			txq->tx_rsq[j] = QIDX_INVALID;
1989 	}
1990 }
1991 
1992 /**
1993  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1994  * @sc: the device softc
1995  *
1996  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1997  * buffer sizes when programming hardware.
1998  */
1999 static void
ice_update_rx_mbuf_sz(struct ice_softc * sc)2000 ice_update_rx_mbuf_sz(struct ice_softc *sc)
2001 {
2002 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
2003 	struct ice_vsi *vsi = &sc->pf_vsi;
2004 
2005 	MPASS(mbuf_sz <= UINT16_MAX);
2006 	vsi->mbuf_sz = mbuf_sz;
2007 }
2008 
2009 /**
2010  * ice_if_init - Initialize the device
2011  * @ctx: iflib ctx structure
2012  *
2013  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2014  * device filters and prepares the Tx and Rx engines.
2015  *
2016  * @pre assumes the caller holds the iflib CTX lock
2017  */
2018 static void
ice_if_init(if_ctx_t ctx)2019 ice_if_init(if_ctx_t ctx)
2020 {
2021 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2022 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2023 	device_t dev = sc->dev;
2024 	int err;
2025 
2026 	ASSERT_CTX_LOCKED(sc);
2027 
2028 	/*
2029 	 * We've seen an issue with 11.3/12.1 where sideband routines are
2030 	 * called after detach is called.  This would call routines after
2031 	 * if_stop, causing issues with the teardown process.  This has
2032 	 * seemingly been fixed in STABLE snapshots, but it seems like a
2033 	 * good idea to have this guard here regardless.
2034 	 */
2035 	if (ice_driver_is_detaching(sc))
2036 		return;
2037 
2038 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2039 		return;
2040 
2041 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2042 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2043 		return;
2044 	}
2045 
2046 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2047 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2048 		return;
2049 	}
2050 
2051 	ice_update_rx_mbuf_sz(sc);
2052 
2053 	/* Update the MAC address... User might use a LAA */
2054 	err = ice_update_laa_mac(sc);
2055 	if (err) {
2056 		device_printf(dev,
2057 			      "LAA address change failed, err %s\n",
2058 			      ice_err_str(err));
2059 		return;
2060 	}
2061 
2062 	/* Initialize software Tx tracking values */
2063 	ice_init_tx_tracking(&sc->pf_vsi);
2064 
2065 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2066 	if (err) {
2067 		device_printf(dev,
2068 			      "Unable to configure the main VSI for Tx: %s\n",
2069 			      ice_err_str(err));
2070 		return;
2071 	}
2072 
2073 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2074 	if (err) {
2075 		device_printf(dev,
2076 			      "Unable to configure the main VSI for Rx: %s\n",
2077 			      ice_err_str(err));
2078 		goto err_cleanup_tx;
2079 	}
2080 
2081 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2082 	if (err) {
2083 		device_printf(dev,
2084 			      "Unable to enable Rx rings for transmit: %s\n",
2085 			      ice_err_str(err));
2086 		goto err_cleanup_tx;
2087 	}
2088 
2089 	err = ice_cfg_pf_default_mac_filters(sc);
2090 	if (err) {
2091 		device_printf(dev,
2092 			      "Unable to configure default MAC filters: %s\n",
2093 			      ice_err_str(err));
2094 		goto err_stop_rx;
2095 	}
2096 
2097 	/* We use software interrupts for Tx, so we only program the hardware
2098 	 * interrupts for Rx.
2099 	 */
2100 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2101 	ice_configure_rx_itr(&sc->pf_vsi);
2102 
2103 	/* Configure promiscuous mode */
2104 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2105 
2106 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2107 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2108 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2109 			ice_set_link(sc, true);
2110 
2111 	ice_rdma_pf_init(sc);
2112 
2113 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2114 
2115 	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2116 		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2117 		iflib_request_reset(sc->mirr_if->subctx);
2118 		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2119 	}
2120 
2121 	return;
2122 
2123 err_stop_rx:
2124 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2125 err_cleanup_tx:
2126 	ice_vsi_disable_tx(&sc->pf_vsi);
2127 }
2128 
2129 /**
2130  * ice_poll_for_media_avail - Re-enable link if media is detected
2131  * @sc: device private structure
2132  *
2133  * Intended to be called from the driver's timer function, this function
2134  * sends the Get Link Status AQ command and re-enables HW link if the
2135  * command says that media is available.
2136  *
2137  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2138  * since media removal events are supposed to be sent to the driver through
2139  * a link status event.
2140  */
2141 static void
ice_poll_for_media_avail(struct ice_softc * sc)2142 ice_poll_for_media_avail(struct ice_softc *sc)
2143 {
2144 	struct ice_hw *hw = &sc->hw;
2145 	struct ice_port_info *pi = hw->port_info;
2146 
2147 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2148 		pi->phy.get_link_info = true;
2149 		ice_get_link_status(pi, &sc->link_up);
2150 
2151 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2152 			enum ice_status status;
2153 
2154 			/* Re-enable link and re-apply user link settings */
2155 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2156 			    (if_getflags(sc->ifp) & IFF_UP)) {
2157 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2158 
2159 				/* Update the OS about changes in media capability */
2160 				status = ice_add_media_types(sc, sc->media);
2161 				if (status)
2162 					device_printf(sc->dev,
2163 					    "Error adding device media types: %s aq_err %s\n",
2164 					    ice_status_str(status),
2165 					    ice_aq_str(hw->adminq.sq_last_status));
2166 			}
2167 
2168 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2169 		}
2170 	}
2171 }
2172 
2173 /**
2174  * ice_if_timer - called by iflib periodically
2175  * @ctx: iflib ctx structure
2176  * @qid: the queue this timer was called for
2177  *
2178  * This callback is triggered by iflib periodically. We use it to update the
2179  * hw statistics.
2180  *
2181  * @remark this function is not protected by the iflib CTX lock.
2182  */
2183 static void
ice_if_timer(if_ctx_t ctx,uint16_t qid)2184 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2185 {
2186 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2187 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2188 
2189 	if (qid != 0)
2190 		return;
2191 
2192 	/* Do not attempt to update stats when in recovery mode */
2193 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2194 		return;
2195 
2196 	/* Update device statistics */
2197 	ice_update_pf_stats(sc);
2198 
2199 	/*
2200 	 * For proper watchdog management, the iflib stack needs to know if
2201 	 * we've been paused during the last interval. Check if the
2202 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2203 	 */
2204 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2205 		sc->scctx->isc_pause_frames = 1;
2206 
2207 	/* Update the primary VSI stats */
2208 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2209 
2210 	/* Update mirror VSI stats */
2211 	if (sc->mirr_if && sc->mirr_if->if_attached)
2212 		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2213 }
2214 
2215 /**
2216  * ice_admin_timer - called periodically to trigger the admin task
2217  * @arg: callout(9) argument pointing to the device private softc structure
2218  *
2219  * Timer function used as part of a callout(9) timer that will periodically
2220  * trigger the admin task, even when the interface is down.
2221  *
2222  * @remark this function is not called by iflib and is not protected by the
2223  * iflib CTX lock.
2224  *
2225  * @remark because this is a callout function, it cannot sleep and should not
2226  * attempt taking the iflib CTX lock.
2227  */
2228 static void
ice_admin_timer(void * arg)2229 ice_admin_timer(void *arg)
2230 {
2231 	struct ice_softc *sc = (struct ice_softc *)arg;
2232 
2233 	/*
2234 	 * There is a point where callout routines are no longer
2235 	 * cancelable.  So there exists a window of time where the
2236 	 * driver enters detach() and tries to cancel the callout, but the
2237 	 * callout routine has passed the cancellation point.  The detach()
2238 	 * routine is unaware of this and tries to free resources that the
2239 	 * callout routine needs.  So we check for the detach state flag to
2240 	 * at least shrink the window of opportunity.
2241 	 */
2242 	if (ice_driver_is_detaching(sc))
2243 		return;
2244 
2245 	/* Fire off the admin task */
2246 	iflib_admin_intr_deferred(sc->ctx);
2247 
2248 	/* Reschedule the admin timer */
2249 	callout_schedule(&sc->admin_timer, hz/2);
2250 }
2251 
2252 /**
2253  * ice_transition_recovery_mode - Transition to recovery mode
2254  * @sc: the device private softc
2255  *
2256  * Called when the driver detects that the firmware has entered recovery mode
2257  * at run time.
2258  */
2259 static void
ice_transition_recovery_mode(struct ice_softc * sc)2260 ice_transition_recovery_mode(struct ice_softc *sc)
2261 {
2262 	struct ice_vsi *vsi = &sc->pf_vsi;
2263 	int i;
2264 
2265 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2266 
2267 	/* Tell the stack that the link has gone down */
2268 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2269 
2270 	/* Request that the device be re-initialized */
2271 	ice_request_stack_reinit(sc);
2272 
2273 	ice_rdma_pf_detach(sc);
2274 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2275 
2276 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2277 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2278 
2279 	ice_vsi_del_txqs_ctx(vsi);
2280 	ice_vsi_del_rxqs_ctx(vsi);
2281 
2282 	for (i = 0; i < sc->num_available_vsi; i++) {
2283 		if (sc->all_vsi[i])
2284 			ice_release_vsi(sc->all_vsi[i]);
2285 	}
2286 	sc->num_available_vsi = 0;
2287 
2288 	if (sc->all_vsi) {
2289 		free(sc->all_vsi, M_ICE);
2290 		sc->all_vsi = NULL;
2291 	}
2292 
2293 	/* Destroy the interrupt manager */
2294 	ice_resmgr_destroy(&sc->dev_imgr);
2295 	/* Destroy the queue managers */
2296 	ice_resmgr_destroy(&sc->tx_qmgr);
2297 	ice_resmgr_destroy(&sc->rx_qmgr);
2298 
2299 	ice_deinit_hw(&sc->hw);
2300 }
2301 
2302 /**
2303  * ice_transition_safe_mode - Transition to safe mode
2304  * @sc: the device private softc
2305  *
2306  * Called when the driver attempts to reload the DDP package during a device
2307  * reset, and the new download fails. If so, we must transition to safe mode
2308  * at run time.
2309  *
2310  * @remark although safe mode normally allocates only a single queue, we can't
2311  * change the number of queues dynamically when using iflib. Due to this, we
2312  * do not attempt to reduce the number of queues.
2313  */
2314 static void
ice_transition_safe_mode(struct ice_softc * sc)2315 ice_transition_safe_mode(struct ice_softc *sc)
2316 {
2317 	/* Indicate that we are in Safe mode */
2318 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2319 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2320 
2321 	ice_rdma_pf_detach(sc);
2322 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2323 
2324 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2325 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2326 
2327 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2328 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2329 }
2330 
2331 /**
2332  * ice_if_update_admin_status - update admin status
2333  * @ctx: iflib ctx structure
2334  *
2335  * Called by iflib to update the admin status. For our purposes, this means
2336  * check the adminq, and update the link status. It's ultimately triggered by
2337  * our admin interrupt, or by the ice_if_timer periodically.
2338  *
2339  * @pre assumes the caller holds the iflib CTX lock
2340  */
2341 static void
ice_if_update_admin_status(if_ctx_t ctx)2342 ice_if_update_admin_status(if_ctx_t ctx)
2343 {
2344 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2345 	enum ice_fw_modes fw_mode;
2346 	bool reschedule = false;
2347 	u16 pending = 0;
2348 
2349 	ASSERT_CTX_LOCKED(sc);
2350 
2351 	/* Check if the firmware entered recovery mode at run time */
2352 	fw_mode = ice_get_fw_mode(&sc->hw);
2353 	if (fw_mode == ICE_FW_MODE_REC) {
2354 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2355 			/* If we just entered recovery mode, log a warning to
2356 			 * the system administrator and deinit driver state
2357 			 * that is no longer functional.
2358 			 */
2359 			ice_transition_recovery_mode(sc);
2360 		}
2361 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2362 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2363 			/* Rollback mode isn't fatal, but we don't want to
2364 			 * repeatedly post a message about it.
2365 			 */
2366 			ice_print_rollback_msg(&sc->hw);
2367 		}
2368 	}
2369 
2370 	/* Handle global reset events */
2371 	ice_handle_reset_event(sc);
2372 
2373 	/* Handle PF reset requests */
2374 	ice_handle_pf_reset_request(sc);
2375 
2376 	/* Handle MDD events */
2377 	ice_handle_mdd_event(sc);
2378 
2379 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2380 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2381 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2382 		/*
2383 		 * If we know the control queues are disabled, skip processing
2384 		 * the control queues entirely.
2385 		 */
2386 		;
2387 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2388 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2389 		if (pending > 0)
2390 			reschedule = true;
2391 
2392 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2393 		if (pending > 0)
2394 			reschedule = true;
2395 	}
2396 
2397 	/* Poll for link up */
2398 	ice_poll_for_media_avail(sc);
2399 
2400 	/* Check and update link status */
2401 	ice_update_link_status(sc, false);
2402 
2403 	/*
2404 	 * If there are still messages to process, we need to reschedule
2405 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2406 	 * woken up at the next interrupt or timer event.
2407 	 */
2408 	if (reschedule) {
2409 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2410 		iflib_admin_intr_deferred(ctx);
2411 	} else {
2412 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2413 	}
2414 }
2415 
2416 /**
2417  * ice_prepare_for_reset - Prepare device for an impending reset
2418  * @sc: The device private softc
2419  *
2420  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2421  * scheduler setup, and shutting down controlqs. Uses the
2422  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2423  * driver for reset or not.
2424  */
2425 static void
ice_prepare_for_reset(struct ice_softc * sc)2426 ice_prepare_for_reset(struct ice_softc *sc)
2427 {
2428 	struct ice_hw *hw = &sc->hw;
2429 
2430 	/* If we're already prepared, there's nothing to do */
2431 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2432 		return;
2433 
2434 	log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname);
2435 
2436 	/* In recovery mode, hardware is not initialized */
2437 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2438 		return;
2439 
2440 	/* inform the RDMA client */
2441 	ice_rdma_notify_reset(sc);
2442 	/* stop the RDMA client */
2443 	ice_rdma_pf_stop(sc);
2444 
2445 	/* Release the main PF VSI queue mappings */
2446 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2447 				    sc->pf_vsi.num_tx_queues);
2448 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2449 				    sc->pf_vsi.num_rx_queues);
2450 	if (sc->mirr_if) {
2451 		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2452 		    sc->mirr_if->num_irq_vectors);
2453 		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2454 		    sc->mirr_if->num_irq_vectors);
2455 	}
2456 
2457 	ice_clear_hw_tbls(hw);
2458 
2459 	if (hw->port_info)
2460 		ice_sched_cleanup_all(hw);
2461 
2462 	ice_shutdown_all_ctrlq(hw, false);
2463 }
2464 
2465 /**
2466  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2467  * @sc: the device softc pointer
2468  *
2469  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2470  * mapping after a reset occurred.
2471  */
2472 static int
ice_rebuild_pf_vsi_qmap(struct ice_softc * sc)2473 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2474 {
2475 	struct ice_vsi *vsi = &sc->pf_vsi;
2476 	struct ice_tx_queue *txq;
2477 	struct ice_rx_queue *rxq;
2478 	int err, i;
2479 
2480 	/* Re-assign Tx queues from PF space to the main VSI */
2481 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2482 					    vsi->num_tx_queues);
2483 	if (err) {
2484 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2485 			      ice_err_str(err));
2486 		return (err);
2487 	}
2488 
2489 	/* Re-assign Rx queues from PF space to this VSI */
2490 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2491 					    vsi->num_rx_queues);
2492 	if (err) {
2493 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2494 			      ice_err_str(err));
2495 		goto err_release_tx_queues;
2496 	}
2497 
2498 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2499 
2500 	/* Re-assign Tx queue tail pointers */
2501 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2502 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2503 
2504 	/* Re-assign Rx queue tail pointers */
2505 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2506 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2507 
2508 	return (0);
2509 
2510 err_release_tx_queues:
2511 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2512 				   sc->pf_vsi.num_tx_queues);
2513 
2514 	return (err);
2515 }
2516 
/*
 * Evaluates to non-zero when IFF_DRV_RUNNING is set in the driver flags of
 * the ifnet belonging to the given iflib context.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2519 
2520 /**
2521  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2522  * @sc: The device private softc
2523  *
2524  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2525  * limited functionality supported while in recovery mode.
2526  */
2527 static void
ice_rebuild_recovery_mode(struct ice_softc * sc)2528 ice_rebuild_recovery_mode(struct ice_softc *sc)
2529 {
2530 	device_t dev = sc->dev;
2531 
2532 	/* enable PCIe bus master */
2533 	pci_enable_busmaster(dev);
2534 
2535 	/* Configure interrupt causes for the administrative interrupt */
2536 	ice_configure_misc_interrupts(sc);
2537 
2538 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2539 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2540 
2541 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2542 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2543 
2544 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2545 
2546 	/* In order to completely restore device functionality, the iflib core
2547 	 * needs to be reset. We need to request an iflib reset. Additionally,
2548 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2549 	 * the iflib core, we also want re-run the admin task so that iflib
2550 	 * resets immediately instead of waiting for the next interrupt.
2551 	 */
2552 	ice_request_stack_reinit(sc);
2553 
2554 	return;
2555 }
2556 
2557 /**
2558  * ice_rebuild - Rebuild driver state post reset
2559  * @sc: The device private softc
2560  *
2561  * Restore driver state after a reset occurred. Restart the controlqs, setup
2562  * the hardware port, and re-enable the VSIs.
2563  */
2564 static void
ice_rebuild(struct ice_softc * sc)2565 ice_rebuild(struct ice_softc *sc)
2566 {
2567 	struct ice_hw *hw = &sc->hw;
2568 	device_t dev = sc->dev;
2569 	enum ice_ddp_state pkg_state;
2570 	enum ice_status status;
2571 	int err;
2572 
2573 	sc->rebuild_ticks = ticks;
2574 
2575 	/* If we're rebuilding, then a reset has succeeded. */
2576 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2577 
2578 	/*
2579 	 * If the firmware is in recovery mode, only restore the limited
2580 	 * functionality supported by recovery mode.
2581 	 */
2582 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2583 		ice_rebuild_recovery_mode(sc);
2584 		return;
2585 	}
2586 
2587 	/* enable PCIe bus master */
2588 	pci_enable_busmaster(dev);
2589 
2590 	status = ice_init_all_ctrlq(hw);
2591 	if (status) {
2592 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2593 			      ice_status_str(status));
2594 		goto err_shutdown_ctrlq;
2595 	}
2596 
2597 	/* Query the allocated resources for Tx scheduler */
2598 	status = ice_sched_query_res_alloc(hw);
2599 	if (status) {
2600 		device_printf(dev,
2601 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2602 			      ice_status_str(status),
2603 			      ice_aq_str(hw->adminq.sq_last_status));
2604 		goto err_shutdown_ctrlq;
2605 	}
2606 
2607 	/* Re-enable FW logging. Keep going even if this fails */
2608 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2609 	if (!status) {
2610 		/*
2611 		 * We should have the most updated cached copy of the
2612 		 * configuration, regardless of whether we're rebuilding
2613 		 * or not.  So we'll simply check to see if logging was
2614 		 * enabled pre-rebuild.
2615 		 */
2616 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2617 			status = ice_fwlog_register(hw);
2618 			if (status)
2619 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2620 				   ice_status_str(status),
2621 				   ice_aq_str(hw->adminq.sq_last_status));
2622 		}
2623 	} else
2624 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2625 		   ice_status_str(status),
2626 		   ice_aq_str(hw->adminq.sq_last_status));
2627 
2628 	err = ice_send_version(sc);
2629 	if (err)
2630 		goto err_shutdown_ctrlq;
2631 
2632 	err = ice_init_link_events(sc);
2633 	if (err) {
2634 		device_printf(dev, "ice_init_link_events failed: %s\n",
2635 			      ice_err_str(err));
2636 		goto err_shutdown_ctrlq;
2637 	}
2638 
2639 	status = ice_clear_pf_cfg(hw);
2640 	if (status) {
2641 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2642 			      ice_status_str(status));
2643 		goto err_shutdown_ctrlq;
2644 	}
2645 
2646 	ice_clean_all_vsi_rss_cfg(sc);
2647 
2648 	ice_clear_pxe_mode(hw);
2649 
2650 	status = ice_get_caps(hw);
2651 	if (status) {
2652 		device_printf(dev, "failed to get capabilities, err %s\n",
2653 			      ice_status_str(status));
2654 		goto err_shutdown_ctrlq;
2655 	}
2656 
2657 	status = ice_sched_init_port(hw->port_info);
2658 	if (status) {
2659 		device_printf(dev, "failed to initialize port, err %s\n",
2660 			      ice_status_str(status));
2661 		goto err_sched_cleanup;
2662 	}
2663 
2664 	/* If we previously loaded the package, it needs to be reloaded now */
2665 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2666 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2667 		if (!ice_is_init_pkg_successful(pkg_state)) {
2668 			ice_log_pkg_init(sc, pkg_state);
2669 			ice_transition_safe_mode(sc);
2670 		}
2671 	}
2672 
2673 	ice_reset_pf_stats(sc);
2674 
2675 	err = ice_rebuild_pf_vsi_qmap(sc);
2676 	if (err) {
2677 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2678 			      ice_err_str(err));
2679 		goto err_sched_cleanup;
2680 	}
2681 	err = ice_initialize_vsi(&sc->pf_vsi);
2682 	if (err) {
2683 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2684 			      ice_err_str(err));
2685 		goto err_release_queue_allocations;
2686 	}
2687 
2688 	/* Replay all VSI configuration */
2689 	err = ice_replay_all_vsi_cfg(sc);
2690 	if (err)
2691 		goto err_deinit_pf_vsi;
2692 
2693 	/* Re-enable FW health event reporting */
2694 	ice_init_health_events(sc);
2695 
2696 	/* Reconfigure the main PF VSI for RSS */
2697 	err = ice_config_rss(&sc->pf_vsi);
2698 	if (err) {
2699 		device_printf(sc->dev,
2700 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2701 			      ice_err_str(err));
2702 		goto err_deinit_pf_vsi;
2703 	}
2704 
2705 	if (hw->port_info->qos_cfg.is_sw_lldp)
2706 		ice_add_rx_lldp_filter(sc);
2707 
2708 	/* Refresh link status */
2709 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2710 	sc->hw.port_info->phy.get_link_info = true;
2711 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2712 	ice_update_link_status(sc, true);
2713 
2714 	/* RDMA interface will be restarted by the stack re-init */
2715 
2716 	/* Configure interrupt causes for the administrative interrupt */
2717 	ice_configure_misc_interrupts(sc);
2718 
2719 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2720 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2721 
2722 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2723 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2724 
2725 	/* Reconfigure the subinterface */
2726 	if (sc->mirr_if) {
2727 		err = ice_subif_rebuild(sc);
2728 		if (err)
2729 			goto err_deinit_pf_vsi;
2730 	}
2731 
2732 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2733 
2734 	/* In order to completely restore device functionality, the iflib core
2735 	 * needs to be reset. We need to request an iflib reset. Additionally,
2736 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2737 	 * the iflib core, we also want re-run the admin task so that iflib
2738 	 * resets immediately instead of waiting for the next interrupt.
2739 	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2740 	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2741 	 */
2742 	if (hw->port_info->qos_cfg.is_sw_lldp)
2743 		ice_request_stack_reinit(sc);
2744 	else
2745 		ice_do_dcb_reconfig(sc, false);
2746 
2747 	return;
2748 
2749 err_deinit_pf_vsi:
2750 	ice_deinit_vsi(&sc->pf_vsi);
2751 err_release_queue_allocations:
2752 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2753 				    sc->pf_vsi.num_tx_queues);
2754 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2755 				    sc->pf_vsi.num_rx_queues);
2756 err_sched_cleanup:
2757 	ice_sched_cleanup_all(hw);
2758 err_shutdown_ctrlq:
2759 	ice_shutdown_all_ctrlq(hw, false);
2760 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2761 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2762 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2763 }
2764 
2765 /**
2766  * ice_handle_reset_event - Handle reset events triggered by OICR
2767  * @sc: The device private softc
2768  *
2769  * Handle reset events triggered by an OICR notification. This includes CORER,
2770  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2771  * firmware.
2772  *
2773  * @pre assumes the iflib context lock is held, and will unlock it while
2774  * waiting for the hardware to finish reset.
2775  */
2776 static void
ice_handle_reset_event(struct ice_softc * sc)2777 ice_handle_reset_event(struct ice_softc *sc)
2778 {
2779 	struct ice_hw *hw = &sc->hw;
2780 	enum ice_status status;
2781 	device_t dev = sc->dev;
2782 
2783 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2784 	 * trigger an OICR interrupt. Our OICR handler will determine when
2785 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2786 	 * appropriate.
2787 	 */
2788 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2789 		return;
2790 
2791 	ice_prepare_for_reset(sc);
2792 
2793 	/*
2794 	 * Release the iflib context lock and wait for the device to finish
2795 	 * resetting.
2796 	 */
2797 	IFLIB_CTX_UNLOCK(sc);
2798 	status = ice_check_reset(hw);
2799 	IFLIB_CTX_LOCK(sc);
2800 	if (status) {
2801 		device_printf(dev, "Device never came out of reset, err %s\n",
2802 			      ice_status_str(status));
2803 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2804 		return;
2805 	}
2806 
2807 	/* We're done with the reset, so we can rebuild driver state */
2808 	sc->hw.reset_ongoing = false;
2809 	ice_rebuild(sc);
2810 
2811 	/* In the unlikely event that a PF reset request occurs at the same
2812 	 * time as a global reset, clear the request now. This avoids
2813 	 * resetting a second time right after we reset due to a global event.
2814 	 */
2815 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2816 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2817 }
2818 
2819 /**
2820  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2821  * @sc: The device private softc
2822  *
2823  * Initiate a PF reset requested by software. We handle this in the admin task
2824  * so that only one thread actually handles driver preparation and cleanup,
2825  * rather than having multiple threads possibly attempt to run this code
2826  * simultaneously.
2827  *
2828  * @pre assumes the iflib context lock is held and will unlock it while
2829  * waiting for the PF reset to complete.
2830  */
2831 static void
ice_handle_pf_reset_request(struct ice_softc * sc)2832 ice_handle_pf_reset_request(struct ice_softc *sc)
2833 {
2834 	struct ice_hw *hw = &sc->hw;
2835 	enum ice_status status;
2836 
2837 	/* Check for PF reset requests */
2838 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2839 		return;
2840 
2841 	/* Make sure we're prepared for reset */
2842 	ice_prepare_for_reset(sc);
2843 
2844 	/*
2845 	 * Release the iflib context lock and wait for the device to finish
2846 	 * resetting.
2847 	 */
2848 	IFLIB_CTX_UNLOCK(sc);
2849 	status = ice_reset(hw, ICE_RESET_PFR);
2850 	IFLIB_CTX_LOCK(sc);
2851 	if (status) {
2852 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2853 			      ice_status_str(status));
2854 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2855 		return;
2856 	}
2857 
2858 	sc->soft_stats.pfr_count++;
2859 	ice_rebuild(sc);
2860 }
2861 
2862 /**
2863  * ice_init_device_features - Init device driver features
2864  * @sc: driver softc structure
2865  *
2866  * @pre assumes that the function capabilities bits have been set up by
2867  * ice_init_hw().
2868  */
2869 static void
ice_init_device_features(struct ice_softc * sc)2870 ice_init_device_features(struct ice_softc *sc)
2871 {
2872 	struct ice_hw *hw = &sc->hw;
2873 
2874 	/* Set capabilities that all devices support */
2875 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2876 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2877 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2878 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2879 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2880 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2881 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2882 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2883 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2884 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2885 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2886 
2887 	/* Disable features due to hardware limitations... */
2888 	if (!hw->func_caps.common_cap.rss_table_size)
2889 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2890 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2891 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2892 	if (!hw->func_caps.common_cap.dcb)
2893 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2894 	/* Disable features due to firmware limitations... */
2895 	if (!ice_is_fw_health_report_supported(hw))
2896 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2897 	if (!ice_fwlog_supported(hw))
2898 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2899 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2900 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2901 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2902 		else
2903 			ice_fwlog_unregister(hw);
2904 	}
2905 
2906 	/* Disable capabilities not supported by the OS */
2907 	ice_disable_unsupported_features(sc->feat_cap);
2908 
2909 	/* RSS is always enabled for iflib */
2910 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2911 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2912 
2913 	/* Disable features based on sysctl settings */
2914 	if (!ice_tx_balance_en)
2915 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2916 
2917 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2918 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2919 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2920 	}
2921 }
2922 
2923 /**
2924  * ice_if_multi_set - Callback to update Multicast filters in HW
2925  * @ctx: iflib ctx structure
2926  *
2927  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2928  * the if_multiaddrs list and determine which filters have been added or
2929  * removed from the list, and update HW programming to reflect the new list.
2930  *
2931  * @pre assumes the caller holds the iflib CTX lock
2932  */
2933 static void
ice_if_multi_set(if_ctx_t ctx)2934 ice_if_multi_set(if_ctx_t ctx)
2935 {
2936 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2937 	int err;
2938 
2939 	ASSERT_CTX_LOCKED(sc);
2940 
2941 	/* Do not handle multicast configuration in recovery mode */
2942 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2943 		return;
2944 
2945 	err = ice_sync_multicast_filters(sc);
2946 	if (err) {
2947 		device_printf(sc->dev,
2948 			      "Failed to synchronize multicast filter list: %s\n",
2949 			      ice_err_str(err));
2950 		return;
2951 	}
2952 }
2953 
2954 /**
2955  * ice_if_vlan_register - Register a VLAN with the hardware
2956  * @ctx: iflib ctx pointer
2957  * @vtag: VLAN to add
2958  *
2959  * Programs the main PF VSI with a hardware filter for the given VLAN.
2960  *
2961  * @pre assumes the caller holds the iflib CTX lock
2962  */
2963 static void
ice_if_vlan_register(if_ctx_t ctx,u16 vtag)2964 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2965 {
2966 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2967 	enum ice_status status;
2968 
2969 	ASSERT_CTX_LOCKED(sc);
2970 
2971 	/* Do not handle VLAN configuration in recovery mode */
2972 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2973 		return;
2974 
2975 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2976 	if (status) {
2977 		device_printf(sc->dev,
2978 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2979 			      vtag, ice_status_str(status),
2980 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2981 	}
2982 }
2983 
2984 /**
2985  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2986  * @ctx: iflib ctx pointer
2987  * @vtag: VLAN to add
2988  *
2989  * Removes the previously programmed VLAN filter from the main PF VSI.
2990  *
2991  * @pre assumes the caller holds the iflib CTX lock
2992  */
2993 static void
ice_if_vlan_unregister(if_ctx_t ctx,u16 vtag)2994 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2995 {
2996 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2997 	enum ice_status status;
2998 
2999 	ASSERT_CTX_LOCKED(sc);
3000 
3001 	/* Do not handle VLAN configuration in recovery mode */
3002 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3003 		return;
3004 
3005 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
3006 	if (status) {
3007 		device_printf(sc->dev,
3008 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3009 			      vtag, ice_status_str(status),
3010 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3011 	}
3012 }
3013 
3014 /**
3015  * ice_if_stop - Stop the device
3016  * @ctx: iflib context structure
3017  *
3018  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3019  * down)
3020  *
3021  * @pre assumes the caller holds the iflib CTX lock
3022  */
3023 static void
ice_if_stop(if_ctx_t ctx)3024 ice_if_stop(if_ctx_t ctx)
3025 {
3026 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3027 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3028 
3029 	ASSERT_CTX_LOCKED(sc);
3030 
3031 	/*
3032 	 * The iflib core may call IFDI_STOP prior to the first call to
3033 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3034 	 * don't have, and disable Tx queues which aren't yet configured.
3035 	 * Although it is likely these extra operations are harmless, they do
3036 	 * cause spurious warning messages to be displayed, which may confuse
3037 	 * users.
3038 	 *
3039 	 * To avoid these messages, we use a state bit indicating if we've
3040 	 * been initialized. It will be set when ice_if_init is called, and
3041 	 * cleared here in ice_if_stop.
3042 	 */
3043 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3044 		return;
3045 
3046 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3047 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3048 		return;
3049 	}
3050 
3051 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3052 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3053 		return;
3054 	}
3055 
3056 	ice_rdma_pf_stop(sc);
3057 
3058 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3059 	 * return of these functions because there's nothing we can really do
3060 	 * if they fail, and the functions already print error messages.
3061 	 * Just try to shut down as much as we can.
3062 	 */
3063 	ice_rm_pf_default_mac_filters(sc);
3064 
3065 	/* Dissociate the Tx and Rx queues from the interrupts */
3066 	ice_flush_txq_interrupts(&sc->pf_vsi);
3067 	ice_flush_rxq_interrupts(&sc->pf_vsi);
3068 
3069 	/* Disable the Tx and Rx queues */
3070 	ice_vsi_disable_tx(&sc->pf_vsi);
3071 	ice_control_all_rx_queues(&sc->pf_vsi, false);
3072 
3073 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3074 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3075 		ice_set_link(sc, false);
3076 
3077 	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3078 		ice_subif_if_stop(sc->mirr_if->subctx);
3079 		device_printf(sc->dev, "The subinterface also comes down and up after reset\n");
3080 	}
3081 }
3082 
3083 /**
3084  * ice_if_get_counter - Get current value of an ifnet statistic
3085  * @ctx: iflib context pointer
3086  * @counter: ifnet counter to read
3087  *
3088  * Reads the current value of an ifnet counter for the device.
3089  *
3090  * This function is not protected by the iflib CTX lock.
3091  */
3092 static uint64_t
ice_if_get_counter(if_ctx_t ctx,ift_counter counter)3093 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3094 {
3095 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3096 
3097 	/* Return the counter for the main PF VSI */
3098 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3099 }
3100 
3101 /**
3102  * ice_request_stack_reinit - Request that iflib re-initialize
3103  * @sc: the device private softc
3104  *
3105  * Request that the device be brought down and up, to re-initialize. For
3106  * example, this may be called when a device reset occurs, or when Tx and Rx
3107  * queues need to be re-initialized.
3108  *
3109  * This is required because the iflib state is outside the driver, and must be
3110  * re-initialized if we need to resart Tx and Rx queues.
3111  */
3112 void
ice_request_stack_reinit(struct ice_softc * sc)3113 ice_request_stack_reinit(struct ice_softc *sc)
3114 {
3115 	if (CTX_ACTIVE(sc->ctx)) {
3116 		iflib_request_reset(sc->ctx);
3117 		iflib_admin_intr_deferred(sc->ctx);
3118 	}
3119 }
3120 
3121 /**
3122  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3123  * @sc: device private softc
3124  *
3125  * Returns true if the driver is detaching, false otherwise.
3126  *
3127  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3128  * report detachment correctly as early as possible.
3129  *
3130  * @remark this function is used by various code paths that want to avoid
3131  * running if the driver is about to be removed. This includes sysctls and
3132  * other driver access points. Note that it does not fully resolve
3133  * detach-based race conditions as it is possible for a thread to race with
3134  * iflib_in_detach.
3135  */
3136 bool
ice_driver_is_detaching(struct ice_softc * sc)3137 ice_driver_is_detaching(struct ice_softc *sc)
3138 {
3139 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3140 		iflib_in_detach(sc->ctx));
3141 }
3142 
3143 /**
3144  * ice_if_priv_ioctl - Device private ioctl handler
3145  * @ctx: iflib context pointer
3146  * @command: The ioctl command issued
3147  * @data: ioctl specific data
3148  *
3149  * iflib callback for handling custom driver specific ioctls.
3150  *
3151  * @pre Assumes that the iflib context lock is held.
3152  */
3153 static int
ice_if_priv_ioctl(if_ctx_t ctx,u_long command,caddr_t data)3154 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3155 {
3156 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3157 	struct ifdrv *ifd;
3158 	device_t dev = sc->dev;
3159 
3160 	if (data == NULL)
3161 		return (EINVAL);
3162 
3163 	ASSERT_CTX_LOCKED(sc);
3164 
3165 	/* Make sure the command type is valid */
3166 	switch (command) {
3167 	case SIOCSDRVSPEC:
3168 	case SIOCGDRVSPEC:
3169 		/* Accepted commands */
3170 		break;
3171 	case SIOCGPRIVATE_0:
3172 		/*
3173 		 * Although we do not support this ioctl command, it's
3174 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3175 		 * handler. Do not print a message in this case
3176 		 */
3177 		return (ENOTSUP);
3178 	default:
3179 		/*
3180 		 * If we get a different command for this function, it's
3181 		 * definitely unexpected, so log a message indicating what
3182 		 * command we got for debugging purposes.
3183 		 */
3184 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3185 			      __func__, command);
3186 		return (EINVAL);
3187 	}
3188 
3189 	ifd = (struct ifdrv *)data;
3190 
3191 	switch (ifd->ifd_cmd) {
3192 	case ICE_NVM_ACCESS:
3193 		return ice_handle_nvm_access_ioctl(sc, ifd);
3194 	case ICE_DEBUG_DUMP:
3195 		return ice_handle_debug_dump_ioctl(sc, ifd);
3196 	default:
3197 		return EINVAL;
3198 	}
3199 }
3200 
3201 /**
3202  * ice_if_i2c_req - I2C request handler for iflib
3203  * @ctx: iflib context pointer
3204  * @req: The I2C parameters to use
3205  *
3206  * Read from the port's I2C eeprom using the parameters from the ioctl.
3207  *
3208  * @remark The iflib-only part is pretty simple.
3209  */
3210 static int
ice_if_i2c_req(if_ctx_t ctx,struct ifi2creq * req)3211 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3212 {
3213 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3214 
3215 	return ice_handle_i2c_req(sc, req);
3216 }
3217 
3218 /**
3219  * ice_if_suspend - PCI device suspend handler for iflib
3220  * @ctx: iflib context pointer
3221  *
3222  * Deinitializes the driver and clears HW resources in preparation for
3223  * suspend or an FLR.
3224  *
3225  * @returns 0; this return value is ignored
3226  */
3227 static int
ice_if_suspend(if_ctx_t ctx)3228 ice_if_suspend(if_ctx_t ctx)
3229 {
3230 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3231 
3232 	/* At least a PFR is always going to happen after this;
3233 	 * either via FLR or during the D3->D0 transition.
3234 	 */
3235 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3236 
3237 	ice_prepare_for_reset(sc);
3238 
3239 	return (0);
3240 }
3241 
3242 /**
3243  * ice_if_resume - PCI device resume handler for iflib
3244  * @ctx: iflib context pointer
3245  *
3246  * Reinitializes the driver and the HW after PCI resume or after
3247  * an FLR. An init is performed by iflib after this function is finished.
3248  *
3249  * @returns 0; this return value is ignored
3250  */
3251 static int
ice_if_resume(if_ctx_t ctx)3252 ice_if_resume(if_ctx_t ctx)
3253 {
3254 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3255 
3256 	ice_rebuild(sc);
3257 
3258 	return (0);
3259 }
3260 
3261 /**
3262  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3263  * @ctx: iflib context pointer
3264  * @event: event code to check
3265  *
3266  * Defaults to returning true for unknown events.
3267  *
3268  * @returns true if iflib needs to reinit the interface
3269  */
3270 static bool
ice_if_needs_restart(if_ctx_t ctx,enum iflib_restart_event event)3271 ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3272 {
3273 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3274 
3275 	switch (event) {
3276 	case IFLIB_RESTART_VLAN_CONFIG:
3277 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3278 			 !(if_getflags(sc->ifp) & IFF_UP))
3279 			return false;
3280 	default:
3281 		return true;
3282 	}
3283 }
3284 
/* Tx/Rx method table used by the mirror subinterface. It is only declared
 * here; ice_subif_setup_scctx() installs it as the subinterface's isc_txrx.
 */
extern struct if_txrx ice_subif_txrx;
3286 
3287 /**
3288  * @var ice_subif_methods
3289  * @brief ice driver method entry points
3290  */
3291 static device_method_t ice_subif_methods[] = {
3292 	/* Device interface */
3293 	DEVMETHOD(device_register, ice_subif_register),
3294 	DEVMETHOD_END
3295 };
3296 
3297 /**
3298  * @var ice_subif_driver
3299  * @brief driver structure for the device API
3300  */
3301 static driver_t ice_subif_driver = {
3302 	.name = "ice_subif",
3303 	.methods = ice_subif_methods,
3304 	.size = sizeof(struct ice_mirr_if),
3305 };
3306 
3307 static device_method_t ice_iflib_subif_methods[] = {
3308 	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3309 	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3310 	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3311 	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3312 	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3313 	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3314 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3315 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3316 	DEVMETHOD(ifdi_init, ice_subif_if_init),
3317 	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3318 	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3319 	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
3320 	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
3321 };
3322 
3323 /**
3324  * @var ice_iflib_subif_driver
3325  * @brief driver structure for the iflib stack
3326  *
3327  * driver_t definition used to setup the iflib device methods.
3328  */
3329 static driver_t ice_iflib_subif_driver = {
3330 	.name = "ice_subif",
3331 	.methods = ice_iflib_subif_methods,
3332 	.size = sizeof(struct ice_mirr_if),
3333 };
3334 
3335 /**
3336  * @var ice_subif_sctx
3337  * @brief ice driver shared context
3338  *
3339  * Similar to the existing ice_sctx, this structure has these differences:
3340  * - isc_admin_intrcnt is set to 0
3341  * - Uses subif iflib driver methods
3342  * - Flagged as a VF for iflib
3343  */
3344 static struct if_shared_ctx ice_subif_sctx = {
3345 	.isc_magic = IFLIB_MAGIC,
3346 	.isc_q_align = PAGE_SIZE,
3347 
3348 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3349 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3350 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3351 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3352 
3353 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3354 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3355 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3356 
3357 	.isc_nfl = 1,
3358 	.isc_ntxqs = 1,
3359 	.isc_nrxqs = 1,
3360 
3361 	.isc_admin_intrcnt = 0,
3362 	.isc_vendor_info = ice_vendor_info_array,
3363 	.isc_driver_version = __DECONST(char *, ice_driver_version),
3364 	.isc_driver = &ice_iflib_subif_driver,
3365 
3366 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3367 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3368 		IFLIB_IS_VF,
3369 
3370 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3371 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3372 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3373 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3374 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3375 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3376 };
3377 
3378 static void *
ice_subif_register(device_t dev __unused)3379 ice_subif_register(device_t dev __unused)
3380 {
3381 	return (&ice_subif_sctx);
3382 }
3383 
3384 static void
ice_subif_setup_scctx(struct ice_mirr_if * mif)3385 ice_subif_setup_scctx(struct ice_mirr_if *mif)
3386 {
3387 	if_softc_ctx_t scctx = mif->subscctx;
3388 
3389 	scctx->isc_txrx = &ice_subif_txrx;
3390 
3391 	scctx->isc_capenable = ICE_FULL_CAPS;
3392 	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3393 
3394 	scctx->isc_ntxqsets = 4;
3395 	scctx->isc_nrxqsets = 4;
3396 	scctx->isc_vectors = scctx->isc_nrxqsets;
3397 
3398 	scctx->isc_ntxqsets_max = 256;
3399 	scctx->isc_nrxqsets_max = 256;
3400 
3401 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3402 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3403 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3404 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3405 
3406 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3407 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3408 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3409 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3410 }
3411 
3412 static int
ice_subif_if_attach_pre(if_ctx_t ctx)3413 ice_subif_if_attach_pre(if_ctx_t ctx)
3414 {
3415 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3416 	device_t dev = iflib_get_dev(ctx);
3417 
3418 	mif->subctx = ctx;
3419 	mif->subdev = dev;
3420 	mif->subscctx = iflib_get_softc_ctx(ctx);
3421 
3422 	/* Setup the iflib softc context structure */
3423 	ice_subif_setup_scctx(mif);
3424 
3425 	return (0);
3426 }
3427 
3428 static int
ice_subif_if_attach_post(if_ctx_t ctx __unused)3429 ice_subif_if_attach_post(if_ctx_t ctx __unused)
3430 {
3431 	return (0);
3432 }
3433 
3434 /**
3435  * ice_destroy_mirror_interface - destroy mirror interface
3436  * @sc: driver private data
3437  *
3438  * Destroys all resources associated with the mirroring interface.
3439  * Will not exit early on failure.
3440  *
3441  * @pre: Mirror interface already exists and is initialized.
3442  */
3443 void
ice_destroy_mirror_interface(struct ice_softc * sc)3444 ice_destroy_mirror_interface(struct ice_softc *sc)
3445 {
3446 	struct ice_mirr_if *mif = sc->mirr_if;
3447 	struct ice_vsi *vsi = mif->vsi;
3448 	bool is_locked = false;
3449 	int ret;
3450 
3451 	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3452 	if (is_locked)
3453 		IFLIB_CTX_UNLOCK(sc);
3454 
3455 	if (mif->ifp) {
3456 		ret = iflib_device_deregister(mif->subctx);
3457 		if (ret) {
3458 			device_printf(sc->dev,
3459 			    "iflib_device_deregister for mirror interface failed: %d\n",
3460 			    ret);
3461 		}
3462 	}
3463 
3464 	bus_topo_lock();
3465 	ret = device_delete_child(sc->dev, mif->subdev);
3466 	bus_topo_unlock();
3467 	if (ret) {
3468 		device_printf(sc->dev,
3469 		    "device_delete_child for mirror interface failed: %d\n",
3470 		    ret);
3471 	}
3472 
3473 	if (is_locked)
3474 		IFLIB_CTX_LOCK(sc);
3475 
3476 	if (mif->if_imap) {
3477 		free(mif->if_imap, M_ICE);
3478 		mif->if_imap = NULL;
3479 	}
3480 	if (mif->os_imap) {
3481 		free(mif->os_imap, M_ICE);
3482 		mif->os_imap = NULL;
3483 	}
3484 
3485 	/* These are freed via ice_subif_queues_free_subif
3486 	 * vsi:
3487 	 * - rx_irqvs
3488 	 * - tx_queues
3489 	 * - rx_queues
3490 	 */
3491 	ice_release_vsi(vsi);
3492 
3493 	free(mif, M_ICE);
3494 	sc->mirr_if = NULL;
3495 
3496 }
3497 
3498 /**
3499  * ice_setup_mirror_vsi - Initialize mirror VSI
3500  * @mif: driver private data for mirror interface
3501  *
3502  * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3503  * mirror for the main PF VSI.
3504  *
3505  * Returns 0 on success, or a standard error code on failure.
3506  */
3507 static int
ice_setup_mirror_vsi(struct ice_mirr_if * mif)3508 ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3509 {
3510 	struct ice_softc *sc = mif->back;
3511 	device_t dev = sc->dev;
3512 	struct ice_vsi *vsi;
3513 	int ret = 0;
3514 
3515 	/* vsi is for the new mirror vsi, not the PF's main VSI */
3516 	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3517 	if (!vsi) {
3518 		/* Already prints an error message */
3519 		return (ENOMEM);
3520 	}
3521 	mif->vsi = vsi;
3522 
3523 	/* Reserve VSI queue allocation from PF queues */
3524 	ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3525 	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3526 
3527 	/* Assign Tx queues from PF space */
3528 	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3529 	    vsi->num_tx_queues);
3530 	if (ret) {
3531 		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3532 		    ice_err_str(ret));
3533 		goto release_vsi;
3534 	}
3535 	/* Assign Rx queues from PF space */
3536 	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3537 	    vsi->num_rx_queues);
3538 	if (ret) {
3539 		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3540 		    ice_err_str(ret));
3541 		goto release_vsi;
3542 	}
3543 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3544 	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3545 
3546 	ret = ice_initialize_vsi(vsi);
3547 	if (ret) {
3548 		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3549 		    __func__, ice_err_str(ret));
3550 		goto release_vsi;
3551 	}
3552 
3553 	/* Setup this VSI for receiving traffic */
3554 	ret = ice_config_rss(vsi);
3555 	if (ret) {
3556 		device_printf(dev,
3557 		    "Unable to configure RSS for mirror VSI: %s\n",
3558 		    ice_err_str(ret));
3559 		goto release_vsi;
3560 	}
3561 
3562 	/* Set HW rules for mirroring traffic */
3563 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3564 
3565 	ice_debug(&sc->hw, ICE_DBG_INIT,
3566 	    "Configuring mirroring from VSI %d to %d\n",
3567 	    vsi->mirror_src_vsi, vsi->idx);
3568 	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3569 	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3570 	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3571 
3572 	ret = ice_setup_vsi_mirroring(vsi);
3573 	if (ret) {
3574 		device_printf(dev,
3575 		    "Unable to configure mirroring for VSI: %s\n",
3576 		    ice_err_str(ret));
3577 		goto release_vsi;
3578 	}
3579 
3580 	return (0);
3581 
3582 release_vsi:
3583 	ice_release_vsi(vsi);
3584 	mif->vsi = NULL;
3585 	return (ret);
3586 }
3587 
3588 /**
3589  * ice_create_mirror_interface - Initialize mirror interface
3590  * @sc: driver private data
3591  *
3592  * Creates and sets up a mirror interface that will mirror traffic from
3593  * the main PF interface. Includes a call to iflib_device_register() in order
3594  * to setup necessary iflib structures for this new interface as well.
3595  *
3596  * If it returns successfully, a new interface will be created and will show
3597  * up in the ifconfig interface list.
3598  *
3599  * Returns 0 on success, or a standard error code on failure.
3600  */
3601 int
ice_create_mirror_interface(struct ice_softc * sc)3602 ice_create_mirror_interface(struct ice_softc *sc)
3603 {
3604 	device_t dev = sc->dev;
3605 	struct ice_mirr_if *mif;
3606 	struct ifmedia *media;
3607 	struct sbuf *sb;
3608 	int ret = 0;
3609 
3610 	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3611 	if (!mif) {
3612 		device_printf(dev, "malloc() error allocating mirror interface\n");
3613 		return (ENOMEM);
3614 	}
3615 
3616 	/* Set pointers */
3617 	sc->mirr_if = mif;
3618 	mif->back = sc;
3619 
3620 	/* Do early setup because these will be called during iflib_device_register():
3621 	 * - ice_subif_if_tx_queues_alloc
3622 	 * - ice_subif_if_rx_queues_alloc
3623 	 */
3624 	ret = ice_setup_mirror_vsi(mif);
3625 	if (ret)
3626 		goto out;
3627 
3628 	/* Determine name for new interface:
3629 	 * (base interface name)(modifier name)(modifier unit number)
3630 	 * e.g. for ice0 with a new mirror interface (modifier m)
3631 	 * of index 0, this equals "ice0m0"
3632 	 */
3633 	sb = sbuf_new_auto();
3634 	MPASS(sb != NULL);
3635 	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3636 	sbuf_finish(sb);
3637 
3638 	bus_topo_lock();
3639 	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3640 	bus_topo_unlock();
3641 
3642 	if (!mif->subdev) {
3643 		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3644 		sbuf_delete(sb);
3645 		free(mif, M_ICE);
3646 		sc->mirr_if = NULL;
3647 		return (ENOMEM);
3648 	}
3649 	sbuf_delete(sb);
3650 
3651 	device_set_driver(mif->subdev, &ice_subif_driver);
3652 
3653 	/* Use iflib_device_register() directly because the driver already
3654 	 * has an initialized softc to pass to iflib
3655 	 */
3656 	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3657 	if (ret)
3658 		goto out;
3659 
3660 	/* Indicate that created interface will be just for monitoring */
3661 	mif->ifp = iflib_get_ifp(mif->subctx);
3662 	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3663 
3664 	/* Use autoselect media by default */
3665 	media = iflib_get_media(mif->subctx);
3666 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3667 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3668 
3669 	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3670 	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3671 
3672 	ice_add_vsi_sysctls(mif->vsi);
3673 
3674 	ret = ice_wire_mirror_intrs(mif);
3675 	if (ret)
3676 		goto out;
3677 
3678 	mif->if_attached = true;
3679 	return (0);
3680 
3681 out:
3682 	ice_destroy_mirror_interface(sc);
3683 	return (ret);
3684 }
3685 
3686 /**
3687  * ice_wire_mirror_intrs
3688  * @mif: driver private subinterface structure
3689  *
3690  * Helper function that sets up driver interrupt data and calls
3691  * into iflib in order to setup interrupts in its data structures as well.
3692  *
3693  * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3694  * number of vectors as we have queues, and that we always have the same number
3695  * of Tx and Rx queues. Unlike that function, this calls a special
3696  * iflib_irq_alloc_generic_subif() function for RX interrupts because the
3697  * driver needs to get MSI-X resources from the parent device.
3698  *
3699  * Tx queues use a softirq instead of using their own hardware interrupt so that
3700  * remains unchanged.
3701  *
3702  * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3703  * on failure.
3704  */
3705 static int
ice_wire_mirror_intrs(struct ice_mirr_if * mif)3706 ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3707 {
3708 	struct ice_softc *sc = mif->back;
3709 	struct ice_hw *hw = &sc->hw;
3710 	struct ice_vsi *vsi = mif->vsi;
3711 	device_t dev = mif->subdev;
3712 	int err, i, rid;
3713 
3714 	if_ctx_t ctx = mif->subctx;
3715 
3716 	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3717 
3718 	rid = sc->last_rid + 1;
3719 	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3720 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3721 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3722 		char irq_name[16];
3723 
3724 		// TODO: Change to use dynamic interface number
3725 		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3726 		/* First arg is parent device (physical port's) iflib ctx */
3727 		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3728 		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3729 		    rxq, rxq->me, irq_name);
3730 		if (err) {
3731 			device_printf(dev,
3732 			    "Failed to allocate q int %d err: %s\n",
3733 			    i, ice_err_str(err));
3734 			i--;
3735 			goto fail;
3736 		}
3737 		MPASS(rid - 1 > 0);
3738 		/* Set vector number used in interrupt enable/disable functions */
3739 		mif->rx_irqvs[i].me = rid - 1;
3740 		rxq->irqv = &mif->rx_irqvs[i];
3741 
3742 		bzero(irq_name, sizeof(irq_name));
3743 		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3744 		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3745 		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3746 		txq->irqv = &mif->rx_irqvs[i];
3747 	}
3748 
3749 	sc->last_rid = rid - 1;
3750 
3751 	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3752 	    sc->last_rid);
3753 
3754 	return (0);
3755 
3756 fail:
3757 	for (; i >= 0; i--)
3758 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3759 	return (err);
3760 }
3761 
3762 /**
3763  * ice_subif_rebuild - Rebuild subinterface post reset
3764  * @sc: The device private softc
3765  *
3766  * Restore subinterface state after a reset occurred.
3767  * Restart the VSI and enable the mirroring.
3768  */
3769 static int
ice_subif_rebuild(struct ice_softc * sc)3770 ice_subif_rebuild(struct ice_softc *sc)
3771 {
3772 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3773 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3774 	int err;
3775 
3776 	err = ice_subif_rebuild_vsi_qmap(sc);
3777 	if (err) {
3778 		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3779 		      ice_err_str(err));
3780 		return (err);
3781 	}
3782 
3783 	err = ice_initialize_vsi(vsi);
3784 	if (err) {
3785 		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
3786 		      ice_err_str(err));
3787 		goto err_release_queue_allocations_subif;
3788 	}
3789 
3790 	err = ice_config_rss(vsi);
3791 	if (err) {
3792 		device_printf(sc->dev,
3793 		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
3794 		      ice_err_str(err));
3795 		goto err_deinit_subif_vsi;
3796 	}
3797 
3798 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3799 
3800 	err = ice_setup_vsi_mirroring(vsi);
3801 	if (err) {
3802 		device_printf(sc->dev,
3803 		      "Unable to configure mirroring for VSI: %s\n",
3804 		      ice_err_str(err));
3805 		goto err_deinit_subif_vsi;
3806 	}
3807 
3808 	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
3809 
3810 	return (0);
3811 
3812 err_deinit_subif_vsi:
3813 	ice_deinit_vsi(vsi);
3814 err_release_queue_allocations_subif:
3815 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
3816 	    sc->mirr_if->num_irq_vectors);
3817 	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
3818 	    sc->mirr_if->num_irq_vectors);
3819 
3820 	return (err);
3821 }
3822 
3823 /**
3824  * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
3825  * @sc: the device softc pointer
3826  *
3827  * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
3828  * mapping after a reset occurred.
3829  */
3830 static int
ice_subif_rebuild_vsi_qmap(struct ice_softc * sc)3831 ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
3832 {
3833 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3834 	struct ice_tx_queue *txq;
3835 	struct ice_rx_queue *rxq;
3836 	int err, i;
3837 
3838 	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
3839 	if (err) {
3840 		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
3841 		      ice_err_str(err));
3842 		return (err);
3843 	}
3844 
3845 	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
3846 	if (err) {
3847 		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
3848 		      ice_err_str(err));
3849 		goto err_release_tx_queues;
3850 	}
3851 
3852 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3853 
3854 	/* Re-assign Tx queue tail pointers */
3855 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
3856 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3857 
3858 	/* Re-assign Rx queue tail pointers */
3859 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
3860 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3861 
3862 	return (0);
3863 
3864 err_release_tx_queues:
3865 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
3866 
3867 	return (err);
3868 }
3869 
3870 /**
3871  * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
3872  * @ctx: iflib context structure
3873  * @vaddrs: virtual addresses for the queue memory
3874  * @paddrs: physical addresses for the queue memory
3875  * @ntxqs: the number of Tx queues per set (should always be 1)
3876  * @ntxqsets: the number of Tx queue sets to allocate
3877  *
3878  * See ice_if_tx_queues_alloc() description. Similar to that function, but
3879  * for subinterfaces instead.
3880  */
3881 static int
ice_subif_if_tx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only ntxqs,int ntxqsets)3882 ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3883 			     int __invariant_only ntxqs, int ntxqsets)
3884 {
3885 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3886 	struct ice_tx_queue *txq;
3887 	device_t dev = mif->subdev;
3888 	struct ice_vsi *vsi;
3889 	int err, i, j;
3890 
3891 	MPASS(mif != NULL);
3892 	MPASS(ntxqs == 1);
3893 	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
3894 
3895 	vsi = mif->vsi;
3896 
3897 	MPASS(vsi->num_tx_queues == ntxqsets);
3898 
3899 	/* Allocate queue structure memory */
3900 	if (!(vsi->tx_queues =
3901 	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3902 		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
3903 		    __func__);
3904 		return (ENOMEM);
3905 	}
3906 
3907 	/* Allocate report status arrays */
3908 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3909 		if (!(txq->tx_rsq =
3910 		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
3911 			device_printf(dev,
3912 			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
3913 			err = ENOMEM;
3914 			goto free_tx_queues;
3915 		}
3916 		/* Initialize report status array */
3917 		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
3918 			txq->tx_rsq[j] = QIDX_INVALID;
3919 	}
3920 
3921 	/* Add Tx queue sysctls context */
3922 	ice_vsi_add_txqs_ctx(vsi);
3923 
3924 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3925 		/* q_handle == me when only one TC */
3926 		txq->me = txq->q_handle = i;
3927 		txq->vsi = vsi;
3928 
3929 		/* store the queue size for easier access */
3930 		txq->desc_count = mif->subscctx->isc_ntxd[0];
3931 
3932 		/* get the virtual and physical address of the hardware queues */
3933 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3934 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
3935 		txq->tx_paddr = paddrs[i];
3936 
3937 		ice_add_txq_sysctls(txq);
3938 	}
3939 
3940 	return (0);
3941 
3942 free_tx_queues:
3943 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3944 		if (txq->tx_rsq != NULL) {
3945 			free(txq->tx_rsq, M_ICE);
3946 			txq->tx_rsq = NULL;
3947 		}
3948 	}
3949 	free(vsi->tx_queues, M_ICE);
3950 	vsi->tx_queues = NULL;
3951 	return (err);
3952 }
3953 
3954 /**
3955  * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
3956  * @ctx: iflib context structure
3957  * @vaddrs: virtual addresses for the queue memory
3958  * @paddrs: physical addresses for the queue memory
3959  * @nrxqs: number of Rx queues per set (should always be 1)
3960  * @nrxqsets: number of Rx queue sets to allocate
3961  *
3962  * See ice_if_rx_queues_alloc() for general summary; this is similar to that
3963  * but implemented for subinterfaces.
3964  */
3965 static int
ice_subif_if_rx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only nrxqs,int nrxqsets)3966 ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3967     int __invariant_only nrxqs, int nrxqsets)
3968 {
3969 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3970 	struct ice_rx_queue *rxq;
3971 	device_t dev = mif->subdev;
3972 	struct ice_vsi *vsi;
3973 	int i;
3974 
3975 	MPASS(mif != NULL);
3976 	MPASS(nrxqs == 1);
3977 	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
3978 
3979 	vsi = mif->vsi;
3980 
3981 	MPASS(vsi->num_rx_queues == nrxqsets);
3982 
3983 	/* Allocate queue structure memory */
3984 	if (!(vsi->rx_queues =
3985 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3986 		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
3987 		    __func__);
3988 		return (ENOMEM);
3989 	}
3990 
3991 	/* Add Rx queue sysctls context */
3992 	ice_vsi_add_rxqs_ctx(vsi);
3993 
3994 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
3995 		rxq->me = i;
3996 		rxq->vsi = vsi;
3997 
3998 		/* store the queue size for easier access */
3999 		rxq->desc_count = mif->subscctx->isc_nrxd[0];
4000 
4001 		/* get the virtual and physical address of the hardware queues */
4002 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4003 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
4004 		rxq->rx_paddr = paddrs[i];
4005 
4006 		ice_add_rxq_sysctls(rxq);
4007 	}
4008 
4009 	return (0);
4010 }
4011 
4012 /**
4013  * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4014  * @ctx: the iflib context structure
4015  * @msix: the number of vectors we were assigned
4016  *
4017  * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4018  *
4019  * @pre OS MSI-X resources have been pre-allocated by parent interface.
4020  */
4021 static int
ice_subif_if_msix_intr_assign(if_ctx_t ctx,int msix)4022 ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4023 {
4024 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4025 	struct ice_softc *sc = mif->back;
4026 	struct ice_vsi *vsi = mif->vsi;
4027 
4028 	device_t dev = mif->subdev;
4029 	int ret;
4030 
4031 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
4032 		device_printf(dev,
4033 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
4034 			      vsi->num_tx_queues, vsi->num_rx_queues);
4035 		return (EOPNOTSUPP);
4036 	}
4037 
4038 	if (msix > sc->extra_vectors) {
4039 		device_printf(dev,
4040 		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4041 		     __func__, sc->extra_vectors, msix);
4042 		return (ENOSPC);
4043 	}
4044 	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4045 	    msix);
4046 
4047 	/* Allocate space to store the IRQ vector data */
4048 	mif->num_irq_vectors = vsi->num_rx_queues;
4049 	mif->rx_irqvs = (struct ice_irq_vector *)
4050 	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4051 		   M_ICE, M_NOWAIT);
4052 	if (!mif->rx_irqvs) {
4053 		device_printf(dev,
4054 			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4055 			      mif->num_irq_vectors);
4056 		return (ENOMEM);
4057 	}
4058 
4059 	/* Assign mirror interface interrupts from PF device space */
4060 	if (!(mif->if_imap =
4061 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4062 	      M_ICE, M_NOWAIT))) {
4063 		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4064 		ret = ENOMEM;
4065 		goto free_irqvs;
4066 	}
4067 	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4068 	if (ret) {
4069 		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4070 			      ice_err_str(ret));
4071 		goto free_if_imap;
4072 	}
4073 	/* Assign mirror interface interrupts from OS interrupt allocation space */
4074 	if (!(mif->os_imap =
4075 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4076 	      M_ICE, M_NOWAIT))) {
4077 		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4078 		ret = ENOMEM;
4079 		goto free_if_imap;
4080 	}
4081 	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4082 	if (ret) {
4083 		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4084 			      ice_err_str(ret));
4085 		goto free_if_imap;
4086 	}
4087 
4088 	return (0);
4089 
4090 free_if_imap:
4091 	free(mif->if_imap, M_ICE);
4092 	mif->if_imap = NULL;
4093 free_irqvs:
4094 	free(mif->rx_irqvs, M_ICE);
4095 	mif->rx_irqvs = NULL;
4096 	return (ret);
4097 }
4098 
4099 /**
4100  * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4101  * @ctx: iflib context structure
4102  *
4103  * Called by iflib to request enabling all interrupts that belong to a
4104  * subinterface.
4105  */
4106 static void
ice_subif_if_intr_enable(if_ctx_t ctx)4107 ice_subif_if_intr_enable(if_ctx_t ctx)
4108 {
4109 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4110 	struct ice_softc *sc = mif->back;
4111 	struct ice_vsi *vsi = mif->vsi;
4112 	struct ice_hw *hw = &sc->hw;
4113 
4114 	/* Do not enable queue interrupts in recovery mode */
4115 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4116 		return;
4117 
4118 	/* Enable all queue interrupts */
4119 	for (int i = 0; i < vsi->num_rx_queues; i++)
4120 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4121 }
4122 
4123 /**
4124  * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4125  * @ctx: iflib context structure
4126  * @rxqid: the Rx queue to enable
4127  *
4128  * Enable a specific Rx queue interrupt.
4129  *
4130  * This function is not protected by the iflib CTX lock.
4131  */
4132 static int
ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx,uint16_t rxqid)4133 ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4134 {
4135 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4136 	struct ice_softc *sc = mif->back;
4137 	struct ice_vsi *vsi = mif->vsi;
4138 	struct ice_hw *hw = &sc->hw;
4139 
4140 	/* Do not enable queue interrupts in recovery mode */
4141 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4142 		return (ENOSYS);
4143 
4144 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4145 	return (0);
4146 }
4147 
4148 /**
4149  * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4150  * @ctx: iflib context structure
4151  * @txqid: the Tx queue to enable
4152  *
4153  * Enable a specific Tx queue interrupt.
4154  *
4155  * This function is not protected by the iflib CTX lock.
4156  */
4157 static int
ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx,uint16_t txqid)4158 ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4159 {
4160 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4161 	struct ice_softc *sc = mif->back;
4162 	struct ice_vsi *vsi = mif->vsi;
4163 	struct ice_hw *hw = &sc->hw;
4164 
4165 	/* Do not enable queue interrupts in recovery mode */
4166 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4167 		return (ENOSYS);
4168 
4169 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4170 	return (0);
4171 }
4172 
4173 /**
4174  * ice_subif_if_init - Initialize the subinterface
4175  * @ctx: iflib ctx structure
4176  *
4177  * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4178  * Prepares the Tx and Rx engines and enables interrupts.
4179  *
4180  * @pre assumes the caller holds the iflib CTX lock
4181  */
4182 static void
ice_subif_if_init(if_ctx_t ctx)4183 ice_subif_if_init(if_ctx_t ctx)
4184 {
4185 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4186 	struct ice_softc *sc = mif->back;
4187 	struct ice_vsi *vsi = mif->vsi;
4188 	device_t dev = mif->subdev;
4189 	int err;
4190 
4191 	if (ice_driver_is_detaching(sc))
4192 		return;
4193 
4194 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4195 		return;
4196 
4197 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4198 		device_printf(dev,
4199 		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4200 		    device_get_nameunit(sc->dev));
4201 		return;
4202 	}
4203 
4204 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4205 		device_printf(dev,
4206 		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4207 		    device_get_nameunit(sc->dev));
4208 		return;
4209 	}
4210 
4211 	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4212 	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4213 
4214 	/* Initialize software Tx tracking values */
4215 	ice_init_tx_tracking(vsi);
4216 
4217 	err = ice_cfg_vsi_for_tx(vsi);
4218 	if (err) {
4219 		device_printf(dev,
4220 			      "Unable to configure subif VSI for Tx: %s\n",
4221 			      ice_err_str(err));
4222 		return;
4223 	}
4224 
4225 	err = ice_cfg_vsi_for_rx(vsi);
4226 	if (err) {
4227 		device_printf(dev,
4228 			      "Unable to configure subif VSI for Rx: %s\n",
4229 			      ice_err_str(err));
4230 		goto err_cleanup_tx;
4231 	}
4232 
4233 	err = ice_control_all_rx_queues(vsi, true);
4234 	if (err) {
4235 		device_printf(dev,
4236 			      "Unable to enable subif Rx rings for receive: %s\n",
4237 			      ice_err_str(err));
4238 		goto err_cleanup_tx;
4239 	}
4240 
4241 	ice_configure_all_rxq_interrupts(vsi);
4242 	ice_configure_rx_itr(vsi);
4243 
4244 	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4245 	return;
4246 
4247 err_cleanup_tx:
4248 	ice_vsi_disable_tx(vsi);
4249 }
4250 
4251 /**
4252  * ice_if_stop_subif - Stop the subinterface
4253  * @ctx: iflib context structure
4254  * @ifs: subinterface context structure
4255  *
4256  * Called by iflib to stop the subinterface and bring it down.
4257  * (e.g. ifconfig ice0m0 down)
4258  *
4259  * @pre assumes the caller holds the iflib CTX lock
4260  */
4261 static void
ice_subif_if_stop(if_ctx_t ctx)4262 ice_subif_if_stop(if_ctx_t ctx)
4263 {
4264 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4265 	struct ice_softc *sc = mif->back;
4266 	struct ice_vsi *vsi = mif->vsi;
4267 	device_t dev = mif->subdev;
4268 
4269 	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4270 		return;
4271 
4272 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4273 		device_printf(dev,
4274 		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4275 		    device_get_nameunit(sc->dev));
4276 		return;
4277 	}
4278 
4279 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4280 		device_printf(dev,
4281 		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4282 		    device_get_nameunit(sc->dev));
4283 		return;
4284 	}
4285 
4286 	/* Dissociate the Tx and Rx queues from the interrupts */
4287 	ice_flush_txq_interrupts(vsi);
4288 	ice_flush_rxq_interrupts(vsi);
4289 
4290 	/* Disable the Tx and Rx queues */
4291 	ice_vsi_disable_tx(vsi);
4292 	ice_control_all_rx_queues(vsi, false);
4293 }
4294 
4295 /**
4296  * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4297  * @mif: Mirror interface private structure
4298  *
4299  * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4300  */
4301 static void
ice_free_irqvs_subif(struct ice_mirr_if * mif)4302 ice_free_irqvs_subif(struct ice_mirr_if *mif)
4303 {
4304 	struct ice_softc *sc = mif->back;
4305 	struct ice_vsi *vsi = mif->vsi;
4306 	if_ctx_t ctx = sc->ctx;
4307 	int i;
4308 
4309 	/* If the irqvs array is NULL, then there are no vectors to free */
4310 	if (mif->rx_irqvs == NULL)
4311 		return;
4312 
4313 	/* Free the IRQ vectors -- currently subinterfaces have number
4314 	 * of vectors equal to number of RX queues
4315 	 *
4316 	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4317 	 */
4318 	for (i = 0; i < vsi->num_rx_queues; i++)
4319 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4320 
4321 	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4322 	    mif->num_irq_vectors);
4323 	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4324 	    mif->num_irq_vectors);
4325 
4326 	sc->last_rid -= vsi->num_rx_queues;
4327 
4328 	/* Clear the irqv pointers */
4329 	for (i = 0; i < vsi->num_rx_queues; i++)
4330 		vsi->rx_queues[i].irqv = NULL;
4331 
4332 	for (i = 0; i < vsi->num_tx_queues; i++)
4333 		vsi->tx_queues[i].irqv = NULL;
4334 
4335 	/* Release the vector array memory */
4336 	free(mif->rx_irqvs, M_ICE);
4337 	mif->rx_irqvs = NULL;
4338 }
4339 
4340 /**
4341  * ice_subif_if_queues_free - Free queue memory for subinterfaces
4342  * @ctx: the iflib context structure
4343  *
4344  * Free queue memory allocated by ice_subif_tx_queues_alloc() and
4345  * ice_subif_if_rx_queues_alloc().
4346  */
4347 static void
ice_subif_if_queues_free(if_ctx_t ctx)4348 ice_subif_if_queues_free(if_ctx_t ctx)
4349 {
4350 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4351 	struct ice_vsi *vsi = mif->vsi;
4352 	struct ice_tx_queue *txq;
4353 	int i;
4354 
4355 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4356 	 * pointers.
4357 	 */
4358 	ice_vsi_del_txqs_ctx(vsi);
4359 	ice_vsi_del_rxqs_ctx(vsi);
4360 
4361 	/* Release MSI-X IRQ vectors */
4362 	ice_free_irqvs_subif(mif);
4363 
4364 	if (vsi->tx_queues != NULL) {
4365 		/* free the tx_rsq arrays */
4366 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4367 			if (txq->tx_rsq != NULL) {
4368 				free(txq->tx_rsq, M_ICE);
4369 				txq->tx_rsq = NULL;
4370 			}
4371 		}
4372 		free(vsi->tx_queues, M_ICE);
4373 		vsi->tx_queues = NULL;
4374 	}
4375 	if (vsi->rx_queues != NULL) {
4376 		free(vsi->rx_queues, M_ICE);
4377 		vsi->rx_queues = NULL;
4378 	}
4379 }
4380 
4381 /**
4382  * ice_subif_if_media_status - Report subinterface media
4383  * @ctx: iflib context structure
4384  * @ifmr: ifmedia request structure to update
4385  *
4386  * Updates the provided ifmr with something, in order to prevent a
4387  * "no media types?" message from ifconfig.
4388  *
4389  * Mirror interfaces are always up.
4390  */
4391 static void
ice_subif_if_media_status(if_ctx_t ctx __unused,struct ifmediareq * ifmr)4392 ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4393 {
4394 	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4395 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4396 }
4397 
4398 /**
4399  * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4400  * @ctx: iflib context structure
4401  * @flags: promiscuous flags to configure
4402  *
4403  * Called by iflib to configure device promiscuous mode.
4404  *
4405  * @remark This does not need to be implemented for now.
4406  */
4407 static int
ice_subif_if_promisc_set(if_ctx_t ctx __unused,int flags __unused)4408 ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4409 {
4410 	return (0);
4411 }
4412 
4413