xref: /freebsd-11-stable/sys/dev/e1000/if_igb.c (revision 6382424915737ef0477e2e29f8f8621c80552040)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38 #include "opt_rss.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #include "opt_altq.h"
43 #endif
44 
45 #include "if_igb.h"
46 
47 /*********************************************************************
48  *  Driver version:
    *
    *  igb_probe() appends this string to the branding string when it
    *  builds the device description ("<branding>, Version - <version>").
49  *********************************************************************/
50 char igb_driver_version[] = "2.5.3-k";
51 
52 
53 /*********************************************************************
54  *  PCI Device ID Table
55  *
56  *  Used by probe to select devices to load on
57  *  Last field stores an index into igb_strings
58  *  Last entry must be all 0s
59  *
60  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
    *
    *  igb_probe() treats a SubVendor ID or SubDevice ID of 0 as a
    *  wildcard and stops scanning at the first entry whose Vendor ID
    *  is 0, which is why the all-zero terminator is required.
61  *********************************************************************/
62 
63 static igb_vendor_info_t igb_vendor_info_array[] =
64 {
65 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER,	0, 0, 0},
72 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER,	0, 0, 0},
79 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII,	0, 0, 0},
81 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER,	0, 0, 0},
88 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,	0, 0, 0},
89 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES,	0, 0, 0},
90 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,	0, 0, 0},
91 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER,	0, 0, 0},
93 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,	0, 0, 0},
98 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES,	0, 0, 0},
99 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,	0, 0, 0},
100 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER,	0, 0, 0},
101 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,	0, 0, 0},
104 	/* required last entry: terminates the scan in igb_probe() */
105 	{0, 0, 0, 0, 0}
106 };
107 
108 /*********************************************************************
109  *  Table of branding strings for all supported NICs.
     *
     *  Indexed by the last ("String Index") field of the entries in
     *  igb_vendor_info_array; every entry in that table currently uses
     *  index 0, so a single string suffices.
110  *********************************************************************/
111 
112 static char *igb_strings[] = {
113 	"Intel(R) PRO/1000 Network Connection"
114 };
115 
116 /*********************************************************************
117  *  Function prototypes
118  *********************************************************************/
119 static int	igb_probe(device_t);
120 static int	igb_attach(device_t);
121 static int	igb_detach(device_t);
122 static int	igb_shutdown(device_t);
123 static int	igb_suspend(device_t);
124 static int	igb_resume(device_t);
125 #ifndef IGB_LEGACY_TX
126 static int	igb_mq_start(struct ifnet *, struct mbuf *);
127 static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128 static void	igb_qflush(struct ifnet *);
129 static void	igb_deferred_mq_start(void *, int);
130 #else
131 static void	igb_start(struct ifnet *);
132 static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133 #endif
134 static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
135 static uint64_t	igb_get_counter(if_t, ift_counter);
136 static void	igb_init(void *);
137 static void	igb_init_locked(struct adapter *);
138 static void	igb_stop(void *);
139 static void	igb_media_status(struct ifnet *, struct ifmediareq *);
140 static int	igb_media_change(struct ifnet *);
141 static void	igb_identify_hardware(struct adapter *);
142 static int	igb_allocate_pci_resources(struct adapter *);
143 static int	igb_allocate_msix(struct adapter *);
144 static int	igb_allocate_legacy(struct adapter *);
145 static int	igb_setup_msix(struct adapter *);
146 static void	igb_free_pci_resources(struct adapter *);
147 static void	igb_local_timer(void *);
148 static void	igb_reset(struct adapter *);
149 static int	igb_setup_interface(device_t, struct adapter *);
150 static int	igb_allocate_queues(struct adapter *);
151 static void	igb_configure_queues(struct adapter *);
152 
153 static int	igb_allocate_transmit_buffers(struct tx_ring *);
154 static void	igb_setup_transmit_structures(struct adapter *);
155 static void	igb_setup_transmit_ring(struct tx_ring *);
156 static void	igb_initialize_transmit_units(struct adapter *);
157 static void	igb_free_transmit_structures(struct adapter *);
158 static void	igb_free_transmit_buffers(struct tx_ring *);
159 
160 static int	igb_allocate_receive_buffers(struct rx_ring *);
161 static int	igb_setup_receive_structures(struct adapter *);
162 static int	igb_setup_receive_ring(struct rx_ring *);
163 static void	igb_initialize_receive_units(struct adapter *);
164 static void	igb_free_receive_structures(struct adapter *);
165 static void	igb_free_receive_buffers(struct rx_ring *);
166 static void	igb_free_receive_ring(struct rx_ring *);
167 
168 static void	igb_enable_intr(struct adapter *);
169 static void	igb_disable_intr(struct adapter *);
170 static void	igb_update_stats_counters(struct adapter *);
171 static bool	igb_txeof(struct tx_ring *);
172 
173 static __inline	void igb_rx_discard(struct rx_ring *, int);
174 static __inline void igb_rx_input(struct rx_ring *,
175 		    struct ifnet *, struct mbuf *, u32);
176 
177 static bool	igb_rxeof(struct igb_queue *, int, int *);
178 static void	igb_rx_checksum(u32, struct mbuf *, u32);
179 static int	igb_tx_ctx_setup(struct tx_ring *,
180 		    struct mbuf *, u32 *, u32 *);
181 static int	igb_tso_setup(struct tx_ring *,
182 		    struct mbuf *, u32 *, u32 *);
183 static void	igb_set_promisc(struct adapter *);
184 static void	igb_disable_promisc(struct adapter *);
185 static void	igb_set_multi(struct adapter *);
186 static void	igb_update_link_status(struct adapter *);
187 static void	igb_refresh_mbufs(struct rx_ring *, int);
188 
189 static void	igb_register_vlan(void *, struct ifnet *, u16);
190 static void	igb_unregister_vlan(void *, struct ifnet *, u16);
191 static void	igb_setup_vlan_hw_support(struct adapter *);
192 
193 static int	igb_xmit(struct tx_ring *, struct mbuf **);
194 static int	igb_dma_malloc(struct adapter *, bus_size_t,
195 		    struct igb_dma_alloc *, int);
196 static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197 static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198 static void	igb_print_nvm_info(struct adapter *);
199 static int 	igb_is_valid_ether_addr(u8 *);
200 static void     igb_add_hw_stats(struct adapter *);
201 
202 static void	igb_vf_init_stats(struct adapter *);
203 static void	igb_update_vf_stats_counters(struct adapter *);
204 
205 /* Management and WOL Support */
206 static void	igb_init_manageability(struct adapter *);
207 static void	igb_release_manageability(struct adapter *);
208 static void     igb_get_hw_control(struct adapter *);
209 static void     igb_release_hw_control(struct adapter *);
210 static void     igb_enable_wakeup(device_t);
211 static void     igb_led_func(void *, int);
212 
213 static int	igb_irq_fast(void *);
214 static void	igb_msix_que(void *);
215 static void	igb_msix_link(void *);
216 static void	igb_handle_que(void *context, int pending);
217 static void	igb_handle_link(void *context, int pending);
218 static void	igb_handle_link_locked(struct adapter *);
219 
220 static void	igb_set_sysctl_value(struct adapter *, const char *,
221 		    const char *, int *, int);
222 static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223 static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224 static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225 
226 #ifdef DEVICE_POLLING
227 static poll_handler_t igb_poll;
228 #endif /* POLLING */
229 
230 /*********************************************************************
231  *  FreeBSD Device Interface Entry Points
     *
     *  Binds the device-interface methods to the igb_* handlers declared
     *  above, describes the driver (name, method table, softc size) and
     *  registers it as a module.
232  *********************************************************************/
233 
234 static device_method_t igb_methods[] = {
235 	/* Device interface */
236 	DEVMETHOD(device_probe, igb_probe),
237 	DEVMETHOD(device_attach, igb_attach),
238 	DEVMETHOD(device_detach, igb_detach),
239 	DEVMETHOD(device_shutdown, igb_shutdown),
240 	DEVMETHOD(device_suspend, igb_suspend),
241 	DEVMETHOD(device_resume, igb_resume),
242 	DEVMETHOD_END
243 };
244 
    /* The per-device softc is a struct adapter (see device_get_softc() users). */
245 static driver_t igb_driver = {
246 	"igb", igb_methods, sizeof(struct adapter),
247 };
248 
249 static devclass_t igb_devclass;
    /* Register the driver under the "pci" parent and declare module dependencies. */
250 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251 MODULE_DEPEND(igb, pci, 1, 1, 1);
252 MODULE_DEPEND(igb, ether, 1, 1, 1);
253 #ifdef DEV_NETMAP
254 MODULE_DEPEND(igb, netmap, 1, 1, 1);
255 #endif /* DEV_NETMAP */
256 
257 /*********************************************************************
258  *  Tunable default values.
     *
     *  Driver-wide knobs. Those paired with a SYSCTL_INT() below are
     *  published under the hw.igb sysctl node.
259  *********************************************************************/
260 
261 static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262 
263 /*
     * Descriptor defaults.
     * igb_attach() validates both values and falls back to
     * IGB_DEFAULT_RXD / IGB_DEFAULT_TXD when one is out of range or
     * not aligned to IGB_DBA_ALIGN.
     */
264 static int igb_rxd = IGB_DEFAULT_RXD;
265 static int igb_txd = IGB_DEFAULT_TXD;
266 SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267     "Number of receive descriptors per queue");
268 SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269     "Number of transmit descriptors per queue");
270 
271 /*
272 ** AIM: Adaptive Interrupt Moderation
273 ** which means that the interrupt rate
274 ** is varied over time based on the
275 ** traffic for that interrupt vector.
    ** igb_attach() uses this as the initial value of the
    ** per-adapter "enable_aim" sysctl.
276 */
277 static int igb_enable_aim = TRUE;
278 SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279     "Enable adaptive interrupt moderation");
280 
281 /*
282  * MSIX should be the default for best performance,
283  * but this allows it to be forced off for testing.
     * igb_attach() only takes the MSI-X path when this is non-zero
     * and adapter->msix > 1; otherwise it uses MSI/legacy setup.
284  */
285 static int igb_enable_msix = 1;
286 SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287     "Enable MSI-X interrupts");
288 
289 /*
290 ** Tunable interrupt rate (maximum interrupts per second)
291 */
292 static int igb_max_interrupt_rate = 8000;
293 SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294     &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295 
296 #ifndef IGB_LEGACY_TX
297 /*
298 ** Tunable number of buffers in the buf-ring (drbr_xxx);
    ** only present when the multiqueue TX path is compiled in.
299 */
300 static int igb_buf_ring_size = IGB_BR_SIZE;
301 SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302     &igb_buf_ring_size, 0, "Size of the bufring");
303 #endif
304 
305 /*
306 ** Header split causes the packet header to
307 ** be DMA'd to a separate mbuf from the payload.
308 ** This can have memory alignment benefits. But
309 ** another plus is that small packets often fit
310 ** into the header and thus use no cluster. It's
311 ** a very workload-dependent type of feature.
312 */
313 static int igb_header_split = FALSE;
314 SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315     "Enable receive mbuf header split");
316 
317 /*
318 ** This will autoconfigure based on the
319 ** number of CPUs and max supported
320 ** MSIX messages if left at 0.
321 */
322 static int igb_num_queues = 0;
323 SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324     "Number of queues to configure, 0 indicates autoconfigure");
325 
326 /*
327 ** Global variable to store last used CPU when binding queues
328 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
329 ** queue is bound to a cpu.
    ** NOTE(review): the initializer below is -1, not CPU_FIRST; presumably
    ** -1 means "nothing bound yet" and igb_allocate_msix picks CPU_FIRST
    ** on first use -- confirm against igb_allocate_msix.
    ** Not exported through sysctl.
330 */
331 static int igb_last_bind_cpu = -1;
332 
333 /*
     * How many packets rxeof tries to clean at a time.
     * Seeds the per-adapter "rx_processing_limit" sysctl in igb_attach().
     */
334 static int igb_rx_process_limit = 100;
335 SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336     &igb_rx_process_limit, 0,
337     "Maximum number of received packets to process at a time, -1 means unlimited");
338 
339 /*
     * How many packets txeof tries to clean at a time.
     * Seeds the per-adapter "tx_processing_limit" sysctl in igb_attach().
     */
340 static int igb_tx_process_limit = -1;
341 SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342     &igb_tx_process_limit, 0,
343     "Maximum number of sent packets to process at a time, -1 means unlimited");
344 
345 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
346 #include <dev/netmap/if_igb_netmap.h>
347 #endif /* DEV_NETMAP */
348 /*********************************************************************
349  *  Device identification routine
350  *
351  *  igb_probe determines if the driver should be loaded on
352  *  adapter based on PCI vendor/device id of the adapter.
353  *
354  *  return BUS_PROBE_DEFAULT on success, positive on failure
355  *********************************************************************/
356 
357 static int
igb_probe(device_t dev)358 igb_probe(device_t dev)
359 {
360 	char		adapter_name[256];
361 	uint16_t	pci_vendor_id = 0;
362 	uint16_t	pci_device_id = 0;
363 	uint16_t	pci_subvendor_id = 0;
364 	uint16_t	pci_subdevice_id = 0;
365 	igb_vendor_info_t *ent;
366 
367 	INIT_DEBUGOUT("igb_probe: begin");
368 
369 	pci_vendor_id = pci_get_vendor(dev);
370 	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371 		return (ENXIO);
372 
373 	pci_device_id = pci_get_device(dev);
374 	pci_subvendor_id = pci_get_subvendor(dev);
375 	pci_subdevice_id = pci_get_subdevice(dev);
376 
377 	ent = igb_vendor_info_array;
378 	while (ent->vendor_id != 0) {
379 		if ((pci_vendor_id == ent->vendor_id) &&
380 		    (pci_device_id == ent->device_id) &&
381 
382 		    ((pci_subvendor_id == ent->subvendor_id) ||
383 		    (ent->subvendor_id == 0)) &&
384 
385 		    ((pci_subdevice_id == ent->subdevice_id) ||
386 		    (ent->subdevice_id == 0))) {
387 			sprintf(adapter_name, "%s, Version - %s",
388 				igb_strings[ent->index],
389 				igb_driver_version);
390 			device_set_desc_copy(dev, adapter_name);
391 			return (BUS_PROBE_DEFAULT);
392 		}
393 		ent++;
394 	}
395 	return (ENXIO);
396 }
397 
398 /*********************************************************************
399  *  Device initialization routine
400  *
401  *  The attach entry point is called when the driver is being loaded.
402  *  This routine identifies the type of hardware, allocates all resources
403  *  and initializes the hardware.
404  *
405  *  return 0 on success, positive on failure
406  *********************************************************************/
407 
408 static int
igb_attach(device_t dev)409 igb_attach(device_t dev)
410 {
411 	struct adapter	*adapter;
412 	int		error = 0;
413 	u16		eeprom_data;
414 
415 	INIT_DEBUGOUT("igb_attach: begin");
416 
417 	if (resource_disabled("igb", device_get_unit(dev))) {
418 		device_printf(dev, "Disabled by device hint\n");
419 		return (ENXIO);
420 	}
421 
422 	adapter = device_get_softc(dev);
423 	adapter->dev = adapter->osdep.dev = dev;
424 	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425 
426 	/* SYSCTLs */
427 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430 	    igb_sysctl_nvm_info, "I", "NVM Information");
431 
432 	igb_set_sysctl_value(adapter, "enable_aim",
433 	    "Interrupt Moderation", &adapter->enable_aim,
434 	    igb_enable_aim);
435 
436 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439 	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440 
441 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442 
443 	/* Determine hardware and mac info */
444 	igb_identify_hardware(adapter);
445 
446 	/* Setup PCI resources */
447 	if (igb_allocate_pci_resources(adapter)) {
448 		device_printf(dev, "Allocation of PCI resources failed\n");
449 		error = ENXIO;
450 		goto err_pci;
451 	}
452 
453 	/* Do Shared Code initialization */
454 	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455 		device_printf(dev, "Setup of Shared code failed\n");
456 		error = ENXIO;
457 		goto err_pci;
458 	}
459 
460 	e1000_get_bus_info(&adapter->hw);
461 
462 	/* Sysctls for limiting the amount of work done in the taskqueues */
463 	igb_set_sysctl_value(adapter, "rx_processing_limit",
464 	    "max number of rx packets to process",
465 	    &adapter->rx_process_limit, igb_rx_process_limit);
466 
467 	igb_set_sysctl_value(adapter, "tx_processing_limit",
468 	    "max number of tx packets to process",
469 	    &adapter->tx_process_limit, igb_tx_process_limit);
470 
471 	/*
472 	 * Validate number of transmit and receive descriptors. It
473 	 * must not exceed hardware maximum, and must be multiple
474 	 * of E1000_DBA_ALIGN.
475 	 */
476 	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477 	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479 		    IGB_DEFAULT_TXD, igb_txd);
480 		adapter->num_tx_desc = IGB_DEFAULT_TXD;
481 	} else
482 		adapter->num_tx_desc = igb_txd;
483 	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484 	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486 		    IGB_DEFAULT_RXD, igb_rxd);
487 		adapter->num_rx_desc = IGB_DEFAULT_RXD;
488 	} else
489 		adapter->num_rx_desc = igb_rxd;
490 
491 	adapter->hw.mac.autoneg = DO_AUTO_NEG;
492 	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493 	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494 
495 	/* Copper options */
496 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497 		adapter->hw.phy.mdix = AUTO_ALL_MODES;
498 		adapter->hw.phy.disable_polarity_correction = FALSE;
499 		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500 	}
501 
502 	/*
503 	 * Set the frame limits assuming
504 	 * standard ethernet sized frames.
505 	 */
506 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
507 
508 	/*
509 	** Allocate and Setup Queues
510 	*/
511 	if (igb_allocate_queues(adapter)) {
512 		error = ENOMEM;
513 		goto err_pci;
514 	}
515 
516 	/* Allocate the appropriate stats memory */
517 	if (adapter->vf_ifp) {
518 		adapter->stats =
519 		    (struct e1000_vf_stats *)malloc(sizeof \
520 		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
521 		igb_vf_init_stats(adapter);
522 	} else
523 		adapter->stats =
524 		    (struct e1000_hw_stats *)malloc(sizeof \
525 		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526 	if (adapter->stats == NULL) {
527 		device_printf(dev, "Can not allocate stats memory\n");
528 		error = ENOMEM;
529 		goto err_late;
530 	}
531 
532 	/* Allocate multicast array memory. */
533 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535 	if (adapter->mta == NULL) {
536 		device_printf(dev, "Can not allocate multicast setup array\n");
537 		error = ENOMEM;
538 		goto err_late;
539 	}
540 
541 	/* Some adapter-specific advanced features */
542 	if (adapter->hw.mac.type >= e1000_i350) {
543 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545 		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546 		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549 		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550 		    adapter, 0, igb_sysctl_eee, "I",
551 		    "Disable Energy Efficient Ethernet");
552 		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553 			if (adapter->hw.mac.type == e1000_i354)
554 				e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
555 			else
556 				e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
557 		}
558 	}
559 
560 	/*
561 	** Start from a known state, this is
562 	** important in reading the nvm and
563 	** mac from that.
564 	*/
565 	e1000_reset_hw(&adapter->hw);
566 
567 	/* Make sure we have a good EEPROM before we read from it */
568 	if (((adapter->hw.mac.type != e1000_i210) &&
569 	    (adapter->hw.mac.type != e1000_i211)) &&
570 	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571 		/*
572 		** Some PCI-E parts fail the first check due to
573 		** the link being in sleep state, call it again,
574 		** if it fails a second time its a real issue.
575 		*/
576 		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577 			device_printf(dev,
578 			    "The EEPROM Checksum Is Not Valid\n");
579 			error = EIO;
580 			goto err_late;
581 		}
582 	}
583 
584 	/*
585 	** Copy the permanent MAC address out of the EEPROM
586 	*/
587 	if (e1000_read_mac_addr(&adapter->hw) < 0) {
588 		device_printf(dev, "EEPROM read error while reading MAC"
589 		    " address\n");
590 		error = EIO;
591 		goto err_late;
592 	}
593 	/* Check its sanity */
594 	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595 		device_printf(dev, "Invalid MAC address\n");
596 		error = EIO;
597 		goto err_late;
598 	}
599 
600 	/* Setup OS specific network interface */
601 	if (igb_setup_interface(dev, adapter) != 0)
602 		goto err_late;
603 
604 	/* Now get a good starting state */
605 	igb_reset(adapter);
606 
607 	/* Initialize statistics */
608 	igb_update_stats_counters(adapter);
609 
610 	adapter->hw.mac.get_link_status = 1;
611 	igb_update_link_status(adapter);
612 
613 	/* Indicate SOL/IDER usage */
614 	if (e1000_check_reset_block(&adapter->hw))
615 		device_printf(dev,
616 		    "PHY reset is blocked due to SOL/IDER session.\n");
617 
618 	/* Determine if we have to control management hardware */
619 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620 
621 	/*
622 	 * Setup Wake-on-Lan
623 	 */
624 	/* APME bit in EEPROM is mapped to WUC.APME */
625 	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626 	if (eeprom_data)
627 		adapter->wol = E1000_WUFC_MAG;
628 
629 	/* Register for VLAN events */
630 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631 	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633 	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634 
635 	igb_add_hw_stats(adapter);
636 
637 	/* Tell the stack that the interface is not active */
638 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639 	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
640 
641 	adapter->led_dev = led_create(igb_led_func, adapter,
642 	    device_get_nameunit(dev));
643 
644 	/*
645 	** Configure Interrupts
646 	*/
647 	if ((adapter->msix > 1) && (igb_enable_msix))
648 		error = igb_allocate_msix(adapter);
649 	else /* MSI or Legacy */
650 		error = igb_allocate_legacy(adapter);
651 	if (error)
652 		goto err_late;
653 
654 #ifdef DEV_NETMAP
655 	igb_netmap_attach(adapter);
656 #endif /* DEV_NETMAP */
657 	INIT_DEBUGOUT("igb_attach: end");
658 
659 	return (0);
660 
661 err_late:
662 	if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
663 		return(error);
664 	igb_free_transmit_structures(adapter);
665 	igb_free_receive_structures(adapter);
666 	igb_release_hw_control(adapter);
667 err_pci:
668 	igb_free_pci_resources(adapter);
669 	if (adapter->ifp != NULL)
670 		if_free(adapter->ifp);
671 	free(adapter->mta, M_DEVBUF);
672 	IGB_CORE_LOCK_DESTROY(adapter);
673 
674 	return (error);
675 }
676 
677 /*********************************************************************
678  *  Device removal routine
679  *
680  *  The detach entry point is called when the driver is being removed.
681  *  This routine stops the adapter and deallocates all the resources
682  *  that were allocated for driver operation.
683  *
684  *  return 0 on success, positive on failure
685  *********************************************************************/
686 
687 static int
igb_detach(device_t dev)688 igb_detach(device_t dev)
689 {
690 	struct adapter	*adapter = device_get_softc(dev);
691 	struct ifnet	*ifp = adapter->ifp;
692 
693 	INIT_DEBUGOUT("igb_detach: begin");
694 
695 	/* Make sure VLANS are not using driver */
696 	if (adapter->ifp->if_vlantrunk != NULL) {
697 		device_printf(dev,"Vlan in use, detach first\n");
698 		return (EBUSY);
699 	}
700 
701 	ether_ifdetach(adapter->ifp);
702 
703 	if (adapter->led_dev != NULL)
704 		led_destroy(adapter->led_dev);
705 
706 #ifdef DEVICE_POLLING
707 	if (ifp->if_capenable & IFCAP_POLLING)
708 		ether_poll_deregister(ifp);
709 #endif
710 
711 	IGB_CORE_LOCK(adapter);
712 	adapter->in_detach = 1;
713 	igb_stop(adapter);
714 	IGB_CORE_UNLOCK(adapter);
715 
716 	e1000_phy_hw_reset(&adapter->hw);
717 
718 	/* Give control back to firmware */
719 	igb_release_manageability(adapter);
720 	igb_release_hw_control(adapter);
721 
722 	/* Unregister VLAN events */
723 	if (adapter->vlan_attach != NULL)
724 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
725 	if (adapter->vlan_detach != NULL)
726 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
727 
728 	callout_drain(&adapter->timer);
729 
730 #ifdef DEV_NETMAP
731 	netmap_detach(adapter->ifp);
732 #endif /* DEV_NETMAP */
733 	igb_free_pci_resources(adapter);
734 	bus_generic_detach(dev);
735 	if_free(ifp);
736 
737 	igb_free_transmit_structures(adapter);
738 	igb_free_receive_structures(adapter);
739 	if (adapter->mta != NULL)
740 		free(adapter->mta, M_DEVBUF);
741 
742 	IGB_CORE_LOCK_DESTROY(adapter);
743 
744 	return (0);
745 }
746 
747 /*********************************************************************
748  *
749  *  Shutdown entry point
750  *
751  **********************************************************************/
752 
753 static int
igb_shutdown(device_t dev)754 igb_shutdown(device_t dev)
755 {
756 	return igb_suspend(dev);
757 }
758 
759 /*
760  * Suspend/resume device methods.
761  */
762 static int
igb_suspend(device_t dev)763 igb_suspend(device_t dev)
764 {
765 	struct adapter *adapter = device_get_softc(dev);
766 
767 	IGB_CORE_LOCK(adapter);
768 
769 	igb_stop(adapter);
770 
771         igb_release_manageability(adapter);
772 	igb_release_hw_control(adapter);
773 	igb_enable_wakeup(dev);
774 
775 	IGB_CORE_UNLOCK(adapter);
776 
777 	return bus_generic_suspend(dev);
778 }
779 
780 static int
igb_resume(device_t dev)781 igb_resume(device_t dev)
782 {
783 	struct adapter *adapter = device_get_softc(dev);
784 	struct tx_ring	*txr = adapter->tx_rings;
785 	struct ifnet *ifp = adapter->ifp;
786 
787 	IGB_CORE_LOCK(adapter);
788 	igb_init_locked(adapter);
789 	igb_init_manageability(adapter);
790 
791 	if ((ifp->if_flags & IFF_UP) &&
792 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
793 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
794 			IGB_TX_LOCK(txr);
795 #ifndef IGB_LEGACY_TX
796 			/* Process the stack queue only if not depleted */
797 			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
798 			    !drbr_empty(ifp, txr->br))
799 				igb_mq_start_locked(ifp, txr);
800 #else
801 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
802 				igb_start_locked(txr, ifp);
803 #endif
804 			IGB_TX_UNLOCK(txr);
805 		}
806 	}
807 	IGB_CORE_UNLOCK(adapter);
808 
809 	return bus_generic_resume(dev);
810 }
811 
812 
813 #ifdef IGB_LEGACY_TX
814 
815 /*********************************************************************
816  *  Transmit entry point
817  *
818  *  igb_start is called by the stack to initiate a transmit.
819  *  The driver will remain in this routine as long as there are
820  *  packets to transmit and transmit resources are available.
821  *  In case resources are not available stack is notified and
822  *  the packet is requeued.
823  **********************************************************************/
824 
825 static void
igb_start_locked(struct tx_ring * txr,struct ifnet * ifp)826 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
827 {
828 	struct adapter	*adapter = ifp->if_softc;
829 	struct mbuf	*m_head;
830 
831 	IGB_TX_LOCK_ASSERT(txr);
832 
833 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
834 	    IFF_DRV_RUNNING)
835 		return;
836 	if (!adapter->link_active)
837 		return;
838 
839 	/* Call cleanup if number of TX descriptors low */
840 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
841 		igb_txeof(txr);
842 
843 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
844 		if (txr->tx_avail <= IGB_MAX_SCATTER) {
845 			txr->queue_status |= IGB_QUEUE_DEPLETED;
846 			break;
847 		}
848 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
849 		if (m_head == NULL)
850 			break;
851 		/*
852 		 *  Encapsulation can modify our pointer, and or make it
853 		 *  NULL on failure.  In that event, we can't requeue.
854 		 */
855 		if (igb_xmit(txr, &m_head)) {
856 			if (m_head != NULL)
857 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
858 			if (txr->tx_avail <= IGB_MAX_SCATTER)
859 				txr->queue_status |= IGB_QUEUE_DEPLETED;
860 			break;
861 		}
862 
863 		/* Send a copy of the frame to the BPF listener */
864 		ETHER_BPF_MTAP(ifp, m_head);
865 
866 		/* Set watchdog on */
867 		txr->watchdog_time = ticks;
868 		txr->queue_status |= IGB_QUEUE_WORKING;
869 	}
870 }
871 
872 /*
873  * Legacy TX driver routine, called from the
874  * stack, always uses tx[0], and spins for it.
875  * Should not be used with multiqueue tx
876  */
877 static void
igb_start(struct ifnet * ifp)878 igb_start(struct ifnet *ifp)
879 {
880 	struct adapter	*adapter = ifp->if_softc;
881 	struct tx_ring	*txr = adapter->tx_rings;
882 
883 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
884 		IGB_TX_LOCK(txr);
885 		igb_start_locked(txr, ifp);
886 		IGB_TX_UNLOCK(txr);
887 	}
888 	return;
889 }
890 
891 #else /* ~IGB_LEGACY_TX */
892 
893 /*
894 ** Multiqueue Transmit Entry:
895 **  quick turnaround to the stack
896 **
897 */
898 static int
igb_mq_start(struct ifnet * ifp,struct mbuf * m)899 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
900 {
901 	struct adapter		*adapter = ifp->if_softc;
902 	struct igb_queue	*que;
903 	struct tx_ring		*txr;
904 	int 			i, err = 0;
905 #ifdef	RSS
906 	uint32_t		bucket_id;
907 #endif
908 
909 	/* Which queue to use */
910 	/*
911 	 * When doing RSS, map it to the same outbound queue
912 	 * as the incoming flow would be mapped to.
913 	 *
914 	 * If everything is setup correctly, it should be the
915 	 * same bucket that the current CPU we're on is.
916 	 */
917 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
918 #ifdef	RSS
919 		if (rss_hash2bucket(m->m_pkthdr.flowid,
920 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
921 			/* XXX TODO: spit out something if bucket_id > num_queues? */
922 			i = bucket_id % adapter->num_queues;
923 		} else {
924 #endif
925 			i = m->m_pkthdr.flowid % adapter->num_queues;
926 #ifdef	RSS
927 		}
928 #endif
929 	} else {
930 		i = curcpu % adapter->num_queues;
931 	}
932 	txr = &adapter->tx_rings[i];
933 	que = &adapter->queues[i];
934 
935 	err = drbr_enqueue(ifp, txr->br, m);
936 	if (err)
937 		return (err);
938 	if (IGB_TX_TRYLOCK(txr)) {
939 		igb_mq_start_locked(ifp, txr);
940 		IGB_TX_UNLOCK(txr);
941 	} else
942 		taskqueue_enqueue(que->tq, &txr->txq_task);
943 
944 	return (0);
945 }
946 
947 static int
igb_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)948 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
949 {
950 	struct adapter  *adapter = txr->adapter;
951         struct mbuf     *next;
952         int             err = 0, enq = 0;
953 
954 	IGB_TX_LOCK_ASSERT(txr);
955 
956 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
957 	    adapter->link_active == 0)
958 		return (ENETDOWN);
959 
960 	/* Process the queue */
961 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
962 		if ((err = igb_xmit(txr, &next)) != 0) {
963 			if (next == NULL) {
964 				/* It was freed, move forward */
965 				drbr_advance(ifp, txr->br);
966 			} else {
967 				/*
968 				 * Still have one left, it may not be
969 				 * the same since the transmit function
970 				 * may have changed it.
971 				 */
972 				drbr_putback(ifp, txr->br, next);
973 			}
974 			break;
975 		}
976 		drbr_advance(ifp, txr->br);
977 		enq++;
978 		if (next->m_flags & M_MCAST && adapter->vf_ifp)
979 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
980 		ETHER_BPF_MTAP(ifp, next);
981 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
982 			break;
983 	}
984 	if (enq > 0) {
985 		/* Set the watchdog */
986 		txr->queue_status |= IGB_QUEUE_WORKING;
987 		txr->watchdog_time = ticks;
988 	}
989 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
990 		igb_txeof(txr);
991 	if (txr->tx_avail <= IGB_MAX_SCATTER)
992 		txr->queue_status |= IGB_QUEUE_DEPLETED;
993 	return (err);
994 }
995 
996 /*
997  * Called from a taskqueue to drain queued transmit packets.
998  */
999 static void
igb_deferred_mq_start(void * arg,int pending)1000 igb_deferred_mq_start(void *arg, int pending)
1001 {
1002 	struct tx_ring *txr = arg;
1003 	struct adapter *adapter = txr->adapter;
1004 	struct ifnet *ifp = adapter->ifp;
1005 
1006 	IGB_TX_LOCK(txr);
1007 	if (!drbr_empty(ifp, txr->br))
1008 		igb_mq_start_locked(ifp, txr);
1009 	IGB_TX_UNLOCK(txr);
1010 }
1011 
1012 /*
1013 ** Flush all ring buffers
1014 */
1015 static void
igb_qflush(struct ifnet * ifp)1016 igb_qflush(struct ifnet *ifp)
1017 {
1018 	struct adapter	*adapter = ifp->if_softc;
1019 	struct tx_ring	*txr = adapter->tx_rings;
1020 	struct mbuf	*m;
1021 
1022 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1023 		IGB_TX_LOCK(txr);
1024 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1025 			m_freem(m);
1026 		IGB_TX_UNLOCK(txr);
1027 	}
1028 	if_qflush(ifp);
1029 }
1030 #endif /* ~IGB_LEGACY_TX */
1031 
1032 /*********************************************************************
1033  *  Ioctl entry point
1034  *
1035  *  igb_ioctl is called when the user wants to configure the
1036  *  interface.
1037  *
1038  *  return 0 on success, positive on failure
1039  **********************************************************************/
1040 
1041 static int
igb_ioctl(struct ifnet * ifp,u_long command,caddr_t data)1042 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1043 {
1044 	struct adapter	*adapter = ifp->if_softc;
1045 	struct ifreq	*ifr = (struct ifreq *)data;
1046 #if defined(INET) || defined(INET6)
1047 	struct ifaddr	*ifa = (struct ifaddr *)data;
1048 #endif
1049 	bool		avoid_reset = FALSE;
1050 	int		error = 0;
1051 
1052 	if (adapter->in_detach)
1053 		return (error);
1054 
1055 	switch (command) {
1056 	case SIOCSIFADDR:
1057 #ifdef INET
1058 		if (ifa->ifa_addr->sa_family == AF_INET)
1059 			avoid_reset = TRUE;
1060 #endif
1061 #ifdef INET6
1062 		if (ifa->ifa_addr->sa_family == AF_INET6)
1063 			avoid_reset = TRUE;
1064 #endif
1065 		/*
1066 		** Calling init results in link renegotiation,
1067 		** so we avoid doing it when possible.
1068 		*/
1069 		if (avoid_reset) {
1070 			ifp->if_flags |= IFF_UP;
1071 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1072 				igb_init(adapter);
1073 #ifdef INET
1074 			if (!(ifp->if_flags & IFF_NOARP))
1075 				arp_ifinit(ifp, ifa);
1076 #endif
1077 		} else
1078 			error = ether_ioctl(ifp, command, data);
1079 		break;
1080 	case SIOCSIFMTU:
1081 	    {
1082 		int max_frame_size;
1083 
1084 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1085 
1086 		IGB_CORE_LOCK(adapter);
1087 		max_frame_size = 9234;
1088 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1089 		    ETHER_CRC_LEN) {
1090 			IGB_CORE_UNLOCK(adapter);
1091 			error = EINVAL;
1092 			break;
1093 		}
1094 
1095 		ifp->if_mtu = ifr->ifr_mtu;
1096 		adapter->max_frame_size =
1097 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1098 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
1099 			igb_init_locked(adapter);
1100 		IGB_CORE_UNLOCK(adapter);
1101 		break;
1102 	    }
1103 	case SIOCSIFFLAGS:
1104 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1105 		    SIOCSIFFLAGS (Set Interface Flags)");
1106 		IGB_CORE_LOCK(adapter);
1107 		if (ifp->if_flags & IFF_UP) {
1108 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1109 				if ((ifp->if_flags ^ adapter->if_flags) &
1110 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1111 					igb_disable_promisc(adapter);
1112 					igb_set_promisc(adapter);
1113 				}
1114 			} else
1115 				igb_init_locked(adapter);
1116 		} else
1117 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1118 				igb_stop(adapter);
1119 		adapter->if_flags = ifp->if_flags;
1120 		IGB_CORE_UNLOCK(adapter);
1121 		break;
1122 	case SIOCADDMULTI:
1123 	case SIOCDELMULTI:
1124 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1125 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1126 			IGB_CORE_LOCK(adapter);
1127 			igb_disable_intr(adapter);
1128 			igb_set_multi(adapter);
1129 #ifdef DEVICE_POLLING
1130 			if (!(ifp->if_capenable & IFCAP_POLLING))
1131 #endif
1132 				igb_enable_intr(adapter);
1133 			IGB_CORE_UNLOCK(adapter);
1134 		}
1135 		break;
1136 	case SIOCSIFMEDIA:
1137 		/* Check SOL/IDER usage */
1138 		IGB_CORE_LOCK(adapter);
1139 		if (e1000_check_reset_block(&adapter->hw)) {
1140 			IGB_CORE_UNLOCK(adapter);
1141 			device_printf(adapter->dev, "Media change is"
1142 			    " blocked due to SOL/IDER session.\n");
1143 			break;
1144 		}
1145 		IGB_CORE_UNLOCK(adapter);
1146 	case SIOCGIFMEDIA:
1147 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1148 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1149 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1150 		break;
1151 	case SIOCSIFCAP:
1152 	    {
1153 		int mask, reinit;
1154 
1155 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1156 		reinit = 0;
1157 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1158 #ifdef DEVICE_POLLING
1159 		if (mask & IFCAP_POLLING) {
1160 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1161 				error = ether_poll_register(igb_poll, ifp);
1162 				if (error)
1163 					return (error);
1164 				IGB_CORE_LOCK(adapter);
1165 				igb_disable_intr(adapter);
1166 				ifp->if_capenable |= IFCAP_POLLING;
1167 				IGB_CORE_UNLOCK(adapter);
1168 			} else {
1169 				error = ether_poll_deregister(ifp);
1170 				/* Enable interrupt even in error case */
1171 				IGB_CORE_LOCK(adapter);
1172 				igb_enable_intr(adapter);
1173 				ifp->if_capenable &= ~IFCAP_POLLING;
1174 				IGB_CORE_UNLOCK(adapter);
1175 			}
1176 		}
1177 #endif
1178 #if __FreeBSD_version >= 1000000
1179 		/* HW cannot turn these on/off separately */
1180 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1181 			ifp->if_capenable ^= IFCAP_RXCSUM;
1182 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1183 			reinit = 1;
1184 		}
1185 		if (mask & IFCAP_TXCSUM) {
1186 			ifp->if_capenable ^= IFCAP_TXCSUM;
1187 			reinit = 1;
1188 		}
1189 		if (mask & IFCAP_TXCSUM_IPV6) {
1190 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1191 			reinit = 1;
1192 		}
1193 #else
1194 		if (mask & IFCAP_HWCSUM) {
1195 			ifp->if_capenable ^= IFCAP_HWCSUM;
1196 			reinit = 1;
1197 		}
1198 #endif
1199 		if (mask & IFCAP_TSO4) {
1200 			ifp->if_capenable ^= IFCAP_TSO4;
1201 			reinit = 1;
1202 		}
1203 		if (mask & IFCAP_TSO6) {
1204 			ifp->if_capenable ^= IFCAP_TSO6;
1205 			reinit = 1;
1206 		}
1207 		if (mask & IFCAP_VLAN_HWTAGGING) {
1208 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1209 			reinit = 1;
1210 		}
1211 		if (mask & IFCAP_VLAN_HWFILTER) {
1212 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1213 			reinit = 1;
1214 		}
1215 		if (mask & IFCAP_VLAN_HWTSO) {
1216 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1217 			reinit = 1;
1218 		}
1219 		if (mask & IFCAP_LRO) {
1220 			ifp->if_capenable ^= IFCAP_LRO;
1221 			reinit = 1;
1222 		}
1223 		if (mask & IFCAP_WOL) {
1224 			if (mask & IFCAP_WOL_MAGIC)
1225 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1226 			if (mask & IFCAP_WOL_MCAST)
1227 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1228 			if (mask & IFCAP_WOL_UCAST)
1229 				ifp->if_capenable ^= IFCAP_WOL_UCAST;
1230 		}
1231 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1232 			igb_init(adapter);
1233 		VLAN_CAPABILITIES(ifp);
1234 		break;
1235 	    }
1236 
1237 	default:
1238 		error = ether_ioctl(ifp, command, data);
1239 		break;
1240 	}
1241 
1242 	return (error);
1243 }
1244 
1245 
1246 /*********************************************************************
1247  *  Init entry point
1248  *
1249  *  This routine is used in two ways. It is used by the stack as
1250  *  init entry point in network interface structure. It is also used
1251  *  by the driver as a hw/sw initialization routine to get to a
1252  *  consistent state.
1253  *
 *  Returns nothing; a receive-structure setup failure is only logged.
1255  **********************************************************************/
1256 
1257 static void
igb_init_locked(struct adapter * adapter)1258 igb_init_locked(struct adapter *adapter)
1259 {
1260 	struct ifnet	*ifp = adapter->ifp;
1261 	device_t	dev = adapter->dev;
1262 
1263 	INIT_DEBUGOUT("igb_init: begin");
1264 
1265 	IGB_CORE_LOCK_ASSERT(adapter);
1266 
1267 	igb_disable_intr(adapter);
1268 	callout_stop(&adapter->timer);
1269 
1270 	/* Get the latest mac address, User can use a LAA */
1271         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1272               ETHER_ADDR_LEN);
1273 
1274 	/* Put the address into the Receive Address Array */
1275 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1276 
1277 	igb_reset(adapter);
1278 	igb_update_link_status(adapter);
1279 
1280 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1281 
1282 	/* Set hardware offload abilities */
1283 	ifp->if_hwassist = 0;
1284 	if (ifp->if_capenable & IFCAP_TXCSUM) {
1285 #if __FreeBSD_version >= 1000000
1286 		ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
1287 		if (adapter->hw.mac.type != e1000_82575)
1288 			ifp->if_hwassist |= CSUM_IP_SCTP;
1289 #else
1290 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1291 #if __FreeBSD_version >= 800000
1292 		if (adapter->hw.mac.type != e1000_82575)
1293 			ifp->if_hwassist |= CSUM_SCTP;
1294 #endif
1295 #endif
1296 	}
1297 
1298 #if __FreeBSD_version >= 1000000
1299 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
1300 		ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
1301 		if (adapter->hw.mac.type != e1000_82575)
1302 			ifp->if_hwassist |= CSUM_IP6_SCTP;
1303 	}
1304 #endif
1305 	if (ifp->if_capenable & IFCAP_TSO)
1306 		ifp->if_hwassist |= CSUM_TSO;
1307 
1308 	/* Clear bad data from Rx FIFOs */
1309 	e1000_rx_fifo_flush_82575(&adapter->hw);
1310 
1311 	/* Configure for OS presence */
1312 	igb_init_manageability(adapter);
1313 
1314 	/* Prepare transmit descriptors and buffers */
1315 	igb_setup_transmit_structures(adapter);
1316 	igb_initialize_transmit_units(adapter);
1317 
1318 	/* Setup Multicast table */
1319 	igb_set_multi(adapter);
1320 
1321 	/*
1322 	** Figure out the desired mbuf pool
1323 	** for doing jumbo/packetsplit
1324 	*/
1325 	if (adapter->max_frame_size <= 2048)
1326 		adapter->rx_mbuf_sz = MCLBYTES;
1327 #ifndef CONTIGMALLOC_WORKS
1328        else
1329                adapter->rx_mbuf_sz = MJUMPAGESIZE;
1330 #else
1331 	else if (adapter->max_frame_size <= 4096)
1332 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1333 	else
1334 		adapter->rx_mbuf_sz = MJUM9BYTES;
1335 #endif
1336 
1337 	/* Prepare receive descriptors and buffers */
1338 	if (igb_setup_receive_structures(adapter)) {
1339 		device_printf(dev, "Could not setup receive structures\n");
1340 		return;
1341 	}
1342 	igb_initialize_receive_units(adapter);
1343 
1344         /* Enable VLAN support */
1345 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1346 		igb_setup_vlan_hw_support(adapter);
1347 
1348 	/* Don't lose promiscuous settings */
1349 	igb_set_promisc(adapter);
1350 
1351 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1352 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1353 
1354 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1355 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1356 
1357 	if (adapter->msix > 1) /* Set up queue routing */
1358 		igb_configure_queues(adapter);
1359 
1360 	/* this clears any pending interrupts */
1361 	E1000_READ_REG(&adapter->hw, E1000_ICR);
1362 #ifdef DEVICE_POLLING
1363 	/*
1364 	 * Only enable interrupts if we are not polling, make sure
1365 	 * they are off otherwise.
1366 	 */
1367 	if (ifp->if_capenable & IFCAP_POLLING)
1368 		igb_disable_intr(adapter);
1369 	else
1370 #endif /* DEVICE_POLLING */
1371 	{
1372 		igb_enable_intr(adapter);
1373 		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1374 	}
1375 
1376 	/* Set Energy Efficient Ethernet */
1377 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1378 		if (adapter->hw.mac.type == e1000_i354)
1379 			e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
1380 		else
1381 			e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
1382 	}
1383 }
1384 
/*
 * Unlocked init entry point: wraps igb_init_locked() in the core lock.
 * 'arg' is the struct adapter of the device.
 */
static void
igb_init(void *arg)
{
	struct adapter *sc = (struct adapter *)arg;

	IGB_CORE_LOCK(sc);
	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1394 
1395 
1396 static void
igb_handle_que(void * context,int pending)1397 igb_handle_que(void *context, int pending)
1398 {
1399 	struct igb_queue *que = context;
1400 	struct adapter *adapter = que->adapter;
1401 	struct tx_ring *txr = que->txr;
1402 	struct ifnet	*ifp = adapter->ifp;
1403 
1404 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1405 		bool	more;
1406 
1407 		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1408 
1409 		IGB_TX_LOCK(txr);
1410 		igb_txeof(txr);
1411 #ifndef IGB_LEGACY_TX
1412 		/* Process the stack queue only if not depleted */
1413 		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1414 		    !drbr_empty(ifp, txr->br))
1415 			igb_mq_start_locked(ifp, txr);
1416 #else
1417 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1418 			igb_start_locked(txr, ifp);
1419 #endif
1420 		IGB_TX_UNLOCK(txr);
1421 		/* Do we need another? */
1422 		if (more) {
1423 			taskqueue_enqueue(que->tq, &que->que_task);
1424 			return;
1425 		}
1426 	}
1427 
1428 #ifdef DEVICE_POLLING
1429 	if (ifp->if_capenable & IFCAP_POLLING)
1430 		return;
1431 #endif
1432 	/* Reenable this interrupt */
1433 	if (que->eims)
1434 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1435 	else
1436 		igb_enable_intr(adapter);
1437 }
1438 
1439 /* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *sc = (struct adapter *)context;

	/* Run the link handler under the core lock. */
	IGB_CORE_LOCK(sc);
	igb_handle_link_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1449 
1450 static void
igb_handle_link_locked(struct adapter * adapter)1451 igb_handle_link_locked(struct adapter *adapter)
1452 {
1453 	struct tx_ring	*txr = adapter->tx_rings;
1454 	struct ifnet *ifp = adapter->ifp;
1455 
1456 	IGB_CORE_LOCK_ASSERT(adapter);
1457 	adapter->hw.mac.get_link_status = 1;
1458 	igb_update_link_status(adapter);
1459 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1460 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1461 			IGB_TX_LOCK(txr);
1462 #ifndef IGB_LEGACY_TX
1463 			/* Process the stack queue only if not depleted */
1464 			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1465 			    !drbr_empty(ifp, txr->br))
1466 				igb_mq_start_locked(ifp, txr);
1467 #else
1468 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1469 				igb_start_locked(txr, ifp);
1470 #endif
1471 			IGB_TX_UNLOCK(txr);
1472 		}
1473 	}
1474 }
1475 
1476 /*********************************************************************
1477  *
1478  *  MSI/Legacy Deferred
1479  *  Interrupt Service routine
1480  *
1481  *********************************************************************/
1482 static int
igb_irq_fast(void * arg)1483 igb_irq_fast(void *arg)
1484 {
1485 	struct adapter		*adapter = arg;
1486 	struct igb_queue	*que = adapter->queues;
1487 	u32			reg_icr;
1488 
1489 
1490 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1491 
1492 	/* Hot eject?  */
1493 	if (reg_icr == 0xffffffff)
1494 		return FILTER_STRAY;
1495 
1496 	/* Definitely not our interrupt.  */
1497 	if (reg_icr == 0x0)
1498 		return FILTER_STRAY;
1499 
1500 	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1501 		return FILTER_STRAY;
1502 
1503 	/*
1504 	 * Mask interrupts until the taskqueue is finished running.  This is
1505 	 * cheap, just assume that it is needed.  This also works around the
1506 	 * MSI message reordering errata on certain systems.
1507 	 */
1508 	igb_disable_intr(adapter);
1509 	taskqueue_enqueue(que->tq, &que->que_task);
1510 
1511 	/* Link status change */
1512 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1513 		taskqueue_enqueue(que->tq, &adapter->link_task);
1514 
1515 	if (reg_icr & E1000_ICR_RXO)
1516 		adapter->rx_overruns++;
1517 	return FILTER_HANDLED;
1518 }
1519 
#ifdef DEVICE_POLLING
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
/*
 * DEVICE_POLLING handler.
 *
 *  ifp   - interface being polled
 *  cmd   - poll command; POLL_AND_CHECK_STATUS additionally reads ICR to
 *          pick up link changes and receive overruns
 *  count - RX budget handed to igb_rxeof() for every queue
 *
 * On FreeBSD >= 800000 the function returns the rx_done counter that
 * igb_rxeof() updates; on older versions it returns nothing.
 */
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	u32			reg_icr, rx_done = 0;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link_locked(adapter);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++) {
		/*
		 * The TX cleanup budget is per queue.  It used to be a
		 * single counter shared by all queues; once it hit zero the
		 * post-decrement wrapped the unsigned value around, leaving
		 * the remaining queues with an effectively unbounded
		 * cleanup loop while holding the TX lock.
		 */
		u32 loop = IGB_MAX_LOOP;

		que = &adapter->queues[i];
		txr = que->txr;

		igb_rxeof(que, count, &rx_done);

		IGB_TX_LOCK(txr);
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#ifndef IGB_LEGACY_TX
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */
1577 
1578 /*********************************************************************
1579  *
1580  *  MSIX Que Interrupt Service routine
1581  *
1582  **********************************************************************/
1583 static void
igb_msix_que(void * arg)1584 igb_msix_que(void *arg)
1585 {
1586 	struct igb_queue *que = arg;
1587 	struct adapter *adapter = que->adapter;
1588 	struct ifnet   *ifp = adapter->ifp;
1589 	struct tx_ring *txr = que->txr;
1590 	struct rx_ring *rxr = que->rxr;
1591 	u32		newitr = 0;
1592 	bool		more_rx;
1593 
1594 	/* Ignore spurious interrupts */
1595 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1596 		return;
1597 
1598 	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1599 	++que->irqs;
1600 
1601 	IGB_TX_LOCK(txr);
1602 	igb_txeof(txr);
1603 #ifndef IGB_LEGACY_TX
1604 	/* Process the stack queue only if not depleted */
1605 	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1606 	    !drbr_empty(ifp, txr->br))
1607 		igb_mq_start_locked(ifp, txr);
1608 #else
1609 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1610 		igb_start_locked(txr, ifp);
1611 #endif
1612 	IGB_TX_UNLOCK(txr);
1613 
1614 	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1615 
1616 	if (adapter->enable_aim == FALSE)
1617 		goto no_calc;
1618 	/*
1619 	** Do Adaptive Interrupt Moderation:
1620         **  - Write out last calculated setting
1621 	**  - Calculate based on average size over
1622 	**    the last interval.
1623 	*/
1624         if (que->eitr_setting)
1625                 E1000_WRITE_REG(&adapter->hw,
1626                     E1000_EITR(que->msix), que->eitr_setting);
1627 
1628         que->eitr_setting = 0;
1629 
1630         /* Idle, do nothing */
1631         if ((txr->bytes == 0) && (rxr->bytes == 0))
1632                 goto no_calc;
1633 
1634         /* Used half Default if sub-gig */
1635         if (adapter->link_speed != 1000)
1636                 newitr = IGB_DEFAULT_ITR / 2;
1637         else {
1638 		if ((txr->bytes) && (txr->packets))
1639                 	newitr = txr->bytes/txr->packets;
1640 		if ((rxr->bytes) && (rxr->packets))
1641 			newitr = max(newitr,
1642 			    (rxr->bytes / rxr->packets));
1643                 newitr += 24; /* account for hardware frame, crc */
1644 		/* set an upper boundary */
1645 		newitr = min(newitr, 3000);
1646 		/* Be nice to the mid range */
1647                 if ((newitr > 300) && (newitr < 1200))
1648                         newitr = (newitr / 3);
1649                 else
1650                         newitr = (newitr / 2);
1651         }
1652         newitr &= 0x7FFC;  /* Mask invalid bits */
1653         if (adapter->hw.mac.type == e1000_82575)
1654                 newitr |= newitr << 16;
1655         else
1656                 newitr |= E1000_EITR_CNT_IGNR;
1657 
1658         /* save for next interrupt */
1659         que->eitr_setting = newitr;
1660 
1661         /* Reset state */
1662         txr->bytes = 0;
1663         txr->packets = 0;
1664         rxr->bytes = 0;
1665         rxr->packets = 0;
1666 
1667 no_calc:
1668 	/* Schedule a clean task if needed*/
1669 	if (more_rx)
1670 		taskqueue_enqueue(que->tq, &que->que_task);
1671 	else
1672 		/* Reenable this interrupt */
1673 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1674 	return;
1675 }
1676 
1677 
1678 /*********************************************************************
1679  *
1680  *  MSIX Link Interrupt Service routine
1681  *
1682  **********************************************************************/
1683 
1684 static void
igb_msix_link(void * arg)1685 igb_msix_link(void *arg)
1686 {
1687 	struct adapter	*adapter = arg;
1688 	u32       	icr;
1689 
1690 	++adapter->link_irq;
1691 	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1692 	if (!(icr & E1000_ICR_LSC))
1693 		goto spurious;
1694 	igb_handle_link(adapter, 0);
1695 
1696 spurious:
1697 	/* Rearm */
1698 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1699 	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1700 	return;
1701 }
1702 
1703 
1704 /*********************************************************************
1705  *
1706  *  Media Ioctl callback
1707  *
1708  *  This routine is called whenever the user queries the status of
1709  *  the interface using ifconfig.
1710  *
1711  **********************************************************************/
1712 static void
igb_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)1713 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1714 {
1715 	struct adapter *adapter = ifp->if_softc;
1716 
1717 	INIT_DEBUGOUT("igb_media_status: begin");
1718 
1719 	IGB_CORE_LOCK(adapter);
1720 	igb_update_link_status(adapter);
1721 
1722 	ifmr->ifm_status = IFM_AVALID;
1723 	ifmr->ifm_active = IFM_ETHER;
1724 
1725 	if (!adapter->link_active) {
1726 		IGB_CORE_UNLOCK(adapter);
1727 		return;
1728 	}
1729 
1730 	ifmr->ifm_status |= IFM_ACTIVE;
1731 
1732 	switch (adapter->link_speed) {
1733 	case 10:
1734 		ifmr->ifm_active |= IFM_10_T;
1735 		break;
1736 	case 100:
1737 		/*
1738 		** Support for 100Mb SFP - these are Fiber
1739 		** but the media type appears as serdes
1740 		*/
1741 		if (adapter->hw.phy.media_type ==
1742 		    e1000_media_type_internal_serdes)
1743 			ifmr->ifm_active |= IFM_100_FX;
1744 		else
1745 			ifmr->ifm_active |= IFM_100_TX;
1746 		break;
1747 	case 1000:
1748 		ifmr->ifm_active |= IFM_1000_T;
1749 		break;
1750 	case 2500:
1751 		ifmr->ifm_active |= IFM_2500_SX;
1752 		break;
1753 	}
1754 
1755 	if (adapter->link_duplex == FULL_DUPLEX)
1756 		ifmr->ifm_active |= IFM_FDX;
1757 	else
1758 		ifmr->ifm_active |= IFM_HDX;
1759 
1760 	IGB_CORE_UNLOCK(adapter);
1761 }
1762 
1763 /*********************************************************************
1764  *
1765  *  Media Ioctl callback
1766  *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.
1769  *
1770  **********************************************************************/
1771 static int
igb_media_change(struct ifnet * ifp)1772 igb_media_change(struct ifnet *ifp)
1773 {
1774 	struct adapter *adapter = ifp->if_softc;
1775 	struct ifmedia  *ifm = &adapter->media;
1776 
1777 	INIT_DEBUGOUT("igb_media_change: begin");
1778 
1779 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1780 		return (EINVAL);
1781 
1782 	IGB_CORE_LOCK(adapter);
1783 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1784 	case IFM_AUTO:
1785 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1786 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1787 		break;
1788 	case IFM_1000_LX:
1789 	case IFM_1000_SX:
1790 	case IFM_1000_T:
1791 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1792 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1793 		break;
1794 	case IFM_100_TX:
1795 		adapter->hw.mac.autoneg = FALSE;
1796 		adapter->hw.phy.autoneg_advertised = 0;
1797 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1798 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1799 		else
1800 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1801 		break;
1802 	case IFM_10_T:
1803 		adapter->hw.mac.autoneg = FALSE;
1804 		adapter->hw.phy.autoneg_advertised = 0;
1805 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1806 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1807 		else
1808 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1809 		break;
1810 	default:
1811 		device_printf(adapter->dev, "Unsupported media type\n");
1812 	}
1813 
1814 	igb_init_locked(adapter);
1815 	IGB_CORE_UNLOCK(adapter);
1816 
1817 	return (0);
1818 }
1819 
1820 
1821 /*********************************************************************
1822  *
1823  *  This routine maps the mbufs to Advanced TX descriptors.
1824  *
1825  **********************************************************************/
1826 static int
igb_xmit(struct tx_ring * txr,struct mbuf ** m_headp)1827 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1828 {
1829 	struct adapter  *adapter = txr->adapter;
1830 	u32		olinfo_status = 0, cmd_type_len;
1831 	int             i, j, error, nsegs;
1832 	int		first;
1833 	bool		remap = TRUE;
1834 	struct mbuf	*m_head;
1835 	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1836 	bus_dmamap_t	map;
1837 	struct igb_tx_buf *txbuf;
1838 	union e1000_adv_tx_desc *txd = NULL;
1839 
1840 	m_head = *m_headp;
1841 
1842 	/* Basic descriptor defines */
1843         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1844 	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1845 
1846 	if (m_head->m_flags & M_VLANTAG)
1847         	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1848 
1849         /*
1850          * Important to capture the first descriptor
1851          * used because it will contain the index of
1852          * the one we tell the hardware to report back
1853          */
1854         first = txr->next_avail_desc;
1855 	txbuf = &txr->tx_buffers[first];
1856 	map = txbuf->map;
1857 
1858 	/*
1859 	 * Map the packet for DMA.
1860 	 */
1861 retry:
1862 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1863 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1864 
1865 	if (__predict_false(error)) {
1866 		struct mbuf *m;
1867 
1868 		switch (error) {
1869 		case EFBIG:
1870 			/* Try it again? - one try */
1871 			if (remap == TRUE) {
1872 				remap = FALSE;
1873 				m = m_collapse(*m_headp, M_NOWAIT,
1874 				    IGB_MAX_SCATTER);
1875 				if (m == NULL) {
1876 					adapter->mbuf_defrag_failed++;
1877 					m_freem(*m_headp);
1878 					*m_headp = NULL;
1879 					return (ENOBUFS);
1880 				}
1881 				*m_headp = m;
1882 				goto retry;
1883 			} else
1884 				return (error);
1885 		default:
1886 			txr->no_tx_dma_setup++;
1887 			m_freem(*m_headp);
1888 			*m_headp = NULL;
1889 			return (error);
1890 		}
1891 	}
1892 
1893 	/* Make certain there are enough descriptors */
1894 	if (txr->tx_avail < (nsegs + 2)) {
1895 		txr->no_desc_avail++;
1896 		bus_dmamap_unload(txr->txtag, map);
1897 		return (ENOBUFS);
1898 	}
1899 	m_head = *m_headp;
1900 
1901 	/*
1902 	** Set up the appropriate offload context
1903 	** this will consume the first descriptor
1904 	*/
1905 	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1906 	if (__predict_false(error)) {
1907 		m_freem(*m_headp);
1908 		*m_headp = NULL;
1909 		return (error);
1910 	}
1911 
1912 	/* 82575 needs the queue index added */
1913 	if (adapter->hw.mac.type == e1000_82575)
1914 		olinfo_status |= txr->me << 4;
1915 
1916 	i = txr->next_avail_desc;
1917 	for (j = 0; j < nsegs; j++) {
1918 		bus_size_t seglen;
1919 		bus_addr_t segaddr;
1920 
1921 		txbuf = &txr->tx_buffers[i];
1922 		txd = &txr->tx_base[i];
1923 		seglen = segs[j].ds_len;
1924 		segaddr = htole64(segs[j].ds_addr);
1925 
1926 		txd->read.buffer_addr = segaddr;
1927 		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1928 		    cmd_type_len | seglen);
1929 		txd->read.olinfo_status = htole32(olinfo_status);
1930 
1931 		if (++i == txr->num_desc)
1932 			i = 0;
1933 	}
1934 
1935 	txd->read.cmd_type_len |=
1936 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1937 	txr->tx_avail -= nsegs;
1938 	txr->next_avail_desc = i;
1939 
1940 	txbuf->m_head = m_head;
1941 	/*
1942 	** Here we swap the map so the last descriptor,
1943 	** which gets the completion interrupt has the
1944 	** real map, and the first descriptor gets the
1945 	** unused map from this descriptor.
1946 	*/
1947 	txr->tx_buffers[first].map = txbuf->map;
1948 	txbuf->map = map;
1949 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1950 
1951         /* Set the EOP descriptor that will be marked done */
1952         txbuf = &txr->tx_buffers[first];
1953 	txbuf->eop = txd;
1954 
1955         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1956             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1957 	/*
1958 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1959 	 * hardware that this frame is available to transmit.
1960 	 */
1961 	++txr->total_packets;
1962 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1963 
1964 	return (0);
1965 }
1966 static void
igb_set_promisc(struct adapter * adapter)1967 igb_set_promisc(struct adapter *adapter)
1968 {
1969 	struct ifnet	*ifp = adapter->ifp;
1970 	struct e1000_hw *hw = &adapter->hw;
1971 	u32		reg;
1972 
1973 	if (adapter->vf_ifp) {
1974 		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1975 		return;
1976 	}
1977 
1978 	reg = E1000_READ_REG(hw, E1000_RCTL);
1979 	if (ifp->if_flags & IFF_PROMISC) {
1980 		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1981 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1982 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1983 		reg |= E1000_RCTL_MPE;
1984 		reg &= ~E1000_RCTL_UPE;
1985 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1986 	}
1987 }
1988 
1989 static void
igb_disable_promisc(struct adapter * adapter)1990 igb_disable_promisc(struct adapter *adapter)
1991 {
1992 	struct e1000_hw *hw = &adapter->hw;
1993 	struct ifnet	*ifp = adapter->ifp;
1994 	u32		reg;
1995 	int		mcnt = 0;
1996 
1997 	if (adapter->vf_ifp) {
1998 		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1999 		return;
2000 	}
2001 	reg = E1000_READ_REG(hw, E1000_RCTL);
2002 	reg &=  (~E1000_RCTL_UPE);
2003 	if (ifp->if_flags & IFF_ALLMULTI)
2004 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2005 	else {
2006 		struct  ifmultiaddr *ifma;
2007 #if __FreeBSD_version < 800000
2008 		IF_ADDR_LOCK(ifp);
2009 #else
2010 		if_maddr_rlock(ifp);
2011 #endif
2012 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2013 			if (ifma->ifma_addr->sa_family != AF_LINK)
2014 				continue;
2015 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2016 				break;
2017 			mcnt++;
2018 		}
2019 #if __FreeBSD_version < 800000
2020 		IF_ADDR_UNLOCK(ifp);
2021 #else
2022 		if_maddr_runlock(ifp);
2023 #endif
2024 	}
2025 	/* Don't disable if in MAX groups */
2026 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2027 		reg &=  (~E1000_RCTL_MPE);
2028 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2029 }
2030 
2031 
2032 /*********************************************************************
2033  *  Multicast Update
2034  *
2035  *  This routine is called whenever multicast address list is updated.
2036  *
2037  **********************************************************************/
2038 
2039 static void
igb_set_multi(struct adapter * adapter)2040 igb_set_multi(struct adapter *adapter)
2041 {
2042 	struct ifnet	*ifp = adapter->ifp;
2043 	struct ifmultiaddr *ifma;
2044 	u32 reg_rctl = 0;
2045 	u8  *mta;
2046 
2047 	int mcnt = 0;
2048 
2049 	IOCTL_DEBUGOUT("igb_set_multi: begin");
2050 
2051 	mta = adapter->mta;
2052 	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2053 	    MAX_NUM_MULTICAST_ADDRESSES);
2054 
2055 #if __FreeBSD_version < 800000
2056 	IF_ADDR_LOCK(ifp);
2057 #else
2058 	if_maddr_rlock(ifp);
2059 #endif
2060 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2061 		if (ifma->ifma_addr->sa_family != AF_LINK)
2062 			continue;
2063 
2064 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2065 			break;
2066 
2067 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2068 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2069 		mcnt++;
2070 	}
2071 #if __FreeBSD_version < 800000
2072 	IF_ADDR_UNLOCK(ifp);
2073 #else
2074 	if_maddr_runlock(ifp);
2075 #endif
2076 
2077 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2078 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2079 		reg_rctl |= E1000_RCTL_MPE;
2080 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081 	} else
2082 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2083 }
2084 
2085 
2086 /*********************************************************************
2087  *  Timer routine:
2088  *  	This routine checks for link status,
2089  *	updates statistics, and does the watchdog.
2090  *
2091  **********************************************************************/
2092 
2093 static void
igb_local_timer(void * arg)2094 igb_local_timer(void *arg)
2095 {
2096 	struct adapter		*adapter = arg;
2097 	device_t		dev = adapter->dev;
2098 	struct ifnet		*ifp = adapter->ifp;
2099 	struct tx_ring		*txr = adapter->tx_rings;
2100 	struct igb_queue	*que = adapter->queues;
2101 	int			hung = 0, busy = 0;
2102 
2103 
2104 	IGB_CORE_LOCK_ASSERT(adapter);
2105 
2106 	igb_update_link_status(adapter);
2107 	igb_update_stats_counters(adapter);
2108 
2109         /*
2110         ** Check the TX queues status
2111 	**	- central locked handling of OACTIVE
2112 	**	- watchdog only if all queues show hung
2113         */
2114 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2115 		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2116 		    (adapter->pause_frames == 0))
2117 			++hung;
2118 		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2119 			++busy;
2120 		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2121 			taskqueue_enqueue(que->tq, &que->que_task);
2122 	}
2123 	if (hung == adapter->num_queues)
2124 		goto timeout;
2125 	if (busy == adapter->num_queues)
2126 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2127 	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2128 	    (busy < adapter->num_queues))
2129 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2130 
2131 	adapter->pause_frames = 0;
2132 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2133 #ifndef DEVICE_POLLING
2134 	/* Schedule all queue interrupts - deadlock protection */
2135 	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2136 #endif
2137 	return;
2138 
2139 timeout:
2140 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2141 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2142             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2143             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2144 	device_printf(dev,"TX(%d) desc avail = %d,"
2145             "Next TX to Clean = %d\n",
2146             txr->me, txr->tx_avail, txr->next_to_clean);
2147 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2148 	adapter->watchdog_events++;
2149 	igb_init_locked(adapter);
2150 }
2151 
2152 static void
igb_update_link_status(struct adapter * adapter)2153 igb_update_link_status(struct adapter *adapter)
2154 {
2155 	struct e1000_hw		*hw = &adapter->hw;
2156 	struct e1000_fc_info	*fc = &hw->fc;
2157 	struct ifnet		*ifp = adapter->ifp;
2158 	device_t		dev = adapter->dev;
2159 	struct tx_ring		*txr = adapter->tx_rings;
2160 	u32			link_check, thstat, ctrl;
2161 	char			*flowctl = NULL;
2162 
2163 	link_check = thstat = ctrl = 0;
2164 
2165 	/* Get the cached link value or read for real */
2166         switch (hw->phy.media_type) {
2167         case e1000_media_type_copper:
2168                 if (hw->mac.get_link_status) {
2169 			/* Do the work to read phy */
2170                         e1000_check_for_link(hw);
2171                         link_check = !hw->mac.get_link_status;
2172                 } else
2173                         link_check = TRUE;
2174                 break;
2175         case e1000_media_type_fiber:
2176                 e1000_check_for_link(hw);
2177                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2178                                  E1000_STATUS_LU);
2179                 break;
2180         case e1000_media_type_internal_serdes:
2181                 e1000_check_for_link(hw);
2182                 link_check = adapter->hw.mac.serdes_has_link;
2183                 break;
2184 	/* VF device is type_unknown */
2185         case e1000_media_type_unknown:
2186                 e1000_check_for_link(hw);
2187 		link_check = !hw->mac.get_link_status;
2188 		/* Fall thru */
2189         default:
2190                 break;
2191         }
2192 
2193 	/* Check for thermal downshift or shutdown */
2194 	if (hw->mac.type == e1000_i350) {
2195 		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2196 		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2197 	}
2198 
2199 	/* Get the flow control for display */
2200 	switch (fc->current_mode) {
2201 	case e1000_fc_rx_pause:
2202 		flowctl = "RX";
2203 		break;
2204 	case e1000_fc_tx_pause:
2205 		flowctl = "TX";
2206 		break;
2207 	case e1000_fc_full:
2208 		flowctl = "Full";
2209 		break;
2210 	case e1000_fc_none:
2211 	default:
2212 		flowctl = "None";
2213 		break;
2214 	}
2215 
2216 	/* Now we check if a transition has happened */
2217 	if (link_check && (adapter->link_active == 0)) {
2218 		e1000_get_speed_and_duplex(&adapter->hw,
2219 		    &adapter->link_speed, &adapter->link_duplex);
2220 		if (bootverbose)
2221 			device_printf(dev, "Link is up %d Mbps %s,"
2222 			    " Flow Control: %s\n",
2223 			    adapter->link_speed,
2224 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2225 			    "Full Duplex" : "Half Duplex"), flowctl);
2226 		adapter->link_active = 1;
2227 		ifp->if_baudrate = adapter->link_speed * 1000000;
2228 		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2229 		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2230 			device_printf(dev, "Link: thermal downshift\n");
2231 		/* Delay Link Up for Phy update */
2232 		if (((hw->mac.type == e1000_i210) ||
2233 		    (hw->mac.type == e1000_i211)) &&
2234 		    (hw->phy.id == I210_I_PHY_ID))
2235 			msec_delay(I210_LINK_DELAY);
2236 		/* Reset if the media type changed. */
2237 		if (hw->dev_spec._82575.media_changed) {
2238 			hw->dev_spec._82575.media_changed = false;
2239 			adapter->flags |= IGB_MEDIA_RESET;
2240 			igb_reset(adapter);
2241 		}
2242 		/* This can sleep */
2243 		if_link_state_change(ifp, LINK_STATE_UP);
2244 	} else if (!link_check && (adapter->link_active == 1)) {
2245 		ifp->if_baudrate = adapter->link_speed = 0;
2246 		adapter->link_duplex = 0;
2247 		if (bootverbose)
2248 			device_printf(dev, "Link is Down\n");
2249 		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2250 		    (thstat & E1000_THSTAT_PWR_DOWN))
2251 			device_printf(dev, "Link: thermal shutdown\n");
2252 		adapter->link_active = 0;
2253 		/* This can sleep */
2254 		if_link_state_change(ifp, LINK_STATE_DOWN);
2255 		/* Reset queue state */
2256 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2257 			txr->queue_status = IGB_QUEUE_IDLE;
2258 	}
2259 }
2260 
2261 /*********************************************************************
2262  *
2263  *  This routine disables all traffic on the adapter by issuing a
2264  *  global reset on the MAC and deallocates TX/RX buffers.
2265  *
2266  **********************************************************************/
2267 
2268 static void
igb_stop(void * arg)2269 igb_stop(void *arg)
2270 {
2271 	struct adapter	*adapter = arg;
2272 	struct ifnet	*ifp = adapter->ifp;
2273 	struct tx_ring *txr = adapter->tx_rings;
2274 
2275 	IGB_CORE_LOCK_ASSERT(adapter);
2276 
2277 	INIT_DEBUGOUT("igb_stop: begin");
2278 
2279 	igb_disable_intr(adapter);
2280 
2281 	callout_stop(&adapter->timer);
2282 
2283 	/* Tell the stack that the interface is no longer active */
2284 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2285 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2286 
2287 	/* Disarm watchdog timer. */
2288 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2289 		IGB_TX_LOCK(txr);
2290 		txr->queue_status = IGB_QUEUE_IDLE;
2291 		IGB_TX_UNLOCK(txr);
2292 	}
2293 
2294 	e1000_reset_hw(&adapter->hw);
2295 	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0);
2296 
2297 	e1000_led_off(&adapter->hw);
2298 	e1000_cleanup_led(&adapter->hw);
2299 }
2300 
2301 
2302 /*********************************************************************
2303  *
2304  *  Determine hardware revision.
2305  *
2306  **********************************************************************/
2307 static void
igb_identify_hardware(struct adapter * adapter)2308 igb_identify_hardware(struct adapter *adapter)
2309 {
2310 	device_t dev = adapter->dev;
2311 
2312 	/* Make sure our PCI config space has the necessary stuff set */
2313 	pci_enable_busmaster(dev);
2314 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2315 
2316 	/* Save off the information about this board */
2317 	adapter->hw.vendor_id = pci_get_vendor(dev);
2318 	adapter->hw.device_id = pci_get_device(dev);
2319 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2320 	adapter->hw.subsystem_vendor_id =
2321 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2322 	adapter->hw.subsystem_device_id =
2323 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2324 
2325 	/* Set MAC type early for PCI setup */
2326 	e1000_set_mac_type(&adapter->hw);
2327 
2328 	/* Are we a VF device? */
2329 	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2330 	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2331 		adapter->vf_ifp = 1;
2332 	else
2333 		adapter->vf_ifp = 0;
2334 }
2335 
2336 static int
igb_allocate_pci_resources(struct adapter * adapter)2337 igb_allocate_pci_resources(struct adapter *adapter)
2338 {
2339 	device_t	dev = adapter->dev;
2340 	int		rid;
2341 
2342 	rid = PCIR_BAR(0);
2343 	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2344 	    &rid, RF_ACTIVE);
2345 	if (adapter->pci_mem == NULL) {
2346 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2347 		return (ENXIO);
2348 	}
2349 	adapter->osdep.mem_bus_space_tag =
2350 	    rman_get_bustag(adapter->pci_mem);
2351 	adapter->osdep.mem_bus_space_handle =
2352 	    rman_get_bushandle(adapter->pci_mem);
2353 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2354 
2355 	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2356 
2357 	/* This will setup either MSI/X or MSI */
2358 	adapter->msix = igb_setup_msix(adapter);
2359 	adapter->hw.back = &adapter->osdep;
2360 
2361 	return (0);
2362 }
2363 
2364 /*********************************************************************
2365  *
2366  *  Setup the Legacy or MSI Interrupt handler
2367  *
2368  **********************************************************************/
2369 static int
igb_allocate_legacy(struct adapter * adapter)2370 igb_allocate_legacy(struct adapter *adapter)
2371 {
2372 	device_t		dev = adapter->dev;
2373 	struct igb_queue	*que = adapter->queues;
2374 #ifndef IGB_LEGACY_TX
2375 	struct tx_ring		*txr = adapter->tx_rings;
2376 #endif
2377 	int			error, rid = 0;
2378 
2379 	/* Turn off all interrupts */
2380 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2381 
2382 	/* MSI RID is 1 */
2383 	if (adapter->msix == 1)
2384 		rid = 1;
2385 
2386 	/* We allocate a single interrupt resource */
2387 	adapter->res = bus_alloc_resource_any(dev,
2388 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2389 	if (adapter->res == NULL) {
2390 		device_printf(dev, "Unable to allocate bus resource: "
2391 		    "interrupt\n");
2392 		return (ENXIO);
2393 	}
2394 
2395 #ifndef IGB_LEGACY_TX
2396 	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2397 #endif
2398 
2399 	/*
2400 	 * Try allocating a fast interrupt and the associated deferred
2401 	 * processing contexts.
2402 	 */
2403 	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2404 	/* Make tasklet for deferred link handling */
2405 	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2406 	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2407 	    taskqueue_thread_enqueue, &que->tq);
2408 	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2409 	    device_get_nameunit(adapter->dev));
2410 	if ((error = bus_setup_intr(dev, adapter->res,
2411 	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2412 	    adapter, &adapter->tag)) != 0) {
2413 		device_printf(dev, "Failed to register fast interrupt "
2414 			    "handler: %d\n", error);
2415 		taskqueue_free(que->tq);
2416 		que->tq = NULL;
2417 		return (error);
2418 	}
2419 
2420 	return (0);
2421 }
2422 
2423 
2424 /*********************************************************************
2425  *
2426  *  Setup the MSIX Queue Interrupt handlers:
2427  *
2428  **********************************************************************/
2429 static int
igb_allocate_msix(struct adapter * adapter)2430 igb_allocate_msix(struct adapter *adapter)
2431 {
2432 	device_t		dev = adapter->dev;
2433 	struct igb_queue	*que = adapter->queues;
2434 	int			error, rid, vector = 0;
2435 	int			cpu_id = 0;
2436 #ifdef	RSS
2437 	cpuset_t cpu_mask;
2438 #endif
2439 
2440 	/* Be sure to start with all interrupts disabled */
2441 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2442 	E1000_WRITE_FLUSH(&adapter->hw);
2443 
2444 #ifdef	RSS
2445 	/*
2446 	 * If we're doing RSS, the number of queues needs to
2447 	 * match the number of RSS buckets that are configured.
2448 	 *
2449 	 * + If there's more queues than RSS buckets, we'll end
2450 	 *   up with queues that get no traffic.
2451 	 *
2452 	 * + If there's more RSS buckets than queues, we'll end
2453 	 *   up having multiple RSS buckets map to the same queue,
2454 	 *   so there'll be some contention.
2455 	 */
2456 	if (adapter->num_queues != rss_getnumbuckets()) {
2457 		device_printf(dev,
2458 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
2459 		    "; performance will be impacted.\n",
2460 		    __func__,
2461 		    adapter->num_queues,
2462 		    rss_getnumbuckets());
2463 	}
2464 #endif
2465 
2466 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2467 		rid = vector +1;
2468 		que->res = bus_alloc_resource_any(dev,
2469 		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2470 		if (que->res == NULL) {
2471 			device_printf(dev,
2472 			    "Unable to allocate bus resource: "
2473 			    "MSIX Queue Interrupt\n");
2474 			return (ENXIO);
2475 		}
2476 		error = bus_setup_intr(dev, que->res,
2477 	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2478 		    igb_msix_que, que, &que->tag);
2479 		if (error) {
2480 			que->res = NULL;
2481 			device_printf(dev, "Failed to register Queue handler");
2482 			return (error);
2483 		}
2484 #if __FreeBSD_version >= 800504
2485 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2486 #endif
2487 		que->msix = vector;
2488 		if (adapter->hw.mac.type == e1000_82575)
2489 			que->eims = E1000_EICR_TX_QUEUE0 << i;
2490 		else
2491 			que->eims = 1 << vector;
2492 
2493 #ifdef	RSS
2494 		/*
2495 		 * The queue ID is used as the RSS layer bucket ID.
2496 		 * We look up the queue ID -> RSS CPU ID and select
2497 		 * that.
2498 		 */
2499 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
2500 #else
2501 		/*
2502 		 * Bind the msix vector, and thus the
2503 		 * rings to the corresponding cpu.
2504 		 *
2505 		 * This just happens to match the default RSS round-robin
2506 		 * bucket -> queue -> CPU allocation.
2507 		 */
2508 		if (adapter->num_queues > 1) {
2509 			if (igb_last_bind_cpu < 0)
2510 				igb_last_bind_cpu = CPU_FIRST();
2511 			cpu_id = igb_last_bind_cpu;
2512 		}
2513 #endif
2514 
2515 		if (adapter->num_queues > 1) {
2516 			bus_bind_intr(dev, que->res, cpu_id);
2517 #ifdef	RSS
2518 			device_printf(dev,
2519 				"Bound queue %d to RSS bucket %d\n",
2520 				i, cpu_id);
2521 #else
2522 			device_printf(dev,
2523 				"Bound queue %d to cpu %d\n",
2524 				i, cpu_id);
2525 #endif
2526 		}
2527 
2528 #ifndef IGB_LEGACY_TX
2529 		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2530 		    que->txr);
2531 #endif
2532 		/* Make tasklet for deferred handling */
2533 		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2534 		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2535 		    taskqueue_thread_enqueue, &que->tq);
2536 		if (adapter->num_queues > 1) {
2537 			/*
2538 			 * Only pin the taskqueue thread to a CPU if
2539 			 * RSS is in use.
2540 			 *
2541 			 * This again just happens to match the default RSS
2542 			 * round-robin bucket -> queue -> CPU allocation.
2543 			 */
2544 #ifdef	RSS
2545 			CPU_SETOF(cpu_id, &cpu_mask);
2546 			taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2547 			    &cpu_mask,
2548 			    "%s que (bucket %d)",
2549 			    device_get_nameunit(adapter->dev),
2550 			    cpu_id);
2551 #else
2552 			taskqueue_start_threads(&que->tq, 1, PI_NET,
2553 			    "%s que (qid %d)",
2554 			    device_get_nameunit(adapter->dev),
2555 			    cpu_id);
2556 #endif
2557 		} else {
2558 			taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2559 			    device_get_nameunit(adapter->dev));
2560 		}
2561 
2562 		/* Finally update the last bound CPU id */
2563 		if (adapter->num_queues > 1)
2564 			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2565 	}
2566 
2567 	/* And Link */
2568 	rid = vector + 1;
2569 	adapter->res = bus_alloc_resource_any(dev,
2570 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2571 	if (adapter->res == NULL) {
2572 		device_printf(dev,
2573 		    "Unable to allocate bus resource: "
2574 		    "MSIX Link Interrupt\n");
2575 		return (ENXIO);
2576 	}
2577 	if ((error = bus_setup_intr(dev, adapter->res,
2578 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2579 	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2580 		device_printf(dev, "Failed to register Link handler");
2581 		return (error);
2582 	}
2583 #if __FreeBSD_version >= 800504
2584 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2585 #endif
2586 	adapter->linkvec = vector;
2587 
2588 	return (0);
2589 }
2590 
2591 
2592 static void
igb_configure_queues(struct adapter * adapter)2593 igb_configure_queues(struct adapter *adapter)
2594 {
2595 	struct	e1000_hw	*hw = &adapter->hw;
2596 	struct	igb_queue	*que;
2597 	u32			tmp, ivar = 0, newitr = 0;
2598 
2599 	/* First turn on RSS capability */
2600 	if (adapter->hw.mac.type != e1000_82575)
2601 		E1000_WRITE_REG(hw, E1000_GPIE,
2602 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2603 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2604 
2605 	/* Turn on MSIX */
2606 	switch (adapter->hw.mac.type) {
2607 	case e1000_82580:
2608 	case e1000_i350:
2609 	case e1000_i354:
2610 	case e1000_i210:
2611 	case e1000_i211:
2612 	case e1000_vfadapt:
2613 	case e1000_vfadapt_i350:
2614 		/* RX entries */
2615 		for (int i = 0; i < adapter->num_queues; i++) {
2616 			u32 index = i >> 1;
2617 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2618 			que = &adapter->queues[i];
2619 			if (i & 1) {
2620 				ivar &= 0xFF00FFFF;
2621 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2622 			} else {
2623 				ivar &= 0xFFFFFF00;
2624 				ivar |= que->msix | E1000_IVAR_VALID;
2625 			}
2626 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2627 		}
2628 		/* TX entries */
2629 		for (int i = 0; i < adapter->num_queues; i++) {
2630 			u32 index = i >> 1;
2631 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2632 			que = &adapter->queues[i];
2633 			if (i & 1) {
2634 				ivar &= 0x00FFFFFF;
2635 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2636 			} else {
2637 				ivar &= 0xFFFF00FF;
2638 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2639 			}
2640 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2641 			adapter->que_mask |= que->eims;
2642 		}
2643 
2644 		/* And for the link interrupt */
2645 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2646 		adapter->link_mask = 1 << adapter->linkvec;
2647 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2648 		break;
2649 	case e1000_82576:
2650 		/* RX entries */
2651 		for (int i = 0; i < adapter->num_queues; i++) {
2652 			u32 index = i & 0x7; /* Each IVAR has two entries */
2653 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2654 			que = &adapter->queues[i];
2655 			if (i < 8) {
2656 				ivar &= 0xFFFFFF00;
2657 				ivar |= que->msix | E1000_IVAR_VALID;
2658 			} else {
2659 				ivar &= 0xFF00FFFF;
2660 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2661 			}
2662 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2663 			adapter->que_mask |= que->eims;
2664 		}
2665 		/* TX entries */
2666 		for (int i = 0; i < adapter->num_queues; i++) {
2667 			u32 index = i & 0x7; /* Each IVAR has two entries */
2668 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2669 			que = &adapter->queues[i];
2670 			if (i < 8) {
2671 				ivar &= 0xFFFF00FF;
2672 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2673 			} else {
2674 				ivar &= 0x00FFFFFF;
2675 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2676 			}
2677 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2678 			adapter->que_mask |= que->eims;
2679 		}
2680 
2681 		/* And for the link interrupt */
2682 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2683 		adapter->link_mask = 1 << adapter->linkvec;
2684 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2685 		break;
2686 
2687 	case e1000_82575:
2688                 /* enable MSI-X support*/
2689 		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2690                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2691                 /* Auto-Mask interrupts upon ICR read. */
2692                 tmp |= E1000_CTRL_EXT_EIAME;
2693                 tmp |= E1000_CTRL_EXT_IRCA;
2694                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2695 
2696 		/* Queues */
2697 		for (int i = 0; i < adapter->num_queues; i++) {
2698 			que = &adapter->queues[i];
2699 			tmp = E1000_EICR_RX_QUEUE0 << i;
2700 			tmp |= E1000_EICR_TX_QUEUE0 << i;
2701 			que->eims = tmp;
2702 			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2703 			    i, que->eims);
2704 			adapter->que_mask |= que->eims;
2705 		}
2706 
2707 		/* Link */
2708 		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2709 		    E1000_EIMS_OTHER);
2710 		adapter->link_mask |= E1000_EIMS_OTHER;
2711 	default:
2712 		break;
2713 	}
2714 
2715 	/* Set the starting interrupt rate */
2716 	if (igb_max_interrupt_rate > 0)
2717 		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2718 
2719         if (hw->mac.type == e1000_82575)
2720                 newitr |= newitr << 16;
2721         else
2722                 newitr |= E1000_EITR_CNT_IGNR;
2723 
2724 	for (int i = 0; i < adapter->num_queues; i++) {
2725 		que = &adapter->queues[i];
2726 		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2727 	}
2728 
2729 	return;
2730 }
2731 
2732 
2733 static void
igb_free_pci_resources(struct adapter * adapter)2734 igb_free_pci_resources(struct adapter *adapter)
2735 {
2736 	struct		igb_queue *que = adapter->queues;
2737 	device_t	dev = adapter->dev;
2738 	int		rid;
2739 
2740 	/*
2741 	** There is a slight possibility of a failure mode
2742 	** in attach that will result in entering this function
2743 	** before interrupt resources have been initialized, and
2744 	** in that case we do not want to execute the loops below
2745 	** We can detect this reliably by the state of the adapter
2746 	** res pointer.
2747 	*/
2748 	if (adapter->res == NULL)
2749 		goto mem;
2750 
2751 	/*
2752 	 * First release all the interrupt resources:
2753 	 */
2754 	for (int i = 0; i < adapter->num_queues; i++, que++) {
2755 		rid = que->msix + 1;
2756 		if (que->tag != NULL) {
2757 			bus_teardown_intr(dev, que->res, que->tag);
2758 			que->tag = NULL;
2759 		}
2760 		if (que->res != NULL)
2761 			bus_release_resource(dev,
2762 			    SYS_RES_IRQ, rid, que->res);
2763 	}
2764 
2765 	/* Clean the Legacy or Link interrupt last */
2766 	if (adapter->linkvec) /* we are doing MSIX */
2767 		rid = adapter->linkvec + 1;
2768 	else
2769 		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2770 
2771 	que = adapter->queues;
2772 	if (adapter->tag != NULL) {
2773 		taskqueue_drain(que->tq, &adapter->link_task);
2774 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2775 		adapter->tag = NULL;
2776 	}
2777 	if (adapter->res != NULL)
2778 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2779 
2780 	for (int i = 0; i < adapter->num_queues; i++, que++) {
2781 		if (que->tq != NULL) {
2782 #ifndef IGB_LEGACY_TX
2783 			taskqueue_drain(que->tq, &que->txr->txq_task);
2784 #endif
2785 			taskqueue_drain(que->tq, &que->que_task);
2786 			taskqueue_free(que->tq);
2787 		}
2788 	}
2789 mem:
2790 	if (adapter->msix)
2791 		pci_release_msi(dev);
2792 
2793 	if (adapter->msix_mem != NULL)
2794 		bus_release_resource(dev, SYS_RES_MEMORY,
2795 		    adapter->memrid, adapter->msix_mem);
2796 
2797 	if (adapter->pci_mem != NULL)
2798 		bus_release_resource(dev, SYS_RES_MEMORY,
2799 		    PCIR_BAR(0), adapter->pci_mem);
2800 
2801 }
2802 
2803 /*
2804  * Setup Either MSI/X or MSI
2805  */
2806 static int
igb_setup_msix(struct adapter * adapter)2807 igb_setup_msix(struct adapter *adapter)
2808 {
2809 	device_t	dev = adapter->dev;
2810 	int		bar, want, queues, msgs, maxqueues;
2811 
2812 	/* tuneable override */
2813 	if (igb_enable_msix == 0)
2814 		goto msi;
2815 
2816 	/* First try MSI/X */
2817 	msgs = pci_msix_count(dev);
2818 	if (msgs == 0)
2819 		goto msi;
2820 	/*
2821 	** Some new devices, as with ixgbe, now may
2822 	** use a different BAR, so we need to keep
2823 	** track of which is used.
2824 	*/
2825 	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2826 	bar = pci_read_config(dev, adapter->memrid, 4);
2827 	if (bar == 0) /* use next bar */
2828 		adapter->memrid += 4;
2829 	adapter->msix_mem = bus_alloc_resource_any(dev,
2830 	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2831        	if (adapter->msix_mem == NULL) {
2832 		/* May not be enabled */
2833 		device_printf(adapter->dev,
2834 		    "Unable to map MSIX table \n");
2835 		goto msi;
2836 	}
2837 
2838 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2839 
2840 	/* Override via tuneable */
2841 	if (igb_num_queues != 0)
2842 		queues = igb_num_queues;
2843 
2844 #ifdef	RSS
2845 	/* If we're doing RSS, clamp at the number of RSS buckets */
2846 	if (queues > rss_getnumbuckets())
2847 		queues = rss_getnumbuckets();
2848 #endif
2849 
2850 
2851 	/* Sanity check based on HW */
2852 	switch (adapter->hw.mac.type) {
2853 		case e1000_82575:
2854 			maxqueues = 4;
2855 			break;
2856 		case e1000_82576:
2857 		case e1000_82580:
2858 		case e1000_i350:
2859 		case e1000_i354:
2860 			maxqueues = 8;
2861 			break;
2862 		case e1000_i210:
2863 			maxqueues = 4;
2864 			break;
2865 		case e1000_i211:
2866 			maxqueues = 2;
2867 			break;
2868 		default:  /* VF interfaces */
2869 			maxqueues = 1;
2870 			break;
2871 	}
2872 
2873 	/* Final clamp on the actual hardware capability */
2874 	if (queues > maxqueues)
2875 		queues = maxqueues;
2876 
2877 	/*
2878 	** One vector (RX/TX pair) per queue
2879 	** plus an additional for Link interrupt
2880 	*/
2881 	want = queues + 1;
2882 	if (msgs >= want)
2883 		msgs = want;
2884 	else {
2885                	device_printf(adapter->dev,
2886 		    "MSIX Configuration Problem, "
2887 		    "%d vectors configured, but %d queues wanted!\n",
2888 		    msgs, want);
2889 		goto msi;
2890 	}
2891 	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2892                	device_printf(adapter->dev,
2893 		    "Using MSIX interrupts with %d vectors\n", msgs);
2894 		adapter->num_queues = queues;
2895 		return (msgs);
2896 	}
2897 	/*
2898 	** If MSIX alloc failed or provided us with
2899 	** less than needed, free and fall through to MSI
2900 	*/
2901 	pci_release_msi(dev);
2902 
2903 msi:
2904        	if (adapter->msix_mem != NULL) {
2905 		bus_release_resource(dev, SYS_RES_MEMORY,
2906 		    adapter->memrid, adapter->msix_mem);
2907 		adapter->msix_mem = NULL;
2908 	}
2909        	msgs = 1;
2910 	if (pci_alloc_msi(dev, &msgs) == 0) {
2911 		device_printf(adapter->dev," Using an MSI interrupt\n");
2912 		return (msgs);
2913 	}
2914 	device_printf(adapter->dev," Using a Legacy interrupt\n");
2915 	return (0);
2916 }
2917 
2918 /*********************************************************************
2919  *
2920  *  Initialize the DMA Coalescing feature
2921  *
2922  **********************************************************************/
2923 static void
igb_init_dmac(struct adapter * adapter,u32 pba)2924 igb_init_dmac(struct adapter *adapter, u32 pba)
2925 {
2926 	device_t	dev = adapter->dev;
2927 	struct e1000_hw *hw = &adapter->hw;
2928 	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2929 	u16		hwm;
2930 
2931 	if (hw->mac.type == e1000_i211)
2932 		return;
2933 
2934 	if (hw->mac.type > e1000_82580) {
2935 
2936 		if (adapter->dmac == 0) { /* Disabling it */
2937 			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2938 			return;
2939 		} else
2940 			device_printf(dev, "DMA Coalescing enabled\n");
2941 
2942 		/* Set starting threshold */
2943 		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2944 
2945 		hwm = 64 * pba - adapter->max_frame_size / 16;
2946 		if (hwm < 64 * (pba - 6))
2947 			hwm = 64 * (pba - 6);
2948 		reg = E1000_READ_REG(hw, E1000_FCRTC);
2949 		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2950 		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2951 		    & E1000_FCRTC_RTH_COAL_MASK);
2952 		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2953 
2954 
2955 		dmac = pba - adapter->max_frame_size / 512;
2956 		if (dmac < pba - 10)
2957 			dmac = pba - 10;
2958 		reg = E1000_READ_REG(hw, E1000_DMACR);
2959 		reg &= ~E1000_DMACR_DMACTHR_MASK;
2960 		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2961 		    & E1000_DMACR_DMACTHR_MASK);
2962 
2963 		/* transition to L0x or L1 if available..*/
2964 		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2965 
2966 		/* Check if status is 2.5Gb backplane connection
2967 		* before configuration of watchdog timer, which is
2968 		* in msec values in 12.8usec intervals
2969 		* watchdog timer= msec values in 32usec intervals
2970 		* for non 2.5Gb connection
2971 		*/
2972 		if (hw->mac.type == e1000_i354) {
2973 			int status = E1000_READ_REG(hw, E1000_STATUS);
2974 			if ((status & E1000_STATUS_2P5_SKU) &&
2975 			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2976 				reg |= ((adapter->dmac * 5) >> 6);
2977 			else
2978 				reg |= (adapter->dmac >> 5);
2979 		} else {
2980 			reg |= (adapter->dmac >> 5);
2981 		}
2982 
2983 		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2984 
2985 		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2986 
2987 		/* Set the interval before transition */
2988 		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2989 		if (hw->mac.type == e1000_i350)
2990 			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2991 		/*
2992 		** in 2.5Gb connection, TTLX unit is 0.4 usec
2993 		** which is 0x4*2 = 0xA. But delay is still 4 usec
2994 		*/
2995 		if (hw->mac.type == e1000_i354) {
2996 			int status = E1000_READ_REG(hw, E1000_STATUS);
2997 			if ((status & E1000_STATUS_2P5_SKU) &&
2998 			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2999 				reg |= 0xA;
3000 			else
3001 				reg |= 0x4;
3002 		} else {
3003 			reg |= 0x4;
3004 		}
3005 
3006 		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3007 
3008 		/* free space in tx packet buffer to wake from DMA coal */
3009 		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3010 		    (2 * adapter->max_frame_size)) >> 6);
3011 
3012 		/* make low power state decision controlled by DMA coal */
3013 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3014 		reg &= ~E1000_PCIEMISC_LX_DECISION;
3015 		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3016 
3017 	} else if (hw->mac.type == e1000_82580) {
3018 		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3019 		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3020 		    reg & ~E1000_PCIEMISC_LX_DECISION);
3021 		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3022 	}
3023 }
3024 
3025 
3026 /*********************************************************************
3027  *
3028  *  Set up an fresh starting state
3029  *
3030  **********************************************************************/
3031 static void
igb_reset(struct adapter * adapter)3032 igb_reset(struct adapter *adapter)
3033 {
3034 	device_t	dev = adapter->dev;
3035 	struct e1000_hw *hw = &adapter->hw;
3036 	struct e1000_fc_info *fc = &hw->fc;
3037 	struct ifnet	*ifp = adapter->ifp;
3038 	u32		pba = 0;
3039 	u16		hwm;
3040 
3041 	INIT_DEBUGOUT("igb_reset: begin");
3042 
3043 	/* Let the firmware know the OS is in control */
3044 	igb_get_hw_control(adapter);
3045 
3046 	/*
3047 	 * Packet Buffer Allocation (PBA)
3048 	 * Writing PBA sets the receive portion of the buffer
3049 	 * the remainder is used for the transmit buffer.
3050 	 */
3051 	switch (hw->mac.type) {
3052 	case e1000_82575:
3053 		pba = E1000_PBA_32K;
3054 		break;
3055 	case e1000_82576:
3056 	case e1000_vfadapt:
3057 		pba = E1000_READ_REG(hw, E1000_RXPBS);
3058 		pba &= E1000_RXPBS_SIZE_MASK_82576;
3059 		break;
3060 	case e1000_82580:
3061 	case e1000_i350:
3062 	case e1000_i354:
3063 	case e1000_vfadapt_i350:
3064 		pba = E1000_READ_REG(hw, E1000_RXPBS);
3065 		pba = e1000_rxpbs_adjust_82580(pba);
3066 		break;
3067 	case e1000_i210:
3068 	case e1000_i211:
3069 		pba = E1000_PBA_34K;
3070 	default:
3071 		break;
3072 	}
3073 
3074 	/* Special needs in case of Jumbo frames */
3075 	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3076 		u32 tx_space, min_tx, min_rx;
3077 		pba = E1000_READ_REG(hw, E1000_PBA);
3078 		tx_space = pba >> 16;
3079 		pba &= 0xffff;
3080 		min_tx = (adapter->max_frame_size +
3081 		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3082 		min_tx = roundup2(min_tx, 1024);
3083 		min_tx >>= 10;
3084                 min_rx = adapter->max_frame_size;
3085                 min_rx = roundup2(min_rx, 1024);
3086                 min_rx >>= 10;
3087 		if (tx_space < min_tx &&
3088 		    ((min_tx - tx_space) < pba)) {
3089 			pba = pba - (min_tx - tx_space);
3090 			/*
3091                          * if short on rx space, rx wins
3092                          * and must trump tx adjustment
3093 			 */
3094                         if (pba < min_rx)
3095                                 pba = min_rx;
3096 		}
3097 		E1000_WRITE_REG(hw, E1000_PBA, pba);
3098 	}
3099 
3100 	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3101 
3102 	/*
3103 	 * These parameters control the automatic generation (Tx) and
3104 	 * response (Rx) to Ethernet PAUSE frames.
3105 	 * - High water mark should allow for at least two frames to be
3106 	 *   received after sending an XOFF.
3107 	 * - Low water mark works best when it is very near the high water mark.
3108 	 *   This allows the receiver to restart by sending XON when it has
3109 	 *   drained a bit.
3110 	 */
3111 	hwm = min(((pba << 10) * 9 / 10),
3112 	    ((pba << 10) - 2 * adapter->max_frame_size));
3113 
3114 	if (hw->mac.type < e1000_82576) {
3115 		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3116 		fc->low_water = fc->high_water - 8;
3117 	} else {
3118 		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3119 		fc->low_water = fc->high_water - 16;
3120 	}
3121 
3122 	fc->pause_time = IGB_FC_PAUSE_TIME;
3123 	fc->send_xon = TRUE;
3124 	if (adapter->fc)
3125 		fc->requested_mode = adapter->fc;
3126 	else
3127 		fc->requested_mode = e1000_fc_default;
3128 
3129 	/* Issue a global reset */
3130 	e1000_reset_hw(hw);
3131 	E1000_WRITE_REG(hw, E1000_WUFC, 0);
3132 
3133 	/* Reset for AutoMediaDetect */
3134 	if (adapter->flags & IGB_MEDIA_RESET) {
3135 		e1000_setup_init_funcs(hw, TRUE);
3136 		e1000_get_bus_info(hw);
3137 		adapter->flags &= ~IGB_MEDIA_RESET;
3138 	}
3139 
3140 	if (e1000_init_hw(hw) < 0)
3141 		device_printf(dev, "Hardware Initialization Failed\n");
3142 
3143 	/* Setup DMA Coalescing */
3144 	igb_init_dmac(adapter, pba);
3145 
3146 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3147 	e1000_get_phy_info(hw);
3148 	e1000_check_for_link(hw);
3149 	return;
3150 }
3151 
3152 /*********************************************************************
3153  *
3154  *  Setup networking device structure and register an interface.
3155  *
3156  **********************************************************************/
3157 static int
igb_setup_interface(device_t dev,struct adapter * adapter)3158 igb_setup_interface(device_t dev, struct adapter *adapter)
3159 {
3160 	struct ifnet   *ifp;
3161 
3162 	INIT_DEBUGOUT("igb_setup_interface: begin");
3163 
3164 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3165 	if (ifp == NULL) {
3166 		device_printf(dev, "can not allocate ifnet structure\n");
3167 		return (-1);
3168 	}
3169 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3170 	ifp->if_init =  igb_init;
3171 	ifp->if_softc = adapter;
3172 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3173 	ifp->if_ioctl = igb_ioctl;
3174 	ifp->if_get_counter = igb_get_counter;
3175 
3176 	/* TSO parameters */
3177 	ifp->if_hw_tsomax = IP_MAXPACKET;
3178 	ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3179 	ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3180 
3181 #ifndef IGB_LEGACY_TX
3182 	ifp->if_transmit = igb_mq_start;
3183 	ifp->if_qflush = igb_qflush;
3184 #else
3185 	ifp->if_start = igb_start;
3186 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3187 	ifp->if_snd.ifq_drv_maxlen = 0;
3188 	IFQ_SET_READY(&ifp->if_snd);
3189 #endif
3190 
3191 	ether_ifattach(ifp, adapter->hw.mac.addr);
3192 
3193 	ifp->if_capabilities = ifp->if_capenable = 0;
3194 
3195 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3196 #if __FreeBSD_version >= 1000000
3197 	ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3198 #endif
3199 	ifp->if_capabilities |= IFCAP_TSO;
3200 	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3201 	ifp->if_capenable = ifp->if_capabilities;
3202 
3203 	/* Don't enable LRO by default */
3204 	ifp->if_capabilities |= IFCAP_LRO;
3205 
3206 #ifdef DEVICE_POLLING
3207 	ifp->if_capabilities |= IFCAP_POLLING;
3208 #endif
3209 
3210 	/*
3211 	 * Tell the upper layer(s) we
3212 	 * support full VLAN capability.
3213 	 */
3214 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3215 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3216 			     |  IFCAP_VLAN_HWTSO
3217 			     |  IFCAP_VLAN_MTU;
3218 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3219 			  |  IFCAP_VLAN_HWTSO
3220 			  |  IFCAP_VLAN_MTU;
3221 
3222 	/*
3223 	 * Enable only WOL MAGIC by default if WOL is enabled in EEPROM.
3224 	 */
3225 	ifp->if_capabilities |= IFCAP_WOL;
3226 	if (adapter->wol)
3227 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3228 
3229 	/*
3230 	** Don't turn this on by default, if vlans are
3231 	** created on another pseudo device (eg. lagg)
3232 	** then vlan events are not passed thru, breaking
3233 	** operation, but with HW FILTER off it works. If
3234 	** using vlans directly on the igb driver you can
3235 	** enable this and get full hardware tag filtering.
3236 	*/
3237 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3238 
3239 	/*
3240 	 * Specify the media types supported by this adapter and register
3241 	 * callbacks to update media and link information
3242 	 */
3243 	ifmedia_init(&adapter->media, IFM_IMASK,
3244 	    igb_media_change, igb_media_status);
3245 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3246 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3247 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3248 			    0, NULL);
3249 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3250 	} else {
3251 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3252 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3253 			    0, NULL);
3254 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3255 			    0, NULL);
3256 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3257 			    0, NULL);
3258 		if (adapter->hw.phy.type != e1000_phy_ife) {
3259 			ifmedia_add(&adapter->media,
3260 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3261 			ifmedia_add(&adapter->media,
3262 				IFM_ETHER | IFM_1000_T, 0, NULL);
3263 		}
3264 	}
3265 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3266 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3267 	return (0);
3268 }
3269 
3270 
3271 /*
3272  * Manage DMA'able memory.
3273  */
3274 static void
igb_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)3275 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3276 {
3277 	if (error)
3278 		return;
3279 	*(bus_addr_t *) arg = segs[0].ds_addr;
3280 }
3281 
3282 static int
igb_dma_malloc(struct adapter * adapter,bus_size_t size,struct igb_dma_alloc * dma,int mapflags)3283 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3284         struct igb_dma_alloc *dma, int mapflags)
3285 {
3286 	int error;
3287 
3288 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3289 				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3290 				BUS_SPACE_MAXADDR,	/* lowaddr */
3291 				BUS_SPACE_MAXADDR,	/* highaddr */
3292 				NULL, NULL,		/* filter, filterarg */
3293 				size,			/* maxsize */
3294 				1,			/* nsegments */
3295 				size,			/* maxsegsize */
3296 				0,			/* flags */
3297 				NULL,			/* lockfunc */
3298 				NULL,			/* lockarg */
3299 				&dma->dma_tag);
3300 	if (error) {
3301 		device_printf(adapter->dev,
3302 		    "%s: bus_dma_tag_create failed: %d\n",
3303 		    __func__, error);
3304 		goto fail_0;
3305 	}
3306 
3307 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3308 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3309 	if (error) {
3310 		device_printf(adapter->dev,
3311 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3312 		    __func__, (uintmax_t)size, error);
3313 		goto fail_2;
3314 	}
3315 
3316 	dma->dma_paddr = 0;
3317 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3318 	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3319 	if (error || dma->dma_paddr == 0) {
3320 		device_printf(adapter->dev,
3321 		    "%s: bus_dmamap_load failed: %d\n",
3322 		    __func__, error);
3323 		goto fail_3;
3324 	}
3325 
3326 	return (0);
3327 
3328 fail_3:
3329 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3330 fail_2:
3331 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3332 	bus_dma_tag_destroy(dma->dma_tag);
3333 fail_0:
3334 	dma->dma_tag = NULL;
3335 
3336 	return (error);
3337 }
3338 
3339 static void
igb_dma_free(struct adapter * adapter,struct igb_dma_alloc * dma)3340 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3341 {
3342 	if (dma->dma_tag == NULL)
3343 		return;
3344 	if (dma->dma_paddr != 0) {
3345 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3346 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3347 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3348 		dma->dma_paddr = 0;
3349 	}
3350 	if (dma->dma_vaddr != NULL) {
3351 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352 		dma->dma_vaddr = NULL;
3353 	}
3354 	bus_dma_tag_destroy(dma->dma_tag);
3355 	dma->dma_tag = NULL;
3356 }
3357 
3358 
3359 /*********************************************************************
3360  *
3361  *  Allocate memory for the transmit and receive rings, and then
3362  *  the descriptors associated with each, called only once at attach.
3363  *
3364  **********************************************************************/
3365 static int
igb_allocate_queues(struct adapter * adapter)3366 igb_allocate_queues(struct adapter *adapter)
3367 {
3368 	device_t dev = adapter->dev;
3369 	struct igb_queue	*que = NULL;
3370 	struct tx_ring		*txr = NULL;
3371 	struct rx_ring		*rxr = NULL;
3372 	int rsize, tsize, error = E1000_SUCCESS;
3373 	int txconf = 0, rxconf = 0;
3374 
3375 	/* First allocate the top level queue structs */
3376 	if (!(adapter->queues =
3377 	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3378 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3379 		device_printf(dev, "Unable to allocate queue memory\n");
3380 		error = ENOMEM;
3381 		goto fail;
3382 	}
3383 
3384 	/* Next allocate the TX ring struct memory */
3385 	if (!(adapter->tx_rings =
3386 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3387 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3388 		device_printf(dev, "Unable to allocate TX ring memory\n");
3389 		error = ENOMEM;
3390 		goto tx_fail;
3391 	}
3392 
3393 	/* Now allocate the RX */
3394 	if (!(adapter->rx_rings =
3395 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3396 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3397 		device_printf(dev, "Unable to allocate RX ring memory\n");
3398 		error = ENOMEM;
3399 		goto rx_fail;
3400 	}
3401 
3402 	tsize = roundup2(adapter->num_tx_desc *
3403 	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3404 	/*
3405 	 * Now set up the TX queues, txconf is needed to handle the
3406 	 * possibility that things fail midcourse and we need to
3407 	 * undo memory gracefully
3408 	 */
3409 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3410 		/* Set up some basics */
3411 		txr = &adapter->tx_rings[i];
3412 		txr->adapter = adapter;
3413 		txr->me = i;
3414 		txr->num_desc = adapter->num_tx_desc;
3415 
3416 		/* Initialize the TX lock */
3417 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3418 		    device_get_nameunit(dev), txr->me);
3419 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3420 
3421 		if (igb_dma_malloc(adapter, tsize,
3422 			&txr->txdma, BUS_DMA_NOWAIT)) {
3423 			device_printf(dev,
3424 			    "Unable to allocate TX Descriptor memory\n");
3425 			error = ENOMEM;
3426 			goto err_tx_desc;
3427 		}
3428 		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3429 		bzero((void *)txr->tx_base, tsize);
3430 
3431         	/* Now allocate transmit buffers for the ring */
3432         	if (igb_allocate_transmit_buffers(txr)) {
3433 			device_printf(dev,
3434 			    "Critical Failure setting up transmit buffers\n");
3435 			error = ENOMEM;
3436 			goto err_tx_desc;
3437         	}
3438 #ifndef IGB_LEGACY_TX
3439 		/* Allocate a buf ring */
3440 		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3441 		    M_WAITOK, &txr->tx_mtx);
3442 #endif
3443 	}
3444 
3445 	/*
3446 	 * Next the RX queues...
3447 	 */
3448 	rsize = roundup2(adapter->num_rx_desc *
3449 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3450 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3451 		rxr = &adapter->rx_rings[i];
3452 		rxr->adapter = adapter;
3453 		rxr->me = i;
3454 
3455 		/* Initialize the RX lock */
3456 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3457 		    device_get_nameunit(dev), txr->me);
3458 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3459 
3460 		if (igb_dma_malloc(adapter, rsize,
3461 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3462 			device_printf(dev,
3463 			    "Unable to allocate RxDescriptor memory\n");
3464 			error = ENOMEM;
3465 			goto err_rx_desc;
3466 		}
3467 		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3468 		bzero((void *)rxr->rx_base, rsize);
3469 
3470         	/* Allocate receive buffers for the ring*/
3471 		if (igb_allocate_receive_buffers(rxr)) {
3472 			device_printf(dev,
3473 			    "Critical Failure setting up receive buffers\n");
3474 			error = ENOMEM;
3475 			goto err_rx_desc;
3476 		}
3477 	}
3478 
3479 	/*
3480 	** Finally set up the queue holding structs
3481 	*/
3482 	for (int i = 0; i < adapter->num_queues; i++) {
3483 		que = &adapter->queues[i];
3484 		que->adapter = adapter;
3485 		que->txr = &adapter->tx_rings[i];
3486 		que->rxr = &adapter->rx_rings[i];
3487 	}
3488 
3489 	return (0);
3490 
3491 err_rx_desc:
3492 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3493 		igb_dma_free(adapter, &rxr->rxdma);
3494 err_tx_desc:
3495 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3496 		igb_dma_free(adapter, &txr->txdma);
3497 	free(adapter->rx_rings, M_DEVBUF);
3498 rx_fail:
3499 #ifndef IGB_LEGACY_TX
3500 	buf_ring_free(txr->br, M_DEVBUF);
3501 #endif
3502 	free(adapter->tx_rings, M_DEVBUF);
3503 tx_fail:
3504 	free(adapter->queues, M_DEVBUF);
3505 fail:
3506 	return (error);
3507 }
3508 
3509 /*********************************************************************
3510  *
3511  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3512  *  the information needed to transmit a packet on the wire. This is
3513  *  called only once at attach, setup is done every reset.
3514  *
3515  **********************************************************************/
3516 static int
igb_allocate_transmit_buffers(struct tx_ring * txr)3517 igb_allocate_transmit_buffers(struct tx_ring *txr)
3518 {
3519 	struct adapter *adapter = txr->adapter;
3520 	device_t dev = adapter->dev;
3521 	struct igb_tx_buf *txbuf;
3522 	int error, i;
3523 
3524 	/*
3525 	 * Setup DMA descriptor areas.
3526 	 */
3527 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3528 			       1, 0,			/* alignment, bounds */
3529 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3530 			       BUS_SPACE_MAXADDR,	/* highaddr */
3531 			       NULL, NULL,		/* filter, filterarg */
3532 			       IGB_TSO_SIZE,		/* maxsize */
3533 			       IGB_MAX_SCATTER,		/* nsegments */
3534 			       PAGE_SIZE,		/* maxsegsize */
3535 			       0,			/* flags */
3536 			       NULL,			/* lockfunc */
3537 			       NULL,			/* lockfuncarg */
3538 			       &txr->txtag))) {
3539 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3540 		goto fail;
3541 	}
3542 
3543 	if (!(txr->tx_buffers =
3544 	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3545 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3546 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3547 		error = ENOMEM;
3548 		goto fail;
3549 	}
3550 
3551         /* Create the descriptor buffer dma maps */
3552 	txbuf = txr->tx_buffers;
3553 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3554 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3555 		if (error != 0) {
3556 			device_printf(dev, "Unable to create TX DMA map\n");
3557 			goto fail;
3558 		}
3559 	}
3560 
3561 	return 0;
3562 fail:
3563 	/* We free all, it handles case where we are in the middle */
3564 	igb_free_transmit_structures(adapter);
3565 	return (error);
3566 }
3567 
3568 /*********************************************************************
3569  *
3570  *  Initialize a transmit ring.
3571  *
3572  **********************************************************************/
3573 static void
igb_setup_transmit_ring(struct tx_ring * txr)3574 igb_setup_transmit_ring(struct tx_ring *txr)
3575 {
3576 	struct adapter *adapter = txr->adapter;
3577 	struct igb_tx_buf *txbuf;
3578 	int i;
3579 #ifdef DEV_NETMAP
3580 	struct netmap_adapter *na = NA(adapter->ifp);
3581 	struct netmap_slot *slot;
3582 #endif /* DEV_NETMAP */
3583 
3584 	/* Clear the old descriptor contents */
3585 	IGB_TX_LOCK(txr);
3586 #ifdef DEV_NETMAP
3587 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3588 #endif /* DEV_NETMAP */
3589 	bzero((void *)txr->tx_base,
3590 	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3591 	/* Reset indices */
3592 	txr->next_avail_desc = 0;
3593 	txr->next_to_clean = 0;
3594 
3595 	/* Free any existing tx buffers. */
3596         txbuf = txr->tx_buffers;
3597 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3598 		if (txbuf->m_head != NULL) {
3599 			bus_dmamap_sync(txr->txtag, txbuf->map,
3600 			    BUS_DMASYNC_POSTWRITE);
3601 			bus_dmamap_unload(txr->txtag, txbuf->map);
3602 			m_freem(txbuf->m_head);
3603 			txbuf->m_head = NULL;
3604 		}
3605 #ifdef DEV_NETMAP
3606 		if (slot) {
3607 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
3608 			/* no need to set the address */
3609 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3610 		}
3611 #endif /* DEV_NETMAP */
3612 		/* clear the watch index */
3613 		txbuf->eop = NULL;
3614         }
3615 
3616 	/* Set number of descriptors available */
3617 	txr->tx_avail = adapter->num_tx_desc;
3618 
3619 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3620 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3621 	IGB_TX_UNLOCK(txr);
3622 }
3623 
3624 /*********************************************************************
3625  *
3626  *  Initialize all transmit rings.
3627  *
3628  **********************************************************************/
3629 static void
igb_setup_transmit_structures(struct adapter * adapter)3630 igb_setup_transmit_structures(struct adapter *adapter)
3631 {
3632 	struct tx_ring *txr = adapter->tx_rings;
3633 
3634 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3635 		igb_setup_transmit_ring(txr);
3636 
3637 	return;
3638 }
3639 
3640 /*********************************************************************
3641  *
3642  *  Enable transmit unit.
3643  *
3644  **********************************************************************/
3645 static void
igb_initialize_transmit_units(struct adapter * adapter)3646 igb_initialize_transmit_units(struct adapter *adapter)
3647 {
3648 	struct tx_ring	*txr = adapter->tx_rings;
3649 	struct e1000_hw *hw = &adapter->hw;
3650 	u32		tctl, txdctl;
3651 
3652 	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3653 	tctl = txdctl = 0;
3654 
3655 	/* Setup the Tx Descriptor Rings */
3656 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3657 		u64 bus_addr = txr->txdma.dma_paddr;
3658 
3659 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3660 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3661 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3662 		    (uint32_t)(bus_addr >> 32));
3663 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3664 		    (uint32_t)bus_addr);
3665 
3666 		/* Setup the HW Tx Head and Tail descriptor pointers */
3667 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3668 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3669 
3670 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3671 		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3672 		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3673 
3674 		txr->queue_status = IGB_QUEUE_IDLE;
3675 
3676 		txdctl |= IGB_TX_PTHRESH;
3677 		txdctl |= IGB_TX_HTHRESH << 8;
3678 		txdctl |= IGB_TX_WTHRESH << 16;
3679 		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3680 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3681 	}
3682 
3683 	if (adapter->vf_ifp)
3684 		return;
3685 
3686 	e1000_config_collision_dist(hw);
3687 
3688 	/* Program the Transmit Control Register */
3689 	tctl = E1000_READ_REG(hw, E1000_TCTL);
3690 	tctl &= ~E1000_TCTL_CT;
3691 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3692 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3693 
3694 	/* This write will effectively turn on the transmit unit. */
3695 	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3696 }
3697 
3698 /*********************************************************************
3699  *
3700  *  Free all transmit rings.
3701  *
3702  **********************************************************************/
3703 static void
igb_free_transmit_structures(struct adapter * adapter)3704 igb_free_transmit_structures(struct adapter *adapter)
3705 {
3706 	struct tx_ring *txr = adapter->tx_rings;
3707 
3708 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3709 		IGB_TX_LOCK(txr);
3710 		igb_free_transmit_buffers(txr);
3711 		igb_dma_free(adapter, &txr->txdma);
3712 		IGB_TX_UNLOCK(txr);
3713 		IGB_TX_LOCK_DESTROY(txr);
3714 	}
3715 	free(adapter->tx_rings, M_DEVBUF);
3716 }
3717 
3718 /*********************************************************************
3719  *
3720  *  Free transmit ring related data structures.
3721  *
3722  **********************************************************************/
3723 static void
igb_free_transmit_buffers(struct tx_ring * txr)3724 igb_free_transmit_buffers(struct tx_ring *txr)
3725 {
3726 	struct adapter *adapter = txr->adapter;
3727 	struct igb_tx_buf *tx_buffer;
3728 	int             i;
3729 
3730 	INIT_DEBUGOUT("free_transmit_ring: begin");
3731 
3732 	if (txr->tx_buffers == NULL)
3733 		return;
3734 
3735 	tx_buffer = txr->tx_buffers;
3736 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3737 		if (tx_buffer->m_head != NULL) {
3738 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3739 			    BUS_DMASYNC_POSTWRITE);
3740 			bus_dmamap_unload(txr->txtag,
3741 			    tx_buffer->map);
3742 			m_freem(tx_buffer->m_head);
3743 			tx_buffer->m_head = NULL;
3744 			if (tx_buffer->map != NULL) {
3745 				bus_dmamap_destroy(txr->txtag,
3746 				    tx_buffer->map);
3747 				tx_buffer->map = NULL;
3748 			}
3749 		} else if (tx_buffer->map != NULL) {
3750 			bus_dmamap_unload(txr->txtag,
3751 			    tx_buffer->map);
3752 			bus_dmamap_destroy(txr->txtag,
3753 			    tx_buffer->map);
3754 			tx_buffer->map = NULL;
3755 		}
3756 	}
3757 #ifndef IGB_LEGACY_TX
3758 	if (txr->br != NULL)
3759 		buf_ring_free(txr->br, M_DEVBUF);
3760 #endif
3761 	if (txr->tx_buffers != NULL) {
3762 		free(txr->tx_buffers, M_DEVBUF);
3763 		txr->tx_buffers = NULL;
3764 	}
3765 	if (txr->txtag != NULL) {
3766 		bus_dma_tag_destroy(txr->txtag);
3767 		txr->txtag = NULL;
3768 	}
3769 	return;
3770 }
3771 
3772 /**********************************************************************
3773  *
3774  *  Setup work for hardware segmentation offload (TSO) on
3775  *  adapters using advanced tx descriptors
3776  *
3777  **********************************************************************/
3778 static int
igb_tso_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)3779 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3780     u32 *cmd_type_len, u32 *olinfo_status)
3781 {
3782 	struct adapter *adapter = txr->adapter;
3783 	struct e1000_adv_tx_context_desc *TXD;
3784 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3785 	u32 mss_l4len_idx = 0, paylen;
3786 	u16 vtag = 0, eh_type;
3787 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3788 	struct ether_vlan_header *eh;
3789 #ifdef INET6
3790 	struct ip6_hdr *ip6;
3791 #endif
3792 #ifdef INET
3793 	struct ip *ip;
3794 #endif
3795 	struct tcphdr *th;
3796 
3797 
3798 	/*
3799 	 * Determine where frame payload starts.
3800 	 * Jump over vlan headers if already present
3801 	 */
3802 	eh = mtod(mp, struct ether_vlan_header *);
3803 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3804 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3805 		eh_type = eh->evl_proto;
3806 	} else {
3807 		ehdrlen = ETHER_HDR_LEN;
3808 		eh_type = eh->evl_encap_proto;
3809 	}
3810 
3811 	switch (ntohs(eh_type)) {
3812 #ifdef INET6
3813 	case ETHERTYPE_IPV6:
3814 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3815 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3816 		if (ip6->ip6_nxt != IPPROTO_TCP)
3817 			return (ENXIO);
3818 		ip_hlen = sizeof(struct ip6_hdr);
3819 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3820 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3821 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3822 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3823 		break;
3824 #endif
3825 #ifdef INET
3826 	case ETHERTYPE_IP:
3827 		ip = (struct ip *)(mp->m_data + ehdrlen);
3828 		if (ip->ip_p != IPPROTO_TCP)
3829 			return (ENXIO);
3830 		ip->ip_sum = 0;
3831 		ip_hlen = ip->ip_hl << 2;
3832 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3833 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3834 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3835 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3836 		/* Tell transmit desc to also do IPv4 checksum. */
3837 		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3838 		break;
3839 #endif
3840 	default:
3841 		device_printf(adapter->dev,
3842 		    "CSUM_TSO but no supported IP version (0x%04x)",
3843 		    ntohs(eh_type));
3844 		return (ENXIO);
3845 	}
3846 
3847 	ctxd = txr->next_avail_desc;
3848 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3849 
3850 	tcp_hlen = th->th_off << 2;
3851 
3852 	/* This is used in the transmit desc in encap */
3853 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3854 
3855 	/* VLAN MACLEN IPLEN */
3856 	if (mp->m_flags & M_VLANTAG) {
3857 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3858                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3859 	}
3860 
3861 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3862 	vlan_macip_lens |= ip_hlen;
3863 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3864 
3865 	/* ADV DTYPE TUCMD */
3866 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3867 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3868 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3869 
3870 	/* MSS L4LEN IDX */
3871 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3872 	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3873 	/* 82575 needs the queue index added */
3874 	if (adapter->hw.mac.type == e1000_82575)
3875 		mss_l4len_idx |= txr->me << 4;
3876 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3877 
3878 	TXD->seqnum_seed = htole32(0);
3879 
3880 	if (++ctxd == txr->num_desc)
3881 		ctxd = 0;
3882 
3883 	txr->tx_avail--;
3884 	txr->next_avail_desc = ctxd;
3885 	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3886 	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3887 	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3888 	++txr->tso_tx;
3889 	return (0);
3890 }
3891 
3892 /*********************************************************************
3893  *
3894  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3895  *
3896  **********************************************************************/
3897 
3898 static int
igb_tx_ctx_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)3899 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3900     u32 *cmd_type_len, u32 *olinfo_status)
3901 {
3902 	struct e1000_adv_tx_context_desc *TXD;
3903 	struct adapter *adapter = txr->adapter;
3904 	struct ether_vlan_header *eh;
3905 	struct ip *ip;
3906 	struct ip6_hdr *ip6;
3907 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3908 	int	ehdrlen, ip_hlen = 0;
3909 	u16	etype;
3910 	u8	ipproto = 0;
3911 	int	ctxd = txr->next_avail_desc;
3912 	u16	vtag = 0;
3913 
3914 	/* First check if TSO is to be used */
3915 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3916 		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3917 
3918 	/* Indicate the whole packet as payload when not doing TSO */
3919        	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3920 
3921 	/* Now ready a context descriptor */
3922 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3923 
3924 	/*
3925 	** In advanced descriptors the vlan tag must
3926 	** be placed into the context descriptor. Hence
3927 	** we need to make one even if not doing offloads.
3928 	*/
3929 	if (mp->m_flags & M_VLANTAG) {
3930 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3931 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3932 	} else if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) {
3933 		return (0);
3934 	}
3935 
3936 	/*
3937 	 * Determine where frame payload starts.
3938 	 * Jump over vlan headers if already present,
3939 	 * helpful for QinQ too.
3940 	 */
3941 	eh = mtod(mp, struct ether_vlan_header *);
3942 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3943 		etype = ntohs(eh->evl_proto);
3944 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3945 	} else {
3946 		etype = ntohs(eh->evl_encap_proto);
3947 		ehdrlen = ETHER_HDR_LEN;
3948 	}
3949 
3950 	/* Set the ether header length */
3951 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3952 
3953 	switch (etype) {
3954 		case ETHERTYPE_IP:
3955 			ip = (struct ip *)(mp->m_data + ehdrlen);
3956 			ip_hlen = ip->ip_hl << 2;
3957 			ipproto = ip->ip_p;
3958 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3959 			break;
3960 		case ETHERTYPE_IPV6:
3961 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3962 			ip_hlen = sizeof(struct ip6_hdr);
3963 			/* XXX-BZ this will go badly in case of ext hdrs. */
3964 			ipproto = ip6->ip6_nxt;
3965 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3966 			break;
3967 		default:
3968 			break;
3969 	}
3970 
3971 	vlan_macip_lens |= ip_hlen;
3972 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3973 
3974 	switch (ipproto) {
3975 		case IPPROTO_TCP:
3976 #if __FreeBSD_version >= 1000000
3977 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
3978 #else
3979 			if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3980 #endif
3981 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3982 				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3983 			}
3984 			break;
3985 		case IPPROTO_UDP:
3986 #if __FreeBSD_version >= 1000000
3987 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
3988 #else
3989 			if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3990 #endif
3991 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3992 				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3993 			}
3994 			break;
3995 
3996 #if __FreeBSD_version >= 800000
3997 		case IPPROTO_SCTP:
3998 #if __FreeBSD_version >= 1000000
3999 			if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
4000 #else
4001 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP) {
4002 #endif
4003 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4004 				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4005 			}
4006 			break;
4007 #endif
4008 		default:
4009 			break;
4010 	}
4011 
4012 	/* 82575 needs the queue index added */
4013 	if (adapter->hw.mac.type == e1000_82575)
4014 		mss_l4len_idx = txr->me << 4;
4015 
4016 	/* Now copy bits into descriptor */
4017 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4018 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4019 	TXD->seqnum_seed = htole32(0);
4020 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4021 
4022 	/* We've consumed the first desc, adjust counters */
4023 	if (++ctxd == txr->num_desc)
4024 		ctxd = 0;
4025 	txr->next_avail_desc = ctxd;
4026 	--txr->tx_avail;
4027 
4028         return (0);
4029 }
4030 
4031 /**********************************************************************
4032  *
4033  *  Examine each tx_buffer in the used queue. If the hardware is done
4034  *  processing the packet then free associated resources. The
4035  *  tx_buffer is put back on the free queue.
4036  *
4037  *  TRUE return means there's work in the ring to clean, FALSE its empty.
4038  **********************************************************************/
4039 static bool
4040 igb_txeof(struct tx_ring *txr)
4041 {
4042 	struct adapter		*adapter = txr->adapter;
4043 #ifdef DEV_NETMAP
4044 	struct ifnet		*ifp = adapter->ifp;
4045 #endif /* DEV_NETMAP */
4046 	u32			work, processed = 0;
4047 	int			limit = adapter->tx_process_limit;
4048 	struct igb_tx_buf	*buf;
4049 	union e1000_adv_tx_desc *txd;
4050 
4051 	mtx_assert(&txr->tx_mtx, MA_OWNED);
4052 
4053 #ifdef DEV_NETMAP
4054 	if (netmap_tx_irq(ifp, txr->me))
4055 		return (FALSE);
4056 #endif /* DEV_NETMAP */
4057 
4058 	if (txr->tx_avail == txr->num_desc) {
4059 		txr->queue_status = IGB_QUEUE_IDLE;
4060 		return FALSE;
4061 	}
4062 
4063 	/* Get work starting point */
4064 	work = txr->next_to_clean;
4065 	buf = &txr->tx_buffers[work];
4066 	txd = &txr->tx_base[work];
4067 	work -= txr->num_desc; /* The distance to ring end */
4068         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4069             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4070 	do {
4071 		union e1000_adv_tx_desc *eop = buf->eop;
4072 		if (eop == NULL) /* No work */
4073 			break;
4074 
4075 		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4076 			break;	/* I/O not complete */
4077 
4078 		if (buf->m_head) {
4079 			txr->bytes +=
4080 			    buf->m_head->m_pkthdr.len;
4081 			bus_dmamap_sync(txr->txtag,
4082 			    buf->map,
4083 			    BUS_DMASYNC_POSTWRITE);
4084 			bus_dmamap_unload(txr->txtag,
4085 			    buf->map);
4086 			m_freem(buf->m_head);
4087 			buf->m_head = NULL;
4088 		}
4089 		buf->eop = NULL;
4090 		++txr->tx_avail;
4091 
4092 		/* We clean the range if multi segment */
4093 		while (txd != eop) {
4094 			++txd;
4095 			++buf;
4096 			++work;
4097 			/* wrap the ring? */
4098 			if (__predict_false(!work)) {
4099 				work -= txr->num_desc;
4100 				buf = txr->tx_buffers;
4101 				txd = txr->tx_base;
4102 			}
4103 			if (buf->m_head) {
4104 				txr->bytes +=
4105 				    buf->m_head->m_pkthdr.len;
4106 				bus_dmamap_sync(txr->txtag,
4107 				    buf->map,
4108 				    BUS_DMASYNC_POSTWRITE);
4109 				bus_dmamap_unload(txr->txtag,
4110 				    buf->map);
4111 				m_freem(buf->m_head);
4112 				buf->m_head = NULL;
4113 			}
4114 			++txr->tx_avail;
4115 			buf->eop = NULL;
4116 
4117 		}
4118 		++txr->packets;
4119 		++processed;
4120 		txr->watchdog_time = ticks;
4121 
4122 		/* Try the next packet */
4123 		++txd;
4124 		++buf;
4125 		++work;
4126 		/* reset with a wrap */
4127 		if (__predict_false(!work)) {
4128 			work -= txr->num_desc;
4129 			buf = txr->tx_buffers;
4130 			txd = txr->tx_base;
4131 		}
4132 		prefetch(txd);
4133 	} while (__predict_true(--limit));
4134 
4135 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4136 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4137 
4138 	work += txr->num_desc;
4139 	txr->next_to_clean = work;
4140 
4141 	/*
4142 	** Watchdog calculation, we know there's
4143 	** work outstanding or the first return
4144 	** would have been taken, so none processed
4145 	** for too long indicates a hang.
4146 	*/
4147 	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4148 		txr->queue_status |= IGB_QUEUE_HUNG;
4149 
4150 	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4151 		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4152 
4153 	if (txr->tx_avail == txr->num_desc) {
4154 		txr->queue_status = IGB_QUEUE_IDLE;
4155 		return (FALSE);
4156 	}
4157 
4158 	return (TRUE);
4159 }
4160 
4161 /*********************************************************************
4162  *
4163  *  Refresh mbuf buffers for RX descriptor rings
4164  *   - now keeps its own state so discards due to resource
4165  *     exhaustion are unnecessary, if an mbuf cannot be obtained
4166  *     it just returns, keeping its placeholder, thus it can simply
4167  *     be recalled to try again.
4168  *
4169  **********************************************************************/
4170 static void
4171 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4172 {
4173 	struct adapter		*adapter = rxr->adapter;
4174 	bus_dma_segment_t	hseg[1];
4175 	bus_dma_segment_t	pseg[1];
4176 	struct igb_rx_buf	*rxbuf;
4177 	struct mbuf		*mh, *mp;
4178 	int			i, j, nsegs, error;
4179 	bool			refreshed = FALSE;
4180 
4181 	i = j = rxr->next_to_refresh;
4182 	/*
4183 	** Get one descriptor beyond
4184 	** our work mark to control
4185 	** the loop.
4186         */
4187 	if (++j == adapter->num_rx_desc)
4188 		j = 0;
4189 
4190 	while (j != limit) {
4191 		rxbuf = &rxr->rx_buffers[i];
4192 		/* No hdr mbuf used with header split off */
4193 		if (rxr->hdr_split == FALSE)
4194 			goto no_split;
4195 		if (rxbuf->m_head == NULL) {
4196 			mh = m_gethdr(M_NOWAIT, MT_DATA);
4197 			if (mh == NULL)
4198 				goto update;
4199 		} else
4200 			mh = rxbuf->m_head;
4201 
4202 		mh->m_pkthdr.len = mh->m_len = MHLEN;
4203 		mh->m_len = MHLEN;
4204 		mh->m_flags |= M_PKTHDR;
4205 		/* Get the memory mapping */
4206 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4207 		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4208 		if (error != 0) {
4209 			printf("Refresh mbufs: hdr dmamap load"
4210 			    " failure - %d\n", error);
4211 			m_free(mh);
4212 			rxbuf->m_head = NULL;
4213 			goto update;
4214 		}
4215 		rxbuf->m_head = mh;
4216 		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4217 		    BUS_DMASYNC_PREREAD);
4218 		rxr->rx_base[i].read.hdr_addr =
4219 		    htole64(hseg[0].ds_addr);
4220 no_split:
4221 		if (rxbuf->m_pack == NULL) {
4222 			mp = m_getjcl(M_NOWAIT, MT_DATA,
4223 			    M_PKTHDR, adapter->rx_mbuf_sz);
4224 			if (mp == NULL)
4225 				goto update;
4226 		} else
4227 			mp = rxbuf->m_pack;
4228 
4229 		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4230 		/* Get the memory mapping */
4231 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4232 		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4233 		if (error != 0) {
4234 			printf("Refresh mbufs: payload dmamap load"
4235 			    " failure - %d\n", error);
4236 			m_free(mp);
4237 			rxbuf->m_pack = NULL;
4238 			goto update;
4239 		}
4240 		rxbuf->m_pack = mp;
4241 		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4242 		    BUS_DMASYNC_PREREAD);
4243 		rxr->rx_base[i].read.pkt_addr =
4244 		    htole64(pseg[0].ds_addr);
4245 		refreshed = TRUE; /* I feel wefreshed :) */
4246 
4247 		i = j; /* our next is precalculated */
4248 		rxr->next_to_refresh = i;
4249 		if (++j == adapter->num_rx_desc)
4250 			j = 0;
4251 	}
4252 update:
4253 	if (refreshed) /* update tail */
4254 		E1000_WRITE_REG(&adapter->hw,
4255 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4256 	return;
4257 }
4258 
4259 
4260 /*********************************************************************
4261  *
4262  *  Allocate memory for rx_buffer structures. Since we use one
4263  *  rx_buffer per received packet, the maximum number of rx_buffer's
4264  *  that we'll need is equal to the number of receive descriptors
4265  *  that we've allocated.
4266  *
4267  **********************************************************************/
4268 static int
4269 igb_allocate_receive_buffers(struct rx_ring *rxr)
4270 {
4271 	struct	adapter 	*adapter = rxr->adapter;
4272 	device_t 		dev = adapter->dev;
4273 	struct igb_rx_buf	*rxbuf;
4274 	int             	i, bsize, error;
4275 
4276 	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4277 	if (!(rxr->rx_buffers =
4278 	    (struct igb_rx_buf *) malloc(bsize,
4279 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4280 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4281 		error = ENOMEM;
4282 		goto fail;
4283 	}
4284 
4285 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4286 				   1, 0,		/* alignment, bounds */
4287 				   BUS_SPACE_MAXADDR,	/* lowaddr */
4288 				   BUS_SPACE_MAXADDR,	/* highaddr */
4289 				   NULL, NULL,		/* filter, filterarg */
4290 				   MSIZE,		/* maxsize */
4291 				   1,			/* nsegments */
4292 				   MSIZE,		/* maxsegsize */
4293 				   0,			/* flags */
4294 				   NULL,		/* lockfunc */
4295 				   NULL,		/* lockfuncarg */
4296 				   &rxr->htag))) {
4297 		device_printf(dev, "Unable to create RX DMA tag\n");
4298 		goto fail;
4299 	}
4300 
4301 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4302 				   1, 0,		/* alignment, bounds */
4303 				   BUS_SPACE_MAXADDR,	/* lowaddr */
4304 				   BUS_SPACE_MAXADDR,	/* highaddr */
4305 				   NULL, NULL,		/* filter, filterarg */
4306 				   MJUM9BYTES,		/* maxsize */
4307 				   1,			/* nsegments */
4308 				   MJUM9BYTES,		/* maxsegsize */
4309 				   0,			/* flags */
4310 				   NULL,		/* lockfunc */
4311 				   NULL,		/* lockfuncarg */
4312 				   &rxr->ptag))) {
4313 		device_printf(dev, "Unable to create RX payload DMA tag\n");
4314 		goto fail;
4315 	}
4316 
4317 	for (i = 0; i < adapter->num_rx_desc; i++) {
4318 		rxbuf = &rxr->rx_buffers[i];
4319 		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4320 		if (error) {
4321 			device_printf(dev,
4322 			    "Unable to create RX head DMA maps\n");
4323 			goto fail;
4324 		}
4325 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4326 		if (error) {
4327 			device_printf(dev,
4328 			    "Unable to create RX packet DMA maps\n");
4329 			goto fail;
4330 		}
4331 	}
4332 
4333 	return (0);
4334 
4335 fail:
4336 	/* Frees all, but can handle partial completion */
4337 	igb_free_receive_structures(adapter);
4338 	return (error);
4339 }
4340 
4341 
4342 static void
4343 igb_free_receive_ring(struct rx_ring *rxr)
4344 {
4345 	struct	adapter		*adapter = rxr->adapter;
4346 	struct igb_rx_buf	*rxbuf;
4347 
4348 
4349 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4350 		rxbuf = &rxr->rx_buffers[i];
4351 		if (rxbuf->m_head != NULL) {
4352 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4353 			    BUS_DMASYNC_POSTREAD);
4354 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4355 			rxbuf->m_head->m_flags |= M_PKTHDR;
4356 			m_freem(rxbuf->m_head);
4357 		}
4358 		if (rxbuf->m_pack != NULL) {
4359 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4360 			    BUS_DMASYNC_POSTREAD);
4361 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4362 			rxbuf->m_pack->m_flags |= M_PKTHDR;
4363 			m_freem(rxbuf->m_pack);
4364 		}
4365 		rxbuf->m_head = NULL;
4366 		rxbuf->m_pack = NULL;
4367 	}
4368 }
4369 
4370 
4371 /*********************************************************************
4372  *
4373  *  Initialize a receive ring and its buffers.
4374  *
4375  **********************************************************************/
4376 static int
4377 igb_setup_receive_ring(struct rx_ring *rxr)
4378 {
4379 	struct	adapter		*adapter;
4380 	struct  ifnet		*ifp;
4381 	device_t		dev;
4382 	struct igb_rx_buf	*rxbuf;
4383 	bus_dma_segment_t	pseg[1], hseg[1];
4384 	struct lro_ctrl		*lro = &rxr->lro;
4385 	int			rsize, nsegs, error = 0;
4386 #ifdef DEV_NETMAP
4387 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4388 	struct netmap_slot *slot;
4389 #endif /* DEV_NETMAP */
4390 
4391 	adapter = rxr->adapter;
4392 	dev = adapter->dev;
4393 	ifp = adapter->ifp;
4394 
4395 	/* Clear the ring contents */
4396 	IGB_RX_LOCK(rxr);
4397 #ifdef DEV_NETMAP
4398 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4399 #endif /* DEV_NETMAP */
4400 	rsize = roundup2(adapter->num_rx_desc *
4401 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4402 	bzero((void *)rxr->rx_base, rsize);
4403 
4404 	/*
4405 	** Free current RX buffer structures and their mbufs
4406 	*/
4407 	igb_free_receive_ring(rxr);
4408 
4409 	/* Configure for header split? */
4410 	if (igb_header_split)
4411 		rxr->hdr_split = TRUE;
4412 
4413         /* Now replenish the ring mbufs */
4414 	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4415 		struct mbuf	*mh, *mp;
4416 
4417 		rxbuf = &rxr->rx_buffers[j];
4418 #ifdef DEV_NETMAP
4419 		if (slot) {
4420 			/* slot sj is mapped to the j-th NIC-ring entry */
4421 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
4422 			uint64_t paddr;
4423 			void *addr;
4424 
4425 			addr = PNMB(na, slot + sj, &paddr);
4426 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4427 			/* Update descriptor */
4428 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4429 			continue;
4430 		}
4431 #endif /* DEV_NETMAP */
4432 		if (rxr->hdr_split == FALSE)
4433 			goto skip_head;
4434 
4435 		/* First the header */
4436 		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4437 		if (rxbuf->m_head == NULL) {
4438 			error = ENOBUFS;
4439                         goto fail;
4440 		}
4441 		m_adj(rxbuf->m_head, ETHER_ALIGN);
4442 		mh = rxbuf->m_head;
4443 		mh->m_len = mh->m_pkthdr.len = MHLEN;
4444 		mh->m_flags |= M_PKTHDR;
4445 		/* Get the memory mapping */
4446 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4447 		    rxbuf->hmap, rxbuf->m_head, hseg,
4448 		    &nsegs, BUS_DMA_NOWAIT);
4449 		if (error != 0) /* Nothing elegant to do here */
4450                         goto fail;
4451 		bus_dmamap_sync(rxr->htag,
4452 		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4453 		/* Update descriptor */
4454 		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4455 
4456 skip_head:
4457 		/* Now the payload cluster */
4458 		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4459 		    M_PKTHDR, adapter->rx_mbuf_sz);
4460 		if (rxbuf->m_pack == NULL) {
4461 			error = ENOBUFS;
4462                         goto fail;
4463 		}
4464 		mp = rxbuf->m_pack;
4465 		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4466 		/* Get the memory mapping */
4467 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4468 		    rxbuf->pmap, mp, pseg,
4469 		    &nsegs, BUS_DMA_NOWAIT);
4470 		if (error != 0)
4471                         goto fail;
4472 		bus_dmamap_sync(rxr->ptag,
4473 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4474 		/* Update descriptor */
4475 		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4476         }
4477 
4478 	/* Setup our descriptor indices */
4479 	rxr->next_to_check = 0;
4480 	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4481 	rxr->lro_enabled = FALSE;
4482 	rxr->rx_split_packets = 0;
4483 	rxr->rx_bytes = 0;
4484 
4485 	rxr->fmp = NULL;
4486 	rxr->lmp = NULL;
4487 
4488 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4489 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4490 
4491 	/*
4492 	** Now set up the LRO interface, we
4493 	** also only do head split when LRO
4494 	** is enabled, since so often they
4495 	** are undesirable in similar setups.
4496 	*/
4497 	if (ifp->if_capenable & IFCAP_LRO) {
4498 		error = tcp_lro_init(lro);
4499 		if (error) {
4500 			device_printf(dev, "LRO Initialization failed!\n");
4501 			goto fail;
4502 		}
4503 		INIT_DEBUGOUT("RX LRO Initialized\n");
4504 		rxr->lro_enabled = TRUE;
4505 		lro->ifp = adapter->ifp;
4506 	}
4507 
4508 	IGB_RX_UNLOCK(rxr);
4509 	return (0);
4510 
4511 fail:
4512 	igb_free_receive_ring(rxr);
4513 	IGB_RX_UNLOCK(rxr);
4514 	return (error);
4515 }
4516 
4517 
4518 /*********************************************************************
4519  *
4520  *  Initialize all receive rings.
4521  *
4522  **********************************************************************/
4523 static int
4524 igb_setup_receive_structures(struct adapter *adapter)
4525 {
4526 	struct rx_ring *rxr = adapter->rx_rings;
4527 	int i;
4528 
4529 	for (i = 0; i < adapter->num_queues; i++, rxr++)
4530 		if (igb_setup_receive_ring(rxr))
4531 			goto fail;
4532 
4533 	return (0);
4534 fail:
4535 	/*
4536 	 * Free RX buffers allocated so far, we will only handle
4537 	 * the rings that completed, the failing case will have
4538 	 * cleaned up for itself. 'i' is the endpoint.
4539 	 */
4540 	for (int j = 0; j < i; ++j) {
4541 		rxr = &adapter->rx_rings[j];
4542 		IGB_RX_LOCK(rxr);
4543 		igb_free_receive_ring(rxr);
4544 		IGB_RX_UNLOCK(rxr);
4545 	}
4546 
4547 	return (ENOBUFS);
4548 }
4549 
4550 /*
4551  * Initialise the RSS mapping for NICs that support multiple transmit/
4552  * receive rings.
4553  */
4554 static void
4555 igb_initialise_rss_mapping(struct adapter *adapter)
4556 {
4557 	struct e1000_hw *hw = &adapter->hw;
4558 	int i;
4559 	int queue_id;
4560 	u32 reta;
4561 	u32 rss_key[10], mrqc, shift = 0;
4562 
4563 	/* XXX? */
4564 	if (adapter->hw.mac.type == e1000_82575)
4565 		shift = 6;
4566 
4567 	/*
4568 	 * The redirection table controls which destination
4569 	 * queue each bucket redirects traffic to.
4570 	 * Each DWORD represents four queues, with the LSB
4571 	 * being the first queue in the DWORD.
4572 	 *
4573 	 * This just allocates buckets to queues using round-robin
4574 	 * allocation.
4575 	 *
4576 	 * NOTE: It Just Happens to line up with the default
4577 	 * RSS allocation method.
4578 	 */
4579 
4580 	/* Warning FM follows */
4581 	reta = 0;
4582 	for (i = 0; i < 128; i++) {
4583 #ifdef	RSS
4584 		queue_id = rss_get_indirection_to_bucket(i);
4585 		/*
4586 		 * If we have more queues than buckets, we'll
4587 		 * end up mapping buckets to a subset of the
4588 		 * queues.
4589 		 *
4590 		 * If we have more buckets than queues, we'll
4591 		 * end up instead assigning multiple buckets
4592 		 * to queues.
4593 		 *
4594 		 * Both are suboptimal, but we need to handle
4595 		 * the case so we don't go out of bounds
4596 		 * indexing arrays and such.
4597 		 */
4598 		queue_id = queue_id % adapter->num_queues;
4599 #else
4600 		queue_id = (i % adapter->num_queues);
4601 #endif
4602 		/* Adjust if required */
4603 		queue_id = queue_id << shift;
4604 
4605 		/*
4606 		 * The low 8 bits are for hash value (n+0);
4607 		 * The next 8 bits are for hash value (n+1), etc.
4608 		 */
4609 		reta = reta >> 8;
4610 		reta = reta | ( ((uint32_t) queue_id) << 24);
4611 		if ((i & 3) == 3) {
4612 			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4613 			reta = 0;
4614 		}
4615 	}
4616 
4617 	/* Now fill in hash table */
4618 
4619 	/*
4620 	 * MRQC: Multiple Receive Queues Command
4621 	 * Set queuing to RSS control, number depends on the device.
4622 	 */
4623 	mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4624 
4625 #ifdef	RSS
4626 	/* XXX ew typecasting */
4627 	rss_getkey((uint8_t *) &rss_key);
4628 #else
4629 	arc4rand(&rss_key, sizeof(rss_key), 0);
4630 #endif
4631 	for (i = 0; i < 10; i++)
4632 		E1000_WRITE_REG_ARRAY(hw,
4633 		    E1000_RSSRK(0), i, rss_key[i]);
4634 
4635 	/*
4636 	 * Configure the RSS fields to hash upon.
4637 	 */
4638 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4639 	    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4640 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4641 	    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4642 	mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4643 	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4644 	mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4645 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4646 
4647 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4648 }
4649 
4650 /*********************************************************************
4651  *
4652  *  Enable receive unit.
4653  *
4654  **********************************************************************/
4655 static void
4656 igb_initialize_receive_units(struct adapter *adapter)
4657 {
4658 	struct rx_ring	*rxr = adapter->rx_rings;
4659 	struct ifnet	*ifp = adapter->ifp;
4660 	struct e1000_hw *hw = &adapter->hw;
4661 	u32		rctl, rxcsum, psize, srrctl = 0;
4662 
4663 	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4664 
4665 	/*
4666 	 * Make sure receives are disabled while setting
4667 	 * up the descriptor ring
4668 	 */
4669 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4670 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4671 
4672 	/*
4673 	** Set up for header split
4674 	*/
4675 	if (igb_header_split) {
4676 		/* Use a standard mbuf for the header */
4677 		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4678 		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4679 	} else
4680 		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4681 
4682 	/*
4683 	** Set up for jumbo frames
4684 	*/
4685 	if (ifp->if_mtu > ETHERMTU) {
4686 		rctl |= E1000_RCTL_LPE;
4687 		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4688 			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4689 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4690 		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4691 			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4692 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4693 		}
4694 		/* Set maximum packet len */
4695 		psize = adapter->max_frame_size;
4696 		/* are we on a vlan? */
4697 		if (adapter->ifp->if_vlantrunk != NULL)
4698 			psize += VLAN_TAG_SIZE;
4699 		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4700 	} else {
4701 		rctl &= ~E1000_RCTL_LPE;
4702 		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4703 		rctl |= E1000_RCTL_SZ_2048;
4704 	}
4705 
4706 	/*
4707 	 * If TX flow control is disabled and there's >1 queue defined,
4708 	 * enable DROP.
4709 	 *
4710 	 * This drops frames rather than hanging the RX MAC for all queues.
4711 	 */
4712 	if ((adapter->num_queues > 1) &&
4713 	    (adapter->fc == e1000_fc_none ||
4714 	     adapter->fc == e1000_fc_rx_pause)) {
4715 		srrctl |= E1000_SRRCTL_DROP_EN;
4716 	}
4717 
4718 	/* Setup the Base and Length of the Rx Descriptor Rings */
4719 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4720 		u64 bus_addr = rxr->rxdma.dma_paddr;
4721 		u32 rxdctl;
4722 
4723 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4724 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4725 		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4726 		    (uint32_t)(bus_addr >> 32));
4727 		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4728 		    (uint32_t)bus_addr);
4729 		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4730 		/* Enable this Queue */
4731 		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4732 		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4733 		rxdctl &= 0xFFF00000;
4734 		rxdctl |= IGB_RX_PTHRESH;
4735 		rxdctl |= IGB_RX_HTHRESH << 8;
4736 		rxdctl |= IGB_RX_WTHRESH << 16;
4737 		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4738 	}
4739 
4740 	/*
4741 	** Setup for RX MultiQueue
4742 	*/
4743 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4744 	if (adapter->num_queues >1) {
4745 
4746 		/* rss setup */
4747 		igb_initialise_rss_mapping(adapter);
4748 
4749 		/*
4750 		** NOTE: Receive Full-Packet Checksum Offload
4751 		** is mutually exclusive with Multiqueue. However
4752 		** this is not the same as TCP/IP checksums which
4753 		** still work.
4754 		*/
4755 		rxcsum |= E1000_RXCSUM_PCSD;
4756 #if __FreeBSD_version >= 800000
4757 		/* For SCTP Offload */
4758 		if ((hw->mac.type != e1000_82575) &&
4759 		    (ifp->if_capenable & IFCAP_RXCSUM))
4760 			rxcsum |= E1000_RXCSUM_CRCOFL;
4761 #endif
4762 	} else {
4763 		/* Non RSS setup */
4764 		if (ifp->if_capenable & IFCAP_RXCSUM) {
4765 			rxcsum |= E1000_RXCSUM_IPPCSE;
4766 #if __FreeBSD_version >= 800000
4767 			if (adapter->hw.mac.type != e1000_82575)
4768 				rxcsum |= E1000_RXCSUM_CRCOFL;
4769 #endif
4770 		} else
4771 			rxcsum &= ~E1000_RXCSUM_TUOFL;
4772 	}
4773 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4774 
4775 	/* Setup the Receive Control Register */
4776 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4777 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4778 		   E1000_RCTL_RDMTS_HALF |
4779 		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4780 	/* Strip CRC bytes. */
4781 	rctl |= E1000_RCTL_SECRC;
4782 	/* Make sure VLAN Filters are off */
4783 	rctl &= ~E1000_RCTL_VFE;
4784 	/* Don't store bad packets */
4785 	rctl &= ~E1000_RCTL_SBP;
4786 
4787 	/* Enable Receives */
4788 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4789 
4790 	/*
4791 	 * Setup the HW Rx Head and Tail Descriptor Pointers
4792 	 *   - needs to be after enable
4793 	 */
4794 	for (int i = 0; i < adapter->num_queues; i++) {
4795 		rxr = &adapter->rx_rings[i];
4796 		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4797 #ifdef DEV_NETMAP
4798 		/*
4799 		 * an init() while a netmap client is active must
4800 		 * preserve the rx buffers passed to userspace.
4801 		 * In this driver it means we adjust RDT to
4802 		 * something different from next_to_refresh
4803 		 * (which is not used in netmap mode).
4804 		 */
4805 		if (ifp->if_capenable & IFCAP_NETMAP) {
4806 			struct netmap_adapter *na = NA(adapter->ifp);
4807 			struct netmap_kring *kring = na->rx_rings[i];
4808 			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4809 
4810 			if (t >= adapter->num_rx_desc)
4811 				t -= adapter->num_rx_desc;
4812 			else if (t < 0)
4813 				t += adapter->num_rx_desc;
4814 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4815 		} else
4816 #endif /* DEV_NETMAP */
4817 		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4818 	}
4819 	return;
4820 }
4821 
4822 /*********************************************************************
4823  *
4824  *  Free receive rings.
4825  *
4826  **********************************************************************/
4827 static void
4828 igb_free_receive_structures(struct adapter *adapter)
4829 {
4830 	struct rx_ring *rxr = adapter->rx_rings;
4831 
4832 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4833 		struct lro_ctrl	*lro = &rxr->lro;
4834 		igb_free_receive_buffers(rxr);
4835 		tcp_lro_free(lro);
4836 		igb_dma_free(adapter, &rxr->rxdma);
4837 	}
4838 
4839 	free(adapter->rx_rings, M_DEVBUF);
4840 }
4841 
4842 /*********************************************************************
4843  *
4844  *  Free receive ring data structures.
4845  *
4846  **********************************************************************/
4847 static void
4848 igb_free_receive_buffers(struct rx_ring *rxr)
4849 {
4850 	struct adapter		*adapter = rxr->adapter;
4851 	struct igb_rx_buf	*rxbuf;
4852 	int i;
4853 
4854 	INIT_DEBUGOUT("free_receive_structures: begin");
4855 
4856 	/* Cleanup any existing buffers */
4857 	if (rxr->rx_buffers != NULL) {
4858 		for (i = 0; i < adapter->num_rx_desc; i++) {
4859 			rxbuf = &rxr->rx_buffers[i];
4860 			if (rxbuf->m_head != NULL) {
4861 				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4862 				    BUS_DMASYNC_POSTREAD);
4863 				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4864 				rxbuf->m_head->m_flags |= M_PKTHDR;
4865 				m_freem(rxbuf->m_head);
4866 			}
4867 			if (rxbuf->m_pack != NULL) {
4868 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4869 				    BUS_DMASYNC_POSTREAD);
4870 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4871 				rxbuf->m_pack->m_flags |= M_PKTHDR;
4872 				m_freem(rxbuf->m_pack);
4873 			}
4874 			rxbuf->m_head = NULL;
4875 			rxbuf->m_pack = NULL;
4876 			if (rxbuf->hmap != NULL) {
4877 				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4878 				rxbuf->hmap = NULL;
4879 			}
4880 			if (rxbuf->pmap != NULL) {
4881 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4882 				rxbuf->pmap = NULL;
4883 			}
4884 		}
4885 		if (rxr->rx_buffers != NULL) {
4886 			free(rxr->rx_buffers, M_DEVBUF);
4887 			rxr->rx_buffers = NULL;
4888 		}
4889 	}
4890 
4891 	if (rxr->htag != NULL) {
4892 		bus_dma_tag_destroy(rxr->htag);
4893 		rxr->htag = NULL;
4894 	}
4895 	if (rxr->ptag != NULL) {
4896 		bus_dma_tag_destroy(rxr->ptag);
4897 		rxr->ptag = NULL;
4898 	}
4899 }
4900 
4901 static __inline void
4902 igb_rx_discard(struct rx_ring *rxr, int i)
4903 {
4904 	struct igb_rx_buf	*rbuf;
4905 
4906 	rbuf = &rxr->rx_buffers[i];
4907 
4908 	/* Partially received? Free the chain */
4909 	if (rxr->fmp != NULL) {
4910 		rxr->fmp->m_flags |= M_PKTHDR;
4911 		m_freem(rxr->fmp);
4912 		rxr->fmp = NULL;
4913 		rxr->lmp = NULL;
4914 	}
4915 
4916 	/*
4917 	** With advanced descriptors the writeback
4918 	** clobbers the buffer addrs, so its easier
4919 	** to just free the existing mbufs and take
4920 	** the normal refresh path to get new buffers
4921 	** and mapping.
4922 	*/
4923 	if (rbuf->m_head) {
4924 		m_free(rbuf->m_head);
4925 		rbuf->m_head = NULL;
4926 		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4927 	}
4928 
4929 	if (rbuf->m_pack) {
4930 		m_free(rbuf->m_pack);
4931 		rbuf->m_pack = NULL;
4932 		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4933 	}
4934 
4935 	return;
4936 }
4937 
4938 static __inline void
4939 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4940 {
4941 
4942 	/*
4943 	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4944 	 * should be computed by hardware. Also it should not have VLAN tag in
4945 	 * ethernet header.
4946 	 */
4947 	if (rxr->lro_enabled &&
4948 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4949 	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4950 	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4951 	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4952 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4953 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4954 		/*
4955 		 * Send to the stack if:
4956 		 **  - LRO not enabled, or
4957 		 **  - no LRO resources, or
4958 		 **  - lro enqueue fails
4959 		 */
4960 		if (rxr->lro.lro_cnt != 0)
4961 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4962 				return;
4963 	}
4964 	IGB_RX_UNLOCK(rxr);
4965 	(*ifp->if_input)(ifp, m);
4966 	IGB_RX_LOCK(rxr);
4967 }
4968 
4969 /*********************************************************************
4970  *
4971  *  This routine executes in interrupt context. It replenishes
4972  *  the mbufs in the descriptor and sends data which has been
4973  *  dma'ed into host memory to upper layer.
4974  *
4975  *  We loop at most count times if count is > 0, or until done if
4976  *  count < 0.
4977  *
4978  *  Return TRUE if more to clean, FALSE otherwise
4979  *********************************************************************/
4980 static bool
4981 igb_rxeof(struct igb_queue *que, int count, int *done)
4982 {
4983 	struct adapter		*adapter = que->adapter;
4984 	struct rx_ring		*rxr = que->rxr;
4985 	struct ifnet		*ifp = adapter->ifp;
4986 	struct lro_ctrl		*lro = &rxr->lro;
4987 	int			i, processed = 0, rxdone = 0;
4988 	u32			ptype, staterr = 0;
4989 	union e1000_adv_rx_desc	*cur;
4990 
4991 	IGB_RX_LOCK(rxr);
4992 	/* Sync the ring. */
4993 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4994 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4995 
4996 #ifdef DEV_NETMAP
4997 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4998 		IGB_RX_UNLOCK(rxr);
4999 		return (FALSE);
5000 	}
5001 #endif /* DEV_NETMAP */
5002 
5003 	/* Main clean loop */
5004 	for (i = rxr->next_to_check; count != 0;) {
5005 		struct mbuf		*sendmp, *mh, *mp;
5006 		struct igb_rx_buf	*rxbuf;
5007 		u16			hlen, plen, hdr, vtag, pkt_info;
5008 		bool			eop = FALSE;
5009 
5010 		cur = &rxr->rx_base[i];
5011 		staterr = le32toh(cur->wb.upper.status_error);
5012 		if ((staterr & E1000_RXD_STAT_DD) == 0)
5013 			break;
5014 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5015 			break;
5016 		count--;
5017 		sendmp = mh = mp = NULL;
5018 		cur->wb.upper.status_error = 0;
5019 		rxbuf = &rxr->rx_buffers[i];
5020 		plen = le16toh(cur->wb.upper.length);
5021 		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5022 		if (((adapter->hw.mac.type == e1000_i350) ||
5023 		    (adapter->hw.mac.type == e1000_i354)) &&
5024 		    (staterr & E1000_RXDEXT_STATERR_LB))
5025 			vtag = be16toh(cur->wb.upper.vlan);
5026 		else
5027 			vtag = le16toh(cur->wb.upper.vlan);
5028 		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5029 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5030 		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5031 
5032 		/*
5033 		 * Free the frame (all segments) if we're at EOP and
5034 		 * it's an error.
5035 		 *
5036 		 * The datasheet states that EOP + status is only valid for
5037 		 * the final segment in a multi-segment frame.
5038 		 */
5039 		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5040 			adapter->dropped_pkts++;
5041 			++rxr->rx_discarded;
5042 			igb_rx_discard(rxr, i);
5043 			goto next_desc;
5044 		}
5045 
5046 		/*
5047 		** The way the hardware is configured to
5048 		** split, it will ONLY use the header buffer
5049 		** when header split is enabled, otherwise we
5050 		** get normal behavior, ie, both header and
5051 		** payload are DMA'd into the payload buffer.
5052 		**
5053 		** The fmp test is to catch the case where a
5054 		** packet spans multiple descriptors, in that
5055 		** case only the first header is valid.
5056 		*/
5057 		if (rxr->hdr_split && rxr->fmp == NULL) {
5058 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5059 			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5060 			    E1000_RXDADV_HDRBUFLEN_SHIFT;
5061 			if (hlen > IGB_HDR_BUF)
5062 				hlen = IGB_HDR_BUF;
5063 			mh = rxr->rx_buffers[i].m_head;
5064 			mh->m_len = hlen;
5065 			/* clear buf pointer for refresh */
5066 			rxbuf->m_head = NULL;
5067 			/*
5068 			** Get the payload length, this
5069 			** could be zero if its a small
5070 			** packet.
5071 			*/
5072 			if (plen > 0) {
5073 				mp = rxr->rx_buffers[i].m_pack;
5074 				mp->m_len = plen;
5075 				mh->m_next = mp;
5076 				/* clear buf pointer */
5077 				rxbuf->m_pack = NULL;
5078 				rxr->rx_split_packets++;
5079 			}
5080 		} else {
5081 			/*
5082 			** Either no header split, or a
5083 			** secondary piece of a fragmented
5084 			** split packet.
5085 			*/
5086 			mh = rxr->rx_buffers[i].m_pack;
5087 			mh->m_len = plen;
5088 			/* clear buf info for refresh */
5089 			rxbuf->m_pack = NULL;
5090 		}
5091 		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5092 
5093 		++processed; /* So we know when to refresh */
5094 
5095 		/* Initial frame - setup */
5096 		if (rxr->fmp == NULL) {
5097 			mh->m_pkthdr.len = mh->m_len;
5098 			/* Save the head of the chain */
5099 			rxr->fmp = mh;
5100 			rxr->lmp = mh;
5101 			if (mp != NULL) {
5102 				/* Add payload if split */
5103 				mh->m_pkthdr.len += mp->m_len;
5104 				rxr->lmp = mh->m_next;
5105 			}
5106 		} else {
5107 			/* Chain mbuf's together */
5108 			rxr->lmp->m_next = mh;
5109 			rxr->lmp = rxr->lmp->m_next;
5110 			rxr->fmp->m_pkthdr.len += mh->m_len;
5111 		}
5112 
5113 		if (eop) {
5114 			rxr->fmp->m_pkthdr.rcvif = ifp;
5115 			rxr->rx_packets++;
5116 			/* capture data for AIM */
5117 			rxr->packets++;
5118 			rxr->bytes += rxr->fmp->m_pkthdr.len;
5119 			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5120 
5121 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5122 				igb_rx_checksum(staterr, rxr->fmp, ptype);
5123 
5124 			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5125 			    (staterr & E1000_RXD_STAT_VP) != 0) {
5126 				rxr->fmp->m_pkthdr.ether_vtag = vtag;
5127 				rxr->fmp->m_flags |= M_VLANTAG;
5128 			}
5129 
5130 			/*
5131 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
5132 			 * and never cleared. This means we have RSS hash
5133 			 * available to be used.
5134 			 */
5135 			if (adapter->num_queues > 1) {
5136 				rxr->fmp->m_pkthdr.flowid =
5137 				    le32toh(cur->wb.lower.hi_dword.rss);
5138 				switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5139 					case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5140 						M_HASHTYPE_SET(rxr->fmp,
5141 						    M_HASHTYPE_RSS_TCP_IPV4);
5142 					break;
5143 					case E1000_RXDADV_RSSTYPE_IPV4:
5144 						M_HASHTYPE_SET(rxr->fmp,
5145 						    M_HASHTYPE_RSS_IPV4);
5146 					break;
5147 					case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5148 						M_HASHTYPE_SET(rxr->fmp,
5149 						    M_HASHTYPE_RSS_TCP_IPV6);
5150 					break;
5151 					case E1000_RXDADV_RSSTYPE_IPV6_EX:
5152 						M_HASHTYPE_SET(rxr->fmp,
5153 						    M_HASHTYPE_RSS_IPV6_EX);
5154 					break;
5155 					case E1000_RXDADV_RSSTYPE_IPV6:
5156 						M_HASHTYPE_SET(rxr->fmp,
5157 						    M_HASHTYPE_RSS_IPV6);
5158 					break;
5159 					case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5160 						M_HASHTYPE_SET(rxr->fmp,
5161 						    M_HASHTYPE_RSS_TCP_IPV6_EX);
5162 					break;
5163 					default:
5164 						/* XXX fallthrough */
5165 						M_HASHTYPE_SET(rxr->fmp,
5166 						    M_HASHTYPE_OPAQUE_HASH);
5167 				}
5168 			} else {
5169 #ifndef IGB_LEGACY_TX
5170 				rxr->fmp->m_pkthdr.flowid = que->msix;
5171 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5172 #endif
5173 			}
5174 			sendmp = rxr->fmp;
5175 			/* Make sure to set M_PKTHDR. */
5176 			sendmp->m_flags |= M_PKTHDR;
5177 			rxr->fmp = NULL;
5178 			rxr->lmp = NULL;
5179 		}
5180 
5181 next_desc:
5182 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5183 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5184 
5185 		/* Advance our pointers to the next descriptor. */
5186 		if (++i == adapter->num_rx_desc)
5187 			i = 0;
5188 		/*
5189 		** Send to the stack or LRO
5190 		*/
5191 		if (sendmp != NULL) {
5192 			rxr->next_to_check = i;
5193 			igb_rx_input(rxr, ifp, sendmp, ptype);
5194 			i = rxr->next_to_check;
5195 			rxdone++;
5196 		}
5197 
5198 		/* Every 8 descriptors we go to refresh mbufs */
5199 		if (processed == 8) {
5200                         igb_refresh_mbufs(rxr, i);
5201                         processed = 0;
5202 		}
5203 	}
5204 
5205 	/* Catch any remainders */
5206 	if (igb_rx_unrefreshed(rxr))
5207 		igb_refresh_mbufs(rxr, i);
5208 
5209 	rxr->next_to_check = i;
5210 
5211 	/*
5212 	 * Flush any outstanding LRO work
5213 	 */
5214 	tcp_lro_flush_all(lro);
5215 
5216 	if (done != NULL)
5217 		*done += rxdone;
5218 
5219 	IGB_RX_UNLOCK(rxr);
5220 	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5221 }
5222 
5223 /*********************************************************************
5224  *
5225  *  Verify that the hardware indicated that the checksum is valid.
5226  *  Inform the stack about the status of checksum so that stack
5227  *  doesn't spend time verifying the checksum.
5228  *
5229  *********************************************************************/
5230 static void
5231 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5232 {
5233 	u16 status = (u16)staterr;
5234 	u8  errors = (u8) (staterr >> 24);
5235 	int sctp;
5236 
5237 	/* Ignore Checksum bit is set */
5238 	if (status & E1000_RXD_STAT_IXSM) {
5239 		mp->m_pkthdr.csum_flags = 0;
5240 		return;
5241 	}
5242 
5243 	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5244 	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5245 		sctp = 1;
5246 	else
5247 		sctp = 0;
5248 	if (status & E1000_RXD_STAT_IPCS) {
5249 		/* Did it pass? */
5250 		if (!(errors & E1000_RXD_ERR_IPE)) {
5251 			/* IP Checksum Good */
5252 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5253 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5254 		} else
5255 			mp->m_pkthdr.csum_flags = 0;
5256 	}
5257 
5258 	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5259 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5260 #if __FreeBSD_version >= 800000
5261 		if (sctp) /* reassign */
5262 			type = CSUM_SCTP_VALID;
5263 #endif
5264 		/* Did it pass? */
5265 		if (!(errors & E1000_RXD_ERR_TCPE)) {
5266 			mp->m_pkthdr.csum_flags |= type;
5267 			if (sctp == 0)
5268 				mp->m_pkthdr.csum_data = htons(0xffff);
5269 		}
5270 	}
5271 	return;
5272 }
5273 
5274 /*
5275  * This routine is run via an vlan
5276  * config EVENT
5277  */
5278 static void
5279 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5280 {
5281 	struct adapter	*adapter = ifp->if_softc;
5282 	u32		index, bit;
5283 
5284 	if (ifp->if_softc !=  arg)   /* Not our event */
5285 		return;
5286 
5287 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5288                 return;
5289 
5290 	IGB_CORE_LOCK(adapter);
5291 	index = (vtag >> 5) & 0x7F;
5292 	bit = vtag & 0x1F;
5293 	adapter->shadow_vfta[index] |= (1 << bit);
5294 	++adapter->num_vlans;
5295 	/* Change hw filter setting */
5296 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5297 		igb_setup_vlan_hw_support(adapter);
5298 	IGB_CORE_UNLOCK(adapter);
5299 }
5300 
5301 /*
5302  * This routine is run via an vlan
5303  * unconfig EVENT
5304  */
5305 static void
5306 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5307 {
5308 	struct adapter	*adapter = ifp->if_softc;
5309 	u32		index, bit;
5310 
5311 	if (ifp->if_softc !=  arg)
5312 		return;
5313 
5314 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5315                 return;
5316 
5317 	IGB_CORE_LOCK(adapter);
5318 	index = (vtag >> 5) & 0x7F;
5319 	bit = vtag & 0x1F;
5320 	adapter->shadow_vfta[index] &= ~(1 << bit);
5321 	--adapter->num_vlans;
5322 	/* Change hw filter setting */
5323 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5324 		igb_setup_vlan_hw_support(adapter);
5325 	IGB_CORE_UNLOCK(adapter);
5326 }
5327 
5328 static void
5329 igb_setup_vlan_hw_support(struct adapter *adapter)
5330 {
5331 	struct e1000_hw *hw = &adapter->hw;
5332 	struct ifnet	*ifp = adapter->ifp;
5333 	u32             reg;
5334 
5335 	if (adapter->vf_ifp) {
5336 		e1000_rlpml_set_vf(hw,
5337 		    adapter->max_frame_size + VLAN_TAG_SIZE);
5338 		return;
5339 	}
5340 
5341 	reg = E1000_READ_REG(hw, E1000_CTRL);
5342 	reg |= E1000_CTRL_VME;
5343 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5344 
5345 	/* Enable the Filter Table */
5346 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5347 		reg = E1000_READ_REG(hw, E1000_RCTL);
5348 		reg &= ~E1000_RCTL_CFIEN;
5349 		reg |= E1000_RCTL_VFE;
5350 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5351 	}
5352 
5353 	/* Update the frame size */
5354 	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5355 	    adapter->max_frame_size + VLAN_TAG_SIZE);
5356 
5357 	/* Don't bother with table if no vlans */
5358 	if ((adapter->num_vlans == 0) ||
5359 	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5360                 return;
5361 	/*
5362 	** A soft reset zero's out the VFTA, so
5363 	** we need to repopulate it now.
5364 	*/
5365 	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5366                 if (adapter->shadow_vfta[i] != 0) {
5367 			if (adapter->vf_ifp)
5368 				e1000_vfta_set_vf(hw,
5369 				    adapter->shadow_vfta[i], TRUE);
5370 			else
5371 				e1000_write_vfta(hw,
5372 				    i, adapter->shadow_vfta[i]);
5373 		}
5374 }
5375 
5376 static void
5377 igb_enable_intr(struct adapter *adapter)
5378 {
5379 	/* With RSS set up what to auto clear */
5380 	if (adapter->msix_mem) {
5381 		u32 mask = (adapter->que_mask | adapter->link_mask);
5382 		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5383 		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5384 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5385 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5386 		    E1000_IMS_LSC);
5387 	} else {
5388 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5389 		    IMS_ENABLE_MASK);
5390 	}
5391 	E1000_WRITE_FLUSH(&adapter->hw);
5392 
5393 	return;
5394 }
5395 
5396 static void
5397 igb_disable_intr(struct adapter *adapter)
5398 {
5399 	if (adapter->msix_mem) {
5400 		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5401 		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5402 	}
5403 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5404 	E1000_WRITE_FLUSH(&adapter->hw);
5405 	return;
5406 }
5407 
5408 /*
5409  * Bit of a misnomer, what this really means is
5410  * to enable OS management of the system... aka
5411  * to disable special hardware management features
5412  */
5413 static void
5414 igb_init_manageability(struct adapter *adapter)
5415 {
5416 	if (adapter->has_manage) {
5417 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5418 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5419 
5420 		/* disable hardware interception of ARP */
5421 		manc &= ~(E1000_MANC_ARP_EN);
5422 
5423                 /* enable receiving management packets to the host */
5424 		manc |= E1000_MANC_EN_MNG2HOST;
5425 		manc2h |= 1 << 5;  /* Mng Port 623 */
5426 		manc2h |= 1 << 6;  /* Mng Port 664 */
5427 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5428 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5429 	}
5430 }
5431 
5432 /*
5433  * Give control back to hardware management
5434  * controller if there is one.
5435  */
5436 static void
5437 igb_release_manageability(struct adapter *adapter)
5438 {
5439 	if (adapter->has_manage) {
5440 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5441 
5442 		/* re-enable hardware interception of ARP */
5443 		manc |= E1000_MANC_ARP_EN;
5444 		manc &= ~E1000_MANC_EN_MNG2HOST;
5445 
5446 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5447 	}
5448 }
5449 
5450 /*
5451  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5452  * For ASF and Pass Through versions of f/w this means that
5453  * the driver is loaded.
5454  *
5455  */
5456 static void
5457 igb_get_hw_control(struct adapter *adapter)
5458 {
5459 	u32 ctrl_ext;
5460 
5461 	if (adapter->vf_ifp)
5462 		return;
5463 
5464 	/* Let firmware know the driver has taken over */
5465 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5466 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5467 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5468 }
5469 
5470 /*
5471  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5472  * For ASF and Pass Through versions of f/w this means that the
5473  * driver is no longer loaded.
5474  *
5475  */
5476 static void
5477 igb_release_hw_control(struct adapter *adapter)
5478 {
5479 	u32 ctrl_ext;
5480 
5481 	if (adapter->vf_ifp)
5482 		return;
5483 
5484 	/* Let firmware taken over control of h/w */
5485 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5486 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5487 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5488 }
5489 
5490 static int
5491 igb_is_valid_ether_addr(uint8_t *addr)
5492 {
5493 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5494 
5495 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5496 		return (FALSE);
5497 	}
5498 
5499 	return (TRUE);
5500 }
5501 
5502 
5503 /*
5504  * Enable PCI Wake On Lan capability
5505  */
5506 static void
5507 igb_enable_wakeup(device_t dev)
5508 {
5509 	struct adapter	*adapter = device_get_softc(dev);
5510 	struct ifnet	*ifp = adapter->ifp;
5511 	u32		pmc, ctrl, ctrl_ext, rctl, wuc;
5512 	u16		status;
5513 
5514 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5515 		return;
5516 
5517 	adapter->wol = E1000_READ_REG(&adapter->hw, E1000_WUFC);
5518 	if (ifp->if_capenable & IFCAP_WOL_MAGIC)
5519 		adapter->wol |=  E1000_WUFC_MAG;
5520 	else
5521 		adapter->wol &= ~E1000_WUFC_MAG;
5522 
5523 	if (ifp->if_capenable & IFCAP_WOL_MCAST) {
5524 		adapter->wol |=  E1000_WUFC_MC;
5525 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5526 		rctl |= E1000_RCTL_MPE;
5527 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5528 	} else
5529 		adapter->wol &= ~E1000_WUFC_MC;
5530 
5531 	if (ifp->if_capenable & IFCAP_WOL_UCAST)
5532 		adapter->wol |=  E1000_WUFC_EX;
5533 	else
5534 		adapter->wol &= ~E1000_WUFC_EX;
5535 
5536 	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5537 		goto pme;
5538 
5539 	/* Advertise the wakeup capability */
5540 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5541 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5542 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5543 
5544 	/* Keep the laser running on Fiber adapters */
5545 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5546 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5547 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5548 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5549 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5550 	}
5551 
5552 	/* Enable wakeup by the MAC */
5553 	wuc = E1000_READ_REG(&adapter->hw, E1000_WUC);
5554 	wuc |= E1000_WUC_PME_EN | E1000_WUC_APME;
5555 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc);
5556 	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5557 
5558 pme:
5559 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5560 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5561 	if (ifp->if_capenable & IFCAP_WOL)
5562 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5563 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5564 }
5565 
5566 static void
5567 igb_led_func(void *arg, int onoff)
5568 {
5569 	struct adapter	*adapter = arg;
5570 
5571 	IGB_CORE_LOCK(adapter);
5572 	if (onoff) {
5573 		e1000_setup_led(&adapter->hw);
5574 		e1000_led_on(&adapter->hw);
5575 	} else {
5576 		e1000_led_off(&adapter->hw);
5577 		e1000_cleanup_led(&adapter->hw);
5578 	}
5579 	IGB_CORE_UNLOCK(adapter);
5580 }
5581 
5582 static uint64_t
5583 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5584 {
5585 	struct adapter *adapter;
5586 	struct e1000_vf_stats *stats;
5587 #ifndef IGB_LEGACY_TX
5588 	struct tx_ring *txr;
5589 	uint64_t rv;
5590 #endif
5591 
5592 	adapter = if_getsoftc(ifp);
5593 	stats = (struct e1000_vf_stats *)adapter->stats;
5594 
5595 	switch (cnt) {
5596 	case IFCOUNTER_IPACKETS:
5597 		return (stats->gprc);
5598 	case IFCOUNTER_OPACKETS:
5599 		return (stats->gptc);
5600 	case IFCOUNTER_IBYTES:
5601 		return (stats->gorc);
5602 	case IFCOUNTER_OBYTES:
5603 		return (stats->gotc);
5604 	case IFCOUNTER_IMCASTS:
5605 		return (stats->mprc);
5606 	case IFCOUNTER_IERRORS:
5607 		return (adapter->dropped_pkts);
5608 	case IFCOUNTER_OERRORS:
5609 		return (adapter->watchdog_events);
5610 #ifndef IGB_LEGACY_TX
5611 	case IFCOUNTER_OQDROPS:
5612 		rv = 0;
5613 		txr = adapter->tx_rings;
5614 		for (int i = 0; i < adapter->num_queues; i++, txr++)
5615 			rv += txr->br->br_drops;
5616 		return (rv);
5617 #endif
5618 	default:
5619 		return (if_get_counter_default(ifp, cnt));
5620 	}
5621 }
5622 
5623 static uint64_t
5624 igb_get_counter(if_t ifp, ift_counter cnt)
5625 {
5626 	struct adapter *adapter;
5627 	struct e1000_hw_stats *stats;
5628 #ifndef IGB_LEGACY_TX
5629 	struct tx_ring *txr;
5630 	uint64_t rv;
5631 #endif
5632 
5633 	adapter = if_getsoftc(ifp);
5634 	if (adapter->vf_ifp)
5635 		return (igb_get_vf_counter(ifp, cnt));
5636 
5637 	stats = (struct e1000_hw_stats *)adapter->stats;
5638 
5639 	switch (cnt) {
5640 	case IFCOUNTER_IPACKETS:
5641 		return (stats->gprc);
5642 	case IFCOUNTER_OPACKETS:
5643 		return (stats->gptc);
5644 	case IFCOUNTER_IBYTES:
5645 		return (stats->gorc);
5646 	case IFCOUNTER_OBYTES:
5647 		return (stats->gotc);
5648 	case IFCOUNTER_IMCASTS:
5649 		return (stats->mprc);
5650 	case IFCOUNTER_OMCASTS:
5651 		return (stats->mptc);
5652 	case IFCOUNTER_IERRORS:
5653 		return (adapter->dropped_pkts + stats->rxerrc +
5654 		    stats->crcerrs + stats->algnerrc +
5655 		    stats->ruc + stats->roc + stats->cexterr);
5656 	case IFCOUNTER_OERRORS:
5657 		return (stats->ecol + stats->latecol +
5658 		    adapter->watchdog_events);
5659 	case IFCOUNTER_COLLISIONS:
5660 		return (stats->colc);
5661 	case IFCOUNTER_IQDROPS:
5662 		return (stats->mpc);
5663 #ifndef IGB_LEGACY_TX
5664 	case IFCOUNTER_OQDROPS:
5665 		rv = 0;
5666 		txr = adapter->tx_rings;
5667 		for (int i = 0; i < adapter->num_queues; i++, txr++)
5668 			rv += txr->br->br_drops;
5669 		return (rv);
5670 #endif
5671 	default:
5672 		return (if_get_counter_default(ifp, cnt));
5673 	}
5674 }
5675 
5676 /**********************************************************************
5677  *
5678  *  Update the board statistics counters.
5679  *
5680  **********************************************************************/
5681 static void
5682 igb_update_stats_counters(struct adapter *adapter)
5683 {
5684         struct e1000_hw		*hw = &adapter->hw;
5685 	struct e1000_hw_stats	*stats;
5686 
5687 	/*
5688 	** The virtual function adapter has only a
5689 	** small controlled set of stats, do only
5690 	** those and return.
5691 	*/
5692 	if (adapter->vf_ifp) {
5693 		igb_update_vf_stats_counters(adapter);
5694 		return;
5695 	}
5696 
5697 	stats = (struct e1000_hw_stats	*)adapter->stats;
5698 
5699 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5700 	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5701 		stats->symerrs +=
5702 		    E1000_READ_REG(hw,E1000_SYMERRS);
5703 		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5704 	}
5705 
5706 	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5707 	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5708 	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5709 	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5710 
5711 	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5712 	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5713 	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5714 	stats->dc += E1000_READ_REG(hw, E1000_DC);
5715 	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5716 	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5717 	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5718 	/*
5719 	** For watchdog management we need to know if we have been
5720 	** paused during the last interval, so capture that here.
5721 	*/
5722         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5723         stats->xoffrxc += adapter->pause_frames;
5724 	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5725 	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5726 	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5727 	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5728 	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5729 	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5730 	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5731 	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5732 	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5733 	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5734 	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5735 	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5736 
5737 	/* For the 64-bit byte counters the low dword must be read first. */
5738 	/* Both registers clear on the read of the high dword */
5739 
5740 	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5741 	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5742 	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5743 	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5744 
5745 	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5746 	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5747 	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5748 	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5749 	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5750 
5751 	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5752 	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5753 	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5754 
5755 	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5756 	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5757 	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5758 	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5759 
5760 	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5761 	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5762 	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5763 	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5764 	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5765 	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5766 	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5767 	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5768 	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5769 	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5770 
5771 	/* Interrupt Counts */
5772 
5773 	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5774 	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5775 	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5776 	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5777 	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5778 	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5779 	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5780 	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5781 	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5782 
5783 	/* Host to Card Statistics */
5784 
5785 	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5786 	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5787 	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5788 	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5789 	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5790 	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5791 	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5792 	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5793 	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5794 	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5795 	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5796 	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5797 	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5798 	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5799 
5800 	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5801 	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5802 	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5803 	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5804 	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5805 	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5806 
5807 	/* Driver specific counters */
5808 	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5809 	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5810 	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5811 	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5812 	adapter->packet_buf_alloc_tx =
5813 	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5814 	adapter->packet_buf_alloc_rx =
5815 	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5816 }
5817 
5818 
5819 /**********************************************************************
5820  *
5821  *  Initialize the VF board statistics counters.
5822  *
5823  **********************************************************************/
5824 static void
5825 igb_vf_init_stats(struct adapter *adapter)
5826 {
5827         struct e1000_hw *hw = &adapter->hw;
5828 	struct e1000_vf_stats	*stats;
5829 
5830 	stats = (struct e1000_vf_stats	*)adapter->stats;
5831 	if (stats == NULL)
5832 		return;
5833         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5834         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5835         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5836         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5837         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5838 }
5839 
5840 /**********************************************************************
5841  *
5842  *  Update the VF board statistics counters.
5843  *
5844  **********************************************************************/
5845 static void
5846 igb_update_vf_stats_counters(struct adapter *adapter)
5847 {
5848 	struct e1000_hw *hw = &adapter->hw;
5849 	struct e1000_vf_stats	*stats;
5850 
5851 	if (adapter->link_speed == 0)
5852 		return;
5853 
5854 	stats = (struct e1000_vf_stats	*)adapter->stats;
5855 
5856 	UPDATE_VF_REG(E1000_VFGPRC,
5857 	    stats->last_gprc, stats->gprc);
5858 	UPDATE_VF_REG(E1000_VFGORC,
5859 	    stats->last_gorc, stats->gorc);
5860 	UPDATE_VF_REG(E1000_VFGPTC,
5861 	    stats->last_gptc, stats->gptc);
5862 	UPDATE_VF_REG(E1000_VFGOTC,
5863 	    stats->last_gotc, stats->gotc);
5864 	UPDATE_VF_REG(E1000_VFMPRC,
5865 	    stats->last_mprc, stats->mprc);
5866 }
5867 
5868 /* Export a single 32-bit register via a read-only sysctl. */
5869 static int
5870 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5871 {
5872 	struct adapter *adapter;
5873 	u_int val;
5874 
5875 	adapter = oidp->oid_arg1;
5876 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5877 	return (sysctl_handle_int(oidp, &val, 0, req));
5878 }
5879 
5880 /*
5881 **  Tuneable interrupt rate handler
5882 */
5883 static int
5884 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5885 {
5886 	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5887 	int			error;
5888 	u32			reg, usec, rate;
5889 
5890 	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5891 	usec = ((reg & 0x7FFC) >> 2);
5892 	if (usec > 0)
5893 		rate = 1000000 / usec;
5894 	else
5895 		rate = 0;
5896 	error = sysctl_handle_int(oidp, &rate, 0, req);
5897 	if (error || !req->newptr)
5898 		return error;
5899 	return 0;
5900 }
5901 
5902 /*
5903  * Add sysctl variables, one per statistic, to the system.
5904  */
5905 static void
5906 igb_add_hw_stats(struct adapter *adapter)
5907 {
5908 	device_t dev = adapter->dev;
5909 
5910 	struct tx_ring *txr = adapter->tx_rings;
5911 	struct rx_ring *rxr = adapter->rx_rings;
5912 
5913 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5914 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5915 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5916 	struct e1000_hw_stats *stats = adapter->stats;
5917 
5918 	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5919 	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5920 
5921 #define QUEUE_NAME_LEN 32
5922 	char namebuf[QUEUE_NAME_LEN];
5923 
5924 	/* Driver Statistics */
5925 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5926 			CTLFLAG_RD, &adapter->dropped_pkts,
5927 			"Driver dropped packets");
5928 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5929 			CTLFLAG_RD, &adapter->link_irq,
5930 			"Link MSIX IRQ Handled");
5931 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5932 			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5933 			"Defragmenting mbuf chain failed");
5934 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5935 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5936 			"Driver tx dma failure in xmit");
5937 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5938 			CTLFLAG_RD, &adapter->rx_overruns,
5939 			"RX overruns");
5940 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5941 			CTLFLAG_RD, &adapter->watchdog_events,
5942 			"Watchdog timeouts");
5943 
5944 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5945 			CTLFLAG_RD, &adapter->device_control,
5946 			"Device Control Register");
5947 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5948 			CTLFLAG_RD, &adapter->rx_control,
5949 			"Receiver Control Register");
5950 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5951 			CTLFLAG_RD, &adapter->int_mask,
5952 			"Interrupt Mask");
5953 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5954 			CTLFLAG_RD, &adapter->eint_mask,
5955 			"Extended Interrupt Mask");
5956 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5957 			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5958 			"Transmit Buffer Packet Allocation");
5959 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5960 			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5961 			"Receive Buffer Packet Allocation");
5962 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5963 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5964 			"Flow Control High Watermark");
5965 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5966 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5967 			"Flow Control Low Watermark");
5968 
5969 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5970 		struct lro_ctrl *lro = &rxr->lro;
5971 
5972 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5973 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5974 					    CTLFLAG_RD, NULL, "Queue Name");
5975 		queue_list = SYSCTL_CHILDREN(queue_node);
5976 
5977 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5978 				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5979 				sizeof(&adapter->queues[i]),
5980 				igb_sysctl_interrupt_rate_handler,
5981 				"IU", "Interrupt Rate");
5982 
5983 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5984 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5985 				igb_sysctl_reg_handler, "IU",
5986  				"Transmit Descriptor Head");
5987 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5988 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5989 				igb_sysctl_reg_handler, "IU",
5990  				"Transmit Descriptor Tail");
5991 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5992 				CTLFLAG_RD, &txr->no_desc_avail,
5993 				"Queue Descriptors Unavailable");
5994 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5995 				CTLFLAG_RD, &txr->total_packets,
5996 				"Queue Packets Transmitted");
5997 
5998 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5999 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
6000 				igb_sysctl_reg_handler, "IU",
6001 				"Receive Descriptor Head");
6002 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
6003 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
6004 				igb_sysctl_reg_handler, "IU",
6005 				"Receive Descriptor Tail");
6006 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
6007 				CTLFLAG_RD, &rxr->rx_packets,
6008 				"Queue Packets Received");
6009 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
6010 				CTLFLAG_RD, &rxr->rx_bytes,
6011 				"Queue Bytes Received");
6012 		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
6013 				CTLFLAG_RD, &lro->lro_queued, 0,
6014 				"LRO Queued");
6015 		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
6016 				CTLFLAG_RD, &lro->lro_flushed, 0,
6017 				"LRO Flushed");
6018 	}
6019 
6020 	/* MAC stats get their own sub node */
6021 
6022 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
6023 				    CTLFLAG_RD, NULL, "MAC Statistics");
6024 	stat_list = SYSCTL_CHILDREN(stat_node);
6025 
6026 	/*
6027 	** VF adapter has a very limited set of stats
6028 	** since its not managing the metal, so to speak.
6029 	*/
6030 	if (adapter->vf_ifp) {
6031 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6032 			CTLFLAG_RD, &stats->gprc,
6033 			"Good Packets Received");
6034 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6035 			CTLFLAG_RD, &stats->gptc,
6036 			"Good Packets Transmitted");
6037  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6038  			CTLFLAG_RD, &stats->gorc,
6039  			"Good Octets Received");
6040  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6041  			CTLFLAG_RD, &stats->gotc,
6042  			"Good Octets Transmitted");
6043 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6044 			CTLFLAG_RD, &stats->mprc,
6045 			"Multicast Packets Received");
6046 		return;
6047 	}
6048 
6049 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
6050 			CTLFLAG_RD, &stats->ecol,
6051 			"Excessive collisions");
6052 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
6053 			CTLFLAG_RD, &stats->scc,
6054 			"Single collisions");
6055 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
6056 			CTLFLAG_RD, &stats->mcc,
6057 			"Multiple collisions");
6058 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
6059 			CTLFLAG_RD, &stats->latecol,
6060 			"Late collisions");
6061 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
6062 			CTLFLAG_RD, &stats->colc,
6063 			"Collision Count");
6064 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6065 			CTLFLAG_RD, &stats->symerrs,
6066 			"Symbol Errors");
6067 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6068 			CTLFLAG_RD, &stats->sec,
6069 			"Sequence Errors");
6070 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6071 			CTLFLAG_RD, &stats->dc,
6072 			"Defer Count");
6073 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6074 			CTLFLAG_RD, &stats->mpc,
6075 			"Missed Packets");
6076 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6077 			CTLFLAG_RD, &stats->rlec,
6078 			"Receive Length Errors");
6079 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6080 			CTLFLAG_RD, &stats->rnbc,
6081 			"Receive No Buffers");
6082 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6083 			CTLFLAG_RD, &stats->ruc,
6084 			"Receive Undersize");
6085 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6086 			CTLFLAG_RD, &stats->rfc,
6087 			"Fragmented Packets Received");
6088 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6089 			CTLFLAG_RD, &stats->roc,
6090 			"Oversized Packets Received");
6091 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6092 			CTLFLAG_RD, &stats->rjc,
6093 			"Recevied Jabber");
6094 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6095 			CTLFLAG_RD, &stats->rxerrc,
6096 			"Receive Errors");
6097 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6098 			CTLFLAG_RD, &stats->crcerrs,
6099 			"CRC errors");
6100 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6101 			CTLFLAG_RD, &stats->algnerrc,
6102 			"Alignment Errors");
6103 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6104 			CTLFLAG_RD, &stats->tncrs,
6105 			"Transmit with No CRS");
6106 	/* On 82575 these are collision counts */
6107 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6108 			CTLFLAG_RD, &stats->cexterr,
6109 			"Collision/Carrier extension errors");
6110 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6111 			CTLFLAG_RD, &stats->xonrxc,
6112 			"XON Received");
6113 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6114 			CTLFLAG_RD, &stats->xontxc,
6115 			"XON Transmitted");
6116 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6117 			CTLFLAG_RD, &stats->xoffrxc,
6118 			"XOFF Received");
6119 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6120 			CTLFLAG_RD, &stats->xofftxc,
6121 			"XOFF Transmitted");
6122 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6123 			CTLFLAG_RD, &stats->fcruc,
6124 			"Unsupported Flow Control Received");
6125 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6126 			CTLFLAG_RD, &stats->mgprc,
6127 			"Management Packets Received");
6128 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6129 			CTLFLAG_RD, &stats->mgpdc,
6130 			"Management Packets Dropped");
6131 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6132 			CTLFLAG_RD, &stats->mgptc,
6133 			"Management Packets Transmitted");
6134 	/* Packet Reception Stats */
6135 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6136 			CTLFLAG_RD, &stats->tpr,
6137 			"Total Packets Received");
6138 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6139 			CTLFLAG_RD, &stats->gprc,
6140 			"Good Packets Received");
6141 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6142 			CTLFLAG_RD, &stats->bprc,
6143 			"Broadcast Packets Received");
6144 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6145 			CTLFLAG_RD, &stats->mprc,
6146 			"Multicast Packets Received");
6147 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6148 			CTLFLAG_RD, &stats->prc64,
6149 			"64 byte frames received");
6150 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6151 			CTLFLAG_RD, &stats->prc127,
6152 			"65-127 byte frames received");
6153 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6154 			CTLFLAG_RD, &stats->prc255,
6155 			"128-255 byte frames received");
6156 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6157 			CTLFLAG_RD, &stats->prc511,
6158 			"256-511 byte frames received");
6159 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6160 			CTLFLAG_RD, &stats->prc1023,
6161 			"512-1023 byte frames received");
6162 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6163 			CTLFLAG_RD, &stats->prc1522,
6164 			"1023-1522 byte frames received");
6165  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6166  			CTLFLAG_RD, &stats->gorc,
6167 			"Good Octets Received");
6168 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6169 			CTLFLAG_RD, &stats->tor,
6170 			"Total Octets Received");
6171 
6172 	/* Packet Transmission Stats */
6173  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6174  			CTLFLAG_RD, &stats->gotc,
6175  			"Good Octets Transmitted");
6176 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6177 			CTLFLAG_RD, &stats->tot,
6178 			"Total Octets Transmitted");
6179 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6180 			CTLFLAG_RD, &stats->tpt,
6181 			"Total Packets Transmitted");
6182 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6183 			CTLFLAG_RD, &stats->gptc,
6184 			"Good Packets Transmitted");
6185 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6186 			CTLFLAG_RD, &stats->bptc,
6187 			"Broadcast Packets Transmitted");
6188 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6189 			CTLFLAG_RD, &stats->mptc,
6190 			"Multicast Packets Transmitted");
6191 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6192 			CTLFLAG_RD, &stats->ptc64,
6193 			"64 byte frames transmitted");
6194 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6195 			CTLFLAG_RD, &stats->ptc127,
6196 			"65-127 byte frames transmitted");
6197 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6198 			CTLFLAG_RD, &stats->ptc255,
6199 			"128-255 byte frames transmitted");
6200 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6201 			CTLFLAG_RD, &stats->ptc511,
6202 			"256-511 byte frames transmitted");
6203 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6204 			CTLFLAG_RD, &stats->ptc1023,
6205 			"512-1023 byte frames transmitted");
6206 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6207 			CTLFLAG_RD, &stats->ptc1522,
6208 			"1024-1522 byte frames transmitted");
6209 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6210 			CTLFLAG_RD, &stats->tsctc,
6211 			"TSO Contexts Transmitted");
6212 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6213 			CTLFLAG_RD, &stats->tsctfc,
6214 			"TSO Contexts Failed");
6215 
6216 
6217 	/* Interrupt Stats */
6218 
6219 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6220 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
6221 	int_list = SYSCTL_CHILDREN(int_node);
6222 
6223 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6224 			CTLFLAG_RD, &stats->iac,
6225 			"Interrupt Assertion Count");
6226 
6227 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6228 			CTLFLAG_RD, &stats->icrxptc,
6229 			"Interrupt Cause Rx Pkt Timer Expire Count");
6230 
6231 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6232 			CTLFLAG_RD, &stats->icrxatc,
6233 			"Interrupt Cause Rx Abs Timer Expire Count");
6234 
6235 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6236 			CTLFLAG_RD, &stats->ictxptc,
6237 			"Interrupt Cause Tx Pkt Timer Expire Count");
6238 
6239 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6240 			CTLFLAG_RD, &stats->ictxatc,
6241 			"Interrupt Cause Tx Abs Timer Expire Count");
6242 
6243 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6244 			CTLFLAG_RD, &stats->ictxqec,
6245 			"Interrupt Cause Tx Queue Empty Count");
6246 
6247 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6248 			CTLFLAG_RD, &stats->ictxqmtc,
6249 			"Interrupt Cause Tx Queue Min Thresh Count");
6250 
6251 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6252 			CTLFLAG_RD, &stats->icrxdmtc,
6253 			"Interrupt Cause Rx Desc Min Thresh Count");
6254 
6255 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6256 			CTLFLAG_RD, &stats->icrxoc,
6257 			"Interrupt Cause Receiver Overrun Count");
6258 
6259 	/* Host to Card Stats */
6260 
6261 	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6262 				    CTLFLAG_RD, NULL,
6263 				    "Host to Card Statistics");
6264 
6265 	host_list = SYSCTL_CHILDREN(host_node);
6266 
6267 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6268 			CTLFLAG_RD, &stats->cbtmpc,
6269 			"Circuit Breaker Tx Packet Count");
6270 
6271 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6272 			CTLFLAG_RD, &stats->htdpmc,
6273 			"Host Transmit Discarded Packets");
6274 
6275 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6276 			CTLFLAG_RD, &stats->rpthc,
6277 			"Rx Packets To Host");
6278 
6279 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6280 			CTLFLAG_RD, &stats->cbrmpc,
6281 			"Circuit Breaker Rx Packet Count");
6282 
6283 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6284 			CTLFLAG_RD, &stats->cbrdpc,
6285 			"Circuit Breaker Rx Dropped Count");
6286 
6287 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6288 			CTLFLAG_RD, &stats->hgptc,
6289 			"Host Good Packets Tx Count");
6290 
6291 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6292 			CTLFLAG_RD, &stats->htcbdpc,
6293 			"Host Tx Circuit Breaker Dropped Count");
6294 
6295 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6296 			CTLFLAG_RD, &stats->hgorc,
6297 			"Host Good Octets Received Count");
6298 
6299 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6300 			CTLFLAG_RD, &stats->hgotc,
6301 			"Host Good Octets Transmit Count");
6302 
6303 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6304 			CTLFLAG_RD, &stats->lenerrs,
6305 			"Length Errors");
6306 
6307 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6308 			CTLFLAG_RD, &stats->scvpc,
6309 			"SerDes/SGMII Code Violation Pkt Count");
6310 
6311 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6312 			CTLFLAG_RD, &stats->hrmpc,
6313 			"Header Redirection Missed Packet Count");
6314 }
6315 
6316 
6317 /**********************************************************************
6318  *
6319  *  This routine provides a way to dump out the adapter eeprom,
6320  *  often a useful debug/service tool. This only dumps the first
6321  *  32 words, stuff that matters is in that extent.
6322  *
6323  **********************************************************************/
6324 static int
6325 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6326 {
6327 	struct adapter *adapter;
6328 	int error;
6329 	int result;
6330 
6331 	result = -1;
6332 	error = sysctl_handle_int(oidp, &result, 0, req);
6333 
6334 	if (error || !req->newptr)
6335 		return (error);
6336 
6337 	/*
6338 	 * This value will cause a hex dump of the
6339 	 * first 32 16-bit words of the EEPROM to
6340 	 * the screen.
6341 	 */
6342 	if (result == 1) {
6343 		adapter = (struct adapter *)arg1;
6344 		igb_print_nvm_info(adapter);
6345         }
6346 
6347 	return (error);
6348 }
6349 
6350 static void
6351 igb_print_nvm_info(struct adapter *adapter)
6352 {
6353 	u16	eeprom_data;
6354 	int	i, j, row = 0;
6355 
6356 	/* Its a bit crude, but it gets the job done */
6357 	printf("\nInterface EEPROM Dump:\n");
6358 	printf("Offset\n0x0000  ");
6359 	for (i = 0, j = 0; i < 32; i++, j++) {
6360 		if (j == 8) { /* Make the offset block */
6361 			j = 0; ++row;
6362 			printf("\n0x00%x0  ",row);
6363 		}
6364 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6365 		printf("%04x ", eeprom_data);
6366 	}
6367 	printf("\n");
6368 }
6369 
6370 static void
6371 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6372 	const char *description, int *limit, int value)
6373 {
6374 	*limit = value;
6375 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6376 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6377 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6378 }
6379 
6380 /*
6381 ** Set flow control using sysctl:
6382 ** Flow control values:
6383 ** 	0 - off
6384 **	1 - rx pause
6385 **	2 - tx pause
6386 **	3 - full
6387 */
6388 static int
6389 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6390 {
6391 	int		error;
6392 	static int	input = 3; /* default is full */
6393 	struct adapter	*adapter = (struct adapter *) arg1;
6394 
6395 	error = sysctl_handle_int(oidp, &input, 0, req);
6396 
6397 	if ((error) || (req->newptr == NULL))
6398 		return (error);
6399 
6400 	switch (input) {
6401 		case e1000_fc_rx_pause:
6402 		case e1000_fc_tx_pause:
6403 		case e1000_fc_full:
6404 		case e1000_fc_none:
6405 			adapter->hw.fc.requested_mode = input;
6406 			adapter->fc = input;
6407 			break;
6408 		default:
6409 			/* Do nothing */
6410 			return (error);
6411 	}
6412 
6413 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6414 	e1000_force_mac_fc(&adapter->hw);
6415 	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
6416 	return (error);
6417 }
6418 
6419 /*
6420 ** Manage DMA Coalesce:
6421 ** Control values:
6422 ** 	0/1 - off/on
6423 **	Legal timer values are:
6424 **	250,500,1000-10000 in thousands
6425 */
6426 static int
6427 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6428 {
6429 	struct adapter *adapter = (struct adapter *) arg1;
6430 	int		error;
6431 
6432 	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6433 
6434 	if ((error) || (req->newptr == NULL))
6435 		return (error);
6436 
6437 	switch (adapter->dmac) {
6438 		case 0:
6439 			/* Disabling */
6440 			break;
6441 		case 1: /* Just enable and use default */
6442 			adapter->dmac = 1000;
6443 			break;
6444 		case 250:
6445 		case 500:
6446 		case 1000:
6447 		case 2000:
6448 		case 3000:
6449 		case 4000:
6450 		case 5000:
6451 		case 6000:
6452 		case 7000:
6453 		case 8000:
6454 		case 9000:
6455 		case 10000:
6456 			/* Legal values - allow */
6457 			break;
6458 		default:
6459 			/* Do nothing, illegal value */
6460 			adapter->dmac = 0;
6461 			return (EINVAL);
6462 	}
6463 	/* Reinit the interface */
6464 	igb_init(adapter);
6465 	return (error);
6466 }
6467 
6468 /*
6469 ** Manage Energy Efficient Ethernet:
6470 ** Control values:
6471 **     0/1 - enabled/disabled
6472 */
6473 static int
6474 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6475 {
6476 	struct adapter	*adapter = (struct adapter *) arg1;
6477 	int		error, value;
6478 
6479 	value = adapter->hw.dev_spec._82575.eee_disable;
6480 	error = sysctl_handle_int(oidp, &value, 0, req);
6481 	if (error || req->newptr == NULL)
6482 		return (error);
6483 	IGB_CORE_LOCK(adapter);
6484 	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6485 	igb_init_locked(adapter);
6486 	IGB_CORE_UNLOCK(adapter);
6487 	return (0);
6488 }
6489