xref: /NextBSD/sys/dev/e1000/if_igb.c (revision 4557fabb34e865d7f40be64b39c9e34fa41dbb60)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38 #include "opt_rss.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #include "opt_altq.h"
43 #endif
44 
45 #include "if_igb.h"
46 
47 /*********************************************************************
48  *  Driver version:
49  *********************************************************************/
50 char igb_driver_version[] = "2.5.2";
51 
52 
53 /*********************************************************************
54  *  PCI Device ID Table
55  *
56  *  Used by probe to select devices to load on
57  *  Last field stores an index into e1000_strings
58  *  Last entry must be all 0s
59  *
60  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61  *********************************************************************/
62 
63 static igb_vendor_info_t igb_vendor_info_array[] =
64 {
65 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER,	0, 0, 0},
72 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER,	0, 0, 0},
79 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII,	0, 0, 0},
81 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER,	0, 0, 0},
88 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,	0, 0, 0},
89 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES,	0, 0, 0},
90 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,	0, 0, 0},
91 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER,	0, 0, 0},
93 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,	0, 0, 0},
98 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES,	0, 0, 0},
99 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,	0, 0, 0},
100 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER,	0, 0, 0},
101 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103 	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,	0, 0, 0},
104 	/* required last entry */
105 	{0, 0, 0, 0, 0}
106 };
107 
108 /*********************************************************************
109  *  Table of branding strings for all supported NICs.
110  *********************************************************************/
111 
112 static char *igb_strings[] = {
113 	"Intel(R) PRO/1000 Network Connection"
114 };
115 
116 /*********************************************************************
117  *  Function prototypes
118  *********************************************************************/
119 static int	igb_probe(device_t);
120 static int	igb_attach(device_t);
121 static int	igb_detach(device_t);
122 static int	igb_shutdown(device_t);
123 static int	igb_suspend(device_t);
124 static int	igb_resume(device_t);
125 #ifndef IGB_LEGACY_TX
126 static int	igb_mq_start(struct ifnet *, struct mbuf *);
127 static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128 static void	igb_qflush(struct ifnet *);
129 static void	igb_deferred_mq_start(void *, int);
130 #else
131 static void	igb_start(struct ifnet *);
132 static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133 #endif
134 static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
135 static uint64_t	igb_get_counter(if_t, ift_counter);
136 static void	igb_init(void *);
137 static void	igb_init_locked(struct adapter *);
138 static void	igb_stop(void *);
139 static void	igb_media_status(struct ifnet *, struct ifmediareq *);
140 static int	igb_media_change(struct ifnet *);
141 static void	igb_identify_hardware(struct adapter *);
142 static int	igb_allocate_pci_resources(struct adapter *);
143 static int	igb_allocate_msix(struct adapter *);
144 static int	igb_allocate_legacy(struct adapter *);
145 static int	igb_setup_msix(struct adapter *);
146 static void	igb_free_pci_resources(struct adapter *);
147 static void	igb_local_timer(void *);
148 static void	igb_reset(struct adapter *);
149 static int	igb_setup_interface(device_t, struct adapter *);
150 static int	igb_allocate_queues(struct adapter *);
151 static void	igb_configure_queues(struct adapter *);
152 
153 static int	igb_allocate_transmit_buffers(struct tx_ring *);
154 static void	igb_setup_transmit_structures(struct adapter *);
155 static void	igb_setup_transmit_ring(struct tx_ring *);
156 static void	igb_initialize_transmit_units(struct adapter *);
157 static void	igb_free_transmit_structures(struct adapter *);
158 static void	igb_free_transmit_buffers(struct tx_ring *);
159 
160 static int	igb_allocate_receive_buffers(struct rx_ring *);
161 static int	igb_setup_receive_structures(struct adapter *);
162 static int	igb_setup_receive_ring(struct rx_ring *);
163 static void	igb_initialize_receive_units(struct adapter *);
164 static void	igb_free_receive_structures(struct adapter *);
165 static void	igb_free_receive_buffers(struct rx_ring *);
166 static void	igb_free_receive_ring(struct rx_ring *);
167 
168 static void	igb_enable_intr(struct adapter *);
169 static void	igb_disable_intr(struct adapter *);
170 static void	igb_update_stats_counters(struct adapter *);
171 static bool	igb_txeof(struct tx_ring *);
172 
173 static __inline	void igb_rx_discard(struct rx_ring *, int);
174 static __inline void igb_rx_input(struct rx_ring *,
175 		    struct ifnet *, struct mbuf *, u32);
176 
177 static bool	igb_rxeof(struct igb_queue *, int, int *);
178 static void	igb_rx_checksum(u32, struct mbuf *, u32);
179 static int	igb_tx_ctx_setup(struct tx_ring *,
180 		    struct mbuf *, u32 *, u32 *);
181 static int	igb_tso_setup(struct tx_ring *,
182 		    struct mbuf *, u32 *, u32 *);
183 static void	igb_set_promisc(struct adapter *);
184 static void	igb_disable_promisc(struct adapter *);
185 static void	igb_set_multi(struct adapter *);
186 static void	igb_update_link_status(struct adapter *);
187 static void	igb_refresh_mbufs(struct rx_ring *, int);
188 
189 static void	igb_register_vlan(void *, struct ifnet *, u16);
190 static void	igb_unregister_vlan(void *, struct ifnet *, u16);
191 static void	igb_setup_vlan_hw_support(struct adapter *);
192 
193 static int	igb_xmit(struct tx_ring *, struct mbuf **);
194 static int	igb_dma_malloc(struct adapter *, bus_size_t,
195 		    struct igb_dma_alloc *, int);
196 static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197 static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198 static void	igb_print_nvm_info(struct adapter *);
199 static int 	igb_is_valid_ether_addr(u8 *);
200 static void     igb_add_hw_stats(struct adapter *);
201 
202 static void	igb_vf_init_stats(struct adapter *);
203 static void	igb_update_vf_stats_counters(struct adapter *);
204 
205 /* Management and WOL Support */
206 static void	igb_init_manageability(struct adapter *);
207 static void	igb_release_manageability(struct adapter *);
208 static void     igb_get_hw_control(struct adapter *);
209 static void     igb_release_hw_control(struct adapter *);
210 static void     igb_enable_wakeup(device_t);
211 static void     igb_led_func(void *, int);
212 
213 static int	igb_irq_fast(void *);
214 static void	igb_msix_que(void *);
215 static void	igb_msix_link(void *);
216 static void	igb_handle_que(void *context, int pending);
217 static void	igb_handle_link(void *context, int pending);
218 static void	igb_handle_link_locked(struct adapter *);
219 
220 static void	igb_set_sysctl_value(struct adapter *, const char *,
221 		    const char *, int *, int);
222 static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223 static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224 static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225 
226 #ifdef DEVICE_POLLING
227 static poll_handler_t igb_poll;
228 #endif /* POLLING */
229 
230 /*********************************************************************
231  *  FreeBSD Device Interface Entry Points
232  *********************************************************************/
233 
234 static device_method_t igb_methods[] = {
235 	/* Device interface */
236 	DEVMETHOD(device_probe, igb_probe),
237 	DEVMETHOD(device_attach, igb_attach),
238 	DEVMETHOD(device_detach, igb_detach),
239 	DEVMETHOD(device_shutdown, igb_shutdown),
240 	DEVMETHOD(device_suspend, igb_suspend),
241 	DEVMETHOD(device_resume, igb_resume),
242 	DEVMETHOD_END
243 };
244 
245 static driver_t igb_driver = {
246 	"igb", igb_methods, sizeof(struct adapter),
247 };
248 
249 static devclass_t igb_devclass;
250 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251 MODULE_DEPEND(igb, pci, 1, 1, 1);
252 MODULE_DEPEND(igb, ether, 1, 1, 1);
253 #ifdef DEV_NETMAP
254 MODULE_DEPEND(igb, netmap, 1, 1, 1);
255 #endif /* DEV_NETMAP */
256 
257 /*********************************************************************
258  *  Tunable default values.
259  *********************************************************************/
260 
261 static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262 
263 /* Descriptor defaults */
264 static int igb_rxd = IGB_DEFAULT_RXD;
265 static int igb_txd = IGB_DEFAULT_TXD;
266 SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267     "Number of receive descriptors per queue");
268 SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269     "Number of transmit descriptors per queue");
270 
271 /*
272 ** AIM: Adaptive Interrupt Moderation
273 ** which means that the interrupt rate
274 ** is varied over time based on the
275 ** traffic for that interrupt vector
276 */
277 static int igb_enable_aim = TRUE;
278 SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279     "Enable adaptive interrupt moderation");
280 
281 /*
282  * MSIX should be the default for best performance,
283  * but this allows it to be forced off for testing.
284  */
285 static int igb_enable_msix = 1;
286 SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287     "Enable MSI-X interrupts");
288 
289 /*
290 ** Tuneable Interrupt rate
291 */
292 static int igb_max_interrupt_rate = 8000;
293 SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294     &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295 
296 #ifndef IGB_LEGACY_TX
297 /*
298 ** Tuneable number of buffers in the buf-ring (drbr_xxx)
299 */
300 static int igb_buf_ring_size = IGB_BR_SIZE;
301 SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302     &igb_buf_ring_size, 0, "Size of the bufring");
303 #endif
304 
305 /*
306 ** Header split causes the packet header to
307 ** be dma'd to a seperate mbuf from the payload.
308 ** this can have memory alignment benefits. But
309 ** another plus is that small packets often fit
310 ** into the header and thus use no cluster. Its
311 ** a very workload dependent type feature.
312 */
313 static int igb_header_split = FALSE;
314 SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315     "Enable receive mbuf header split");
316 
317 /*
318 ** This will autoconfigure based on the
319 ** number of CPUs and max supported
320 ** MSIX messages if left at 0.
321 */
322 static int igb_num_queues = 0;
323 SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324     "Number of queues to configure, 0 indicates autoconfigure");
325 
326 /*
327 ** Global variable to store last used CPU when binding queues
328 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
329 ** queue is bound to a cpu.
330 */
331 static int igb_last_bind_cpu = -1;
332 
333 /* How many packets rxeof tries to clean at a time */
334 static int igb_rx_process_limit = 100;
335 SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336     &igb_rx_process_limit, 0,
337     "Maximum number of received packets to process at a time, -1 means unlimited");
338 
339 /* How many packets txeof tries to clean at a time */
340 static int igb_tx_process_limit = -1;
341 SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342     &igb_tx_process_limit, 0,
343     "Maximum number of sent packets to process at a time, -1 means unlimited");
344 
345 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
346 #include <dev/netmap/if_igb_netmap.h>
347 #endif /* DEV_NETMAP */
348 /*********************************************************************
349  *  Device identification routine
350  *
351  *  igb_probe determines if the driver should be loaded on
352  *  adapter based on PCI vendor/device id of the adapter.
353  *
354  *  return BUS_PROBE_DEFAULT on success, positive on failure
355  *********************************************************************/
356 
357 static int
igb_probe(device_t dev)358 igb_probe(device_t dev)
359 {
360 	char		adapter_name[256];
361 	uint16_t	pci_vendor_id = 0;
362 	uint16_t	pci_device_id = 0;
363 	uint16_t	pci_subvendor_id = 0;
364 	uint16_t	pci_subdevice_id = 0;
365 	igb_vendor_info_t *ent;
366 
367 	INIT_DEBUGOUT("igb_probe: begin");
368 
369 	pci_vendor_id = pci_get_vendor(dev);
370 	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371 		return (ENXIO);
372 
373 	pci_device_id = pci_get_device(dev);
374 	pci_subvendor_id = pci_get_subvendor(dev);
375 	pci_subdevice_id = pci_get_subdevice(dev);
376 
377 	ent = igb_vendor_info_array;
378 	while (ent->vendor_id != 0) {
379 		if ((pci_vendor_id == ent->vendor_id) &&
380 		    (pci_device_id == ent->device_id) &&
381 
382 		    ((pci_subvendor_id == ent->subvendor_id) ||
383 		    (ent->subvendor_id == 0)) &&
384 
385 		    ((pci_subdevice_id == ent->subdevice_id) ||
386 		    (ent->subdevice_id == 0))) {
387 			sprintf(adapter_name, "%s, Version - %s",
388 				igb_strings[ent->index],
389 				igb_driver_version);
390 			device_set_desc_copy(dev, adapter_name);
391 			return (BUS_PROBE_DEFAULT);
392 		}
393 		ent++;
394 	}
395 	return (ENXIO);
396 }
397 
398 /*********************************************************************
399  *  Device initialization routine
400  *
401  *  The attach entry point is called when the driver is being loaded.
402  *  This routine identifies the type of hardware, allocates all resources
403  *  and initializes the hardware.
404  *
405  *  return 0 on success, positive on failure
406  *********************************************************************/
407 
408 static int
igb_attach(device_t dev)409 igb_attach(device_t dev)
410 {
411 	struct adapter	*adapter;
412 	int		error = 0;
413 	u16		eeprom_data;
414 
415 	INIT_DEBUGOUT("igb_attach: begin");
416 
417 	if (resource_disabled("igb", device_get_unit(dev))) {
418 		device_printf(dev, "Disabled by device hint\n");
419 		return (ENXIO);
420 	}
421 
422 	adapter = device_get_softc(dev);
423 	adapter->dev = adapter->osdep.dev = dev;
424 	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425 
426 	/* SYSCTLs */
427 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430 	    igb_sysctl_nvm_info, "I", "NVM Information");
431 
432 	igb_set_sysctl_value(adapter, "enable_aim",
433 	    "Interrupt Moderation", &adapter->enable_aim,
434 	    igb_enable_aim);
435 
436 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439 	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440 
441 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442 
443 	/* Determine hardware and mac info */
444 	igb_identify_hardware(adapter);
445 
446 	/* Setup PCI resources */
447 	if (igb_allocate_pci_resources(adapter)) {
448 		device_printf(dev, "Allocation of PCI resources failed\n");
449 		error = ENXIO;
450 		goto err_pci;
451 	}
452 
453 	/* Do Shared Code initialization */
454 	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455 		device_printf(dev, "Setup of Shared code failed\n");
456 		error = ENXIO;
457 		goto err_pci;
458 	}
459 
460 	e1000_get_bus_info(&adapter->hw);
461 
462 	/* Sysctls for limiting the amount of work done in the taskqueues */
463 	igb_set_sysctl_value(adapter, "rx_processing_limit",
464 	    "max number of rx packets to process",
465 	    &adapter->rx_process_limit, igb_rx_process_limit);
466 
467 	igb_set_sysctl_value(adapter, "tx_processing_limit",
468 	    "max number of tx packets to process",
469 	    &adapter->tx_process_limit, igb_tx_process_limit);
470 
471 	/*
472 	 * Validate number of transmit and receive descriptors. It
473 	 * must not exceed hardware maximum, and must be multiple
474 	 * of E1000_DBA_ALIGN.
475 	 */
476 	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477 	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479 		    IGB_DEFAULT_TXD, igb_txd);
480 		adapter->num_tx_desc = IGB_DEFAULT_TXD;
481 	} else
482 		adapter->num_tx_desc = igb_txd;
483 	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484 	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486 		    IGB_DEFAULT_RXD, igb_rxd);
487 		adapter->num_rx_desc = IGB_DEFAULT_RXD;
488 	} else
489 		adapter->num_rx_desc = igb_rxd;
490 
491 	adapter->hw.mac.autoneg = DO_AUTO_NEG;
492 	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493 	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494 
495 	/* Copper options */
496 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497 		adapter->hw.phy.mdix = AUTO_ALL_MODES;
498 		adapter->hw.phy.disable_polarity_correction = FALSE;
499 		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500 	}
501 
502 	/*
503 	 * Set the frame limits assuming
504 	 * standard ethernet sized frames.
505 	 */
506 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
507 
508 	/*
509 	** Allocate and Setup Queues
510 	*/
511 	if (igb_allocate_queues(adapter)) {
512 		error = ENOMEM;
513 		goto err_pci;
514 	}
515 
516 	/* Allocate the appropriate stats memory */
517 	if (adapter->vf_ifp) {
518 		adapter->stats =
519 		    (struct e1000_vf_stats *)malloc(sizeof \
520 		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
521 		igb_vf_init_stats(adapter);
522 	} else
523 		adapter->stats =
524 		    (struct e1000_hw_stats *)malloc(sizeof \
525 		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526 	if (adapter->stats == NULL) {
527 		device_printf(dev, "Can not allocate stats memory\n");
528 		error = ENOMEM;
529 		goto err_late;
530 	}
531 
532 	/* Allocate multicast array memory. */
533 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535 	if (adapter->mta == NULL) {
536 		device_printf(dev, "Can not allocate multicast setup array\n");
537 		error = ENOMEM;
538 		goto err_late;
539 	}
540 
541 	/* Some adapter-specific advanced features */
542 	if (adapter->hw.mac.type >= e1000_i350) {
543 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545 		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546 		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549 		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550 		    adapter, 0, igb_sysctl_eee, "I",
551 		    "Disable Energy Efficient Ethernet");
552 		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553 			if (adapter->hw.mac.type == e1000_i354)
554 				e1000_set_eee_i354(&adapter->hw);
555 			else
556 				e1000_set_eee_i350(&adapter->hw);
557 		}
558 	}
559 
560 	/*
561 	** Start from a known state, this is
562 	** important in reading the nvm and
563 	** mac from that.
564 	*/
565 	e1000_reset_hw(&adapter->hw);
566 
567 	/* Make sure we have a good EEPROM before we read from it */
568 	if (((adapter->hw.mac.type != e1000_i210) &&
569 	    (adapter->hw.mac.type != e1000_i211)) &&
570 	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571 		/*
572 		** Some PCI-E parts fail the first check due to
573 		** the link being in sleep state, call it again,
574 		** if it fails a second time its a real issue.
575 		*/
576 		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577 			device_printf(dev,
578 			    "The EEPROM Checksum Is Not Valid\n");
579 			error = EIO;
580 			goto err_late;
581 		}
582 	}
583 
584 	/*
585 	** Copy the permanent MAC address out of the EEPROM
586 	*/
587 	if (e1000_read_mac_addr(&adapter->hw) < 0) {
588 		device_printf(dev, "EEPROM read error while reading MAC"
589 		    " address\n");
590 		error = EIO;
591 		goto err_late;
592 	}
593 	/* Check its sanity */
594 	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595 		device_printf(dev, "Invalid MAC address\n");
596 		error = EIO;
597 		goto err_late;
598 	}
599 
600 	/* Setup OS specific network interface */
601 	if (igb_setup_interface(dev, adapter) != 0)
602 		goto err_late;
603 
604 	/* Now get a good starting state */
605 	igb_reset(adapter);
606 
607 	/* Initialize statistics */
608 	igb_update_stats_counters(adapter);
609 
610 	adapter->hw.mac.get_link_status = 1;
611 	igb_update_link_status(adapter);
612 
613 	/* Indicate SOL/IDER usage */
614 	if (e1000_check_reset_block(&adapter->hw))
615 		device_printf(dev,
616 		    "PHY reset is blocked due to SOL/IDER session.\n");
617 
618 	/* Determine if we have to control management hardware */
619 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620 
621 	/*
622 	 * Setup Wake-on-Lan
623 	 */
624 	/* APME bit in EEPROM is mapped to WUC.APME */
625 	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626 	if (eeprom_data)
627 		adapter->wol = E1000_WUFC_MAG;
628 
629 	/* Register for VLAN events */
630 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631 	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633 	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634 
635 	igb_add_hw_stats(adapter);
636 
637 	/* Tell the stack that the interface is not active */
638 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639 	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
640 
641 	adapter->led_dev = led_create(igb_led_func, adapter,
642 	    device_get_nameunit(dev));
643 
644 	/*
645 	** Configure Interrupts
646 	*/
647 	if ((adapter->msix > 1) && (igb_enable_msix))
648 		error = igb_allocate_msix(adapter);
649 	else /* MSI or Legacy */
650 		error = igb_allocate_legacy(adapter);
651 	if (error)
652 		goto err_late;
653 
654 #ifdef DEV_NETMAP
655 	igb_netmap_attach(adapter);
656 #endif /* DEV_NETMAP */
657 	INIT_DEBUGOUT("igb_attach: end");
658 
659 	return (0);
660 
661 err_late:
662 	igb_detach(dev);
663 	igb_free_transmit_structures(adapter);
664 	igb_free_receive_structures(adapter);
665 	igb_release_hw_control(adapter);
666 err_pci:
667 	igb_free_pci_resources(adapter);
668 	if (adapter->ifp != NULL)
669 		if_free(adapter->ifp);
670 	free(adapter->mta, M_DEVBUF);
671 	IGB_CORE_LOCK_DESTROY(adapter);
672 
673 	return (error);
674 }
675 
676 /*********************************************************************
677  *  Device removal routine
678  *
679  *  The detach entry point is called when the driver is being removed.
680  *  This routine stops the adapter and deallocates all the resources
681  *  that were allocated for driver operation.
682  *
683  *  return 0 on success, positive on failure
684  *********************************************************************/
685 
686 static int
igb_detach(device_t dev)687 igb_detach(device_t dev)
688 {
689 	struct adapter	*adapter = device_get_softc(dev);
690 	struct ifnet	*ifp = adapter->ifp;
691 
692 	INIT_DEBUGOUT("igb_detach: begin");
693 
694 	/* Make sure VLANS are not using driver */
695 	if (adapter->ifp->if_vlantrunk != NULL) {
696 		device_printf(dev,"Vlan in use, detach first\n");
697 		return (EBUSY);
698 	}
699 
700 	ether_ifdetach(adapter->ifp);
701 
702 	if (adapter->led_dev != NULL)
703 		led_destroy(adapter->led_dev);
704 
705 #ifdef DEVICE_POLLING
706 	if (ifp->if_capenable & IFCAP_POLLING)
707 		ether_poll_deregister(ifp);
708 #endif
709 
710 	IGB_CORE_LOCK(adapter);
711 	adapter->in_detach = 1;
712 	igb_stop(adapter);
713 	IGB_CORE_UNLOCK(adapter);
714 
715 	e1000_phy_hw_reset(&adapter->hw);
716 
717 	/* Give control back to firmware */
718 	igb_release_manageability(adapter);
719 	igb_release_hw_control(adapter);
720 
721 	if (adapter->wol) {
722 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
723 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
724 		igb_enable_wakeup(dev);
725 	}
726 
727 	/* Unregister VLAN events */
728 	if (adapter->vlan_attach != NULL)
729 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
730 	if (adapter->vlan_detach != NULL)
731 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
732 
733 	callout_drain(&adapter->timer);
734 
735 #ifdef DEV_NETMAP
736 	netmap_detach(adapter->ifp);
737 #endif /* DEV_NETMAP */
738 	igb_free_pci_resources(adapter);
739 	bus_generic_detach(dev);
740 	if_free(ifp);
741 
742 	igb_free_transmit_structures(adapter);
743 	igb_free_receive_structures(adapter);
744 	if (adapter->mta != NULL)
745 		free(adapter->mta, M_DEVBUF);
746 
747 	IGB_CORE_LOCK_DESTROY(adapter);
748 
749 	return (0);
750 }
751 
752 /*********************************************************************
753  *
754  *  Shutdown entry point
755  *
756  **********************************************************************/
757 
758 static int
igb_shutdown(device_t dev)759 igb_shutdown(device_t dev)
760 {
761 	return igb_suspend(dev);
762 }
763 
764 /*
765  * Suspend/resume device methods.
766  */
767 static int
igb_suspend(device_t dev)768 igb_suspend(device_t dev)
769 {
770 	struct adapter *adapter = device_get_softc(dev);
771 
772 	IGB_CORE_LOCK(adapter);
773 
774 	igb_stop(adapter);
775 
776         igb_release_manageability(adapter);
777 	igb_release_hw_control(adapter);
778 
779         if (adapter->wol) {
780                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
781                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
782                 igb_enable_wakeup(dev);
783         }
784 
785 	IGB_CORE_UNLOCK(adapter);
786 
787 	return bus_generic_suspend(dev);
788 }
789 
790 static int
igb_resume(device_t dev)791 igb_resume(device_t dev)
792 {
793 	struct adapter *adapter = device_get_softc(dev);
794 	struct tx_ring	*txr = adapter->tx_rings;
795 	struct ifnet *ifp = adapter->ifp;
796 
797 	IGB_CORE_LOCK(adapter);
798 	igb_init_locked(adapter);
799 	igb_init_manageability(adapter);
800 
801 	if ((ifp->if_flags & IFF_UP) &&
802 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
803 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
804 			IGB_TX_LOCK(txr);
805 #ifndef IGB_LEGACY_TX
806 			/* Process the stack queue only if not depleted */
807 			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
808 			    !drbr_empty(ifp, txr->br))
809 				igb_mq_start_locked(ifp, txr);
810 #else
811 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
812 				igb_start_locked(txr, ifp);
813 #endif
814 			IGB_TX_UNLOCK(txr);
815 		}
816 	}
817 	IGB_CORE_UNLOCK(adapter);
818 
819 	return bus_generic_resume(dev);
820 }
821 
822 
823 #ifdef IGB_LEGACY_TX
824 
825 /*********************************************************************
826  *  Transmit entry point
827  *
828  *  igb_start is called by the stack to initiate a transmit.
829  *  The driver will remain in this routine as long as there are
830  *  packets to transmit and transmit resources are available.
831  *  In case resources are not available stack is notified and
832  *  the packet is requeued.
833  **********************************************************************/
834 
835 static void
igb_start_locked(struct tx_ring * txr,struct ifnet * ifp)836 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
837 {
838 	struct adapter	*adapter = ifp->if_softc;
839 	struct mbuf	*m_head;
840 
841 	IGB_TX_LOCK_ASSERT(txr);
842 
843 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
844 	    IFF_DRV_RUNNING)
845 		return;
846 	if (!adapter->link_active)
847 		return;
848 
849 	/* Call cleanup if number of TX descriptors low */
850 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
851 		igb_txeof(txr);
852 
853 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
854 		if (txr->tx_avail <= IGB_MAX_SCATTER) {
855 			txr->queue_status |= IGB_QUEUE_DEPLETED;
856 			break;
857 		}
858 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
859 		if (m_head == NULL)
860 			break;
861 		/*
862 		 *  Encapsulation can modify our pointer, and or make it
863 		 *  NULL on failure.  In that event, we can't requeue.
864 		 */
865 		if (igb_xmit(txr, &m_head)) {
866 			if (m_head != NULL)
867 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
868 			if (txr->tx_avail <= IGB_MAX_SCATTER)
869 				txr->queue_status |= IGB_QUEUE_DEPLETED;
870 			break;
871 		}
872 
873 		/* Send a copy of the frame to the BPF listener */
874 		ETHER_BPF_MTAP(ifp, m_head);
875 
876 		/* Set watchdog on */
877 		txr->watchdog_time = ticks;
878 		txr->queue_status |= IGB_QUEUE_WORKING;
879 	}
880 }
881 
882 /*
883  * Legacy TX driver routine, called from the
884  * stack, always uses tx[0], and spins for it.
885  * Should not be used with multiqueue tx
886  */
887 static void
igb_start(struct ifnet * ifp)888 igb_start(struct ifnet *ifp)
889 {
890 	struct adapter	*adapter = ifp->if_softc;
891 	struct tx_ring	*txr = adapter->tx_rings;
892 
893 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
894 		IGB_TX_LOCK(txr);
895 		igb_start_locked(txr, ifp);
896 		IGB_TX_UNLOCK(txr);
897 	}
898 	return;
899 }
900 
901 #else /* ~IGB_LEGACY_TX */
902 
903 /*
904 ** Multiqueue Transmit Entry:
905 **  quick turnaround to the stack
906 **
907 */
908 static int
igb_mq_start(struct ifnet * ifp,struct mbuf * m)909 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
910 {
911 	struct adapter		*adapter = ifp->if_softc;
912 	struct igb_queue	*que;
913 	struct tx_ring		*txr;
914 	int 			i, err = 0;
915 #ifdef	RSS
916 	uint32_t		bucket_id;
917 #endif
918 
919 	/* Which queue to use */
920 	/*
921 	 * When doing RSS, map it to the same outbound queue
922 	 * as the incoming flow would be mapped to.
923 	 *
924 	 * If everything is setup correctly, it should be the
925 	 * same bucket that the current CPU we're on is.
926 	 */
927 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
928 #ifdef	RSS
929 		if (rss_hash2bucket(m->m_pkthdr.flowid,
930 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
931 			/* XXX TODO: spit out something if bucket_id > num_queues? */
932 			i = bucket_id % adapter->num_queues;
933 		} else {
934 #endif
935 			i = m->m_pkthdr.flowid % adapter->num_queues;
936 #ifdef	RSS
937 		}
938 #endif
939 	} else {
940 		i = curcpu % adapter->num_queues;
941 	}
942 	txr = &adapter->tx_rings[i];
943 	que = &adapter->queues[i];
944 
945 	err = drbr_enqueue(ifp, txr->br, m);
946 	if (err)
947 		return (err);
948 	if (IGB_TX_TRYLOCK(txr)) {
949 		igb_mq_start_locked(ifp, txr);
950 		IGB_TX_UNLOCK(txr);
951 	} else
952 		taskqueue_enqueue(que->tq, &txr->txq_task);
953 
954 	return (0);
955 }
956 
957 static int
igb_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr)958 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
959 {
960 	struct adapter  *adapter = txr->adapter;
961         struct mbuf     *next;
962         int             err = 0, enq = 0;
963 
964 	IGB_TX_LOCK_ASSERT(txr);
965 
966 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
967 	    adapter->link_active == 0)
968 		return (ENETDOWN);
969 
970 	/* Process the queue */
971 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
972 		if ((err = igb_xmit(txr, &next)) != 0) {
973 			if (next == NULL) {
974 				/* It was freed, move forward */
975 				drbr_advance(ifp, txr->br);
976 			} else {
977 				/*
978 				 * Still have one left, it may not be
979 				 * the same since the transmit function
980 				 * may have changed it.
981 				 */
982 				drbr_putback(ifp, txr->br, next);
983 			}
984 			break;
985 		}
986 		drbr_advance(ifp, txr->br);
987 		enq++;
988 		if (next->m_flags & M_MCAST && adapter->vf_ifp)
989 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
990 		ETHER_BPF_MTAP(ifp, next);
991 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
992 			break;
993 	}
994 	if (enq > 0) {
995 		/* Set the watchdog */
996 		txr->queue_status |= IGB_QUEUE_WORKING;
997 		txr->watchdog_time = ticks;
998 	}
999 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1000 		igb_txeof(txr);
1001 	if (txr->tx_avail <= IGB_MAX_SCATTER)
1002 		txr->queue_status |= IGB_QUEUE_DEPLETED;
1003 	return (err);
1004 }
1005 
1006 /*
1007  * Called from a taskqueue to drain queued transmit packets.
1008  */
1009 static void
igb_deferred_mq_start(void * arg,int pending)1010 igb_deferred_mq_start(void *arg, int pending)
1011 {
1012 	struct tx_ring *txr = arg;
1013 	struct adapter *adapter = txr->adapter;
1014 	struct ifnet *ifp = adapter->ifp;
1015 
1016 	IGB_TX_LOCK(txr);
1017 	if (!drbr_empty(ifp, txr->br))
1018 		igb_mq_start_locked(ifp, txr);
1019 	IGB_TX_UNLOCK(txr);
1020 }
1021 
1022 /*
1023 ** Flush all ring buffers
1024 */
1025 static void
igb_qflush(struct ifnet * ifp)1026 igb_qflush(struct ifnet *ifp)
1027 {
1028 	struct adapter	*adapter = ifp->if_softc;
1029 	struct tx_ring	*txr = adapter->tx_rings;
1030 	struct mbuf	*m;
1031 
1032 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1033 		IGB_TX_LOCK(txr);
1034 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1035 			m_freem(m);
1036 		IGB_TX_UNLOCK(txr);
1037 	}
1038 	if_qflush(ifp);
1039 }
1040 #endif /* ~IGB_LEGACY_TX */
1041 
1042 /*********************************************************************
1043  *  Ioctl entry point
1044  *
1045  *  igb_ioctl is called when the user wants to configure the
1046  *  interface.
1047  *
1048  *  return 0 on success, positive on failure
1049  **********************************************************************/
1050 
1051 static int
igb_ioctl(struct ifnet * ifp,u_long command,caddr_t data)1052 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1053 {
1054 	struct adapter	*adapter = ifp->if_softc;
1055 	struct ifreq	*ifr = (struct ifreq *)data;
1056 #if defined(INET) || defined(INET6)
1057 	struct ifaddr	*ifa = (struct ifaddr *)data;
1058 #endif
1059 	bool		avoid_reset = FALSE;
1060 	int		error = 0;
1061 
1062 	if (adapter->in_detach)
1063 		return (error);
1064 
1065 	switch (command) {
1066 	case SIOCSIFADDR:
1067 #ifdef INET
1068 		if (ifa->ifa_addr->sa_family == AF_INET)
1069 			avoid_reset = TRUE;
1070 #endif
1071 #ifdef INET6
1072 		if (ifa->ifa_addr->sa_family == AF_INET6)
1073 			avoid_reset = TRUE;
1074 #endif
1075 		/*
1076 		** Calling init results in link renegotiation,
1077 		** so we avoid doing it when possible.
1078 		*/
1079 		if (avoid_reset) {
1080 			ifp->if_flags |= IFF_UP;
1081 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1082 				igb_init(adapter);
1083 #ifdef INET
1084 			if (!(ifp->if_flags & IFF_NOARP))
1085 				arp_ifinit(ifp, ifa);
1086 #endif
1087 		} else
1088 			error = ether_ioctl(ifp, command, data);
1089 		break;
1090 	case SIOCSIFMTU:
1091 	    {
1092 		int max_frame_size;
1093 
1094 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1095 
1096 		IGB_CORE_LOCK(adapter);
1097 		max_frame_size = 9234;
1098 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1099 		    ETHER_CRC_LEN) {
1100 			IGB_CORE_UNLOCK(adapter);
1101 			error = EINVAL;
1102 			break;
1103 		}
1104 
1105 		ifp->if_mtu = ifr->ifr_mtu;
1106 		adapter->max_frame_size =
1107 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1108 		igb_init_locked(adapter);
1109 		IGB_CORE_UNLOCK(adapter);
1110 		break;
1111 	    }
1112 	case SIOCSIFFLAGS:
1113 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1114 		    SIOCSIFFLAGS (Set Interface Flags)");
1115 		IGB_CORE_LOCK(adapter);
1116 		if (ifp->if_flags & IFF_UP) {
1117 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1118 				if ((ifp->if_flags ^ adapter->if_flags) &
1119 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1120 					igb_disable_promisc(adapter);
1121 					igb_set_promisc(adapter);
1122 				}
1123 			} else
1124 				igb_init_locked(adapter);
1125 		} else
1126 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1127 				igb_stop(adapter);
1128 		adapter->if_flags = ifp->if_flags;
1129 		IGB_CORE_UNLOCK(adapter);
1130 		break;
1131 	case SIOCADDMULTI:
1132 	case SIOCDELMULTI:
1133 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1134 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1135 			IGB_CORE_LOCK(adapter);
1136 			igb_disable_intr(adapter);
1137 			igb_set_multi(adapter);
1138 #ifdef DEVICE_POLLING
1139 			if (!(ifp->if_capenable & IFCAP_POLLING))
1140 #endif
1141 				igb_enable_intr(adapter);
1142 			IGB_CORE_UNLOCK(adapter);
1143 		}
1144 		break;
1145 	case SIOCSIFMEDIA:
1146 		/* Check SOL/IDER usage */
1147 		IGB_CORE_LOCK(adapter);
1148 		if (e1000_check_reset_block(&adapter->hw)) {
1149 			IGB_CORE_UNLOCK(adapter);
1150 			device_printf(adapter->dev, "Media change is"
1151 			    " blocked due to SOL/IDER session.\n");
1152 			break;
1153 		}
1154 		IGB_CORE_UNLOCK(adapter);
1155 	case SIOCGIFMEDIA:
1156 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1157 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1158 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1159 		break;
1160 	case SIOCSIFCAP:
1161 	    {
1162 		int mask, reinit;
1163 
1164 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1165 		reinit = 0;
1166 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1167 #ifdef DEVICE_POLLING
1168 		if (mask & IFCAP_POLLING) {
1169 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1170 				error = ether_poll_register(igb_poll, ifp);
1171 				if (error)
1172 					return (error);
1173 				IGB_CORE_LOCK(adapter);
1174 				igb_disable_intr(adapter);
1175 				ifp->if_capenable |= IFCAP_POLLING;
1176 				IGB_CORE_UNLOCK(adapter);
1177 			} else {
1178 				error = ether_poll_deregister(ifp);
1179 				/* Enable interrupt even in error case */
1180 				IGB_CORE_LOCK(adapter);
1181 				igb_enable_intr(adapter);
1182 				ifp->if_capenable &= ~IFCAP_POLLING;
1183 				IGB_CORE_UNLOCK(adapter);
1184 			}
1185 		}
1186 #endif
1187 		if (mask & IFCAP_HWCSUM) {
1188 			ifp->if_capenable ^= IFCAP_HWCSUM;
1189 			reinit = 1;
1190 		}
1191 		if (mask & IFCAP_TSO4) {
1192 			ifp->if_capenable ^= IFCAP_TSO4;
1193 			reinit = 1;
1194 		}
1195 		if (mask & IFCAP_TSO6) {
1196 			ifp->if_capenable ^= IFCAP_TSO6;
1197 			reinit = 1;
1198 		}
1199 		if (mask & IFCAP_VLAN_HWTAGGING) {
1200 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1201 			reinit = 1;
1202 		}
1203 		if (mask & IFCAP_VLAN_HWFILTER) {
1204 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1205 			reinit = 1;
1206 		}
1207 		if (mask & IFCAP_VLAN_HWTSO) {
1208 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1209 			reinit = 1;
1210 		}
1211 		if (mask & IFCAP_LRO) {
1212 			ifp->if_capenable ^= IFCAP_LRO;
1213 			reinit = 1;
1214 		}
1215 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1216 			igb_init(adapter);
1217 		VLAN_CAPABILITIES(ifp);
1218 		break;
1219 	    }
1220 
1221 	default:
1222 		error = ether_ioctl(ifp, command, data);
1223 		break;
1224 	}
1225 
1226 	return (error);
1227 }
1228 
1229 
1230 /*********************************************************************
1231  *  Init entry point
1232  *
1233  *  This routine is used in two ways. It is used by the stack as
1234  *  init entry point in network interface structure. It is also used
1235  *  by the driver as a hw/sw initialization routine to get to a
1236  *  consistent state.
1237  *
1238  *  return 0 on success, positive on failure
1239  **********************************************************************/
1240 
1241 static void
igb_init_locked(struct adapter * adapter)1242 igb_init_locked(struct adapter *adapter)
1243 {
1244 	struct ifnet	*ifp = adapter->ifp;
1245 	device_t	dev = adapter->dev;
1246 
1247 	INIT_DEBUGOUT("igb_init: begin");
1248 
1249 	IGB_CORE_LOCK_ASSERT(adapter);
1250 
1251 	igb_disable_intr(adapter);
1252 	callout_stop(&adapter->timer);
1253 
1254 	/* Get the latest mac address, User can use a LAA */
1255         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1256               ETHER_ADDR_LEN);
1257 
1258 	/* Put the address into the Receive Address Array */
1259 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1260 
1261 	igb_reset(adapter);
1262 	igb_update_link_status(adapter);
1263 
1264 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1265 
1266 	/* Set hardware offload abilities */
1267 	ifp->if_hwassist = 0;
1268 	if (ifp->if_capenable & IFCAP_TXCSUM) {
1269 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1270 #if __FreeBSD_version >= 800000
1271 		if ((adapter->hw.mac.type == e1000_82576) ||
1272 		    (adapter->hw.mac.type == e1000_82580))
1273 			ifp->if_hwassist |= CSUM_SCTP;
1274 #endif
1275 	}
1276 
1277 	if (ifp->if_capenable & IFCAP_TSO)
1278 		ifp->if_hwassist |= CSUM_TSO;
1279 
1280 	/* Configure for OS presence */
1281 	igb_init_manageability(adapter);
1282 
1283 	/* Prepare transmit descriptors and buffers */
1284 	igb_setup_transmit_structures(adapter);
1285 	igb_initialize_transmit_units(adapter);
1286 
1287 	/* Setup Multicast table */
1288 	igb_set_multi(adapter);
1289 
1290 	/*
1291 	** Figure out the desired mbuf pool
1292 	** for doing jumbo/packetsplit
1293 	*/
1294 	if (adapter->max_frame_size <= 2048)
1295 		adapter->rx_mbuf_sz = MCLBYTES;
1296 	else if (adapter->max_frame_size <= 4096)
1297 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1298 	else
1299 		adapter->rx_mbuf_sz = MJUM9BYTES;
1300 
1301 	/* Prepare receive descriptors and buffers */
1302 	if (igb_setup_receive_structures(adapter)) {
1303 		device_printf(dev, "Could not setup receive structures\n");
1304 		return;
1305 	}
1306 	igb_initialize_receive_units(adapter);
1307 	e1000_rx_fifo_flush_82575(&adapter->hw);
1308 
1309         /* Enable VLAN support */
1310 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1311 		igb_setup_vlan_hw_support(adapter);
1312 
1313 	/* Don't lose promiscuous settings */
1314 	igb_set_promisc(adapter);
1315 
1316 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1317 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1318 
1319 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1320 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1321 
1322 	if (adapter->msix > 1) /* Set up queue routing */
1323 		igb_configure_queues(adapter);
1324 
1325 	/* this clears any pending interrupts */
1326 	E1000_READ_REG(&adapter->hw, E1000_ICR);
1327 #ifdef DEVICE_POLLING
1328 	/*
1329 	 * Only enable interrupts if we are not polling, make sure
1330 	 * they are off otherwise.
1331 	 */
1332 	if (ifp->if_capenable & IFCAP_POLLING)
1333 		igb_disable_intr(adapter);
1334 	else
1335 #endif /* DEVICE_POLLING */
1336 	{
1337 		igb_enable_intr(adapter);
1338 		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1339 	}
1340 
1341 	/* Set Energy Efficient Ethernet */
1342 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1343 		if (adapter->hw.mac.type == e1000_i354)
1344 			e1000_set_eee_i354(&adapter->hw);
1345 		else
1346 			e1000_set_eee_i350(&adapter->hw);
1347 	}
1348 }
1349 
/*
 * Unlocked wrapper around igb_init_locked(): takes the core lock,
 * runs the initialization and drops the lock again.  'arg' is the
 * struct adapter to initialize.
 */
static void
igb_init(void *arg)
{
	struct adapter	*sc = arg;

	IGB_CORE_LOCK(sc);
	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1359 
1360 
1361 static void
igb_handle_que(void * context,int pending)1362 igb_handle_que(void *context, int pending)
1363 {
1364 	struct igb_queue *que = context;
1365 	struct adapter *adapter = que->adapter;
1366 	struct tx_ring *txr = que->txr;
1367 	struct ifnet	*ifp = adapter->ifp;
1368 
1369 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1370 		bool	more;
1371 
1372 		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1373 
1374 		IGB_TX_LOCK(txr);
1375 		igb_txeof(txr);
1376 #ifndef IGB_LEGACY_TX
1377 		/* Process the stack queue only if not depleted */
1378 		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1379 		    !drbr_empty(ifp, txr->br))
1380 			igb_mq_start_locked(ifp, txr);
1381 #else
1382 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1383 			igb_start_locked(txr, ifp);
1384 #endif
1385 		IGB_TX_UNLOCK(txr);
1386 		/* Do we need another? */
1387 		if (more) {
1388 			taskqueue_enqueue(que->tq, &que->que_task);
1389 			return;
1390 		}
1391 	}
1392 
1393 #ifdef DEVICE_POLLING
1394 	if (ifp->if_capenable & IFCAP_POLLING)
1395 		return;
1396 #endif
1397 	/* Reenable this interrupt */
1398 	if (que->eims)
1399 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1400 	else
1401 		igb_enable_intr(adapter);
1402 }
1403 
/*
 * Link-change handler for a sleepable context: wraps
 * igb_handle_link_locked() with the core lock.  'context' is the
 * struct adapter; 'pending' is unused.
 */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter	*sc = context;

	IGB_CORE_LOCK(sc);
	igb_handle_link_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1414 
1415 static void
igb_handle_link_locked(struct adapter * adapter)1416 igb_handle_link_locked(struct adapter *adapter)
1417 {
1418 	struct tx_ring	*txr = adapter->tx_rings;
1419 	struct ifnet *ifp = adapter->ifp;
1420 
1421 	IGB_CORE_LOCK_ASSERT(adapter);
1422 	adapter->hw.mac.get_link_status = 1;
1423 	igb_update_link_status(adapter);
1424 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1425 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1426 			IGB_TX_LOCK(txr);
1427 #ifndef IGB_LEGACY_TX
1428 			/* Process the stack queue only if not depleted */
1429 			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1430 			    !drbr_empty(ifp, txr->br))
1431 				igb_mq_start_locked(ifp, txr);
1432 #else
1433 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1434 				igb_start_locked(txr, ifp);
1435 #endif
1436 			IGB_TX_UNLOCK(txr);
1437 		}
1438 	}
1439 }
1440 
1441 /*********************************************************************
1442  *
1443  *  MSI/Legacy Deferred
1444  *  Interrupt Service routine
1445  *
1446  *********************************************************************/
1447 static int
igb_irq_fast(void * arg)1448 igb_irq_fast(void *arg)
1449 {
1450 	struct adapter		*adapter = arg;
1451 	struct igb_queue	*que = adapter->queues;
1452 	u32			reg_icr;
1453 
1454 
1455 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1456 
1457 	/* Hot eject?  */
1458 	if (reg_icr == 0xffffffff)
1459 		return FILTER_STRAY;
1460 
1461 	/* Definitely not our interrupt.  */
1462 	if (reg_icr == 0x0)
1463 		return FILTER_STRAY;
1464 
1465 	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1466 		return FILTER_STRAY;
1467 
1468 	/*
1469 	 * Mask interrupts until the taskqueue is finished running.  This is
1470 	 * cheap, just assume that it is needed.  This also works around the
1471 	 * MSI message reordering errata on certain systems.
1472 	 */
1473 	igb_disable_intr(adapter);
1474 	taskqueue_enqueue(que->tq, &que->que_task);
1475 
1476 	/* Link status change */
1477 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1478 		taskqueue_enqueue(que->tq, &adapter->link_task);
1479 
1480 	if (reg_icr & E1000_ICR_RXO)
1481 		adapter->rx_overruns++;
1482 	return FILTER_HANDLED;
1483 }
1484 
#ifdef DEVICE_POLLING
/*
 * DEVICE_POLLING handler.
 *
 * With POLL_AND_CHECK_STATUS, ICR is read under the core lock to
 * handle link changes and count receive overruns.  Then, for every
 * queue, up to 'count' received packets are processed, the TX ring is
 * cleaned and transmission is restarted.
 *
 * On FreeBSD >= 8 the number of received packets is returned; on
 * older versions the function returns nothing.
 */
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	u32			reg_icr, rx_done = 0;
	u32			loop;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link_locked(adapter);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		txr = que->txr;

		igb_rxeof(que, count, &rx_done);

		IGB_TX_LOCK(txr);
		/*
		 * Give every queue its own cleanup budget.  A budget
		 * shared across queues could be used up by one busy
		 * ring, and because the counter is unsigned the
		 * post-decrement at zero wraps around, leaving later
		 * rings with an effectively unbounded loop.
		 */
		loop = IGB_MAX_LOOP;
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#ifndef IGB_LEGACY_TX
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */
1542 
1543 /*********************************************************************
1544  *
1545  *  MSIX Que Interrupt Service routine
1546  *
1547  **********************************************************************/
1548 static void
igb_msix_que(void * arg)1549 igb_msix_que(void *arg)
1550 {
1551 	struct igb_queue *que = arg;
1552 	struct adapter *adapter = que->adapter;
1553 	struct ifnet   *ifp = adapter->ifp;
1554 	struct tx_ring *txr = que->txr;
1555 	struct rx_ring *rxr = que->rxr;
1556 	u32		newitr = 0;
1557 	bool		more_rx;
1558 
1559 	/* Ignore spurious interrupts */
1560 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1561 		return;
1562 
1563 	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1564 	++que->irqs;
1565 
1566 	IGB_TX_LOCK(txr);
1567 	igb_txeof(txr);
1568 #ifndef IGB_LEGACY_TX
1569 	/* Process the stack queue only if not depleted */
1570 	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1571 	    !drbr_empty(ifp, txr->br))
1572 		igb_mq_start_locked(ifp, txr);
1573 #else
1574 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1575 		igb_start_locked(txr, ifp);
1576 #endif
1577 	IGB_TX_UNLOCK(txr);
1578 
1579 	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1580 
1581 	if (adapter->enable_aim == FALSE)
1582 		goto no_calc;
1583 	/*
1584 	** Do Adaptive Interrupt Moderation:
1585         **  - Write out last calculated setting
1586 	**  - Calculate based on average size over
1587 	**    the last interval.
1588 	*/
1589         if (que->eitr_setting)
1590                 E1000_WRITE_REG(&adapter->hw,
1591                     E1000_EITR(que->msix), que->eitr_setting);
1592 
1593         que->eitr_setting = 0;
1594 
1595         /* Idle, do nothing */
1596         if ((txr->bytes == 0) && (rxr->bytes == 0))
1597                 goto no_calc;
1598 
1599         /* Used half Default if sub-gig */
1600         if (adapter->link_speed != 1000)
1601                 newitr = IGB_DEFAULT_ITR / 2;
1602         else {
1603 		if ((txr->bytes) && (txr->packets))
1604                 	newitr = txr->bytes/txr->packets;
1605 		if ((rxr->bytes) && (rxr->packets))
1606 			newitr = max(newitr,
1607 			    (rxr->bytes / rxr->packets));
1608                 newitr += 24; /* account for hardware frame, crc */
1609 		/* set an upper boundary */
1610 		newitr = min(newitr, 3000);
1611 		/* Be nice to the mid range */
1612                 if ((newitr > 300) && (newitr < 1200))
1613                         newitr = (newitr / 3);
1614                 else
1615                         newitr = (newitr / 2);
1616         }
1617         newitr &= 0x7FFC;  /* Mask invalid bits */
1618         if (adapter->hw.mac.type == e1000_82575)
1619                 newitr |= newitr << 16;
1620         else
1621                 newitr |= E1000_EITR_CNT_IGNR;
1622 
1623         /* save for next interrupt */
1624         que->eitr_setting = newitr;
1625 
1626         /* Reset state */
1627         txr->bytes = 0;
1628         txr->packets = 0;
1629         rxr->bytes = 0;
1630         rxr->packets = 0;
1631 
1632 no_calc:
1633 	/* Schedule a clean task if needed*/
1634 	if (more_rx)
1635 		taskqueue_enqueue(que->tq, &que->que_task);
1636 	else
1637 		/* Reenable this interrupt */
1638 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1639 	return;
1640 }
1641 
1642 
1643 /*********************************************************************
1644  *
1645  *  MSIX Link Interrupt Service routine
1646  *
1647  **********************************************************************/
1648 
1649 static void
igb_msix_link(void * arg)1650 igb_msix_link(void *arg)
1651 {
1652 	struct adapter	*adapter = arg;
1653 	u32       	icr;
1654 
1655 	++adapter->link_irq;
1656 	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1657 	if (!(icr & E1000_ICR_LSC))
1658 		goto spurious;
1659 	igb_handle_link(adapter, 0);
1660 
1661 spurious:
1662 	/* Rearm */
1663 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1664 	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1665 	return;
1666 }
1667 
1668 
1669 /*********************************************************************
1670  *
1671  *  Media Ioctl callback
1672  *
1673  *  This routine is called whenever the user queries the status of
1674  *  the interface using ifconfig.
1675  *
1676  **********************************************************************/
1677 static void
igb_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)1678 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679 {
1680 	struct adapter *adapter = ifp->if_softc;
1681 
1682 	INIT_DEBUGOUT("igb_media_status: begin");
1683 
1684 	IGB_CORE_LOCK(adapter);
1685 	igb_update_link_status(adapter);
1686 
1687 	ifmr->ifm_status = IFM_AVALID;
1688 	ifmr->ifm_active = IFM_ETHER;
1689 
1690 	if (!adapter->link_active) {
1691 		IGB_CORE_UNLOCK(adapter);
1692 		return;
1693 	}
1694 
1695 	ifmr->ifm_status |= IFM_ACTIVE;
1696 
1697 	switch (adapter->link_speed) {
1698 	case 10:
1699 		ifmr->ifm_active |= IFM_10_T;
1700 		break;
1701 	case 100:
1702 		/*
1703 		** Support for 100Mb SFP - these are Fiber
1704 		** but the media type appears as serdes
1705 		*/
1706 		if (adapter->hw.phy.media_type ==
1707 		    e1000_media_type_internal_serdes)
1708 			ifmr->ifm_active |= IFM_100_FX;
1709 		else
1710 			ifmr->ifm_active |= IFM_100_TX;
1711 		break;
1712 	case 1000:
1713 		ifmr->ifm_active |= IFM_1000_T;
1714 		break;
1715 	case 2500:
1716 		ifmr->ifm_active |= IFM_2500_SX;
1717 		break;
1718 	}
1719 
1720 	if (adapter->link_duplex == FULL_DUPLEX)
1721 		ifmr->ifm_active |= IFM_FDX;
1722 	else
1723 		ifmr->ifm_active |= IFM_HDX;
1724 
1725 	IGB_CORE_UNLOCK(adapter);
1726 }
1727 
1728 /*********************************************************************
1729  *
1730  *  Media Ioctl callback
1731  *
1732  *  This routine is called when the user changes speed/duplex using
1733  *  media/mediopt option with ifconfig.
1734  *
1735  **********************************************************************/
1736 static int
igb_media_change(struct ifnet * ifp)1737 igb_media_change(struct ifnet *ifp)
1738 {
1739 	struct adapter *adapter = ifp->if_softc;
1740 	struct ifmedia  *ifm = &adapter->media;
1741 
1742 	INIT_DEBUGOUT("igb_media_change: begin");
1743 
1744 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1745 		return (EINVAL);
1746 
1747 	IGB_CORE_LOCK(adapter);
1748 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1749 	case IFM_AUTO:
1750 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1751 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1752 		break;
1753 	case IFM_1000_LX:
1754 	case IFM_1000_SX:
1755 	case IFM_1000_T:
1756 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1757 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1758 		break;
1759 	case IFM_100_TX:
1760 		adapter->hw.mac.autoneg = FALSE;
1761 		adapter->hw.phy.autoneg_advertised = 0;
1762 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1763 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1764 		else
1765 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1766 		break;
1767 	case IFM_10_T:
1768 		adapter->hw.mac.autoneg = FALSE;
1769 		adapter->hw.phy.autoneg_advertised = 0;
1770 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1772 		else
1773 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1774 		break;
1775 	default:
1776 		device_printf(adapter->dev, "Unsupported media type\n");
1777 	}
1778 
1779 	igb_init_locked(adapter);
1780 	IGB_CORE_UNLOCK(adapter);
1781 
1782 	return (0);
1783 }
1784 
1785 
1786 /*********************************************************************
1787  *
1788  *  This routine maps the mbufs to Advanced TX descriptors.
1789  *
1790  **********************************************************************/
1791 static int
igb_xmit(struct tx_ring * txr,struct mbuf ** m_headp)1792 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793 {
1794 	struct adapter  *adapter = txr->adapter;
1795 	u32		olinfo_status = 0, cmd_type_len;
1796 	int             i, j, error, nsegs;
1797 	int		first;
1798 	bool		remap = TRUE;
1799 	struct mbuf	*m_head;
1800 	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1801 	bus_dmamap_t	map;
1802 	struct igb_tx_buf *txbuf;
1803 	union e1000_adv_tx_desc *txd = NULL;
1804 
1805 	m_head = *m_headp;
1806 
1807 	/* Basic descriptor defines */
1808         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1809 	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1810 
1811 	if (m_head->m_flags & M_VLANTAG)
1812         	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1813 
1814         /*
1815          * Important to capture the first descriptor
1816          * used because it will contain the index of
1817          * the one we tell the hardware to report back
1818          */
1819         first = txr->next_avail_desc;
1820 	txbuf = &txr->tx_buffers[first];
1821 	map = txbuf->map;
1822 
1823 	/*
1824 	 * Map the packet for DMA.
1825 	 */
1826 retry:
1827 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1828 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1829 
1830 	if (__predict_false(error)) {
1831 		struct mbuf *m;
1832 
1833 		switch (error) {
1834 		case EFBIG:
1835 			/* Try it again? - one try */
1836 			if (remap == TRUE) {
1837 				remap = FALSE;
1838 				m = m_defrag(*m_headp, M_NOWAIT);
1839 				if (m == NULL) {
1840 					adapter->mbuf_defrag_failed++;
1841 					m_freem(*m_headp);
1842 					*m_headp = NULL;
1843 					return (ENOBUFS);
1844 				}
1845 				*m_headp = m;
1846 				goto retry;
1847 			} else
1848 				return (error);
1849 		default:
1850 			txr->no_tx_dma_setup++;
1851 			m_freem(*m_headp);
1852 			*m_headp = NULL;
1853 			return (error);
1854 		}
1855 	}
1856 
1857 	/* Make certain there are enough descriptors */
1858 	if (nsegs > txr->tx_avail - 2) {
1859 		txr->no_desc_avail++;
1860 		bus_dmamap_unload(txr->txtag, map);
1861 		return (ENOBUFS);
1862 	}
1863 	m_head = *m_headp;
1864 
1865 	/*
1866 	** Set up the appropriate offload context
1867 	** this will consume the first descriptor
1868 	*/
1869 	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1870 	if (__predict_false(error)) {
1871 		m_freem(*m_headp);
1872 		*m_headp = NULL;
1873 		return (error);
1874 	}
1875 
1876 	/* 82575 needs the queue index added */
1877 	if (adapter->hw.mac.type == e1000_82575)
1878 		olinfo_status |= txr->me << 4;
1879 
1880 	i = txr->next_avail_desc;
1881 	for (j = 0; j < nsegs; j++) {
1882 		bus_size_t seglen;
1883 		bus_addr_t segaddr;
1884 
1885 		txbuf = &txr->tx_buffers[i];
1886 		txd = &txr->tx_base[i];
1887 		seglen = segs[j].ds_len;
1888 		segaddr = htole64(segs[j].ds_addr);
1889 
1890 		txd->read.buffer_addr = segaddr;
1891 		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1892 		    cmd_type_len | seglen);
1893 		txd->read.olinfo_status = htole32(olinfo_status);
1894 
1895 		if (++i == txr->num_desc)
1896 			i = 0;
1897 	}
1898 
1899 	txd->read.cmd_type_len |=
1900 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1901 	txr->tx_avail -= nsegs;
1902 	txr->next_avail_desc = i;
1903 
1904 	txbuf->m_head = m_head;
1905 	/*
1906 	** Here we swap the map so the last descriptor,
1907 	** which gets the completion interrupt has the
1908 	** real map, and the first descriptor gets the
1909 	** unused map from this descriptor.
1910 	*/
1911 	txr->tx_buffers[first].map = txbuf->map;
1912 	txbuf->map = map;
1913 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1914 
1915         /* Set the EOP descriptor that will be marked done */
1916         txbuf = &txr->tx_buffers[first];
1917 	txbuf->eop = txd;
1918 
1919         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1920             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1921 	/*
1922 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1923 	 * hardware that this frame is available to transmit.
1924 	 */
1925 	++txr->total_packets;
1926 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1927 
1928 	return (0);
1929 }
1930 static void
igb_set_promisc(struct adapter * adapter)1931 igb_set_promisc(struct adapter *adapter)
1932 {
1933 	struct ifnet	*ifp = adapter->ifp;
1934 	struct e1000_hw *hw = &adapter->hw;
1935 	u32		reg;
1936 
1937 	if (adapter->vf_ifp) {
1938 		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1939 		return;
1940 	}
1941 
1942 	reg = E1000_READ_REG(hw, E1000_RCTL);
1943 	if (ifp->if_flags & IFF_PROMISC) {
1944 		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1945 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1946 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1947 		reg |= E1000_RCTL_MPE;
1948 		reg &= ~E1000_RCTL_UPE;
1949 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1950 	}
1951 }
1952 
1953 static void
igb_disable_promisc(struct adapter * adapter)1954 igb_disable_promisc(struct adapter *adapter)
1955 {
1956 	struct e1000_hw *hw = &adapter->hw;
1957 	struct ifnet	*ifp = adapter->ifp;
1958 	u32		reg;
1959 	int		mcnt = 0;
1960 
1961 	if (adapter->vf_ifp) {
1962 		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1963 		return;
1964 	}
1965 	reg = E1000_READ_REG(hw, E1000_RCTL);
1966 	reg &=  (~E1000_RCTL_UPE);
1967 	if (ifp->if_flags & IFF_ALLMULTI)
1968 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1969 	else {
1970 		struct  ifmultiaddr *ifma;
1971 #if __FreeBSD_version < 800000
1972 		IF_ADDR_LOCK(ifp);
1973 #else
1974 		if_maddr_rlock(ifp);
1975 #endif
1976 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1977 			if (ifma->ifma_addr->sa_family != AF_LINK)
1978 				continue;
1979 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1980 				break;
1981 			mcnt++;
1982 		}
1983 #if __FreeBSD_version < 800000
1984 		IF_ADDR_UNLOCK(ifp);
1985 #else
1986 		if_maddr_runlock(ifp);
1987 #endif
1988 	}
1989 	/* Don't disable if in MAX groups */
1990 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1991 		reg &=  (~E1000_RCTL_MPE);
1992 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1993 }
1994 
1995 
1996 /*********************************************************************
1997  *  Multicast Update
1998  *
1999  *  This routine is called whenever multicast address list is updated.
2000  *
2001  **********************************************************************/
2002 
2003 static void
igb_set_multi(struct adapter * adapter)2004 igb_set_multi(struct adapter *adapter)
2005 {
2006 	struct ifnet	*ifp = adapter->ifp;
2007 	struct ifmultiaddr *ifma;
2008 	u32 reg_rctl = 0;
2009 	u8  *mta;
2010 
2011 	int mcnt = 0;
2012 
2013 	IOCTL_DEBUGOUT("igb_set_multi: begin");
2014 
2015 	mta = adapter->mta;
2016 	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2017 	    MAX_NUM_MULTICAST_ADDRESSES);
2018 
2019 #if __FreeBSD_version < 800000
2020 	IF_ADDR_LOCK(ifp);
2021 #else
2022 	if_maddr_rlock(ifp);
2023 #endif
2024 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2025 		if (ifma->ifma_addr->sa_family != AF_LINK)
2026 			continue;
2027 
2028 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2029 			break;
2030 
2031 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2032 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2033 		mcnt++;
2034 	}
2035 #if __FreeBSD_version < 800000
2036 	IF_ADDR_UNLOCK(ifp);
2037 #else
2038 	if_maddr_runlock(ifp);
2039 #endif
2040 
2041 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2042 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2043 		reg_rctl |= E1000_RCTL_MPE;
2044 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2045 	} else
2046 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2047 }
2048 
2049 
2050 /*********************************************************************
2051  *  Timer routine:
2052  *  	This routine checks for link status,
2053  *	updates statistics, and does the watchdog.
2054  *
2055  **********************************************************************/
2056 
2057 static void
igb_local_timer(void * arg)2058 igb_local_timer(void *arg)
2059 {
2060 	struct adapter		*adapter = arg;
2061 	device_t		dev = adapter->dev;
2062 	struct ifnet		*ifp = adapter->ifp;
2063 	struct tx_ring		*txr = adapter->tx_rings;
2064 	struct igb_queue	*que = adapter->queues;
2065 	int			hung = 0, busy = 0;
2066 
2067 
2068 	IGB_CORE_LOCK_ASSERT(adapter);
2069 
2070 	igb_update_link_status(adapter);
2071 	igb_update_stats_counters(adapter);
2072 
2073         /*
2074         ** Check the TX queues status
2075 	**	- central locked handling of OACTIVE
2076 	**	- watchdog only if all queues show hung
2077         */
2078 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2079 		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2080 		    (adapter->pause_frames == 0))
2081 			++hung;
2082 		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2083 			++busy;
2084 		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2085 			taskqueue_enqueue(que->tq, &que->que_task);
2086 	}
2087 	if (hung == adapter->num_queues)
2088 		goto timeout;
2089 	if (busy == adapter->num_queues)
2090 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2091 	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2092 	    (busy < adapter->num_queues))
2093 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2094 
2095 	adapter->pause_frames = 0;
2096 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2097 #ifndef DEVICE_POLLING
2098 	/* Schedule all queue interrupts - deadlock protection */
2099 	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2100 #endif
2101 	return;
2102 
2103 timeout:
2104 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2105 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2106             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2107             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2108 	device_printf(dev,"TX(%d) desc avail = %d,"
2109             "Next TX to Clean = %d\n",
2110             txr->me, txr->tx_avail, txr->next_to_clean);
2111 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2112 	adapter->watchdog_events++;
2113 	igb_init_locked(adapter);
2114 }
2115 
2116 static void
igb_update_link_status(struct adapter * adapter)2117 igb_update_link_status(struct adapter *adapter)
2118 {
2119 	struct e1000_hw		*hw = &adapter->hw;
2120 	struct e1000_fc_info	*fc = &hw->fc;
2121 	struct ifnet		*ifp = adapter->ifp;
2122 	device_t		dev = adapter->dev;
2123 	struct tx_ring		*txr = adapter->tx_rings;
2124 	u32			link_check, thstat, ctrl;
2125 	char			*flowctl = NULL;
2126 
2127 	link_check = thstat = ctrl = 0;
2128 
2129 	/* Get the cached link value or read for real */
2130         switch (hw->phy.media_type) {
2131         case e1000_media_type_copper:
2132                 if (hw->mac.get_link_status) {
2133 			/* Do the work to read phy */
2134                         e1000_check_for_link(hw);
2135                         link_check = !hw->mac.get_link_status;
2136                 } else
2137                         link_check = TRUE;
2138                 break;
2139         case e1000_media_type_fiber:
2140                 e1000_check_for_link(hw);
2141                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2142                                  E1000_STATUS_LU);
2143                 break;
2144         case e1000_media_type_internal_serdes:
2145                 e1000_check_for_link(hw);
2146                 link_check = adapter->hw.mac.serdes_has_link;
2147                 break;
2148 	/* VF device is type_unknown */
2149         case e1000_media_type_unknown:
2150                 e1000_check_for_link(hw);
2151 		link_check = !hw->mac.get_link_status;
2152 		/* Fall thru */
2153         default:
2154                 break;
2155         }
2156 
2157 	/* Check for thermal downshift or shutdown */
2158 	if (hw->mac.type == e1000_i350) {
2159 		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2160 		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2161 	}
2162 
2163 	/* Get the flow control for display */
2164 	switch (fc->current_mode) {
2165 	case e1000_fc_rx_pause:
2166 		flowctl = "RX";
2167 		break;
2168 	case e1000_fc_tx_pause:
2169 		flowctl = "TX";
2170 		break;
2171 	case e1000_fc_full:
2172 		flowctl = "Full";
2173 		break;
2174 	case e1000_fc_none:
2175 	default:
2176 		flowctl = "None";
2177 		break;
2178 	}
2179 
2180 	/* Now we check if a transition has happened */
2181 	if (link_check && (adapter->link_active == 0)) {
2182 		e1000_get_speed_and_duplex(&adapter->hw,
2183 		    &adapter->link_speed, &adapter->link_duplex);
2184 		if (bootverbose)
2185 			device_printf(dev, "Link is up %d Mbps %s,"
2186 			    " Flow Control: %s\n",
2187 			    adapter->link_speed,
2188 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2189 			    "Full Duplex" : "Half Duplex"), flowctl);
2190 		adapter->link_active = 1;
2191 		ifp->if_baudrate = adapter->link_speed * 1000000;
2192 		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2193 		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2194 			device_printf(dev, "Link: thermal downshift\n");
2195 		/* Delay Link Up for Phy update */
2196 		if (((hw->mac.type == e1000_i210) ||
2197 		    (hw->mac.type == e1000_i211)) &&
2198 		    (hw->phy.id == I210_I_PHY_ID))
2199 			msec_delay(I210_LINK_DELAY);
2200 		/* Reset if the media type changed. */
2201 		if (hw->dev_spec._82575.media_changed) {
2202 			hw->dev_spec._82575.media_changed = false;
2203 			adapter->flags |= IGB_MEDIA_RESET;
2204 			igb_reset(adapter);
2205 		}
2206 		/* This can sleep */
2207 		if_link_state_change(ifp, LINK_STATE_UP);
2208 	} else if (!link_check && (adapter->link_active == 1)) {
2209 		ifp->if_baudrate = adapter->link_speed = 0;
2210 		adapter->link_duplex = 0;
2211 		if (bootverbose)
2212 			device_printf(dev, "Link is Down\n");
2213 		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2214 		    (thstat & E1000_THSTAT_PWR_DOWN))
2215 			device_printf(dev, "Link: thermal shutdown\n");
2216 		adapter->link_active = 0;
2217 		/* This can sleep */
2218 		if_link_state_change(ifp, LINK_STATE_DOWN);
2219 		/* Reset queue state */
2220 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2221 			txr->queue_status = IGB_QUEUE_IDLE;
2222 	}
2223 }
2224 
2225 /*********************************************************************
2226  *
2227  *  This routine disables all traffic on the adapter by issuing a
2228  *  global reset on the MAC and deallocates TX/RX buffers.
2229  *
2230  **********************************************************************/
2231 
2232 static void
igb_stop(void * arg)2233 igb_stop(void *arg)
2234 {
2235 	struct adapter	*adapter = arg;
2236 	struct ifnet	*ifp = adapter->ifp;
2237 	struct tx_ring *txr = adapter->tx_rings;
2238 
2239 	IGB_CORE_LOCK_ASSERT(adapter);
2240 
2241 	INIT_DEBUGOUT("igb_stop: begin");
2242 
2243 	igb_disable_intr(adapter);
2244 
2245 	callout_stop(&adapter->timer);
2246 
2247 	/* Tell the stack that the interface is no longer active */
2248 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2249 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2250 
2251 	/* Disarm watchdog timer. */
2252 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2253 		IGB_TX_LOCK(txr);
2254 		txr->queue_status = IGB_QUEUE_IDLE;
2255 		IGB_TX_UNLOCK(txr);
2256 	}
2257 
2258 	e1000_reset_hw(&adapter->hw);
2259 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2260 
2261 	e1000_led_off(&adapter->hw);
2262 	e1000_cleanup_led(&adapter->hw);
2263 }
2264 
2265 
2266 /*********************************************************************
2267  *
2268  *  Determine hardware revision.
2269  *
2270  **********************************************************************/
2271 static void
igb_identify_hardware(struct adapter * adapter)2272 igb_identify_hardware(struct adapter *adapter)
2273 {
2274 	device_t dev = adapter->dev;
2275 
2276 	/* Make sure our PCI config space has the necessary stuff set */
2277 	pci_enable_busmaster(dev);
2278 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2279 
2280 	/* Save off the information about this board */
2281 	adapter->hw.vendor_id = pci_get_vendor(dev);
2282 	adapter->hw.device_id = pci_get_device(dev);
2283 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2284 	adapter->hw.subsystem_vendor_id =
2285 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2286 	adapter->hw.subsystem_device_id =
2287 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2288 
2289 	/* Set MAC type early for PCI setup */
2290 	e1000_set_mac_type(&adapter->hw);
2291 
2292 	/* Are we a VF device? */
2293 	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2294 	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2295 		adapter->vf_ifp = 1;
2296 	else
2297 		adapter->vf_ifp = 0;
2298 }
2299 
2300 static int
igb_allocate_pci_resources(struct adapter * adapter)2301 igb_allocate_pci_resources(struct adapter *adapter)
2302 {
2303 	device_t	dev = adapter->dev;
2304 	int		rid;
2305 
2306 	rid = PCIR_BAR(0);
2307 	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2308 	    &rid, RF_ACTIVE);
2309 	if (adapter->pci_mem == NULL) {
2310 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2311 		return (ENXIO);
2312 	}
2313 	adapter->osdep.mem_bus_space_tag =
2314 	    rman_get_bustag(adapter->pci_mem);
2315 	adapter->osdep.mem_bus_space_handle =
2316 	    rman_get_bushandle(adapter->pci_mem);
2317 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2318 
2319 	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2320 
2321 	/* This will setup either MSI/X or MSI */
2322 	adapter->msix = igb_setup_msix(adapter);
2323 	adapter->hw.back = &adapter->osdep;
2324 
2325 	return (0);
2326 }
2327 
2328 /*********************************************************************
2329  *
2330  *  Setup the Legacy or MSI Interrupt handler
2331  *
2332  **********************************************************************/
2333 static int
igb_allocate_legacy(struct adapter * adapter)2334 igb_allocate_legacy(struct adapter *adapter)
2335 {
2336 	device_t		dev = adapter->dev;
2337 	struct igb_queue	*que = adapter->queues;
2338 #ifndef IGB_LEGACY_TX
2339 	struct tx_ring		*txr = adapter->tx_rings;
2340 #endif
2341 	int			error, rid = 0;
2342 
2343 	/* Turn off all interrupts */
2344 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2345 
2346 	/* MSI RID is 1 */
2347 	if (adapter->msix == 1)
2348 		rid = 1;
2349 
2350 	/* We allocate a single interrupt resource */
2351 	adapter->res = bus_alloc_resource_any(dev,
2352 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2353 	if (adapter->res == NULL) {
2354 		device_printf(dev, "Unable to allocate bus resource: "
2355 		    "interrupt\n");
2356 		return (ENXIO);
2357 	}
2358 
2359 #ifndef IGB_LEGACY_TX
2360 	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2361 #endif
2362 
2363 	/*
2364 	 * Try allocating a fast interrupt and the associated deferred
2365 	 * processing contexts.
2366 	 */
2367 	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2368 	/* Make tasklet for deferred link handling */
2369 	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2370 	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2371 	    taskqueue_thread_enqueue, &que->tq);
2372 	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2373 	    device_get_nameunit(adapter->dev));
2374 	if ((error = bus_setup_intr(dev, adapter->res,
2375 	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2376 	    adapter, &adapter->tag)) != 0) {
2377 		device_printf(dev, "Failed to register fast interrupt "
2378 			    "handler: %d\n", error);
2379 		taskqueue_free(que->tq);
2380 		que->tq = NULL;
2381 		return (error);
2382 	}
2383 
2384 	return (0);
2385 }
2386 
2387 
2388 /*********************************************************************
2389  *
2390  *  Setup the MSIX Queue Interrupt handlers:
2391  *
2392  **********************************************************************/
2393 static int
igb_allocate_msix(struct adapter * adapter)2394 igb_allocate_msix(struct adapter *adapter)
2395 {
2396 	device_t		dev = adapter->dev;
2397 	struct igb_queue	*que = adapter->queues;
2398 	int			error, rid, vector = 0;
2399 	int			cpu_id = 0;
2400 #ifdef	RSS
2401 	cpuset_t cpu_mask;
2402 #endif
2403 
2404 	/* Be sure to start with all interrupts disabled */
2405 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2406 	E1000_WRITE_FLUSH(&adapter->hw);
2407 
2408 #ifdef	RSS
2409 	/*
2410 	 * If we're doing RSS, the number of queues needs to
2411 	 * match the number of RSS buckets that are configured.
2412 	 *
2413 	 * + If there's more queues than RSS buckets, we'll end
2414 	 *   up with queues that get no traffic.
2415 	 *
2416 	 * + If there's more RSS buckets than queues, we'll end
2417 	 *   up having multiple RSS buckets map to the same queue,
2418 	 *   so there'll be some contention.
2419 	 */
2420 	if (adapter->num_queues != rss_getnumbuckets()) {
2421 		device_printf(dev,
2422 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
2423 		    "; performance will be impacted.\n",
2424 		    __func__,
2425 		    adapter->num_queues,
2426 		    rss_getnumbuckets());
2427 	}
2428 #endif
2429 
2430 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2431 		rid = vector +1;
2432 		que->res = bus_alloc_resource_any(dev,
2433 		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2434 		if (que->res == NULL) {
2435 			device_printf(dev,
2436 			    "Unable to allocate bus resource: "
2437 			    "MSIX Queue Interrupt\n");
2438 			return (ENXIO);
2439 		}
2440 		error = bus_setup_intr(dev, que->res,
2441 	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2442 		    igb_msix_que, que, &que->tag);
2443 		if (error) {
2444 			que->res = NULL;
2445 			device_printf(dev, "Failed to register Queue handler");
2446 			return (error);
2447 		}
2448 #if __FreeBSD_version >= 800504
2449 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2450 #endif
2451 		que->msix = vector;
2452 		if (adapter->hw.mac.type == e1000_82575)
2453 			que->eims = E1000_EICR_TX_QUEUE0 << i;
2454 		else
2455 			que->eims = 1 << vector;
2456 
2457 #ifdef	RSS
2458 		/*
2459 		 * The queue ID is used as the RSS layer bucket ID.
2460 		 * We look up the queue ID -> RSS CPU ID and select
2461 		 * that.
2462 		 */
2463 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
2464 #else
2465 		/*
2466 		 * Bind the msix vector, and thus the
2467 		 * rings to the corresponding cpu.
2468 		 *
2469 		 * This just happens to match the default RSS round-robin
2470 		 * bucket -> queue -> CPU allocation.
2471 		 */
2472 		if (adapter->num_queues > 1) {
2473 			if (igb_last_bind_cpu < 0)
2474 				igb_last_bind_cpu = CPU_FIRST();
2475 			cpu_id = igb_last_bind_cpu;
2476 		}
2477 #endif
2478 
2479 		if (adapter->num_queues > 1) {
2480 			bus_bind_intr(dev, que->res, cpu_id);
2481 #ifdef	RSS
2482 			device_printf(dev,
2483 				"Bound queue %d to RSS bucket %d\n",
2484 				i, cpu_id);
2485 #else
2486 			device_printf(dev,
2487 				"Bound queue %d to cpu %d\n",
2488 				i, cpu_id);
2489 #endif
2490 		}
2491 
2492 #ifndef IGB_LEGACY_TX
2493 		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2494 		    que->txr);
2495 #endif
2496 		/* Make tasklet for deferred handling */
2497 		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2498 		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2499 		    taskqueue_thread_enqueue, &que->tq);
2500 		if (adapter->num_queues > 1) {
2501 			/*
2502 			 * Only pin the taskqueue thread to a CPU if
2503 			 * RSS is in use.
2504 			 *
2505 			 * This again just happens to match the default RSS
2506 			 * round-robin bucket -> queue -> CPU allocation.
2507 			 */
2508 #ifdef	RSS
2509 			CPU_SETOF(cpu_id, &cpu_mask);
2510 			taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2511 			    &cpu_mask,
2512 			    "%s que (bucket %d)",
2513 			    device_get_nameunit(adapter->dev),
2514 			    cpu_id);
2515 #else
2516 			taskqueue_start_threads(&que->tq, 1, PI_NET,
2517 			    "%s que (qid %d)",
2518 			    device_get_nameunit(adapter->dev),
2519 			    cpu_id);
2520 #endif
2521 		} else {
2522 			taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2523 			    device_get_nameunit(adapter->dev));
2524 		}
2525 
2526 		/* Finally update the last bound CPU id */
2527 		if (adapter->num_queues > 1)
2528 			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2529 	}
2530 
2531 	/* And Link */
2532 	rid = vector + 1;
2533 	adapter->res = bus_alloc_resource_any(dev,
2534 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2535 	if (adapter->res == NULL) {
2536 		device_printf(dev,
2537 		    "Unable to allocate bus resource: "
2538 		    "MSIX Link Interrupt\n");
2539 		return (ENXIO);
2540 	}
2541 	if ((error = bus_setup_intr(dev, adapter->res,
2542 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2543 	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2544 		device_printf(dev, "Failed to register Link handler");
2545 		return (error);
2546 	}
2547 #if __FreeBSD_version >= 800504
2548 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2549 #endif
2550 	adapter->linkvec = vector;
2551 
2552 	return (0);
2553 }
2554 
2555 
2556 static void
igb_configure_queues(struct adapter * adapter)2557 igb_configure_queues(struct adapter *adapter)
2558 {
2559 	struct	e1000_hw	*hw = &adapter->hw;
2560 	struct	igb_queue	*que;
2561 	u32			tmp, ivar = 0, newitr = 0;
2562 
2563 	/* First turn on RSS capability */
2564 	if (adapter->hw.mac.type != e1000_82575)
2565 		E1000_WRITE_REG(hw, E1000_GPIE,
2566 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2567 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2568 
2569 	/* Turn on MSIX */
2570 	switch (adapter->hw.mac.type) {
2571 	case e1000_82580:
2572 	case e1000_i350:
2573 	case e1000_i354:
2574 	case e1000_i210:
2575 	case e1000_i211:
2576 	case e1000_vfadapt:
2577 	case e1000_vfadapt_i350:
2578 		/* RX entries */
2579 		for (int i = 0; i < adapter->num_queues; i++) {
2580 			u32 index = i >> 1;
2581 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2582 			que = &adapter->queues[i];
2583 			if (i & 1) {
2584 				ivar &= 0xFF00FFFF;
2585 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2586 			} else {
2587 				ivar &= 0xFFFFFF00;
2588 				ivar |= que->msix | E1000_IVAR_VALID;
2589 			}
2590 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2591 		}
2592 		/* TX entries */
2593 		for (int i = 0; i < adapter->num_queues; i++) {
2594 			u32 index = i >> 1;
2595 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2596 			que = &adapter->queues[i];
2597 			if (i & 1) {
2598 				ivar &= 0x00FFFFFF;
2599 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2600 			} else {
2601 				ivar &= 0xFFFF00FF;
2602 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2603 			}
2604 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2605 			adapter->que_mask |= que->eims;
2606 		}
2607 
2608 		/* And for the link interrupt */
2609 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2610 		adapter->link_mask = 1 << adapter->linkvec;
2611 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2612 		break;
2613 	case e1000_82576:
2614 		/* RX entries */
2615 		for (int i = 0; i < adapter->num_queues; i++) {
2616 			u32 index = i & 0x7; /* Each IVAR has two entries */
2617 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2618 			que = &adapter->queues[i];
2619 			if (i < 8) {
2620 				ivar &= 0xFFFFFF00;
2621 				ivar |= que->msix | E1000_IVAR_VALID;
2622 			} else {
2623 				ivar &= 0xFF00FFFF;
2624 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2625 			}
2626 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2627 			adapter->que_mask |= que->eims;
2628 		}
2629 		/* TX entries */
2630 		for (int i = 0; i < adapter->num_queues; i++) {
2631 			u32 index = i & 0x7; /* Each IVAR has two entries */
2632 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2633 			que = &adapter->queues[i];
2634 			if (i < 8) {
2635 				ivar &= 0xFFFF00FF;
2636 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2637 			} else {
2638 				ivar &= 0x00FFFFFF;
2639 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2640 			}
2641 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2642 			adapter->que_mask |= que->eims;
2643 		}
2644 
2645 		/* And for the link interrupt */
2646 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2647 		adapter->link_mask = 1 << adapter->linkvec;
2648 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2649 		break;
2650 
2651 	case e1000_82575:
2652                 /* enable MSI-X support*/
2653 		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2654                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2655                 /* Auto-Mask interrupts upon ICR read. */
2656                 tmp |= E1000_CTRL_EXT_EIAME;
2657                 tmp |= E1000_CTRL_EXT_IRCA;
2658                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2659 
2660 		/* Queues */
2661 		for (int i = 0; i < adapter->num_queues; i++) {
2662 			que = &adapter->queues[i];
2663 			tmp = E1000_EICR_RX_QUEUE0 << i;
2664 			tmp |= E1000_EICR_TX_QUEUE0 << i;
2665 			que->eims = tmp;
2666 			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2667 			    i, que->eims);
2668 			adapter->que_mask |= que->eims;
2669 		}
2670 
2671 		/* Link */
2672 		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2673 		    E1000_EIMS_OTHER);
2674 		adapter->link_mask |= E1000_EIMS_OTHER;
2675 	default:
2676 		break;
2677 	}
2678 
2679 	/* Set the starting interrupt rate */
2680 	if (igb_max_interrupt_rate > 0)
2681 		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2682 
2683         if (hw->mac.type == e1000_82575)
2684                 newitr |= newitr << 16;
2685         else
2686                 newitr |= E1000_EITR_CNT_IGNR;
2687 
2688 	for (int i = 0; i < adapter->num_queues; i++) {
2689 		que = &adapter->queues[i];
2690 		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2691 	}
2692 
2693 	return;
2694 }
2695 
2696 
2697 static void
igb_free_pci_resources(struct adapter * adapter)2698 igb_free_pci_resources(struct adapter *adapter)
2699 {
2700 	struct		igb_queue *que = adapter->queues;
2701 	device_t	dev = adapter->dev;
2702 	int		rid;
2703 
2704 	/*
2705 	** There is a slight possibility of a failure mode
2706 	** in attach that will result in entering this function
2707 	** before interrupt resources have been initialized, and
2708 	** in that case we do not want to execute the loops below
2709 	** We can detect this reliably by the state of the adapter
2710 	** res pointer.
2711 	*/
2712 	if (adapter->res == NULL)
2713 		goto mem;
2714 
2715 	/*
2716 	 * First release all the interrupt resources:
2717 	 */
2718 	for (int i = 0; i < adapter->num_queues; i++, que++) {
2719 		rid = que->msix + 1;
2720 		if (que->tag != NULL) {
2721 			bus_teardown_intr(dev, que->res, que->tag);
2722 			que->tag = NULL;
2723 		}
2724 		if (que->res != NULL)
2725 			bus_release_resource(dev,
2726 			    SYS_RES_IRQ, rid, que->res);
2727 	}
2728 
2729 	/* Clean the Legacy or Link interrupt last */
2730 	if (adapter->linkvec) /* we are doing MSIX */
2731 		rid = adapter->linkvec + 1;
2732 	else
2733 		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2734 
2735 	que = adapter->queues;
2736 	if (adapter->tag != NULL) {
2737 		taskqueue_drain(que->tq, &adapter->link_task);
2738 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2739 		adapter->tag = NULL;
2740 	}
2741 	if (adapter->res != NULL)
2742 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2743 
2744 	for (int i = 0; i < adapter->num_queues; i++, que++) {
2745 		if (que->tq != NULL) {
2746 #ifndef IGB_LEGACY_TX
2747 			taskqueue_drain(que->tq, &que->txr->txq_task);
2748 #endif
2749 			taskqueue_drain(que->tq, &que->que_task);
2750 			taskqueue_free(que->tq);
2751 		}
2752 	}
2753 mem:
2754 	if (adapter->msix)
2755 		pci_release_msi(dev);
2756 
2757 	if (adapter->msix_mem != NULL)
2758 		bus_release_resource(dev, SYS_RES_MEMORY,
2759 		    adapter->memrid, adapter->msix_mem);
2760 
2761 	if (adapter->pci_mem != NULL)
2762 		bus_release_resource(dev, SYS_RES_MEMORY,
2763 		    PCIR_BAR(0), adapter->pci_mem);
2764 
2765 }
2766 
2767 /*
2768  * Setup Either MSI/X or MSI
2769  */
2770 static int
igb_setup_msix(struct adapter * adapter)2771 igb_setup_msix(struct adapter *adapter)
2772 {
2773 	device_t	dev = adapter->dev;
2774 	int		bar, want, queues, msgs, maxqueues;
2775 
2776 	/* tuneable override */
2777 	if (igb_enable_msix == 0)
2778 		goto msi;
2779 
2780 	/* First try MSI/X */
2781 	msgs = pci_msix_count(dev);
2782 	if (msgs == 0)
2783 		goto msi;
2784 	/*
2785 	** Some new devices, as with ixgbe, now may
2786 	** use a different BAR, so we need to keep
2787 	** track of which is used.
2788 	*/
2789 	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2790 	bar = pci_read_config(dev, adapter->memrid, 4);
2791 	if (bar == 0) /* use next bar */
2792 		adapter->memrid += 4;
2793 	adapter->msix_mem = bus_alloc_resource_any(dev,
2794 	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2795        	if (adapter->msix_mem == NULL) {
2796 		/* May not be enabled */
2797 		device_printf(adapter->dev,
2798 		    "Unable to map MSIX table \n");
2799 		goto msi;
2800 	}
2801 
2802 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2803 
2804 	/* Override via tuneable */
2805 	if (igb_num_queues != 0)
2806 		queues = igb_num_queues;
2807 
2808 #ifdef	RSS
2809 	/* If we're doing RSS, clamp at the number of RSS buckets */
2810 	if (queues > rss_getnumbuckets())
2811 		queues = rss_getnumbuckets();
2812 #endif
2813 
2814 
2815 	/* Sanity check based on HW */
2816 	switch (adapter->hw.mac.type) {
2817 		case e1000_82575:
2818 			maxqueues = 4;
2819 			break;
2820 		case e1000_82576:
2821 		case e1000_82580:
2822 		case e1000_i350:
2823 		case e1000_i354:
2824 			maxqueues = 8;
2825 			break;
2826 		case e1000_i210:
2827 			maxqueues = 4;
2828 			break;
2829 		case e1000_i211:
2830 			maxqueues = 2;
2831 			break;
2832 		default:  /* VF interfaces */
2833 			maxqueues = 1;
2834 			break;
2835 	}
2836 
2837 	/* Final clamp on the actual hardware capability */
2838 	if (queues > maxqueues)
2839 		queues = maxqueues;
2840 
2841 	/*
2842 	** One vector (RX/TX pair) per queue
2843 	** plus an additional for Link interrupt
2844 	*/
2845 	want = queues + 1;
2846 	if (msgs >= want)
2847 		msgs = want;
2848 	else {
2849                	device_printf(adapter->dev,
2850 		    "MSIX Configuration Problem, "
2851 		    "%d vectors configured, but %d queues wanted!\n",
2852 		    msgs, want);
2853 		goto msi;
2854 	}
2855 	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2856                	device_printf(adapter->dev,
2857 		    "Using MSIX interrupts with %d vectors\n", msgs);
2858 		adapter->num_queues = queues;
2859 		return (msgs);
2860 	}
2861 	/*
2862 	** If MSIX alloc failed or provided us with
2863 	** less than needed, free and fall through to MSI
2864 	*/
2865 	pci_release_msi(dev);
2866 
2867 msi:
2868        	if (adapter->msix_mem != NULL) {
2869 		bus_release_resource(dev, SYS_RES_MEMORY,
2870 		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2871 		adapter->msix_mem = NULL;
2872 	}
2873        	msgs = 1;
2874 	if (pci_alloc_msi(dev, &msgs) == 0) {
2875 		device_printf(adapter->dev," Using an MSI interrupt\n");
2876 		return (msgs);
2877 	}
2878 	device_printf(adapter->dev," Using a Legacy interrupt\n");
2879 	return (0);
2880 }
2881 
2882 /*********************************************************************
2883  *
2884  *  Initialize the DMA Coalescing feature
2885  *
2886  **********************************************************************/
2887 static void
igb_init_dmac(struct adapter * adapter,u32 pba)2888 igb_init_dmac(struct adapter *adapter, u32 pba)
2889 {
2890 	device_t	dev = adapter->dev;
2891 	struct e1000_hw *hw = &adapter->hw;
2892 	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2893 	u16		hwm;
2894 
2895 	if (hw->mac.type == e1000_i211)
2896 		return;
2897 
2898 	if (hw->mac.type > e1000_82580) {
2899 
2900 		if (adapter->dmac == 0) { /* Disabling it */
2901 			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2902 			return;
2903 		} else
2904 			device_printf(dev, "DMA Coalescing enabled\n");
2905 
2906 		/* Set starting threshold */
2907 		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2908 
2909 		hwm = 64 * pba - adapter->max_frame_size / 16;
2910 		if (hwm < 64 * (pba - 6))
2911 			hwm = 64 * (pba - 6);
2912 		reg = E1000_READ_REG(hw, E1000_FCRTC);
2913 		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2914 		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2915 		    & E1000_FCRTC_RTH_COAL_MASK);
2916 		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2917 
2918 
2919 		dmac = pba - adapter->max_frame_size / 512;
2920 		if (dmac < pba - 10)
2921 			dmac = pba - 10;
2922 		reg = E1000_READ_REG(hw, E1000_DMACR);
2923 		reg &= ~E1000_DMACR_DMACTHR_MASK;
2924 		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2925 		    & E1000_DMACR_DMACTHR_MASK);
2926 
2927 		/* transition to L0x or L1 if available..*/
2928 		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2929 
2930 		/* Check if status is 2.5Gb backplane connection
2931 		* before configuration of watchdog timer, which is
2932 		* in msec values in 12.8usec intervals
2933 		* watchdog timer= msec values in 32usec intervals
2934 		* for non 2.5Gb connection
2935 		*/
2936 		if (hw->mac.type == e1000_i354) {
2937 			int status = E1000_READ_REG(hw, E1000_STATUS);
2938 			if ((status & E1000_STATUS_2P5_SKU) &&
2939 			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2940 				reg |= ((adapter->dmac * 5) >> 6);
2941 			else
2942 				reg |= (adapter->dmac >> 5);
2943 		} else {
2944 			reg |= (adapter->dmac >> 5);
2945 		}
2946 
2947 		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2948 
2949 		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2950 
2951 		/* Set the interval before transition */
2952 		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2953 		if (hw->mac.type == e1000_i350)
2954 			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2955 		/*
2956 		** in 2.5Gb connection, TTLX unit is 0.4 usec
2957 		** which is 0x4*2 = 0xA. But delay is still 4 usec
2958 		*/
2959 		if (hw->mac.type == e1000_i354) {
2960 			int status = E1000_READ_REG(hw, E1000_STATUS);
2961 			if ((status & E1000_STATUS_2P5_SKU) &&
2962 			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2963 				reg |= 0xA;
2964 			else
2965 				reg |= 0x4;
2966 		} else {
2967 			reg |= 0x4;
2968 		}
2969 
2970 		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2971 
2972 		/* free space in tx packet buffer to wake from DMA coal */
2973 		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2974 		    (2 * adapter->max_frame_size)) >> 6);
2975 
2976 		/* make low power state decision controlled by DMA coal */
2977 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2978 		reg &= ~E1000_PCIEMISC_LX_DECISION;
2979 		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2980 
2981 	} else if (hw->mac.type == e1000_82580) {
2982 		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2983 		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2984 		    reg & ~E1000_PCIEMISC_LX_DECISION);
2985 		E1000_WRITE_REG(hw, E1000_DMACR, 0);
2986 	}
2987 }
2988 
2989 
2990 /*********************************************************************
2991  *
2992  *  Set up an fresh starting state
2993  *
2994  **********************************************************************/
2995 static void
igb_reset(struct adapter * adapter)2996 igb_reset(struct adapter *adapter)
2997 {
2998 	device_t	dev = adapter->dev;
2999 	struct e1000_hw *hw = &adapter->hw;
3000 	struct e1000_fc_info *fc = &hw->fc;
3001 	struct ifnet	*ifp = adapter->ifp;
3002 	u32		pba = 0;
3003 	u16		hwm;
3004 
3005 	INIT_DEBUGOUT("igb_reset: begin");
3006 
3007 	/* Let the firmware know the OS is in control */
3008 	igb_get_hw_control(adapter);
3009 
3010 	/*
3011 	 * Packet Buffer Allocation (PBA)
3012 	 * Writing PBA sets the receive portion of the buffer
3013 	 * the remainder is used for the transmit buffer.
3014 	 */
3015 	switch (hw->mac.type) {
3016 	case e1000_82575:
3017 		pba = E1000_PBA_32K;
3018 		break;
3019 	case e1000_82576:
3020 	case e1000_vfadapt:
3021 		pba = E1000_READ_REG(hw, E1000_RXPBS);
3022 		pba &= E1000_RXPBS_SIZE_MASK_82576;
3023 		break;
3024 	case e1000_82580:
3025 	case e1000_i350:
3026 	case e1000_i354:
3027 	case e1000_vfadapt_i350:
3028 		pba = E1000_READ_REG(hw, E1000_RXPBS);
3029 		pba = e1000_rxpbs_adjust_82580(pba);
3030 		break;
3031 	case e1000_i210:
3032 	case e1000_i211:
3033 		pba = E1000_PBA_34K;
3034 	default:
3035 		break;
3036 	}
3037 
3038 	/* Special needs in case of Jumbo frames */
3039 	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3040 		u32 tx_space, min_tx, min_rx;
3041 		pba = E1000_READ_REG(hw, E1000_PBA);
3042 		tx_space = pba >> 16;
3043 		pba &= 0xffff;
3044 		min_tx = (adapter->max_frame_size +
3045 		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3046 		min_tx = roundup2(min_tx, 1024);
3047 		min_tx >>= 10;
3048                 min_rx = adapter->max_frame_size;
3049                 min_rx = roundup2(min_rx, 1024);
3050                 min_rx >>= 10;
3051 		if (tx_space < min_tx &&
3052 		    ((min_tx - tx_space) < pba)) {
3053 			pba = pba - (min_tx - tx_space);
3054 			/*
3055                          * if short on rx space, rx wins
3056                          * and must trump tx adjustment
3057 			 */
3058                         if (pba < min_rx)
3059                                 pba = min_rx;
3060 		}
3061 		E1000_WRITE_REG(hw, E1000_PBA, pba);
3062 	}
3063 
3064 	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3065 
3066 	/*
3067 	 * These parameters control the automatic generation (Tx) and
3068 	 * response (Rx) to Ethernet PAUSE frames.
3069 	 * - High water mark should allow for at least two frames to be
3070 	 *   received after sending an XOFF.
3071 	 * - Low water mark works best when it is very near the high water mark.
3072 	 *   This allows the receiver to restart by sending XON when it has
3073 	 *   drained a bit.
3074 	 */
3075 	hwm = min(((pba << 10) * 9 / 10),
3076 	    ((pba << 10) - 2 * adapter->max_frame_size));
3077 
3078 	if (hw->mac.type < e1000_82576) {
3079 		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3080 		fc->low_water = fc->high_water - 8;
3081 	} else {
3082 		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3083 		fc->low_water = fc->high_water - 16;
3084 	}
3085 
3086 	fc->pause_time = IGB_FC_PAUSE_TIME;
3087 	fc->send_xon = TRUE;
3088 	if (adapter->fc)
3089 		fc->requested_mode = adapter->fc;
3090 	else
3091 		fc->requested_mode = e1000_fc_default;
3092 
3093 	/* Issue a global reset */
3094 	e1000_reset_hw(hw);
3095 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3096 
3097 	/* Reset for AutoMediaDetect */
3098 	if (adapter->flags & IGB_MEDIA_RESET) {
3099 		e1000_setup_init_funcs(hw, TRUE);
3100 		e1000_get_bus_info(hw);
3101 		adapter->flags &= ~IGB_MEDIA_RESET;
3102 	}
3103 
3104 	if (e1000_init_hw(hw) < 0)
3105 		device_printf(dev, "Hardware Initialization Failed\n");
3106 
3107 	/* Setup DMA Coalescing */
3108 	igb_init_dmac(adapter, pba);
3109 
3110 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3111 	e1000_get_phy_info(hw);
3112 	e1000_check_for_link(hw);
3113 	return;
3114 }
3115 
3116 /*********************************************************************
3117  *
3118  *  Setup networking device structure and register an interface.
3119  *
3120  **********************************************************************/
3121 static int
igb_setup_interface(device_t dev,struct adapter * adapter)3122 igb_setup_interface(device_t dev, struct adapter *adapter)
3123 {
3124 	struct ifnet   *ifp;
3125 
3126 	INIT_DEBUGOUT("igb_setup_interface: begin");
3127 
3128 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3129 	if (ifp == NULL) {
3130 		device_printf(dev, "can not allocate ifnet structure\n");
3131 		return (-1);
3132 	}
3133 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3134 	ifp->if_init =  igb_init;
3135 	ifp->if_softc = adapter;
3136 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3137 	ifp->if_ioctl = igb_ioctl;
3138 	ifp->if_get_counter = igb_get_counter;
3139 #ifndef IGB_LEGACY_TX
3140 	ifp->if_transmit = igb_mq_start;
3141 	ifp->if_qflush = igb_qflush;
3142 #else
3143 	ifp->if_start = igb_start;
3144 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3145 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3146 	IFQ_SET_READY(&ifp->if_snd);
3147 #endif
3148 
3149 	ether_ifattach(ifp, adapter->hw.mac.addr);
3150 
3151 	ifp->if_capabilities = ifp->if_capenable = 0;
3152 
3153 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3154 	ifp->if_capabilities |= IFCAP_TSO;
3155 	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3156 	ifp->if_capenable = ifp->if_capabilities;
3157 
3158 	/* Don't enable LRO by default */
3159 	ifp->if_capabilities |= IFCAP_LRO;
3160 
3161 #ifdef DEVICE_POLLING
3162 	ifp->if_capabilities |= IFCAP_POLLING;
3163 #endif
3164 
3165 	/*
3166 	 * Tell the upper layer(s) we
3167 	 * support full VLAN capability.
3168 	 */
3169 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3170 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3171 			     |  IFCAP_VLAN_HWTSO
3172 			     |  IFCAP_VLAN_MTU;
3173 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3174 			  |  IFCAP_VLAN_HWTSO
3175 			  |  IFCAP_VLAN_MTU;
3176 
3177 	/*
3178 	** Don't turn this on by default, if vlans are
3179 	** created on another pseudo device (eg. lagg)
3180 	** then vlan events are not passed thru, breaking
3181 	** operation, but with HW FILTER off it works. If
3182 	** using vlans directly on the igb driver you can
3183 	** enable this and get full hardware tag filtering.
3184 	*/
3185 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3186 
3187 	/*
3188 	 * Specify the media types supported by this adapter and register
3189 	 * callbacks to update media and link information
3190 	 */
3191 	ifmedia_init(&adapter->media, IFM_IMASK,
3192 	    igb_media_change, igb_media_status);
3193 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3194 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3195 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3196 			    0, NULL);
3197 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3198 	} else {
3199 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3200 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3201 			    0, NULL);
3202 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3203 			    0, NULL);
3204 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3205 			    0, NULL);
3206 		if (adapter->hw.phy.type != e1000_phy_ife) {
3207 			ifmedia_add(&adapter->media,
3208 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3209 			ifmedia_add(&adapter->media,
3210 				IFM_ETHER | IFM_1000_T, 0, NULL);
3211 		}
3212 	}
3213 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3214 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3215 	return (0);
3216 }
3217 
3218 
3219 /*
3220  * Manage DMA'able memory.
3221  */
3222 static void
igb_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)3223 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3224 {
3225 	if (error)
3226 		return;
3227 	*(bus_addr_t *) arg = segs[0].ds_addr;
3228 }
3229 
3230 static int
igb_dma_malloc(struct adapter * adapter,bus_size_t size,struct igb_dma_alloc * dma,int mapflags)3231 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3232         struct igb_dma_alloc *dma, int mapflags)
3233 {
3234 	int error;
3235 
3236 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3237 				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3238 				BUS_SPACE_MAXADDR,	/* lowaddr */
3239 				BUS_SPACE_MAXADDR,	/* highaddr */
3240 				NULL, NULL,		/* filter, filterarg */
3241 				size,			/* maxsize */
3242 				1,			/* nsegments */
3243 				size,			/* maxsegsize */
3244 				0,			/* flags */
3245 				NULL,			/* lockfunc */
3246 				NULL,			/* lockarg */
3247 				&dma->dma_tag);
3248 	if (error) {
3249 		device_printf(adapter->dev,
3250 		    "%s: bus_dma_tag_create failed: %d\n",
3251 		    __func__, error);
3252 		goto fail_0;
3253 	}
3254 
3255 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3256 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3257 	if (error) {
3258 		device_printf(adapter->dev,
3259 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3260 		    __func__, (uintmax_t)size, error);
3261 		goto fail_2;
3262 	}
3263 
3264 	dma->dma_paddr = 0;
3265 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3266 	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3267 	if (error || dma->dma_paddr == 0) {
3268 		device_printf(adapter->dev,
3269 		    "%s: bus_dmamap_load failed: %d\n",
3270 		    __func__, error);
3271 		goto fail_3;
3272 	}
3273 
3274 	return (0);
3275 
3276 fail_3:
3277 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3278 fail_2:
3279 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3280 	bus_dma_tag_destroy(dma->dma_tag);
3281 fail_0:
3282 	dma->dma_tag = NULL;
3283 
3284 	return (error);
3285 }
3286 
3287 static void
igb_dma_free(struct adapter * adapter,struct igb_dma_alloc * dma)3288 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3289 {
3290 	if (dma->dma_tag == NULL)
3291 		return;
3292 	if (dma->dma_paddr != 0) {
3293 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3294 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3295 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3296 		dma->dma_paddr = 0;
3297 	}
3298 	if (dma->dma_vaddr != NULL) {
3299 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3300 		dma->dma_vaddr = NULL;
3301 	}
3302 	bus_dma_tag_destroy(dma->dma_tag);
3303 	dma->dma_tag = NULL;
3304 }
3305 
3306 
3307 /*********************************************************************
3308  *
3309  *  Allocate memory for the transmit and receive rings, and then
3310  *  the descriptors associated with each, called only once at attach.
3311  *
3312  **********************************************************************/
3313 static int
igb_allocate_queues(struct adapter * adapter)3314 igb_allocate_queues(struct adapter *adapter)
3315 {
3316 	device_t dev = adapter->dev;
3317 	struct igb_queue	*que = NULL;
3318 	struct tx_ring		*txr = NULL;
3319 	struct rx_ring		*rxr = NULL;
3320 	int rsize, tsize, error = E1000_SUCCESS;
3321 	int txconf = 0, rxconf = 0;
3322 
3323 	/* First allocate the top level queue structs */
3324 	if (!(adapter->queues =
3325 	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3326 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3327 		device_printf(dev, "Unable to allocate queue memory\n");
3328 		error = ENOMEM;
3329 		goto fail;
3330 	}
3331 
3332 	/* Next allocate the TX ring struct memory */
3333 	if (!(adapter->tx_rings =
3334 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3335 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3336 		device_printf(dev, "Unable to allocate TX ring memory\n");
3337 		error = ENOMEM;
3338 		goto tx_fail;
3339 	}
3340 
3341 	/* Now allocate the RX */
3342 	if (!(adapter->rx_rings =
3343 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3344 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3345 		device_printf(dev, "Unable to allocate RX ring memory\n");
3346 		error = ENOMEM;
3347 		goto rx_fail;
3348 	}
3349 
3350 	tsize = roundup2(adapter->num_tx_desc *
3351 	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3352 	/*
3353 	 * Now set up the TX queues, txconf is needed to handle the
3354 	 * possibility that things fail midcourse and we need to
3355 	 * undo memory gracefully
3356 	 */
3357 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3358 		/* Set up some basics */
3359 		txr = &adapter->tx_rings[i];
3360 		txr->adapter = adapter;
3361 		txr->me = i;
3362 		txr->num_desc = adapter->num_tx_desc;
3363 
3364 		/* Initialize the TX lock */
3365 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3366 		    device_get_nameunit(dev), txr->me);
3367 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3368 
3369 		if (igb_dma_malloc(adapter, tsize,
3370 			&txr->txdma, BUS_DMA_NOWAIT)) {
3371 			device_printf(dev,
3372 			    "Unable to allocate TX Descriptor memory\n");
3373 			error = ENOMEM;
3374 			goto err_tx_desc;
3375 		}
3376 		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3377 		bzero((void *)txr->tx_base, tsize);
3378 
3379         	/* Now allocate transmit buffers for the ring */
3380         	if (igb_allocate_transmit_buffers(txr)) {
3381 			device_printf(dev,
3382 			    "Critical Failure setting up transmit buffers\n");
3383 			error = ENOMEM;
3384 			goto err_tx_desc;
3385         	}
3386 #ifndef IGB_LEGACY_TX
3387 		/* Allocate a buf ring */
3388 		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3389 		    M_WAITOK, &txr->tx_mtx);
3390 #endif
3391 	}
3392 
3393 	/*
3394 	 * Next the RX queues...
3395 	 */
3396 	rsize = roundup2(adapter->num_rx_desc *
3397 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3398 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3399 		rxr = &adapter->rx_rings[i];
3400 		rxr->adapter = adapter;
3401 		rxr->me = i;
3402 
3403 		/* Initialize the RX lock */
3404 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3405 		    device_get_nameunit(dev), txr->me);
3406 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3407 
3408 		if (igb_dma_malloc(adapter, rsize,
3409 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3410 			device_printf(dev,
3411 			    "Unable to allocate RxDescriptor memory\n");
3412 			error = ENOMEM;
3413 			goto err_rx_desc;
3414 		}
3415 		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3416 		bzero((void *)rxr->rx_base, rsize);
3417 
3418         	/* Allocate receive buffers for the ring*/
3419 		if (igb_allocate_receive_buffers(rxr)) {
3420 			device_printf(dev,
3421 			    "Critical Failure setting up receive buffers\n");
3422 			error = ENOMEM;
3423 			goto err_rx_desc;
3424 		}
3425 	}
3426 
3427 	/*
3428 	** Finally set up the queue holding structs
3429 	*/
3430 	for (int i = 0; i < adapter->num_queues; i++) {
3431 		que = &adapter->queues[i];
3432 		que->adapter = adapter;
3433 		que->txr = &adapter->tx_rings[i];
3434 		que->rxr = &adapter->rx_rings[i];
3435 	}
3436 
3437 	return (0);
3438 
3439 err_rx_desc:
3440 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3441 		igb_dma_free(adapter, &rxr->rxdma);
3442 err_tx_desc:
3443 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3444 		igb_dma_free(adapter, &txr->txdma);
3445 	free(adapter->rx_rings, M_DEVBUF);
3446 rx_fail:
3447 #ifndef IGB_LEGACY_TX
3448 	buf_ring_free(txr->br, M_DEVBUF);
3449 #endif
3450 	free(adapter->tx_rings, M_DEVBUF);
3451 tx_fail:
3452 	free(adapter->queues, M_DEVBUF);
3453 fail:
3454 	return (error);
3455 }
3456 
3457 /*********************************************************************
3458  *
3459  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3460  *  the information needed to transmit a packet on the wire. This is
3461  *  called only once at attach, setup is done every reset.
3462  *
3463  **********************************************************************/
3464 static int
igb_allocate_transmit_buffers(struct tx_ring * txr)3465 igb_allocate_transmit_buffers(struct tx_ring *txr)
3466 {
3467 	struct adapter *adapter = txr->adapter;
3468 	device_t dev = adapter->dev;
3469 	struct igb_tx_buf *txbuf;
3470 	int error, i;
3471 
3472 	/*
3473 	 * Setup DMA descriptor areas.
3474 	 */
3475 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3476 			       1, 0,			/* alignment, bounds */
3477 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3478 			       BUS_SPACE_MAXADDR,	/* highaddr */
3479 			       NULL, NULL,		/* filter, filterarg */
3480 			       IGB_TSO_SIZE,		/* maxsize */
3481 			       IGB_MAX_SCATTER,		/* nsegments */
3482 			       PAGE_SIZE,		/* maxsegsize */
3483 			       0,			/* flags */
3484 			       NULL,			/* lockfunc */
3485 			       NULL,			/* lockfuncarg */
3486 			       &txr->txtag))) {
3487 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3488 		goto fail;
3489 	}
3490 
3491 	if (!(txr->tx_buffers =
3492 	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3493 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3494 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3495 		error = ENOMEM;
3496 		goto fail;
3497 	}
3498 
3499         /* Create the descriptor buffer dma maps */
3500 	txbuf = txr->tx_buffers;
3501 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3502 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3503 		if (error != 0) {
3504 			device_printf(dev, "Unable to create TX DMA map\n");
3505 			goto fail;
3506 		}
3507 	}
3508 
3509 	return 0;
3510 fail:
3511 	/* We free all, it handles case where we are in the middle */
3512 	igb_free_transmit_structures(adapter);
3513 	return (error);
3514 }
3515 
3516 /*********************************************************************
3517  *
3518  *  Initialize a transmit ring.
3519  *
3520  **********************************************************************/
3521 static void
igb_setup_transmit_ring(struct tx_ring * txr)3522 igb_setup_transmit_ring(struct tx_ring *txr)
3523 {
3524 	struct adapter *adapter = txr->adapter;
3525 	struct igb_tx_buf *txbuf;
3526 	int i;
3527 #ifdef DEV_NETMAP
3528 	struct netmap_adapter *na = NA(adapter->ifp);
3529 	struct netmap_slot *slot;
3530 #endif /* DEV_NETMAP */
3531 
3532 	/* Clear the old descriptor contents */
3533 	IGB_TX_LOCK(txr);
3534 #ifdef DEV_NETMAP
3535 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3536 #endif /* DEV_NETMAP */
3537 	bzero((void *)txr->tx_base,
3538 	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3539 	/* Reset indices */
3540 	txr->next_avail_desc = 0;
3541 	txr->next_to_clean = 0;
3542 
3543 	/* Free any existing tx buffers. */
3544         txbuf = txr->tx_buffers;
3545 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3546 		if (txbuf->m_head != NULL) {
3547 			bus_dmamap_sync(txr->txtag, txbuf->map,
3548 			    BUS_DMASYNC_POSTWRITE);
3549 			bus_dmamap_unload(txr->txtag, txbuf->map);
3550 			m_freem(txbuf->m_head);
3551 			txbuf->m_head = NULL;
3552 		}
3553 #ifdef DEV_NETMAP
3554 		if (slot) {
3555 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3556 			/* no need to set the address */
3557 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3558 		}
3559 #endif /* DEV_NETMAP */
3560 		/* clear the watch index */
3561 		txbuf->eop = NULL;
3562         }
3563 
3564 	/* Set number of descriptors available */
3565 	txr->tx_avail = adapter->num_tx_desc;
3566 
3567 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3568 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3569 	IGB_TX_UNLOCK(txr);
3570 }
3571 
3572 /*********************************************************************
3573  *
3574  *  Initialize all transmit rings.
3575  *
3576  **********************************************************************/
3577 static void
igb_setup_transmit_structures(struct adapter * adapter)3578 igb_setup_transmit_structures(struct adapter *adapter)
3579 {
3580 	struct tx_ring *txr = adapter->tx_rings;
3581 
3582 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3583 		igb_setup_transmit_ring(txr);
3584 
3585 	return;
3586 }
3587 
3588 /*********************************************************************
3589  *
3590  *  Enable transmit unit.
3591  *
3592  **********************************************************************/
3593 static void
igb_initialize_transmit_units(struct adapter * adapter)3594 igb_initialize_transmit_units(struct adapter *adapter)
3595 {
3596 	struct tx_ring	*txr = adapter->tx_rings;
3597 	struct e1000_hw *hw = &adapter->hw;
3598 	u32		tctl, txdctl;
3599 
3600 	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3601 	tctl = txdctl = 0;
3602 
3603 	/* Setup the Tx Descriptor Rings */
3604 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3605 		u64 bus_addr = txr->txdma.dma_paddr;
3606 
3607 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3608 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3609 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3610 		    (uint32_t)(bus_addr >> 32));
3611 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3612 		    (uint32_t)bus_addr);
3613 
3614 		/* Setup the HW Tx Head and Tail descriptor pointers */
3615 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3616 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3617 
3618 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3619 		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3620 		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3621 
3622 		txr->queue_status = IGB_QUEUE_IDLE;
3623 
3624 		txdctl |= IGB_TX_PTHRESH;
3625 		txdctl |= IGB_TX_HTHRESH << 8;
3626 		txdctl |= IGB_TX_WTHRESH << 16;
3627 		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3628 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3629 	}
3630 
3631 	if (adapter->vf_ifp)
3632 		return;
3633 
3634 	e1000_config_collision_dist(hw);
3635 
3636 	/* Program the Transmit Control Register */
3637 	tctl = E1000_READ_REG(hw, E1000_TCTL);
3638 	tctl &= ~E1000_TCTL_CT;
3639 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3640 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3641 
3642 	/* This write will effectively turn on the transmit unit. */
3643 	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3644 }
3645 
3646 /*********************************************************************
3647  *
3648  *  Free all transmit rings.
3649  *
3650  **********************************************************************/
3651 static void
igb_free_transmit_structures(struct adapter * adapter)3652 igb_free_transmit_structures(struct adapter *adapter)
3653 {
3654 	struct tx_ring *txr = adapter->tx_rings;
3655 
3656 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3657 		IGB_TX_LOCK(txr);
3658 		igb_free_transmit_buffers(txr);
3659 		igb_dma_free(adapter, &txr->txdma);
3660 		IGB_TX_UNLOCK(txr);
3661 		IGB_TX_LOCK_DESTROY(txr);
3662 	}
3663 	free(adapter->tx_rings, M_DEVBUF);
3664 }
3665 
3666 /*********************************************************************
3667  *
3668  *  Free transmit ring related data structures.
3669  *
3670  **********************************************************************/
3671 static void
igb_free_transmit_buffers(struct tx_ring * txr)3672 igb_free_transmit_buffers(struct tx_ring *txr)
3673 {
3674 	struct adapter *adapter = txr->adapter;
3675 	struct igb_tx_buf *tx_buffer;
3676 	int             i;
3677 
3678 	INIT_DEBUGOUT("free_transmit_ring: begin");
3679 
3680 	if (txr->tx_buffers == NULL)
3681 		return;
3682 
3683 	tx_buffer = txr->tx_buffers;
3684 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3685 		if (tx_buffer->m_head != NULL) {
3686 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3687 			    BUS_DMASYNC_POSTWRITE);
3688 			bus_dmamap_unload(txr->txtag,
3689 			    tx_buffer->map);
3690 			m_freem(tx_buffer->m_head);
3691 			tx_buffer->m_head = NULL;
3692 			if (tx_buffer->map != NULL) {
3693 				bus_dmamap_destroy(txr->txtag,
3694 				    tx_buffer->map);
3695 				tx_buffer->map = NULL;
3696 			}
3697 		} else if (tx_buffer->map != NULL) {
3698 			bus_dmamap_unload(txr->txtag,
3699 			    tx_buffer->map);
3700 			bus_dmamap_destroy(txr->txtag,
3701 			    tx_buffer->map);
3702 			tx_buffer->map = NULL;
3703 		}
3704 	}
3705 #ifndef IGB_LEGACY_TX
3706 	if (txr->br != NULL)
3707 		buf_ring_free(txr->br, M_DEVBUF);
3708 #endif
3709 	if (txr->tx_buffers != NULL) {
3710 		free(txr->tx_buffers, M_DEVBUF);
3711 		txr->tx_buffers = NULL;
3712 	}
3713 	if (txr->txtag != NULL) {
3714 		bus_dma_tag_destroy(txr->txtag);
3715 		txr->txtag = NULL;
3716 	}
3717 	return;
3718 }
3719 
3720 /**********************************************************************
3721  *
3722  *  Setup work for hardware segmentation offload (TSO) on
3723  *  adapters using advanced tx descriptors
3724  *
3725  **********************************************************************/
3726 static int
igb_tso_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)3727 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3728     u32 *cmd_type_len, u32 *olinfo_status)
3729 {
3730 	struct adapter *adapter = txr->adapter;
3731 	struct e1000_adv_tx_context_desc *TXD;
3732 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3733 	u32 mss_l4len_idx = 0, paylen;
3734 	u16 vtag = 0, eh_type;
3735 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3736 	struct ether_vlan_header *eh;
3737 #ifdef INET6
3738 	struct ip6_hdr *ip6;
3739 #endif
3740 #ifdef INET
3741 	struct ip *ip;
3742 #endif
3743 	struct tcphdr *th;
3744 
3745 
3746 	/*
3747 	 * Determine where frame payload starts.
3748 	 * Jump over vlan headers if already present
3749 	 */
3750 	eh = mtod(mp, struct ether_vlan_header *);
3751 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3752 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3753 		eh_type = eh->evl_proto;
3754 	} else {
3755 		ehdrlen = ETHER_HDR_LEN;
3756 		eh_type = eh->evl_encap_proto;
3757 	}
3758 
3759 	switch (ntohs(eh_type)) {
3760 #ifdef INET6
3761 	case ETHERTYPE_IPV6:
3762 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3763 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3764 		if (ip6->ip6_nxt != IPPROTO_TCP)
3765 			return (ENXIO);
3766 		ip_hlen = sizeof(struct ip6_hdr);
3767 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3768 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3769 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3770 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3771 		break;
3772 #endif
3773 #ifdef INET
3774 	case ETHERTYPE_IP:
3775 		ip = (struct ip *)(mp->m_data + ehdrlen);
3776 		if (ip->ip_p != IPPROTO_TCP)
3777 			return (ENXIO);
3778 		ip->ip_sum = 0;
3779 		ip_hlen = ip->ip_hl << 2;
3780 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3781 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3782 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3783 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3784 		/* Tell transmit desc to also do IPv4 checksum. */
3785 		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3786 		break;
3787 #endif
3788 	default:
3789 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3790 		    __func__, ntohs(eh_type));
3791 		break;
3792 	}
3793 
3794 	ctxd = txr->next_avail_desc;
3795 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3796 
3797 	tcp_hlen = th->th_off << 2;
3798 
3799 	/* This is used in the transmit desc in encap */
3800 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3801 
3802 	/* VLAN MACLEN IPLEN */
3803 	if (mp->m_flags & M_VLANTAG) {
3804 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3805                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3806 	}
3807 
3808 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3809 	vlan_macip_lens |= ip_hlen;
3810 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3811 
3812 	/* ADV DTYPE TUCMD */
3813 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3814 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3815 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3816 
3817 	/* MSS L4LEN IDX */
3818 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3819 	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3820 	/* 82575 needs the queue index added */
3821 	if (adapter->hw.mac.type == e1000_82575)
3822 		mss_l4len_idx |= txr->me << 4;
3823 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3824 
3825 	TXD->seqnum_seed = htole32(0);
3826 
3827 	if (++ctxd == txr->num_desc)
3828 		ctxd = 0;
3829 
3830 	txr->tx_avail--;
3831 	txr->next_avail_desc = ctxd;
3832 	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3833 	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3834 	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3835 	++txr->tso_tx;
3836 	return (0);
3837 }
3838 
3839 /*********************************************************************
3840  *
3841  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3842  *
3843  **********************************************************************/
3844 
3845 static int
igb_tx_ctx_setup(struct tx_ring * txr,struct mbuf * mp,u32 * cmd_type_len,u32 * olinfo_status)3846 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3847     u32 *cmd_type_len, u32 *olinfo_status)
3848 {
3849 	struct e1000_adv_tx_context_desc *TXD;
3850 	struct adapter *adapter = txr->adapter;
3851 	struct ether_vlan_header *eh;
3852 	struct ip *ip;
3853 	struct ip6_hdr *ip6;
3854 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3855 	int	ehdrlen, ip_hlen = 0;
3856 	u16	etype;
3857 	u8	ipproto = 0;
3858 	int	offload = TRUE;
3859 	int	ctxd = txr->next_avail_desc;
3860 	u16	vtag = 0;
3861 
3862 	/* First check if TSO is to be used */
3863 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3864 		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3865 
3866 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3867 		offload = FALSE;
3868 
3869 	/* Indicate the whole packet as payload when not doing TSO */
3870        	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3871 
3872 	/* Now ready a context descriptor */
3873 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3874 
3875 	/*
3876 	** In advanced descriptors the vlan tag must
3877 	** be placed into the context descriptor. Hence
3878 	** we need to make one even if not doing offloads.
3879 	*/
3880 	if (mp->m_flags & M_VLANTAG) {
3881 		vtag = htole16(mp->m_pkthdr.ether_vtag);
3882 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3883 	} else if (offload == FALSE) /* ... no offload to do */
3884 		return (0);
3885 
3886 	/*
3887 	 * Determine where frame payload starts.
3888 	 * Jump over vlan headers if already present,
3889 	 * helpful for QinQ too.
3890 	 */
3891 	eh = mtod(mp, struct ether_vlan_header *);
3892 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3893 		etype = ntohs(eh->evl_proto);
3894 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3895 	} else {
3896 		etype = ntohs(eh->evl_encap_proto);
3897 		ehdrlen = ETHER_HDR_LEN;
3898 	}
3899 
3900 	/* Set the ether header length */
3901 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3902 
3903 	switch (etype) {
3904 		case ETHERTYPE_IP:
3905 			ip = (struct ip *)(mp->m_data + ehdrlen);
3906 			ip_hlen = ip->ip_hl << 2;
3907 			ipproto = ip->ip_p;
3908 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3909 			break;
3910 		case ETHERTYPE_IPV6:
3911 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3912 			ip_hlen = sizeof(struct ip6_hdr);
3913 			/* XXX-BZ this will go badly in case of ext hdrs. */
3914 			ipproto = ip6->ip6_nxt;
3915 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3916 			break;
3917 		default:
3918 			offload = FALSE;
3919 			break;
3920 	}
3921 
3922 	vlan_macip_lens |= ip_hlen;
3923 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3924 
3925 	switch (ipproto) {
3926 		case IPPROTO_TCP:
3927 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3928 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3929 			break;
3930 		case IPPROTO_UDP:
3931 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3932 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3933 			break;
3934 
3935 #if __FreeBSD_version >= 800000
3936 		case IPPROTO_SCTP:
3937 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3938 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3939 			break;
3940 #endif
3941 		default:
3942 			offload = FALSE;
3943 			break;
3944 	}
3945 
3946 	if (offload) /* For the TX descriptor setup */
3947 		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3948 
3949 	/* 82575 needs the queue index added */
3950 	if (adapter->hw.mac.type == e1000_82575)
3951 		mss_l4len_idx = txr->me << 4;
3952 
3953 	/* Now copy bits into descriptor */
3954 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3955 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3956 	TXD->seqnum_seed = htole32(0);
3957 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3958 
3959 	/* We've consumed the first desc, adjust counters */
3960 	if (++ctxd == txr->num_desc)
3961 		ctxd = 0;
3962 	txr->next_avail_desc = ctxd;
3963 	--txr->tx_avail;
3964 
3965         return (0);
3966 }
3967 
/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 *  Must be called with the ring's tx_mtx held (asserted below).
 *
 *  Returns TRUE when descriptors are still outstanding in the ring,
 *  FALSE when it is empty.
 **********************************************************************/
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
#ifdef DEV_NETMAP
	struct ifnet		*ifp = adapter->ifp;
#endif /* DEV_NETMAP */
	u32			work, processed = 0;
	int			limit = adapter->tx_process_limit;
	struct igb_tx_buf	*buf;
	union e1000_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	/* A non-zero return from netmap_tx_irq() ends processing here */
	if (netmap_tx_irq(ifp, txr->me))
		return (FALSE);
#endif /* DEV_NETMAP */

	/* Every descriptor is free: nothing to clean */
	if (txr->tx_avail == txr->num_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return FALSE;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	/*
	 * Bias the index by -num_desc (unsigned wrap-around) so that it
	 * becomes exactly 0 when the walk steps past the last descriptor;
	 * the wrap checks below are then a simple test against zero.
	 */
	work -= txr->num_desc; /* The distance to ring end */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	do {
		/* eop is only set on the first buffer of a queued packet */
		union e1000_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		/* Release the mbuf chain held by this buffer, if any */
		if (buf->m_head) {
			txr->bytes +=
			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes +=
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;

		}
		/* One whole packet reclaimed; refresh the watchdog stamp */
		++txr->packets;
		++processed;
		txr->watchdog_time = ticks;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit)); /* stop once limit counts down to 0 */

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Remove the bias to get a real ring index again */
	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
		txr->queue_status |= IGB_QUEUE_HUNG;

	/* Enough descriptors are free again: clear the depleted mark */
	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
		txr->queue_status &= ~IGB_QUEUE_DEPLETED;

	if (txr->tx_avail == txr->num_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return (FALSE);
	}

	return (TRUE);
}
4097 
4098 /*********************************************************************
4099  *
4100  *  Refresh mbuf buffers for RX descriptor rings
4101  *   - now keeps its own state so discards due to resource
4102  *     exhaustion are unnecessary, if an mbuf cannot be obtained
4103  *     it just returns, keeping its placeholder, thus it can simply
4104  *     be recalled to try again.
4105  *
4106  **********************************************************************/
4107 static void
igb_refresh_mbufs(struct rx_ring * rxr,int limit)4108 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4109 {
4110 	struct adapter		*adapter = rxr->adapter;
4111 	bus_dma_segment_t	hseg[1];
4112 	bus_dma_segment_t	pseg[1];
4113 	struct igb_rx_buf	*rxbuf;
4114 	struct mbuf		*mh, *mp;
4115 	int			i, j, nsegs, error;
4116 	bool			refreshed = FALSE;
4117 
4118 	i = j = rxr->next_to_refresh;
4119 	/*
4120 	** Get one descriptor beyond
4121 	** our work mark to control
4122 	** the loop.
4123         */
4124 	if (++j == adapter->num_rx_desc)
4125 		j = 0;
4126 
4127 	while (j != limit) {
4128 		rxbuf = &rxr->rx_buffers[i];
4129 		/* No hdr mbuf used with header split off */
4130 		if (rxr->hdr_split == FALSE)
4131 			goto no_split;
4132 		if (rxbuf->m_head == NULL) {
4133 			mh = m_gethdr(M_NOWAIT, MT_DATA);
4134 			if (mh == NULL)
4135 				goto update;
4136 		} else
4137 			mh = rxbuf->m_head;
4138 
4139 		mh->m_pkthdr.len = mh->m_len = MHLEN;
4140 		mh->m_len = MHLEN;
4141 		mh->m_flags |= M_PKTHDR;
4142 		/* Get the memory mapping */
4143 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4144 		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4145 		if (error != 0) {
4146 			printf("Refresh mbufs: hdr dmamap load"
4147 			    " failure - %d\n", error);
4148 			m_free(mh);
4149 			rxbuf->m_head = NULL;
4150 			goto update;
4151 		}
4152 		rxbuf->m_head = mh;
4153 		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4154 		    BUS_DMASYNC_PREREAD);
4155 		rxr->rx_base[i].read.hdr_addr =
4156 		    htole64(hseg[0].ds_addr);
4157 no_split:
4158 		if (rxbuf->m_pack == NULL) {
4159 			mp = m_getjcl(M_NOWAIT, MT_DATA,
4160 			    M_PKTHDR, adapter->rx_mbuf_sz);
4161 			if (mp == NULL)
4162 				goto update;
4163 		} else
4164 			mp = rxbuf->m_pack;
4165 
4166 		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4167 		/* Get the memory mapping */
4168 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4169 		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4170 		if (error != 0) {
4171 			printf("Refresh mbufs: payload dmamap load"
4172 			    " failure - %d\n", error);
4173 			m_free(mp);
4174 			rxbuf->m_pack = NULL;
4175 			goto update;
4176 		}
4177 		rxbuf->m_pack = mp;
4178 		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4179 		    BUS_DMASYNC_PREREAD);
4180 		rxr->rx_base[i].read.pkt_addr =
4181 		    htole64(pseg[0].ds_addr);
4182 		refreshed = TRUE; /* I feel wefreshed :) */
4183 
4184 		i = j; /* our next is precalculated */
4185 		rxr->next_to_refresh = i;
4186 		if (++j == adapter->num_rx_desc)
4187 			j = 0;
4188 	}
4189 update:
4190 	if (refreshed) /* update tail */
4191 		E1000_WRITE_REG(&adapter->hw,
4192 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4193 	return;
4194 }
4195 
4196 
4197 /*********************************************************************
4198  *
4199  *  Allocate memory for rx_buffer structures. Since we use one
4200  *  rx_buffer per received packet, the maximum number of rx_buffer's
4201  *  that we'll need is equal to the number of receive descriptors
4202  *  that we've allocated.
4203  *
4204  **********************************************************************/
4205 static int
igb_allocate_receive_buffers(struct rx_ring * rxr)4206 igb_allocate_receive_buffers(struct rx_ring *rxr)
4207 {
4208 	struct	adapter 	*adapter = rxr->adapter;
4209 	device_t 		dev = adapter->dev;
4210 	struct igb_rx_buf	*rxbuf;
4211 	int             	i, bsize, error;
4212 
4213 	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4214 	if (!(rxr->rx_buffers =
4215 	    (struct igb_rx_buf *) malloc(bsize,
4216 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4217 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4218 		error = ENOMEM;
4219 		goto fail;
4220 	}
4221 
4222 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4223 				   1, 0,		/* alignment, bounds */
4224 				   BUS_SPACE_MAXADDR,	/* lowaddr */
4225 				   BUS_SPACE_MAXADDR,	/* highaddr */
4226 				   NULL, NULL,		/* filter, filterarg */
4227 				   MSIZE,		/* maxsize */
4228 				   1,			/* nsegments */
4229 				   MSIZE,		/* maxsegsize */
4230 				   0,			/* flags */
4231 				   NULL,		/* lockfunc */
4232 				   NULL,		/* lockfuncarg */
4233 				   &rxr->htag))) {
4234 		device_printf(dev, "Unable to create RX DMA tag\n");
4235 		goto fail;
4236 	}
4237 
4238 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4239 				   1, 0,		/* alignment, bounds */
4240 				   BUS_SPACE_MAXADDR,	/* lowaddr */
4241 				   BUS_SPACE_MAXADDR,	/* highaddr */
4242 				   NULL, NULL,		/* filter, filterarg */
4243 				   MJUM9BYTES,		/* maxsize */
4244 				   1,			/* nsegments */
4245 				   MJUM9BYTES,		/* maxsegsize */
4246 				   0,			/* flags */
4247 				   NULL,		/* lockfunc */
4248 				   NULL,		/* lockfuncarg */
4249 				   &rxr->ptag))) {
4250 		device_printf(dev, "Unable to create RX payload DMA tag\n");
4251 		goto fail;
4252 	}
4253 
4254 	for (i = 0; i < adapter->num_rx_desc; i++) {
4255 		rxbuf = &rxr->rx_buffers[i];
4256 		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4257 		if (error) {
4258 			device_printf(dev,
4259 			    "Unable to create RX head DMA maps\n");
4260 			goto fail;
4261 		}
4262 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4263 		if (error) {
4264 			device_printf(dev,
4265 			    "Unable to create RX packet DMA maps\n");
4266 			goto fail;
4267 		}
4268 	}
4269 
4270 	return (0);
4271 
4272 fail:
4273 	/* Frees all, but can handle partial completion */
4274 	igb_free_receive_structures(adapter);
4275 	return (error);
4276 }
4277 
4278 
4279 static void
igb_free_receive_ring(struct rx_ring * rxr)4280 igb_free_receive_ring(struct rx_ring *rxr)
4281 {
4282 	struct	adapter		*adapter = rxr->adapter;
4283 	struct igb_rx_buf	*rxbuf;
4284 
4285 
4286 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4287 		rxbuf = &rxr->rx_buffers[i];
4288 		if (rxbuf->m_head != NULL) {
4289 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4290 			    BUS_DMASYNC_POSTREAD);
4291 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4292 			rxbuf->m_head->m_flags |= M_PKTHDR;
4293 			m_freem(rxbuf->m_head);
4294 		}
4295 		if (rxbuf->m_pack != NULL) {
4296 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4297 			    BUS_DMASYNC_POSTREAD);
4298 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4299 			rxbuf->m_pack->m_flags |= M_PKTHDR;
4300 			m_freem(rxbuf->m_pack);
4301 		}
4302 		rxbuf->m_head = NULL;
4303 		rxbuf->m_pack = NULL;
4304 	}
4305 }
4306 
4307 
/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 *  Takes the ring's RX lock, zeroes the descriptor area, frees any
 *  mbufs left from a previous run, attaches fresh buffers to every
 *  descriptor, resets the ring indices and counters and, when
 *  IFCAP_LRO is enabled on the interface, initializes LRO.
 *
 *  Returns 0 on success.  On failure the ring's mbufs are freed again
 *  and the error is returned; the lock is released either way.
 *
 **********************************************************************/
static int
igb_setup_receive_ring(struct rx_ring *rxr)
{
	struct	adapter		*adapter;
	struct  ifnet		*ifp;
	device_t		dev;
	struct igb_rx_buf	*rxbuf;
	bus_dma_segment_t	pseg[1], hseg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;
	dev = adapter->dev;
	ifp = adapter->ifp;

	/* Clear the ring contents */
	IGB_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* A non-NULL slot makes the loop below use netmap buffers instead */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);

	/*
	** Free current RX buffer structures and their mbufs
	*/
	igb_free_receive_ring(rxr);

	/* Configure for header split? */
	if (igb_header_split)
		rxr->hdr_split = TRUE;

	/* Now replenish the ring mbufs */
	for (int j = 0; j < adapter->num_rx_desc; ++j) {
		struct mbuf	*mh, *mp;

		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		if (slot) {
			/* slot sj is mapped to the j-th NIC-ring entry */
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			/* No mbufs are allocated for this descriptor */
			continue;
		}
#endif /* DEV_NETMAP */
		/* Without header split only the payload cluster is needed */
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (rxbuf->m_head == NULL) {
			error = ENOBUFS;
                        goto fail;
		}
		m_adj(rxbuf->m_head, ETHER_ALIGN);
		mh = rxbuf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    rxbuf->hmap, rxbuf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
                        goto fail;
		bus_dmamap_sync(rxr->htag,
		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->m_pack == NULL) {
			error = ENOBUFS;
                        goto fail;
		}
		mp = rxbuf->m_pack;
		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
                        goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
        }

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = adapter->num_rx_desc - 1;
	rxr->lro_enabled = FALSE;
	rxr->rx_split_packets = 0;
	rxr->rx_bytes = 0;

	/* No partially assembled packet chain yet */
	rxr->fmp = NULL;
	rxr->lmp = NULL;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface when the
	** interface has IFCAP_LRO enabled.  Header
	** split is not tied to LRO in this function;
	** it follows igb_header_split (see above).
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		error = tcp_lro_init(lro);
		if (error) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IGB_RX_UNLOCK(rxr);
	return (0);

fail:
	/* Drop whatever mbufs were attached before the failure */
	igb_free_receive_ring(rxr);
	IGB_RX_UNLOCK(rxr);
	return (error);
}
4453 
4454 
4455 /*********************************************************************
4456  *
4457  *  Initialize all receive rings.
4458  *
4459  **********************************************************************/
4460 static int
igb_setup_receive_structures(struct adapter * adapter)4461 igb_setup_receive_structures(struct adapter *adapter)
4462 {
4463 	struct rx_ring *rxr = adapter->rx_rings;
4464 	int i;
4465 
4466 	for (i = 0; i < adapter->num_queues; i++, rxr++)
4467 		if (igb_setup_receive_ring(rxr))
4468 			goto fail;
4469 
4470 	return (0);
4471 fail:
4472 	/*
4473 	 * Free RX buffers allocated so far, we will only handle
4474 	 * the rings that completed, the failing case will have
4475 	 * cleaned up for itself. 'i' is the endpoint.
4476 	 */
4477 	for (int j = 0; j < i; ++j) {
4478 		rxr = &adapter->rx_rings[j];
4479 		IGB_RX_LOCK(rxr);
4480 		igb_free_receive_ring(rxr);
4481 		IGB_RX_UNLOCK(rxr);
4482 	}
4483 
4484 	return (ENOBUFS);
4485 }
4486 
4487 /*
4488  * Initialise the RSS mapping for NICs that support multiple transmit/
4489  * receive rings.
4490  */
4491 static void
igb_initialise_rss_mapping(struct adapter * adapter)4492 igb_initialise_rss_mapping(struct adapter *adapter)
4493 {
4494 	struct e1000_hw *hw = &adapter->hw;
4495 	int i;
4496 	int queue_id;
4497 	u32 reta;
4498 	u32 rss_key[10], mrqc, shift = 0;
4499 
4500 	/* XXX? */
4501 	if (adapter->hw.mac.type == e1000_82575)
4502 		shift = 6;
4503 
4504 	/*
4505 	 * The redirection table controls which destination
4506 	 * queue each bucket redirects traffic to.
4507 	 * Each DWORD represents four queues, with the LSB
4508 	 * being the first queue in the DWORD.
4509 	 *
4510 	 * This just allocates buckets to queues using round-robin
4511 	 * allocation.
4512 	 *
4513 	 * NOTE: It Just Happens to line up with the default
4514 	 * RSS allocation method.
4515 	 */
4516 
4517 	/* Warning FM follows */
4518 	reta = 0;
4519 	for (i = 0; i < 128; i++) {
4520 #ifdef	RSS
4521 		queue_id = rss_get_indirection_to_bucket(i);
4522 		/*
4523 		 * If we have more queues than buckets, we'll
4524 		 * end up mapping buckets to a subset of the
4525 		 * queues.
4526 		 *
4527 		 * If we have more buckets than queues, we'll
4528 		 * end up instead assigning multiple buckets
4529 		 * to queues.
4530 		 *
4531 		 * Both are suboptimal, but we need to handle
4532 		 * the case so we don't go out of bounds
4533 		 * indexing arrays and such.
4534 		 */
4535 		queue_id = queue_id % adapter->num_queues;
4536 #else
4537 		queue_id = (i % adapter->num_queues);
4538 #endif
4539 		/* Adjust if required */
4540 		queue_id = queue_id << shift;
4541 
4542 		/*
4543 		 * The low 8 bits are for hash value (n+0);
4544 		 * The next 8 bits are for hash value (n+1), etc.
4545 		 */
4546 		reta = reta >> 8;
4547 		reta = reta | ( ((uint32_t) queue_id) << 24);
4548 		if ((i & 3) == 3) {
4549 			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4550 			reta = 0;
4551 		}
4552 	}
4553 
4554 	/* Now fill in hash table */
4555 
4556 	/*
4557 	 * MRQC: Multiple Receive Queues Command
4558 	 * Set queuing to RSS control, number depends on the device.
4559 	 */
4560 	mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4561 
4562 #ifdef	RSS
4563 	/* XXX ew typecasting */
4564 	rss_getkey((uint8_t *) &rss_key);
4565 #else
4566 	arc4rand(&rss_key, sizeof(rss_key), 0);
4567 #endif
4568 	for (i = 0; i < 10; i++)
4569 		E1000_WRITE_REG_ARRAY(hw,
4570 		    E1000_RSSRK(0), i, rss_key[i]);
4571 
4572 	/*
4573 	 * Configure the RSS fields to hash upon.
4574 	 */
4575 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4576 	    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4577 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4578 	    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4579 	mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4580 	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4581 	mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4582 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4583 
4584 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4585 }
4586 
4587 /*********************************************************************
4588  *
4589  *  Enable receive unit.
4590  *
4591  **********************************************************************/
4592 static void
igb_initialize_receive_units(struct adapter * adapter)4593 igb_initialize_receive_units(struct adapter *adapter)
4594 {
4595 	struct rx_ring	*rxr = adapter->rx_rings;
4596 	struct ifnet	*ifp = adapter->ifp;
4597 	struct e1000_hw *hw = &adapter->hw;
4598 	u32		rctl, rxcsum, psize, srrctl = 0;
4599 
4600 	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4601 
4602 	/*
4603 	 * Make sure receives are disabled while setting
4604 	 * up the descriptor ring
4605 	 */
4606 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4607 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4608 
4609 	/*
4610 	** Set up for header split
4611 	*/
4612 	if (igb_header_split) {
4613 		/* Use a standard mbuf for the header */
4614 		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4615 		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4616 	} else
4617 		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4618 
4619 	/*
4620 	** Set up for jumbo frames
4621 	*/
4622 	if (ifp->if_mtu > ETHERMTU) {
4623 		rctl |= E1000_RCTL_LPE;
4624 		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4625 			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4626 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4627 		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4628 			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4629 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4630 		}
4631 		/* Set maximum packet len */
4632 		psize = adapter->max_frame_size;
4633 		/* are we on a vlan? */
4634 		if (adapter->ifp->if_vlantrunk != NULL)
4635 			psize += VLAN_TAG_SIZE;
4636 		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4637 	} else {
4638 		rctl &= ~E1000_RCTL_LPE;
4639 		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4640 		rctl |= E1000_RCTL_SZ_2048;
4641 	}
4642 
4643 	/*
4644 	 * If TX flow control is disabled and there's >1 queue defined,
4645 	 * enable DROP.
4646 	 *
4647 	 * This drops frames rather than hanging the RX MAC for all queues.
4648 	 */
4649 	if ((adapter->num_queues > 1) &&
4650 	    (adapter->fc == e1000_fc_none ||
4651 	     adapter->fc == e1000_fc_rx_pause)) {
4652 		srrctl |= E1000_SRRCTL_DROP_EN;
4653 	}
4654 
4655 	/* Setup the Base and Length of the Rx Descriptor Rings */
4656 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4657 		u64 bus_addr = rxr->rxdma.dma_paddr;
4658 		u32 rxdctl;
4659 
4660 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4661 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4662 		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4663 		    (uint32_t)(bus_addr >> 32));
4664 		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4665 		    (uint32_t)bus_addr);
4666 		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4667 		/* Enable this Queue */
4668 		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4669 		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4670 		rxdctl &= 0xFFF00000;
4671 		rxdctl |= IGB_RX_PTHRESH;
4672 		rxdctl |= IGB_RX_HTHRESH << 8;
4673 		rxdctl |= IGB_RX_WTHRESH << 16;
4674 		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4675 	}
4676 
4677 	/*
4678 	** Setup for RX MultiQueue
4679 	*/
4680 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4681 	if (adapter->num_queues >1) {
4682 
4683 		/* rss setup */
4684 		igb_initialise_rss_mapping(adapter);
4685 
4686 		/*
4687 		** NOTE: Receive Full-Packet Checksum Offload
4688 		** is mutually exclusive with Multiqueue. However
4689 		** this is not the same as TCP/IP checksums which
4690 		** still work.
4691 		*/
4692 		rxcsum |= E1000_RXCSUM_PCSD;
4693 #if __FreeBSD_version >= 800000
4694 		/* For SCTP Offload */
4695 		if (((hw->mac.type == e1000_82576) ||
4696 		     (hw->mac.type == e1000_82580)) &&
4697 		    (ifp->if_capenable & IFCAP_RXCSUM))
4698 			rxcsum |= E1000_RXCSUM_CRCOFL;
4699 #endif
4700 	} else {
4701 		/* Non RSS setup */
4702 		if (ifp->if_capenable & IFCAP_RXCSUM) {
4703 			rxcsum |= E1000_RXCSUM_IPPCSE;
4704 #if __FreeBSD_version >= 800000
4705 			if ((adapter->hw.mac.type == e1000_82576) ||
4706 			    (adapter->hw.mac.type == e1000_82580))
4707 				rxcsum |= E1000_RXCSUM_CRCOFL;
4708 #endif
4709 		} else
4710 			rxcsum &= ~E1000_RXCSUM_TUOFL;
4711 	}
4712 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4713 
4714 	/* Setup the Receive Control Register */
4715 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4716 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4717 		   E1000_RCTL_RDMTS_HALF |
4718 		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4719 	/* Strip CRC bytes. */
4720 	rctl |= E1000_RCTL_SECRC;
4721 	/* Make sure VLAN Filters are off */
4722 	rctl &= ~E1000_RCTL_VFE;
4723 	/* Don't store bad packets */
4724 	rctl &= ~E1000_RCTL_SBP;
4725 
4726 	/* Enable Receives */
4727 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4728 
4729 	/*
4730 	 * Setup the HW Rx Head and Tail Descriptor Pointers
4731 	 *   - needs to be after enable
4732 	 */
4733 	for (int i = 0; i < adapter->num_queues; i++) {
4734 		rxr = &adapter->rx_rings[i];
4735 		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4736 #ifdef DEV_NETMAP
4737 		/*
4738 		 * an init() while a netmap client is active must
4739 		 * preserve the rx buffers passed to userspace.
4740 		 * In this driver it means we adjust RDT to
4741 		 * something different from next_to_refresh
4742 		 * (which is not used in netmap mode).
4743 		 */
4744 		if (ifp->if_capenable & IFCAP_NETMAP) {
4745 			struct netmap_adapter *na = NA(adapter->ifp);
4746 			struct netmap_kring *kring = &na->rx_rings[i];
4747 			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4748 
4749 			if (t >= adapter->num_rx_desc)
4750 				t -= adapter->num_rx_desc;
4751 			else if (t < 0)
4752 				t += adapter->num_rx_desc;
4753 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4754 		} else
4755 #endif /* DEV_NETMAP */
4756 		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4757 	}
4758 	return;
4759 }
4760 
4761 /*********************************************************************
4762  *
4763  *  Free receive rings.
4764  *
4765  **********************************************************************/
4766 static void
igb_free_receive_structures(struct adapter * adapter)4767 igb_free_receive_structures(struct adapter *adapter)
4768 {
4769 	struct rx_ring *rxr = adapter->rx_rings;
4770 
4771 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4772 		struct lro_ctrl	*lro = &rxr->lro;
4773 		igb_free_receive_buffers(rxr);
4774 		tcp_lro_free(lro);
4775 		igb_dma_free(adapter, &rxr->rxdma);
4776 	}
4777 
4778 	free(adapter->rx_rings, M_DEVBUF);
4779 }
4780 
4781 /*********************************************************************
4782  *
4783  *  Free receive ring data structures.
4784  *
4785  **********************************************************************/
4786 static void
igb_free_receive_buffers(struct rx_ring * rxr)4787 igb_free_receive_buffers(struct rx_ring *rxr)
4788 {
4789 	struct adapter		*adapter = rxr->adapter;
4790 	struct igb_rx_buf	*rxbuf;
4791 	int i;
4792 
4793 	INIT_DEBUGOUT("free_receive_structures: begin");
4794 
4795 	/* Cleanup any existing buffers */
4796 	if (rxr->rx_buffers != NULL) {
4797 		for (i = 0; i < adapter->num_rx_desc; i++) {
4798 			rxbuf = &rxr->rx_buffers[i];
4799 			if (rxbuf->m_head != NULL) {
4800 				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4801 				    BUS_DMASYNC_POSTREAD);
4802 				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4803 				rxbuf->m_head->m_flags |= M_PKTHDR;
4804 				m_freem(rxbuf->m_head);
4805 			}
4806 			if (rxbuf->m_pack != NULL) {
4807 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4808 				    BUS_DMASYNC_POSTREAD);
4809 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4810 				rxbuf->m_pack->m_flags |= M_PKTHDR;
4811 				m_freem(rxbuf->m_pack);
4812 			}
4813 			rxbuf->m_head = NULL;
4814 			rxbuf->m_pack = NULL;
4815 			if (rxbuf->hmap != NULL) {
4816 				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4817 				rxbuf->hmap = NULL;
4818 			}
4819 			if (rxbuf->pmap != NULL) {
4820 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4821 				rxbuf->pmap = NULL;
4822 			}
4823 		}
4824 		if (rxr->rx_buffers != NULL) {
4825 			free(rxr->rx_buffers, M_DEVBUF);
4826 			rxr->rx_buffers = NULL;
4827 		}
4828 	}
4829 
4830 	if (rxr->htag != NULL) {
4831 		bus_dma_tag_destroy(rxr->htag);
4832 		rxr->htag = NULL;
4833 	}
4834 	if (rxr->ptag != NULL) {
4835 		bus_dma_tag_destroy(rxr->ptag);
4836 		rxr->ptag = NULL;
4837 	}
4838 }
4839 
4840 static __inline void
igb_rx_discard(struct rx_ring * rxr,int i)4841 igb_rx_discard(struct rx_ring *rxr, int i)
4842 {
4843 	struct igb_rx_buf	*rbuf;
4844 
4845 	rbuf = &rxr->rx_buffers[i];
4846 
4847 	/* Partially received? Free the chain */
4848 	if (rxr->fmp != NULL) {
4849 		rxr->fmp->m_flags |= M_PKTHDR;
4850 		m_freem(rxr->fmp);
4851 		rxr->fmp = NULL;
4852 		rxr->lmp = NULL;
4853 	}
4854 
4855 	/*
4856 	** With advanced descriptors the writeback
4857 	** clobbers the buffer addrs, so its easier
4858 	** to just free the existing mbufs and take
4859 	** the normal refresh path to get new buffers
4860 	** and mapping.
4861 	*/
4862 	if (rbuf->m_head) {
4863 		m_free(rbuf->m_head);
4864 		rbuf->m_head = NULL;
4865 		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4866 	}
4867 
4868 	if (rbuf->m_pack) {
4869 		m_free(rbuf->m_pack);
4870 		rbuf->m_pack = NULL;
4871 		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4872 	}
4873 
4874 	return;
4875 }
4876 
4877 static __inline void
igb_rx_input(struct rx_ring * rxr,struct ifnet * ifp,struct mbuf * m,u32 ptype)4878 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4879 {
4880 
4881 	/*
4882 	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4883 	 * should be computed by hardware. Also it should not have VLAN tag in
4884 	 * ethernet header.
4885 	 */
4886 	if (rxr->lro_enabled &&
4887 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4888 	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4889 	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4890 	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4891 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4892 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4893 		/*
4894 		 * Send to the stack if:
4895 		 **  - LRO not enabled, or
4896 		 **  - no LRO resources, or
4897 		 **  - lro enqueue fails
4898 		 */
4899 		if (rxr->lro.lro_cnt != 0)
4900 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4901 				return;
4902 	}
4903 	IGB_RX_UNLOCK(rxr);
4904 	(*ifp->if_input)(ifp, m);
4905 	IGB_RX_LOCK(rxr);
4906 }
4907 
4908 /*********************************************************************
4909  *
4910  *  This routine executes in interrupt context. It replenishes
4911  *  the mbufs in the descriptor and sends data which has been
4912  *  dma'ed into host memory to upper layer.
4913  *
4914  *  We loop at most count times if count is > 0, or until done if
4915  *  count < 0.
4916  *
4917  *  Return TRUE if more to clean, FALSE otherwise
4918  *********************************************************************/
4919 static bool
igb_rxeof(struct igb_queue * que,int count,int * done)4920 igb_rxeof(struct igb_queue *que, int count, int *done)
4921 {
4922 	struct adapter		*adapter = que->adapter;
4923 	struct rx_ring		*rxr = que->rxr;
4924 	struct ifnet		*ifp = adapter->ifp;
4925 	struct lro_ctrl		*lro = &rxr->lro;
4926 	struct lro_entry	*queued;
4927 	int			i, processed = 0, rxdone = 0;
4928 	u32			ptype, staterr = 0;
4929 	union e1000_adv_rx_desc	*cur;
4930 
4931 	IGB_RX_LOCK(rxr);
4932 	/* Sync the ring. */
4933 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4934 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4935 
4936 #ifdef DEV_NETMAP
4937 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4938 		IGB_RX_UNLOCK(rxr);
4939 		return (FALSE);
4940 	}
4941 #endif /* DEV_NETMAP */
4942 
4943 	/* Main clean loop */
4944 	for (i = rxr->next_to_check; count != 0;) {
4945 		struct mbuf		*sendmp, *mh, *mp;
4946 		struct igb_rx_buf	*rxbuf;
4947 		u16			hlen, plen, hdr, vtag, pkt_info;
4948 		bool			eop = FALSE;
4949 
4950 		cur = &rxr->rx_base[i];
4951 		staterr = le32toh(cur->wb.upper.status_error);
4952 		if ((staterr & E1000_RXD_STAT_DD) == 0)
4953 			break;
4954 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4955 			break;
4956 		count--;
4957 		sendmp = mh = mp = NULL;
4958 		cur->wb.upper.status_error = 0;
4959 		rxbuf = &rxr->rx_buffers[i];
4960 		plen = le16toh(cur->wb.upper.length);
4961 		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4962 		if (((adapter->hw.mac.type == e1000_i350) ||
4963 		    (adapter->hw.mac.type == e1000_i354)) &&
4964 		    (staterr & E1000_RXDEXT_STATERR_LB))
4965 			vtag = be16toh(cur->wb.upper.vlan);
4966 		else
4967 			vtag = le16toh(cur->wb.upper.vlan);
4968 		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4969 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4970 		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4971 
4972 		/*
4973 		 * Free the frame (all segments) if we're at EOP and
4974 		 * it's an error.
4975 		 *
4976 		 * The datasheet states that EOP + status is only valid for
4977 		 * the final segment in a multi-segment frame.
4978 		 */
4979 		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4980 			adapter->dropped_pkts++;
4981 			++rxr->rx_discarded;
4982 			igb_rx_discard(rxr, i);
4983 			goto next_desc;
4984 		}
4985 
4986 		/*
4987 		** The way the hardware is configured to
4988 		** split, it will ONLY use the header buffer
4989 		** when header split is enabled, otherwise we
4990 		** get normal behavior, ie, both header and
4991 		** payload are DMA'd into the payload buffer.
4992 		**
4993 		** The fmp test is to catch the case where a
4994 		** packet spans multiple descriptors, in that
4995 		** case only the first header is valid.
4996 		*/
4997 		if (rxr->hdr_split && rxr->fmp == NULL) {
4998 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4999 			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5000 			    E1000_RXDADV_HDRBUFLEN_SHIFT;
5001 			if (hlen > IGB_HDR_BUF)
5002 				hlen = IGB_HDR_BUF;
5003 			mh = rxr->rx_buffers[i].m_head;
5004 			mh->m_len = hlen;
5005 			/* clear buf pointer for refresh */
5006 			rxbuf->m_head = NULL;
5007 			/*
5008 			** Get the payload length, this
5009 			** could be zero if its a small
5010 			** packet.
5011 			*/
5012 			if (plen > 0) {
5013 				mp = rxr->rx_buffers[i].m_pack;
5014 				mp->m_len = plen;
5015 				mh->m_next = mp;
5016 				/* clear buf pointer */
5017 				rxbuf->m_pack = NULL;
5018 				rxr->rx_split_packets++;
5019 			}
5020 		} else {
5021 			/*
5022 			** Either no header split, or a
5023 			** secondary piece of a fragmented
5024 			** split packet.
5025 			*/
5026 			mh = rxr->rx_buffers[i].m_pack;
5027 			mh->m_len = plen;
5028 			/* clear buf info for refresh */
5029 			rxbuf->m_pack = NULL;
5030 		}
5031 		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5032 
5033 		++processed; /* So we know when to refresh */
5034 
5035 		/* Initial frame - setup */
5036 		if (rxr->fmp == NULL) {
5037 			mh->m_pkthdr.len = mh->m_len;
5038 			/* Save the head of the chain */
5039 			rxr->fmp = mh;
5040 			rxr->lmp = mh;
5041 			if (mp != NULL) {
5042 				/* Add payload if split */
5043 				mh->m_pkthdr.len += mp->m_len;
5044 				rxr->lmp = mh->m_next;
5045 			}
5046 		} else {
5047 			/* Chain mbuf's together */
5048 			rxr->lmp->m_next = mh;
5049 			rxr->lmp = rxr->lmp->m_next;
5050 			rxr->fmp->m_pkthdr.len += mh->m_len;
5051 		}
5052 
5053 		if (eop) {
5054 			rxr->fmp->m_pkthdr.rcvif = ifp;
5055 			rxr->rx_packets++;
5056 			/* capture data for AIM */
5057 			rxr->packets++;
5058 			rxr->bytes += rxr->fmp->m_pkthdr.len;
5059 			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5060 
5061 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5062 				igb_rx_checksum(staterr, rxr->fmp, ptype);
5063 
5064 			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5065 			    (staterr & E1000_RXD_STAT_VP) != 0) {
5066 				rxr->fmp->m_pkthdr.ether_vtag = vtag;
5067 				rxr->fmp->m_flags |= M_VLANTAG;
5068 			}
5069 
5070 			/*
5071 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
5072 			 * and never cleared. This means we have RSS hash
5073 			 * available to be used.
5074 			 */
5075 			if (adapter->num_queues > 1) {
5076 				rxr->fmp->m_pkthdr.flowid =
5077 				    le32toh(cur->wb.lower.hi_dword.rss);
5078 				switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5079 					case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5080 						M_HASHTYPE_SET(rxr->fmp,
5081 						    M_HASHTYPE_RSS_TCP_IPV4);
5082 					break;
5083 					case E1000_RXDADV_RSSTYPE_IPV4:
5084 						M_HASHTYPE_SET(rxr->fmp,
5085 						    M_HASHTYPE_RSS_IPV4);
5086 					break;
5087 					case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5088 						M_HASHTYPE_SET(rxr->fmp,
5089 						    M_HASHTYPE_RSS_TCP_IPV6);
5090 					break;
5091 					case E1000_RXDADV_RSSTYPE_IPV6_EX:
5092 						M_HASHTYPE_SET(rxr->fmp,
5093 						    M_HASHTYPE_RSS_IPV6_EX);
5094 					break;
5095 					case E1000_RXDADV_RSSTYPE_IPV6:
5096 						M_HASHTYPE_SET(rxr->fmp,
5097 						    M_HASHTYPE_RSS_IPV6);
5098 					break;
5099 					case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5100 						M_HASHTYPE_SET(rxr->fmp,
5101 						    M_HASHTYPE_RSS_TCP_IPV6_EX);
5102 					break;
5103 					default:
5104 						/* XXX fallthrough */
5105 						M_HASHTYPE_SET(rxr->fmp,
5106 						    M_HASHTYPE_OPAQUE);
5107 				}
5108 			} else {
5109 #ifndef IGB_LEGACY_TX
5110 				rxr->fmp->m_pkthdr.flowid = que->msix;
5111 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5112 #endif
5113 			}
5114 			sendmp = rxr->fmp;
5115 			/* Make sure to set M_PKTHDR. */
5116 			sendmp->m_flags |= M_PKTHDR;
5117 			rxr->fmp = NULL;
5118 			rxr->lmp = NULL;
5119 		}
5120 
5121 next_desc:
5122 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5123 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5124 
5125 		/* Advance our pointers to the next descriptor. */
5126 		if (++i == adapter->num_rx_desc)
5127 			i = 0;
5128 		/*
5129 		** Send to the stack or LRO
5130 		*/
5131 		if (sendmp != NULL) {
5132 			rxr->next_to_check = i;
5133 			igb_rx_input(rxr, ifp, sendmp, ptype);
5134 			i = rxr->next_to_check;
5135 			rxdone++;
5136 		}
5137 
5138 		/* Every 8 descriptors we go to refresh mbufs */
5139 		if (processed == 8) {
5140                         igb_refresh_mbufs(rxr, i);
5141                         processed = 0;
5142 		}
5143 	}
5144 
5145 	/* Catch any remainders */
5146 	if (igb_rx_unrefreshed(rxr))
5147 		igb_refresh_mbufs(rxr, i);
5148 
5149 	rxr->next_to_check = i;
5150 
5151 	/*
5152 	 * Flush any outstanding LRO work
5153 	 */
5154 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5155 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
5156 		tcp_lro_flush(lro, queued);
5157 	}
5158 
5159 	if (done != NULL)
5160 		*done += rxdone;
5161 
5162 	IGB_RX_UNLOCK(rxr);
5163 	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5164 }
5165 
5166 /*********************************************************************
5167  *
5168  *  Verify that the hardware indicated that the checksum is valid.
5169  *  Inform the stack about the status of checksum so that stack
5170  *  doesn't spend time verifying the checksum.
5171  *
5172  *********************************************************************/
5173 static void
igb_rx_checksum(u32 staterr,struct mbuf * mp,u32 ptype)5174 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5175 {
5176 	u16 status = (u16)staterr;
5177 	u8  errors = (u8) (staterr >> 24);
5178 	int sctp;
5179 
5180 	/* Ignore Checksum bit is set */
5181 	if (status & E1000_RXD_STAT_IXSM) {
5182 		mp->m_pkthdr.csum_flags = 0;
5183 		return;
5184 	}
5185 
5186 	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5187 	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5188 		sctp = 1;
5189 	else
5190 		sctp = 0;
5191 	if (status & E1000_RXD_STAT_IPCS) {
5192 		/* Did it pass? */
5193 		if (!(errors & E1000_RXD_ERR_IPE)) {
5194 			/* IP Checksum Good */
5195 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5196 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5197 		} else
5198 			mp->m_pkthdr.csum_flags = 0;
5199 	}
5200 
5201 	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5202 		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5203 #if __FreeBSD_version >= 800000
5204 		if (sctp) /* reassign */
5205 			type = CSUM_SCTP_VALID;
5206 #endif
5207 		/* Did it pass? */
5208 		if (!(errors & E1000_RXD_ERR_TCPE)) {
5209 			mp->m_pkthdr.csum_flags |= type;
5210 			if (sctp == 0)
5211 				mp->m_pkthdr.csum_data = htons(0xffff);
5212 		}
5213 	}
5214 	return;
5215 }
5216 
5217 /*
5218  * This routine is run via an vlan
5219  * config EVENT
5220  */
5221 static void
igb_register_vlan(void * arg,struct ifnet * ifp,u16 vtag)5222 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5223 {
5224 	struct adapter	*adapter = ifp->if_softc;
5225 	u32		index, bit;
5226 
5227 	if (ifp->if_softc !=  arg)   /* Not our event */
5228 		return;
5229 
5230 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5231                 return;
5232 
5233 	IGB_CORE_LOCK(adapter);
5234 	index = (vtag >> 5) & 0x7F;
5235 	bit = vtag & 0x1F;
5236 	adapter->shadow_vfta[index] |= (1 << bit);
5237 	++adapter->num_vlans;
5238 	/* Change hw filter setting */
5239 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5240 		igb_setup_vlan_hw_support(adapter);
5241 	IGB_CORE_UNLOCK(adapter);
5242 }
5243 
5244 /*
5245  * This routine is run via an vlan
5246  * unconfig EVENT
5247  */
5248 static void
igb_unregister_vlan(void * arg,struct ifnet * ifp,u16 vtag)5249 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5250 {
5251 	struct adapter	*adapter = ifp->if_softc;
5252 	u32		index, bit;
5253 
5254 	if (ifp->if_softc !=  arg)
5255 		return;
5256 
5257 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5258                 return;
5259 
5260 	IGB_CORE_LOCK(adapter);
5261 	index = (vtag >> 5) & 0x7F;
5262 	bit = vtag & 0x1F;
5263 	adapter->shadow_vfta[index] &= ~(1 << bit);
5264 	--adapter->num_vlans;
5265 	/* Change hw filter setting */
5266 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5267 		igb_setup_vlan_hw_support(adapter);
5268 	IGB_CORE_UNLOCK(adapter);
5269 }
5270 
5271 static void
igb_setup_vlan_hw_support(struct adapter * adapter)5272 igb_setup_vlan_hw_support(struct adapter *adapter)
5273 {
5274 	struct e1000_hw *hw = &adapter->hw;
5275 	struct ifnet	*ifp = adapter->ifp;
5276 	u32             reg;
5277 
5278 	if (adapter->vf_ifp) {
5279 		e1000_rlpml_set_vf(hw,
5280 		    adapter->max_frame_size + VLAN_TAG_SIZE);
5281 		return;
5282 	}
5283 
5284 	reg = E1000_READ_REG(hw, E1000_CTRL);
5285 	reg |= E1000_CTRL_VME;
5286 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5287 
5288 	/* Enable the Filter Table */
5289 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5290 		reg = E1000_READ_REG(hw, E1000_RCTL);
5291 		reg &= ~E1000_RCTL_CFIEN;
5292 		reg |= E1000_RCTL_VFE;
5293 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5294 	}
5295 
5296 	/* Update the frame size */
5297 	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5298 	    adapter->max_frame_size + VLAN_TAG_SIZE);
5299 
5300 	/* Don't bother with table if no vlans */
5301 	if ((adapter->num_vlans == 0) ||
5302 	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5303                 return;
5304 	/*
5305 	** A soft reset zero's out the VFTA, so
5306 	** we need to repopulate it now.
5307 	*/
5308 	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5309                 if (adapter->shadow_vfta[i] != 0) {
5310 			if (adapter->vf_ifp)
5311 				e1000_vfta_set_vf(hw,
5312 				    adapter->shadow_vfta[i], TRUE);
5313 			else
5314 				e1000_write_vfta(hw,
5315 				    i, adapter->shadow_vfta[i]);
5316 		}
5317 }
5318 
5319 static void
igb_enable_intr(struct adapter * adapter)5320 igb_enable_intr(struct adapter *adapter)
5321 {
5322 	/* With RSS set up what to auto clear */
5323 	if (adapter->msix_mem) {
5324 		u32 mask = (adapter->que_mask | adapter->link_mask);
5325 		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5326 		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5327 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5328 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5329 		    E1000_IMS_LSC);
5330 	} else {
5331 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5332 		    IMS_ENABLE_MASK);
5333 	}
5334 	E1000_WRITE_FLUSH(&adapter->hw);
5335 
5336 	return;
5337 }
5338 
5339 static void
igb_disable_intr(struct adapter * adapter)5340 igb_disable_intr(struct adapter *adapter)
5341 {
5342 	if (adapter->msix_mem) {
5343 		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5344 		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5345 	}
5346 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5347 	E1000_WRITE_FLUSH(&adapter->hw);
5348 	return;
5349 }
5350 
5351 /*
5352  * Bit of a misnomer, what this really means is
5353  * to enable OS management of the system... aka
5354  * to disable special hardware management features
5355  */
5356 static void
igb_init_manageability(struct adapter * adapter)5357 igb_init_manageability(struct adapter *adapter)
5358 {
5359 	if (adapter->has_manage) {
5360 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5361 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5362 
5363 		/* disable hardware interception of ARP */
5364 		manc &= ~(E1000_MANC_ARP_EN);
5365 
5366                 /* enable receiving management packets to the host */
5367 		manc |= E1000_MANC_EN_MNG2HOST;
5368 		manc2h |= 1 << 5;  /* Mng Port 623 */
5369 		manc2h |= 1 << 6;  /* Mng Port 664 */
5370 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5371 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5372 	}
5373 }
5374 
5375 /*
5376  * Give control back to hardware management
5377  * controller if there is one.
5378  */
5379 static void
igb_release_manageability(struct adapter * adapter)5380 igb_release_manageability(struct adapter *adapter)
5381 {
5382 	if (adapter->has_manage) {
5383 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5384 
5385 		/* re-enable hardware interception of ARP */
5386 		manc |= E1000_MANC_ARP_EN;
5387 		manc &= ~E1000_MANC_EN_MNG2HOST;
5388 
5389 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5390 	}
5391 }
5392 
5393 /*
5394  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5395  * For ASF and Pass Through versions of f/w this means that
5396  * the driver is loaded.
5397  *
5398  */
5399 static void
igb_get_hw_control(struct adapter * adapter)5400 igb_get_hw_control(struct adapter *adapter)
5401 {
5402 	u32 ctrl_ext;
5403 
5404 	if (adapter->vf_ifp)
5405 		return;
5406 
5407 	/* Let firmware know the driver has taken over */
5408 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5409 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5410 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5411 }
5412 
5413 /*
5414  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5415  * For ASF and Pass Through versions of f/w this means that the
5416  * driver is no longer loaded.
5417  *
5418  */
5419 static void
igb_release_hw_control(struct adapter * adapter)5420 igb_release_hw_control(struct adapter *adapter)
5421 {
5422 	u32 ctrl_ext;
5423 
5424 	if (adapter->vf_ifp)
5425 		return;
5426 
5427 	/* Let firmware taken over control of h/w */
5428 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5429 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5430 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5431 }
5432 
5433 static int
igb_is_valid_ether_addr(uint8_t * addr)5434 igb_is_valid_ether_addr(uint8_t *addr)
5435 {
5436 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5437 
5438 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5439 		return (FALSE);
5440 	}
5441 
5442 	return (TRUE);
5443 }
5444 
5445 
5446 /*
5447  * Enable PCI Wake On Lan capability
5448  */
5449 static void
igb_enable_wakeup(device_t dev)5450 igb_enable_wakeup(device_t dev)
5451 {
5452 	u16     cap, status;
5453 	u8      id;
5454 
5455 	/* First find the capabilities pointer*/
5456 	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5457 	/* Read the PM Capabilities */
5458 	id = pci_read_config(dev, cap, 1);
5459 	if (id != PCIY_PMG)     /* Something wrong */
5460 		return;
5461 	/* OK, we have the power capabilities, so
5462 	   now get the status register */
5463 	cap += PCIR_POWER_STATUS;
5464 	status = pci_read_config(dev, cap, 2);
5465 	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5466 	pci_write_config(dev, cap, status, 2);
5467 	return;
5468 }
5469 
5470 static void
igb_led_func(void * arg,int onoff)5471 igb_led_func(void *arg, int onoff)
5472 {
5473 	struct adapter	*adapter = arg;
5474 
5475 	IGB_CORE_LOCK(adapter);
5476 	if (onoff) {
5477 		e1000_setup_led(&adapter->hw);
5478 		e1000_led_on(&adapter->hw);
5479 	} else {
5480 		e1000_led_off(&adapter->hw);
5481 		e1000_cleanup_led(&adapter->hw);
5482 	}
5483 	IGB_CORE_UNLOCK(adapter);
5484 }
5485 
5486 static uint64_t
igb_get_vf_counter(if_t ifp,ift_counter cnt)5487 igb_get_vf_counter(if_t ifp, ift_counter cnt)
5488 {
5489 	struct adapter *adapter;
5490 	struct e1000_vf_stats *stats;
5491 #ifndef IGB_LEGACY_TX
5492 	struct tx_ring *txr;
5493 	uint64_t rv;
5494 #endif
5495 
5496 	adapter = if_getsoftc(ifp);
5497 	stats = (struct e1000_vf_stats *)adapter->stats;
5498 
5499 	switch (cnt) {
5500 	case IFCOUNTER_IPACKETS:
5501 		return (stats->gprc);
5502 	case IFCOUNTER_OPACKETS:
5503 		return (stats->gptc);
5504 	case IFCOUNTER_IBYTES:
5505 		return (stats->gorc);
5506 	case IFCOUNTER_OBYTES:
5507 		return (stats->gotc);
5508 	case IFCOUNTER_IMCASTS:
5509 		return (stats->mprc);
5510 	case IFCOUNTER_IERRORS:
5511 		return (adapter->dropped_pkts);
5512 	case IFCOUNTER_OERRORS:
5513 		return (adapter->watchdog_events);
5514 #ifndef IGB_LEGACY_TX
5515 	case IFCOUNTER_OQDROPS:
5516 		rv = 0;
5517 		txr = adapter->tx_rings;
5518 		for (int i = 0; i < adapter->num_queues; i++, txr++)
5519 			rv += txr->br->br_drops;
5520 		return (rv);
5521 #endif
5522 	default:
5523 		return (if_get_counter_default(ifp, cnt));
5524 	}
5525 }
5526 
5527 static uint64_t
igb_get_counter(if_t ifp,ift_counter cnt)5528 igb_get_counter(if_t ifp, ift_counter cnt)
5529 {
5530 	struct adapter *adapter;
5531 	struct e1000_hw_stats *stats;
5532 #ifndef IGB_LEGACY_TX
5533 	struct tx_ring *txr;
5534 	uint64_t rv;
5535 #endif
5536 
5537 	adapter = if_getsoftc(ifp);
5538 	if (adapter->vf_ifp)
5539 		return (igb_get_vf_counter(ifp, cnt));
5540 
5541 	stats = (struct e1000_hw_stats *)adapter->stats;
5542 
5543 	switch (cnt) {
5544 	case IFCOUNTER_IPACKETS:
5545 		return (stats->gprc);
5546 	case IFCOUNTER_OPACKETS:
5547 		return (stats->gptc);
5548 	case IFCOUNTER_IBYTES:
5549 		return (stats->gorc);
5550 	case IFCOUNTER_OBYTES:
5551 		return (stats->gotc);
5552 	case IFCOUNTER_IMCASTS:
5553 		return (stats->mprc);
5554 	case IFCOUNTER_OMCASTS:
5555 		return (stats->mptc);
5556 	case IFCOUNTER_IERRORS:
5557 		return (adapter->dropped_pkts + stats->rxerrc +
5558 		    stats->crcerrs + stats->algnerrc +
5559 		    stats->ruc + stats->roc + stats->cexterr);
5560 	case IFCOUNTER_OERRORS:
5561 		return (stats->ecol + stats->latecol +
5562 		    adapter->watchdog_events);
5563 	case IFCOUNTER_COLLISIONS:
5564 		return (stats->colc);
5565 	case IFCOUNTER_IQDROPS:
5566 		return (stats->mpc);
5567 #ifndef IGB_LEGACY_TX
5568 	case IFCOUNTER_OQDROPS:
5569 		rv = 0;
5570 		txr = adapter->tx_rings;
5571 		for (int i = 0; i < adapter->num_queues; i++, txr++)
5572 			rv += txr->br->br_drops;
5573 		return (rv);
5574 #endif
5575 	default:
5576 		return (if_get_counter_default(ifp, cnt));
5577 	}
5578 }
5579 
5580 /**********************************************************************
5581  *
5582  *  Update the board statistics counters.
5583  *
5584  **********************************************************************/
5585 static void
igb_update_stats_counters(struct adapter * adapter)5586 igb_update_stats_counters(struct adapter *adapter)
5587 {
5588         struct e1000_hw		*hw = &adapter->hw;
5589 	struct e1000_hw_stats	*stats;
5590 
5591 	/*
5592 	** The virtual function adapter has only a
5593 	** small controlled set of stats, do only
5594 	** those and return.
5595 	*/
5596 	if (adapter->vf_ifp) {
5597 		igb_update_vf_stats_counters(adapter);
5598 		return;
5599 	}
5600 
5601 	stats = (struct e1000_hw_stats	*)adapter->stats;
5602 
5603 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5604 	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5605 		stats->symerrs +=
5606 		    E1000_READ_REG(hw,E1000_SYMERRS);
5607 		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5608 	}
5609 
5610 	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5611 	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5612 	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5613 	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5614 
5615 	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5616 	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5617 	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5618 	stats->dc += E1000_READ_REG(hw, E1000_DC);
5619 	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5620 	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5621 	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5622 	/*
5623 	** For watchdog management we need to know if we have been
5624 	** paused during the last interval, so capture that here.
5625 	*/
5626         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5627         stats->xoffrxc += adapter->pause_frames;
5628 	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5629 	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5630 	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5631 	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5632 	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5633 	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5634 	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5635 	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5636 	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5637 	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5638 	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5639 	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5640 
5641 	/* For the 64-bit byte counters the low dword must be read first. */
5642 	/* Both registers clear on the read of the high dword */
5643 
5644 	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5645 	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5646 	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5647 	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5648 
5649 	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5650 	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5651 	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5652 	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5653 	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5654 
5655 	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5656 	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5657 	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5658 
5659 	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5660 	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5661 	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5662 	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5663 
5664 	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5665 	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5666 	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5667 	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5668 	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5669 	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5670 	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5671 	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5672 	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5673 	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5674 
5675 	/* Interrupt Counts */
5676 
5677 	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5678 	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5679 	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5680 	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5681 	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5682 	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5683 	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5684 	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5685 	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5686 
5687 	/* Host to Card Statistics */
5688 
5689 	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5690 	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5691 	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5692 	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5693 	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5694 	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5695 	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5696 	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5697 	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5698 	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5699 	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5700 	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5701 	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5702 	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5703 
5704 	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5705 	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5706 	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5707 	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5708 	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5709 	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5710 
5711 	/* Driver specific counters */
5712 	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5713 	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5714 	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5715 	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5716 	adapter->packet_buf_alloc_tx =
5717 	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5718 	adapter->packet_buf_alloc_rx =
5719 	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5720 }
5721 
5722 
5723 /**********************************************************************
5724  *
5725  *  Initialize the VF board statistics counters.
5726  *
5727  **********************************************************************/
5728 static void
igb_vf_init_stats(struct adapter * adapter)5729 igb_vf_init_stats(struct adapter *adapter)
5730 {
5731         struct e1000_hw *hw = &adapter->hw;
5732 	struct e1000_vf_stats	*stats;
5733 
5734 	stats = (struct e1000_vf_stats	*)adapter->stats;
5735 	if (stats == NULL)
5736 		return;
5737         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5738         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5739         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5740         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5741         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5742 }
5743 
5744 /**********************************************************************
5745  *
5746  *  Update the VF board statistics counters.
5747  *
5748  **********************************************************************/
5749 static void
igb_update_vf_stats_counters(struct adapter * adapter)5750 igb_update_vf_stats_counters(struct adapter *adapter)
5751 {
5752 	struct e1000_hw *hw = &adapter->hw;
5753 	struct e1000_vf_stats	*stats;
5754 
5755 	if (adapter->link_speed == 0)
5756 		return;
5757 
5758 	stats = (struct e1000_vf_stats	*)adapter->stats;
5759 
5760 	UPDATE_VF_REG(E1000_VFGPRC,
5761 	    stats->last_gprc, stats->gprc);
5762 	UPDATE_VF_REG(E1000_VFGORC,
5763 	    stats->last_gorc, stats->gorc);
5764 	UPDATE_VF_REG(E1000_VFGPTC,
5765 	    stats->last_gptc, stats->gptc);
5766 	UPDATE_VF_REG(E1000_VFGOTC,
5767 	    stats->last_gotc, stats->gotc);
5768 	UPDATE_VF_REG(E1000_VFMPRC,
5769 	    stats->last_mprc, stats->mprc);
5770 }
5771 
5772 /* Export a single 32-bit register via a read-only sysctl. */
5773 static int
igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)5774 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5775 {
5776 	struct adapter *adapter;
5777 	u_int val;
5778 
5779 	adapter = oidp->oid_arg1;
5780 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5781 	return (sysctl_handle_int(oidp, &val, 0, req));
5782 }
5783 
5784 /*
5785 **  Tuneable interrupt rate handler
5786 */
5787 static int
igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)5788 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5789 {
5790 	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5791 	int			error;
5792 	u32			reg, usec, rate;
5793 
5794 	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5795 	usec = ((reg & 0x7FFC) >> 2);
5796 	if (usec > 0)
5797 		rate = 1000000 / usec;
5798 	else
5799 		rate = 0;
5800 	error = sysctl_handle_int(oidp, &rate, 0, req);
5801 	if (error || !req->newptr)
5802 		return error;
5803 	return 0;
5804 }
5805 
5806 /*
5807  * Add sysctl variables, one per statistic, to the system.
5808  */
5809 static void
igb_add_hw_stats(struct adapter * adapter)5810 igb_add_hw_stats(struct adapter *adapter)
5811 {
5812 	device_t dev = adapter->dev;
5813 
5814 	struct tx_ring *txr = adapter->tx_rings;
5815 	struct rx_ring *rxr = adapter->rx_rings;
5816 
5817 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5818 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5819 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5820 	struct e1000_hw_stats *stats = adapter->stats;
5821 
5822 	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5823 	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5824 
5825 #define QUEUE_NAME_LEN 32
5826 	char namebuf[QUEUE_NAME_LEN];
5827 
5828 	/* Driver Statistics */
5829 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5830 			CTLFLAG_RD, &adapter->link_irq,
5831 			"Link MSIX IRQ Handled");
5832 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5833 			CTLFLAG_RD, &adapter->dropped_pkts,
5834 			"Driver dropped packets");
5835 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5836 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5837 			"Driver tx dma failure in xmit");
5838 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5839 			CTLFLAG_RD, &adapter->rx_overruns,
5840 			"RX overruns");
5841 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5842 			CTLFLAG_RD, &adapter->watchdog_events,
5843 			"Watchdog timeouts");
5844 
5845 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5846 			CTLFLAG_RD, &adapter->device_control,
5847 			"Device Control Register");
5848 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5849 			CTLFLAG_RD, &adapter->rx_control,
5850 			"Receiver Control Register");
5851 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5852 			CTLFLAG_RD, &adapter->int_mask,
5853 			"Interrupt Mask");
5854 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5855 			CTLFLAG_RD, &adapter->eint_mask,
5856 			"Extended Interrupt Mask");
5857 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5858 			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5859 			"Transmit Buffer Packet Allocation");
5860 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5861 			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5862 			"Receive Buffer Packet Allocation");
5863 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5864 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5865 			"Flow Control High Watermark");
5866 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5867 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5868 			"Flow Control Low Watermark");
5869 
5870 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5871 		struct lro_ctrl *lro = &rxr->lro;
5872 
5873 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5874 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5875 					    CTLFLAG_RD, NULL, "Queue Name");
5876 		queue_list = SYSCTL_CHILDREN(queue_node);
5877 
5878 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5879 				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5880 				sizeof(&adapter->queues[i]),
5881 				igb_sysctl_interrupt_rate_handler,
5882 				"IU", "Interrupt Rate");
5883 
5884 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5885 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5886 				igb_sysctl_reg_handler, "IU",
5887  				"Transmit Descriptor Head");
5888 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5889 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5890 				igb_sysctl_reg_handler, "IU",
5891  				"Transmit Descriptor Tail");
5892 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5893 				CTLFLAG_RD, &txr->no_desc_avail,
5894 				"Queue Descriptors Unavailable");
5895 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5896 				CTLFLAG_RD, &txr->total_packets,
5897 				"Queue Packets Transmitted");
5898 
5899 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5900 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5901 				igb_sysctl_reg_handler, "IU",
5902 				"Receive Descriptor Head");
5903 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5904 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5905 				igb_sysctl_reg_handler, "IU",
5906 				"Receive Descriptor Tail");
5907 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5908 				CTLFLAG_RD, &rxr->rx_packets,
5909 				"Queue Packets Received");
5910 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5911 				CTLFLAG_RD, &rxr->rx_bytes,
5912 				"Queue Bytes Received");
5913 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5914 				CTLFLAG_RD, &lro->lro_queued, 0,
5915 				"LRO Queued");
5916 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5917 				CTLFLAG_RD, &lro->lro_flushed, 0,
5918 				"LRO Flushed");
5919 	}
5920 
5921 	/* MAC stats get their own sub node */
5922 
5923 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5924 				    CTLFLAG_RD, NULL, "MAC Statistics");
5925 	stat_list = SYSCTL_CHILDREN(stat_node);
5926 
5927 	/*
5928 	** VF adapter has a very limited set of stats
5929 	** since its not managing the metal, so to speak.
5930 	*/
5931 	if (adapter->vf_ifp) {
5932 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5933 			CTLFLAG_RD, &stats->gprc,
5934 			"Good Packets Received");
5935 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5936 			CTLFLAG_RD, &stats->gptc,
5937 			"Good Packets Transmitted");
5938  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5939  			CTLFLAG_RD, &stats->gorc,
5940  			"Good Octets Received");
5941  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5942  			CTLFLAG_RD, &stats->gotc,
5943  			"Good Octets Transmitted");
5944 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5945 			CTLFLAG_RD, &stats->mprc,
5946 			"Multicast Packets Received");
5947 		return;
5948 	}
5949 
5950 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5951 			CTLFLAG_RD, &stats->ecol,
5952 			"Excessive collisions");
5953 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5954 			CTLFLAG_RD, &stats->scc,
5955 			"Single collisions");
5956 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5957 			CTLFLAG_RD, &stats->mcc,
5958 			"Multiple collisions");
5959 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5960 			CTLFLAG_RD, &stats->latecol,
5961 			"Late collisions");
5962 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5963 			CTLFLAG_RD, &stats->colc,
5964 			"Collision Count");
5965 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5966 			CTLFLAG_RD, &stats->symerrs,
5967 			"Symbol Errors");
5968 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5969 			CTLFLAG_RD, &stats->sec,
5970 			"Sequence Errors");
5971 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5972 			CTLFLAG_RD, &stats->dc,
5973 			"Defer Count");
5974 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5975 			CTLFLAG_RD, &stats->mpc,
5976 			"Missed Packets");
5977 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5978 			CTLFLAG_RD, &stats->rlec,
5979 			"Receive Length Errors");
5980 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5981 			CTLFLAG_RD, &stats->rnbc,
5982 			"Receive No Buffers");
5983 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5984 			CTLFLAG_RD, &stats->ruc,
5985 			"Receive Undersize");
5986 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5987 			CTLFLAG_RD, &stats->rfc,
5988 			"Fragmented Packets Received");
5989 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5990 			CTLFLAG_RD, &stats->roc,
5991 			"Oversized Packets Received");
5992 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5993 			CTLFLAG_RD, &stats->rjc,
5994 			"Recevied Jabber");
5995 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5996 			CTLFLAG_RD, &stats->rxerrc,
5997 			"Receive Errors");
5998 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5999 			CTLFLAG_RD, &stats->crcerrs,
6000 			"CRC errors");
6001 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6002 			CTLFLAG_RD, &stats->algnerrc,
6003 			"Alignment Errors");
6004 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6005 			CTLFLAG_RD, &stats->tncrs,
6006 			"Transmit with No CRS");
6007 	/* On 82575 these are collision counts */
6008 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6009 			CTLFLAG_RD, &stats->cexterr,
6010 			"Collision/Carrier extension errors");
6011 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6012 			CTLFLAG_RD, &stats->xonrxc,
6013 			"XON Received");
6014 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6015 			CTLFLAG_RD, &stats->xontxc,
6016 			"XON Transmitted");
6017 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6018 			CTLFLAG_RD, &stats->xoffrxc,
6019 			"XOFF Received");
6020 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6021 			CTLFLAG_RD, &stats->xofftxc,
6022 			"XOFF Transmitted");
6023 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6024 			CTLFLAG_RD, &stats->fcruc,
6025 			"Unsupported Flow Control Received");
6026 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6027 			CTLFLAG_RD, &stats->mgprc,
6028 			"Management Packets Received");
6029 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6030 			CTLFLAG_RD, &stats->mgpdc,
6031 			"Management Packets Dropped");
6032 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6033 			CTLFLAG_RD, &stats->mgptc,
6034 			"Management Packets Transmitted");
6035 	/* Packet Reception Stats */
6036 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6037 			CTLFLAG_RD, &stats->tpr,
6038 			"Total Packets Received");
6039 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6040 			CTLFLAG_RD, &stats->gprc,
6041 			"Good Packets Received");
6042 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6043 			CTLFLAG_RD, &stats->bprc,
6044 			"Broadcast Packets Received");
6045 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6046 			CTLFLAG_RD, &stats->mprc,
6047 			"Multicast Packets Received");
6048 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6049 			CTLFLAG_RD, &stats->prc64,
6050 			"64 byte frames received");
6051 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6052 			CTLFLAG_RD, &stats->prc127,
6053 			"65-127 byte frames received");
6054 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6055 			CTLFLAG_RD, &stats->prc255,
6056 			"128-255 byte frames received");
6057 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6058 			CTLFLAG_RD, &stats->prc511,
6059 			"256-511 byte frames received");
6060 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6061 			CTLFLAG_RD, &stats->prc1023,
6062 			"512-1023 byte frames received");
6063 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6064 			CTLFLAG_RD, &stats->prc1522,
6065 			"1023-1522 byte frames received");
6066  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6067  			CTLFLAG_RD, &stats->gorc,
6068 			"Good Octets Received");
6069 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6070 			CTLFLAG_RD, &stats->tor,
6071 			"Total Octets Received");
6072 
6073 	/* Packet Transmission Stats */
6074  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6075  			CTLFLAG_RD, &stats->gotc,
6076  			"Good Octets Transmitted");
6077 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6078 			CTLFLAG_RD, &stats->tot,
6079 			"Total Octets Transmitted");
6080 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6081 			CTLFLAG_RD, &stats->tpt,
6082 			"Total Packets Transmitted");
6083 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6084 			CTLFLAG_RD, &stats->gptc,
6085 			"Good Packets Transmitted");
6086 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6087 			CTLFLAG_RD, &stats->bptc,
6088 			"Broadcast Packets Transmitted");
6089 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6090 			CTLFLAG_RD, &stats->mptc,
6091 			"Multicast Packets Transmitted");
6092 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6093 			CTLFLAG_RD, &stats->ptc64,
6094 			"64 byte frames transmitted");
6095 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6096 			CTLFLAG_RD, &stats->ptc127,
6097 			"65-127 byte frames transmitted");
6098 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6099 			CTLFLAG_RD, &stats->ptc255,
6100 			"128-255 byte frames transmitted");
6101 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6102 			CTLFLAG_RD, &stats->ptc511,
6103 			"256-511 byte frames transmitted");
6104 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6105 			CTLFLAG_RD, &stats->ptc1023,
6106 			"512-1023 byte frames transmitted");
6107 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6108 			CTLFLAG_RD, &stats->ptc1522,
6109 			"1024-1522 byte frames transmitted");
6110 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6111 			CTLFLAG_RD, &stats->tsctc,
6112 			"TSO Contexts Transmitted");
6113 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6114 			CTLFLAG_RD, &stats->tsctfc,
6115 			"TSO Contexts Failed");
6116 
6117 
6118 	/* Interrupt Stats */
6119 
6120 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6121 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
6122 	int_list = SYSCTL_CHILDREN(int_node);
6123 
6124 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6125 			CTLFLAG_RD, &stats->iac,
6126 			"Interrupt Assertion Count");
6127 
6128 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6129 			CTLFLAG_RD, &stats->icrxptc,
6130 			"Interrupt Cause Rx Pkt Timer Expire Count");
6131 
6132 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6133 			CTLFLAG_RD, &stats->icrxatc,
6134 			"Interrupt Cause Rx Abs Timer Expire Count");
6135 
6136 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6137 			CTLFLAG_RD, &stats->ictxptc,
6138 			"Interrupt Cause Tx Pkt Timer Expire Count");
6139 
6140 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6141 			CTLFLAG_RD, &stats->ictxatc,
6142 			"Interrupt Cause Tx Abs Timer Expire Count");
6143 
6144 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6145 			CTLFLAG_RD, &stats->ictxqec,
6146 			"Interrupt Cause Tx Queue Empty Count");
6147 
6148 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6149 			CTLFLAG_RD, &stats->ictxqmtc,
6150 			"Interrupt Cause Tx Queue Min Thresh Count");
6151 
6152 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6153 			CTLFLAG_RD, &stats->icrxdmtc,
6154 			"Interrupt Cause Rx Desc Min Thresh Count");
6155 
6156 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6157 			CTLFLAG_RD, &stats->icrxoc,
6158 			"Interrupt Cause Receiver Overrun Count");
6159 
6160 	/* Host to Card Stats */
6161 
6162 	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6163 				    CTLFLAG_RD, NULL,
6164 				    "Host to Card Statistics");
6165 
6166 	host_list = SYSCTL_CHILDREN(host_node);
6167 
6168 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6169 			CTLFLAG_RD, &stats->cbtmpc,
6170 			"Circuit Breaker Tx Packet Count");
6171 
6172 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6173 			CTLFLAG_RD, &stats->htdpmc,
6174 			"Host Transmit Discarded Packets");
6175 
6176 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6177 			CTLFLAG_RD, &stats->rpthc,
6178 			"Rx Packets To Host");
6179 
6180 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6181 			CTLFLAG_RD, &stats->cbrmpc,
6182 			"Circuit Breaker Rx Packet Count");
6183 
6184 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6185 			CTLFLAG_RD, &stats->cbrdpc,
6186 			"Circuit Breaker Rx Dropped Count");
6187 
6188 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6189 			CTLFLAG_RD, &stats->hgptc,
6190 			"Host Good Packets Tx Count");
6191 
6192 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6193 			CTLFLAG_RD, &stats->htcbdpc,
6194 			"Host Tx Circuit Breaker Dropped Count");
6195 
6196 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6197 			CTLFLAG_RD, &stats->hgorc,
6198 			"Host Good Octets Received Count");
6199 
6200 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6201 			CTLFLAG_RD, &stats->hgotc,
6202 			"Host Good Octets Transmit Count");
6203 
6204 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6205 			CTLFLAG_RD, &stats->lenerrs,
6206 			"Length Errors");
6207 
6208 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6209 			CTLFLAG_RD, &stats->scvpc,
6210 			"SerDes/SGMII Code Violation Pkt Count");
6211 
6212 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6213 			CTLFLAG_RD, &stats->hrmpc,
6214 			"Header Redirection Missed Packet Count");
6215 }
6216 
6217 
6218 /**********************************************************************
6219  *
6220  *  This routine provides a way to dump out the adapter eeprom,
6221  *  often a useful debug/service tool. This only dumps the first
6222  *  32 words, stuff that matters is in that extent.
6223  *
6224  **********************************************************************/
6225 static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)6226 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6227 {
6228 	struct adapter *adapter;
6229 	int error;
6230 	int result;
6231 
6232 	result = -1;
6233 	error = sysctl_handle_int(oidp, &result, 0, req);
6234 
6235 	if (error || !req->newptr)
6236 		return (error);
6237 
6238 	/*
6239 	 * This value will cause a hex dump of the
6240 	 * first 32 16-bit words of the EEPROM to
6241 	 * the screen.
6242 	 */
6243 	if (result == 1) {
6244 		adapter = (struct adapter *)arg1;
6245 		igb_print_nvm_info(adapter);
6246         }
6247 
6248 	return (error);
6249 }
6250 
6251 static void
igb_print_nvm_info(struct adapter * adapter)6252 igb_print_nvm_info(struct adapter *adapter)
6253 {
6254 	u16	eeprom_data;
6255 	int	i, j, row = 0;
6256 
6257 	/* Its a bit crude, but it gets the job done */
6258 	printf("\nInterface EEPROM Dump:\n");
6259 	printf("Offset\n0x0000  ");
6260 	for (i = 0, j = 0; i < 32; i++, j++) {
6261 		if (j == 8) { /* Make the offset block */
6262 			j = 0; ++row;
6263 			printf("\n0x00%x0  ",row);
6264 		}
6265 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6266 		printf("%04x ", eeprom_data);
6267 	}
6268 	printf("\n");
6269 }
6270 
6271 static void
igb_set_sysctl_value(struct adapter * adapter,const char * name,const char * description,int * limit,int value)6272 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6273 	const char *description, int *limit, int value)
6274 {
6275 	*limit = value;
6276 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6277 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6278 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6279 }
6280 
6281 /*
6282 ** Set flow control using sysctl:
6283 ** Flow control values:
6284 ** 	0 - off
6285 **	1 - rx pause
6286 **	2 - tx pause
6287 **	3 - full
6288 */
6289 static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)6290 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6291 {
6292 	int		error;
6293 	static int	input = 3; /* default is full */
6294 	struct adapter	*adapter = (struct adapter *) arg1;
6295 
6296 	error = sysctl_handle_int(oidp, &input, 0, req);
6297 
6298 	if ((error) || (req->newptr == NULL))
6299 		return (error);
6300 
6301 	switch (input) {
6302 		case e1000_fc_rx_pause:
6303 		case e1000_fc_tx_pause:
6304 		case e1000_fc_full:
6305 		case e1000_fc_none:
6306 			adapter->hw.fc.requested_mode = input;
6307 			adapter->fc = input;
6308 			break;
6309 		default:
6310 			/* Do nothing */
6311 			return (error);
6312 	}
6313 
6314 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6315 	e1000_force_mac_fc(&adapter->hw);
6316 	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
6317 	return (error);
6318 }
6319 
6320 /*
6321 ** Manage DMA Coalesce:
6322 ** Control values:
6323 ** 	0/1 - off/on
6324 **	Legal timer values are:
6325 **	250,500,1000-10000 in thousands
6326 */
6327 static int
igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)6328 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6329 {
6330 	struct adapter *adapter = (struct adapter *) arg1;
6331 	int		error;
6332 
6333 	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6334 
6335 	if ((error) || (req->newptr == NULL))
6336 		return (error);
6337 
6338 	switch (adapter->dmac) {
6339 		case 0:
6340 			/* Disabling */
6341 			break;
6342 		case 1: /* Just enable and use default */
6343 			adapter->dmac = 1000;
6344 			break;
6345 		case 250:
6346 		case 500:
6347 		case 1000:
6348 		case 2000:
6349 		case 3000:
6350 		case 4000:
6351 		case 5000:
6352 		case 6000:
6353 		case 7000:
6354 		case 8000:
6355 		case 9000:
6356 		case 10000:
6357 			/* Legal values - allow */
6358 			break;
6359 		default:
6360 			/* Do nothing, illegal value */
6361 			adapter->dmac = 0;
6362 			return (EINVAL);
6363 	}
6364 	/* Reinit the interface */
6365 	igb_init(adapter);
6366 	return (error);
6367 }
6368 
6369 /*
6370 ** Manage Energy Efficient Ethernet:
6371 ** Control values:
6372 **     0/1 - enabled/disabled
6373 */
6374 static int
igb_sysctl_eee(SYSCTL_HANDLER_ARGS)6375 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6376 {
6377 	struct adapter	*adapter = (struct adapter *) arg1;
6378 	int		error, value;
6379 
6380 	value = adapter->hw.dev_spec._82575.eee_disable;
6381 	error = sysctl_handle_int(oidp, &value, 0, req);
6382 	if (error || req->newptr == NULL)
6383 		return (error);
6384 	IGB_CORE_LOCK(adapter);
6385 	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6386 	igb_init_locked(adapter);
6387 	IGB_CORE_UNLOCK(adapter);
6388 	return (0);
6389 }
6390