xref: /trueos/sys/dev/e1000/if_em.c (revision 5868f7205430cd67aa3b655419d3f15f83b70119)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #ifdef HAVE_KERNEL_OPTION_HEADERS
39 #include "opt_device_polling.h"
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_arp.h>
67 #include <net/if_dl.h>
68 #include <net/if_media.h>
69 
70 #include <net/if_types.h>
71 #include <net/if_vlan_var.h>
72 
73 #include <netinet/in_systm.h>
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 #include <netinet/tcp.h>
79 #include <netinet/udp.h>
80 
81 #include <machine/in_cksum.h>
82 #include <dev/led/led.h>
83 #include <dev/pci/pcivar.h>
84 #include <dev/pci/pcireg.h>
85 
86 #include "e1000_api.h"
87 #include "e1000_82571.h"
88 #include "if_em.h"
89 
90 /*********************************************************************
91  *  Set this to one to display debug statistics
92  *********************************************************************/
93 int	em_display_debug_stats = 0;
94 
95 /*********************************************************************
96  *  Driver version:
97  *********************************************************************/
98 char em_driver_version[] = "7.4.2";
99 
100 /*********************************************************************
101  *  PCI Device ID Table
102  *
103  *  Used by probe to select devices to load on
104  *  Last field stores an index into em_strings
105  *  Last entry must be all 0s
106  *
107  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108  *********************************************************************/
109 
110 static em_vendor_info_t em_vendor_info_array[] =
111 {
112 	/* Intel(R) PRO/1000 Network Connection */
113 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
115 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
116 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
117 						PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
119 						PCI_ANY_ID, PCI_ANY_ID, 0},
120 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
121 						PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
123 						PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
127 						PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
132 
133 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
138 						PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
140 						PCI_ANY_ID, PCI_ANY_ID, 0},
141 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
142 						PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
144 						PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
179 						PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
181 						PCI_ANY_ID, PCI_ANY_ID, 0},
182 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	/* required last entry */
187 	{ 0, 0, 0, 0, 0}
188 };
189 
190 /*********************************************************************
191  *  Table of branding strings for all supported NICs.
192  *********************************************************************/
193 
194 static char *em_strings[] = {
195 	"Intel(R) PRO/1000 Network Connection"
196 };
197 
198 /*********************************************************************
199  *  Function prototypes
200  *********************************************************************/
201 static int	em_probe(device_t);
202 static int	em_attach(device_t);
203 static int	em_detach(device_t);
204 static int	em_shutdown(device_t);
205 static int	em_suspend(device_t);
206 static int	em_resume(device_t);
207 #ifdef EM_MULTIQUEUE
208 static int	em_mq_start(struct ifnet *, struct mbuf *);
209 static int	em_mq_start_locked(struct ifnet *,
210 		    struct tx_ring *, struct mbuf *);
211 static void	em_qflush(struct ifnet *);
212 #else
213 static void	em_start(struct ifnet *);
214 static void	em_start_locked(struct ifnet *, struct tx_ring *);
215 #endif
216 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
217 static void	em_init(void *);
218 static void	em_init_locked(struct adapter *);
219 static void	em_stop(void *);
220 static void	em_media_status(struct ifnet *, struct ifmediareq *);
221 static int	em_media_change(struct ifnet *);
222 static void	em_identify_hardware(struct adapter *);
223 static int	em_allocate_pci_resources(struct adapter *);
224 static int	em_allocate_legacy(struct adapter *);
225 static int	em_allocate_msix(struct adapter *);
226 static int	em_allocate_queues(struct adapter *);
227 static int	em_setup_msix(struct adapter *);
228 static void	em_free_pci_resources(struct adapter *);
229 static void	em_local_timer(void *);
230 static void	em_reset(struct adapter *);
231 static int	em_setup_interface(device_t, struct adapter *);
232 
233 static void	em_setup_transmit_structures(struct adapter *);
234 static void	em_initialize_transmit_unit(struct adapter *);
235 static int	em_allocate_transmit_buffers(struct tx_ring *);
236 static void	em_free_transmit_structures(struct adapter *);
237 static void	em_free_transmit_buffers(struct tx_ring *);
238 
239 static int	em_setup_receive_structures(struct adapter *);
240 static int	em_allocate_receive_buffers(struct rx_ring *);
241 static void	em_initialize_receive_unit(struct adapter *);
242 static void	em_free_receive_structures(struct adapter *);
243 static void	em_free_receive_buffers(struct rx_ring *);
244 
245 static void	em_enable_intr(struct adapter *);
246 static void	em_disable_intr(struct adapter *);
247 static void	em_update_stats_counters(struct adapter *);
248 static void	em_add_hw_stats(struct adapter *adapter);
249 static void	em_txeof(struct tx_ring *);
250 static bool	em_rxeof(struct rx_ring *, int, int *);
251 #ifndef __NO_STRICT_ALIGNMENT
252 static int	em_fixup_rx(struct rx_ring *);
253 #endif
254 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
255 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
256 		    struct ip *, u32 *, u32 *);
257 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
258 		    struct tcphdr *, u32 *, u32 *);
259 static void	em_set_promisc(struct adapter *);
260 static void	em_disable_promisc(struct adapter *);
261 static void	em_set_multi(struct adapter *);
262 static void	em_update_link_status(struct adapter *);
263 static void	em_refresh_mbufs(struct rx_ring *, int);
264 static void	em_register_vlan(void *, struct ifnet *, u16);
265 static void	em_unregister_vlan(void *, struct ifnet *, u16);
266 static void	em_setup_vlan_hw_support(struct adapter *);
267 static int	em_xmit(struct tx_ring *, struct mbuf **);
268 static int	em_dma_malloc(struct adapter *, bus_size_t,
269 		    struct em_dma_alloc *, int);
270 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
271 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
272 static void	em_print_nvm_info(struct adapter *);
273 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
274 static void	em_print_debug_info(struct adapter *);
275 static int 	em_is_valid_ether_addr(u8 *);
276 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
277 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
278 		    const char *, struct em_int_delay_info *, int, int);
279 /* Management and WOL Support */
280 static void	em_init_manageability(struct adapter *);
281 static void	em_release_manageability(struct adapter *);
282 static void     em_get_hw_control(struct adapter *);
283 static void     em_release_hw_control(struct adapter *);
284 static void	em_get_wakeup(device_t);
285 static void     em_enable_wakeup(device_t);
286 static int	em_enable_phy_wakeup(struct adapter *);
287 static void	em_led_func(void *, int);
288 static void	em_disable_aspm(struct adapter *);
289 
290 static int	em_irq_fast(void *);
291 
292 /* MSIX handlers */
293 static void	em_msix_tx(void *);
294 static void	em_msix_rx(void *);
295 static void	em_msix_link(void *);
296 static void	em_handle_tx(void *context, int pending);
297 static void	em_handle_rx(void *context, int pending);
298 static void	em_handle_link(void *context, int pending);
299 
300 static void	em_set_sysctl_value(struct adapter *, const char *,
301 		    const char *, int *, int);
302 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
303 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
304 
305 static __inline void em_rx_discard(struct rx_ring *, int);
306 
307 #ifdef DEVICE_POLLING
308 static poll_handler_t em_poll;
309 #endif /* POLLING */
310 
311 /*********************************************************************
312  *  FreeBSD Device Interface Entry Points
313  *********************************************************************/
314 
315 static device_method_t em_methods[] = {
316 	/* Device interface */
317 	DEVMETHOD(device_probe, em_probe),
318 	DEVMETHOD(device_attach, em_attach),
319 	DEVMETHOD(device_detach, em_detach),
320 	DEVMETHOD(device_shutdown, em_shutdown),
321 	DEVMETHOD(device_suspend, em_suspend),
322 	DEVMETHOD(device_resume, em_resume),
323 	DEVMETHOD_END
324 };
325 
326 static driver_t em_driver = {
327 	"em", em_methods, sizeof(struct adapter),
328 };
329 
330 devclass_t em_devclass;
331 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
332 MODULE_DEPEND(em, pci, 1, 1, 1);
333 MODULE_DEPEND(em, ether, 1, 1, 1);
334 
335 /*********************************************************************
336  *  Tunable default values.
337  *********************************************************************/
338 
339 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
340 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
341 #define M_TSO_LEN			66
342 
343 #define MAX_INTS_PER_SEC	8000
344 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
345 
346 /* Allow common code without TSO */
347 #ifndef CSUM_TSO
348 #define CSUM_TSO	0
349 #endif
350 
351 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
352 
353 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
354 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
355 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
356 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
357 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
358     0, "Default transmit interrupt delay in usecs");
359 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
360     0, "Default receive interrupt delay in usecs");
361 
362 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
363 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
364 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
365 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
366 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
367     &em_tx_abs_int_delay_dflt, 0,
368     "Default transmit interrupt delay limit in usecs");
369 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
370     &em_rx_abs_int_delay_dflt, 0,
371     "Default receive interrupt delay limit in usecs");
372 
373 static int em_rxd = EM_DEFAULT_RXD;
374 static int em_txd = EM_DEFAULT_TXD;
375 TUNABLE_INT("hw.em.rxd", &em_rxd);
376 TUNABLE_INT("hw.em.txd", &em_txd);
377 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
378     "Number of receive descriptors per queue");
379 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
380     "Number of transmit descriptors per queue");
381 
382 static int em_smart_pwr_down = FALSE;
383 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
384 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
385     0, "Set to true to leave smart power down enabled on newer adapters");
386 
387 /* Controls whether promiscuous also shows bad packets */
388 static int em_debug_sbp = FALSE;
389 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
390 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
391     "Show bad packets in promiscuous mode");
392 
393 static int em_enable_msix = TRUE;
394 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
395 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
396     "Enable MSI-X interrupts");
397 
398 /* How many packets rxeof tries to clean at a time */
399 static int em_rx_process_limit = 100;
400 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
401 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
402     &em_rx_process_limit, 0,
403     "Maximum number of received packets to process "
404     "at a time, -1 means unlimited");
405 
406 /* Energy efficient ethernet - default to OFF */
407 static int eee_setting = 1;
408 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
409 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
410     "Enable Energy Efficient Ethernet");
411 
412 /* Global used in WOL setup with multiport cards */
413 static int global_quad_port_a = 0;
414 
415 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
416 #include <dev/netmap/if_em_netmap.h>
417 #endif /* DEV_NETMAP */
418 
419 /*********************************************************************
420  *  Device identification routine
421  *
422  *  em_probe determines if the driver should be loaded on
423  *  adapter based on PCI vendor/device id of the adapter.
424  *
425  *  return BUS_PROBE_DEFAULT on success, positive on failure
426  *********************************************************************/
427 
428 static int
em_probe(device_t dev)429 em_probe(device_t dev)
430 {
431 	char		adapter_name[60];
432 	u16		pci_vendor_id = 0;
433 	u16		pci_device_id = 0;
434 	u16		pci_subvendor_id = 0;
435 	u16		pci_subdevice_id = 0;
436 	em_vendor_info_t *ent;
437 
438 	INIT_DEBUGOUT("em_probe: begin");
439 
440 	pci_vendor_id = pci_get_vendor(dev);
441 	if (pci_vendor_id != EM_VENDOR_ID)
442 		return (ENXIO);
443 
444 	pci_device_id = pci_get_device(dev);
445 	pci_subvendor_id = pci_get_subvendor(dev);
446 	pci_subdevice_id = pci_get_subdevice(dev);
447 
448 	ent = em_vendor_info_array;
449 	while (ent->vendor_id != 0) {
450 		if ((pci_vendor_id == ent->vendor_id) &&
451 		    (pci_device_id == ent->device_id) &&
452 
453 		    ((pci_subvendor_id == ent->subvendor_id) ||
454 		    (ent->subvendor_id == PCI_ANY_ID)) &&
455 
456 		    ((pci_subdevice_id == ent->subdevice_id) ||
457 		    (ent->subdevice_id == PCI_ANY_ID))) {
458 			sprintf(adapter_name, "%s %s",
459 				em_strings[ent->index],
460 				em_driver_version);
461 			device_set_desc_copy(dev, adapter_name);
462 			return (BUS_PROBE_DEFAULT);
463 		}
464 		ent++;
465 	}
466 
467 	return (ENXIO);
468 }
469 
470 /*********************************************************************
471  *  Device initialization routine
472  *
473  *  The attach entry point is called when the driver is being loaded.
474  *  This routine identifies the type of hardware, allocates all resources
475  *  and initializes the hardware.
476  *
477  *  return 0 on success, positive on failure
478  *********************************************************************/
479 
480 static int
em_attach(device_t dev)481 em_attach(device_t dev)
482 {
483 	struct adapter	*adapter;
484 	struct e1000_hw	*hw;
485 	int		error = 0;
486 
487 	INIT_DEBUGOUT("em_attach: begin");
488 
489 	if (resource_disabled("em", device_get_unit(dev))) {
490 		device_printf(dev, "Disabled by device hint\n");
491 		return (ENXIO);
492 	}
493 
494 	adapter = device_get_softc(dev);
495 	adapter->dev = adapter->osdep.dev = dev;
496 	hw = &adapter->hw;
497 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
498 
499 	/* SYSCTL stuff */
500 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
501 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
502 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
503 	    em_sysctl_nvm_info, "I", "NVM Information");
504 
505 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
506 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
507 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
508 	    em_sysctl_debug_info, "I", "Debug Information");
509 
510 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
511 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
512 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
513 	    em_set_flowcntl, "I", "Flow Control");
514 
515 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
516 
517 	/* Determine hardware and mac info */
518 	em_identify_hardware(adapter);
519 
520 	/* Setup PCI resources */
521 	if (em_allocate_pci_resources(adapter)) {
522 		device_printf(dev, "Allocation of PCI resources failed\n");
523 		error = ENXIO;
524 		goto err_pci;
525 	}
526 
527 	/*
528 	** For ICH8 and family we need to
529 	** map the flash memory, and this
530 	** must happen after the MAC is
531 	** identified
532 	*/
533 	if ((hw->mac.type == e1000_ich8lan) ||
534 	    (hw->mac.type == e1000_ich9lan) ||
535 	    (hw->mac.type == e1000_ich10lan) ||
536 	    (hw->mac.type == e1000_pchlan) ||
537 	    (hw->mac.type == e1000_pch2lan) ||
538 	    (hw->mac.type == e1000_pch_lpt)) {
539 		int rid = EM_BAR_TYPE_FLASH;
540 		adapter->flash = bus_alloc_resource_any(dev,
541 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
542 		if (adapter->flash == NULL) {
543 			device_printf(dev, "Mapping of Flash failed\n");
544 			error = ENXIO;
545 			goto err_pci;
546 		}
547 		/* This is used in the shared code */
548 		hw->flash_address = (u8 *)adapter->flash;
549 		adapter->osdep.flash_bus_space_tag =
550 		    rman_get_bustag(adapter->flash);
551 		adapter->osdep.flash_bus_space_handle =
552 		    rman_get_bushandle(adapter->flash);
553 	}
554 
555 	/* Do Shared Code initialization */
556 	if (e1000_setup_init_funcs(hw, TRUE)) {
557 		device_printf(dev, "Setup of Shared code failed\n");
558 		error = ENXIO;
559 		goto err_pci;
560 	}
561 
562 	e1000_get_bus_info(hw);
563 
564 	/* Set up some sysctls for the tunable interrupt delays */
565 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
566 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
567 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
568 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
569 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
570 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
571 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
572 	    "receive interrupt delay limit in usecs",
573 	    &adapter->rx_abs_int_delay,
574 	    E1000_REGISTER(hw, E1000_RADV),
575 	    em_rx_abs_int_delay_dflt);
576 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
577 	    "transmit interrupt delay limit in usecs",
578 	    &adapter->tx_abs_int_delay,
579 	    E1000_REGISTER(hw, E1000_TADV),
580 	    em_tx_abs_int_delay_dflt);
581 	em_add_int_delay_sysctl(adapter, "itr",
582 	    "interrupt delay limit in usecs/4",
583 	    &adapter->tx_itr,
584 	    E1000_REGISTER(hw, E1000_ITR),
585 	    DEFAULT_ITR);
586 
587 	/* Sysctl for limiting the amount of work done in the taskqueue */
588 	em_set_sysctl_value(adapter, "rx_processing_limit",
589 	    "max number of rx packets to process", &adapter->rx_process_limit,
590 	    em_rx_process_limit);
591 
592 	/*
593 	 * Validate number of transmit and receive descriptors. It
594 	 * must not exceed hardware maximum, and must be multiple
595 	 * of E1000_DBA_ALIGN.
596 	 */
597 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
598 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
599 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
600 		    EM_DEFAULT_TXD, em_txd);
601 		adapter->num_tx_desc = EM_DEFAULT_TXD;
602 	} else
603 		adapter->num_tx_desc = em_txd;
604 
605 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
606 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
607 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
608 		    EM_DEFAULT_RXD, em_rxd);
609 		adapter->num_rx_desc = EM_DEFAULT_RXD;
610 	} else
611 		adapter->num_rx_desc = em_rxd;
612 
613 	hw->mac.autoneg = DO_AUTO_NEG;
614 	hw->phy.autoneg_wait_to_complete = FALSE;
615 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
616 
617 	/* Copper options */
618 	if (hw->phy.media_type == e1000_media_type_copper) {
619 		hw->phy.mdix = AUTO_ALL_MODES;
620 		hw->phy.disable_polarity_correction = FALSE;
621 		hw->phy.ms_type = EM_MASTER_SLAVE;
622 	}
623 
624 	/*
625 	 * Set the frame limits assuming
626 	 * standard ethernet sized frames.
627 	 */
628 	adapter->hw.mac.max_frame_size =
629 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
630 
631 	/*
632 	 * This controls when hardware reports transmit completion
633 	 * status.
634 	 */
635 	hw->mac.report_tx_early = 1;
636 
637 	/*
638 	** Get queue/ring memory
639 	*/
640 	if (em_allocate_queues(adapter)) {
641 		error = ENOMEM;
642 		goto err_pci;
643 	}
644 
645 	/* Allocate multicast array memory. */
646 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
647 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
648 	if (adapter->mta == NULL) {
649 		device_printf(dev, "Can not allocate multicast setup array\n");
650 		error = ENOMEM;
651 		goto err_late;
652 	}
653 
654 	/* Check SOL/IDER usage */
655 	if (e1000_check_reset_block(hw))
656 		device_printf(dev, "PHY reset is blocked"
657 		    " due to SOL/IDER session.\n");
658 
659 	/* Sysctl for setting Energy Efficient Ethernet */
660 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
661 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
662 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
663 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
664 	    adapter, 0, em_sysctl_eee, "I",
665 	    "Disable Energy Efficient Ethernet");
666 
667 	/*
668 	** Start from a known state, this is
669 	** important in reading the nvm and
670 	** mac from that.
671 	*/
672 	e1000_reset_hw(hw);
673 
674 
675 	/* Make sure we have a good EEPROM before we read from it */
676 	if (e1000_validate_nvm_checksum(hw) < 0) {
677 		/*
678 		** Some PCI-E parts fail the first check due to
679 		** the link being in sleep state, call it again,
680 		** if it fails a second time its a real issue.
681 		*/
682 		if (e1000_validate_nvm_checksum(hw) < 0) {
683 			device_printf(dev,
684 			    "The EEPROM Checksum Is Not Valid\n");
685 			error = EIO;
686 			goto err_late;
687 		}
688 	}
689 
690 	/* Copy the permanent MAC address out of the EEPROM */
691 	if (e1000_read_mac_addr(hw) < 0) {
692 		device_printf(dev, "EEPROM read error while reading MAC"
693 		    " address\n");
694 		error = EIO;
695 		goto err_late;
696 	}
697 
698 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
699 		device_printf(dev, "Invalid MAC address\n");
700 		error = EIO;
701 		goto err_late;
702 	}
703 
704 	/* Disable ULP support */
705 	e1000_disable_ulp_lpt_lp(hw, TRUE);
706 
707 	/*
708 	**  Do interrupt configuration
709 	*/
710 	if (adapter->msix > 1) /* Do MSIX */
711 		error = em_allocate_msix(adapter);
712 	else  /* MSI or Legacy */
713 		error = em_allocate_legacy(adapter);
714 	if (error)
715 		goto err_late;
716 
717 	/*
718 	 * Get Wake-on-Lan and Management info for later use
719 	 */
720 	em_get_wakeup(dev);
721 
722 	/* Setup OS specific network interface */
723 	if (em_setup_interface(dev, adapter) != 0)
724 		goto err_late;
725 
726 	em_reset(adapter);
727 
728 	/* Initialize statistics */
729 	em_update_stats_counters(adapter);
730 
731 	hw->mac.get_link_status = 1;
732 	em_update_link_status(adapter);
733 
734 	/* Register for VLAN events */
735 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
736 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
737 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
738 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
739 
740 	em_add_hw_stats(adapter);
741 
742 	/* Non-AMT based hardware can now take control from firmware */
743 	if (adapter->has_manage && !adapter->has_amt)
744 		em_get_hw_control(adapter);
745 
746 	/* Tell the stack that the interface is not active */
747 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
748 	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
749 
750 	adapter->led_dev = led_create(em_led_func, adapter,
751 	    device_get_nameunit(dev));
752 #ifdef DEV_NETMAP
753 	em_netmap_attach(adapter);
754 #endif /* DEV_NETMAP */
755 
756 	INIT_DEBUGOUT("em_attach: end");
757 
758 	return (0);
759 
760 err_late:
761 	em_free_transmit_structures(adapter);
762 	em_free_receive_structures(adapter);
763 	em_release_hw_control(adapter);
764 	if (adapter->ifp != NULL)
765 		if_free(adapter->ifp);
766 err_pci:
767 	em_free_pci_resources(adapter);
768 	free(adapter->mta, M_DEVBUF);
769 	EM_CORE_LOCK_DESTROY(adapter);
770 
771 	return (error);
772 }
773 
774 /*********************************************************************
775  *  Device removal routine
776  *
777  *  The detach entry point is called when the driver is being removed.
778  *  This routine stops the adapter and deallocates all the resources
779  *  that were allocated for driver operation.
780  *
781  *  return 0 on success, positive on failure
782  *********************************************************************/
783 
784 static int
em_detach(device_t dev)785 em_detach(device_t dev)
786 {
787 	struct adapter	*adapter = device_get_softc(dev);
788 	struct ifnet	*ifp = adapter->ifp;
789 
790 	INIT_DEBUGOUT("em_detach: begin");
791 
792 	/* Make sure VLANS are not using driver */
793 	if (adapter->ifp->if_vlantrunk != NULL) {
794 		device_printf(dev,"Vlan in use, detach first\n");
795 		return (EBUSY);
796 	}
797 
798 #ifdef DEVICE_POLLING
799 	if (ifp->if_capenable & IFCAP_POLLING)
800 		ether_poll_deregister(ifp);
801 #endif
802 
803 	if (adapter->led_dev != NULL)
804 		led_destroy(adapter->led_dev);
805 
806 	EM_CORE_LOCK(adapter);
807 	adapter->in_detach = 1;
808 	em_stop(adapter);
809 	EM_CORE_UNLOCK(adapter);
810 	EM_CORE_LOCK_DESTROY(adapter);
811 
812 	e1000_phy_hw_reset(&adapter->hw);
813 
814 	em_release_manageability(adapter);
815 	em_release_hw_control(adapter);
816 
817 	/* Unregister VLAN events */
818 	if (adapter->vlan_attach != NULL)
819 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
820 	if (adapter->vlan_detach != NULL)
821 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
822 
823 	ether_ifdetach(adapter->ifp);
824 	callout_drain(&adapter->timer);
825 
826 #ifdef DEV_NETMAP
827 	netmap_detach(ifp);
828 #endif /* DEV_NETMAP */
829 
830 	em_free_pci_resources(adapter);
831 	bus_generic_detach(dev);
832 	if_free(ifp);
833 
834 	em_free_transmit_structures(adapter);
835 	em_free_receive_structures(adapter);
836 
837 	em_release_hw_control(adapter);
838 	free(adapter->mta, M_DEVBUF);
839 
840 	return (0);
841 }
842 
843 /*********************************************************************
844  *
845  *  Shutdown entry point
846  *
847  **********************************************************************/
848 
849 static int
em_shutdown(device_t dev)850 em_shutdown(device_t dev)
851 {
852 	return em_suspend(dev);
853 }
854 
855 /*
856  * Suspend/resume device methods.
857  */
858 static int
em_suspend(device_t dev)859 em_suspend(device_t dev)
860 {
861 	struct adapter *adapter = device_get_softc(dev);
862 
863 	EM_CORE_LOCK(adapter);
864 
865         em_release_manageability(adapter);
866 	em_release_hw_control(adapter);
867 	em_enable_wakeup(dev);
868 
869 	EM_CORE_UNLOCK(adapter);
870 
871 	return bus_generic_suspend(dev);
872 }
873 
874 static int
em_resume(device_t dev)875 em_resume(device_t dev)
876 {
877 	struct adapter *adapter = device_get_softc(dev);
878 	struct tx_ring	*txr = adapter->tx_rings;
879 	struct ifnet *ifp = adapter->ifp;
880 
881 	EM_CORE_LOCK(adapter);
882 	if (adapter->hw.mac.type == e1000_pch2lan)
883 		e1000_resume_workarounds_pchlan(&adapter->hw);
884 	em_init_locked(adapter);
885 	em_init_manageability(adapter);
886 
887 	if ((ifp->if_flags & IFF_UP) &&
888 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
889 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
890 			EM_TX_LOCK(txr);
891 #ifdef EM_MULTIQUEUE
892 			if (!drbr_empty(ifp, txr->br))
893 				em_mq_start_locked(ifp, txr, NULL);
894 #else
895 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
896 				em_start_locked(ifp, txr);
897 #endif
898 			EM_TX_UNLOCK(txr);
899 		}
900 	}
901 	EM_CORE_UNLOCK(adapter);
902 
903 	return bus_generic_resume(dev);
904 }
905 
906 
907 #ifdef EM_MULTIQUEUE
908 /*********************************************************************
909  *  Multiqueue Transmit routines
910  *
911  *  em_mq_start is called by the stack to initiate a transmit.
912  *  however, if busy the driver can queue the request rather
913  *  than do an immediate send. It is this that is an advantage
914  *  in this driver, rather than also having multiple tx queues.
915  **********************************************************************/
916 static int
em_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr,struct mbuf * m)917 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
918 {
919 	struct adapter  *adapter = txr->adapter;
920         struct mbuf     *next;
921         int             err = 0, enq = 0;
922 
923 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
924 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
925 		if (m != NULL)
926 			err = drbr_enqueue(ifp, txr->br, m);
927 		return (err);
928 	}
929 
930 	enq = 0;
931 	if (m != NULL) {
932 		err = drbr_enqueue(ifp, txr->br, m);
933 		if (err)
934 			return (err);
935 	}
936 
937 	/* Process the queue */
938 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
939 		if ((err = em_xmit(txr, &next)) != 0) {
940 			if (next == NULL)
941 				drbr_advance(ifp, txr->br);
942 			else
943 				drbr_putback(ifp, txr->br, next);
944 			break;
945 		}
946 		drbr_advance(ifp, txr->br);
947 		enq++;
948 		ifp->if_obytes += next->m_pkthdr.len;
949 		if (next->m_flags & M_MCAST)
950 			ifp->if_omcasts++;
951 		ETHER_BPF_MTAP(ifp, next);
952 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
953                         break;
954 	}
955 
956 	if (enq > 0) {
957                 /* Set the watchdog */
958                 txr->queue_status = EM_QUEUE_WORKING;
959 		txr->watchdog_time = ticks;
960 	}
961 
962 	if (txr->tx_avail < EM_MAX_SCATTER)
963 		em_txeof(txr);
964 	if (txr->tx_avail < EM_MAX_SCATTER)
965 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
966 	return (err);
967 }
968 
969 /*
970 ** Multiqueue capable stack interface
971 */
972 static int
em_mq_start(struct ifnet * ifp,struct mbuf * m)973 em_mq_start(struct ifnet *ifp, struct mbuf *m)
974 {
975 	struct adapter	*adapter = ifp->if_softc;
976 	struct tx_ring	*txr = adapter->tx_rings;
977 	int 		error;
978 
979 	if (EM_TX_TRYLOCK(txr)) {
980 		error = em_mq_start_locked(ifp, txr, m);
981 		EM_TX_UNLOCK(txr);
982 	} else
983 		error = drbr_enqueue(ifp, txr->br, m);
984 
985 	return (error);
986 }
987 
988 /*
989 ** Flush all ring buffers
990 */
991 static void
em_qflush(struct ifnet * ifp)992 em_qflush(struct ifnet *ifp)
993 {
994 	struct adapter  *adapter = ifp->if_softc;
995 	struct tx_ring  *txr = adapter->tx_rings;
996 	struct mbuf     *m;
997 
998 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
999 		EM_TX_LOCK(txr);
1000 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1001 			m_freem(m);
1002 		EM_TX_UNLOCK(txr);
1003 	}
1004 	if_qflush(ifp);
1005 }
1006 #else  /* !EM_MULTIQUEUE */
1007 
1008 static void
em_start_locked(struct ifnet * ifp,struct tx_ring * txr)1009 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1010 {
1011 	struct adapter	*adapter = ifp->if_softc;
1012 	struct mbuf	*m_head;
1013 
1014 	EM_TX_LOCK_ASSERT(txr);
1015 
1016 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1017 	    IFF_DRV_RUNNING)
1018 		return;
1019 
1020 	if (!adapter->link_active)
1021 		return;
1022 
1023 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1024         	/* Call cleanup if number of TX descriptors low */
1025 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1026 			em_txeof(txr);
1027 		if (txr->tx_avail < EM_MAX_SCATTER) {
1028 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1029 			break;
1030 		}
1031                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1032 		if (m_head == NULL)
1033 			break;
1034 		/*
1035 		 *  Encapsulation can modify our pointer, and or make it
1036 		 *  NULL on failure.  In that event, we can't requeue.
1037 		 */
1038 		if (em_xmit(txr, &m_head)) {
1039 			if (m_head == NULL)
1040 				break;
1041 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1042 			break;
1043 		}
1044 
1045 		/* Send a copy of the frame to the BPF listener */
1046 		ETHER_BPF_MTAP(ifp, m_head);
1047 
1048 		/* Set timeout in case hardware has problems transmitting. */
1049 		txr->watchdog_time = ticks;
1050                 txr->queue_status = EM_QUEUE_WORKING;
1051 	}
1052 
1053 	return;
1054 }
1055 
1056 static void
em_start(struct ifnet * ifp)1057 em_start(struct ifnet *ifp)
1058 {
1059 	struct adapter	*adapter = ifp->if_softc;
1060 	struct tx_ring	*txr = adapter->tx_rings;
1061 
1062 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1063 		EM_TX_LOCK(txr);
1064 		em_start_locked(ifp, txr);
1065 		EM_TX_UNLOCK(txr);
1066 	}
1067 	return;
1068 }
1069 #endif /* EM_MULTIQUEUE */
1070 
/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  Handled here: SIOCSIFADDR, SIOCSIFMTU, SIOCSIFFLAGS,
 *  SIOCADDMULTI/SIOCDELMULTI, SIOCSIFMEDIA/SIOCGIFMEDIA and
 *  SIOCSIFCAP.  Every other command is passed to ether_ioctl().
 *
 *  Once adapter->in_detach is set the request is not processed at
 *  all and 0 is returned.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	/* Same argument buffer, viewed as an ifaddr for SIOCSIFADDR. */
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	/* Detach in progress: ignore the request (error is still 0). */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		/* Only INET/INET6 addresses take the no-reset shortcut. */
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			/* Initialize only if the interface is not running yet. */
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		/* Pick the largest frame size allowed for this MAC type. */
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		/*
		 * The limit is on the whole frame, so the Ethernet header
		 * and CRC are subtracted before comparing with the MTU.
		 * Only the upper bound is checked here.
		 */
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		/* Accept the MTU and reinitialize with the new frame size. */
		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/*
				 * Already running: reprogram promiscuous
				 * mode only when PROMISC/ALLMULTI differ
				 * from the flags saved on the last call.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		/* Remember the flags for the next comparison. */
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		/* Nothing is programmed while the interface is not running. */
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			/*
			 * With DEVICE_POLLING compiled in, the `if' below
			 * guards the em_enable_intr() call that follows the
			 * #endif: interrupts stay off while polling.
			 */
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			/* error is still 0: the caller sees success. */
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* Bits that differ between requested and enabled caps. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		/* Toggling any of the following requests a reinit below. */
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		/* WOL bits are toggled without requesting a reinit. */
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1278 
1279 
/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  em_init_locked() must be called with the core lock held (this is
 *  asserted); em_init() is the wrapper that takes the lock.
 *
 *  Neither routine returns a value.  If the receive structures
 *  cannot be set up, a message is printed, the adapter is stopped
 *  and the routine returns early.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	/* Quiesce interrupts and the timer callout while reprogramming. */
	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	/* if_hwassist is rebuilt from scratch from the enabled caps. */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		/* Give up: the interface is not marked running. */
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			/* Tagging without filtering: only set CTRL.VME. */
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Restart the timer callout (first run after hz ticks). */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}
1414 
/*
 * Unlocked wrapper around em_init_locked(): `arg' is the adapter softc;
 * the core lock is held for the duration of the initialization.
 */
static void
em_init(void *arg)
{
	struct adapter	*sc;

	sc = arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1424 
1425 
1426 #ifdef DEVICE_POLLING
1427 /*********************************************************************
1428  *
1429  *  Legacy polling routine: note this only works with single queue
1430  *
1431  *********************************************************************/
1432 static int
em_poll(struct ifnet * ifp,enum poll_cmd cmd,int count)1433 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1434 {
1435 	struct adapter *adapter = ifp->if_softc;
1436 	struct tx_ring	*txr = adapter->tx_rings;
1437 	struct rx_ring	*rxr = adapter->rx_rings;
1438 	u32		reg_icr;
1439 	int		rx_done;
1440 
1441 	EM_CORE_LOCK(adapter);
1442 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1443 		EM_CORE_UNLOCK(adapter);
1444 		return (0);
1445 	}
1446 
1447 	if (cmd == POLL_AND_CHECK_STATUS) {
1448 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1449 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1450 			callout_stop(&adapter->timer);
1451 			adapter->hw.mac.get_link_status = 1;
1452 			em_update_link_status(adapter);
1453 			callout_reset(&adapter->timer, hz,
1454 			    em_local_timer, adapter);
1455 		}
1456 	}
1457 	EM_CORE_UNLOCK(adapter);
1458 
1459 	em_rxeof(rxr, count, &rx_done);
1460 
1461 	EM_TX_LOCK(txr);
1462 	em_txeof(txr);
1463 #ifdef EM_MULTIQUEUE
1464 	if (!drbr_empty(ifp, txr->br))
1465 		em_mq_start_locked(ifp, txr, NULL);
1466 #else
1467 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1468 		em_start_locked(ifp, txr);
1469 #endif
1470 	EM_TX_UNLOCK(txr);
1471 
1472 	return (rx_done);
1473 }
1474 #endif /* DEVICE_POLLING */
1475 
1476 
1477 /*********************************************************************
1478  *
1479  *  Fast Legacy/MSI Combined Interrupt Service routine
1480  *
1481  *********************************************************************/
1482 static int
em_irq_fast(void * arg)1483 em_irq_fast(void *arg)
1484 {
1485 	struct adapter	*adapter = arg;
1486 	struct ifnet	*ifp;
1487 	u32		reg_icr;
1488 
1489 	ifp = adapter->ifp;
1490 
1491 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1492 
1493 	/* Hot eject?  */
1494 	if (reg_icr == 0xffffffff)
1495 		return FILTER_STRAY;
1496 
1497 	/* Definitely not our interrupt.  */
1498 	if (reg_icr == 0x0)
1499 		return FILTER_STRAY;
1500 
1501 	/*
1502 	 * Starting with the 82571 chip, bit 31 should be used to
1503 	 * determine whether the interrupt belongs to us.
1504 	 */
1505 	if (adapter->hw.mac.type >= e1000_82571 &&
1506 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1507 		return FILTER_STRAY;
1508 
1509 	em_disable_intr(adapter);
1510 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1511 
1512 	/* Link status change */
1513 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1514 		adapter->hw.mac.get_link_status = 1;
1515 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1516 	}
1517 
1518 	if (reg_icr & E1000_ICR_RXO)
1519 		adapter->rx_overruns++;
1520 	return FILTER_HANDLED;
1521 }
1522 
1523 /* Combined RX/TX handler, used by Legacy and MSI */
1524 static void
em_handle_que(void * context,int pending)1525 em_handle_que(void *context, int pending)
1526 {
1527 	struct adapter	*adapter = context;
1528 	struct ifnet	*ifp = adapter->ifp;
1529 	struct tx_ring	*txr = adapter->tx_rings;
1530 	struct rx_ring	*rxr = adapter->rx_rings;
1531 
1532 
1533 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1534 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1535 		EM_TX_LOCK(txr);
1536 		em_txeof(txr);
1537 #ifdef EM_MULTIQUEUE
1538 		if (!drbr_empty(ifp, txr->br))
1539 			em_mq_start_locked(ifp, txr, NULL);
1540 #else
1541 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1542 			em_start_locked(ifp, txr);
1543 #endif
1544 		EM_TX_UNLOCK(txr);
1545 		if (more) {
1546 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1547 			return;
1548 		}
1549 	}
1550 
1551 	em_enable_intr(adapter);
1552 	return;
1553 }
1554 
1555 
1556 /*********************************************************************
1557  *
1558  *  MSIX Interrupt Service Routines
1559  *
1560  **********************************************************************/
1561 static void
em_msix_tx(void * arg)1562 em_msix_tx(void *arg)
1563 {
1564 	struct tx_ring *txr = arg;
1565 	struct adapter *adapter = txr->adapter;
1566 	struct ifnet	*ifp = adapter->ifp;
1567 
1568 	++txr->tx_irq;
1569 	EM_TX_LOCK(txr);
1570 	em_txeof(txr);
1571 #ifdef EM_MULTIQUEUE
1572 	if (!drbr_empty(ifp, txr->br))
1573 		em_mq_start_locked(ifp, txr, NULL);
1574 #else
1575 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576 		em_start_locked(ifp, txr);
1577 #endif
1578 	/* Reenable this interrupt */
1579 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1580 	EM_TX_UNLOCK(txr);
1581 	return;
1582 }
1583 
1584 /*********************************************************************
1585  *
1586  *  MSIX RX Interrupt Service routine
1587  *
1588  **********************************************************************/
1589 
1590 static void
em_msix_rx(void * arg)1591 em_msix_rx(void *arg)
1592 {
1593 	struct rx_ring	*rxr = arg;
1594 	struct adapter	*adapter = rxr->adapter;
1595 	bool		more;
1596 
1597 	++rxr->rx_irq;
1598 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1599 		return;
1600 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1601 	if (more)
1602 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1603 	else
1604 		/* Reenable this interrupt */
1605 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1606 	return;
1607 }
1608 
1609 /*********************************************************************
1610  *
1611  *  MSIX Link Fast Interrupt Service routine
1612  *
1613  **********************************************************************/
1614 static void
em_msix_link(void * arg)1615 em_msix_link(void *arg)
1616 {
1617 	struct adapter	*adapter = arg;
1618 	u32		reg_icr;
1619 
1620 	++adapter->link_irq;
1621 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1622 
1623 	if (reg_icr & E1000_ICR_RXO)
1624 		adapter->rx_overruns++;
1625 
1626 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1627 		adapter->hw.mac.get_link_status = 1;
1628 		em_handle_link(adapter, 0);
1629 	} else
1630 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1631 		    EM_MSIX_LINK | E1000_IMS_LSC);
1632 	return;
1633 }
1634 
1635 static void
em_handle_rx(void * context,int pending)1636 em_handle_rx(void *context, int pending)
1637 {
1638 	struct rx_ring	*rxr = context;
1639 	struct adapter	*adapter = rxr->adapter;
1640         bool            more;
1641 
1642 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1643 	if (more)
1644 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1645 	else
1646 		/* Reenable this interrupt */
1647 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1648 }
1649 
1650 static void
em_handle_tx(void * context,int pending)1651 em_handle_tx(void *context, int pending)
1652 {
1653 	struct tx_ring	*txr = context;
1654 	struct adapter	*adapter = txr->adapter;
1655 	struct ifnet	*ifp = adapter->ifp;
1656 
1657 	EM_TX_LOCK(txr);
1658 	em_txeof(txr);
1659 #ifdef EM_MULTIQUEUE
1660 	if (!drbr_empty(ifp, txr->br))
1661 		em_mq_start_locked(ifp, txr, NULL);
1662 #else
1663 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1664 		em_start_locked(ifp, txr);
1665 #endif
1666 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1667 	EM_TX_UNLOCK(txr);
1668 }
1669 
1670 static void
em_handle_link(void * context,int pending)1671 em_handle_link(void *context, int pending)
1672 {
1673 	struct adapter	*adapter = context;
1674 	struct tx_ring	*txr = adapter->tx_rings;
1675 	struct ifnet *ifp = adapter->ifp;
1676 
1677 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1678 		return;
1679 
1680 	EM_CORE_LOCK(adapter);
1681 	callout_stop(&adapter->timer);
1682 	em_update_link_status(adapter);
1683 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1684 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1685 	    EM_MSIX_LINK | E1000_IMS_LSC);
1686 	if (adapter->link_active) {
1687 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1688 			EM_TX_LOCK(txr);
1689 #ifdef EM_MULTIQUEUE
1690 			if (!drbr_empty(ifp, txr->br))
1691 				em_mq_start_locked(ifp, txr, NULL);
1692 #else
1693 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1694 				em_start_locked(ifp, txr);
1695 #endif
1696 			EM_TX_UNLOCK(txr);
1697 		}
1698 	}
1699 	EM_CORE_UNLOCK(adapter);
1700 }
1701 
1702 
1703 /*********************************************************************
1704  *
1705  *  Media Ioctl callback
1706  *
1707  *  This routine is called whenever the user queries the status of
1708  *  the interface using ifconfig.
1709  *
1710  **********************************************************************/
1711 static void
em_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)1712 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1713 {
1714 	struct adapter *adapter = ifp->if_softc;
1715 	u_char fiber_type = IFM_1000_SX;
1716 
1717 	INIT_DEBUGOUT("em_media_status: begin");
1718 
1719 	EM_CORE_LOCK(adapter);
1720 	em_update_link_status(adapter);
1721 
1722 	ifmr->ifm_status = IFM_AVALID;
1723 	ifmr->ifm_active = IFM_ETHER;
1724 
1725 	if (!adapter->link_active) {
1726 		EM_CORE_UNLOCK(adapter);
1727 		return;
1728 	}
1729 
1730 	ifmr->ifm_status |= IFM_ACTIVE;
1731 
1732 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1733 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1734 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1735 	} else {
1736 		switch (adapter->link_speed) {
1737 		case 10:
1738 			ifmr->ifm_active |= IFM_10_T;
1739 			break;
1740 		case 100:
1741 			ifmr->ifm_active |= IFM_100_TX;
1742 			break;
1743 		case 1000:
1744 			ifmr->ifm_active |= IFM_1000_T;
1745 			break;
1746 		}
1747 		if (adapter->link_duplex == FULL_DUPLEX)
1748 			ifmr->ifm_active |= IFM_FDX;
1749 		else
1750 			ifmr->ifm_active |= IFM_HDX;
1751 	}
1752 	EM_CORE_UNLOCK(adapter);
1753 }
1754 
1755 /*********************************************************************
1756  *
1757  *  Media Ioctl callback
1758  *
1759  *  This routine is called when the user changes speed/duplex using
1760  *  media/mediopt option with ifconfig.
1761  *
1762  **********************************************************************/
1763 static int
em_media_change(struct ifnet * ifp)1764 em_media_change(struct ifnet *ifp)
1765 {
1766 	struct adapter *adapter = ifp->if_softc;
1767 	struct ifmedia  *ifm = &adapter->media;
1768 
1769 	INIT_DEBUGOUT("em_media_change: begin");
1770 
1771 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1772 		return (EINVAL);
1773 
1774 	EM_CORE_LOCK(adapter);
1775 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1776 	case IFM_AUTO:
1777 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1778 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1779 		break;
1780 	case IFM_1000_LX:
1781 	case IFM_1000_SX:
1782 	case IFM_1000_T:
1783 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1784 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1785 		break;
1786 	case IFM_100_TX:
1787 		adapter->hw.mac.autoneg = FALSE;
1788 		adapter->hw.phy.autoneg_advertised = 0;
1789 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1790 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1791 		else
1792 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1793 		break;
1794 	case IFM_10_T:
1795 		adapter->hw.mac.autoneg = FALSE;
1796 		adapter->hw.phy.autoneg_advertised = 0;
1797 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1798 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1799 		else
1800 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1801 		break;
1802 	default:
1803 		device_printf(adapter->dev, "Unsupported media type\n");
1804 	}
1805 
1806 	em_init_locked(adapter);
1807 	EM_CORE_UNLOCK(adapter);
1808 
1809 	return (0);
1810 }
1811 
1812 /*********************************************************************
1813  *
1814  *  This routine maps the mbufs to tx descriptors.
1815  *
1816  *  return 0 on success, positive on failure
1817  **********************************************************************/
1818 
1819 static int
em_xmit(struct tx_ring * txr,struct mbuf ** m_headp)1820 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1821 {
1822 	struct adapter		*adapter = txr->adapter;
1823 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1824 	bus_dmamap_t		map;
1825 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1826 	struct e1000_tx_desc	*ctxd = NULL;
1827 	struct mbuf		*m_head;
1828 	struct ether_header	*eh;
1829 	struct ip		*ip = NULL;
1830 	struct tcphdr		*tp = NULL;
1831 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1832 	int			ip_off, poff;
1833 	int			nsegs, i, j, first, last = 0;
1834 	int			error, do_tso, tso_desc = 0, remap = 1;
1835 
1836 	m_head = *m_headp;
1837 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1838 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1839 	ip_off = poff = 0;
1840 
1841 	/*
1842 	 * Intel recommends entire IP/TCP header length reside in a single
1843 	 * buffer. If multiple descriptors are used to describe the IP and
1844 	 * TCP header, each descriptor should describe one or more
1845 	 * complete headers; descriptors referencing only parts of headers
1846 	 * are not supported. If all layer headers are not coalesced into
1847 	 * a single buffer, each buffer should not cross a 4KB boundary,
1848 	 * or be larger than the maximum read request size.
1849 	 * Controller also requires modifing IP/TCP header to make TSO work
1850 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1851 	 * IP/TCP header into a single buffer to meet the requirement of
1852 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1853 	 * which also has similiar restrictions.
1854 	 */
1855 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1856 		if (do_tso || (m_head->m_next != NULL &&
1857 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1858 			if (M_WRITABLE(*m_headp) == 0) {
1859 				m_head = m_dup(*m_headp, M_NOWAIT);
1860 				m_freem(*m_headp);
1861 				if (m_head == NULL) {
1862 					*m_headp = NULL;
1863 					return (ENOBUFS);
1864 				}
1865 				*m_headp = m_head;
1866 			}
1867 		}
1868 		/*
1869 		 * XXX
1870 		 * Assume IPv4, we don't have TSO/checksum offload support
1871 		 * for IPv6 yet.
1872 		 */
1873 		ip_off = sizeof(struct ether_header);
1874 		m_head = m_pullup(m_head, ip_off);
1875 		if (m_head == NULL) {
1876 			*m_headp = NULL;
1877 			return (ENOBUFS);
1878 		}
1879 		eh = mtod(m_head, struct ether_header *);
1880 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1881 			ip_off = sizeof(struct ether_vlan_header);
1882 			m_head = m_pullup(m_head, ip_off);
1883 			if (m_head == NULL) {
1884 				*m_headp = NULL;
1885 				return (ENOBUFS);
1886 			}
1887 		}
1888 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1889 		if (m_head == NULL) {
1890 			*m_headp = NULL;
1891 			return (ENOBUFS);
1892 		}
1893 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1894 		poff = ip_off + (ip->ip_hl << 2);
1895 		if (do_tso) {
1896 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1897 			if (m_head == NULL) {
1898 				*m_headp = NULL;
1899 				return (ENOBUFS);
1900 			}
1901 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1902 			/*
1903 			 * TSO workaround:
1904 			 *   pull 4 more bytes of data into it.
1905 			 */
1906 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1907 			if (m_head == NULL) {
1908 				*m_headp = NULL;
1909 				return (ENOBUFS);
1910 			}
1911 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1912 			ip->ip_len = 0;
1913 			ip->ip_sum = 0;
1914 			/*
1915 			 * The pseudo TCP checksum does not include TCP payload
1916 			 * length so driver should recompute the checksum here
1917 			 * what hardware expect to see. This is adherence of
1918 			 * Microsoft's Large Send specification.
1919 			 */
1920 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1922 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1923 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1924 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1925 			if (m_head == NULL) {
1926 				*m_headp = NULL;
1927 				return (ENOBUFS);
1928 			}
1929 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1930 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1931 			if (m_head == NULL) {
1932 				*m_headp = NULL;
1933 				return (ENOBUFS);
1934 			}
1935 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1936 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1937 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1938 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1939 			if (m_head == NULL) {
1940 				*m_headp = NULL;
1941 				return (ENOBUFS);
1942 			}
1943 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1944 		}
1945 		*m_headp = m_head;
1946 	}
1947 
1948 	/*
1949 	 * Map the packet for DMA
1950 	 *
1951 	 * Capture the first descriptor index,
1952 	 * this descriptor will have the index
1953 	 * of the EOP which is the only one that
1954 	 * now gets a DONE bit writeback.
1955 	 */
1956 	first = txr->next_avail_desc;
1957 	tx_buffer = &txr->tx_buffers[first];
1958 	tx_buffer_mapped = tx_buffer;
1959 	map = tx_buffer->map;
1960 
1961 retry:
1962 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1963 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1964 
1965 	/*
1966 	 * There are two types of errors we can (try) to handle:
1967 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1968 	 *   out of segments.  Defragment the mbuf chain and try again.
1969 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1970 	 *   at this point in time.  Defer sending and try again later.
1971 	 * All other errors, in particular EINVAL, are fatal and prevent the
1972 	 * mbuf chain from ever going through.  Drop it and report error.
1973 	 */
1974 	if (error == EFBIG && remap) {
1975 		struct mbuf *m;
1976 
1977 		m = m_defrag(*m_headp, M_NOWAIT);
1978 		if (m == NULL) {
1979 			adapter->mbuf_alloc_failed++;
1980 			m_freem(*m_headp);
1981 			*m_headp = NULL;
1982 			return (ENOBUFS);
1983 		}
1984 		*m_headp = m;
1985 
1986 		/* Try it again, but only once */
1987 		remap = 0;
1988 		goto retry;
1989 	} else if (error == ENOMEM) {
1990 		adapter->no_tx_dma_setup++;
1991 		return (error);
1992 	} else if (error != 0) {
1993 		adapter->no_tx_dma_setup++;
1994 		m_freem(*m_headp);
1995 		*m_headp = NULL;
1996 		return (error);
1997 	}
1998 
1999 	/*
2000 	 * TSO Hardware workaround, if this packet is not
2001 	 * TSO, and is only a single descriptor long, and
2002 	 * it follows a TSO burst, then we need to add a
2003 	 * sentinel descriptor to prevent premature writeback.
2004 	 */
2005 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2006 		if (nsegs == 1)
2007 			tso_desc = TRUE;
2008 		txr->tx_tso = FALSE;
2009 	}
2010 
2011         if (nsegs > (txr->tx_avail - 2)) {
2012                 txr->no_desc_avail++;
2013 		bus_dmamap_unload(txr->txtag, map);
2014 		return (ENOBUFS);
2015         }
2016 	m_head = *m_headp;
2017 
2018 	/* Do hardware assists */
2019 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2020 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2021 		    &txd_upper, &txd_lower);
2022 		/* we need to make a final sentinel transmit desc */
2023 		tso_desc = TRUE;
2024 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2025 		em_transmit_checksum_setup(txr, m_head,
2026 		    ip_off, ip, &txd_upper, &txd_lower);
2027 
2028 	if (m_head->m_flags & M_VLANTAG) {
2029 		/* Set the vlan id. */
2030 		txd_upper |=
2031 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2032                 /* Tell hardware to add tag */
2033                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2034         }
2035 
2036 	i = txr->next_avail_desc;
2037 
2038 	/* Set up our transmit descriptors */
2039 	for (j = 0; j < nsegs; j++) {
2040 		bus_size_t seg_len;
2041 		bus_addr_t seg_addr;
2042 
2043 		tx_buffer = &txr->tx_buffers[i];
2044 		ctxd = &txr->tx_base[i];
2045 		seg_addr = segs[j].ds_addr;
2046 		seg_len  = segs[j].ds_len;
2047 		/*
2048 		** TSO Workaround:
2049 		** If this is the last descriptor, we want to
2050 		** split it so we have a small final sentinel
2051 		*/
2052 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2053 			seg_len -= 4;
2054 			ctxd->buffer_addr = htole64(seg_addr);
2055 			ctxd->lower.data = htole32(
2056 			adapter->txd_cmd | txd_lower | seg_len);
2057 			ctxd->upper.data =
2058 			    htole32(txd_upper);
2059 			if (++i == adapter->num_tx_desc)
2060 				i = 0;
2061 			/* Now make the sentinel */
2062 			++txd_used; /* using an extra txd */
2063 			ctxd = &txr->tx_base[i];
2064 			tx_buffer = &txr->tx_buffers[i];
2065 			ctxd->buffer_addr =
2066 			    htole64(seg_addr + seg_len);
2067 			ctxd->lower.data = htole32(
2068 			adapter->txd_cmd | txd_lower | 4);
2069 			ctxd->upper.data =
2070 			    htole32(txd_upper);
2071 			last = i;
2072 			if (++i == adapter->num_tx_desc)
2073 				i = 0;
2074 		} else {
2075 			ctxd->buffer_addr = htole64(seg_addr);
2076 			ctxd->lower.data = htole32(
2077 			adapter->txd_cmd | txd_lower | seg_len);
2078 			ctxd->upper.data =
2079 			    htole32(txd_upper);
2080 			last = i;
2081 			if (++i == adapter->num_tx_desc)
2082 				i = 0;
2083 		}
2084 		tx_buffer->m_head = NULL;
2085 		tx_buffer->next_eop = -1;
2086 	}
2087 
2088 	txr->next_avail_desc = i;
2089 	txr->tx_avail -= nsegs;
2090 	if (tso_desc) /* TSO used an extra for sentinel */
2091 		txr->tx_avail -= txd_used;
2092 
2093         tx_buffer->m_head = m_head;
2094 	/*
2095 	** Here we swap the map so the last descriptor,
2096 	** which gets the completion interrupt has the
2097 	** real map, and the first descriptor gets the
2098 	** unused map from this descriptor.
2099 	*/
2100 	tx_buffer_mapped->map = tx_buffer->map;
2101 	tx_buffer->map = map;
2102         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2103 
2104         /*
2105          * Last Descriptor of Packet
2106 	 * needs End Of Packet (EOP)
2107 	 * and Report Status (RS)
2108          */
2109         ctxd->lower.data |=
2110 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2111 	/*
2112 	 * Keep track in the first buffer which
2113 	 * descriptor will be written back
2114 	 */
2115 	tx_buffer = &txr->tx_buffers[first];
2116 	tx_buffer->next_eop = last;
2117 	/* Update the watchdog time early and often */
2118 	txr->watchdog_time = ticks;
2119 
2120 	/*
2121 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2122 	 * that this frame is available to transmit.
2123 	 */
2124 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2125 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2126 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2127 
2128 	return (0);
2129 }
2130 
2131 static void
em_set_promisc(struct adapter * adapter)2132 em_set_promisc(struct adapter *adapter)
2133 {
2134 	struct ifnet	*ifp = adapter->ifp;
2135 	u32		reg_rctl;
2136 
2137 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2138 
2139 	if (ifp->if_flags & IFF_PROMISC) {
2140 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2141 		/* Turn this on if you want to see bad packets */
2142 		if (em_debug_sbp)
2143 			reg_rctl |= E1000_RCTL_SBP;
2144 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2145 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2146 		reg_rctl |= E1000_RCTL_MPE;
2147 		reg_rctl &= ~E1000_RCTL_UPE;
2148 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2149 	}
2150 }
2151 
2152 static void
em_disable_promisc(struct adapter * adapter)2153 em_disable_promisc(struct adapter *adapter)
2154 {
2155 	struct ifnet	*ifp = adapter->ifp;
2156 	u32		reg_rctl;
2157 	int		mcnt = 0;
2158 
2159 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2160 	reg_rctl &=  (~E1000_RCTL_UPE);
2161 	if (ifp->if_flags & IFF_ALLMULTI)
2162 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2163 	else {
2164 		struct  ifmultiaddr *ifma;
2165 #if __FreeBSD_version < 800000
2166 		IF_ADDR_LOCK(ifp);
2167 #else
2168 		if_maddr_rlock(ifp);
2169 #endif
2170 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2171 			if (ifma->ifma_addr->sa_family != AF_LINK)
2172 				continue;
2173 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2174 				break;
2175 			mcnt++;
2176 		}
2177 #if __FreeBSD_version < 800000
2178 		IF_ADDR_UNLOCK(ifp);
2179 #else
2180 		if_maddr_runlock(ifp);
2181 #endif
2182 	}
2183 	/* Don't disable if in MAX groups */
2184 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2185 		reg_rctl &=  (~E1000_RCTL_MPE);
2186 	reg_rctl &=  (~E1000_RCTL_SBP);
2187 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188 }
2189 
2190 
2191 /*********************************************************************
2192  *  Multicast Update
2193  *
2194  *  This routine is called whenever multicast address list is updated.
2195  *
2196  **********************************************************************/
2197 
2198 static void
em_set_multi(struct adapter * adapter)2199 em_set_multi(struct adapter *adapter)
2200 {
2201 	struct ifnet	*ifp = adapter->ifp;
2202 	struct ifmultiaddr *ifma;
2203 	u32 reg_rctl = 0;
2204 	u8  *mta; /* Multicast array memory */
2205 	int mcnt = 0;
2206 
2207 	IOCTL_DEBUGOUT("em_set_multi: begin");
2208 
2209 	mta = adapter->mta;
2210 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2211 
2212 	if (adapter->hw.mac.type == e1000_82542 &&
2213 	    adapter->hw.revision_id == E1000_REVISION_2) {
2214 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2215 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2216 			e1000_pci_clear_mwi(&adapter->hw);
2217 		reg_rctl |= E1000_RCTL_RST;
2218 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2219 		msec_delay(5);
2220 	}
2221 
2222 #if __FreeBSD_version < 800000
2223 	IF_ADDR_LOCK(ifp);
2224 #else
2225 	if_maddr_rlock(ifp);
2226 #endif
2227 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2228 		if (ifma->ifma_addr->sa_family != AF_LINK)
2229 			continue;
2230 
2231 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2232 			break;
2233 
2234 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2235 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2236 		mcnt++;
2237 	}
2238 #if __FreeBSD_version < 800000
2239 	IF_ADDR_UNLOCK(ifp);
2240 #else
2241 	if_maddr_runlock(ifp);
2242 #endif
2243 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2244 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2245 		reg_rctl |= E1000_RCTL_MPE;
2246 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2247 	} else
2248 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2249 
2250 	if (adapter->hw.mac.type == e1000_82542 &&
2251 	    adapter->hw.revision_id == E1000_REVISION_2) {
2252 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2253 		reg_rctl &= ~E1000_RCTL_RST;
2254 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2255 		msec_delay(5);
2256 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2257 			e1000_pci_set_mwi(&adapter->hw);
2258 	}
2259 }
2260 
2261 
2262 /*********************************************************************
2263  *  Timer routine
2264  *
2265  *  This routine checks for link status and updates statistics.
2266  *
2267  **********************************************************************/
2268 
2269 static void
em_local_timer(void * arg)2270 em_local_timer(void *arg)
2271 {
2272 	struct adapter	*adapter = arg;
2273 	struct ifnet	*ifp = adapter->ifp;
2274 	struct tx_ring	*txr = adapter->tx_rings;
2275 	struct rx_ring	*rxr = adapter->rx_rings;
2276 	u32		trigger;
2277 
2278 	EM_CORE_LOCK_ASSERT(adapter);
2279 
2280 	em_update_link_status(adapter);
2281 	em_update_stats_counters(adapter);
2282 
2283 	/* Reset LAA into RAR[0] on 82571 */
2284 	if ((adapter->hw.mac.type == e1000_82571) &&
2285 	    e1000_get_laa_state_82571(&adapter->hw))
2286 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2287 
2288 	/* Mask to use in the irq trigger */
2289 	if (adapter->msix_mem)
2290 		trigger = rxr->ims;
2291 	else
2292 		trigger = E1000_ICS_RXDMT0;
2293 
2294 	/*
2295 	** Check on the state of the TX queue(s), this
2296 	** can be done without the lock because its RO
2297 	** and the HUNG state will be static if set.
2298 	*/
2299 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2300 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2301 		    (adapter->pause_frames == 0))
2302 			goto hung;
2303 		/* Schedule a TX tasklet if needed */
2304 		if (txr->tx_avail <= EM_MAX_SCATTER)
2305 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2306 	}
2307 
2308 	adapter->pause_frames = 0;
2309 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2310 #ifndef DEVICE_POLLING
2311 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2312 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2313 #endif
2314 	return;
2315 hung:
2316 	/* Looks like we're hung */
2317 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2318 	device_printf(adapter->dev,
2319 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2320 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2321 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2322 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2323 	    "Next TX to Clean = %d\n",
2324 	    txr->me, txr->tx_avail, txr->next_to_clean);
2325 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2326 	adapter->watchdog_events++;
2327 	adapter->pause_frames = 0;
2328 	em_init_locked(adapter);
2329 }
2330 
2331 
2332 static void
em_update_link_status(struct adapter * adapter)2333 em_update_link_status(struct adapter *adapter)
2334 {
2335 	struct e1000_hw *hw = &adapter->hw;
2336 	struct ifnet *ifp = adapter->ifp;
2337 	device_t dev = adapter->dev;
2338 	struct tx_ring *txr = adapter->tx_rings;
2339 	u32 link_check = 0;
2340 
2341 	/* Get the cached link value or read phy for real */
2342 	switch (hw->phy.media_type) {
2343 	case e1000_media_type_copper:
2344 		if (hw->mac.get_link_status) {
2345 			/* Do the work to read phy */
2346 			e1000_check_for_link(hw);
2347 			link_check = !hw->mac.get_link_status;
2348 			if (link_check) /* ESB2 fix */
2349 				e1000_cfg_on_link_up(hw);
2350 		} else
2351 			link_check = TRUE;
2352 		break;
2353 	case e1000_media_type_fiber:
2354 		e1000_check_for_link(hw);
2355 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2356                                  E1000_STATUS_LU);
2357 		break;
2358 	case e1000_media_type_internal_serdes:
2359 		e1000_check_for_link(hw);
2360 		link_check = adapter->hw.mac.serdes_has_link;
2361 		break;
2362 	default:
2363 	case e1000_media_type_unknown:
2364 		break;
2365 	}
2366 
2367 	/* Now check for a transition */
2368 	if (link_check && (adapter->link_active == 0)) {
2369 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2370 		    &adapter->link_duplex);
2371 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2372 		if ((adapter->link_speed != SPEED_1000) &&
2373 		    ((hw->mac.type == e1000_82571) ||
2374 		    (hw->mac.type == e1000_82572))) {
2375 			int tarc0;
2376 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2377 			tarc0 &= ~SPEED_MODE_BIT;
2378 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2379 		}
2380 		if (bootverbose)
2381 			device_printf(dev, "Link is up %d Mbps %s\n",
2382 			    adapter->link_speed,
2383 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2384 			    "Full Duplex" : "Half Duplex"));
2385 		adapter->link_active = 1;
2386 		adapter->smartspeed = 0;
2387 		ifp->if_baudrate = adapter->link_speed * 1000000;
2388 		if_link_state_change(ifp, LINK_STATE_UP);
2389 	} else if (!link_check && (adapter->link_active == 1)) {
2390 		ifp->if_baudrate = adapter->link_speed = 0;
2391 		adapter->link_duplex = 0;
2392 		if (bootverbose)
2393 			device_printf(dev, "Link is Down\n");
2394 		adapter->link_active = 0;
2395 		/* Link down, disable watchdog */
2396 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2397 			txr->queue_status = EM_QUEUE_IDLE;
2398 		if_link_state_change(ifp, LINK_STATE_DOWN);
2399 	}
2400 }
2401 
2402 /*********************************************************************
2403  *
2404  *  This routine disables all traffic on the adapter by issuing a
2405  *  global reset on the MAC and deallocates TX/RX buffers.
2406  *
2407  *  This routine should always be called with BOTH the CORE
2408  *  and TX locks.
2409  **********************************************************************/
2410 
2411 static void
em_stop(void * arg)2412 em_stop(void *arg)
2413 {
2414 	struct adapter	*adapter = arg;
2415 	struct ifnet	*ifp = adapter->ifp;
2416 	struct tx_ring	*txr = adapter->tx_rings;
2417 
2418 	EM_CORE_LOCK_ASSERT(adapter);
2419 
2420 	INIT_DEBUGOUT("em_stop: begin");
2421 
2422 	em_disable_intr(adapter);
2423 	callout_stop(&adapter->timer);
2424 
2425 	/* Tell the stack that the interface is no longer active */
2426 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2427 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2428 
2429         /* Unarm watchdog timer. */
2430 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2431 		EM_TX_LOCK(txr);
2432 		txr->queue_status = EM_QUEUE_IDLE;
2433 		EM_TX_UNLOCK(txr);
2434 	}
2435 
2436 	e1000_reset_hw(&adapter->hw);
2437 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2438 
2439 	e1000_led_off(&adapter->hw);
2440 	e1000_cleanup_led(&adapter->hw);
2441 }
2442 
2443 
2444 /*********************************************************************
2445  *
2446  *  Determine hardware revision.
2447  *
2448  **********************************************************************/
2449 static void
em_identify_hardware(struct adapter * adapter)2450 em_identify_hardware(struct adapter *adapter)
2451 {
2452 	device_t dev = adapter->dev;
2453 
2454 	/* Make sure our PCI config space has the necessary stuff set */
2455 	pci_enable_busmaster(dev);
2456 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2457 
2458 	/* Save off the information about this board */
2459 	adapter->hw.vendor_id = pci_get_vendor(dev);
2460 	adapter->hw.device_id = pci_get_device(dev);
2461 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2462 	adapter->hw.subsystem_vendor_id =
2463 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2464 	adapter->hw.subsystem_device_id =
2465 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2466 
2467 	/* Do Shared Code Init and Setup */
2468 	if (e1000_set_mac_type(&adapter->hw)) {
2469 		device_printf(dev, "Setup init failure\n");
2470 		return;
2471 	}
2472 }
2473 
2474 static int
em_allocate_pci_resources(struct adapter * adapter)2475 em_allocate_pci_resources(struct adapter *adapter)
2476 {
2477 	device_t	dev = adapter->dev;
2478 	int		rid;
2479 
2480 	rid = PCIR_BAR(0);
2481 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2482 	    &rid, RF_ACTIVE);
2483 	if (adapter->memory == NULL) {
2484 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2485 		return (ENXIO);
2486 	}
2487 	adapter->osdep.mem_bus_space_tag =
2488 	    rman_get_bustag(adapter->memory);
2489 	adapter->osdep.mem_bus_space_handle =
2490 	    rman_get_bushandle(adapter->memory);
2491 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2492 
2493 	/* Default to a single queue */
2494 	adapter->num_queues = 1;
2495 
2496 	/*
2497 	 * Setup MSI/X or MSI if PCI Express
2498 	 */
2499 	adapter->msix = em_setup_msix(adapter);
2500 
2501 	adapter->hw.back = &adapter->osdep;
2502 
2503 	return (0);
2504 }
2505 
2506 /*********************************************************************
2507  *
2508  *  Setup the Legacy or MSI Interrupt handler
2509  *
2510  **********************************************************************/
2511 int
em_allocate_legacy(struct adapter * adapter)2512 em_allocate_legacy(struct adapter *adapter)
2513 {
2514 	device_t dev = adapter->dev;
2515 	struct tx_ring	*txr = adapter->tx_rings;
2516 	int error, rid = 0;
2517 
2518 	/* Manually turn off all interrupts */
2519 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2520 
2521 	if (adapter->msix == 1) /* using MSI */
2522 		rid = 1;
2523 	/* We allocate a single interrupt resource */
2524 	adapter->res = bus_alloc_resource_any(dev,
2525 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2526 	if (adapter->res == NULL) {
2527 		device_printf(dev, "Unable to allocate bus resource: "
2528 		    "interrupt\n");
2529 		return (ENXIO);
2530 	}
2531 
2532 	/*
2533 	 * Allocate a fast interrupt and the associated
2534 	 * deferred processing contexts.
2535 	 */
2536 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2537 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2538 	    taskqueue_thread_enqueue, &adapter->tq);
2539 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2540 	    device_get_nameunit(adapter->dev));
2541 	/* Use a TX only tasklet for local timer */
2542 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2543 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2544 	    taskqueue_thread_enqueue, &txr->tq);
2545 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2546 	    device_get_nameunit(adapter->dev));
2547 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2548 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2549 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2550 		device_printf(dev, "Failed to register fast interrupt "
2551 			    "handler: %d\n", error);
2552 		taskqueue_free(adapter->tq);
2553 		adapter->tq = NULL;
2554 		return (error);
2555 	}
2556 
2557 	return (0);
2558 }
2559 
2560 /*********************************************************************
2561  *
2562  *  Setup the MSIX Interrupt handlers
2563  *   This is not really Multiqueue, rather
2564  *   its just seperate interrupt vectors
2565  *   for TX, RX, and Link.
2566  *
2567  **********************************************************************/
2568 int
em_allocate_msix(struct adapter * adapter)2569 em_allocate_msix(struct adapter *adapter)
2570 {
2571 	device_t	dev = adapter->dev;
2572 	struct		tx_ring *txr = adapter->tx_rings;
2573 	struct		rx_ring *rxr = adapter->rx_rings;
2574 	int		error, rid, vector = 0;
2575 
2576 
2577 	/* Make sure all interrupts are disabled */
2578 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2579 
2580 	/* First set up ring resources */
2581 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2582 
2583 		/* RX ring */
2584 		rid = vector + 1;
2585 
2586 		rxr->res = bus_alloc_resource_any(dev,
2587 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2588 		if (rxr->res == NULL) {
2589 			device_printf(dev,
2590 			    "Unable to allocate bus resource: "
2591 			    "RX MSIX Interrupt %d\n", i);
2592 			return (ENXIO);
2593 		}
2594 		if ((error = bus_setup_intr(dev, rxr->res,
2595 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2596 		    rxr, &rxr->tag)) != 0) {
2597 			device_printf(dev, "Failed to register RX handler");
2598 			return (error);
2599 		}
2600 #if __FreeBSD_version >= 800504
2601 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2602 #endif
2603 		rxr->msix = vector++; /* NOTE increment vector for TX */
2604 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2605 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2606 		    taskqueue_thread_enqueue, &rxr->tq);
2607 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2608 		    device_get_nameunit(adapter->dev));
2609 		/*
2610 		** Set the bit to enable interrupt
2611 		** in E1000_IMS -- bits 20 and 21
2612 		** are for RX0 and RX1, note this has
2613 		** NOTHING to do with the MSIX vector
2614 		*/
2615 		rxr->ims = 1 << (20 + i);
2616 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2617 
2618 		/* TX ring */
2619 		rid = vector + 1;
2620 		txr->res = bus_alloc_resource_any(dev,
2621 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2622 		if (txr->res == NULL) {
2623 			device_printf(dev,
2624 			    "Unable to allocate bus resource: "
2625 			    "TX MSIX Interrupt %d\n", i);
2626 			return (ENXIO);
2627 		}
2628 		if ((error = bus_setup_intr(dev, txr->res,
2629 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2630 		    txr, &txr->tag)) != 0) {
2631 			device_printf(dev, "Failed to register TX handler");
2632 			return (error);
2633 		}
2634 #if __FreeBSD_version >= 800504
2635 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2636 #endif
2637 		txr->msix = vector++; /* Increment vector for next pass */
2638 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2639 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2640 		    taskqueue_thread_enqueue, &txr->tq);
2641 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2642 		    device_get_nameunit(adapter->dev));
2643 		/*
2644 		** Set the bit to enable interrupt
2645 		** in E1000_IMS -- bits 22 and 23
2646 		** are for TX0 and TX1, note this has
2647 		** NOTHING to do with the MSIX vector
2648 		*/
2649 		txr->ims = 1 << (22 + i);
2650 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2651 	}
2652 
2653 	/* Link interrupt */
2654 	++rid;
2655 	adapter->res = bus_alloc_resource_any(dev,
2656 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2657 	if (!adapter->res) {
2658 		device_printf(dev,"Unable to allocate "
2659 		    "bus resource: Link interrupt [%d]\n", rid);
2660 		return (ENXIO);
2661         }
2662 	/* Set the link handler function */
2663 	error = bus_setup_intr(dev, adapter->res,
2664 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2665 	    em_msix_link, adapter, &adapter->tag);
2666 	if (error) {
2667 		adapter->res = NULL;
2668 		device_printf(dev, "Failed to register LINK handler");
2669 		return (error);
2670 	}
2671 #if __FreeBSD_version >= 800504
2672 		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2673 #endif
2674 	adapter->linkvec = vector;
2675 	adapter->ivars |=  (8 | vector) << 16;
2676 	adapter->ivars |= 0x80000000;
2677 
2678 	return (0);
2679 }
2680 
2681 
2682 static void
em_free_pci_resources(struct adapter * adapter)2683 em_free_pci_resources(struct adapter *adapter)
2684 {
2685 	device_t	dev = adapter->dev;
2686 	struct tx_ring	*txr;
2687 	struct rx_ring	*rxr;
2688 	int		rid;
2689 
2690 
2691 	/*
2692 	** Release all the queue interrupt resources:
2693 	*/
2694 	for (int i = 0; i < adapter->num_queues; i++) {
2695 		txr = &adapter->tx_rings[i];
2696 		rxr = &adapter->rx_rings[i];
2697 		/* an early abort? */
2698 		if ((txr == NULL) || (rxr == NULL))
2699 			break;
2700 		rid = txr->msix +1;
2701 		if (txr->tag != NULL) {
2702 			bus_teardown_intr(dev, txr->res, txr->tag);
2703 			txr->tag = NULL;
2704 		}
2705 		if (txr->res != NULL)
2706 			bus_release_resource(dev, SYS_RES_IRQ,
2707 			    rid, txr->res);
2708 		rid = rxr->msix +1;
2709 		if (rxr->tag != NULL) {
2710 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2711 			rxr->tag = NULL;
2712 		}
2713 		if (rxr->res != NULL)
2714 			bus_release_resource(dev, SYS_RES_IRQ,
2715 			    rid, rxr->res);
2716 	}
2717 
2718         if (adapter->linkvec) /* we are doing MSIX */
2719                 rid = adapter->linkvec + 1;
2720         else
2721                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2722 
2723 	if (adapter->tag != NULL) {
2724 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2725 		adapter->tag = NULL;
2726 	}
2727 
2728 	if (adapter->res != NULL)
2729 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2730 
2731 
2732 	if (adapter->msix)
2733 		pci_release_msi(dev);
2734 
2735 	if (adapter->msix_mem != NULL)
2736 		bus_release_resource(dev, SYS_RES_MEMORY,
2737 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2738 
2739 	if (adapter->memory != NULL)
2740 		bus_release_resource(dev, SYS_RES_MEMORY,
2741 		    PCIR_BAR(0), adapter->memory);
2742 
2743 	if (adapter->flash != NULL)
2744 		bus_release_resource(dev, SYS_RES_MEMORY,
2745 		    EM_FLASH, adapter->flash);
2746 }
2747 
2748 /*
2749  * Setup MSI or MSI/X
2750  */
2751 static int
em_setup_msix(struct adapter * adapter)2752 em_setup_msix(struct adapter *adapter)
2753 {
2754 	device_t dev = adapter->dev;
2755 	int val;
2756 
2757 	/*
2758 	** Setup MSI/X for Hartwell: tests have shown
2759 	** use of two queues to be unstable, and to
2760 	** provide no great gain anyway, so we simply
2761 	** seperate the interrupts and use a single queue.
2762 	*/
2763 	if ((adapter->hw.mac.type == e1000_82574) &&
2764 	    (em_enable_msix == TRUE)) {
2765 		/* Map the MSIX BAR */
2766 		int rid = PCIR_BAR(EM_MSIX_BAR);
2767 		adapter->msix_mem = bus_alloc_resource_any(dev,
2768 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2769        		if (adapter->msix_mem == NULL) {
2770 			/* May not be enabled */
2771                		device_printf(adapter->dev,
2772 			    "Unable to map MSIX table \n");
2773 			goto msi;
2774        		}
2775 		val = pci_msix_count(dev);
2776 		/* We only need/want 3 vectors */
2777 		if (val >= 3)
2778 			val = 3;
2779 		else {
2780                		device_printf(adapter->dev,
2781 			    "MSIX: insufficient vectors, using MSI\n");
2782 			goto msi;
2783 		}
2784 
2785 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2786 			device_printf(adapter->dev,
2787 			    "Using MSIX interrupts "
2788 			    "with %d vectors\n", val);
2789 			return (val);
2790 		}
2791 
2792 		/*
2793 		** If MSIX alloc failed or provided us with
2794 		** less than needed, free and fall through to MSI
2795 		*/
2796 		pci_release_msi(dev);
2797 	}
2798 msi:
2799 	if (adapter->msix_mem != NULL) {
2800 		bus_release_resource(dev, SYS_RES_MEMORY,
2801 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2802 		adapter->msix_mem = NULL;
2803 	}
2804        	val = 1;
2805        	if (pci_alloc_msi(dev, &val) == 0) {
2806                	device_printf(adapter->dev,"Using an MSI interrupt\n");
2807 		return (val);
2808 	}
2809 	/* Should only happen due to manual configuration */
2810 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2811 	return (0);
2812 }
2813 
2814 
2815 /*********************************************************************
2816  *
2817  *  Initialize the hardware to a configuration
2818  *  as specified by the adapter structure.
2819  *
2820  **********************************************************************/
2821 static void
em_reset(struct adapter * adapter)2822 em_reset(struct adapter *adapter)
2823 {
2824 	device_t	dev = adapter->dev;
2825 	struct ifnet	*ifp = adapter->ifp;
2826 	struct e1000_hw	*hw = &adapter->hw;
2827 	u16		rx_buffer_size;
2828 	u32		pba;
2829 
2830 	INIT_DEBUGOUT("em_reset: begin");
2831 
2832 	/* Set up smart power down as default off on newer adapters. */
2833 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2834 	    hw->mac.type == e1000_82572)) {
2835 		u16 phy_tmp = 0;
2836 
2837 		/* Speed up time to link by disabling smart power down. */
2838 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2839 		phy_tmp &= ~IGP02E1000_PM_SPD;
2840 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2841 	}
2842 
2843 	/*
2844 	 * Packet Buffer Allocation (PBA)
2845 	 * Writing PBA sets the receive portion of the buffer
2846 	 * the remainder is used for the transmit buffer.
2847 	 */
2848 	switch (hw->mac.type) {
2849 	/* Total Packet Buffer on these is 48K */
2850 	case e1000_82571:
2851 	case e1000_82572:
2852 	case e1000_80003es2lan:
2853 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2854 		break;
2855 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2856 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2857 		break;
2858 	case e1000_82574:
2859 	case e1000_82583:
2860 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2861 		break;
2862 	case e1000_ich8lan:
2863 		pba = E1000_PBA_8K;
2864 		break;
2865 	case e1000_ich9lan:
2866 	case e1000_ich10lan:
2867 		/* Boost Receive side for jumbo frames */
2868 		if (adapter->hw.mac.max_frame_size > 4096)
2869 			pba = E1000_PBA_14K;
2870 		else
2871 			pba = E1000_PBA_10K;
2872 		break;
2873 	case e1000_pchlan:
2874 	case e1000_pch2lan:
2875 	case e1000_pch_lpt:
2876 		pba = E1000_PBA_26K;
2877 		break;
2878 	default:
2879 		if (adapter->hw.mac.max_frame_size > 8192)
2880 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2881 		else
2882 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2883 	}
2884 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2885 
2886 	/*
2887 	 * These parameters control the automatic generation (Tx) and
2888 	 * response (Rx) to Ethernet PAUSE frames.
2889 	 * - High water mark should allow for at least two frames to be
2890 	 *   received after sending an XOFF.
2891 	 * - Low water mark works best when it is very near the high water mark.
2892 	 *   This allows the receiver to restart by sending XON when it has
2893 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2894 	 *   restart after one full frame is pulled from the buffer. There
2895 	 *   could be several smaller frames in the buffer and if so they will
2896 	 *   not trigger the XON until their total number reduces the buffer
2897 	 *   by 1500.
2898 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2899 	 */
2900 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2901 	hw->fc.high_water = rx_buffer_size -
2902 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2903 	hw->fc.low_water = hw->fc.high_water - 1500;
2904 
2905 	if (adapter->fc) /* locally set flow control value? */
2906 		hw->fc.requested_mode = adapter->fc;
2907 	else
2908 		hw->fc.requested_mode = e1000_fc_full;
2909 
2910 	if (hw->mac.type == e1000_80003es2lan)
2911 		hw->fc.pause_time = 0xFFFF;
2912 	else
2913 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2914 
2915 	hw->fc.send_xon = TRUE;
2916 
2917 	/* Device specific overrides/settings */
2918 	switch (hw->mac.type) {
2919 	case e1000_pchlan:
2920 		/* Workaround: no TX flow ctrl for PCH */
2921                 hw->fc.requested_mode = e1000_fc_rx_pause;
2922 		hw->fc.pause_time = 0xFFFF; /* override */
2923 		if (ifp->if_mtu > ETHERMTU) {
2924 			hw->fc.high_water = 0x3500;
2925 			hw->fc.low_water = 0x1500;
2926 		} else {
2927 			hw->fc.high_water = 0x5000;
2928 			hw->fc.low_water = 0x3000;
2929 		}
2930 		hw->fc.refresh_time = 0x1000;
2931 		break;
2932 	case e1000_pch2lan:
2933 	case e1000_pch_lpt:
2934 		hw->fc.high_water = 0x5C20;
2935 		hw->fc.low_water = 0x5048;
2936 		hw->fc.pause_time = 0x0650;
2937 		hw->fc.refresh_time = 0x0400;
2938 		/* Jumbos need adjusted PBA */
2939 		if (ifp->if_mtu > ETHERMTU)
2940 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2941 		else
2942 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2943 		break;
2944         case e1000_ich9lan:
2945         case e1000_ich10lan:
2946 		if (ifp->if_mtu > ETHERMTU) {
2947 			hw->fc.high_water = 0x2800;
2948 			hw->fc.low_water = hw->fc.high_water - 8;
2949 			break;
2950 		}
2951 		/* else fall thru */
2952 	default:
2953 		if (hw->mac.type == e1000_80003es2lan)
2954 			hw->fc.pause_time = 0xFFFF;
2955 		break;
2956 	}
2957 
2958 	/* Issue a global reset */
2959 	e1000_reset_hw(hw);
2960 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2961 	em_disable_aspm(adapter);
2962 	/* and a re-init */
2963 	if (e1000_init_hw(hw) < 0) {
2964 		device_printf(dev, "Hardware Initialization Failed\n");
2965 		return;
2966 	}
2967 
2968 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2969 	e1000_get_phy_info(hw);
2970 	e1000_check_for_link(hw);
2971 	return;
2972 }
2973 
2974 /*********************************************************************
2975  *
2976  *  Setup networking device structure and register an interface.
2977  *
2978  **********************************************************************/
2979 static int
em_setup_interface(device_t dev,struct adapter * adapter)2980 em_setup_interface(device_t dev, struct adapter *adapter)
2981 {
2982 	struct ifnet   *ifp;
2983 
2984 	INIT_DEBUGOUT("em_setup_interface: begin");
2985 
2986 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2987 	if (ifp == NULL) {
2988 		device_printf(dev, "can not allocate ifnet structure\n");
2989 		return (-1);
2990 	}
2991 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2992 	ifp->if_init =  em_init;
2993 	ifp->if_softc = adapter;
2994 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2995 	ifp->if_ioctl = em_ioctl;
2996 #ifdef EM_MULTIQUEUE
2997 	/* Multiqueue stack interface */
2998 	ifp->if_transmit = em_mq_start;
2999 	ifp->if_qflush = em_qflush;
3000 #else
3001 	ifp->if_start = em_start;
3002 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3003 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3004 	IFQ_SET_READY(&ifp->if_snd);
3005 #endif
3006 
3007 	ether_ifattach(ifp, adapter->hw.mac.addr);
3008 
3009 	ifp->if_capabilities = ifp->if_capenable = 0;
3010 
3011 
3012 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3013 	ifp->if_capabilities |= IFCAP_TSO4;
3014 	/*
3015 	 * Tell the upper layer(s) we
3016 	 * support full VLAN capability
3017 	 */
3018 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3019 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3020 			     |  IFCAP_VLAN_HWTSO
3021 			     |  IFCAP_VLAN_MTU;
3022 	ifp->if_capenable = ifp->if_capabilities;
3023 
3024 	/*
3025 	** Don't turn this on by default, if vlans are
3026 	** created on another pseudo device (eg. lagg)
3027 	** then vlan events are not passed thru, breaking
3028 	** operation, but with HW FILTER off it works. If
3029 	** using vlans directly on the em driver you can
3030 	** enable this and get full hardware tag filtering.
3031 	*/
3032 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3033 
3034 #ifdef DEVICE_POLLING
3035 	ifp->if_capabilities |= IFCAP_POLLING;
3036 #endif
3037 
3038 	/* Enable only WOL MAGIC by default */
3039 	if (adapter->wol) {
3040 		ifp->if_capabilities |= IFCAP_WOL;
3041 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3042 	}
3043 
3044 	/*
3045 	 * Specify the media types supported by this adapter and register
3046 	 * callbacks to update media and link information
3047 	 */
3048 	ifmedia_init(&adapter->media, IFM_IMASK,
3049 	    em_media_change, em_media_status);
3050 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3051 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3052 		u_char fiber_type = IFM_1000_SX;	/* default type */
3053 
3054 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3055 			    0, NULL);
3056 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3057 	} else {
3058 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3059 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3060 			    0, NULL);
3061 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3062 			    0, NULL);
3063 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3064 			    0, NULL);
3065 		if (adapter->hw.phy.type != e1000_phy_ife) {
3066 			ifmedia_add(&adapter->media,
3067 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3068 			ifmedia_add(&adapter->media,
3069 				IFM_ETHER | IFM_1000_T, 0, NULL);
3070 		}
3071 	}
3072 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3073 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3074 	return (0);
3075 }
3076 
3077 
3078 /*
3079  * Manage DMA'able memory.
3080  */
3081 static void
em_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)3082 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3083 {
3084 	if (error)
3085 		return;
3086 	*(bus_addr_t *) arg = segs[0].ds_addr;
3087 }
3088 
3089 static int
em_dma_malloc(struct adapter * adapter,bus_size_t size,struct em_dma_alloc * dma,int mapflags)3090 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3091         struct em_dma_alloc *dma, int mapflags)
3092 {
3093 	int error;
3094 
3095 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3096 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3097 				BUS_SPACE_MAXADDR,	/* lowaddr */
3098 				BUS_SPACE_MAXADDR,	/* highaddr */
3099 				NULL, NULL,		/* filter, filterarg */
3100 				size,			/* maxsize */
3101 				1,			/* nsegments */
3102 				size,			/* maxsegsize */
3103 				0,			/* flags */
3104 				NULL,			/* lockfunc */
3105 				NULL,			/* lockarg */
3106 				&dma->dma_tag);
3107 	if (error) {
3108 		device_printf(adapter->dev,
3109 		    "%s: bus_dma_tag_create failed: %d\n",
3110 		    __func__, error);
3111 		goto fail_0;
3112 	}
3113 
3114 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3115 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3116 	if (error) {
3117 		device_printf(adapter->dev,
3118 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3119 		    __func__, (uintmax_t)size, error);
3120 		goto fail_2;
3121 	}
3122 
3123 	dma->dma_paddr = 0;
3124 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3125 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3126 	if (error || dma->dma_paddr == 0) {
3127 		device_printf(adapter->dev,
3128 		    "%s: bus_dmamap_load failed: %d\n",
3129 		    __func__, error);
3130 		goto fail_3;
3131 	}
3132 
3133 	return (0);
3134 
3135 fail_3:
3136 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3137 fail_2:
3138 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3139 	bus_dma_tag_destroy(dma->dma_tag);
3140 fail_0:
3141 	dma->dma_map = NULL;
3142 	dma->dma_tag = NULL;
3143 
3144 	return (error);
3145 }
3146 
3147 static void
em_dma_free(struct adapter * adapter,struct em_dma_alloc * dma)3148 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3149 {
3150 	if (dma->dma_tag == NULL)
3151 		return;
3152 	if (dma->dma_map != NULL) {
3153 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3154 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3155 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3156 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3157 		dma->dma_map = NULL;
3158 	}
3159 	bus_dma_tag_destroy(dma->dma_tag);
3160 	dma->dma_tag = NULL;
3161 }
3162 
3163 
3164 /*********************************************************************
3165  *
3166  *  Allocate memory for the transmit and receive rings, and then
3167  *  the descriptors associated with each, called only once at attach.
3168  *
3169  **********************************************************************/
3170 static int
em_allocate_queues(struct adapter * adapter)3171 em_allocate_queues(struct adapter *adapter)
3172 {
3173 	device_t		dev = adapter->dev;
3174 	struct tx_ring		*txr = NULL;
3175 	struct rx_ring		*rxr = NULL;
3176 	int rsize, tsize, error = E1000_SUCCESS;
3177 	int txconf = 0, rxconf = 0;
3178 
3179 
3180 	/* Allocate the TX ring struct memory */
3181 	if (!(adapter->tx_rings =
3182 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3183 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184 		device_printf(dev, "Unable to allocate TX ring memory\n");
3185 		error = ENOMEM;
3186 		goto fail;
3187 	}
3188 
3189 	/* Now allocate the RX */
3190 	if (!(adapter->rx_rings =
3191 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3192 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3193 		device_printf(dev, "Unable to allocate RX ring memory\n");
3194 		error = ENOMEM;
3195 		goto rx_fail;
3196 	}
3197 
3198 	tsize = roundup2(adapter->num_tx_desc *
3199 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3200 	/*
3201 	 * Now set up the TX queues, txconf is needed to handle the
3202 	 * possibility that things fail midcourse and we need to
3203 	 * undo memory gracefully
3204 	 */
3205 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3206 		/* Set up some basics */
3207 		txr = &adapter->tx_rings[i];
3208 		txr->adapter = adapter;
3209 		txr->me = i;
3210 
3211 		/* Initialize the TX lock */
3212 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3213 		    device_get_nameunit(dev), txr->me);
3214 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3215 
3216 		if (em_dma_malloc(adapter, tsize,
3217 			&txr->txdma, BUS_DMA_NOWAIT)) {
3218 			device_printf(dev,
3219 			    "Unable to allocate TX Descriptor memory\n");
3220 			error = ENOMEM;
3221 			goto err_tx_desc;
3222 		}
3223 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3224 		bzero((void *)txr->tx_base, tsize);
3225 
3226         	if (em_allocate_transmit_buffers(txr)) {
3227 			device_printf(dev,
3228 			    "Critical Failure setting up transmit buffers\n");
3229 			error = ENOMEM;
3230 			goto err_tx_desc;
3231         	}
3232 #if __FreeBSD_version >= 800000
3233 		/* Allocate a buf ring */
3234 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3235 		    M_WAITOK, &txr->tx_mtx);
3236 #endif
3237 	}
3238 
3239 	/*
3240 	 * Next the RX queues...
3241 	 */
3242 	rsize = roundup2(adapter->num_rx_desc *
3243 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3244 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3245 		rxr = &adapter->rx_rings[i];
3246 		rxr->adapter = adapter;
3247 		rxr->me = i;
3248 
3249 		/* Initialize the RX lock */
3250 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3251 		    device_get_nameunit(dev), txr->me);
3252 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3253 
3254 		if (em_dma_malloc(adapter, rsize,
3255 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3256 			device_printf(dev,
3257 			    "Unable to allocate RxDescriptor memory\n");
3258 			error = ENOMEM;
3259 			goto err_rx_desc;
3260 		}
3261 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3262 		bzero((void *)rxr->rx_base, rsize);
3263 
3264         	/* Allocate receive buffers for the ring*/
3265 		if (em_allocate_receive_buffers(rxr)) {
3266 			device_printf(dev,
3267 			    "Critical Failure setting up receive buffers\n");
3268 			error = ENOMEM;
3269 			goto err_rx_desc;
3270 		}
3271 	}
3272 
3273 	return (0);
3274 
3275 err_rx_desc:
3276 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3277 		em_dma_free(adapter, &rxr->rxdma);
3278 err_tx_desc:
3279 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3280 		em_dma_free(adapter, &txr->txdma);
3281 	free(adapter->rx_rings, M_DEVBUF);
3282 rx_fail:
3283 #if __FreeBSD_version >= 800000
3284 	buf_ring_free(txr->br, M_DEVBUF);
3285 #endif
3286 	free(adapter->tx_rings, M_DEVBUF);
3287 fail:
3288 	return (error);
3289 }
3290 
3291 
3292 /*********************************************************************
3293  *
3294  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3295  *  the information needed to transmit a packet on the wire. This is
3296  *  called only once at attach, setup is done every reset.
3297  *
3298  **********************************************************************/
3299 static int
em_allocate_transmit_buffers(struct tx_ring * txr)3300 em_allocate_transmit_buffers(struct tx_ring *txr)
3301 {
3302 	struct adapter *adapter = txr->adapter;
3303 	device_t dev = adapter->dev;
3304 	struct em_buffer *txbuf;
3305 	int error, i;
3306 
3307 	/*
3308 	 * Setup DMA descriptor areas.
3309 	 */
3310 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3311 			       1, 0,			/* alignment, bounds */
3312 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3313 			       BUS_SPACE_MAXADDR,	/* highaddr */
3314 			       NULL, NULL,		/* filter, filterarg */
3315 			       EM_TSO_SIZE,		/* maxsize */
3316 			       EM_MAX_SCATTER,		/* nsegments */
3317 			       PAGE_SIZE,		/* maxsegsize */
3318 			       0,			/* flags */
3319 			       NULL,			/* lockfunc */
3320 			       NULL,			/* lockfuncarg */
3321 			       &txr->txtag))) {
3322 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3323 		goto fail;
3324 	}
3325 
3326 	if (!(txr->tx_buffers =
3327 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3328 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3329 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3330 		error = ENOMEM;
3331 		goto fail;
3332 	}
3333 
3334         /* Create the descriptor buffer dma maps */
3335 	txbuf = txr->tx_buffers;
3336 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3337 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3338 		if (error != 0) {
3339 			device_printf(dev, "Unable to create TX DMA map\n");
3340 			goto fail;
3341 		}
3342 	}
3343 
3344 	return 0;
3345 fail:
3346 	/* We free all, it handles case where we are in the middle */
3347 	em_free_transmit_structures(adapter);
3348 	return (error);
3349 }
3350 
3351 /*********************************************************************
3352  *
3353  *  Initialize a transmit ring.
3354  *
3355  **********************************************************************/
3356 static void
em_setup_transmit_ring(struct tx_ring * txr)3357 em_setup_transmit_ring(struct tx_ring *txr)
3358 {
3359 	struct adapter *adapter = txr->adapter;
3360 	struct em_buffer *txbuf;
3361 	int i;
3362 #ifdef DEV_NETMAP
3363 	struct netmap_adapter *na = NA(adapter->ifp);
3364 	struct netmap_slot *slot;
3365 #endif /* DEV_NETMAP */
3366 
3367 	/* Clear the old descriptor contents */
3368 	EM_TX_LOCK(txr);
3369 #ifdef DEV_NETMAP
3370 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3371 #endif /* DEV_NETMAP */
3372 
3373 	bzero((void *)txr->tx_base,
3374 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3375 	/* Reset indices */
3376 	txr->next_avail_desc = 0;
3377 	txr->next_to_clean = 0;
3378 
3379 	/* Free any existing tx buffers. */
3380         txbuf = txr->tx_buffers;
3381 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3382 		if (txbuf->m_head != NULL) {
3383 			bus_dmamap_sync(txr->txtag, txbuf->map,
3384 			    BUS_DMASYNC_POSTWRITE);
3385 			bus_dmamap_unload(txr->txtag, txbuf->map);
3386 			m_freem(txbuf->m_head);
3387 			txbuf->m_head = NULL;
3388 		}
3389 #ifdef DEV_NETMAP
3390 		if (slot) {
3391 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3392 			uint64_t paddr;
3393 			void *addr;
3394 
3395 			addr = PNMB(na, slot + si, &paddr);
3396 			txr->tx_base[i].buffer_addr = htole64(paddr);
3397 			/* reload the map for netmap mode */
3398 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3399 		}
3400 #endif /* DEV_NETMAP */
3401 
3402 		/* clear the watch index */
3403 		txbuf->next_eop = -1;
3404         }
3405 
3406 	/* Set number of descriptors available */
3407 	txr->tx_avail = adapter->num_tx_desc;
3408 	txr->queue_status = EM_QUEUE_IDLE;
3409 
3410 	/* Clear checksum offload context. */
3411 	txr->last_hw_offload = 0;
3412 	txr->last_hw_ipcss = 0;
3413 	txr->last_hw_ipcso = 0;
3414 	txr->last_hw_tucss = 0;
3415 	txr->last_hw_tucso = 0;
3416 
3417 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3418 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3419 	EM_TX_UNLOCK(txr);
3420 }
3421 
3422 /*********************************************************************
3423  *
3424  *  Initialize all transmit rings.
3425  *
3426  **********************************************************************/
3427 static void
em_setup_transmit_structures(struct adapter * adapter)3428 em_setup_transmit_structures(struct adapter *adapter)
3429 {
3430 	struct tx_ring *txr = adapter->tx_rings;
3431 
3432 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3433 		em_setup_transmit_ring(txr);
3434 
3435 	return;
3436 }
3437 
3438 /*********************************************************************
3439  *
3440  *  Enable transmit unit.
3441  *
3442  **********************************************************************/
3443 static void
em_initialize_transmit_unit(struct adapter * adapter)3444 em_initialize_transmit_unit(struct adapter *adapter)
3445 {
3446 	struct tx_ring	*txr = adapter->tx_rings;
3447 	struct e1000_hw	*hw = &adapter->hw;
3448 	u32	tctl, tarc, tipg = 0;
3449 
3450 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3451 
3452 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3453 		u64 bus_addr = txr->txdma.dma_paddr;
3454 		/* Base and Len of TX Ring */
3455 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3456 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3457 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3458 	    	    (u32)(bus_addr >> 32));
3459 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3460 	    	    (u32)bus_addr);
3461 		/* Init the HEAD/TAIL indices */
3462 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3463 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3464 
3465 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3466 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3467 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3468 
3469 		txr->queue_status = EM_QUEUE_IDLE;
3470 	}
3471 
3472 	/* Set the default values for the Tx Inter Packet Gap timer */
3473 	switch (adapter->hw.mac.type) {
3474 	case e1000_80003es2lan:
3475 		tipg = DEFAULT_82543_TIPG_IPGR1;
3476 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3477 		    E1000_TIPG_IPGR2_SHIFT;
3478 		break;
3479 	default:
3480 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3481 		    (adapter->hw.phy.media_type ==
3482 		    e1000_media_type_internal_serdes))
3483 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3484 		else
3485 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3486 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3487 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3488 	}
3489 
3490 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3491 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3492 
3493 	if(adapter->hw.mac.type >= e1000_82540)
3494 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3495 		    adapter->tx_abs_int_delay.value);
3496 
3497 	if ((adapter->hw.mac.type == e1000_82571) ||
3498 	    (adapter->hw.mac.type == e1000_82572)) {
3499 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3500 		tarc |= SPEED_MODE_BIT;
3501 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3502 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3503 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3504 		tarc |= 1;
3505 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3506 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3507 		tarc |= 1;
3508 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3509 	}
3510 
3511 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3512 	if (adapter->tx_int_delay.value > 0)
3513 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3514 
3515 	/* Program the Transmit Control Register */
3516 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3517 	tctl &= ~E1000_TCTL_CT;
3518 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3519 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3520 
3521 	if (adapter->hw.mac.type >= e1000_82571)
3522 		tctl |= E1000_TCTL_MULR;
3523 
3524 	/* This write will effectively turn on the transmit unit. */
3525 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3526 
3527 }
3528 
3529 
3530 /*********************************************************************
3531  *
3532  *  Free all transmit rings.
3533  *
3534  **********************************************************************/
3535 static void
em_free_transmit_structures(struct adapter * adapter)3536 em_free_transmit_structures(struct adapter *adapter)
3537 {
3538 	struct tx_ring *txr = adapter->tx_rings;
3539 
3540 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3541 		EM_TX_LOCK(txr);
3542 		em_free_transmit_buffers(txr);
3543 		em_dma_free(adapter, &txr->txdma);
3544 		EM_TX_UNLOCK(txr);
3545 		EM_TX_LOCK_DESTROY(txr);
3546 	}
3547 
3548 	free(adapter->tx_rings, M_DEVBUF);
3549 }
3550 
3551 /*********************************************************************
3552  *
3553  *  Free transmit ring related data structures.
3554  *
3555  **********************************************************************/
3556 static void
em_free_transmit_buffers(struct tx_ring * txr)3557 em_free_transmit_buffers(struct tx_ring *txr)
3558 {
3559 	struct adapter		*adapter = txr->adapter;
3560 	struct em_buffer	*txbuf;
3561 
3562 	INIT_DEBUGOUT("free_transmit_ring: begin");
3563 
3564 	if (txr->tx_buffers == NULL)
3565 		return;
3566 
3567 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3568 		txbuf = &txr->tx_buffers[i];
3569 		if (txbuf->m_head != NULL) {
3570 			bus_dmamap_sync(txr->txtag, txbuf->map,
3571 			    BUS_DMASYNC_POSTWRITE);
3572 			bus_dmamap_unload(txr->txtag,
3573 			    txbuf->map);
3574 			m_freem(txbuf->m_head);
3575 			txbuf->m_head = NULL;
3576 			if (txbuf->map != NULL) {
3577 				bus_dmamap_destroy(txr->txtag,
3578 				    txbuf->map);
3579 				txbuf->map = NULL;
3580 			}
3581 		} else if (txbuf->map != NULL) {
3582 			bus_dmamap_unload(txr->txtag,
3583 			    txbuf->map);
3584 			bus_dmamap_destroy(txr->txtag,
3585 			    txbuf->map);
3586 			txbuf->map = NULL;
3587 		}
3588 	}
3589 #if __FreeBSD_version >= 800000
3590 	if (txr->br != NULL)
3591 		buf_ring_free(txr->br, M_DEVBUF);
3592 #endif
3593 	if (txr->tx_buffers != NULL) {
3594 		free(txr->tx_buffers, M_DEVBUF);
3595 		txr->tx_buffers = NULL;
3596 	}
3597 	if (txr->txtag != NULL) {
3598 		bus_dma_tag_destroy(txr->txtag);
3599 		txr->txtag = NULL;
3600 	}
3601 	return;
3602 }
3603 
3604 
3605 /*********************************************************************
3606  *  The offload context is protocol specific (TCP/UDP) and thus
3607  *  only needs to be set when the protocol changes. The occasion
3608  *  of a context change can be a performance detriment, and
3609  *  might be better just disabled. The reason arises in the way
3610  *  in which the controller supports pipelined requests from the
3611  *  Tx data DMA. Up to four requests can be pipelined, and they may
3612  *  belong to the same packet or to multiple packets. However all
3613  *  requests for one packet are issued before a request is issued
3614  *  for a subsequent packet and if a request for the next packet
3615  *  requires a context change, that request will be stalled
3616  *  until the previous request completes. This means setting up
3617  *  a new context effectively disables pipelined Tx data DMA which
3618  *  in turn greatly slow down performance to send small sized
3619  *  frames.
3620  **********************************************************************/
3621 static void
em_transmit_checksum_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,u32 * txd_upper,u32 * txd_lower)3622 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3623     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3624 {
3625 	struct adapter			*adapter = txr->adapter;
3626 	struct e1000_context_desc	*TXD = NULL;
3627 	struct em_buffer		*tx_buffer;
3628 	int				cur, hdr_len;
3629 	u32				cmd = 0;
3630 	u16				offload = 0;
3631 	u8				ipcso, ipcss, tucso, tucss;
3632 
3633 	ipcss = ipcso = tucss = tucso = 0;
3634 	hdr_len = ip_off + (ip->ip_hl << 2);
3635 	cur = txr->next_avail_desc;
3636 
3637 	/* Setup of IP header checksum. */
3638 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3639 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3640 		offload |= CSUM_IP;
3641 		ipcss = ip_off;
3642 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3643 		/*
3644 		 * Start offset for header checksum calculation.
3645 		 * End offset for header checksum calculation.
3646 		 * Offset of place to put the checksum.
3647 		 */
3648 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3649 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3650 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3651 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3652 		cmd |= E1000_TXD_CMD_IP;
3653 	}
3654 
3655 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3656  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3657  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3658  		offload |= CSUM_TCP;
3659  		tucss = hdr_len;
3660  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3661  		/*
3662  		 * Setting up new checksum offload context for every frames
3663  		 * takes a lot of processing time for hardware. This also
3664  		 * reduces performance a lot for small sized frames so avoid
3665  		 * it if driver can use previously configured checksum
3666  		 * offload context.
3667  		 */
3668  		if (txr->last_hw_offload == offload) {
3669  			if (offload & CSUM_IP) {
3670  				if (txr->last_hw_ipcss == ipcss &&
3671  				    txr->last_hw_ipcso == ipcso &&
3672  				    txr->last_hw_tucss == tucss &&
3673  				    txr->last_hw_tucso == tucso)
3674  					return;
3675  			} else {
3676  				if (txr->last_hw_tucss == tucss &&
3677  				    txr->last_hw_tucso == tucso)
3678  					return;
3679  			}
3680   		}
3681  		txr->last_hw_offload = offload;
3682  		txr->last_hw_tucss = tucss;
3683  		txr->last_hw_tucso = tucso;
3684  		/*
3685  		 * Start offset for payload checksum calculation.
3686  		 * End offset for payload checksum calculation.
3687  		 * Offset of place to put the checksum.
3688  		 */
3689 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3690  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3691  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3692  		TXD->upper_setup.tcp_fields.tucso = tucso;
3693  		cmd |= E1000_TXD_CMD_TCP;
3694  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3695  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3696  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3697  		tucss = hdr_len;
3698  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3699  		/*
3700  		 * Setting up new checksum offload context for every frames
3701  		 * takes a lot of processing time for hardware. This also
3702  		 * reduces performance a lot for small sized frames so avoid
3703  		 * it if driver can use previously configured checksum
3704  		 * offload context.
3705  		 */
3706  		if (txr->last_hw_offload == offload) {
3707  			if (offload & CSUM_IP) {
3708  				if (txr->last_hw_ipcss == ipcss &&
3709  				    txr->last_hw_ipcso == ipcso &&
3710  				    txr->last_hw_tucss == tucss &&
3711  				    txr->last_hw_tucso == tucso)
3712  					return;
3713  			} else {
3714  				if (txr->last_hw_tucss == tucss &&
3715  				    txr->last_hw_tucso == tucso)
3716  					return;
3717  			}
3718  		}
3719  		txr->last_hw_offload = offload;
3720  		txr->last_hw_tucss = tucss;
3721  		txr->last_hw_tucso = tucso;
3722  		/*
3723  		 * Start offset for header checksum calculation.
3724  		 * End offset for header checksum calculation.
3725  		 * Offset of place to put the checksum.
3726  		 */
3727 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3728  		TXD->upper_setup.tcp_fields.tucss = tucss;
3729  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3730  		TXD->upper_setup.tcp_fields.tucso = tucso;
3731   	}
3732 
3733  	if (offload & CSUM_IP) {
3734  		txr->last_hw_ipcss = ipcss;
3735  		txr->last_hw_ipcso = ipcso;
3736   	}
3737 
3738 	TXD->tcp_seg_setup.data = htole32(0);
3739 	TXD->cmd_and_length =
3740 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3741 	tx_buffer = &txr->tx_buffers[cur];
3742 	tx_buffer->m_head = NULL;
3743 	tx_buffer->next_eop = -1;
3744 
3745 	if (++cur == adapter->num_tx_desc)
3746 		cur = 0;
3747 
3748 	txr->tx_avail--;
3749 	txr->next_avail_desc = cur;
3750 }
3751 
3752 
3753 /**********************************************************************
3754  *
3755  *  Setup work for hardware segmentation offload (TSO)
3756  *
3757  **********************************************************************/
3758 static void
em_tso_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,struct tcphdr * tp,u32 * txd_upper,u32 * txd_lower)3759 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3760     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3761 {
3762 	struct adapter			*adapter = txr->adapter;
3763 	struct e1000_context_desc	*TXD;
3764 	struct em_buffer		*tx_buffer;
3765 	int cur, hdr_len;
3766 
3767 	/*
3768 	 * In theory we can use the same TSO context if and only if
3769 	 * frame is the same type(IP/TCP) and the same MSS. However
3770 	 * checking whether a frame has the same IP/TCP structure is
3771 	 * hard thing so just ignore that and always restablish a
3772 	 * new TSO context.
3773 	 */
3774 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3775 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3776 		      E1000_TXD_DTYP_D |	/* Data descr type */
3777 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3778 
3779 	/* IP and/or TCP header checksum calculation and insertion. */
3780 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3781 
3782 	cur = txr->next_avail_desc;
3783 	tx_buffer = &txr->tx_buffers[cur];
3784 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3785 
3786 	/*
3787 	 * Start offset for header checksum calculation.
3788 	 * End offset for header checksum calculation.
3789 	 * Offset of place put the checksum.
3790 	 */
3791 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3792 	TXD->lower_setup.ip_fields.ipcse =
3793 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3794 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3795 	/*
3796 	 * Start offset for payload checksum calculation.
3797 	 * End offset for payload checksum calculation.
3798 	 * Offset of place to put the checksum.
3799 	 */
3800 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3801 	TXD->upper_setup.tcp_fields.tucse = 0;
3802 	TXD->upper_setup.tcp_fields.tucso =
3803 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3804 	/*
3805 	 * Payload size per packet w/o any headers.
3806 	 * Length of all headers up to payload.
3807 	 */
3808 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3809 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3810 
3811 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3812 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3813 				E1000_TXD_CMD_TSE |	/* TSE context */
3814 				E1000_TXD_CMD_IP |	/* Do IP csum */
3815 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3816 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3817 
3818 	tx_buffer->m_head = NULL;
3819 	tx_buffer->next_eop = -1;
3820 
3821 	if (++cur == adapter->num_tx_desc)
3822 		cur = 0;
3823 
3824 	txr->tx_avail--;
3825 	txr->next_avail_desc = cur;
3826 	txr->tx_tso = TRUE;
3827 }
3828 
3829 
3830 /**********************************************************************
3831  *
3832  *  Examine each tx_buffer in the used queue. If the hardware is done
3833  *  processing the packet then free associated resources. The
3834  *  tx_buffer is put back on the free queue.
3835  *
3836  **********************************************************************/
3837 static void
em_txeof(struct tx_ring * txr)3838 em_txeof(struct tx_ring *txr)
3839 {
3840 	struct adapter	*adapter = txr->adapter;
3841         int first, last, done, processed;
3842         struct em_buffer *tx_buffer;
3843         struct e1000_tx_desc   *tx_desc, *eop_desc;
3844 	struct ifnet   *ifp = adapter->ifp;
3845 
3846 	EM_TX_LOCK_ASSERT(txr);
3847 #ifdef DEV_NETMAP
3848 	if (netmap_tx_irq(ifp, txr->me))
3849 		return;
3850 #endif /* DEV_NETMAP */
3851 
3852 	/* No work, make sure watchdog is off */
3853         if (txr->tx_avail == adapter->num_tx_desc) {
3854 		txr->queue_status = EM_QUEUE_IDLE;
3855                 return;
3856 	}
3857 
3858 	processed = 0;
3859         first = txr->next_to_clean;
3860         tx_desc = &txr->tx_base[first];
3861         tx_buffer = &txr->tx_buffers[first];
3862 	last = tx_buffer->next_eop;
3863         eop_desc = &txr->tx_base[last];
3864 
3865 	/*
3866 	 * What this does is get the index of the
3867 	 * first descriptor AFTER the EOP of the
3868 	 * first packet, that way we can do the
3869 	 * simple comparison on the inner while loop.
3870 	 */
3871 	if (++last == adapter->num_tx_desc)
3872  		last = 0;
3873 	done = last;
3874 
3875         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3876             BUS_DMASYNC_POSTREAD);
3877 
3878         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3879 		/* We clean the range of the packet */
3880 		while (first != done) {
3881                 	tx_desc->upper.data = 0;
3882                 	tx_desc->lower.data = 0;
3883                 	tx_desc->buffer_addr = 0;
3884                 	++txr->tx_avail;
3885 			++processed;
3886 
3887 			if (tx_buffer->m_head) {
3888 				bus_dmamap_sync(txr->txtag,
3889 				    tx_buffer->map,
3890 				    BUS_DMASYNC_POSTWRITE);
3891 				bus_dmamap_unload(txr->txtag,
3892 				    tx_buffer->map);
3893                         	m_freem(tx_buffer->m_head);
3894                         	tx_buffer->m_head = NULL;
3895                 	}
3896 			tx_buffer->next_eop = -1;
3897 			txr->watchdog_time = ticks;
3898 
3899 	                if (++first == adapter->num_tx_desc)
3900 				first = 0;
3901 
3902 	                tx_buffer = &txr->tx_buffers[first];
3903 			tx_desc = &txr->tx_base[first];
3904 		}
3905 		++ifp->if_opackets;
3906 		/* See if we can continue to the next packet */
3907 		last = tx_buffer->next_eop;
3908 		if (last != -1) {
3909         		eop_desc = &txr->tx_base[last];
3910 			/* Get new done point */
3911 			if (++last == adapter->num_tx_desc) last = 0;
3912 			done = last;
3913 		} else
3914 			break;
3915         }
3916         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3917             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3918 
3919         txr->next_to_clean = first;
3920 
3921 	/*
3922 	** Watchdog calculation, we know there's
3923 	** work outstanding or the first return
3924 	** would have been taken, so none processed
3925 	** for too long indicates a hang. local timer
3926 	** will examine this and do a reset if needed.
3927 	*/
3928 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3929 		txr->queue_status = EM_QUEUE_HUNG;
3930 
3931         /*
3932          * If we have a minimum free, clear IFF_DRV_OACTIVE
3933          * to tell the stack that it is OK to send packets.
3934 	 * Notice that all writes of OACTIVE happen under the
3935 	 * TX lock which, with a single queue, guarantees
3936 	 * sanity.
3937          */
3938         if (txr->tx_avail >= EM_MAX_SCATTER)
3939 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3940 
3941 	/* Disable watchdog if all clean */
3942 	if (txr->tx_avail == adapter->num_tx_desc) {
3943 		txr->queue_status = EM_QUEUE_IDLE;
3944 	}
3945 }
3946 
3947 
3948 /*********************************************************************
3949  *
3950  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3951  *
3952  **********************************************************************/
3953 static void
em_refresh_mbufs(struct rx_ring * rxr,int limit)3954 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3955 {
3956 	struct adapter		*adapter = rxr->adapter;
3957 	struct mbuf		*m;
3958 	bus_dma_segment_t	segs[1];
3959 	struct em_buffer	*rxbuf;
3960 	int			i, j, error, nsegs;
3961 	bool			cleaned = FALSE;
3962 
3963 	i = j = rxr->next_to_refresh;
3964 	/*
3965 	** Get one descriptor beyond
3966 	** our work mark to control
3967 	** the loop.
3968 	*/
3969 	if (++j == adapter->num_rx_desc)
3970 		j = 0;
3971 
3972 	while (j != limit) {
3973 		rxbuf = &rxr->rx_buffers[i];
3974 		if (rxbuf->m_head == NULL) {
3975 			m = m_getjcl(M_NOWAIT, MT_DATA,
3976 			    M_PKTHDR, adapter->rx_mbuf_sz);
3977 			/*
3978 			** If we have a temporary resource shortage
3979 			** that causes a failure, just abort refresh
3980 			** for now, we will return to this point when
3981 			** reinvoked from em_rxeof.
3982 			*/
3983 			if (m == NULL)
3984 				goto update;
3985 		} else
3986 			m = rxbuf->m_head;
3987 
3988 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3989 		m->m_flags |= M_PKTHDR;
3990 		m->m_data = m->m_ext.ext_buf;
3991 
3992 		/* Use bus_dma machinery to setup the memory mapping  */
3993 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3994 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3995 		if (error != 0) {
3996 			printf("Refresh mbufs: hdr dmamap load"
3997 			    " failure - %d\n", error);
3998 			m_free(m);
3999 			rxbuf->m_head = NULL;
4000 			goto update;
4001 		}
4002 		rxbuf->m_head = m;
4003 		bus_dmamap_sync(rxr->rxtag,
4004 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4005 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4006 		cleaned = TRUE;
4007 
4008 		i = j; /* Next is precalulated for us */
4009 		rxr->next_to_refresh = i;
4010 		/* Calculate next controlling index */
4011 		if (++j == adapter->num_rx_desc)
4012 			j = 0;
4013 	}
4014 update:
4015 	/*
4016 	** Update the tail pointer only if,
4017 	** and as far as we have refreshed.
4018 	*/
4019 	if (cleaned)
4020 		E1000_WRITE_REG(&adapter->hw,
4021 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4022 
4023 	return;
4024 }
4025 
4026 
4027 /*********************************************************************
4028  *
4029  *  Allocate memory for rx_buffer structures. Since we use one
4030  *  rx_buffer per received packet, the maximum number of rx_buffer's
4031  *  that we'll need is equal to the number of receive descriptors
4032  *  that we've allocated.
4033  *
4034  **********************************************************************/
4035 static int
em_allocate_receive_buffers(struct rx_ring * rxr)4036 em_allocate_receive_buffers(struct rx_ring *rxr)
4037 {
4038 	struct adapter		*adapter = rxr->adapter;
4039 	device_t		dev = adapter->dev;
4040 	struct em_buffer	*rxbuf;
4041 	int			error;
4042 
4043 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4044 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4045 	if (rxr->rx_buffers == NULL) {
4046 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4047 		return (ENOMEM);
4048 	}
4049 
4050 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4051 				1, 0,			/* alignment, bounds */
4052 				BUS_SPACE_MAXADDR,	/* lowaddr */
4053 				BUS_SPACE_MAXADDR,	/* highaddr */
4054 				NULL, NULL,		/* filter, filterarg */
4055 				MJUM9BYTES,		/* maxsize */
4056 				1,			/* nsegments */
4057 				MJUM9BYTES,		/* maxsegsize */
4058 				0,			/* flags */
4059 				NULL,			/* lockfunc */
4060 				NULL,			/* lockarg */
4061 				&rxr->rxtag);
4062 	if (error) {
4063 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4064 		    __func__, error);
4065 		goto fail;
4066 	}
4067 
4068 	rxbuf = rxr->rx_buffers;
4069 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4070 		rxbuf = &rxr->rx_buffers[i];
4071 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4072 		if (error) {
4073 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4074 			    __func__, error);
4075 			goto fail;
4076 		}
4077 	}
4078 
4079 	return (0);
4080 
4081 fail:
4082 	em_free_receive_structures(adapter);
4083 	return (error);
4084 }
4085 
4086 
4087 /*********************************************************************
4088  *
4089  *  Initialize a receive ring and its buffers.
4090  *
4091  **********************************************************************/
4092 static int
em_setup_receive_ring(struct rx_ring * rxr)4093 em_setup_receive_ring(struct rx_ring *rxr)
4094 {
4095 	struct	adapter 	*adapter = rxr->adapter;
4096 	struct em_buffer	*rxbuf;
4097 	bus_dma_segment_t	seg[1];
4098 	int			rsize, nsegs, error = 0;
4099 #ifdef DEV_NETMAP
4100 	struct netmap_adapter *na = NA(adapter->ifp);
4101 	struct netmap_slot *slot;
4102 #endif
4103 
4104 
4105 	/* Clear the ring contents */
4106 	EM_RX_LOCK(rxr);
4107 	rsize = roundup2(adapter->num_rx_desc *
4108 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4109 	bzero((void *)rxr->rx_base, rsize);
4110 #ifdef DEV_NETMAP
4111 	slot = netmap_reset(na, NR_RX, 0, 0);
4112 #endif
4113 
4114 	/*
4115 	** Free current RX buffer structs and their mbufs
4116 	*/
4117 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4118 		rxbuf = &rxr->rx_buffers[i];
4119 		if (rxbuf->m_head != NULL) {
4120 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4121 			    BUS_DMASYNC_POSTREAD);
4122 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4123 			m_freem(rxbuf->m_head);
4124 			rxbuf->m_head = NULL; /* mark as freed */
4125 		}
4126 	}
4127 
4128 	/* Now replenish the mbufs */
4129         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4130 		rxbuf = &rxr->rx_buffers[j];
4131 #ifdef DEV_NETMAP
4132 		if (slot) {
4133 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4134 			uint64_t paddr;
4135 			void *addr;
4136 
4137 			addr = PNMB(na, slot + si, &paddr);
4138 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4139 			/* Update descriptor */
4140 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4141 			continue;
4142 		}
4143 #endif /* DEV_NETMAP */
4144 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4145 		    M_PKTHDR, adapter->rx_mbuf_sz);
4146 		if (rxbuf->m_head == NULL) {
4147 			error = ENOBUFS;
4148 			goto fail;
4149 		}
4150 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4151 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4152 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4153 
4154 		/* Get the memory mapping */
4155 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4156 		    rxbuf->map, rxbuf->m_head, seg,
4157 		    &nsegs, BUS_DMA_NOWAIT);
4158 		if (error != 0) {
4159 			m_freem(rxbuf->m_head);
4160 			rxbuf->m_head = NULL;
4161 			goto fail;
4162 		}
4163 		bus_dmamap_sync(rxr->rxtag,
4164 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4165 
4166 		/* Update descriptor */
4167 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4168 	}
4169 	rxr->next_to_check = 0;
4170 	rxr->next_to_refresh = 0;
4171 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4172 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4173 
4174 fail:
4175 	EM_RX_UNLOCK(rxr);
4176 	return (error);
4177 }
4178 
4179 /*********************************************************************
4180  *
4181  *  Initialize all receive rings.
4182  *
4183  **********************************************************************/
4184 static int
em_setup_receive_structures(struct adapter * adapter)4185 em_setup_receive_structures(struct adapter *adapter)
4186 {
4187 	struct rx_ring *rxr = adapter->rx_rings;
4188 	int q;
4189 
4190 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4191 		if (em_setup_receive_ring(rxr))
4192 			goto fail;
4193 
4194 	return (0);
4195 fail:
4196 	/*
4197 	 * Free RX buffers allocated so far, we will only handle
4198 	 * the rings that completed, the failing case will have
4199 	 * cleaned up for itself. 'q' failed, so its the terminus.
4200 	 */
4201 	for (int i = 0; i < q; ++i) {
4202 		rxr = &adapter->rx_rings[i];
4203 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4204 			struct em_buffer *rxbuf;
4205 			rxbuf = &rxr->rx_buffers[n];
4206 			if (rxbuf->m_head != NULL) {
4207 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4208 			  	  BUS_DMASYNC_POSTREAD);
4209 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4210 				m_freem(rxbuf->m_head);
4211 				rxbuf->m_head = NULL;
4212 			}
4213 		}
4214 		rxr->next_to_check = 0;
4215 		rxr->next_to_refresh = 0;
4216 	}
4217 
4218 	return (ENOBUFS);
4219 }
4220 
4221 /*********************************************************************
4222  *
4223  *  Free all receive rings.
4224  *
4225  **********************************************************************/
4226 static void
em_free_receive_structures(struct adapter * adapter)4227 em_free_receive_structures(struct adapter *adapter)
4228 {
4229 	struct rx_ring *rxr = adapter->rx_rings;
4230 
4231 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4232 		em_free_receive_buffers(rxr);
4233 		/* Free the ring memory as well */
4234 		em_dma_free(adapter, &rxr->rxdma);
4235 		EM_RX_LOCK_DESTROY(rxr);
4236 	}
4237 
4238 	free(adapter->rx_rings, M_DEVBUF);
4239 }
4240 
4241 
4242 /*********************************************************************
4243  *
4244  *  Free receive ring data structures
4245  *
4246  **********************************************************************/
4247 static void
em_free_receive_buffers(struct rx_ring * rxr)4248 em_free_receive_buffers(struct rx_ring *rxr)
4249 {
4250 	struct adapter		*adapter = rxr->adapter;
4251 	struct em_buffer	*rxbuf = NULL;
4252 
4253 	INIT_DEBUGOUT("free_receive_buffers: begin");
4254 
4255 	if (rxr->rx_buffers != NULL) {
4256 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4257 			rxbuf = &rxr->rx_buffers[i];
4258 			if (rxbuf->map != NULL) {
4259 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4260 				    BUS_DMASYNC_POSTREAD);
4261 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4262 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4263 			}
4264 			if (rxbuf->m_head != NULL) {
4265 				m_freem(rxbuf->m_head);
4266 				rxbuf->m_head = NULL;
4267 			}
4268 		}
4269 		free(rxr->rx_buffers, M_DEVBUF);
4270 		rxr->rx_buffers = NULL;
4271 		rxr->next_to_check = 0;
4272 		rxr->next_to_refresh = 0;
4273 	}
4274 
4275 	if (rxr->rxtag != NULL) {
4276 		bus_dma_tag_destroy(rxr->rxtag);
4277 		rxr->rxtag = NULL;
4278 	}
4279 
4280 	return;
4281 }
4282 
4283 
4284 /*********************************************************************
4285  *
4286  *  Enable receive unit.
4287  *
4288  **********************************************************************/
4289 
4290 static void
em_initialize_receive_unit(struct adapter * adapter)4291 em_initialize_receive_unit(struct adapter *adapter)
4292 {
4293 	struct rx_ring	*rxr = adapter->rx_rings;
4294 	struct ifnet	*ifp = adapter->ifp;
4295 	struct e1000_hw	*hw = &adapter->hw;
4296 	u64	bus_addr;
4297 	u32	rctl, rxcsum;
4298 
4299 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4300 
4301 	/*
4302 	 * Make sure receives are disabled while setting
4303 	 * up the descriptor ring
4304 	 */
4305 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4306 	/* Do not disable if ever enabled on this hardware */
4307 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4308 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4309 
4310 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4311 	    adapter->rx_abs_int_delay.value);
4312 	/*
4313 	 * Set the interrupt throttling rate. Value is calculated
4314 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4315 	 */
4316 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4317 
4318 	/*
4319 	** When using MSIX interrupts we need to throttle
4320 	** using the EITR register (82574 only)
4321 	*/
4322 	if (hw->mac.type == e1000_82574) {
4323 		for (int i = 0; i < 4; i++)
4324 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4325 			    DEFAULT_ITR);
4326 		/* Disable accelerated acknowledge */
4327 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4328 	}
4329 
4330 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4331 	if (ifp->if_capenable & IFCAP_RXCSUM)
4332 		rxcsum |= E1000_RXCSUM_TUOFL;
4333 	else
4334 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4335 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4336 
4337 	/*
4338 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4339 	** long latencies are observed, like Lenovo X60. This
4340 	** change eliminates the problem, but since having positive
4341 	** values in RDTR is a known source of problems on other
4342 	** platforms another solution is being sought.
4343 	*/
4344 	if (hw->mac.type == e1000_82573)
4345 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4346 
4347 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4348 		/* Setup the Base and Length of the Rx Descriptor Ring */
4349 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4350 
4351 		bus_addr = rxr->rxdma.dma_paddr;
4352 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4353 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4354 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4355 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4356 		/* Setup the Head and Tail Descriptor Pointers */
4357 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4358 #ifdef DEV_NETMAP
4359 		/*
4360 		 * an init() while a netmap client is active must
4361 		 * preserve the rx buffers passed to userspace.
4362 		 */
4363 		if (ifp->if_capenable & IFCAP_NETMAP)
4364 			rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4365 #endif /* DEV_NETMAP */
4366 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4367 	}
4368 
4369 	/* Set PTHRESH for improved jumbo performance */
4370 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4371 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4372 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4373 	    (ifp->if_mtu > ETHERMTU)) {
4374 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4375 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4376 	}
4377 
4378 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4379 		if (ifp->if_mtu > ETHERMTU)
4380 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4381 		else
4382 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4383 	}
4384 
4385 	/* Setup the Receive Control Register */
4386 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4387 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4388 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4389 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4390 
4391         /* Strip the CRC */
4392         rctl |= E1000_RCTL_SECRC;
4393 
4394         /* Make sure VLAN Filters are off */
4395         rctl &= ~E1000_RCTL_VFE;
4396 	rctl &= ~E1000_RCTL_SBP;
4397 
4398 	if (adapter->rx_mbuf_sz == MCLBYTES)
4399 		rctl |= E1000_RCTL_SZ_2048;
4400 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4401 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4402 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4403 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4404 
4405 	if (ifp->if_mtu > ETHERMTU)
4406 		rctl |= E1000_RCTL_LPE;
4407 	else
4408 		rctl &= ~E1000_RCTL_LPE;
4409 
4410 	/* Write out the settings */
4411 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4412 
4413 	return;
4414 }
4415 
4416 
4417 /*********************************************************************
4418  *
4419  *  This routine executes in interrupt context. It replenishes
4420  *  the mbufs in the descriptor and sends data which has been
4421  *  dma'ed into host memory to upper layer.
4422  *
4423  *  We loop at most count times if count is > 0, or until done if
4424  *  count < 0.
4425  *
4426  *  For polling we also now return the number of cleaned packets
4427  *********************************************************************/
4428 static bool
em_rxeof(struct rx_ring * rxr,int count,int * done)4429 em_rxeof(struct rx_ring *rxr, int count, int *done)
4430 {
4431 	struct adapter		*adapter = rxr->adapter;
4432 	struct ifnet		*ifp = adapter->ifp;
4433 	struct mbuf		*mp, *sendmp;
4434 	u8			status = 0;
4435 	u16 			len;
4436 	int			i, processed, rxdone = 0;
4437 	bool			eop;
4438 	struct e1000_rx_desc	*cur;
4439 
4440 	EM_RX_LOCK(rxr);
4441 
4442 #ifdef DEV_NETMAP
4443 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4444 		EM_RX_UNLOCK(rxr);
4445 		return (FALSE);
4446 	}
4447 #endif /* DEV_NETMAP */
4448 
4449 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4450 
4451 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4452 			break;
4453 
4454 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4455 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4456 
4457 		cur = &rxr->rx_base[i];
4458 		status = cur->status;
4459 		mp = sendmp = NULL;
4460 
4461 		if ((status & E1000_RXD_STAT_DD) == 0)
4462 			break;
4463 
4464 		len = le16toh(cur->length);
4465 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4466 
4467 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4468 		    (rxr->discard == TRUE)) {
4469 			adapter->dropped_pkts++;
4470 			++rxr->rx_discarded;
4471 			if (!eop) /* Catch subsequent segs */
4472 				rxr->discard = TRUE;
4473 			else
4474 				rxr->discard = FALSE;
4475 			em_rx_discard(rxr, i);
4476 			goto next_desc;
4477 		}
4478 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4479 
4480 		/* Assign correct length to the current fragment */
4481 		mp = rxr->rx_buffers[i].m_head;
4482 		mp->m_len = len;
4483 
4484 		/* Trigger for refresh */
4485 		rxr->rx_buffers[i].m_head = NULL;
4486 
4487 		/* First segment? */
4488 		if (rxr->fmp == NULL) {
4489 			mp->m_pkthdr.len = len;
4490 			rxr->fmp = rxr->lmp = mp;
4491 		} else {
4492 			/* Chain mbuf's together */
4493 			mp->m_flags &= ~M_PKTHDR;
4494 			rxr->lmp->m_next = mp;
4495 			rxr->lmp = mp;
4496 			rxr->fmp->m_pkthdr.len += len;
4497 		}
4498 
4499 		if (eop) {
4500 			--count;
4501 			sendmp = rxr->fmp;
4502 			sendmp->m_pkthdr.rcvif = ifp;
4503 			ifp->if_ipackets++;
4504 			em_receive_checksum(cur, sendmp);
4505 #ifndef __NO_STRICT_ALIGNMENT
4506 			if (adapter->hw.mac.max_frame_size >
4507 			    (MCLBYTES - ETHER_ALIGN) &&
4508 			    em_fixup_rx(rxr) != 0)
4509 				goto skip;
4510 #endif
4511 			if (status & E1000_RXD_STAT_VP) {
4512 				sendmp->m_pkthdr.ether_vtag =
4513 				    le16toh(cur->special);
4514 				sendmp->m_flags |= M_VLANTAG;
4515 			}
4516 #ifndef __NO_STRICT_ALIGNMENT
4517 skip:
4518 #endif
4519 			rxr->fmp = rxr->lmp = NULL;
4520 		}
4521 next_desc:
4522 		/* Zero out the receive descriptors status. */
4523 		cur->status = 0;
4524 		++rxdone;	/* cumulative for POLL */
4525 		++processed;
4526 
4527 		/* Advance our pointers to the next descriptor. */
4528 		if (++i == adapter->num_rx_desc)
4529 			i = 0;
4530 
4531 		/* Send to the stack */
4532 		if (sendmp != NULL) {
4533 			rxr->next_to_check = i;
4534 			EM_RX_UNLOCK(rxr);
4535 			(*ifp->if_input)(ifp, sendmp);
4536 			EM_RX_LOCK(rxr);
4537 			i = rxr->next_to_check;
4538 		}
4539 
4540 		/* Only refresh mbufs every 8 descriptors */
4541 		if (processed == 8) {
4542 			em_refresh_mbufs(rxr, i);
4543 			processed = 0;
4544 		}
4545 	}
4546 
4547 	/* Catch any remaining refresh work */
4548 	if (e1000_rx_unrefreshed(rxr))
4549 		em_refresh_mbufs(rxr, i);
4550 
4551 	rxr->next_to_check = i;
4552 	if (done != NULL)
4553 		*done = rxdone;
4554 	EM_RX_UNLOCK(rxr);
4555 
4556 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4557 }
4558 
4559 static __inline void
em_rx_discard(struct rx_ring * rxr,int i)4560 em_rx_discard(struct rx_ring *rxr, int i)
4561 {
4562 	struct em_buffer	*rbuf;
4563 
4564 	rbuf = &rxr->rx_buffers[i];
4565 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4566 
4567 	/* Free any previous pieces */
4568 	if (rxr->fmp != NULL) {
4569 		rxr->fmp->m_flags |= M_PKTHDR;
4570 		m_freem(rxr->fmp);
4571 		rxr->fmp = NULL;
4572 		rxr->lmp = NULL;
4573 	}
4574 	/*
4575 	** Free buffer and allow em_refresh_mbufs()
4576 	** to clean up and recharge buffer.
4577 	*/
4578 	if (rbuf->m_head) {
4579 		m_free(rbuf->m_head);
4580 		rbuf->m_head = NULL;
4581 	}
4582 	return;
4583 }
4584 
4585 #ifndef __NO_STRICT_ALIGNMENT
4586 /*
4587  * When jumbo frames are enabled we should realign entire payload on
4588  * architecures with strict alignment. This is serious design mistake of 8254x
4589  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4590  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4591  * payload. On architecures without strict alignment restrictions 8254x still
4592  * performs unaligned memory access which would reduce the performance too.
4593  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4594  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4595  * existing mbuf chain.
4596  *
4597  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4598  * not used at all on architectures with strict alignment.
4599  */
4600 static int
em_fixup_rx(struct rx_ring * rxr)4601 em_fixup_rx(struct rx_ring *rxr)
4602 {
4603 	struct adapter *adapter = rxr->adapter;
4604 	struct mbuf *m, *n;
4605 	int error;
4606 
4607 	error = 0;
4608 	m = rxr->fmp;
4609 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4610 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4611 		m->m_data += ETHER_HDR_LEN;
4612 	} else {
4613 		MGETHDR(n, M_NOWAIT, MT_DATA);
4614 		if (n != NULL) {
4615 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4616 			m->m_data += ETHER_HDR_LEN;
4617 			m->m_len -= ETHER_HDR_LEN;
4618 			n->m_len = ETHER_HDR_LEN;
4619 			M_MOVE_PKTHDR(n, m);
4620 			n->m_next = m;
4621 			rxr->fmp = n;
4622 		} else {
4623 			adapter->dropped_pkts++;
4624 			m_freem(rxr->fmp);
4625 			rxr->fmp = NULL;
4626 			error = ENOMEM;
4627 		}
4628 	}
4629 
4630 	return (error);
4631 }
4632 #endif
4633 
4634 /*********************************************************************
4635  *
4636  *  Verify that the hardware indicated that the checksum is valid.
4637  *  Inform the stack about the status of checksum so that stack
4638  *  doesn't spend time verifying the checksum.
4639  *
4640  *********************************************************************/
4641 static void
em_receive_checksum(struct e1000_rx_desc * rx_desc,struct mbuf * mp)4642 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4643 {
4644 	mp->m_pkthdr.csum_flags = 0;
4645 
4646 	/* Ignore Checksum bit is set */
4647 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4648 		return;
4649 
4650 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4651 		return;
4652 
4653 	/* IP Checksum Good? */
4654 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4655 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4656 
4657 	/* TCP or UDP checksum */
4658 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4659 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4660 		mp->m_pkthdr.csum_data = htons(0xffff);
4661 	}
4662 }
4663 
4664 /*
4665  * This routine is run via an vlan
4666  * config EVENT
4667  */
4668 static void
em_register_vlan(void * arg,struct ifnet * ifp,u16 vtag)4669 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4670 {
4671 	struct adapter	*adapter = ifp->if_softc;
4672 	u32		index, bit;
4673 
4674 	if (ifp->if_softc !=  arg)   /* Not our event */
4675 		return;
4676 
4677 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4678                 return;
4679 
4680 	EM_CORE_LOCK(adapter);
4681 	index = (vtag >> 5) & 0x7F;
4682 	bit = vtag & 0x1F;
4683 	adapter->shadow_vfta[index] |= (1 << bit);
4684 	++adapter->num_vlans;
4685 	/* Re-init to load the changes */
4686 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4687 		em_init_locked(adapter);
4688 	EM_CORE_UNLOCK(adapter);
4689 }
4690 
4691 /*
4692  * This routine is run via an vlan
4693  * unconfig EVENT
4694  */
4695 static void
em_unregister_vlan(void * arg,struct ifnet * ifp,u16 vtag)4696 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4697 {
4698 	struct adapter	*adapter = ifp->if_softc;
4699 	u32		index, bit;
4700 
4701 	if (ifp->if_softc !=  arg)
4702 		return;
4703 
4704 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4705                 return;
4706 
4707 	EM_CORE_LOCK(adapter);
4708 	index = (vtag >> 5) & 0x7F;
4709 	bit = vtag & 0x1F;
4710 	adapter->shadow_vfta[index] &= ~(1 << bit);
4711 	--adapter->num_vlans;
4712 	/* Re-init to load the changes */
4713 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4714 		em_init_locked(adapter);
4715 	EM_CORE_UNLOCK(adapter);
4716 }
4717 
4718 static void
em_setup_vlan_hw_support(struct adapter * adapter)4719 em_setup_vlan_hw_support(struct adapter *adapter)
4720 {
4721 	struct e1000_hw *hw = &adapter->hw;
4722 	u32             reg;
4723 
4724 	/*
4725 	** We get here thru init_locked, meaning
4726 	** a soft reset, this has already cleared
4727 	** the VFTA and other state, so if there
4728 	** have been no vlan's registered do nothing.
4729 	*/
4730 	if (adapter->num_vlans == 0)
4731                 return;
4732 
4733 	/*
4734 	** A soft reset zero's out the VFTA, so
4735 	** we need to repopulate it now.
4736 	*/
4737 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4738                 if (adapter->shadow_vfta[i] != 0)
4739 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4740                             i, adapter->shadow_vfta[i]);
4741 
4742 	reg = E1000_READ_REG(hw, E1000_CTRL);
4743 	reg |= E1000_CTRL_VME;
4744 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4745 
4746 	/* Enable the Filter Table */
4747 	reg = E1000_READ_REG(hw, E1000_RCTL);
4748 	reg &= ~E1000_RCTL_CFIEN;
4749 	reg |= E1000_RCTL_VFE;
4750 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4751 }
4752 
4753 static void
em_enable_intr(struct adapter * adapter)4754 em_enable_intr(struct adapter *adapter)
4755 {
4756 	struct e1000_hw *hw = &adapter->hw;
4757 	u32 ims_mask = IMS_ENABLE_MASK;
4758 
4759 	if (hw->mac.type == e1000_82574) {
4760 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4761 		ims_mask |= EM_MSIX_MASK;
4762 	}
4763 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4764 }
4765 
4766 static void
em_disable_intr(struct adapter * adapter)4767 em_disable_intr(struct adapter *adapter)
4768 {
4769 	struct e1000_hw *hw = &adapter->hw;
4770 
4771 	if (hw->mac.type == e1000_82574)
4772 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4773 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4774 }
4775 
4776 /*
4777  * Bit of a misnomer, what this really means is
4778  * to enable OS management of the system... aka
4779  * to disable special hardware management features
4780  */
4781 static void
em_init_manageability(struct adapter * adapter)4782 em_init_manageability(struct adapter *adapter)
4783 {
4784 	/* A shared code workaround */
4785 #define E1000_82542_MANC2H E1000_MANC2H
4786 	if (adapter->has_manage) {
4787 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4788 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4789 
4790 		/* disable hardware interception of ARP */
4791 		manc &= ~(E1000_MANC_ARP_EN);
4792 
4793                 /* enable receiving management packets to the host */
4794 		manc |= E1000_MANC_EN_MNG2HOST;
4795 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4796 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4797 		manc2h |= E1000_MNG2HOST_PORT_623;
4798 		manc2h |= E1000_MNG2HOST_PORT_664;
4799 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4800 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4801 	}
4802 }
4803 
4804 /*
4805  * Give control back to hardware management
4806  * controller if there is one.
4807  */
4808 static void
em_release_manageability(struct adapter * adapter)4809 em_release_manageability(struct adapter *adapter)
4810 {
4811 	if (adapter->has_manage) {
4812 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4813 
4814 		/* re-enable hardware interception of ARP */
4815 		manc |= E1000_MANC_ARP_EN;
4816 		manc &= ~E1000_MANC_EN_MNG2HOST;
4817 
4818 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4819 	}
4820 }
4821 
4822 /*
4823  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4824  * For ASF and Pass Through versions of f/w this means
4825  * that the driver is loaded. For AMT version type f/w
4826  * this means that the network i/f is open.
4827  */
4828 static void
em_get_hw_control(struct adapter * adapter)4829 em_get_hw_control(struct adapter *adapter)
4830 {
4831 	u32 ctrl_ext, swsm;
4832 
4833 	if (adapter->hw.mac.type == e1000_82573) {
4834 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4835 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4836 		    swsm | E1000_SWSM_DRV_LOAD);
4837 		return;
4838 	}
4839 	/* else */
4840 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4841 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4842 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4843 	return;
4844 }
4845 
4846 /*
4847  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4848  * For ASF and Pass Through versions of f/w this means that
4849  * the driver is no longer loaded. For AMT versions of the
4850  * f/w this means that the network i/f is closed.
4851  */
4852 static void
em_release_hw_control(struct adapter * adapter)4853 em_release_hw_control(struct adapter *adapter)
4854 {
4855 	u32 ctrl_ext, swsm;
4856 
4857 	if (!adapter->has_manage)
4858 		return;
4859 
4860 	if (adapter->hw.mac.type == e1000_82573) {
4861 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4862 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4863 		    swsm & ~E1000_SWSM_DRV_LOAD);
4864 		return;
4865 	}
4866 	/* else */
4867 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4868 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4869 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4870 	return;
4871 }
4872 
4873 static int
em_is_valid_ether_addr(u8 * addr)4874 em_is_valid_ether_addr(u8 *addr)
4875 {
4876 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4877 
4878 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4879 		return (FALSE);
4880 	}
4881 
4882 	return (TRUE);
4883 }
4884 
4885 /*
4886 ** Parse the interface capabilities with regard
4887 ** to both system management and wake-on-lan for
4888 ** later use.
4889 */
4890 static void
em_get_wakeup(device_t dev)4891 em_get_wakeup(device_t dev)
4892 {
4893 	struct adapter	*adapter = device_get_softc(dev);
4894 	u16		eeprom_data = 0, device_id, apme_mask;
4895 
4896 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4897 	apme_mask = EM_EEPROM_APME;
4898 
4899 	switch (adapter->hw.mac.type) {
4900 	case e1000_82573:
4901 	case e1000_82583:
4902 		adapter->has_amt = TRUE;
4903 		/* Falls thru */
4904 	case e1000_82571:
4905 	case e1000_82572:
4906 	case e1000_80003es2lan:
4907 		if (adapter->hw.bus.func == 1) {
4908 			e1000_read_nvm(&adapter->hw,
4909 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4910 			break;
4911 		} else
4912 			e1000_read_nvm(&adapter->hw,
4913 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4914 		break;
4915 	case e1000_ich8lan:
4916 	case e1000_ich9lan:
4917 	case e1000_ich10lan:
4918 	case e1000_pchlan:
4919 	case e1000_pch2lan:
4920 		apme_mask = E1000_WUC_APME;
4921 		adapter->has_amt = TRUE;
4922 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4923 		break;
4924 	default:
4925 		e1000_read_nvm(&adapter->hw,
4926 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4927 		break;
4928 	}
4929 	if (eeprom_data & apme_mask)
4930 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4931 	/*
4932          * We have the eeprom settings, now apply the special cases
4933          * where the eeprom may be wrong or the board won't support
4934          * wake on lan on a particular port
4935 	 */
4936 	device_id = pci_get_device(dev);
4937         switch (device_id) {
4938 	case E1000_DEV_ID_82571EB_FIBER:
4939 		/* Wake events only supported on port A for dual fiber
4940 		 * regardless of eeprom setting */
4941 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4942 		    E1000_STATUS_FUNC_1)
4943 			adapter->wol = 0;
4944 		break;
4945 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4946 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4947 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4948                 /* if quad port adapter, disable WoL on all but port A */
4949 		if (global_quad_port_a != 0)
4950 			adapter->wol = 0;
4951 		/* Reset for multiple quad port adapters */
4952 		if (++global_quad_port_a == 4)
4953 			global_quad_port_a = 0;
4954                 break;
4955 	}
4956 	return;
4957 }
4958 
4959 
4960 /*
4961  * Enable PCI Wake On Lan capability
4962  */
4963 static void
em_enable_wakeup(device_t dev)4964 em_enable_wakeup(device_t dev)
4965 {
4966 	struct adapter	*adapter = device_get_softc(dev);
4967 	struct ifnet	*ifp = adapter->ifp;
4968 	u32		pmc, ctrl, ctrl_ext, rctl;
4969 	u16     	status;
4970 
4971 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4972 		return;
4973 
4974 	/* Advertise the wakeup capability */
4975 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4976 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4977 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4978 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4979 
4980 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4981 	    (adapter->hw.mac.type == e1000_pchlan) ||
4982 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4983 	    (adapter->hw.mac.type == e1000_ich10lan))
4984 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4985 
4986 	/* Keep the laser running on Fiber adapters */
4987 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4988 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4989 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4990 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4991 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4992 	}
4993 
4994 	/*
4995 	** Determine type of Wakeup: note that wol
4996 	** is set with all bits on by default.
4997 	*/
4998 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4999 		adapter->wol &= ~E1000_WUFC_MAG;
5000 
5001 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5002 		adapter->wol &= ~E1000_WUFC_MC;
5003 	else {
5004 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5005 		rctl |= E1000_RCTL_MPE;
5006 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5007 	}
5008 
5009 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5010 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5011 		if (em_enable_phy_wakeup(adapter))
5012 			return;
5013 	} else {
5014 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5015 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5016 	}
5017 
5018 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5019 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5020 
5021         /* Request PME */
5022         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5023 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5024 	if (ifp->if_capenable & IFCAP_WOL)
5025 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5026         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5027 
5028 	return;
5029 }
5030 
5031 /*
5032 ** WOL in the newer chipset interfaces (pchlan)
5033 ** require thing to be copied into the phy
5034 */
5035 static int
em_enable_phy_wakeup(struct adapter * adapter)5036 em_enable_phy_wakeup(struct adapter *adapter)
5037 {
5038 	struct e1000_hw *hw = &adapter->hw;
5039 	u32 mreg, ret = 0;
5040 	u16 preg;
5041 
5042 	/* copy MAC RARs to PHY RARs */
5043 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5044 
5045 	/* copy MAC MTA to PHY MTA */
5046 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5047 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5048 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5049 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5050 		    (u16)((mreg >> 16) & 0xFFFF));
5051 	}
5052 
5053 	/* configure PHY Rx Control register */
5054 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5055 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5056 	if (mreg & E1000_RCTL_UPE)
5057 		preg |= BM_RCTL_UPE;
5058 	if (mreg & E1000_RCTL_MPE)
5059 		preg |= BM_RCTL_MPE;
5060 	preg &= ~(BM_RCTL_MO_MASK);
5061 	if (mreg & E1000_RCTL_MO_3)
5062 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5063 				<< BM_RCTL_MO_SHIFT);
5064 	if (mreg & E1000_RCTL_BAM)
5065 		preg |= BM_RCTL_BAM;
5066 	if (mreg & E1000_RCTL_PMCF)
5067 		preg |= BM_RCTL_PMCF;
5068 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5069 	if (mreg & E1000_CTRL_RFCE)
5070 		preg |= BM_RCTL_RFCE;
5071 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5072 
5073 	/* enable PHY wakeup in MAC register */
5074 	E1000_WRITE_REG(hw, E1000_WUC,
5075 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5076 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5077 
5078 	/* configure and enable PHY wakeup in PHY registers */
5079 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5080 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5081 
5082 	/* activate PHY wakeup */
5083 	ret = hw->phy.ops.acquire(hw);
5084 	if (ret) {
5085 		printf("Could not acquire PHY\n");
5086 		return ret;
5087 	}
5088 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5089 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5090 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5091 	if (ret) {
5092 		printf("Could not read PHY page 769\n");
5093 		goto out;
5094 	}
5095 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5096 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5097 	if (ret)
5098 		printf("Could not set PHY Host Wakeup bit\n");
5099 out:
5100 	hw->phy.ops.release(hw);
5101 
5102 	return ret;
5103 }
5104 
5105 static void
em_led_func(void * arg,int onoff)5106 em_led_func(void *arg, int onoff)
5107 {
5108 	struct adapter	*adapter = arg;
5109 
5110 	EM_CORE_LOCK(adapter);
5111 	if (onoff) {
5112 		e1000_setup_led(&adapter->hw);
5113 		e1000_led_on(&adapter->hw);
5114 	} else {
5115 		e1000_led_off(&adapter->hw);
5116 		e1000_cleanup_led(&adapter->hw);
5117 	}
5118 	EM_CORE_UNLOCK(adapter);
5119 }
5120 
5121 /*
5122 ** Disable the L0S and L1 LINK states
5123 */
5124 static void
em_disable_aspm(struct adapter * adapter)5125 em_disable_aspm(struct adapter *adapter)
5126 {
5127 	int		base, reg;
5128 	u16		link_cap,link_ctrl;
5129 	device_t	dev = adapter->dev;
5130 
5131 	switch (adapter->hw.mac.type) {
5132 		case e1000_82573:
5133 		case e1000_82574:
5134 		case e1000_82583:
5135 			break;
5136 		default:
5137 			return;
5138 	}
5139 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5140 		return;
5141 	reg = base + PCIER_LINK_CAP;
5142 	link_cap = pci_read_config(dev, reg, 2);
5143 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5144 		return;
5145 	reg = base + PCIER_LINK_CTL;
5146 	link_ctrl = pci_read_config(dev, reg, 2);
5147 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5148 	pci_write_config(dev, reg, link_ctrl, 2);
5149 	return;
5150 }
5151 
5152 /**********************************************************************
5153  *
5154  *  Update the board statistics counters.
5155  *
5156  **********************************************************************/
5157 static void
em_update_stats_counters(struct adapter * adapter)5158 em_update_stats_counters(struct adapter *adapter)
5159 {
5160 	struct ifnet   *ifp;
5161 
5162 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5163 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5164 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5165 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5166 	}
5167 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5168 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5169 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5170 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5171 
5172 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5173 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5174 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5175 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5176 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5177 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5178 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5179 	/*
5180 	** For watchdog management we need to know if we have been
5181 	** paused during the last interval, so capture that here.
5182 	*/
5183 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5184 	adapter->stats.xoffrxc += adapter->pause_frames;
5185 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5186 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5187 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5188 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5189 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5190 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5191 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5192 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5193 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5194 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5195 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5196 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5197 
5198 	/* For the 64-bit byte counters the low dword must be read first. */
5199 	/* Both registers clear on the read of the high dword */
5200 
5201 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5202 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5203 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5204 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5205 
5206 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5207 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5208 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5209 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5210 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5211 
5212 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5213 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5214 
5215 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5216 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5217 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5218 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5219 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5220 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5221 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5222 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5223 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5224 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5225 
5226 	/* Interrupt Counts */
5227 
5228 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5229 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5230 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5231 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5232 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5233 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5234 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5235 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5236 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5237 
5238 	if (adapter->hw.mac.type >= e1000_82543) {
5239 		adapter->stats.algnerrc +=
5240 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5241 		adapter->stats.rxerrc +=
5242 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5243 		adapter->stats.tncrs +=
5244 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5245 		adapter->stats.cexterr +=
5246 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5247 		adapter->stats.tsctc +=
5248 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5249 		adapter->stats.tsctfc +=
5250 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5251 	}
5252 	ifp = adapter->ifp;
5253 
5254 	ifp->if_collisions = adapter->stats.colc;
5255 
5256 	/* Rx Errors */
5257 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5258 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5259 	    adapter->stats.ruc + adapter->stats.roc +
5260 	    adapter->stats.mpc + adapter->stats.cexterr;
5261 
5262 	/* Tx Errors */
5263 	ifp->if_oerrors = adapter->stats.ecol +
5264 	    adapter->stats.latecol + adapter->watchdog_events;
5265 }
5266 
5267 /* Export a single 32-bit register via a read-only sysctl. */
5268 static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)5269 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5270 {
5271 	struct adapter *adapter;
5272 	u_int val;
5273 
5274 	adapter = oidp->oid_arg1;
5275 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5276 	return (sysctl_handle_int(oidp, &val, 0, req));
5277 }
5278 
5279 /*
5280  * Add sysctl variables, one per statistic, to the system.
5281  */
5282 static void
em_add_hw_stats(struct adapter * adapter)5283 em_add_hw_stats(struct adapter *adapter)
5284 {
5285 	device_t dev = adapter->dev;
5286 
5287 	struct tx_ring *txr = adapter->tx_rings;
5288 	struct rx_ring *rxr = adapter->rx_rings;
5289 
5290 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5291 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5292 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5293 	struct e1000_hw_stats *stats = &adapter->stats;
5294 
5295 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5296 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5297 
5298 #define QUEUE_NAME_LEN 32
5299 	char namebuf[QUEUE_NAME_LEN];
5300 
5301 	/* Driver Statistics */
5302 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5303 			CTLFLAG_RD, &adapter->link_irq,
5304 			"Link MSIX IRQ Handled");
5305 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5306 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5307 			 "Std mbuf failed");
5308 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5309 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5310 			 "Std mbuf cluster failed");
5311 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5312 			CTLFLAG_RD, &adapter->dropped_pkts,
5313 			"Driver dropped packets");
5314 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5315 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5316 			"Driver tx dma failure in xmit");
5317 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5318 			CTLFLAG_RD, &adapter->rx_overruns,
5319 			"RX overruns");
5320 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5321 			CTLFLAG_RD, &adapter->watchdog_events,
5322 			"Watchdog timeouts");
5323 
5324 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5325 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5326 			em_sysctl_reg_handler, "IU",
5327 			"Device Control Register");
5328 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5329 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5330 			em_sysctl_reg_handler, "IU",
5331 			"Receiver Control Register");
5332 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5333 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5334 			"Flow Control High Watermark");
5335 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5336 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5337 			"Flow Control Low Watermark");
5338 
5339 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5340 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5341 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5342 					    CTLFLAG_RD, NULL, "Queue Name");
5343 		queue_list = SYSCTL_CHILDREN(queue_node);
5344 
5345 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5346 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5347 				E1000_TDH(txr->me),
5348 				em_sysctl_reg_handler, "IU",
5349  				"Transmit Descriptor Head");
5350 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5351 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5352 				E1000_TDT(txr->me),
5353 				em_sysctl_reg_handler, "IU",
5354  				"Transmit Descriptor Tail");
5355 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5356 				CTLFLAG_RD, &txr->tx_irq,
5357 				"Queue MSI-X Transmit Interrupts");
5358 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5359 				CTLFLAG_RD, &txr->no_desc_avail,
5360 				"Queue No Descriptor Available");
5361 
5362 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5363 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5364 				E1000_RDH(rxr->me),
5365 				em_sysctl_reg_handler, "IU",
5366 				"Receive Descriptor Head");
5367 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5368 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5369 				E1000_RDT(rxr->me),
5370 				em_sysctl_reg_handler, "IU",
5371 				"Receive Descriptor Tail");
5372 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5373 				CTLFLAG_RD, &rxr->rx_irq,
5374 				"Queue MSI-X Receive Interrupts");
5375 	}
5376 
5377 	/* MAC stats get their own sub node */
5378 
5379 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5380 				    CTLFLAG_RD, NULL, "Statistics");
5381 	stat_list = SYSCTL_CHILDREN(stat_node);
5382 
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5384 			CTLFLAG_RD, &stats->ecol,
5385 			"Excessive collisions");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5387 			CTLFLAG_RD, &stats->scc,
5388 			"Single collisions");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5390 			CTLFLAG_RD, &stats->mcc,
5391 			"Multiple collisions");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5393 			CTLFLAG_RD, &stats->latecol,
5394 			"Late collisions");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5396 			CTLFLAG_RD, &stats->colc,
5397 			"Collision Count");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5399 			CTLFLAG_RD, &adapter->stats.symerrs,
5400 			"Symbol Errors");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5402 			CTLFLAG_RD, &adapter->stats.sec,
5403 			"Sequence Errors");
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5405 			CTLFLAG_RD, &adapter->stats.dc,
5406 			"Defer Count");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5408 			CTLFLAG_RD, &adapter->stats.mpc,
5409 			"Missed Packets");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5411 			CTLFLAG_RD, &adapter->stats.rnbc,
5412 			"Receive No Buffers");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5414 			CTLFLAG_RD, &adapter->stats.ruc,
5415 			"Receive Undersize");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5417 			CTLFLAG_RD, &adapter->stats.rfc,
5418 			"Fragmented Packets Received ");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5420 			CTLFLAG_RD, &adapter->stats.roc,
5421 			"Oversized Packets Received");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5423 			CTLFLAG_RD, &adapter->stats.rjc,
5424 			"Recevied Jabber");
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5426 			CTLFLAG_RD, &adapter->stats.rxerrc,
5427 			"Receive Errors");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5429 			CTLFLAG_RD, &adapter->stats.crcerrs,
5430 			"CRC errors");
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5432 			CTLFLAG_RD, &adapter->stats.algnerrc,
5433 			"Alignment Errors");
5434 	/* On 82575 these are collision counts */
5435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5436 			CTLFLAG_RD, &adapter->stats.cexterr,
5437 			"Collision/Carrier extension errors");
5438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5439 			CTLFLAG_RD, &adapter->stats.xonrxc,
5440 			"XON Received");
5441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5442 			CTLFLAG_RD, &adapter->stats.xontxc,
5443 			"XON Transmitted");
5444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5445 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5446 			"XOFF Received");
5447 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5448 			CTLFLAG_RD, &adapter->stats.xofftxc,
5449 			"XOFF Transmitted");
5450 
5451 	/* Packet Reception Stats */
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5453 			CTLFLAG_RD, &adapter->stats.tpr,
5454 			"Total Packets Received ");
5455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5456 			CTLFLAG_RD, &adapter->stats.gprc,
5457 			"Good Packets Received");
5458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5459 			CTLFLAG_RD, &adapter->stats.bprc,
5460 			"Broadcast Packets Received");
5461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5462 			CTLFLAG_RD, &adapter->stats.mprc,
5463 			"Multicast Packets Received");
5464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5465 			CTLFLAG_RD, &adapter->stats.prc64,
5466 			"64 byte frames received ");
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5468 			CTLFLAG_RD, &adapter->stats.prc127,
5469 			"65-127 byte frames received");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5471 			CTLFLAG_RD, &adapter->stats.prc255,
5472 			"128-255 byte frames received");
5473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5474 			CTLFLAG_RD, &adapter->stats.prc511,
5475 			"256-511 byte frames received");
5476 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5477 			CTLFLAG_RD, &adapter->stats.prc1023,
5478 			"512-1023 byte frames received");
5479 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5480 			CTLFLAG_RD, &adapter->stats.prc1522,
5481 			"1023-1522 byte frames received");
5482  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5483  			CTLFLAG_RD, &adapter->stats.gorc,
5484  			"Good Octets Received");
5485 
5486 	/* Packet Transmission Stats */
5487  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5488  			CTLFLAG_RD, &adapter->stats.gotc,
5489  			"Good Octets Transmitted");
5490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5491 			CTLFLAG_RD, &adapter->stats.tpt,
5492 			"Total Packets Transmitted");
5493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5494 			CTLFLAG_RD, &adapter->stats.gptc,
5495 			"Good Packets Transmitted");
5496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5497 			CTLFLAG_RD, &adapter->stats.bptc,
5498 			"Broadcast Packets Transmitted");
5499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5500 			CTLFLAG_RD, &adapter->stats.mptc,
5501 			"Multicast Packets Transmitted");
5502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5503 			CTLFLAG_RD, &adapter->stats.ptc64,
5504 			"64 byte frames transmitted ");
5505 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5506 			CTLFLAG_RD, &adapter->stats.ptc127,
5507 			"65-127 byte frames transmitted");
5508 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5509 			CTLFLAG_RD, &adapter->stats.ptc255,
5510 			"128-255 byte frames transmitted");
5511 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5512 			CTLFLAG_RD, &adapter->stats.ptc511,
5513 			"256-511 byte frames transmitted");
5514 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5515 			CTLFLAG_RD, &adapter->stats.ptc1023,
5516 			"512-1023 byte frames transmitted");
5517 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5518 			CTLFLAG_RD, &adapter->stats.ptc1522,
5519 			"1024-1522 byte frames transmitted");
5520 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5521 			CTLFLAG_RD, &adapter->stats.tsctc,
5522 			"TSO Contexts Transmitted");
5523 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5524 			CTLFLAG_RD, &adapter->stats.tsctfc,
5525 			"TSO Contexts Failed");
5526 
5527 
5528 	/* Interrupt Stats */
5529 
5530 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5531 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5532 	int_list = SYSCTL_CHILDREN(int_node);
5533 
5534 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5535 			CTLFLAG_RD, &adapter->stats.iac,
5536 			"Interrupt Assertion Count");
5537 
5538 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5539 			CTLFLAG_RD, &adapter->stats.icrxptc,
5540 			"Interrupt Cause Rx Pkt Timer Expire Count");
5541 
5542 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5543 			CTLFLAG_RD, &adapter->stats.icrxatc,
5544 			"Interrupt Cause Rx Abs Timer Expire Count");
5545 
5546 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5547 			CTLFLAG_RD, &adapter->stats.ictxptc,
5548 			"Interrupt Cause Tx Pkt Timer Expire Count");
5549 
5550 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5551 			CTLFLAG_RD, &adapter->stats.ictxatc,
5552 			"Interrupt Cause Tx Abs Timer Expire Count");
5553 
5554 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5555 			CTLFLAG_RD, &adapter->stats.ictxqec,
5556 			"Interrupt Cause Tx Queue Empty Count");
5557 
5558 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5559 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5560 			"Interrupt Cause Tx Queue Min Thresh Count");
5561 
5562 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5563 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5564 			"Interrupt Cause Rx Desc Min Thresh Count");
5565 
5566 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5567 			CTLFLAG_RD, &adapter->stats.icrxoc,
5568 			"Interrupt Cause Receiver Overrun Count");
5569 }
5570 
5571 /**********************************************************************
5572  *
5573  *  This routine provides a way to dump out the adapter eeprom,
5574  *  often a useful debug/service tool. This only dumps the first
5575  *  32 words, stuff that matters is in that extent.
5576  *
5577  **********************************************************************/
5578 static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)5579 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5580 {
5581 	struct adapter *adapter = (struct adapter *)arg1;
5582 	int error;
5583 	int result;
5584 
5585 	result = -1;
5586 	error = sysctl_handle_int(oidp, &result, 0, req);
5587 
5588 	if (error || !req->newptr)
5589 		return (error);
5590 
5591 	/*
5592 	 * This value will cause a hex dump of the
5593 	 * first 32 16-bit words of the EEPROM to
5594 	 * the screen.
5595 	 */
5596 	if (result == 1)
5597 		em_print_nvm_info(adapter);
5598 
5599 	return (error);
5600 }
5601 
5602 static void
em_print_nvm_info(struct adapter * adapter)5603 em_print_nvm_info(struct adapter *adapter)
5604 {
5605 	u16	eeprom_data;
5606 	int	i, j, row = 0;
5607 
5608 	/* Its a bit crude, but it gets the job done */
5609 	printf("\nInterface EEPROM Dump:\n");
5610 	printf("Offset\n0x0000  ");
5611 	for (i = 0, j = 0; i < 32; i++, j++) {
5612 		if (j == 8) { /* Make the offset block */
5613 			j = 0; ++row;
5614 			printf("\n0x00%x0  ",row);
5615 		}
5616 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5617 		printf("%04x ", eeprom_data);
5618 	}
5619 	printf("\n");
5620 }
5621 
5622 static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)5623 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5624 {
5625 	struct em_int_delay_info *info;
5626 	struct adapter *adapter;
5627 	u32 regval;
5628 	int error, usecs, ticks;
5629 
5630 	info = (struct em_int_delay_info *)arg1;
5631 	usecs = info->value;
5632 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5633 	if (error != 0 || req->newptr == NULL)
5634 		return (error);
5635 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5636 		return (EINVAL);
5637 	info->value = usecs;
5638 	ticks = EM_USECS_TO_TICKS(usecs);
5639 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5640 		ticks *= 4;
5641 
5642 	adapter = info->adapter;
5643 
5644 	EM_CORE_LOCK(adapter);
5645 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5646 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5647 	/* Handle a few special cases. */
5648 	switch (info->offset) {
5649 	case E1000_RDTR:
5650 		break;
5651 	case E1000_TIDV:
5652 		if (ticks == 0) {
5653 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5654 			/* Don't write 0 into the TIDV register. */
5655 			regval++;
5656 		} else
5657 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5658 		break;
5659 	}
5660 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5661 	EM_CORE_UNLOCK(adapter);
5662 	return (0);
5663 }
5664 
5665 static void
em_add_int_delay_sysctl(struct adapter * adapter,const char * name,const char * description,struct em_int_delay_info * info,int offset,int value)5666 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5667 	const char *description, struct em_int_delay_info *info,
5668 	int offset, int value)
5669 {
5670 	info->adapter = adapter;
5671 	info->offset = offset;
5672 	info->value = value;
5673 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5674 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5675 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5676 	    info, 0, em_sysctl_int_delay, "I", description);
5677 }
5678 
5679 static void
em_set_sysctl_value(struct adapter * adapter,const char * name,const char * description,int * limit,int value)5680 em_set_sysctl_value(struct adapter *adapter, const char *name,
5681 	const char *description, int *limit, int value)
5682 {
5683 	*limit = value;
5684 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5685 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5686 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5687 }
5688 
5689 
5690 /*
5691 ** Set flow control using sysctl:
5692 ** Flow control values:
5693 **      0 - off
5694 **      1 - rx pause
5695 **      2 - tx pause
5696 **      3 - full
5697 */
5698 static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)5699 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5700 {
5701         int		error;
5702 	static int	input = 3; /* default is full */
5703         struct adapter	*adapter = (struct adapter *) arg1;
5704 
5705         error = sysctl_handle_int(oidp, &input, 0, req);
5706 
5707         if ((error) || (req->newptr == NULL))
5708                 return (error);
5709 
5710 	if (input == adapter->fc) /* no change? */
5711 		return (error);
5712 
5713         switch (input) {
5714                 case e1000_fc_rx_pause:
5715                 case e1000_fc_tx_pause:
5716                 case e1000_fc_full:
5717                 case e1000_fc_none:
5718                         adapter->hw.fc.requested_mode = input;
5719 			adapter->fc = input;
5720                         break;
5721                 default:
5722 			/* Do nothing */
5723 			return (error);
5724         }
5725 
5726         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5727         e1000_force_mac_fc(&adapter->hw);
5728         return (error);
5729 }
5730 
5731 /*
5732 ** Manage Energy Efficient Ethernet:
5733 ** Control values:
5734 **     0/1 - enabled/disabled
5735 */
5736 static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)5737 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5738 {
5739        struct adapter *adapter = (struct adapter *) arg1;
5740        int             error, value;
5741 
5742        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5743        error = sysctl_handle_int(oidp, &value, 0, req);
5744        if (error || req->newptr == NULL)
5745                return (error);
5746        EM_CORE_LOCK(adapter);
5747        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5748        em_init_locked(adapter);
5749        EM_CORE_UNLOCK(adapter);
5750        return (0);
5751 }
5752 
5753 static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)5754 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5755 {
5756 	struct adapter *adapter;
5757 	int error;
5758 	int result;
5759 
5760 	result = -1;
5761 	error = sysctl_handle_int(oidp, &result, 0, req);
5762 
5763 	if (error || !req->newptr)
5764 		return (error);
5765 
5766 	if (result == 1) {
5767 		adapter = (struct adapter *)arg1;
5768 		em_print_debug_info(adapter);
5769         }
5770 
5771 	return (error);
5772 }
5773 
5774 /*
5775 ** This routine is meant to be fluid, add whatever is
5776 ** needed for debugging a problem.  -jfv
5777 */
5778 static void
em_print_debug_info(struct adapter * adapter)5779 em_print_debug_info(struct adapter *adapter)
5780 {
5781 	device_t dev = adapter->dev;
5782 	struct tx_ring *txr = adapter->tx_rings;
5783 	struct rx_ring *rxr = adapter->rx_rings;
5784 
5785 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5786 		printf("Interface is RUNNING ");
5787 	else
5788 		printf("Interface is NOT RUNNING\n");
5789 
5790 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5791 		printf("and INACTIVE\n");
5792 	else
5793 		printf("and ACTIVE\n");
5794 
5795 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5796 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5797 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5798 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5799 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5800 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5801 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5802 	device_printf(dev, "TX descriptors avail = %d\n",
5803 	    txr->tx_avail);
5804 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5805 	    txr->no_desc_avail);
5806 	device_printf(dev, "RX discarded packets = %ld\n",
5807 	    rxr->rx_discarded);
5808 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5809 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5810 }
5811