1 /******************************************************************************
2 
3   Copyright (c) 2001-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 273912 2014-10-31 18:18:04Z hselasky $*/
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #ifdef HAVE_KERNEL_OPTION_HEADERS
39 #include "opt_device_polling.h"
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_arp.h>
67 #include <net/if_dl.h>
68 #include <net/if_media.h>
69 
70 #include <net/if_types.h>
71 #include <net/if_vlan_var.h>
72 
73 #include <netinet/in_systm.h>
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 #include <netinet/tcp.h>
79 #include <netinet/udp.h>
80 
81 #include <machine/in_cksum.h>
82 #include <dev/led/led.h>
83 #include <dev/pci/pcivar.h>
84 #include <dev/pci/pcireg.h>
85 
86 #include "e1000_api.h"
87 #include "e1000_82571.h"
88 #include "if_em.h"
89 
/*
 * Debug statistics switch: non-zero asks the driver to display debug
 * statistics.  Off by default.
 */
int	em_display_debug_stats = 0;

/*
 * Driver version string; em_probe() appends it to the device
 * description.
 */
char	em_driver_version[] = "7.4.2";
99 
100 /*********************************************************************
101  *  PCI Device ID Table
102  *
103  *  Used by probe to select devices to load on
104  *  Last field stores an index into e1000_strings
105  *  Last entry must be all 0s
106  *
107  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108  *********************************************************************/
109 
110 static em_vendor_info_t em_vendor_info_array[] =
111 {
112 	/* Intel(R) PRO/1000 Network Connection */
113 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
115 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
116 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
117 						PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
119 						PCI_ANY_ID, PCI_ANY_ID, 0},
120 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
121 						PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
123 						PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
127 						PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
132 
133 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
138 						PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
140 						PCI_ANY_ID, PCI_ANY_ID, 0},
141 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
142 						PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
144 						PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
179 						PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
181 						PCI_ANY_ID, PCI_ANY_ID, 0},
182 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	/* required last entry */
187 	{ 0, 0, 0, 0, 0}
188 };
189 
/*********************************************************************
 *  Branding strings for the supported NICs, indexed by the last
 *  field of each em_vendor_info_array row.
 *********************************************************************/

static char *em_strings[] = {
	[0] = "Intel(R) PRO/1000 Network Connection",
};
197 
198 /*********************************************************************
199  *  Function prototypes
200  *********************************************************************/
201 static int	em_probe(device_t);
202 static int	em_attach(device_t);
203 static int	em_detach(device_t);
204 static int	em_shutdown(device_t);
205 static int	em_suspend(device_t);
206 static int	em_resume(device_t);
207 #ifdef EM_MULTIQUEUE
208 static int	em_mq_start(struct ifnet *, struct mbuf *);
209 static int	em_mq_start_locked(struct ifnet *,
210 		    struct tx_ring *, struct mbuf *);
211 static void	em_qflush(struct ifnet *);
212 #else
213 static void	em_start(struct ifnet *);
214 static void	em_start_locked(struct ifnet *, struct tx_ring *);
215 #endif
216 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
217 static void	em_init(void *);
218 static void	em_init_locked(struct adapter *);
219 static void	em_stop(void *);
220 static void	em_media_status(struct ifnet *, struct ifmediareq *);
221 static int	em_media_change(struct ifnet *);
222 static void	em_identify_hardware(struct adapter *);
223 static int	em_allocate_pci_resources(struct adapter *);
224 static int	em_allocate_legacy(struct adapter *);
225 static int	em_allocate_msix(struct adapter *);
226 static int	em_allocate_queues(struct adapter *);
227 static int	em_setup_msix(struct adapter *);
228 static void	em_free_pci_resources(struct adapter *);
229 static void	em_local_timer(void *);
230 static void	em_reset(struct adapter *);
231 static int	em_setup_interface(device_t, struct adapter *);
232 
233 static void	em_setup_transmit_structures(struct adapter *);
234 static void	em_initialize_transmit_unit(struct adapter *);
235 static int	em_allocate_transmit_buffers(struct tx_ring *);
236 static void	em_free_transmit_structures(struct adapter *);
237 static void	em_free_transmit_buffers(struct tx_ring *);
238 
239 static int	em_setup_receive_structures(struct adapter *);
240 static int	em_allocate_receive_buffers(struct rx_ring *);
241 static void	em_initialize_receive_unit(struct adapter *);
242 static void	em_free_receive_structures(struct adapter *);
243 static void	em_free_receive_buffers(struct rx_ring *);
244 
245 static void	em_enable_intr(struct adapter *);
246 static void	em_disable_intr(struct adapter *);
247 static void	em_update_stats_counters(struct adapter *);
248 static void	em_add_hw_stats(struct adapter *adapter);
249 static void	em_txeof(struct tx_ring *);
250 static bool	em_rxeof(struct rx_ring *, int, int *);
251 #ifndef __NO_STRICT_ALIGNMENT
252 static int	em_fixup_rx(struct rx_ring *);
253 #endif
254 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
255 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
256 		    struct ip *, u32 *, u32 *);
257 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
258 		    struct tcphdr *, u32 *, u32 *);
259 static void	em_set_promisc(struct adapter *);
260 static void	em_disable_promisc(struct adapter *);
261 static void	em_set_multi(struct adapter *);
262 static void	em_update_link_status(struct adapter *);
263 static void	em_refresh_mbufs(struct rx_ring *, int);
264 static void	em_register_vlan(void *, struct ifnet *, u16);
265 static void	em_unregister_vlan(void *, struct ifnet *, u16);
266 static void	em_setup_vlan_hw_support(struct adapter *);
267 static int	em_xmit(struct tx_ring *, struct mbuf **);
268 static int	em_dma_malloc(struct adapter *, bus_size_t,
269 		    struct em_dma_alloc *, int);
270 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
271 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
272 static void	em_print_nvm_info(struct adapter *);
273 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
274 static void	em_print_debug_info(struct adapter *);
275 static int 	em_is_valid_ether_addr(u8 *);
276 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
277 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
278 		    const char *, struct em_int_delay_info *, int, int);
279 /* Management and WOL Support */
280 static void	em_init_manageability(struct adapter *);
281 static void	em_release_manageability(struct adapter *);
282 static void     em_get_hw_control(struct adapter *);
283 static void     em_release_hw_control(struct adapter *);
284 static void	em_get_wakeup(device_t);
285 static void     em_enable_wakeup(device_t);
286 static int	em_enable_phy_wakeup(struct adapter *);
287 static void	em_led_func(void *, int);
288 static void	em_disable_aspm(struct adapter *);
289 
290 static int	em_irq_fast(void *);
291 
292 /* MSIX handlers */
293 static void	em_msix_tx(void *);
294 static void	em_msix_rx(void *);
295 static void	em_msix_link(void *);
296 static void	em_handle_tx(void *context, int pending);
297 static void	em_handle_rx(void *context, int pending);
298 static void	em_handle_link(void *context, int pending);
299 
300 static void	em_set_sysctl_value(struct adapter *, const char *,
301 		    const char *, int *, int);
302 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
303 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
304 
305 static __inline void em_rx_discard(struct rx_ring *, int);
306 
307 #ifdef DEVICE_POLLING
308 static poll_handler_t em_poll;
309 #endif /* POLLING */
310 
311 /*********************************************************************
312  *  FreeBSD Device Interface Entry Points
313  *********************************************************************/
314 
315 static device_method_t em_methods[] = {
316 	/* Device interface */
317 	DEVMETHOD(device_probe, em_probe),
318 	DEVMETHOD(device_attach, em_attach),
319 	DEVMETHOD(device_detach, em_detach),
320 	DEVMETHOD(device_shutdown, em_shutdown),
321 	DEVMETHOD(device_suspend, em_suspend),
322 	DEVMETHOD(device_resume, em_resume),
323 	DEVMETHOD_END
324 };
325 
326 static driver_t em_driver = {
327 	"em", em_methods, sizeof(struct adapter),
328 };
329 
330 devclass_t em_devclass;
331 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
332 MODULE_DEPEND(em, pci, 1, 1, 1);
333 MODULE_DEPEND(em, ether, 1, 1, 1);
334 
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/*
 * Interrupt-delay unit conversions.  The arithmetic treats one tick
 * as 1.024 usecs; both directions round to the nearest integer.
 */
#define EM_TICKS_TO_USECS(ticks)	(((ticks) * 1024 + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	(((usecs) * 1000 + 512) / 1024)
#define M_TSO_LEN			66

/* Default ITR value, derived from the interrupt rate ceiling. */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))

/* Let common code test CSUM_TSO even where the stack lacks TSO. */
#if !defined(CSUM_TSO)
#define CSUM_TSO	0
#endif
350 
351 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
352 
353 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
354 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
355 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
356 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
357 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
358     0, "Default transmit interrupt delay in usecs");
359 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
360     0, "Default receive interrupt delay in usecs");
361 
362 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
363 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
364 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
365 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
366 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
367     &em_tx_abs_int_delay_dflt, 0,
368     "Default transmit interrupt delay limit in usecs");
369 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
370     &em_rx_abs_int_delay_dflt, 0,
371     "Default receive interrupt delay limit in usecs");
372 
373 static int em_rxd = EM_DEFAULT_RXD;
374 static int em_txd = EM_DEFAULT_TXD;
375 TUNABLE_INT("hw.em.rxd", &em_rxd);
376 TUNABLE_INT("hw.em.txd", &em_txd);
377 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
378     "Number of receive descriptors per queue");
379 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
380     "Number of transmit descriptors per queue");
381 
382 static int em_smart_pwr_down = FALSE;
383 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
384 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
385     0, "Set to true to leave smart power down enabled on newer adapters");
386 
387 /* Controls whether promiscuous also shows bad packets */
388 static int em_debug_sbp = FALSE;
389 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
390 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
391     "Show bad packets in promiscuous mode");
392 
393 static int em_enable_msix = TRUE;
394 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
395 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
396     "Enable MSI-X interrupts");
397 
398 /* How many packets rxeof tries to clean at a time */
399 static int em_rx_process_limit = 100;
400 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
401 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
402     &em_rx_process_limit, 0,
403     "Maximum number of received packets to process "
404     "at a time, -1 means unlimited");
405 
406 /* Energy efficient ethernet - default to OFF */
407 static int eee_setting = 1;
408 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
409 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
410     "Enable Energy Efficient Ethernet");
411 
412 /* Global used in WOL setup with multiport cards */
413 static int global_quad_port_a = 0;
414 
415 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
416 #include <dev/netmap/if_em_netmap.h>
417 #endif /* DEV_NETMAP */
418 
419 /*********************************************************************
420  *  Device identification routine
421  *
422  *  em_probe determines if the driver should be loaded on
423  *  adapter based on PCI vendor/device id of the adapter.
424  *
425  *  return BUS_PROBE_DEFAULT on success, positive on failure
426  *********************************************************************/
427 
428 static int
em_probe(device_t dev)429 em_probe(device_t dev)
430 {
431 	char		adapter_name[60];
432 	u16		pci_vendor_id = 0;
433 	u16		pci_device_id = 0;
434 	u16		pci_subvendor_id = 0;
435 	u16		pci_subdevice_id = 0;
436 	em_vendor_info_t *ent;
437 
438 	INIT_DEBUGOUT("em_probe: begin");
439 
440 	pci_vendor_id = pci_get_vendor(dev);
441 	if (pci_vendor_id != EM_VENDOR_ID)
442 		return (ENXIO);
443 
444 	pci_device_id = pci_get_device(dev);
445 	pci_subvendor_id = pci_get_subvendor(dev);
446 	pci_subdevice_id = pci_get_subdevice(dev);
447 
448 	ent = em_vendor_info_array;
449 	while (ent->vendor_id != 0) {
450 		if ((pci_vendor_id == ent->vendor_id) &&
451 		    (pci_device_id == ent->device_id) &&
452 
453 		    ((pci_subvendor_id == ent->subvendor_id) ||
454 		    (ent->subvendor_id == PCI_ANY_ID)) &&
455 
456 		    ((pci_subdevice_id == ent->subdevice_id) ||
457 		    (ent->subdevice_id == PCI_ANY_ID))) {
458 			sprintf(adapter_name, "%s %s",
459 				em_strings[ent->index],
460 				em_driver_version);
461 			device_set_desc_copy(dev, adapter_name);
462 			return (BUS_PROBE_DEFAULT);
463 		}
464 		ent++;
465 	}
466 
467 	return (ENXIO);
468 }
469 
470 /*********************************************************************
471  *  Device initialization routine
472  *
473  *  The attach entry point is called when the driver is being loaded.
474  *  This routine identifies the type of hardware, allocates all resources
475  *  and initializes the hardware.
476  *
477  *  return 0 on success, positive on failure
478  *********************************************************************/
479 
480 static int
em_attach(device_t dev)481 em_attach(device_t dev)
482 {
483 	struct adapter	*adapter;
484 	struct e1000_hw	*hw;
485 	int		error = 0;
486 
487 	INIT_DEBUGOUT("em_attach: begin");
488 
489 	if (resource_disabled("em", device_get_unit(dev))) {
490 		device_printf(dev, "Disabled by device hint\n");
491 		return (ENXIO);
492 	}
493 
494 	adapter = device_get_softc(dev);
495 	adapter->dev = adapter->osdep.dev = dev;
496 	hw = &adapter->hw;
497 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
498 
499 	/* SYSCTL stuff */
500 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
501 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
502 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
503 	    em_sysctl_nvm_info, "I", "NVM Information");
504 
505 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
506 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
507 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
508 	    em_sysctl_debug_info, "I", "Debug Information");
509 
510 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
511 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
512 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
513 	    em_set_flowcntl, "I", "Flow Control");
514 
515 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
516 
517 	/* Determine hardware and mac info */
518 	em_identify_hardware(adapter);
519 
520 	/* Setup PCI resources */
521 	if (em_allocate_pci_resources(adapter)) {
522 		device_printf(dev, "Allocation of PCI resources failed\n");
523 		error = ENXIO;
524 		goto err_pci;
525 	}
526 
527 	/*
528 	** For ICH8 and family we need to
529 	** map the flash memory, and this
530 	** must happen after the MAC is
531 	** identified
532 	*/
533 	if ((hw->mac.type == e1000_ich8lan) ||
534 	    (hw->mac.type == e1000_ich9lan) ||
535 	    (hw->mac.type == e1000_ich10lan) ||
536 	    (hw->mac.type == e1000_pchlan) ||
537 	    (hw->mac.type == e1000_pch2lan) ||
538 	    (hw->mac.type == e1000_pch_lpt)) {
539 		int rid = EM_BAR_TYPE_FLASH;
540 		adapter->flash = bus_alloc_resource_any(dev,
541 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
542 		if (adapter->flash == NULL) {
543 			device_printf(dev, "Mapping of Flash failed\n");
544 			error = ENXIO;
545 			goto err_pci;
546 		}
547 		/* This is used in the shared code */
548 		hw->flash_address = (u8 *)adapter->flash;
549 		adapter->osdep.flash_bus_space_tag =
550 		    rman_get_bustag(adapter->flash);
551 		adapter->osdep.flash_bus_space_handle =
552 		    rman_get_bushandle(adapter->flash);
553 	}
554 
555 	/* Do Shared Code initialization */
556 	if (e1000_setup_init_funcs(hw, TRUE)) {
557 		device_printf(dev, "Setup of Shared code failed\n");
558 		error = ENXIO;
559 		goto err_pci;
560 	}
561 
562 	e1000_get_bus_info(hw);
563 
564 	/* Set up some sysctls for the tunable interrupt delays */
565 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
566 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
567 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
568 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
569 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
570 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
571 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
572 	    "receive interrupt delay limit in usecs",
573 	    &adapter->rx_abs_int_delay,
574 	    E1000_REGISTER(hw, E1000_RADV),
575 	    em_rx_abs_int_delay_dflt);
576 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
577 	    "transmit interrupt delay limit in usecs",
578 	    &adapter->tx_abs_int_delay,
579 	    E1000_REGISTER(hw, E1000_TADV),
580 	    em_tx_abs_int_delay_dflt);
581 	em_add_int_delay_sysctl(adapter, "itr",
582 	    "interrupt delay limit in usecs/4",
583 	    &adapter->tx_itr,
584 	    E1000_REGISTER(hw, E1000_ITR),
585 	    DEFAULT_ITR);
586 
587 	/* Sysctl for limiting the amount of work done in the taskqueue */
588 	em_set_sysctl_value(adapter, "rx_processing_limit",
589 	    "max number of rx packets to process", &adapter->rx_process_limit,
590 	    em_rx_process_limit);
591 
592 	/*
593 	 * Validate number of transmit and receive descriptors. It
594 	 * must not exceed hardware maximum, and must be multiple
595 	 * of E1000_DBA_ALIGN.
596 	 */
597 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
598 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
599 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
600 		    EM_DEFAULT_TXD, em_txd);
601 		adapter->num_tx_desc = EM_DEFAULT_TXD;
602 	} else
603 		adapter->num_tx_desc = em_txd;
604 
605 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
606 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
607 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
608 		    EM_DEFAULT_RXD, em_rxd);
609 		adapter->num_rx_desc = EM_DEFAULT_RXD;
610 	} else
611 		adapter->num_rx_desc = em_rxd;
612 
613 	hw->mac.autoneg = DO_AUTO_NEG;
614 	hw->phy.autoneg_wait_to_complete = FALSE;
615 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
616 
617 	/* Copper options */
618 	if (hw->phy.media_type == e1000_media_type_copper) {
619 		hw->phy.mdix = AUTO_ALL_MODES;
620 		hw->phy.disable_polarity_correction = FALSE;
621 		hw->phy.ms_type = EM_MASTER_SLAVE;
622 	}
623 
624 	/*
625 	 * Set the frame limits assuming
626 	 * standard ethernet sized frames.
627 	 */
628 	adapter->hw.mac.max_frame_size =
629 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
630 
631 	/*
632 	 * This controls when hardware reports transmit completion
633 	 * status.
634 	 */
635 	hw->mac.report_tx_early = 1;
636 
637 	/*
638 	** Get queue/ring memory
639 	*/
640 	if (em_allocate_queues(adapter)) {
641 		error = ENOMEM;
642 		goto err_pci;
643 	}
644 
645 	/* Allocate multicast array memory. */
646 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
647 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
648 	if (adapter->mta == NULL) {
649 		device_printf(dev, "Can not allocate multicast setup array\n");
650 		error = ENOMEM;
651 		goto err_late;
652 	}
653 
654 	/* Check SOL/IDER usage */
655 	if (e1000_check_reset_block(hw))
656 		device_printf(dev, "PHY reset is blocked"
657 		    " due to SOL/IDER session.\n");
658 
659 	/* Sysctl for setting Energy Efficient Ethernet */
660 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
661 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
662 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
663 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
664 	    adapter, 0, em_sysctl_eee, "I",
665 	    "Disable Energy Efficient Ethernet");
666 
667 	/*
668 	** Start from a known state, this is
669 	** important in reading the nvm and
670 	** mac from that.
671 	*/
672 	e1000_reset_hw(hw);
673 
674 
675 	/* Make sure we have a good EEPROM before we read from it */
676 	if (e1000_validate_nvm_checksum(hw) < 0) {
677 		/*
678 		** Some PCI-E parts fail the first check due to
679 		** the link being in sleep state, call it again,
680 		** if it fails a second time its a real issue.
681 		*/
682 		if (e1000_validate_nvm_checksum(hw) < 0) {
683 			device_printf(dev,
684 			    "The EEPROM Checksum Is Not Valid\n");
685 			error = EIO;
686 			goto err_late;
687 		}
688 	}
689 
690 	/* Copy the permanent MAC address out of the EEPROM */
691 	if (e1000_read_mac_addr(hw) < 0) {
692 		device_printf(dev, "EEPROM read error while reading MAC"
693 		    " address\n");
694 		error = EIO;
695 		goto err_late;
696 	}
697 
698 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
699 		device_printf(dev, "Invalid MAC address\n");
700 		error = EIO;
701 		goto err_late;
702 	}
703 
704 	/* Disable ULP support */
705 	e1000_disable_ulp_lpt_lp(hw, TRUE);
706 
707 	/*
708 	**  Do interrupt configuration
709 	*/
710 	if (adapter->msix > 1) /* Do MSIX */
711 		error = em_allocate_msix(adapter);
712 	else  /* MSI or Legacy */
713 		error = em_allocate_legacy(adapter);
714 	if (error)
715 		goto err_late;
716 
717 	/*
718 	 * Get Wake-on-Lan and Management info for later use
719 	 */
720 	em_get_wakeup(dev);
721 
722 	/* Setup OS specific network interface */
723 	if (em_setup_interface(dev, adapter) != 0)
724 		goto err_late;
725 
726 	em_reset(adapter);
727 
728 	/* Initialize statistics */
729 	em_update_stats_counters(adapter);
730 
731 	hw->mac.get_link_status = 1;
732 	em_update_link_status(adapter);
733 
734 	/* Register for VLAN events */
735 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
736 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
737 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
738 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
739 
740 	em_add_hw_stats(adapter);
741 
742 	/* Non-AMT based hardware can now take control from firmware */
743 	if (adapter->has_manage && !adapter->has_amt)
744 		em_get_hw_control(adapter);
745 
746 	/* Tell the stack that the interface is not active */
747 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
748 	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
749 
750 	adapter->led_dev = led_create(em_led_func, adapter,
751 	    device_get_nameunit(dev));
752 #ifdef DEV_NETMAP
753 	em_netmap_attach(adapter);
754 #endif /* DEV_NETMAP */
755 
756 	INIT_DEBUGOUT("em_attach: end");
757 
758 	return (0);
759 
760 err_late:
761 	em_free_transmit_structures(adapter);
762 	em_free_receive_structures(adapter);
763 	em_release_hw_control(adapter);
764 	if (adapter->ifp != NULL)
765 		if_free(adapter->ifp);
766 err_pci:
767 	em_free_pci_resources(adapter);
768 	free(adapter->mta, M_DEVBUF);
769 	EM_CORE_LOCK_DESTROY(adapter);
770 
771 	return (error);
772 }
773 
774 /*********************************************************************
775  *  Device removal routine
776  *
777  *  The detach entry point is called when the driver is being removed.
778  *  This routine stops the adapter and deallocates all the resources
779  *  that were allocated for driver operation.
780  *
781  *  return 0 on success, positive on failure
782  *********************************************************************/
783 
784 static int
em_detach(device_t dev)785 em_detach(device_t dev)
786 {
787 	struct adapter	*adapter = device_get_softc(dev);
788 	struct ifnet	*ifp = adapter->ifp;
789 
790 	INIT_DEBUGOUT("em_detach: begin");
791 
792 	/* Make sure VLANS are not using driver */
793 	if (adapter->ifp->if_vlantrunk != NULL) {
794 		device_printf(dev,"Vlan in use, detach first\n");
795 		return (EBUSY);
796 	}
797 
798 #ifdef DEVICE_POLLING
799 	if (ifp->if_capenable & IFCAP_POLLING)
800 		ether_poll_deregister(ifp);
801 #endif
802 
803 	if (adapter->led_dev != NULL)
804 		led_destroy(adapter->led_dev);
805 
806 	EM_CORE_LOCK(adapter);
807 	adapter->in_detach = 1;
808 	em_stop(adapter);
809 	EM_CORE_UNLOCK(adapter);
810 	EM_CORE_LOCK_DESTROY(adapter);
811 
812 	e1000_phy_hw_reset(&adapter->hw);
813 
814 	em_release_manageability(adapter);
815 	em_release_hw_control(adapter);
816 
817 	/* Unregister VLAN events */
818 	if (adapter->vlan_attach != NULL)
819 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
820 	if (adapter->vlan_detach != NULL)
821 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
822 
823 	ether_ifdetach(adapter->ifp);
824 	callout_drain(&adapter->timer);
825 
826 #ifdef DEV_NETMAP
827 	netmap_detach(ifp);
828 #endif /* DEV_NETMAP */
829 
830 	em_free_pci_resources(adapter);
831 	bus_generic_detach(dev);
832 	if_free(ifp);
833 
834 	em_free_transmit_structures(adapter);
835 	em_free_receive_structures(adapter);
836 
837 	em_release_hw_control(adapter);
838 	free(adapter->mta, M_DEVBUF);
839 
840 	return (0);
841 }
842 
843 /*********************************************************************
844  *
845  *  Shutdown entry point
846  *
847  **********************************************************************/
848 
849 static int
em_shutdown(device_t dev)850 em_shutdown(device_t dev)
851 {
852 	return em_suspend(dev);
853 }
854 
855 /*
856  * Suspend/resume device methods.
857  */
858 static int
em_suspend(device_t dev)859 em_suspend(device_t dev)
860 {
861 	struct adapter *adapter = device_get_softc(dev);
862 
863 	EM_CORE_LOCK(adapter);
864 
865         em_release_manageability(adapter);
866 	em_release_hw_control(adapter);
867 	em_enable_wakeup(dev);
868 
869 	EM_CORE_UNLOCK(adapter);
870 
871 	return bus_generic_suspend(dev);
872 }
873 
874 static int
em_resume(device_t dev)875 em_resume(device_t dev)
876 {
877 	struct adapter *adapter = device_get_softc(dev);
878 	struct tx_ring	*txr = adapter->tx_rings;
879 	struct ifnet *ifp = adapter->ifp;
880 
881 	EM_CORE_LOCK(adapter);
882 	if (adapter->hw.mac.type == e1000_pch2lan)
883 		e1000_resume_workarounds_pchlan(&adapter->hw);
884 	em_init_locked(adapter);
885 	em_init_manageability(adapter);
886 
887 	if ((ifp->if_flags & IFF_UP) &&
888 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
889 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
890 			EM_TX_LOCK(txr);
891 #ifdef EM_MULTIQUEUE
892 			if (!drbr_empty(ifp, txr->br))
893 				em_mq_start_locked(ifp, txr, NULL);
894 #else
895 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
896 				em_start_locked(ifp, txr);
897 #endif
898 			EM_TX_UNLOCK(txr);
899 		}
900 	}
901 	EM_CORE_UNLOCK(adapter);
902 
903 	return bus_generic_resume(dev);
904 }
905 
906 
907 #ifdef EM_MULTIQUEUE
908 /*********************************************************************
909  *  Multiqueue Transmit routines
910  *
911  *  em_mq_start is called by the stack to initiate a transmit.
912  *  however, if busy the driver can queue the request rather
913  *  than do an immediate send. It is this that is an advantage
914  *  in this driver, rather than also having multiple tx queues.
915  **********************************************************************/
916 static int
em_mq_start_locked(struct ifnet * ifp,struct tx_ring * txr,struct mbuf * m)917 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
918 {
919 	struct adapter  *adapter = txr->adapter;
920         struct mbuf     *next;
921         int             err = 0, enq = 0;
922 
923 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
924 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
925 		if (m != NULL)
926 			err = drbr_enqueue(ifp, txr->br, m);
927 		return (err);
928 	}
929 
930 	enq = 0;
931 	if (m != NULL) {
932 		err = drbr_enqueue(ifp, txr->br, m);
933 		if (err)
934 			return (err);
935 	}
936 
937 	/* Process the queue */
938 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
939 		if ((err = em_xmit(txr, &next)) != 0) {
940 			if (next == NULL)
941 				drbr_advance(ifp, txr->br);
942 			else
943 				drbr_putback(ifp, txr->br, next);
944 			break;
945 		}
946 		drbr_advance(ifp, txr->br);
947 		enq++;
948 		ifp->if_obytes += next->m_pkthdr.len;
949 		if (next->m_flags & M_MCAST)
950 			ifp->if_omcasts++;
951 		ETHER_BPF_MTAP(ifp, next);
952 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
953                         break;
954 	}
955 
956 	if (enq > 0) {
957                 /* Set the watchdog */
958                 txr->queue_status = EM_QUEUE_WORKING;
959 		txr->watchdog_time = ticks;
960 	}
961 
962 	if (txr->tx_avail < EM_MAX_SCATTER)
963 		em_txeof(txr);
964 	if (txr->tx_avail < EM_MAX_SCATTER)
965 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
966 	return (err);
967 }
968 
969 /*
970 ** Multiqueue capable stack interface
971 */
972 static int
em_mq_start(struct ifnet * ifp,struct mbuf * m)973 em_mq_start(struct ifnet *ifp, struct mbuf *m)
974 {
975 	struct adapter	*adapter = ifp->if_softc;
976 	struct tx_ring	*txr = adapter->tx_rings;
977 	int 		error;
978 
979 	if (EM_TX_TRYLOCK(txr)) {
980 		error = em_mq_start_locked(ifp, txr, m);
981 		EM_TX_UNLOCK(txr);
982 	} else
983 		error = drbr_enqueue(ifp, txr->br, m);
984 
985 	return (error);
986 }
987 
988 /*
989 ** Flush all ring buffers
990 */
991 static void
em_qflush(struct ifnet * ifp)992 em_qflush(struct ifnet *ifp)
993 {
994 	struct adapter  *adapter = ifp->if_softc;
995 	struct tx_ring  *txr = adapter->tx_rings;
996 	struct mbuf     *m;
997 
998 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
999 		EM_TX_LOCK(txr);
1000 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1001 			m_freem(m);
1002 		EM_TX_UNLOCK(txr);
1003 	}
1004 	if_qflush(ifp);
1005 }
1006 #else  /* !EM_MULTIQUEUE */
1007 
1008 static void
em_start_locked(struct ifnet * ifp,struct tx_ring * txr)1009 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1010 {
1011 	struct adapter	*adapter = ifp->if_softc;
1012 	struct mbuf	*m_head;
1013 
1014 	EM_TX_LOCK_ASSERT(txr);
1015 
1016 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1017 	    IFF_DRV_RUNNING)
1018 		return;
1019 
1020 	if (!adapter->link_active)
1021 		return;
1022 
1023 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1024         	/* Call cleanup if number of TX descriptors low */
1025 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1026 			em_txeof(txr);
1027 		if (txr->tx_avail < EM_MAX_SCATTER) {
1028 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1029 			break;
1030 		}
1031                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1032 		if (m_head == NULL)
1033 			break;
1034 		/*
1035 		 *  Encapsulation can modify our pointer, and or make it
1036 		 *  NULL on failure.  In that event, we can't requeue.
1037 		 */
1038 		if (em_xmit(txr, &m_head)) {
1039 			if (m_head == NULL)
1040 				break;
1041 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1042 			break;
1043 		}
1044 
1045 		/* Send a copy of the frame to the BPF listener */
1046 		ETHER_BPF_MTAP(ifp, m_head);
1047 
1048 		/* Set timeout in case hardware has problems transmitting. */
1049 		txr->watchdog_time = ticks;
1050                 txr->queue_status = EM_QUEUE_WORKING;
1051 	}
1052 
1053 	return;
1054 }
1055 
1056 static void
em_start(struct ifnet * ifp)1057 em_start(struct ifnet *ifp)
1058 {
1059 	struct adapter	*adapter = ifp->if_softc;
1060 	struct tx_ring	*txr = adapter->tx_rings;
1061 
1062 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1063 		EM_TX_LOCK(txr);
1064 		em_start_locked(ifp, txr);
1065 		EM_TX_UNLOCK(txr);
1066 	}
1067 	return;
1068 }
1069 #endif /* EM_MULTIQUEUE */
1070 
1071 /*********************************************************************
1072  *  Ioctl entry point
1073  *
1074  *  em_ioctl is called when the user wants to configure the
1075  *  interface.
1076  *
1077  *  return 0 on success, positive on failure
1078  **********************************************************************/
1079 
1080 static int
em_ioctl(struct ifnet * ifp,u_long command,caddr_t data)1081 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1082 {
1083 	struct adapter	*adapter = ifp->if_softc;
1084 	struct ifreq	*ifr = (struct ifreq *)data;
1085 #if defined(INET) || defined(INET6)
1086 	struct ifaddr	*ifa = (struct ifaddr *)data;
1087 #endif
1088 	bool		avoid_reset = FALSE;
1089 	int		error = 0;
1090 
1091 	if (adapter->in_detach)
1092 		return (error);
1093 
1094 	switch (command) {
1095 	case SIOCSIFADDR:
1096 #ifdef INET
1097 		if (ifa->ifa_addr->sa_family == AF_INET)
1098 			avoid_reset = TRUE;
1099 #endif
1100 #ifdef INET6
1101 		if (ifa->ifa_addr->sa_family == AF_INET6)
1102 			avoid_reset = TRUE;
1103 #endif
1104 		/*
1105 		** Calling init results in link renegotiation,
1106 		** so we avoid doing it when possible.
1107 		*/
1108 		if (avoid_reset) {
1109 			ifp->if_flags |= IFF_UP;
1110 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1111 				em_init(adapter);
1112 #ifdef INET
1113 			if (!(ifp->if_flags & IFF_NOARP))
1114 				arp_ifinit(ifp, ifa);
1115 #endif
1116 		} else
1117 			error = ether_ioctl(ifp, command, data);
1118 		break;
1119 	case SIOCSIFMTU:
1120 	    {
1121 		int max_frame_size;
1122 
1123 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1124 
1125 		EM_CORE_LOCK(adapter);
1126 		switch (adapter->hw.mac.type) {
1127 		case e1000_82571:
1128 		case e1000_82572:
1129 		case e1000_ich9lan:
1130 		case e1000_ich10lan:
1131 		case e1000_pch2lan:
1132 		case e1000_pch_lpt:
1133 		case e1000_82574:
1134 		case e1000_82583:
1135 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1136 			max_frame_size = 9234;
1137 			break;
1138 		case e1000_pchlan:
1139 			max_frame_size = 4096;
1140 			break;
1141 			/* Adapters that do not support jumbo frames */
1142 		case e1000_ich8lan:
1143 			max_frame_size = ETHER_MAX_LEN;
1144 			break;
1145 		default:
1146 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1147 		}
1148 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1149 		    ETHER_CRC_LEN) {
1150 			EM_CORE_UNLOCK(adapter);
1151 			error = EINVAL;
1152 			break;
1153 		}
1154 
1155 		ifp->if_mtu = ifr->ifr_mtu;
1156 		adapter->hw.mac.max_frame_size =
1157 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1158 		em_init_locked(adapter);
1159 		EM_CORE_UNLOCK(adapter);
1160 		break;
1161 	    }
1162 	case SIOCSIFFLAGS:
1163 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1164 		    SIOCSIFFLAGS (Set Interface Flags)");
1165 		EM_CORE_LOCK(adapter);
1166 		if (ifp->if_flags & IFF_UP) {
1167 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1168 				if ((ifp->if_flags ^ adapter->if_flags) &
1169 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1170 					em_disable_promisc(adapter);
1171 					em_set_promisc(adapter);
1172 				}
1173 			} else
1174 				em_init_locked(adapter);
1175 		} else
1176 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1177 				em_stop(adapter);
1178 		adapter->if_flags = ifp->if_flags;
1179 		EM_CORE_UNLOCK(adapter);
1180 		break;
1181 	case SIOCADDMULTI:
1182 	case SIOCDELMULTI:
1183 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1184 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1185 			EM_CORE_LOCK(adapter);
1186 			em_disable_intr(adapter);
1187 			em_set_multi(adapter);
1188 #ifdef DEVICE_POLLING
1189 			if (!(ifp->if_capenable & IFCAP_POLLING))
1190 #endif
1191 				em_enable_intr(adapter);
1192 			EM_CORE_UNLOCK(adapter);
1193 		}
1194 		break;
1195 	case SIOCSIFMEDIA:
1196 		/* Check SOL/IDER usage */
1197 		EM_CORE_LOCK(adapter);
1198 		if (e1000_check_reset_block(&adapter->hw)) {
1199 			EM_CORE_UNLOCK(adapter);
1200 			device_printf(adapter->dev, "Media change is"
1201 			    " blocked due to SOL/IDER session.\n");
1202 			break;
1203 		}
1204 		EM_CORE_UNLOCK(adapter);
1205 		/* falls thru */
1206 	case SIOCGIFMEDIA:
1207 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1208 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1209 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1210 		break;
1211 	case SIOCSIFCAP:
1212 	    {
1213 		int mask, reinit;
1214 
1215 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1216 		reinit = 0;
1217 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1218 #ifdef DEVICE_POLLING
1219 		if (mask & IFCAP_POLLING) {
1220 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1221 				error = ether_poll_register(em_poll, ifp);
1222 				if (error)
1223 					return (error);
1224 				EM_CORE_LOCK(adapter);
1225 				em_disable_intr(adapter);
1226 				ifp->if_capenable |= IFCAP_POLLING;
1227 				EM_CORE_UNLOCK(adapter);
1228 			} else {
1229 				error = ether_poll_deregister(ifp);
1230 				/* Enable interrupt even in error case */
1231 				EM_CORE_LOCK(adapter);
1232 				em_enable_intr(adapter);
1233 				ifp->if_capenable &= ~IFCAP_POLLING;
1234 				EM_CORE_UNLOCK(adapter);
1235 			}
1236 		}
1237 #endif
1238 		if (mask & IFCAP_HWCSUM) {
1239 			ifp->if_capenable ^= IFCAP_HWCSUM;
1240 			reinit = 1;
1241 		}
1242 		if (mask & IFCAP_TSO4) {
1243 			ifp->if_capenable ^= IFCAP_TSO4;
1244 			reinit = 1;
1245 		}
1246 		if (mask & IFCAP_VLAN_HWTAGGING) {
1247 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1248 			reinit = 1;
1249 		}
1250 		if (mask & IFCAP_VLAN_HWFILTER) {
1251 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1252 			reinit = 1;
1253 		}
1254 		if (mask & IFCAP_VLAN_HWTSO) {
1255 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1256 			reinit = 1;
1257 		}
1258 		if ((mask & IFCAP_WOL) &&
1259 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1260 			if (mask & IFCAP_WOL_MCAST)
1261 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1262 			if (mask & IFCAP_WOL_MAGIC)
1263 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1264 		}
1265 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1266 			em_init(adapter);
1267 		VLAN_CAPABILITIES(ifp);
1268 		break;
1269 	    }
1270 
1271 	default:
1272 		error = ether_ioctl(ifp, command, data);
1273 		break;
1274 	}
1275 
1276 	return (error);
1277 }
1278 
1279 
1280 /*********************************************************************
1281  *  Init entry point
1282  *
1283  *  This routine is used in two ways. It is used by the stack as
1284  *  init entry point in network interface structure. It is also used
1285  *  by the driver as a hw/sw initialization routine to get to a
1286  *  consistent state.
1287  *
1288  *  return 0 on success, positive on failure
1289  **********************************************************************/
1290 
1291 static void
em_init_locked(struct adapter * adapter)1292 em_init_locked(struct adapter *adapter)
1293 {
1294 	struct ifnet	*ifp = adapter->ifp;
1295 	device_t	dev = adapter->dev;
1296 
1297 	INIT_DEBUGOUT("em_init: begin");
1298 
1299 	EM_CORE_LOCK_ASSERT(adapter);
1300 
1301 	em_disable_intr(adapter);
1302 	callout_stop(&adapter->timer);
1303 
1304 	/* Get the latest mac address, User can use a LAA */
1305         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1306               ETHER_ADDR_LEN);
1307 
1308 	/* Put the address into the Receive Address Array */
1309 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1310 
1311 	/*
1312 	 * With the 82571 adapter, RAR[0] may be overwritten
1313 	 * when the other port is reset, we make a duplicate
1314 	 * in RAR[14] for that eventuality, this assures
1315 	 * the interface continues to function.
1316 	 */
1317 	if (adapter->hw.mac.type == e1000_82571) {
1318 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1319 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1320 		    E1000_RAR_ENTRIES - 1);
1321 	}
1322 
1323 	/* Initialize the hardware */
1324 	em_reset(adapter);
1325 	em_update_link_status(adapter);
1326 
1327 	/* Setup VLAN support, basic and offload if available */
1328 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1329 
1330 	/* Set hardware offload abilities */
1331 	ifp->if_hwassist = 0;
1332 	if (ifp->if_capenable & IFCAP_TXCSUM)
1333 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1334 	if (ifp->if_capenable & IFCAP_TSO4)
1335 		ifp->if_hwassist |= CSUM_TSO;
1336 
1337 	/* Configure for OS presence */
1338 	em_init_manageability(adapter);
1339 
1340 	/* Prepare transmit descriptors and buffers */
1341 	em_setup_transmit_structures(adapter);
1342 	em_initialize_transmit_unit(adapter);
1343 
1344 	/* Setup Multicast table */
1345 	em_set_multi(adapter);
1346 
1347 	/*
1348 	** Figure out the desired mbuf
1349 	** pool for doing jumbos
1350 	*/
1351 	if (adapter->hw.mac.max_frame_size <= 2048)
1352 		adapter->rx_mbuf_sz = MCLBYTES;
1353 	else if (adapter->hw.mac.max_frame_size <= 4096)
1354 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1355 	else
1356 		adapter->rx_mbuf_sz = MJUM9BYTES;
1357 
1358 	/* Prepare receive descriptors and buffers */
1359 	if (em_setup_receive_structures(adapter)) {
1360 		device_printf(dev, "Could not setup receive structures\n");
1361 		em_stop(adapter);
1362 		return;
1363 	}
1364 	em_initialize_receive_unit(adapter);
1365 
1366 	/* Use real VLAN Filter support? */
1367 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1368 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1369 			/* Use real VLAN Filter support */
1370 			em_setup_vlan_hw_support(adapter);
1371 		else {
1372 			u32 ctrl;
1373 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1374 			ctrl |= E1000_CTRL_VME;
1375 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1376 		}
1377 	}
1378 
1379 	/* Don't lose promiscuous settings */
1380 	em_set_promisc(adapter);
1381 
1382 	/* Set the interface as ACTIVE */
1383 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1384 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1385 
1386 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1387 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1388 
1389 	/* MSI/X configuration for 82574 */
1390 	if (adapter->hw.mac.type == e1000_82574) {
1391 		int tmp;
1392 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1393 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1394 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1395 		/* Set the IVAR - interrupt vector routing. */
1396 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1397 	}
1398 
1399 #ifdef DEVICE_POLLING
1400 	/*
1401 	 * Only enable interrupts if we are not polling, make sure
1402 	 * they are off otherwise.
1403 	 */
1404 	if (ifp->if_capenable & IFCAP_POLLING)
1405 		em_disable_intr(adapter);
1406 	else
1407 #endif /* DEVICE_POLLING */
1408 		em_enable_intr(adapter);
1409 
1410 	/* AMT based hardware can now take control from firmware */
1411 	if (adapter->has_manage && adapter->has_amt)
1412 		em_get_hw_control(adapter);
1413 }
1414 
/*
 * Unlocked wrapper: takes the core lock around em_init_locked.
 * arg is the adapter softc passed as an opaque pointer.
 */
static void
em_init(void *arg)
{
	struct adapter	*adapter;

	adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}
1424 
1425 
1426 #ifdef DEVICE_POLLING
1427 /*********************************************************************
1428  *
1429  *  Legacy polling routine: note this only works with single queue
1430  *
1431  *********************************************************************/
1432 static int
em_poll(struct ifnet * ifp,enum poll_cmd cmd,int count)1433 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1434 {
1435 	struct adapter *adapter = ifp->if_softc;
1436 	struct tx_ring	*txr = adapter->tx_rings;
1437 	struct rx_ring	*rxr = adapter->rx_rings;
1438 	u32		reg_icr;
1439 	int		rx_done;
1440 
1441 	EM_CORE_LOCK(adapter);
1442 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1443 		EM_CORE_UNLOCK(adapter);
1444 		return (0);
1445 	}
1446 
1447 	if (cmd == POLL_AND_CHECK_STATUS) {
1448 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1449 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1450 			callout_stop(&adapter->timer);
1451 			adapter->hw.mac.get_link_status = 1;
1452 			em_update_link_status(adapter);
1453 			callout_reset(&adapter->timer, hz,
1454 			    em_local_timer, adapter);
1455 		}
1456 	}
1457 	EM_CORE_UNLOCK(adapter);
1458 
1459 	em_rxeof(rxr, count, &rx_done);
1460 
1461 	EM_TX_LOCK(txr);
1462 	em_txeof(txr);
1463 #ifdef EM_MULTIQUEUE
1464 	if (!drbr_empty(ifp, txr->br))
1465 		em_mq_start_locked(ifp, txr, NULL);
1466 #else
1467 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1468 		em_start_locked(ifp, txr);
1469 #endif
1470 	EM_TX_UNLOCK(txr);
1471 
1472 	return (rx_done);
1473 }
1474 #endif /* DEVICE_POLLING */
1475 
1476 
1477 /*********************************************************************
1478  *
1479  *  Fast Legacy/MSI Combined Interrupt Service routine
1480  *
1481  *********************************************************************/
1482 static int
em_irq_fast(void * arg)1483 em_irq_fast(void *arg)
1484 {
1485 	struct adapter	*adapter = arg;
1486 	struct ifnet	*ifp;
1487 	u32		reg_icr;
1488 
1489 	ifp = adapter->ifp;
1490 
1491 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1492 
1493 	/* Hot eject?  */
1494 	if (reg_icr == 0xffffffff)
1495 		return FILTER_STRAY;
1496 
1497 	/* Definitely not our interrupt.  */
1498 	if (reg_icr == 0x0)
1499 		return FILTER_STRAY;
1500 
1501 	/*
1502 	 * Starting with the 82571 chip, bit 31 should be used to
1503 	 * determine whether the interrupt belongs to us.
1504 	 */
1505 	if (adapter->hw.mac.type >= e1000_82571 &&
1506 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1507 		return FILTER_STRAY;
1508 
1509 	em_disable_intr(adapter);
1510 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1511 
1512 	/* Link status change */
1513 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1514 		adapter->hw.mac.get_link_status = 1;
1515 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1516 	}
1517 
1518 	if (reg_icr & E1000_ICR_RXO)
1519 		adapter->rx_overruns++;
1520 	return FILTER_HANDLED;
1521 }
1522 
1523 /* Combined RX/TX handler, used by Legacy and MSI */
1524 static void
em_handle_que(void * context,int pending)1525 em_handle_que(void *context, int pending)
1526 {
1527 	struct adapter	*adapter = context;
1528 	struct ifnet	*ifp = adapter->ifp;
1529 	struct tx_ring	*txr = adapter->tx_rings;
1530 	struct rx_ring	*rxr = adapter->rx_rings;
1531 
1532 
1533 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1534 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1535 		EM_TX_LOCK(txr);
1536 		em_txeof(txr);
1537 #ifdef EM_MULTIQUEUE
1538 		if (!drbr_empty(ifp, txr->br))
1539 			em_mq_start_locked(ifp, txr, NULL);
1540 #else
1541 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1542 			em_start_locked(ifp, txr);
1543 #endif
1544 		EM_TX_UNLOCK(txr);
1545 		if (more) {
1546 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1547 			return;
1548 		}
1549 	}
1550 
1551 	em_enable_intr(adapter);
1552 	return;
1553 }
1554 
1555 
1556 /*********************************************************************
1557  *
1558  *  MSIX Interrupt Service Routines
1559  *
1560  **********************************************************************/
1561 static void
em_msix_tx(void * arg)1562 em_msix_tx(void *arg)
1563 {
1564 	struct tx_ring *txr = arg;
1565 	struct adapter *adapter = txr->adapter;
1566 	struct ifnet	*ifp = adapter->ifp;
1567 
1568 	++txr->tx_irq;
1569 	EM_TX_LOCK(txr);
1570 	em_txeof(txr);
1571 #ifdef EM_MULTIQUEUE
1572 	if (!drbr_empty(ifp, txr->br))
1573 		em_mq_start_locked(ifp, txr, NULL);
1574 #else
1575 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576 		em_start_locked(ifp, txr);
1577 #endif
1578 	/* Reenable this interrupt */
1579 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1580 	EM_TX_UNLOCK(txr);
1581 	return;
1582 }
1583 
1584 /*********************************************************************
1585  *
1586  *  MSIX RX Interrupt Service routine
1587  *
1588  **********************************************************************/
1589 
1590 static void
em_msix_rx(void * arg)1591 em_msix_rx(void *arg)
1592 {
1593 	struct rx_ring	*rxr = arg;
1594 	struct adapter	*adapter = rxr->adapter;
1595 	bool		more;
1596 
1597 	++rxr->rx_irq;
1598 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1599 		return;
1600 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1601 	if (more)
1602 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1603 	else
1604 		/* Reenable this interrupt */
1605 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1606 	return;
1607 }
1608 
1609 /*********************************************************************
1610  *
1611  *  MSIX Link Fast Interrupt Service routine
1612  *
1613  **********************************************************************/
1614 static void
em_msix_link(void * arg)1615 em_msix_link(void *arg)
1616 {
1617 	struct adapter	*adapter = arg;
1618 	u32		reg_icr;
1619 
1620 	++adapter->link_irq;
1621 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1622 
1623 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1624 		adapter->hw.mac.get_link_status = 1;
1625 		em_handle_link(adapter, 0);
1626 	} else
1627 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1628 		    EM_MSIX_LINK | E1000_IMS_LSC);
1629 	return;
1630 }
1631 
1632 static void
em_handle_rx(void * context,int pending)1633 em_handle_rx(void *context, int pending)
1634 {
1635 	struct rx_ring	*rxr = context;
1636 	struct adapter	*adapter = rxr->adapter;
1637         bool            more;
1638 
1639 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1640 	if (more)
1641 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1642 	else
1643 		/* Reenable this interrupt */
1644 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1645 }
1646 
1647 static void
em_handle_tx(void * context,int pending)1648 em_handle_tx(void *context, int pending)
1649 {
1650 	struct tx_ring	*txr = context;
1651 	struct adapter	*adapter = txr->adapter;
1652 	struct ifnet	*ifp = adapter->ifp;
1653 
1654 	EM_TX_LOCK(txr);
1655 	em_txeof(txr);
1656 #ifdef EM_MULTIQUEUE
1657 	if (!drbr_empty(ifp, txr->br))
1658 		em_mq_start_locked(ifp, txr, NULL);
1659 #else
1660 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1661 		em_start_locked(ifp, txr);
1662 #endif
1663 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1664 	EM_TX_UNLOCK(txr);
1665 }
1666 
1667 static void
em_handle_link(void * context,int pending)1668 em_handle_link(void *context, int pending)
1669 {
1670 	struct adapter	*adapter = context;
1671 	struct tx_ring	*txr = adapter->tx_rings;
1672 	struct ifnet *ifp = adapter->ifp;
1673 
1674 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1675 		return;
1676 
1677 	EM_CORE_LOCK(adapter);
1678 	callout_stop(&adapter->timer);
1679 	em_update_link_status(adapter);
1680 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1681 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1682 	    EM_MSIX_LINK | E1000_IMS_LSC);
1683 	if (adapter->link_active) {
1684 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1685 			EM_TX_LOCK(txr);
1686 #ifdef EM_MULTIQUEUE
1687 			if (!drbr_empty(ifp, txr->br))
1688 				em_mq_start_locked(ifp, txr, NULL);
1689 #else
1690 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1691 				em_start_locked(ifp, txr);
1692 #endif
1693 			EM_TX_UNLOCK(txr);
1694 		}
1695 	}
1696 	EM_CORE_UNLOCK(adapter);
1697 }
1698 
1699 
1700 /*********************************************************************
1701  *
1702  *  Media Ioctl callback
1703  *
1704  *  This routine is called whenever the user queries the status of
1705  *  the interface using ifconfig.
1706  *
1707  **********************************************************************/
1708 static void
em_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)1709 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1710 {
1711 	struct adapter *adapter = ifp->if_softc;
1712 	u_char fiber_type = IFM_1000_SX;
1713 
1714 	INIT_DEBUGOUT("em_media_status: begin");
1715 
1716 	EM_CORE_LOCK(adapter);
1717 	em_update_link_status(adapter);
1718 
1719 	ifmr->ifm_status = IFM_AVALID;
1720 	ifmr->ifm_active = IFM_ETHER;
1721 
1722 	if (!adapter->link_active) {
1723 		EM_CORE_UNLOCK(adapter);
1724 		return;
1725 	}
1726 
1727 	ifmr->ifm_status |= IFM_ACTIVE;
1728 
1729 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1730 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1731 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1732 	} else {
1733 		switch (adapter->link_speed) {
1734 		case 10:
1735 			ifmr->ifm_active |= IFM_10_T;
1736 			break;
1737 		case 100:
1738 			ifmr->ifm_active |= IFM_100_TX;
1739 			break;
1740 		case 1000:
1741 			ifmr->ifm_active |= IFM_1000_T;
1742 			break;
1743 		}
1744 		if (adapter->link_duplex == FULL_DUPLEX)
1745 			ifmr->ifm_active |= IFM_FDX;
1746 		else
1747 			ifmr->ifm_active |= IFM_HDX;
1748 	}
1749 	EM_CORE_UNLOCK(adapter);
1750 }
1751 
1752 /*********************************************************************
1753  *
1754  *  Media Ioctl callback
1755  *
1756  *  This routine is called when the user changes speed/duplex using
1757  *  media/mediopt option with ifconfig.
1758  *
1759  **********************************************************************/
1760 static int
em_media_change(struct ifnet * ifp)1761 em_media_change(struct ifnet *ifp)
1762 {
1763 	struct adapter *adapter = ifp->if_softc;
1764 	struct ifmedia  *ifm = &adapter->media;
1765 
1766 	INIT_DEBUGOUT("em_media_change: begin");
1767 
1768 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1769 		return (EINVAL);
1770 
1771 	EM_CORE_LOCK(adapter);
1772 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1773 	case IFM_AUTO:
1774 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1775 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1776 		break;
1777 	case IFM_1000_LX:
1778 	case IFM_1000_SX:
1779 	case IFM_1000_T:
1780 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1781 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1782 		break;
1783 	case IFM_100_TX:
1784 		adapter->hw.mac.autoneg = FALSE;
1785 		adapter->hw.phy.autoneg_advertised = 0;
1786 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1788 		else
1789 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1790 		break;
1791 	case IFM_10_T:
1792 		adapter->hw.mac.autoneg = FALSE;
1793 		adapter->hw.phy.autoneg_advertised = 0;
1794 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1795 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1796 		else
1797 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1798 		break;
1799 	default:
1800 		device_printf(adapter->dev, "Unsupported media type\n");
1801 	}
1802 
1803 	em_init_locked(adapter);
1804 	EM_CORE_UNLOCK(adapter);
1805 
1806 	return (0);
1807 }
1808 
1809 /*********************************************************************
1810  *
1811  *  This routine maps the mbufs to tx descriptors.
1812  *
1813  *  return 0 on success, positive on failure
1814  **********************************************************************/
1815 
1816 static int
em_xmit(struct tx_ring * txr,struct mbuf ** m_headp)1817 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1818 {
1819 	struct adapter		*adapter = txr->adapter;
1820 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1821 	bus_dmamap_t		map;
1822 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1823 	struct e1000_tx_desc	*ctxd = NULL;
1824 	struct mbuf		*m_head;
1825 	struct ether_header	*eh;
1826 	struct ip		*ip = NULL;
1827 	struct tcphdr		*tp = NULL;
1828 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1829 	int			ip_off, poff;
1830 	int			nsegs, i, j, first, last = 0;
1831 	int			error, do_tso, tso_desc = 0, remap = 1;
1832 
1833 	m_head = *m_headp;
1834 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1835 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1836 	ip_off = poff = 0;
1837 
1838 	/*
1839 	 * Intel recommends entire IP/TCP header length reside in a single
1840 	 * buffer. If multiple descriptors are used to describe the IP and
1841 	 * TCP header, each descriptor should describe one or more
1842 	 * complete headers; descriptors referencing only parts of headers
1843 	 * are not supported. If all layer headers are not coalesced into
1844 	 * a single buffer, each buffer should not cross a 4KB boundary,
1845 	 * or be larger than the maximum read request size.
1846 	 * Controller also requires modifing IP/TCP header to make TSO work
1847 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1848 	 * IP/TCP header into a single buffer to meet the requirement of
1849 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1850 	 * which also has similiar restrictions.
1851 	 */
1852 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1853 		if (do_tso || (m_head->m_next != NULL &&
1854 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1855 			if (M_WRITABLE(*m_headp) == 0) {
1856 				m_head = m_dup(*m_headp, M_NOWAIT);
1857 				m_freem(*m_headp);
1858 				if (m_head == NULL) {
1859 					*m_headp = NULL;
1860 					return (ENOBUFS);
1861 				}
1862 				*m_headp = m_head;
1863 			}
1864 		}
1865 		/*
1866 		 * XXX
1867 		 * Assume IPv4, we don't have TSO/checksum offload support
1868 		 * for IPv6 yet.
1869 		 */
1870 		ip_off = sizeof(struct ether_header);
1871 		m_head = m_pullup(m_head, ip_off);
1872 		if (m_head == NULL) {
1873 			*m_headp = NULL;
1874 			return (ENOBUFS);
1875 		}
1876 		eh = mtod(m_head, struct ether_header *);
1877 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1878 			ip_off = sizeof(struct ether_vlan_header);
1879 			m_head = m_pullup(m_head, ip_off);
1880 			if (m_head == NULL) {
1881 				*m_headp = NULL;
1882 				return (ENOBUFS);
1883 			}
1884 		}
1885 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1886 		if (m_head == NULL) {
1887 			*m_headp = NULL;
1888 			return (ENOBUFS);
1889 		}
1890 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1891 		poff = ip_off + (ip->ip_hl << 2);
1892 		if (do_tso) {
1893 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1894 			if (m_head == NULL) {
1895 				*m_headp = NULL;
1896 				return (ENOBUFS);
1897 			}
1898 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1899 			/*
1900 			 * TSO workaround:
1901 			 *   pull 4 more bytes of data into it.
1902 			 */
1903 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1904 			if (m_head == NULL) {
1905 				*m_headp = NULL;
1906 				return (ENOBUFS);
1907 			}
1908 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1909 			ip->ip_len = 0;
1910 			ip->ip_sum = 0;
1911 			/*
1912 			 * The pseudo TCP checksum does not include TCP payload
1913 			 * length so driver should recompute the checksum here
1914 			 * what hardware expect to see. This is adherence of
1915 			 * Microsoft's Large Send specification.
1916 			 */
1917 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1918 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1919 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1920 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1921 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1922 			if (m_head == NULL) {
1923 				*m_headp = NULL;
1924 				return (ENOBUFS);
1925 			}
1926 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1928 			if (m_head == NULL) {
1929 				*m_headp = NULL;
1930 				return (ENOBUFS);
1931 			}
1932 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1933 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1934 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1935 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1936 			if (m_head == NULL) {
1937 				*m_headp = NULL;
1938 				return (ENOBUFS);
1939 			}
1940 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1941 		}
1942 		*m_headp = m_head;
1943 	}
1944 
1945 	/*
1946 	 * Map the packet for DMA
1947 	 *
1948 	 * Capture the first descriptor index,
1949 	 * this descriptor will have the index
1950 	 * of the EOP which is the only one that
1951 	 * now gets a DONE bit writeback.
1952 	 */
1953 	first = txr->next_avail_desc;
1954 	tx_buffer = &txr->tx_buffers[first];
1955 	tx_buffer_mapped = tx_buffer;
1956 	map = tx_buffer->map;
1957 
1958 retry:
1959 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1960 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1961 
1962 	/*
1963 	 * There are two types of errors we can (try) to handle:
1964 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1965 	 *   out of segments.  Defragment the mbuf chain and try again.
1966 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1967 	 *   at this point in time.  Defer sending and try again later.
1968 	 * All other errors, in particular EINVAL, are fatal and prevent the
1969 	 * mbuf chain from ever going through.  Drop it and report error.
1970 	 */
1971 	if (error == EFBIG && remap) {
1972 		struct mbuf *m;
1973 
1974 		m = m_defrag(*m_headp, M_NOWAIT);
1975 		if (m == NULL) {
1976 			adapter->mbuf_alloc_failed++;
1977 			m_freem(*m_headp);
1978 			*m_headp = NULL;
1979 			return (ENOBUFS);
1980 		}
1981 		*m_headp = m;
1982 
1983 		/* Try it again, but only once */
1984 		remap = 0;
1985 		goto retry;
1986 	} else if (error == ENOMEM) {
1987 		adapter->no_tx_dma_setup++;
1988 		return (error);
1989 	} else if (error != 0) {
1990 		adapter->no_tx_dma_setup++;
1991 		m_freem(*m_headp);
1992 		*m_headp = NULL;
1993 		return (error);
1994 	}
1995 
1996 	/*
1997 	 * TSO Hardware workaround, if this packet is not
1998 	 * TSO, and is only a single descriptor long, and
1999 	 * it follows a TSO burst, then we need to add a
2000 	 * sentinel descriptor to prevent premature writeback.
2001 	 */
2002 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2003 		if (nsegs == 1)
2004 			tso_desc = TRUE;
2005 		txr->tx_tso = FALSE;
2006 	}
2007 
2008         if (nsegs > (txr->tx_avail - 2)) {
2009                 txr->no_desc_avail++;
2010 		bus_dmamap_unload(txr->txtag, map);
2011 		return (ENOBUFS);
2012         }
2013 	m_head = *m_headp;
2014 
2015 	/* Do hardware assists */
2016 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2017 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2018 		    &txd_upper, &txd_lower);
2019 		/* we need to make a final sentinel transmit desc */
2020 		tso_desc = TRUE;
2021 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2022 		em_transmit_checksum_setup(txr, m_head,
2023 		    ip_off, ip, &txd_upper, &txd_lower);
2024 
2025 	if (m_head->m_flags & M_VLANTAG) {
2026 		/* Set the vlan id. */
2027 		txd_upper |=
2028 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2029                 /* Tell hardware to add tag */
2030                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2031         }
2032 
2033 	i = txr->next_avail_desc;
2034 
2035 	/* Set up our transmit descriptors */
2036 	for (j = 0; j < nsegs; j++) {
2037 		bus_size_t seg_len;
2038 		bus_addr_t seg_addr;
2039 
2040 		tx_buffer = &txr->tx_buffers[i];
2041 		ctxd = &txr->tx_base[i];
2042 		seg_addr = segs[j].ds_addr;
2043 		seg_len  = segs[j].ds_len;
2044 		/*
2045 		** TSO Workaround:
2046 		** If this is the last descriptor, we want to
2047 		** split it so we have a small final sentinel
2048 		*/
2049 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2050 			seg_len -= 4;
2051 			ctxd->buffer_addr = htole64(seg_addr);
2052 			ctxd->lower.data = htole32(
2053 			adapter->txd_cmd | txd_lower | seg_len);
2054 			ctxd->upper.data =
2055 			    htole32(txd_upper);
2056 			if (++i == adapter->num_tx_desc)
2057 				i = 0;
2058 			/* Now make the sentinel */
2059 			++txd_used; /* using an extra txd */
2060 			ctxd = &txr->tx_base[i];
2061 			tx_buffer = &txr->tx_buffers[i];
2062 			ctxd->buffer_addr =
2063 			    htole64(seg_addr + seg_len);
2064 			ctxd->lower.data = htole32(
2065 			adapter->txd_cmd | txd_lower | 4);
2066 			ctxd->upper.data =
2067 			    htole32(txd_upper);
2068 			last = i;
2069 			if (++i == adapter->num_tx_desc)
2070 				i = 0;
2071 		} else {
2072 			ctxd->buffer_addr = htole64(seg_addr);
2073 			ctxd->lower.data = htole32(
2074 			adapter->txd_cmd | txd_lower | seg_len);
2075 			ctxd->upper.data =
2076 			    htole32(txd_upper);
2077 			last = i;
2078 			if (++i == adapter->num_tx_desc)
2079 				i = 0;
2080 		}
2081 		tx_buffer->m_head = NULL;
2082 		tx_buffer->next_eop = -1;
2083 	}
2084 
2085 	txr->next_avail_desc = i;
2086 	txr->tx_avail -= nsegs;
2087 	if (tso_desc) /* TSO used an extra for sentinel */
2088 		txr->tx_avail -= txd_used;
2089 
2090         tx_buffer->m_head = m_head;
2091 	/*
2092 	** Here we swap the map so the last descriptor,
2093 	** which gets the completion interrupt has the
2094 	** real map, and the first descriptor gets the
2095 	** unused map from this descriptor.
2096 	*/
2097 	tx_buffer_mapped->map = tx_buffer->map;
2098 	tx_buffer->map = map;
2099         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2100 
2101         /*
2102          * Last Descriptor of Packet
2103 	 * needs End Of Packet (EOP)
2104 	 * and Report Status (RS)
2105          */
2106         ctxd->lower.data |=
2107 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2108 	/*
2109 	 * Keep track in the first buffer which
2110 	 * descriptor will be written back
2111 	 */
2112 	tx_buffer = &txr->tx_buffers[first];
2113 	tx_buffer->next_eop = last;
2114 	/* Update the watchdog time early and often */
2115 	txr->watchdog_time = ticks;
2116 
2117 	/*
2118 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2119 	 * that this frame is available to transmit.
2120 	 */
2121 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2122 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2123 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2124 
2125 	return (0);
2126 }
2127 
2128 static void
em_set_promisc(struct adapter * adapter)2129 em_set_promisc(struct adapter *adapter)
2130 {
2131 	struct ifnet	*ifp = adapter->ifp;
2132 	u32		reg_rctl;
2133 
2134 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2135 
2136 	if (ifp->if_flags & IFF_PROMISC) {
2137 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2138 		/* Turn this on if you want to see bad packets */
2139 		if (em_debug_sbp)
2140 			reg_rctl |= E1000_RCTL_SBP;
2141 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2142 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2143 		reg_rctl |= E1000_RCTL_MPE;
2144 		reg_rctl &= ~E1000_RCTL_UPE;
2145 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2146 	}
2147 }
2148 
2149 static void
em_disable_promisc(struct adapter * adapter)2150 em_disable_promisc(struct adapter *adapter)
2151 {
2152 	struct ifnet	*ifp = adapter->ifp;
2153 	u32		reg_rctl;
2154 	int		mcnt = 0;
2155 
2156 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2157 	reg_rctl &=  (~E1000_RCTL_UPE);
2158 	if (ifp->if_flags & IFF_ALLMULTI)
2159 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2160 	else {
2161 		struct  ifmultiaddr *ifma;
2162 #if __FreeBSD_version < 800000
2163 		IF_ADDR_LOCK(ifp);
2164 #else
2165 		if_maddr_rlock(ifp);
2166 #endif
2167 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2168 			if (ifma->ifma_addr->sa_family != AF_LINK)
2169 				continue;
2170 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2171 				break;
2172 			mcnt++;
2173 		}
2174 #if __FreeBSD_version < 800000
2175 		IF_ADDR_UNLOCK(ifp);
2176 #else
2177 		if_maddr_runlock(ifp);
2178 #endif
2179 	}
2180 	/* Don't disable if in MAX groups */
2181 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2182 		reg_rctl &=  (~E1000_RCTL_MPE);
2183 	reg_rctl &=  (~E1000_RCTL_SBP);
2184 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2185 }
2186 
2187 
2188 /*********************************************************************
2189  *  Multicast Update
2190  *
2191  *  This routine is called whenever multicast address list is updated.
2192  *
2193  **********************************************************************/
2194 
2195 static void
em_set_multi(struct adapter * adapter)2196 em_set_multi(struct adapter *adapter)
2197 {
2198 	struct ifnet	*ifp = adapter->ifp;
2199 	struct ifmultiaddr *ifma;
2200 	u32 reg_rctl = 0;
2201 	u8  *mta; /* Multicast array memory */
2202 	int mcnt = 0;
2203 
2204 	IOCTL_DEBUGOUT("em_set_multi: begin");
2205 
2206 	mta = adapter->mta;
2207 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2208 
2209 	if (adapter->hw.mac.type == e1000_82542 &&
2210 	    adapter->hw.revision_id == E1000_REVISION_2) {
2211 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2212 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2213 			e1000_pci_clear_mwi(&adapter->hw);
2214 		reg_rctl |= E1000_RCTL_RST;
2215 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2216 		msec_delay(5);
2217 	}
2218 
2219 #if __FreeBSD_version < 800000
2220 	IF_ADDR_LOCK(ifp);
2221 #else
2222 	if_maddr_rlock(ifp);
2223 #endif
2224 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2225 		if (ifma->ifma_addr->sa_family != AF_LINK)
2226 			continue;
2227 
2228 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2229 			break;
2230 
2231 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2232 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2233 		mcnt++;
2234 	}
2235 #if __FreeBSD_version < 800000
2236 	IF_ADDR_UNLOCK(ifp);
2237 #else
2238 	if_maddr_runlock(ifp);
2239 #endif
2240 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2241 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2242 		reg_rctl |= E1000_RCTL_MPE;
2243 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244 	} else
2245 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2246 
2247 	if (adapter->hw.mac.type == e1000_82542 &&
2248 	    adapter->hw.revision_id == E1000_REVISION_2) {
2249 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2250 		reg_rctl &= ~E1000_RCTL_RST;
2251 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2252 		msec_delay(5);
2253 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2254 			e1000_pci_set_mwi(&adapter->hw);
2255 	}
2256 }
2257 
2258 
2259 /*********************************************************************
2260  *  Timer routine
2261  *
2262  *  This routine checks for link status and updates statistics.
2263  *
2264  **********************************************************************/
2265 
2266 static void
em_local_timer(void * arg)2267 em_local_timer(void *arg)
2268 {
2269 	struct adapter	*adapter = arg;
2270 	struct ifnet	*ifp = adapter->ifp;
2271 	struct tx_ring	*txr = adapter->tx_rings;
2272 	struct rx_ring	*rxr = adapter->rx_rings;
2273 	u32		trigger;
2274 
2275 	EM_CORE_LOCK_ASSERT(adapter);
2276 
2277 	em_update_link_status(adapter);
2278 	em_update_stats_counters(adapter);
2279 
2280 	/* Reset LAA into RAR[0] on 82571 */
2281 	if ((adapter->hw.mac.type == e1000_82571) &&
2282 	    e1000_get_laa_state_82571(&adapter->hw))
2283 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2284 
2285 	/* Mask to use in the irq trigger */
2286 	if (adapter->msix_mem)
2287 		trigger = rxr->ims;
2288 	else
2289 		trigger = E1000_ICS_RXDMT0;
2290 
2291 	/*
2292 	** Check on the state of the TX queue(s), this
2293 	** can be done without the lock because its RO
2294 	** and the HUNG state will be static if set.
2295 	*/
2296 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2297 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2298 		    (adapter->pause_frames == 0))
2299 			goto hung;
2300 		/* Schedule a TX tasklet if needed */
2301 		if (txr->tx_avail <= EM_MAX_SCATTER)
2302 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2303 	}
2304 
2305 	adapter->pause_frames = 0;
2306 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2307 #ifndef DEVICE_POLLING
2308 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2309 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2310 #endif
2311 	return;
2312 hung:
2313 	/* Looks like we're hung */
2314 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2315 	device_printf(adapter->dev,
2316 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2317 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2318 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2319 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2320 	    "Next TX to Clean = %d\n",
2321 	    txr->me, txr->tx_avail, txr->next_to_clean);
2322 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2323 	adapter->watchdog_events++;
2324 	adapter->pause_frames = 0;
2325 	em_init_locked(adapter);
2326 }
2327 
2328 
2329 static void
em_update_link_status(struct adapter * adapter)2330 em_update_link_status(struct adapter *adapter)
2331 {
2332 	struct e1000_hw *hw = &adapter->hw;
2333 	struct ifnet *ifp = adapter->ifp;
2334 	device_t dev = adapter->dev;
2335 	struct tx_ring *txr = adapter->tx_rings;
2336 	u32 link_check = 0;
2337 
2338 	/* Get the cached link value or read phy for real */
2339 	switch (hw->phy.media_type) {
2340 	case e1000_media_type_copper:
2341 		if (hw->mac.get_link_status) {
2342 			/* Do the work to read phy */
2343 			e1000_check_for_link(hw);
2344 			link_check = !hw->mac.get_link_status;
2345 			if (link_check) /* ESB2 fix */
2346 				e1000_cfg_on_link_up(hw);
2347 		} else
2348 			link_check = TRUE;
2349 		break;
2350 	case e1000_media_type_fiber:
2351 		e1000_check_for_link(hw);
2352 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2353                                  E1000_STATUS_LU);
2354 		break;
2355 	case e1000_media_type_internal_serdes:
2356 		e1000_check_for_link(hw);
2357 		link_check = adapter->hw.mac.serdes_has_link;
2358 		break;
2359 	default:
2360 	case e1000_media_type_unknown:
2361 		break;
2362 	}
2363 
2364 	/* Now check for a transition */
2365 	if (link_check && (adapter->link_active == 0)) {
2366 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2367 		    &adapter->link_duplex);
2368 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2369 		if ((adapter->link_speed != SPEED_1000) &&
2370 		    ((hw->mac.type == e1000_82571) ||
2371 		    (hw->mac.type == e1000_82572))) {
2372 			int tarc0;
2373 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2374 			tarc0 &= ~SPEED_MODE_BIT;
2375 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2376 		}
2377 		if (bootverbose)
2378 			device_printf(dev, "Link is up %d Mbps %s\n",
2379 			    adapter->link_speed,
2380 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2381 			    "Full Duplex" : "Half Duplex"));
2382 		adapter->link_active = 1;
2383 		adapter->smartspeed = 0;
2384 		ifp->if_baudrate = adapter->link_speed * 1000000;
2385 		if_link_state_change(ifp, LINK_STATE_UP);
2386 	} else if (!link_check && (adapter->link_active == 1)) {
2387 		ifp->if_baudrate = adapter->link_speed = 0;
2388 		adapter->link_duplex = 0;
2389 		if (bootverbose)
2390 			device_printf(dev, "Link is Down\n");
2391 		adapter->link_active = 0;
2392 		/* Link down, disable watchdog */
2393 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2394 			txr->queue_status = EM_QUEUE_IDLE;
2395 		if_link_state_change(ifp, LINK_STATE_DOWN);
2396 	}
2397 }
2398 
2399 /*********************************************************************
2400  *
2401  *  This routine disables all traffic on the adapter by issuing a
2402  *  global reset on the MAC and deallocates TX/RX buffers.
2403  *
2404  *  This routine should always be called with BOTH the CORE
2405  *  and TX locks.
2406  **********************************************************************/
2407 
2408 static void
em_stop(void * arg)2409 em_stop(void *arg)
2410 {
2411 	struct adapter	*adapter = arg;
2412 	struct ifnet	*ifp = adapter->ifp;
2413 	struct tx_ring	*txr = adapter->tx_rings;
2414 
2415 	EM_CORE_LOCK_ASSERT(adapter);
2416 
2417 	INIT_DEBUGOUT("em_stop: begin");
2418 
2419 	em_disable_intr(adapter);
2420 	callout_stop(&adapter->timer);
2421 
2422 	/* Tell the stack that the interface is no longer active */
2423 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2424 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2425 
2426         /* Unarm watchdog timer. */
2427 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2428 		EM_TX_LOCK(txr);
2429 		txr->queue_status = EM_QUEUE_IDLE;
2430 		EM_TX_UNLOCK(txr);
2431 	}
2432 
2433 	e1000_reset_hw(&adapter->hw);
2434 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2435 
2436 	e1000_led_off(&adapter->hw);
2437 	e1000_cleanup_led(&adapter->hw);
2438 }
2439 
2440 
2441 /*********************************************************************
2442  *
2443  *  Determine hardware revision.
2444  *
2445  **********************************************************************/
2446 static void
em_identify_hardware(struct adapter * adapter)2447 em_identify_hardware(struct adapter *adapter)
2448 {
2449 	device_t dev = adapter->dev;
2450 
2451 	/* Make sure our PCI config space has the necessary stuff set */
2452 	pci_enable_busmaster(dev);
2453 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2454 
2455 	/* Save off the information about this board */
2456 	adapter->hw.vendor_id = pci_get_vendor(dev);
2457 	adapter->hw.device_id = pci_get_device(dev);
2458 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2459 	adapter->hw.subsystem_vendor_id =
2460 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2461 	adapter->hw.subsystem_device_id =
2462 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2463 
2464 	/* Do Shared Code Init and Setup */
2465 	if (e1000_set_mac_type(&adapter->hw)) {
2466 		device_printf(dev, "Setup init failure\n");
2467 		return;
2468 	}
2469 }
2470 
2471 static int
em_allocate_pci_resources(struct adapter * adapter)2472 em_allocate_pci_resources(struct adapter *adapter)
2473 {
2474 	device_t	dev = adapter->dev;
2475 	int		rid;
2476 
2477 	rid = PCIR_BAR(0);
2478 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479 	    &rid, RF_ACTIVE);
2480 	if (adapter->memory == NULL) {
2481 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2482 		return (ENXIO);
2483 	}
2484 	adapter->osdep.mem_bus_space_tag =
2485 	    rman_get_bustag(adapter->memory);
2486 	adapter->osdep.mem_bus_space_handle =
2487 	    rman_get_bushandle(adapter->memory);
2488 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2489 
2490 	/* Default to a single queue */
2491 	adapter->num_queues = 1;
2492 
2493 	/*
2494 	 * Setup MSI/X or MSI if PCI Express
2495 	 */
2496 	adapter->msix = em_setup_msix(adapter);
2497 
2498 	adapter->hw.back = &adapter->osdep;
2499 
2500 	return (0);
2501 }
2502 
2503 /*********************************************************************
2504  *
2505  *  Setup the Legacy or MSI Interrupt handler
2506  *
2507  **********************************************************************/
2508 int
em_allocate_legacy(struct adapter * adapter)2509 em_allocate_legacy(struct adapter *adapter)
2510 {
2511 	device_t dev = adapter->dev;
2512 	struct tx_ring	*txr = adapter->tx_rings;
2513 	int error, rid = 0;
2514 
2515 	/* Manually turn off all interrupts */
2516 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2517 
2518 	if (adapter->msix == 1) /* using MSI */
2519 		rid = 1;
2520 	/* We allocate a single interrupt resource */
2521 	adapter->res = bus_alloc_resource_any(dev,
2522 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2523 	if (adapter->res == NULL) {
2524 		device_printf(dev, "Unable to allocate bus resource: "
2525 		    "interrupt\n");
2526 		return (ENXIO);
2527 	}
2528 
2529 	/*
2530 	 * Allocate a fast interrupt and the associated
2531 	 * deferred processing contexts.
2532 	 */
2533 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2534 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2535 	    taskqueue_thread_enqueue, &adapter->tq);
2536 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2537 	    device_get_nameunit(adapter->dev));
2538 	/* Use a TX only tasklet for local timer */
2539 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2540 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2541 	    taskqueue_thread_enqueue, &txr->tq);
2542 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2543 	    device_get_nameunit(adapter->dev));
2544 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2545 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2546 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2547 		device_printf(dev, "Failed to register fast interrupt "
2548 			    "handler: %d\n", error);
2549 		taskqueue_free(adapter->tq);
2550 		adapter->tq = NULL;
2551 		return (error);
2552 	}
2553 
2554 	return (0);
2555 }
2556 
2557 /*********************************************************************
2558  *
2559  *  Setup the MSIX Interrupt handlers
2560  *   This is not really Multiqueue, rather
2561  *   its just seperate interrupt vectors
2562  *   for TX, RX, and Link.
2563  *
2564  **********************************************************************/
2565 int
em_allocate_msix(struct adapter * adapter)2566 em_allocate_msix(struct adapter *adapter)
2567 {
2568 	device_t	dev = adapter->dev;
2569 	struct		tx_ring *txr = adapter->tx_rings;
2570 	struct		rx_ring *rxr = adapter->rx_rings;
2571 	int		error, rid, vector = 0;
2572 
2573 
2574 	/* Make sure all interrupts are disabled */
2575 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2576 
2577 	/* First set up ring resources */
2578 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2579 
2580 		/* RX ring */
2581 		rid = vector + 1;
2582 
2583 		rxr->res = bus_alloc_resource_any(dev,
2584 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2585 		if (rxr->res == NULL) {
2586 			device_printf(dev,
2587 			    "Unable to allocate bus resource: "
2588 			    "RX MSIX Interrupt %d\n", i);
2589 			return (ENXIO);
2590 		}
2591 		if ((error = bus_setup_intr(dev, rxr->res,
2592 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2593 		    rxr, &rxr->tag)) != 0) {
2594 			device_printf(dev, "Failed to register RX handler");
2595 			return (error);
2596 		}
2597 #if __FreeBSD_version >= 800504
2598 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2599 #endif
2600 		rxr->msix = vector++; /* NOTE increment vector for TX */
2601 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2602 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2603 		    taskqueue_thread_enqueue, &rxr->tq);
2604 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2605 		    device_get_nameunit(adapter->dev));
2606 		/*
2607 		** Set the bit to enable interrupt
2608 		** in E1000_IMS -- bits 20 and 21
2609 		** are for RX0 and RX1, note this has
2610 		** NOTHING to do with the MSIX vector
2611 		*/
2612 		rxr->ims = 1 << (20 + i);
2613 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2614 
2615 		/* TX ring */
2616 		rid = vector + 1;
2617 		txr->res = bus_alloc_resource_any(dev,
2618 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2619 		if (txr->res == NULL) {
2620 			device_printf(dev,
2621 			    "Unable to allocate bus resource: "
2622 			    "TX MSIX Interrupt %d\n", i);
2623 			return (ENXIO);
2624 		}
2625 		if ((error = bus_setup_intr(dev, txr->res,
2626 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2627 		    txr, &txr->tag)) != 0) {
2628 			device_printf(dev, "Failed to register TX handler");
2629 			return (error);
2630 		}
2631 #if __FreeBSD_version >= 800504
2632 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2633 #endif
2634 		txr->msix = vector++; /* Increment vector for next pass */
2635 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637 		    taskqueue_thread_enqueue, &txr->tq);
2638 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639 		    device_get_nameunit(adapter->dev));
2640 		/*
2641 		** Set the bit to enable interrupt
2642 		** in E1000_IMS -- bits 22 and 23
2643 		** are for TX0 and TX1, note this has
2644 		** NOTHING to do with the MSIX vector
2645 		*/
2646 		txr->ims = 1 << (22 + i);
2647 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2648 	}
2649 
2650 	/* Link interrupt */
2651 	++rid;
2652 	adapter->res = bus_alloc_resource_any(dev,
2653 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2654 	if (!adapter->res) {
2655 		device_printf(dev,"Unable to allocate "
2656 		    "bus resource: Link interrupt [%d]\n", rid);
2657 		return (ENXIO);
2658         }
2659 	/* Set the link handler function */
2660 	error = bus_setup_intr(dev, adapter->res,
2661 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2662 	    em_msix_link, adapter, &adapter->tag);
2663 	if (error) {
2664 		adapter->res = NULL;
2665 		device_printf(dev, "Failed to register LINK handler");
2666 		return (error);
2667 	}
2668 #if __FreeBSD_version >= 800504
2669 		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2670 #endif
2671 	adapter->linkvec = vector;
2672 	adapter->ivars |=  (8 | vector) << 16;
2673 	adapter->ivars |= 0x80000000;
2674 
2675 	return (0);
2676 }
2677 
2678 
2679 static void
em_free_pci_resources(struct adapter * adapter)2680 em_free_pci_resources(struct adapter *adapter)
2681 {
2682 	device_t	dev = adapter->dev;
2683 	struct tx_ring	*txr;
2684 	struct rx_ring	*rxr;
2685 	int		rid;
2686 
2687 
2688 	/*
2689 	** Release all the queue interrupt resources:
2690 	*/
2691 	for (int i = 0; i < adapter->num_queues; i++) {
2692 		txr = &adapter->tx_rings[i];
2693 		rxr = &adapter->rx_rings[i];
2694 		/* an early abort? */
2695 		if ((txr == NULL) || (rxr == NULL))
2696 			break;
2697 		rid = txr->msix +1;
2698 		if (txr->tag != NULL) {
2699 			bus_teardown_intr(dev, txr->res, txr->tag);
2700 			txr->tag = NULL;
2701 		}
2702 		if (txr->res != NULL)
2703 			bus_release_resource(dev, SYS_RES_IRQ,
2704 			    rid, txr->res);
2705 		rid = rxr->msix +1;
2706 		if (rxr->tag != NULL) {
2707 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2708 			rxr->tag = NULL;
2709 		}
2710 		if (rxr->res != NULL)
2711 			bus_release_resource(dev, SYS_RES_IRQ,
2712 			    rid, rxr->res);
2713 	}
2714 
2715         if (adapter->linkvec) /* we are doing MSIX */
2716                 rid = adapter->linkvec + 1;
2717         else
2718                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2719 
2720 	if (adapter->tag != NULL) {
2721 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2722 		adapter->tag = NULL;
2723 	}
2724 
2725 	if (adapter->res != NULL)
2726 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2727 
2728 
2729 	if (adapter->msix)
2730 		pci_release_msi(dev);
2731 
2732 	if (adapter->msix_mem != NULL)
2733 		bus_release_resource(dev, SYS_RES_MEMORY,
2734 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735 
2736 	if (adapter->memory != NULL)
2737 		bus_release_resource(dev, SYS_RES_MEMORY,
2738 		    PCIR_BAR(0), adapter->memory);
2739 
2740 	if (adapter->flash != NULL)
2741 		bus_release_resource(dev, SYS_RES_MEMORY,
2742 		    EM_FLASH, adapter->flash);
2743 }
2744 
2745 /*
2746  * Setup MSI or MSI/X
2747  */
2748 static int
em_setup_msix(struct adapter * adapter)2749 em_setup_msix(struct adapter *adapter)
2750 {
2751 	device_t dev = adapter->dev;
2752 	int val;
2753 
2754 	/*
2755 	** Setup MSI/X for Hartwell: tests have shown
2756 	** use of two queues to be unstable, and to
2757 	** provide no great gain anyway, so we simply
2758 	** seperate the interrupts and use a single queue.
2759 	*/
2760 	if ((adapter->hw.mac.type == e1000_82574) &&
2761 	    (em_enable_msix == TRUE)) {
2762 		/* Map the MSIX BAR */
2763 		int rid = PCIR_BAR(EM_MSIX_BAR);
2764 		adapter->msix_mem = bus_alloc_resource_any(dev,
2765 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2766        		if (adapter->msix_mem == NULL) {
2767 			/* May not be enabled */
2768                		device_printf(adapter->dev,
2769 			    "Unable to map MSIX table \n");
2770 			goto msi;
2771        		}
2772 		val = pci_msix_count(dev);
2773 		/* We only need/want 3 vectors */
2774 		if (val >= 3)
2775 			val = 3;
2776 		else {
2777                		device_printf(adapter->dev,
2778 			    "MSIX: insufficient vectors, using MSI\n");
2779 			goto msi;
2780 		}
2781 
2782 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2783 			device_printf(adapter->dev,
2784 			    "Using MSIX interrupts "
2785 			    "with %d vectors\n", val);
2786 			return (val);
2787 		}
2788 
2789 		/*
2790 		** If MSIX alloc failed or provided us with
2791 		** less than needed, free and fall through to MSI
2792 		*/
2793 		pci_release_msi(dev);
2794 	}
2795 msi:
2796 	if (adapter->msix_mem != NULL) {
2797 		bus_release_resource(dev, SYS_RES_MEMORY,
2798 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2799 		adapter->msix_mem = NULL;
2800 	}
2801        	val = 1;
2802        	if (pci_alloc_msi(dev, &val) == 0) {
2803                	device_printf(adapter->dev,"Using an MSI interrupt\n");
2804 		return (val);
2805 	}
2806 	/* Should only happen due to manual configuration */
2807 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2808 	return (0);
2809 }
2810 
2811 
2812 /*********************************************************************
2813  *
2814  *  Initialize the hardware to a configuration
2815  *  as specified by the adapter structure.
2816  *
2817  **********************************************************************/
2818 static void
em_reset(struct adapter * adapter)2819 em_reset(struct adapter *adapter)
2820 {
2821 	device_t	dev = adapter->dev;
2822 	struct ifnet	*ifp = adapter->ifp;
2823 	struct e1000_hw	*hw = &adapter->hw;
2824 	u16		rx_buffer_size;
2825 	u32		pba;
2826 
2827 	INIT_DEBUGOUT("em_reset: begin");
2828 
2829 	/* Set up smart power down as default off on newer adapters. */
2830 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2831 	    hw->mac.type == e1000_82572)) {
2832 		u16 phy_tmp = 0;
2833 
2834 		/* Speed up time to link by disabling smart power down. */
2835 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2836 		phy_tmp &= ~IGP02E1000_PM_SPD;
2837 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2838 	}
2839 
2840 	/*
2841 	 * Packet Buffer Allocation (PBA)
2842 	 * Writing PBA sets the receive portion of the buffer
2843 	 * the remainder is used for the transmit buffer.
2844 	 */
2845 	switch (hw->mac.type) {
2846 	/* Total Packet Buffer on these is 48K */
2847 	case e1000_82571:
2848 	case e1000_82572:
2849 	case e1000_80003es2lan:
2850 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2851 		break;
2852 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2853 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2854 		break;
2855 	case e1000_82574:
2856 	case e1000_82583:
2857 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2858 		break;
2859 	case e1000_ich8lan:
2860 		pba = E1000_PBA_8K;
2861 		break;
2862 	case e1000_ich9lan:
2863 	case e1000_ich10lan:
2864 		/* Boost Receive side for jumbo frames */
2865 		if (adapter->hw.mac.max_frame_size > 4096)
2866 			pba = E1000_PBA_14K;
2867 		else
2868 			pba = E1000_PBA_10K;
2869 		break;
2870 	case e1000_pchlan:
2871 	case e1000_pch2lan:
2872 	case e1000_pch_lpt:
2873 		pba = E1000_PBA_26K;
2874 		break;
2875 	default:
2876 		if (adapter->hw.mac.max_frame_size > 8192)
2877 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2878 		else
2879 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2880 	}
2881 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2882 
2883 	/*
2884 	 * These parameters control the automatic generation (Tx) and
2885 	 * response (Rx) to Ethernet PAUSE frames.
2886 	 * - High water mark should allow for at least two frames to be
2887 	 *   received after sending an XOFF.
2888 	 * - Low water mark works best when it is very near the high water mark.
2889 	 *   This allows the receiver to restart by sending XON when it has
2890 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2891 	 *   restart after one full frame is pulled from the buffer. There
2892 	 *   could be several smaller frames in the buffer and if so they will
2893 	 *   not trigger the XON until their total number reduces the buffer
2894 	 *   by 1500.
2895 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2896 	 */
2897 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2898 	hw->fc.high_water = rx_buffer_size -
2899 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2900 	hw->fc.low_water = hw->fc.high_water - 1500;
2901 
2902 	if (adapter->fc) /* locally set flow control value? */
2903 		hw->fc.requested_mode = adapter->fc;
2904 	else
2905 		hw->fc.requested_mode = e1000_fc_full;
2906 
2907 	if (hw->mac.type == e1000_80003es2lan)
2908 		hw->fc.pause_time = 0xFFFF;
2909 	else
2910 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2911 
2912 	hw->fc.send_xon = TRUE;
2913 
2914 	/* Device specific overrides/settings */
2915 	switch (hw->mac.type) {
2916 	case e1000_pchlan:
2917 		/* Workaround: no TX flow ctrl for PCH */
2918                 hw->fc.requested_mode = e1000_fc_rx_pause;
2919 		hw->fc.pause_time = 0xFFFF; /* override */
2920 		if (ifp->if_mtu > ETHERMTU) {
2921 			hw->fc.high_water = 0x3500;
2922 			hw->fc.low_water = 0x1500;
2923 		} else {
2924 			hw->fc.high_water = 0x5000;
2925 			hw->fc.low_water = 0x3000;
2926 		}
2927 		hw->fc.refresh_time = 0x1000;
2928 		break;
2929 	case e1000_pch2lan:
2930 	case e1000_pch_lpt:
2931 		hw->fc.high_water = 0x5C20;
2932 		hw->fc.low_water = 0x5048;
2933 		hw->fc.pause_time = 0x0650;
2934 		hw->fc.refresh_time = 0x0400;
2935 		/* Jumbos need adjusted PBA */
2936 		if (ifp->if_mtu > ETHERMTU)
2937 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2938 		else
2939 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2940 		break;
2941         case e1000_ich9lan:
2942         case e1000_ich10lan:
2943 		if (ifp->if_mtu > ETHERMTU) {
2944 			hw->fc.high_water = 0x2800;
2945 			hw->fc.low_water = hw->fc.high_water - 8;
2946 			break;
2947 		}
2948 		/* else fall thru */
2949 	default:
2950 		if (hw->mac.type == e1000_80003es2lan)
2951 			hw->fc.pause_time = 0xFFFF;
2952 		break;
2953 	}
2954 
2955 	/* Issue a global reset */
2956 	e1000_reset_hw(hw);
2957 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2958 	em_disable_aspm(adapter);
2959 	/* and a re-init */
2960 	if (e1000_init_hw(hw) < 0) {
2961 		device_printf(dev, "Hardware Initialization Failed\n");
2962 		return;
2963 	}
2964 
2965 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2966 	e1000_get_phy_info(hw);
2967 	e1000_check_for_link(hw);
2968 	return;
2969 }
2970 
2971 /*********************************************************************
2972  *
2973  *  Setup networking device structure and register an interface.
2974  *
2975  **********************************************************************/
2976 static int
em_setup_interface(device_t dev,struct adapter * adapter)2977 em_setup_interface(device_t dev, struct adapter *adapter)
2978 {
2979 	struct ifnet   *ifp;
2980 
2981 	INIT_DEBUGOUT("em_setup_interface: begin");
2982 
2983 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2984 	if (ifp == NULL) {
2985 		device_printf(dev, "can not allocate ifnet structure\n");
2986 		return (-1);
2987 	}
2988 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2989 	ifp->if_init =  em_init;
2990 	ifp->if_softc = adapter;
2991 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2992 	ifp->if_ioctl = em_ioctl;
2993 #ifdef EM_MULTIQUEUE
2994 	/* Multiqueue stack interface */
2995 	ifp->if_transmit = em_mq_start;
2996 	ifp->if_qflush = em_qflush;
2997 #else
2998 	ifp->if_start = em_start;
2999 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3000 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3001 	IFQ_SET_READY(&ifp->if_snd);
3002 #endif
3003 
3004 	ether_ifattach(ifp, adapter->hw.mac.addr);
3005 
3006 	ifp->if_capabilities = ifp->if_capenable = 0;
3007 
3008 
3009 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3010 	ifp->if_capabilities |= IFCAP_TSO4;
3011 	/*
3012 	 * Tell the upper layer(s) we
3013 	 * support full VLAN capability
3014 	 */
3015 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3016 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3017 			     |  IFCAP_VLAN_HWTSO
3018 			     |  IFCAP_VLAN_MTU;
3019 	ifp->if_capenable = ifp->if_capabilities;
3020 
3021 	/*
3022 	** Don't turn this on by default, if vlans are
3023 	** created on another pseudo device (eg. lagg)
3024 	** then vlan events are not passed thru, breaking
3025 	** operation, but with HW FILTER off it works. If
3026 	** using vlans directly on the em driver you can
3027 	** enable this and get full hardware tag filtering.
3028 	*/
3029 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3030 
3031 #ifdef DEVICE_POLLING
3032 	ifp->if_capabilities |= IFCAP_POLLING;
3033 #endif
3034 
3035 	/* Enable only WOL MAGIC by default */
3036 	if (adapter->wol) {
3037 		ifp->if_capabilities |= IFCAP_WOL;
3038 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3039 	}
3040 
3041 	/*
3042 	 * Specify the media types supported by this adapter and register
3043 	 * callbacks to update media and link information
3044 	 */
3045 	ifmedia_init(&adapter->media, IFM_IMASK,
3046 	    em_media_change, em_media_status);
3047 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3048 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3049 		u_char fiber_type = IFM_1000_SX;	/* default type */
3050 
3051 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3052 			    0, NULL);
3053 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3054 	} else {
3055 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3056 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3057 			    0, NULL);
3058 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3059 			    0, NULL);
3060 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3061 			    0, NULL);
3062 		if (adapter->hw.phy.type != e1000_phy_ife) {
3063 			ifmedia_add(&adapter->media,
3064 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3065 			ifmedia_add(&adapter->media,
3066 				IFM_ETHER | IFM_1000_T, 0, NULL);
3067 		}
3068 	}
3069 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3070 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3071 	return (0);
3072 }
3073 
3074 
3075 /*
3076  * Manage DMA'able memory.
3077  */
3078 static void
em_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)3079 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3080 {
3081 	if (error)
3082 		return;
3083 	*(bus_addr_t *) arg = segs[0].ds_addr;
3084 }
3085 
3086 static int
em_dma_malloc(struct adapter * adapter,bus_size_t size,struct em_dma_alloc * dma,int mapflags)3087 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3088         struct em_dma_alloc *dma, int mapflags)
3089 {
3090 	int error;
3091 
3092 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3093 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3094 				BUS_SPACE_MAXADDR,	/* lowaddr */
3095 				BUS_SPACE_MAXADDR,	/* highaddr */
3096 				NULL, NULL,		/* filter, filterarg */
3097 				size,			/* maxsize */
3098 				1,			/* nsegments */
3099 				size,			/* maxsegsize */
3100 				0,			/* flags */
3101 				NULL,			/* lockfunc */
3102 				NULL,			/* lockarg */
3103 				&dma->dma_tag);
3104 	if (error) {
3105 		device_printf(adapter->dev,
3106 		    "%s: bus_dma_tag_create failed: %d\n",
3107 		    __func__, error);
3108 		goto fail_0;
3109 	}
3110 
3111 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3112 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3113 	if (error) {
3114 		device_printf(adapter->dev,
3115 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3116 		    __func__, (uintmax_t)size, error);
3117 		goto fail_2;
3118 	}
3119 
3120 	dma->dma_paddr = 0;
3121 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3122 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3123 	if (error || dma->dma_paddr == 0) {
3124 		device_printf(adapter->dev,
3125 		    "%s: bus_dmamap_load failed: %d\n",
3126 		    __func__, error);
3127 		goto fail_3;
3128 	}
3129 
3130 	return (0);
3131 
3132 fail_3:
3133 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3134 fail_2:
3135 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3136 	bus_dma_tag_destroy(dma->dma_tag);
3137 fail_0:
3138 	dma->dma_map = NULL;
3139 	dma->dma_tag = NULL;
3140 
3141 	return (error);
3142 }
3143 
3144 static void
em_dma_free(struct adapter * adapter,struct em_dma_alloc * dma)3145 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3146 {
3147 	if (dma->dma_tag == NULL)
3148 		return;
3149 	if (dma->dma_map != NULL) {
3150 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3151 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3152 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3153 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3154 		dma->dma_map = NULL;
3155 	}
3156 	bus_dma_tag_destroy(dma->dma_tag);
3157 	dma->dma_tag = NULL;
3158 }
3159 
3160 
3161 /*********************************************************************
3162  *
3163  *  Allocate memory for the transmit and receive rings, and then
3164  *  the descriptors associated with each, called only once at attach.
3165  *
3166  **********************************************************************/
3167 static int
em_allocate_queues(struct adapter * adapter)3168 em_allocate_queues(struct adapter *adapter)
3169 {
3170 	device_t		dev = adapter->dev;
3171 	struct tx_ring		*txr = NULL;
3172 	struct rx_ring		*rxr = NULL;
3173 	int rsize, tsize, error = E1000_SUCCESS;
3174 	int txconf = 0, rxconf = 0;
3175 
3176 
3177 	/* Allocate the TX ring struct memory */
3178 	if (!(adapter->tx_rings =
3179 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3180 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3181 		device_printf(dev, "Unable to allocate TX ring memory\n");
3182 		error = ENOMEM;
3183 		goto fail;
3184 	}
3185 
3186 	/* Now allocate the RX */
3187 	if (!(adapter->rx_rings =
3188 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3189 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3190 		device_printf(dev, "Unable to allocate RX ring memory\n");
3191 		error = ENOMEM;
3192 		goto rx_fail;
3193 	}
3194 
3195 	tsize = roundup2(adapter->num_tx_desc *
3196 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3197 	/*
3198 	 * Now set up the TX queues, txconf is needed to handle the
3199 	 * possibility that things fail midcourse and we need to
3200 	 * undo memory gracefully
3201 	 */
3202 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3203 		/* Set up some basics */
3204 		txr = &adapter->tx_rings[i];
3205 		txr->adapter = adapter;
3206 		txr->me = i;
3207 
3208 		/* Initialize the TX lock */
3209 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3210 		    device_get_nameunit(dev), txr->me);
3211 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3212 
3213 		if (em_dma_malloc(adapter, tsize,
3214 			&txr->txdma, BUS_DMA_NOWAIT)) {
3215 			device_printf(dev,
3216 			    "Unable to allocate TX Descriptor memory\n");
3217 			error = ENOMEM;
3218 			goto err_tx_desc;
3219 		}
3220 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3221 		bzero((void *)txr->tx_base, tsize);
3222 
3223         	if (em_allocate_transmit_buffers(txr)) {
3224 			device_printf(dev,
3225 			    "Critical Failure setting up transmit buffers\n");
3226 			error = ENOMEM;
3227 			goto err_tx_desc;
3228         	}
3229 #if __FreeBSD_version >= 800000
3230 		/* Allocate a buf ring */
3231 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3232 		    M_WAITOK, &txr->tx_mtx);
3233 #endif
3234 	}
3235 
3236 	/*
3237 	 * Next the RX queues...
3238 	 */
3239 	rsize = roundup2(adapter->num_rx_desc *
3240 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3241 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3242 		rxr = &adapter->rx_rings[i];
3243 		rxr->adapter = adapter;
3244 		rxr->me = i;
3245 
3246 		/* Initialize the RX lock */
3247 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3248 		    device_get_nameunit(dev), txr->me);
3249 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3250 
3251 		if (em_dma_malloc(adapter, rsize,
3252 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3253 			device_printf(dev,
3254 			    "Unable to allocate RxDescriptor memory\n");
3255 			error = ENOMEM;
3256 			goto err_rx_desc;
3257 		}
3258 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3259 		bzero((void *)rxr->rx_base, rsize);
3260 
3261         	/* Allocate receive buffers for the ring*/
3262 		if (em_allocate_receive_buffers(rxr)) {
3263 			device_printf(dev,
3264 			    "Critical Failure setting up receive buffers\n");
3265 			error = ENOMEM;
3266 			goto err_rx_desc;
3267 		}
3268 	}
3269 
3270 	return (0);
3271 
3272 err_rx_desc:
3273 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3274 		em_dma_free(adapter, &rxr->rxdma);
3275 err_tx_desc:
3276 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3277 		em_dma_free(adapter, &txr->txdma);
3278 	free(adapter->rx_rings, M_DEVBUF);
3279 rx_fail:
3280 #if __FreeBSD_version >= 800000
3281 	buf_ring_free(txr->br, M_DEVBUF);
3282 #endif
3283 	free(adapter->tx_rings, M_DEVBUF);
3284 fail:
3285 	return (error);
3286 }
3287 
3288 
3289 /*********************************************************************
3290  *
3291  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3292  *  the information needed to transmit a packet on the wire. This is
3293  *  called only once at attach, setup is done every reset.
3294  *
3295  **********************************************************************/
3296 static int
em_allocate_transmit_buffers(struct tx_ring * txr)3297 em_allocate_transmit_buffers(struct tx_ring *txr)
3298 {
3299 	struct adapter *adapter = txr->adapter;
3300 	device_t dev = adapter->dev;
3301 	struct em_buffer *txbuf;
3302 	int error, i;
3303 
3304 	/*
3305 	 * Setup DMA descriptor areas.
3306 	 */
3307 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3308 			       1, 0,			/* alignment, bounds */
3309 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3310 			       BUS_SPACE_MAXADDR,	/* highaddr */
3311 			       NULL, NULL,		/* filter, filterarg */
3312 			       EM_TSO_SIZE,		/* maxsize */
3313 			       EM_MAX_SCATTER,		/* nsegments */
3314 			       PAGE_SIZE,		/* maxsegsize */
3315 			       0,			/* flags */
3316 			       NULL,			/* lockfunc */
3317 			       NULL,			/* lockfuncarg */
3318 			       &txr->txtag))) {
3319 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3320 		goto fail;
3321 	}
3322 
3323 	if (!(txr->tx_buffers =
3324 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3325 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3326 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3327 		error = ENOMEM;
3328 		goto fail;
3329 	}
3330 
3331         /* Create the descriptor buffer dma maps */
3332 	txbuf = txr->tx_buffers;
3333 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3334 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3335 		if (error != 0) {
3336 			device_printf(dev, "Unable to create TX DMA map\n");
3337 			goto fail;
3338 		}
3339 	}
3340 
3341 	return 0;
3342 fail:
3343 	/* We free all, it handles case where we are in the middle */
3344 	em_free_transmit_structures(adapter);
3345 	return (error);
3346 }
3347 
3348 /*********************************************************************
3349  *
3350  *  Initialize a transmit ring.
3351  *
3352  **********************************************************************/
3353 static void
em_setup_transmit_ring(struct tx_ring * txr)3354 em_setup_transmit_ring(struct tx_ring *txr)
3355 {
3356 	struct adapter *adapter = txr->adapter;
3357 	struct em_buffer *txbuf;
3358 	int i;
3359 #ifdef DEV_NETMAP
3360 	struct netmap_adapter *na = NA(adapter->ifp);
3361 	struct netmap_slot *slot;
3362 #endif /* DEV_NETMAP */
3363 
3364 	/* Clear the old descriptor contents */
3365 	EM_TX_LOCK(txr);
3366 #ifdef DEV_NETMAP
3367 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3368 #endif /* DEV_NETMAP */
3369 
3370 	bzero((void *)txr->tx_base,
3371 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3372 	/* Reset indices */
3373 	txr->next_avail_desc = 0;
3374 	txr->next_to_clean = 0;
3375 
3376 	/* Free any existing tx buffers. */
3377         txbuf = txr->tx_buffers;
3378 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3379 		if (txbuf->m_head != NULL) {
3380 			bus_dmamap_sync(txr->txtag, txbuf->map,
3381 			    BUS_DMASYNC_POSTWRITE);
3382 			bus_dmamap_unload(txr->txtag, txbuf->map);
3383 			m_freem(txbuf->m_head);
3384 			txbuf->m_head = NULL;
3385 		}
3386 #ifdef DEV_NETMAP
3387 		if (slot) {
3388 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3389 			uint64_t paddr;
3390 			void *addr;
3391 
3392 			addr = PNMB(slot + si, &paddr);
3393 			txr->tx_base[i].buffer_addr = htole64(paddr);
3394 			/* reload the map for netmap mode */
3395 			netmap_load_map(txr->txtag, txbuf->map, addr);
3396 		}
3397 #endif /* DEV_NETMAP */
3398 
3399 		/* clear the watch index */
3400 		txbuf->next_eop = -1;
3401         }
3402 
3403 	/* Set number of descriptors available */
3404 	txr->tx_avail = adapter->num_tx_desc;
3405 	txr->queue_status = EM_QUEUE_IDLE;
3406 
3407 	/* Clear checksum offload context. */
3408 	txr->last_hw_offload = 0;
3409 	txr->last_hw_ipcss = 0;
3410 	txr->last_hw_ipcso = 0;
3411 	txr->last_hw_tucss = 0;
3412 	txr->last_hw_tucso = 0;
3413 
3414 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3415 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3416 	EM_TX_UNLOCK(txr);
3417 }
3418 
3419 /*********************************************************************
3420  *
3421  *  Initialize all transmit rings.
3422  *
3423  **********************************************************************/
3424 static void
em_setup_transmit_structures(struct adapter * adapter)3425 em_setup_transmit_structures(struct adapter *adapter)
3426 {
3427 	struct tx_ring *txr = adapter->tx_rings;
3428 
3429 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3430 		em_setup_transmit_ring(txr);
3431 
3432 	return;
3433 }
3434 
3435 /*********************************************************************
3436  *
3437  *  Enable transmit unit.
3438  *
3439  **********************************************************************/
3440 static void
em_initialize_transmit_unit(struct adapter * adapter)3441 em_initialize_transmit_unit(struct adapter *adapter)
3442 {
3443 	struct tx_ring	*txr = adapter->tx_rings;
3444 	struct e1000_hw	*hw = &adapter->hw;
3445 	u32	tctl, tarc, tipg = 0;
3446 
3447 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3448 
3449 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3450 		u64 bus_addr = txr->txdma.dma_paddr;
3451 		/* Base and Len of TX Ring */
3452 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3453 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3454 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3455 	    	    (u32)(bus_addr >> 32));
3456 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3457 	    	    (u32)bus_addr);
3458 		/* Init the HEAD/TAIL indices */
3459 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3460 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3461 
3462 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3463 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3464 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3465 
3466 		txr->queue_status = EM_QUEUE_IDLE;
3467 	}
3468 
3469 	/* Set the default values for the Tx Inter Packet Gap timer */
3470 	switch (adapter->hw.mac.type) {
3471 	case e1000_80003es2lan:
3472 		tipg = DEFAULT_82543_TIPG_IPGR1;
3473 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3474 		    E1000_TIPG_IPGR2_SHIFT;
3475 		break;
3476 	default:
3477 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3478 		    (adapter->hw.phy.media_type ==
3479 		    e1000_media_type_internal_serdes))
3480 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3481 		else
3482 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3483 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3484 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3485 	}
3486 
3487 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3488 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3489 
3490 	if(adapter->hw.mac.type >= e1000_82540)
3491 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3492 		    adapter->tx_abs_int_delay.value);
3493 
3494 	if ((adapter->hw.mac.type == e1000_82571) ||
3495 	    (adapter->hw.mac.type == e1000_82572)) {
3496 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3497 		tarc |= SPEED_MODE_BIT;
3498 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3499 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3500 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3501 		tarc |= 1;
3502 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3503 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3504 		tarc |= 1;
3505 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3506 	}
3507 
3508 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3509 	if (adapter->tx_int_delay.value > 0)
3510 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3511 
3512 	/* Program the Transmit Control Register */
3513 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3514 	tctl &= ~E1000_TCTL_CT;
3515 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3516 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3517 
3518 	if (adapter->hw.mac.type >= e1000_82571)
3519 		tctl |= E1000_TCTL_MULR;
3520 
3521 	/* This write will effectively turn on the transmit unit. */
3522 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3523 
3524 }
3525 
3526 
3527 /*********************************************************************
3528  *
3529  *  Free all transmit rings.
3530  *
3531  **********************************************************************/
3532 static void
em_free_transmit_structures(struct adapter * adapter)3533 em_free_transmit_structures(struct adapter *adapter)
3534 {
3535 	struct tx_ring *txr = adapter->tx_rings;
3536 
3537 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3538 		EM_TX_LOCK(txr);
3539 		em_free_transmit_buffers(txr);
3540 		em_dma_free(adapter, &txr->txdma);
3541 		EM_TX_UNLOCK(txr);
3542 		EM_TX_LOCK_DESTROY(txr);
3543 	}
3544 
3545 	free(adapter->tx_rings, M_DEVBUF);
3546 }
3547 
3548 /*********************************************************************
3549  *
3550  *  Free transmit ring related data structures.
3551  *
3552  **********************************************************************/
3553 static void
em_free_transmit_buffers(struct tx_ring * txr)3554 em_free_transmit_buffers(struct tx_ring *txr)
3555 {
3556 	struct adapter		*adapter = txr->adapter;
3557 	struct em_buffer	*txbuf;
3558 
3559 	INIT_DEBUGOUT("free_transmit_ring: begin");
3560 
3561 	if (txr->tx_buffers == NULL)
3562 		return;
3563 
3564 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3565 		txbuf = &txr->tx_buffers[i];
3566 		if (txbuf->m_head != NULL) {
3567 			bus_dmamap_sync(txr->txtag, txbuf->map,
3568 			    BUS_DMASYNC_POSTWRITE);
3569 			bus_dmamap_unload(txr->txtag,
3570 			    txbuf->map);
3571 			m_freem(txbuf->m_head);
3572 			txbuf->m_head = NULL;
3573 			if (txbuf->map != NULL) {
3574 				bus_dmamap_destroy(txr->txtag,
3575 				    txbuf->map);
3576 				txbuf->map = NULL;
3577 			}
3578 		} else if (txbuf->map != NULL) {
3579 			bus_dmamap_unload(txr->txtag,
3580 			    txbuf->map);
3581 			bus_dmamap_destroy(txr->txtag,
3582 			    txbuf->map);
3583 			txbuf->map = NULL;
3584 		}
3585 	}
3586 #if __FreeBSD_version >= 800000
3587 	if (txr->br != NULL)
3588 		buf_ring_free(txr->br, M_DEVBUF);
3589 #endif
3590 	if (txr->tx_buffers != NULL) {
3591 		free(txr->tx_buffers, M_DEVBUF);
3592 		txr->tx_buffers = NULL;
3593 	}
3594 	if (txr->txtag != NULL) {
3595 		bus_dma_tag_destroy(txr->txtag);
3596 		txr->txtag = NULL;
3597 	}
3598 	return;
3599 }
3600 
3601 
3602 /*********************************************************************
3603  *  The offload context is protocol specific (TCP/UDP) and thus
3604  *  only needs to be set when the protocol changes. The occasion
3605  *  of a context change can be a performance detriment, and
3606  *  might be better just disabled. The reason arises in the way
3607  *  in which the controller supports pipelined requests from the
3608  *  Tx data DMA. Up to four requests can be pipelined, and they may
3609  *  belong to the same packet or to multiple packets. However all
3610  *  requests for one packet are issued before a request is issued
3611  *  for a subsequent packet and if a request for the next packet
3612  *  requires a context change, that request will be stalled
3613  *  until the previous request completes. This means setting up
3614  *  a new context effectively disables pipelined Tx data DMA which
3615  *  in turn greatly slow down performance to send small sized
3616  *  frames.
3617  **********************************************************************/
3618 static void
em_transmit_checksum_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,u32 * txd_upper,u32 * txd_lower)3619 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3620     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3621 {
3622 	struct adapter			*adapter = txr->adapter;
3623 	struct e1000_context_desc	*TXD = NULL;
3624 	struct em_buffer		*tx_buffer;
3625 	int				cur, hdr_len;
3626 	u32				cmd = 0;
3627 	u16				offload = 0;
3628 	u8				ipcso, ipcss, tucso, tucss;
3629 
3630 	ipcss = ipcso = tucss = tucso = 0;
3631 	hdr_len = ip_off + (ip->ip_hl << 2);
3632 	cur = txr->next_avail_desc;
3633 
3634 	/* Setup of IP header checksum. */
3635 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3636 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3637 		offload |= CSUM_IP;
3638 		ipcss = ip_off;
3639 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3640 		/*
3641 		 * Start offset for header checksum calculation.
3642 		 * End offset for header checksum calculation.
3643 		 * Offset of place to put the checksum.
3644 		 */
3645 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3646 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3647 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3648 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3649 		cmd |= E1000_TXD_CMD_IP;
3650 	}
3651 
3652 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3653  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3654  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3655  		offload |= CSUM_TCP;
3656  		tucss = hdr_len;
3657  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3658  		/*
3659  		 * Setting up new checksum offload context for every frames
3660  		 * takes a lot of processing time for hardware. This also
3661  		 * reduces performance a lot for small sized frames so avoid
3662  		 * it if driver can use previously configured checksum
3663  		 * offload context.
3664  		 */
3665  		if (txr->last_hw_offload == offload) {
3666  			if (offload & CSUM_IP) {
3667  				if (txr->last_hw_ipcss == ipcss &&
3668  				    txr->last_hw_ipcso == ipcso &&
3669  				    txr->last_hw_tucss == tucss &&
3670  				    txr->last_hw_tucso == tucso)
3671  					return;
3672  			} else {
3673  				if (txr->last_hw_tucss == tucss &&
3674  				    txr->last_hw_tucso == tucso)
3675  					return;
3676  			}
3677   		}
3678  		txr->last_hw_offload = offload;
3679  		txr->last_hw_tucss = tucss;
3680  		txr->last_hw_tucso = tucso;
3681  		/*
3682  		 * Start offset for payload checksum calculation.
3683  		 * End offset for payload checksum calculation.
3684  		 * Offset of place to put the checksum.
3685  		 */
3686 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3687  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3688  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3689  		TXD->upper_setup.tcp_fields.tucso = tucso;
3690  		cmd |= E1000_TXD_CMD_TCP;
3691  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3692  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3693  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3694  		tucss = hdr_len;
3695  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3696  		/*
3697  		 * Setting up new checksum offload context for every frames
3698  		 * takes a lot of processing time for hardware. This also
3699  		 * reduces performance a lot for small sized frames so avoid
3700  		 * it if driver can use previously configured checksum
3701  		 * offload context.
3702  		 */
3703  		if (txr->last_hw_offload == offload) {
3704  			if (offload & CSUM_IP) {
3705  				if (txr->last_hw_ipcss == ipcss &&
3706  				    txr->last_hw_ipcso == ipcso &&
3707  				    txr->last_hw_tucss == tucss &&
3708  				    txr->last_hw_tucso == tucso)
3709  					return;
3710  			} else {
3711  				if (txr->last_hw_tucss == tucss &&
3712  				    txr->last_hw_tucso == tucso)
3713  					return;
3714  			}
3715  		}
3716  		txr->last_hw_offload = offload;
3717  		txr->last_hw_tucss = tucss;
3718  		txr->last_hw_tucso = tucso;
3719  		/*
3720  		 * Start offset for header checksum calculation.
3721  		 * End offset for header checksum calculation.
3722  		 * Offset of place to put the checksum.
3723  		 */
3724 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3725  		TXD->upper_setup.tcp_fields.tucss = tucss;
3726  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3727  		TXD->upper_setup.tcp_fields.tucso = tucso;
3728   	}
3729 
3730  	if (offload & CSUM_IP) {
3731  		txr->last_hw_ipcss = ipcss;
3732  		txr->last_hw_ipcso = ipcso;
3733   	}
3734 
3735 	TXD->tcp_seg_setup.data = htole32(0);
3736 	TXD->cmd_and_length =
3737 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3738 	tx_buffer = &txr->tx_buffers[cur];
3739 	tx_buffer->m_head = NULL;
3740 	tx_buffer->next_eop = -1;
3741 
3742 	if (++cur == adapter->num_tx_desc)
3743 		cur = 0;
3744 
3745 	txr->tx_avail--;
3746 	txr->next_avail_desc = cur;
3747 }
3748 
3749 
3750 /**********************************************************************
3751  *
3752  *  Setup work for hardware segmentation offload (TSO)
3753  *
3754  **********************************************************************/
3755 static void
em_tso_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,struct tcphdr * tp,u32 * txd_upper,u32 * txd_lower)3756 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3757     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3758 {
3759 	struct adapter			*adapter = txr->adapter;
3760 	struct e1000_context_desc	*TXD;
3761 	struct em_buffer		*tx_buffer;
3762 	int cur, hdr_len;
3763 
3764 	/*
3765 	 * In theory we can use the same TSO context if and only if
3766 	 * frame is the same type(IP/TCP) and the same MSS. However
3767 	 * checking whether a frame has the same IP/TCP structure is
3768 	 * hard thing so just ignore that and always restablish a
3769 	 * new TSO context.
3770 	 */
3771 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3772 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3773 		      E1000_TXD_DTYP_D |	/* Data descr type */
3774 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3775 
3776 	/* IP and/or TCP header checksum calculation and insertion. */
3777 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3778 
3779 	cur = txr->next_avail_desc;
3780 	tx_buffer = &txr->tx_buffers[cur];
3781 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3782 
3783 	/*
3784 	 * Start offset for header checksum calculation.
3785 	 * End offset for header checksum calculation.
3786 	 * Offset of place put the checksum.
3787 	 */
3788 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3789 	TXD->lower_setup.ip_fields.ipcse =
3790 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3791 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3792 	/*
3793 	 * Start offset for payload checksum calculation.
3794 	 * End offset for payload checksum calculation.
3795 	 * Offset of place to put the checksum.
3796 	 */
3797 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3798 	TXD->upper_setup.tcp_fields.tucse = 0;
3799 	TXD->upper_setup.tcp_fields.tucso =
3800 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3801 	/*
3802 	 * Payload size per packet w/o any headers.
3803 	 * Length of all headers up to payload.
3804 	 */
3805 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3806 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3807 
3808 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3809 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3810 				E1000_TXD_CMD_TSE |	/* TSE context */
3811 				E1000_TXD_CMD_IP |	/* Do IP csum */
3812 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3813 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3814 
3815 	tx_buffer->m_head = NULL;
3816 	tx_buffer->next_eop = -1;
3817 
3818 	if (++cur == adapter->num_tx_desc)
3819 		cur = 0;
3820 
3821 	txr->tx_avail--;
3822 	txr->next_avail_desc = cur;
3823 	txr->tx_tso = TRUE;
3824 }
3825 
3826 
3827 /**********************************************************************
3828  *
3829  *  Examine each tx_buffer in the used queue. If the hardware is done
3830  *  processing the packet then free associated resources. The
3831  *  tx_buffer is put back on the free queue.
3832  *
3833  **********************************************************************/
3834 static void
em_txeof(struct tx_ring * txr)3835 em_txeof(struct tx_ring *txr)
3836 {
3837 	struct adapter	*adapter = txr->adapter;
3838         int first, last, done, processed;
3839         struct em_buffer *tx_buffer;
3840         struct e1000_tx_desc   *tx_desc, *eop_desc;
3841 	struct ifnet   *ifp = adapter->ifp;
3842 
3843 	EM_TX_LOCK_ASSERT(txr);
3844 #ifdef DEV_NETMAP
3845 	if (netmap_tx_irq(ifp, txr->me))
3846 		return;
3847 #endif /* DEV_NETMAP */
3848 
3849 	/* No work, make sure watchdog is off */
3850         if (txr->tx_avail == adapter->num_tx_desc) {
3851 		txr->queue_status = EM_QUEUE_IDLE;
3852                 return;
3853 	}
3854 
3855 	processed = 0;
3856         first = txr->next_to_clean;
3857         tx_desc = &txr->tx_base[first];
3858         tx_buffer = &txr->tx_buffers[first];
3859 	last = tx_buffer->next_eop;
3860         eop_desc = &txr->tx_base[last];
3861 
3862 	/*
3863 	 * What this does is get the index of the
3864 	 * first descriptor AFTER the EOP of the
3865 	 * first packet, that way we can do the
3866 	 * simple comparison on the inner while loop.
3867 	 */
3868 	if (++last == adapter->num_tx_desc)
3869  		last = 0;
3870 	done = last;
3871 
3872         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3873             BUS_DMASYNC_POSTREAD);
3874 
3875         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3876 		/* We clean the range of the packet */
3877 		while (first != done) {
3878                 	tx_desc->upper.data = 0;
3879                 	tx_desc->lower.data = 0;
3880                 	tx_desc->buffer_addr = 0;
3881                 	++txr->tx_avail;
3882 			++processed;
3883 
3884 			if (tx_buffer->m_head) {
3885 				bus_dmamap_sync(txr->txtag,
3886 				    tx_buffer->map,
3887 				    BUS_DMASYNC_POSTWRITE);
3888 				bus_dmamap_unload(txr->txtag,
3889 				    tx_buffer->map);
3890                         	m_freem(tx_buffer->m_head);
3891                         	tx_buffer->m_head = NULL;
3892                 	}
3893 			tx_buffer->next_eop = -1;
3894 			txr->watchdog_time = ticks;
3895 
3896 	                if (++first == adapter->num_tx_desc)
3897 				first = 0;
3898 
3899 	                tx_buffer = &txr->tx_buffers[first];
3900 			tx_desc = &txr->tx_base[first];
3901 		}
3902 		++ifp->if_opackets;
3903 		/* See if we can continue to the next packet */
3904 		last = tx_buffer->next_eop;
3905 		if (last != -1) {
3906         		eop_desc = &txr->tx_base[last];
3907 			/* Get new done point */
3908 			if (++last == adapter->num_tx_desc) last = 0;
3909 			done = last;
3910 		} else
3911 			break;
3912         }
3913         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3914             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3915 
3916         txr->next_to_clean = first;
3917 
3918 	/*
3919 	** Watchdog calculation, we know there's
3920 	** work outstanding or the first return
3921 	** would have been taken, so none processed
3922 	** for too long indicates a hang. local timer
3923 	** will examine this and do a reset if needed.
3924 	*/
3925 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3926 		txr->queue_status = EM_QUEUE_HUNG;
3927 
3928         /*
3929          * If we have a minimum free, clear IFF_DRV_OACTIVE
3930          * to tell the stack that it is OK to send packets.
3931 	 * Notice that all writes of OACTIVE happen under the
3932 	 * TX lock which, with a single queue, guarantees
3933 	 * sanity.
3934          */
3935         if (txr->tx_avail >= EM_MAX_SCATTER)
3936 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3937 
3938 	/* Disable watchdog if all clean */
3939 	if (txr->tx_avail == adapter->num_tx_desc) {
3940 		txr->queue_status = EM_QUEUE_IDLE;
3941 	}
3942 }
3943 
3944 
3945 /*********************************************************************
3946  *
3947  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3948  *
3949  **********************************************************************/
3950 static void
em_refresh_mbufs(struct rx_ring * rxr,int limit)3951 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3952 {
3953 	struct adapter		*adapter = rxr->adapter;
3954 	struct mbuf		*m;
3955 	bus_dma_segment_t	segs[1];
3956 	struct em_buffer	*rxbuf;
3957 	int			i, j, error, nsegs;
3958 	bool			cleaned = FALSE;
3959 
3960 	i = j = rxr->next_to_refresh;
3961 	/*
3962 	** Get one descriptor beyond
3963 	** our work mark to control
3964 	** the loop.
3965 	*/
3966 	if (++j == adapter->num_rx_desc)
3967 		j = 0;
3968 
3969 	while (j != limit) {
3970 		rxbuf = &rxr->rx_buffers[i];
3971 		if (rxbuf->m_head == NULL) {
3972 			m = m_getjcl(M_NOWAIT, MT_DATA,
3973 			    M_PKTHDR, adapter->rx_mbuf_sz);
3974 			/*
3975 			** If we have a temporary resource shortage
3976 			** that causes a failure, just abort refresh
3977 			** for now, we will return to this point when
3978 			** reinvoked from em_rxeof.
3979 			*/
3980 			if (m == NULL)
3981 				goto update;
3982 		} else
3983 			m = rxbuf->m_head;
3984 
3985 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3986 		m->m_flags |= M_PKTHDR;
3987 		m->m_data = m->m_ext.ext_buf;
3988 
3989 		/* Use bus_dma machinery to setup the memory mapping  */
3990 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3991 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3992 		if (error != 0) {
3993 			printf("Refresh mbufs: hdr dmamap load"
3994 			    " failure - %d\n", error);
3995 			m_free(m);
3996 			rxbuf->m_head = NULL;
3997 			goto update;
3998 		}
3999 		rxbuf->m_head = m;
4000 		bus_dmamap_sync(rxr->rxtag,
4001 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4002 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4003 		cleaned = TRUE;
4004 
4005 		i = j; /* Next is precalulated for us */
4006 		rxr->next_to_refresh = i;
4007 		/* Calculate next controlling index */
4008 		if (++j == adapter->num_rx_desc)
4009 			j = 0;
4010 	}
4011 update:
4012 	/*
4013 	** Update the tail pointer only if,
4014 	** and as far as we have refreshed.
4015 	*/
4016 	if (cleaned)
4017 		E1000_WRITE_REG(&adapter->hw,
4018 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4019 
4020 	return;
4021 }
4022 
4023 
4024 /*********************************************************************
4025  *
4026  *  Allocate memory for rx_buffer structures. Since we use one
4027  *  rx_buffer per received packet, the maximum number of rx_buffer's
4028  *  that we'll need is equal to the number of receive descriptors
4029  *  that we've allocated.
4030  *
4031  **********************************************************************/
4032 static int
em_allocate_receive_buffers(struct rx_ring * rxr)4033 em_allocate_receive_buffers(struct rx_ring *rxr)
4034 {
4035 	struct adapter		*adapter = rxr->adapter;
4036 	device_t		dev = adapter->dev;
4037 	struct em_buffer	*rxbuf;
4038 	int			error;
4039 
4040 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4041 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4042 	if (rxr->rx_buffers == NULL) {
4043 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4044 		return (ENOMEM);
4045 	}
4046 
4047 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4048 				1, 0,			/* alignment, bounds */
4049 				BUS_SPACE_MAXADDR,	/* lowaddr */
4050 				BUS_SPACE_MAXADDR,	/* highaddr */
4051 				NULL, NULL,		/* filter, filterarg */
4052 				MJUM9BYTES,		/* maxsize */
4053 				1,			/* nsegments */
4054 				MJUM9BYTES,		/* maxsegsize */
4055 				0,			/* flags */
4056 				NULL,			/* lockfunc */
4057 				NULL,			/* lockarg */
4058 				&rxr->rxtag);
4059 	if (error) {
4060 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4061 		    __func__, error);
4062 		goto fail;
4063 	}
4064 
4065 	rxbuf = rxr->rx_buffers;
4066 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4067 		rxbuf = &rxr->rx_buffers[i];
4068 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4069 		if (error) {
4070 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4071 			    __func__, error);
4072 			goto fail;
4073 		}
4074 	}
4075 
4076 	return (0);
4077 
4078 fail:
4079 	em_free_receive_structures(adapter);
4080 	return (error);
4081 }
4082 
4083 
4084 /*********************************************************************
4085  *
4086  *  Initialize a receive ring and its buffers.
4087  *
4088  **********************************************************************/
4089 static int
em_setup_receive_ring(struct rx_ring * rxr)4090 em_setup_receive_ring(struct rx_ring *rxr)
4091 {
4092 	struct	adapter 	*adapter = rxr->adapter;
4093 	struct em_buffer	*rxbuf;
4094 	bus_dma_segment_t	seg[1];
4095 	int			rsize, nsegs, error = 0;
4096 #ifdef DEV_NETMAP
4097 	struct netmap_adapter *na = NA(adapter->ifp);
4098 	struct netmap_slot *slot;
4099 #endif
4100 
4101 
4102 	/* Clear the ring contents */
4103 	EM_RX_LOCK(rxr);
4104 	rsize = roundup2(adapter->num_rx_desc *
4105 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4106 	bzero((void *)rxr->rx_base, rsize);
4107 #ifdef DEV_NETMAP
4108 	slot = netmap_reset(na, NR_RX, 0, 0);
4109 #endif
4110 
4111 	/*
4112 	** Free current RX buffer structs and their mbufs
4113 	*/
4114 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4115 		rxbuf = &rxr->rx_buffers[i];
4116 		if (rxbuf->m_head != NULL) {
4117 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4118 			    BUS_DMASYNC_POSTREAD);
4119 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4120 			m_freem(rxbuf->m_head);
4121 			rxbuf->m_head = NULL; /* mark as freed */
4122 		}
4123 	}
4124 
4125 	/* Now replenish the mbufs */
4126         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4127 		rxbuf = &rxr->rx_buffers[j];
4128 #ifdef DEV_NETMAP
4129 		if (slot) {
4130 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4131 			uint64_t paddr;
4132 			void *addr;
4133 
4134 			addr = PNMB(slot + si, &paddr);
4135 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4136 			/* Update descriptor */
4137 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4138 			continue;
4139 		}
4140 #endif /* DEV_NETMAP */
4141 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4142 		    M_PKTHDR, adapter->rx_mbuf_sz);
4143 		if (rxbuf->m_head == NULL) {
4144 			error = ENOBUFS;
4145 			goto fail;
4146 		}
4147 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4148 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4149 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4150 
4151 		/* Get the memory mapping */
4152 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4153 		    rxbuf->map, rxbuf->m_head, seg,
4154 		    &nsegs, BUS_DMA_NOWAIT);
4155 		if (error != 0) {
4156 			m_freem(rxbuf->m_head);
4157 			rxbuf->m_head = NULL;
4158 			goto fail;
4159 		}
4160 		bus_dmamap_sync(rxr->rxtag,
4161 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4162 
4163 		/* Update descriptor */
4164 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4165 	}
4166 	rxr->next_to_check = 0;
4167 	rxr->next_to_refresh = 0;
4168 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4169 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4170 
4171 fail:
4172 	EM_RX_UNLOCK(rxr);
4173 	return (error);
4174 }
4175 
4176 /*********************************************************************
4177  *
4178  *  Initialize all receive rings.
4179  *
4180  **********************************************************************/
4181 static int
em_setup_receive_structures(struct adapter * adapter)4182 em_setup_receive_structures(struct adapter *adapter)
4183 {
4184 	struct rx_ring *rxr = adapter->rx_rings;
4185 	int q;
4186 
4187 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4188 		if (em_setup_receive_ring(rxr))
4189 			goto fail;
4190 
4191 	return (0);
4192 fail:
4193 	/*
4194 	 * Free RX buffers allocated so far, we will only handle
4195 	 * the rings that completed, the failing case will have
4196 	 * cleaned up for itself. 'q' failed, so its the terminus.
4197 	 */
4198 	for (int i = 0; i < q; ++i) {
4199 		rxr = &adapter->rx_rings[i];
4200 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4201 			struct em_buffer *rxbuf;
4202 			rxbuf = &rxr->rx_buffers[n];
4203 			if (rxbuf->m_head != NULL) {
4204 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4205 			  	  BUS_DMASYNC_POSTREAD);
4206 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4207 				m_freem(rxbuf->m_head);
4208 				rxbuf->m_head = NULL;
4209 			}
4210 		}
4211 		rxr->next_to_check = 0;
4212 		rxr->next_to_refresh = 0;
4213 	}
4214 
4215 	return (ENOBUFS);
4216 }
4217 
4218 /*********************************************************************
4219  *
4220  *  Free all receive rings.
4221  *
4222  **********************************************************************/
4223 static void
em_free_receive_structures(struct adapter * adapter)4224 em_free_receive_structures(struct adapter *adapter)
4225 {
4226 	struct rx_ring *rxr = adapter->rx_rings;
4227 
4228 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4229 		em_free_receive_buffers(rxr);
4230 		/* Free the ring memory as well */
4231 		em_dma_free(adapter, &rxr->rxdma);
4232 		EM_RX_LOCK_DESTROY(rxr);
4233 	}
4234 
4235 	free(adapter->rx_rings, M_DEVBUF);
4236 }
4237 
4238 
4239 /*********************************************************************
4240  *
4241  *  Free receive ring data structures
4242  *
4243  **********************************************************************/
4244 static void
em_free_receive_buffers(struct rx_ring * rxr)4245 em_free_receive_buffers(struct rx_ring *rxr)
4246 {
4247 	struct adapter		*adapter = rxr->adapter;
4248 	struct em_buffer	*rxbuf = NULL;
4249 
4250 	INIT_DEBUGOUT("free_receive_buffers: begin");
4251 
4252 	if (rxr->rx_buffers != NULL) {
4253 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4254 			rxbuf = &rxr->rx_buffers[i];
4255 			if (rxbuf->map != NULL) {
4256 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4257 				    BUS_DMASYNC_POSTREAD);
4258 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4259 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4260 			}
4261 			if (rxbuf->m_head != NULL) {
4262 				m_freem(rxbuf->m_head);
4263 				rxbuf->m_head = NULL;
4264 			}
4265 		}
4266 		free(rxr->rx_buffers, M_DEVBUF);
4267 		rxr->rx_buffers = NULL;
4268 		rxr->next_to_check = 0;
4269 		rxr->next_to_refresh = 0;
4270 	}
4271 
4272 	if (rxr->rxtag != NULL) {
4273 		bus_dma_tag_destroy(rxr->rxtag);
4274 		rxr->rxtag = NULL;
4275 	}
4276 
4277 	return;
4278 }
4279 
4280 
4281 /*********************************************************************
4282  *
4283  *  Enable receive unit.
4284  *
4285  **********************************************************************/
4286 
4287 static void
em_initialize_receive_unit(struct adapter * adapter)4288 em_initialize_receive_unit(struct adapter *adapter)
4289 {
4290 	struct rx_ring	*rxr = adapter->rx_rings;
4291 	struct ifnet	*ifp = adapter->ifp;
4292 	struct e1000_hw	*hw = &adapter->hw;
4293 	u64	bus_addr;
4294 	u32	rctl, rxcsum;
4295 
4296 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4297 
4298 	/*
4299 	 * Make sure receives are disabled while setting
4300 	 * up the descriptor ring
4301 	 */
4302 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4303 	/* Do not disable if ever enabled on this hardware */
4304 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4305 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4306 
4307 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4308 	    adapter->rx_abs_int_delay.value);
4309 	/*
4310 	 * Set the interrupt throttling rate. Value is calculated
4311 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4312 	 */
4313 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4314 
4315 	/*
4316 	** When using MSIX interrupts we need to throttle
4317 	** using the EITR register (82574 only)
4318 	*/
4319 	if (hw->mac.type == e1000_82574) {
4320 		for (int i = 0; i < 4; i++)
4321 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4322 			    DEFAULT_ITR);
4323 		/* Disable accelerated acknowledge */
4324 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4325 	}
4326 
4327 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4328 	if (ifp->if_capenable & IFCAP_RXCSUM)
4329 		rxcsum |= E1000_RXCSUM_TUOFL;
4330 	else
4331 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4332 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4333 
4334 	/*
4335 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4336 	** long latencies are observed, like Lenovo X60. This
4337 	** change eliminates the problem, but since having positive
4338 	** values in RDTR is a known source of problems on other
4339 	** platforms another solution is being sought.
4340 	*/
4341 	if (hw->mac.type == e1000_82573)
4342 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4343 
4344 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4345 		/* Setup the Base and Length of the Rx Descriptor Ring */
4346 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4347 
4348 		bus_addr = rxr->rxdma.dma_paddr;
4349 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4350 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4351 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4352 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4353 		/* Setup the Head and Tail Descriptor Pointers */
4354 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4355 #ifdef DEV_NETMAP
4356 		/*
4357 		 * an init() while a netmap client is active must
4358 		 * preserve the rx buffers passed to userspace.
4359 		 */
4360 		if (ifp->if_capenable & IFCAP_NETMAP)
4361 			rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4362 #endif /* DEV_NETMAP */
4363 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4364 	}
4365 
4366 	/* Set PTHRESH for improved jumbo performance */
4367 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4368 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4369 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4370 	    (ifp->if_mtu > ETHERMTU)) {
4371 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4372 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4373 	}
4374 
4375 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4376 		if (ifp->if_mtu > ETHERMTU)
4377 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4378 		else
4379 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4380 	}
4381 
4382 	/* Setup the Receive Control Register */
4383 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4384 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4385 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4386 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4387 
4388         /* Strip the CRC */
4389         rctl |= E1000_RCTL_SECRC;
4390 
4391         /* Make sure VLAN Filters are off */
4392         rctl &= ~E1000_RCTL_VFE;
4393 	rctl &= ~E1000_RCTL_SBP;
4394 
4395 	if (adapter->rx_mbuf_sz == MCLBYTES)
4396 		rctl |= E1000_RCTL_SZ_2048;
4397 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4398 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4399 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4400 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4401 
4402 	if (ifp->if_mtu > ETHERMTU)
4403 		rctl |= E1000_RCTL_LPE;
4404 	else
4405 		rctl &= ~E1000_RCTL_LPE;
4406 
4407 	/* Write out the settings */
4408 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4409 
4410 	return;
4411 }
4412 
4413 
4414 /*********************************************************************
4415  *
4416  *  This routine executes in interrupt context. It replenishes
4417  *  the mbufs in the descriptor and sends data which has been
4418  *  dma'ed into host memory to upper layer.
4419  *
4420  *  We loop at most count times if count is > 0, or until done if
4421  *  count < 0.
4422  *
4423  *  For polling we also now return the number of cleaned packets
4424  *********************************************************************/
4425 static bool
em_rxeof(struct rx_ring * rxr,int count,int * done)4426 em_rxeof(struct rx_ring *rxr, int count, int *done)
4427 {
4428 	struct adapter		*adapter = rxr->adapter;
4429 	struct ifnet		*ifp = adapter->ifp;
4430 	struct mbuf		*mp, *sendmp;
4431 	u8			status = 0;
4432 	u16 			len;
4433 	int			i, processed, rxdone = 0;
4434 	bool			eop;
4435 	struct e1000_rx_desc	*cur;
4436 
4437 	EM_RX_LOCK(rxr);
4438 
4439 #ifdef DEV_NETMAP
4440 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4441 		EM_RX_UNLOCK(rxr);
4442 		return (FALSE);
4443 	}
4444 #endif /* DEV_NETMAP */
4445 
4446 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4447 
4448 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4449 			break;
4450 
4451 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4452 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4453 
4454 		cur = &rxr->rx_base[i];
4455 		status = cur->status;
4456 		mp = sendmp = NULL;
4457 
4458 		if ((status & E1000_RXD_STAT_DD) == 0)
4459 			break;
4460 
4461 		len = le16toh(cur->length);
4462 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4463 
4464 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4465 		    (rxr->discard == TRUE)) {
4466 			adapter->dropped_pkts++;
4467 			++rxr->rx_discarded;
4468 			if (!eop) /* Catch subsequent segs */
4469 				rxr->discard = TRUE;
4470 			else
4471 				rxr->discard = FALSE;
4472 			em_rx_discard(rxr, i);
4473 			goto next_desc;
4474 		}
4475 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4476 
4477 		/* Assign correct length to the current fragment */
4478 		mp = rxr->rx_buffers[i].m_head;
4479 		mp->m_len = len;
4480 
4481 		/* Trigger for refresh */
4482 		rxr->rx_buffers[i].m_head = NULL;
4483 
4484 		/* First segment? */
4485 		if (rxr->fmp == NULL) {
4486 			mp->m_pkthdr.len = len;
4487 			rxr->fmp = rxr->lmp = mp;
4488 		} else {
4489 			/* Chain mbuf's together */
4490 			mp->m_flags &= ~M_PKTHDR;
4491 			rxr->lmp->m_next = mp;
4492 			rxr->lmp = mp;
4493 			rxr->fmp->m_pkthdr.len += len;
4494 		}
4495 
4496 		if (eop) {
4497 			--count;
4498 			sendmp = rxr->fmp;
4499 			sendmp->m_pkthdr.rcvif = ifp;
4500 			ifp->if_ipackets++;
4501 			em_receive_checksum(cur, sendmp);
4502 #ifndef __NO_STRICT_ALIGNMENT
4503 			if (adapter->hw.mac.max_frame_size >
4504 			    (MCLBYTES - ETHER_ALIGN) &&
4505 			    em_fixup_rx(rxr) != 0)
4506 				goto skip;
4507 #endif
4508 			if (status & E1000_RXD_STAT_VP) {
4509 				sendmp->m_pkthdr.ether_vtag =
4510 				    le16toh(cur->special);
4511 				sendmp->m_flags |= M_VLANTAG;
4512 			}
4513 #ifndef __NO_STRICT_ALIGNMENT
4514 skip:
4515 #endif
4516 			rxr->fmp = rxr->lmp = NULL;
4517 		}
4518 next_desc:
4519 		/* Zero out the receive descriptors status. */
4520 		cur->status = 0;
4521 		++rxdone;	/* cumulative for POLL */
4522 		++processed;
4523 
4524 		/* Advance our pointers to the next descriptor. */
4525 		if (++i == adapter->num_rx_desc)
4526 			i = 0;
4527 
4528 		/* Send to the stack */
4529 		if (sendmp != NULL) {
4530 			rxr->next_to_check = i;
4531 			EM_RX_UNLOCK(rxr);
4532 			(*ifp->if_input)(ifp, sendmp);
4533 			EM_RX_LOCK(rxr);
4534 			i = rxr->next_to_check;
4535 		}
4536 
4537 		/* Only refresh mbufs every 8 descriptors */
4538 		if (processed == 8) {
4539 			em_refresh_mbufs(rxr, i);
4540 			processed = 0;
4541 		}
4542 	}
4543 
4544 	/* Catch any remaining refresh work */
4545 	if (e1000_rx_unrefreshed(rxr))
4546 		em_refresh_mbufs(rxr, i);
4547 
4548 	rxr->next_to_check = i;
4549 	if (done != NULL)
4550 		*done = rxdone;
4551 	EM_RX_UNLOCK(rxr);
4552 
4553 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4554 }
4555 
4556 static __inline void
em_rx_discard(struct rx_ring * rxr,int i)4557 em_rx_discard(struct rx_ring *rxr, int i)
4558 {
4559 	struct em_buffer	*rbuf;
4560 
4561 	rbuf = &rxr->rx_buffers[i];
4562 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4563 
4564 	/* Free any previous pieces */
4565 	if (rxr->fmp != NULL) {
4566 		rxr->fmp->m_flags |= M_PKTHDR;
4567 		m_freem(rxr->fmp);
4568 		rxr->fmp = NULL;
4569 		rxr->lmp = NULL;
4570 	}
4571 	/*
4572 	** Free buffer and allow em_refresh_mbufs()
4573 	** to clean up and recharge buffer.
4574 	*/
4575 	if (rbuf->m_head) {
4576 		m_free(rbuf->m_head);
4577 		rbuf->m_head = NULL;
4578 	}
4579 	return;
4580 }
4581 
4582 #ifndef __NO_STRICT_ALIGNMENT
4583 /*
4584  * When jumbo frames are enabled we should realign entire payload on
4585  * architecures with strict alignment. This is serious design mistake of 8254x
4586  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4587  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4588  * payload. On architecures without strict alignment restrictions 8254x still
4589  * performs unaligned memory access which would reduce the performance too.
4590  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4591  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4592  * existing mbuf chain.
4593  *
4594  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4595  * not used at all on architectures with strict alignment.
4596  */
4597 static int
em_fixup_rx(struct rx_ring * rxr)4598 em_fixup_rx(struct rx_ring *rxr)
4599 {
4600 	struct adapter *adapter = rxr->adapter;
4601 	struct mbuf *m, *n;
4602 	int error;
4603 
4604 	error = 0;
4605 	m = rxr->fmp;
4606 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4607 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4608 		m->m_data += ETHER_HDR_LEN;
4609 	} else {
4610 		MGETHDR(n, M_NOWAIT, MT_DATA);
4611 		if (n != NULL) {
4612 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4613 			m->m_data += ETHER_HDR_LEN;
4614 			m->m_len -= ETHER_HDR_LEN;
4615 			n->m_len = ETHER_HDR_LEN;
4616 			M_MOVE_PKTHDR(n, m);
4617 			n->m_next = m;
4618 			rxr->fmp = n;
4619 		} else {
4620 			adapter->dropped_pkts++;
4621 			m_freem(rxr->fmp);
4622 			rxr->fmp = NULL;
4623 			error = ENOMEM;
4624 		}
4625 	}
4626 
4627 	return (error);
4628 }
4629 #endif
4630 
4631 /*********************************************************************
4632  *
4633  *  Verify that the hardware indicated that the checksum is valid.
4634  *  Inform the stack about the status of checksum so that stack
4635  *  doesn't spend time verifying the checksum.
4636  *
4637  *********************************************************************/
4638 static void
em_receive_checksum(struct e1000_rx_desc * rx_desc,struct mbuf * mp)4639 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4640 {
4641 	mp->m_pkthdr.csum_flags = 0;
4642 
4643 	/* Ignore Checksum bit is set */
4644 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4645 		return;
4646 
4647 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4648 		return;
4649 
4650 	/* IP Checksum Good? */
4651 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4652 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4653 
4654 	/* TCP or UDP checksum */
4655 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4656 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4657 		mp->m_pkthdr.csum_data = htons(0xffff);
4658 	}
4659 }
4660 
4661 /*
4662  * This routine is run via an vlan
4663  * config EVENT
4664  */
4665 static void
em_register_vlan(void * arg,struct ifnet * ifp,u16 vtag)4666 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4667 {
4668 	struct adapter	*adapter = ifp->if_softc;
4669 	u32		index, bit;
4670 
4671 	if (ifp->if_softc !=  arg)   /* Not our event */
4672 		return;
4673 
4674 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4675                 return;
4676 
4677 	EM_CORE_LOCK(adapter);
4678 	index = (vtag >> 5) & 0x7F;
4679 	bit = vtag & 0x1F;
4680 	adapter->shadow_vfta[index] |= (1 << bit);
4681 	++adapter->num_vlans;
4682 	/* Re-init to load the changes */
4683 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4684 		em_init_locked(adapter);
4685 	EM_CORE_UNLOCK(adapter);
4686 }
4687 
4688 /*
4689  * This routine is run via an vlan
4690  * unconfig EVENT
4691  */
4692 static void
em_unregister_vlan(void * arg,struct ifnet * ifp,u16 vtag)4693 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4694 {
4695 	struct adapter	*adapter = ifp->if_softc;
4696 	u32		index, bit;
4697 
4698 	if (ifp->if_softc !=  arg)
4699 		return;
4700 
4701 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4702                 return;
4703 
4704 	EM_CORE_LOCK(adapter);
4705 	index = (vtag >> 5) & 0x7F;
4706 	bit = vtag & 0x1F;
4707 	adapter->shadow_vfta[index] &= ~(1 << bit);
4708 	--adapter->num_vlans;
4709 	/* Re-init to load the changes */
4710 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4711 		em_init_locked(adapter);
4712 	EM_CORE_UNLOCK(adapter);
4713 }
4714 
4715 static void
em_setup_vlan_hw_support(struct adapter * adapter)4716 em_setup_vlan_hw_support(struct adapter *adapter)
4717 {
4718 	struct e1000_hw *hw = &adapter->hw;
4719 	u32             reg;
4720 
4721 	/*
4722 	** We get here thru init_locked, meaning
4723 	** a soft reset, this has already cleared
4724 	** the VFTA and other state, so if there
4725 	** have been no vlan's registered do nothing.
4726 	*/
4727 	if (adapter->num_vlans == 0)
4728                 return;
4729 
4730 	/*
4731 	** A soft reset zero's out the VFTA, so
4732 	** we need to repopulate it now.
4733 	*/
4734 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4735                 if (adapter->shadow_vfta[i] != 0)
4736 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4737                             i, adapter->shadow_vfta[i]);
4738 
4739 	reg = E1000_READ_REG(hw, E1000_CTRL);
4740 	reg |= E1000_CTRL_VME;
4741 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4742 
4743 	/* Enable the Filter Table */
4744 	reg = E1000_READ_REG(hw, E1000_RCTL);
4745 	reg &= ~E1000_RCTL_CFIEN;
4746 	reg |= E1000_RCTL_VFE;
4747 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4748 }
4749 
4750 static void
em_enable_intr(struct adapter * adapter)4751 em_enable_intr(struct adapter *adapter)
4752 {
4753 	struct e1000_hw *hw = &adapter->hw;
4754 	u32 ims_mask = IMS_ENABLE_MASK;
4755 
4756 	if (hw->mac.type == e1000_82574) {
4757 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4758 		ims_mask |= EM_MSIX_MASK;
4759 	}
4760 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4761 }
4762 
4763 static void
em_disable_intr(struct adapter * adapter)4764 em_disable_intr(struct adapter *adapter)
4765 {
4766 	struct e1000_hw *hw = &adapter->hw;
4767 
4768 	if (hw->mac.type == e1000_82574)
4769 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4770 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4771 }
4772 
4773 /*
4774  * Bit of a misnomer, what this really means is
4775  * to enable OS management of the system... aka
4776  * to disable special hardware management features
4777  */
4778 static void
em_init_manageability(struct adapter * adapter)4779 em_init_manageability(struct adapter *adapter)
4780 {
4781 	/* A shared code workaround */
4782 #define E1000_82542_MANC2H E1000_MANC2H
4783 	if (adapter->has_manage) {
4784 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4785 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4786 
4787 		/* disable hardware interception of ARP */
4788 		manc &= ~(E1000_MANC_ARP_EN);
4789 
4790                 /* enable receiving management packets to the host */
4791 		manc |= E1000_MANC_EN_MNG2HOST;
4792 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4793 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4794 		manc2h |= E1000_MNG2HOST_PORT_623;
4795 		manc2h |= E1000_MNG2HOST_PORT_664;
4796 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4797 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4798 	}
4799 }
4800 
4801 /*
4802  * Give control back to hardware management
4803  * controller if there is one.
4804  */
4805 static void
em_release_manageability(struct adapter * adapter)4806 em_release_manageability(struct adapter *adapter)
4807 {
4808 	if (adapter->has_manage) {
4809 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4810 
4811 		/* re-enable hardware interception of ARP */
4812 		manc |= E1000_MANC_ARP_EN;
4813 		manc &= ~E1000_MANC_EN_MNG2HOST;
4814 
4815 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4816 	}
4817 }
4818 
4819 /*
4820  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4821  * For ASF and Pass Through versions of f/w this means
4822  * that the driver is loaded. For AMT version type f/w
4823  * this means that the network i/f is open.
4824  */
4825 static void
em_get_hw_control(struct adapter * adapter)4826 em_get_hw_control(struct adapter *adapter)
4827 {
4828 	u32 ctrl_ext, swsm;
4829 
4830 	if (adapter->hw.mac.type == e1000_82573) {
4831 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4832 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4833 		    swsm | E1000_SWSM_DRV_LOAD);
4834 		return;
4835 	}
4836 	/* else */
4837 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4838 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4839 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4840 	return;
4841 }
4842 
4843 /*
4844  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4845  * For ASF and Pass Through versions of f/w this means that
4846  * the driver is no longer loaded. For AMT versions of the
4847  * f/w this means that the network i/f is closed.
4848  */
4849 static void
em_release_hw_control(struct adapter * adapter)4850 em_release_hw_control(struct adapter *adapter)
4851 {
4852 	u32 ctrl_ext, swsm;
4853 
4854 	if (!adapter->has_manage)
4855 		return;
4856 
4857 	if (adapter->hw.mac.type == e1000_82573) {
4858 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4859 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4860 		    swsm & ~E1000_SWSM_DRV_LOAD);
4861 		return;
4862 	}
4863 	/* else */
4864 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4865 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4866 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4867 	return;
4868 }
4869 
4870 static int
em_is_valid_ether_addr(u8 * addr)4871 em_is_valid_ether_addr(u8 *addr)
4872 {
4873 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4874 
4875 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4876 		return (FALSE);
4877 	}
4878 
4879 	return (TRUE);
4880 }
4881 
4882 /*
4883 ** Parse the interface capabilities with regard
4884 ** to both system management and wake-on-lan for
4885 ** later use.
4886 */
4887 static void
em_get_wakeup(device_t dev)4888 em_get_wakeup(device_t dev)
4889 {
4890 	struct adapter	*adapter = device_get_softc(dev);
4891 	u16		eeprom_data = 0, device_id, apme_mask;
4892 
4893 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4894 	apme_mask = EM_EEPROM_APME;
4895 
4896 	switch (adapter->hw.mac.type) {
4897 	case e1000_82573:
4898 	case e1000_82583:
4899 		adapter->has_amt = TRUE;
4900 		/* Falls thru */
4901 	case e1000_82571:
4902 	case e1000_82572:
4903 	case e1000_80003es2lan:
4904 		if (adapter->hw.bus.func == 1) {
4905 			e1000_read_nvm(&adapter->hw,
4906 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4907 			break;
4908 		} else
4909 			e1000_read_nvm(&adapter->hw,
4910 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4911 		break;
4912 	case e1000_ich8lan:
4913 	case e1000_ich9lan:
4914 	case e1000_ich10lan:
4915 	case e1000_pchlan:
4916 	case e1000_pch2lan:
4917 		apme_mask = E1000_WUC_APME;
4918 		adapter->has_amt = TRUE;
4919 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4920 		break;
4921 	default:
4922 		e1000_read_nvm(&adapter->hw,
4923 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4924 		break;
4925 	}
4926 	if (eeprom_data & apme_mask)
4927 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4928 	/*
4929          * We have the eeprom settings, now apply the special cases
4930          * where the eeprom may be wrong or the board won't support
4931          * wake on lan on a particular port
4932 	 */
4933 	device_id = pci_get_device(dev);
4934         switch (device_id) {
4935 	case E1000_DEV_ID_82571EB_FIBER:
4936 		/* Wake events only supported on port A for dual fiber
4937 		 * regardless of eeprom setting */
4938 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4939 		    E1000_STATUS_FUNC_1)
4940 			adapter->wol = 0;
4941 		break;
4942 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4943 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4944 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4945                 /* if quad port adapter, disable WoL on all but port A */
4946 		if (global_quad_port_a != 0)
4947 			adapter->wol = 0;
4948 		/* Reset for multiple quad port adapters */
4949 		if (++global_quad_port_a == 4)
4950 			global_quad_port_a = 0;
4951                 break;
4952 	}
4953 	return;
4954 }
4955 
4956 
4957 /*
4958  * Enable PCI Wake On Lan capability
4959  */
4960 static void
em_enable_wakeup(device_t dev)4961 em_enable_wakeup(device_t dev)
4962 {
4963 	struct adapter	*adapter = device_get_softc(dev);
4964 	struct ifnet	*ifp = adapter->ifp;
4965 	u32		pmc, ctrl, ctrl_ext, rctl;
4966 	u16     	status;
4967 
4968 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4969 		return;
4970 
4971 	/* Advertise the wakeup capability */
4972 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4973 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4974 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4975 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4976 
4977 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4978 	    (adapter->hw.mac.type == e1000_pchlan) ||
4979 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4980 	    (adapter->hw.mac.type == e1000_ich10lan))
4981 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4982 
4983 	/* Keep the laser running on Fiber adapters */
4984 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4985 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4986 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4987 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4988 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4989 	}
4990 
4991 	/*
4992 	** Determine type of Wakeup: note that wol
4993 	** is set with all bits on by default.
4994 	*/
4995 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4996 		adapter->wol &= ~E1000_WUFC_MAG;
4997 
4998 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4999 		adapter->wol &= ~E1000_WUFC_MC;
5000 	else {
5001 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5002 		rctl |= E1000_RCTL_MPE;
5003 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5004 	}
5005 
5006 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5007 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5008 		if (em_enable_phy_wakeup(adapter))
5009 			return;
5010 	} else {
5011 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5012 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5013 	}
5014 
5015 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5016 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5017 
5018         /* Request PME */
5019         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5020 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5021 	if (ifp->if_capenable & IFCAP_WOL)
5022 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5023         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5024 
5025 	return;
5026 }
5027 
5028 /*
5029 ** WOL in the newer chipset interfaces (pchlan)
5030 ** require thing to be copied into the phy
5031 */
5032 static int
em_enable_phy_wakeup(struct adapter * adapter)5033 em_enable_phy_wakeup(struct adapter *adapter)
5034 {
5035 	struct e1000_hw *hw = &adapter->hw;
5036 	u32 mreg, ret = 0;
5037 	u16 preg;
5038 
5039 	/* copy MAC RARs to PHY RARs */
5040 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5041 
5042 	/* copy MAC MTA to PHY MTA */
5043 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5044 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5045 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5046 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5047 		    (u16)((mreg >> 16) & 0xFFFF));
5048 	}
5049 
5050 	/* configure PHY Rx Control register */
5051 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5052 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5053 	if (mreg & E1000_RCTL_UPE)
5054 		preg |= BM_RCTL_UPE;
5055 	if (mreg & E1000_RCTL_MPE)
5056 		preg |= BM_RCTL_MPE;
5057 	preg &= ~(BM_RCTL_MO_MASK);
5058 	if (mreg & E1000_RCTL_MO_3)
5059 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5060 				<< BM_RCTL_MO_SHIFT);
5061 	if (mreg & E1000_RCTL_BAM)
5062 		preg |= BM_RCTL_BAM;
5063 	if (mreg & E1000_RCTL_PMCF)
5064 		preg |= BM_RCTL_PMCF;
5065 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5066 	if (mreg & E1000_CTRL_RFCE)
5067 		preg |= BM_RCTL_RFCE;
5068 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5069 
5070 	/* enable PHY wakeup in MAC register */
5071 	E1000_WRITE_REG(hw, E1000_WUC,
5072 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5073 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5074 
5075 	/* configure and enable PHY wakeup in PHY registers */
5076 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5077 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5078 
5079 	/* activate PHY wakeup */
5080 	ret = hw->phy.ops.acquire(hw);
5081 	if (ret) {
5082 		printf("Could not acquire PHY\n");
5083 		return ret;
5084 	}
5085 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5086 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5087 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5088 	if (ret) {
5089 		printf("Could not read PHY page 769\n");
5090 		goto out;
5091 	}
5092 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5093 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5094 	if (ret)
5095 		printf("Could not set PHY Host Wakeup bit\n");
5096 out:
5097 	hw->phy.ops.release(hw);
5098 
5099 	return ret;
5100 }
5101 
5102 static void
em_led_func(void * arg,int onoff)5103 em_led_func(void *arg, int onoff)
5104 {
5105 	struct adapter	*adapter = arg;
5106 
5107 	EM_CORE_LOCK(adapter);
5108 	if (onoff) {
5109 		e1000_setup_led(&adapter->hw);
5110 		e1000_led_on(&adapter->hw);
5111 	} else {
5112 		e1000_led_off(&adapter->hw);
5113 		e1000_cleanup_led(&adapter->hw);
5114 	}
5115 	EM_CORE_UNLOCK(adapter);
5116 }
5117 
5118 /*
5119 ** Disable the L0S and L1 LINK states
5120 */
5121 static void
em_disable_aspm(struct adapter * adapter)5122 em_disable_aspm(struct adapter *adapter)
5123 {
5124 	int		base, reg;
5125 	u16		link_cap,link_ctrl;
5126 	device_t	dev = adapter->dev;
5127 
5128 	switch (adapter->hw.mac.type) {
5129 		case e1000_82573:
5130 		case e1000_82574:
5131 		case e1000_82583:
5132 			break;
5133 		default:
5134 			return;
5135 	}
5136 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5137 		return;
5138 	reg = base + PCIER_LINK_CAP;
5139 	link_cap = pci_read_config(dev, reg, 2);
5140 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5141 		return;
5142 	reg = base + PCIER_LINK_CTL;
5143 	link_ctrl = pci_read_config(dev, reg, 2);
5144 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5145 	pci_write_config(dev, reg, link_ctrl, 2);
5146 	return;
5147 }
5148 
5149 /**********************************************************************
5150  *
5151  *  Update the board statistics counters.
5152  *
5153  **********************************************************************/
5154 static void
em_update_stats_counters(struct adapter * adapter)5155 em_update_stats_counters(struct adapter *adapter)
5156 {
5157 	struct ifnet   *ifp;
5158 
5159 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5160 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5161 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5162 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5163 	}
5164 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5165 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5166 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5167 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5168 
5169 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5170 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5171 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5172 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5173 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5174 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5175 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5176 	/*
5177 	** For watchdog management we need to know if we have been
5178 	** paused during the last interval, so capture that here.
5179 	*/
5180 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5181 	adapter->stats.xoffrxc += adapter->pause_frames;
5182 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5183 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5184 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5185 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5186 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5187 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5188 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5189 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5190 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5191 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5192 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5193 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5194 
5195 	/* For the 64-bit byte counters the low dword must be read first. */
5196 	/* Both registers clear on the read of the high dword */
5197 
5198 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5199 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5200 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5201 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5202 
5203 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5204 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5205 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5206 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5207 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5208 
5209 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5210 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5211 
5212 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5213 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5214 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5215 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5216 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5217 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5218 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5219 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5220 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5221 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5222 
5223 	/* Interrupt Counts */
5224 
5225 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5226 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5227 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5228 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5229 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5230 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5231 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5232 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5233 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5234 
5235 	if (adapter->hw.mac.type >= e1000_82543) {
5236 		adapter->stats.algnerrc +=
5237 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5238 		adapter->stats.rxerrc +=
5239 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5240 		adapter->stats.tncrs +=
5241 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5242 		adapter->stats.cexterr +=
5243 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5244 		adapter->stats.tsctc +=
5245 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5246 		adapter->stats.tsctfc +=
5247 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5248 	}
5249 	ifp = adapter->ifp;
5250 
5251 	ifp->if_collisions = adapter->stats.colc;
5252 
5253 	/* Rx Errors */
5254 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5255 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5256 	    adapter->stats.ruc + adapter->stats.roc +
5257 	    adapter->stats.mpc + adapter->stats.cexterr;
5258 
5259 	/* Tx Errors */
5260 	ifp->if_oerrors = adapter->stats.ecol +
5261 	    adapter->stats.latecol + adapter->watchdog_events;
5262 }
5263 
5264 /* Export a single 32-bit register via a read-only sysctl. */
5265 static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)5266 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5267 {
5268 	struct adapter *adapter;
5269 	u_int val;
5270 
5271 	adapter = oidp->oid_arg1;
5272 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5273 	return (sysctl_handle_int(oidp, &val, 0, req));
5274 }
5275 
5276 /*
5277  * Add sysctl variables, one per statistic, to the system.
5278  */
5279 static void
em_add_hw_stats(struct adapter * adapter)5280 em_add_hw_stats(struct adapter *adapter)
5281 {
5282 	device_t dev = adapter->dev;
5283 
5284 	struct tx_ring *txr = adapter->tx_rings;
5285 	struct rx_ring *rxr = adapter->rx_rings;
5286 
5287 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5288 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5289 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5290 	struct e1000_hw_stats *stats = &adapter->stats;
5291 
5292 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5293 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5294 
5295 #define QUEUE_NAME_LEN 32
5296 	char namebuf[QUEUE_NAME_LEN];
5297 
5298 	/* Driver Statistics */
5299 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5300 			CTLFLAG_RD, &adapter->link_irq,
5301 			"Link MSIX IRQ Handled");
5302 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5303 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5304 			 "Std mbuf failed");
5305 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5306 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5307 			 "Std mbuf cluster failed");
5308 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5309 			CTLFLAG_RD, &adapter->dropped_pkts,
5310 			"Driver dropped packets");
5311 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5312 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5313 			"Driver tx dma failure in xmit");
5314 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5315 			CTLFLAG_RD, &adapter->rx_overruns,
5316 			"RX overruns");
5317 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5318 			CTLFLAG_RD, &adapter->watchdog_events,
5319 			"Watchdog timeouts");
5320 
5321 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5322 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5323 			em_sysctl_reg_handler, "IU",
5324 			"Device Control Register");
5325 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5326 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5327 			em_sysctl_reg_handler, "IU",
5328 			"Receiver Control Register");
5329 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5330 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5331 			"Flow Control High Watermark");
5332 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5333 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5334 			"Flow Control Low Watermark");
5335 
5336 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5337 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5338 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5339 					    CTLFLAG_RD, NULL, "Queue Name");
5340 		queue_list = SYSCTL_CHILDREN(queue_node);
5341 
5342 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5343 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5344 				E1000_TDH(txr->me),
5345 				em_sysctl_reg_handler, "IU",
5346  				"Transmit Descriptor Head");
5347 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5348 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5349 				E1000_TDT(txr->me),
5350 				em_sysctl_reg_handler, "IU",
5351  				"Transmit Descriptor Tail");
5352 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5353 				CTLFLAG_RD, &txr->tx_irq,
5354 				"Queue MSI-X Transmit Interrupts");
5355 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5356 				CTLFLAG_RD, &txr->no_desc_avail,
5357 				"Queue No Descriptor Available");
5358 
5359 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5360 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5361 				E1000_RDH(rxr->me),
5362 				em_sysctl_reg_handler, "IU",
5363 				"Receive Descriptor Head");
5364 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5365 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5366 				E1000_RDT(rxr->me),
5367 				em_sysctl_reg_handler, "IU",
5368 				"Receive Descriptor Tail");
5369 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5370 				CTLFLAG_RD, &rxr->rx_irq,
5371 				"Queue MSI-X Receive Interrupts");
5372 	}
5373 
5374 	/* MAC stats get their own sub node */
5375 
5376 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5377 				    CTLFLAG_RD, NULL, "Statistics");
5378 	stat_list = SYSCTL_CHILDREN(stat_node);
5379 
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5381 			CTLFLAG_RD, &stats->ecol,
5382 			"Excessive collisions");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5384 			CTLFLAG_RD, &stats->scc,
5385 			"Single collisions");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5387 			CTLFLAG_RD, &stats->mcc,
5388 			"Multiple collisions");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5390 			CTLFLAG_RD, &stats->latecol,
5391 			"Late collisions");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5393 			CTLFLAG_RD, &stats->colc,
5394 			"Collision Count");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5396 			CTLFLAG_RD, &adapter->stats.symerrs,
5397 			"Symbol Errors");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5399 			CTLFLAG_RD, &adapter->stats.sec,
5400 			"Sequence Errors");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5402 			CTLFLAG_RD, &adapter->stats.dc,
5403 			"Defer Count");
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5405 			CTLFLAG_RD, &adapter->stats.mpc,
5406 			"Missed Packets");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5408 			CTLFLAG_RD, &adapter->stats.rnbc,
5409 			"Receive No Buffers");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5411 			CTLFLAG_RD, &adapter->stats.ruc,
5412 			"Receive Undersize");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5414 			CTLFLAG_RD, &adapter->stats.rfc,
5415 			"Fragmented Packets Received ");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5417 			CTLFLAG_RD, &adapter->stats.roc,
5418 			"Oversized Packets Received");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5420 			CTLFLAG_RD, &adapter->stats.rjc,
5421 			"Recevied Jabber");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5423 			CTLFLAG_RD, &adapter->stats.rxerrc,
5424 			"Receive Errors");
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5426 			CTLFLAG_RD, &adapter->stats.crcerrs,
5427 			"CRC errors");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5429 			CTLFLAG_RD, &adapter->stats.algnerrc,
5430 			"Alignment Errors");
5431 	/* On 82575 these are collision counts */
5432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5433 			CTLFLAG_RD, &adapter->stats.cexterr,
5434 			"Collision/Carrier extension errors");
5435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5436 			CTLFLAG_RD, &adapter->stats.xonrxc,
5437 			"XON Received");
5438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5439 			CTLFLAG_RD, &adapter->stats.xontxc,
5440 			"XON Transmitted");
5441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5442 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5443 			"XOFF Received");
5444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5445 			CTLFLAG_RD, &adapter->stats.xofftxc,
5446 			"XOFF Transmitted");
5447 
5448 	/* Packet Reception Stats */
5449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5450 			CTLFLAG_RD, &adapter->stats.tpr,
5451 			"Total Packets Received ");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5453 			CTLFLAG_RD, &adapter->stats.gprc,
5454 			"Good Packets Received");
5455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5456 			CTLFLAG_RD, &adapter->stats.bprc,
5457 			"Broadcast Packets Received");
5458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5459 			CTLFLAG_RD, &adapter->stats.mprc,
5460 			"Multicast Packets Received");
5461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5462 			CTLFLAG_RD, &adapter->stats.prc64,
5463 			"64 byte frames received ");
5464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5465 			CTLFLAG_RD, &adapter->stats.prc127,
5466 			"65-127 byte frames received");
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5468 			CTLFLAG_RD, &adapter->stats.prc255,
5469 			"128-255 byte frames received");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5471 			CTLFLAG_RD, &adapter->stats.prc511,
5472 			"256-511 byte frames received");
5473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5474 			CTLFLAG_RD, &adapter->stats.prc1023,
5475 			"512-1023 byte frames received");
5476 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5477 			CTLFLAG_RD, &adapter->stats.prc1522,
5478 			"1023-1522 byte frames received");
5479  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5480  			CTLFLAG_RD, &adapter->stats.gorc,
5481  			"Good Octets Received");
5482 
5483 	/* Packet Transmission Stats */
5484  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5485  			CTLFLAG_RD, &adapter->stats.gotc,
5486  			"Good Octets Transmitted");
5487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5488 			CTLFLAG_RD, &adapter->stats.tpt,
5489 			"Total Packets Transmitted");
5490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5491 			CTLFLAG_RD, &adapter->stats.gptc,
5492 			"Good Packets Transmitted");
5493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5494 			CTLFLAG_RD, &adapter->stats.bptc,
5495 			"Broadcast Packets Transmitted");
5496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5497 			CTLFLAG_RD, &adapter->stats.mptc,
5498 			"Multicast Packets Transmitted");
5499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5500 			CTLFLAG_RD, &adapter->stats.ptc64,
5501 			"64 byte frames transmitted ");
5502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5503 			CTLFLAG_RD, &adapter->stats.ptc127,
5504 			"65-127 byte frames transmitted");
5505 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5506 			CTLFLAG_RD, &adapter->stats.ptc255,
5507 			"128-255 byte frames transmitted");
5508 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5509 			CTLFLAG_RD, &adapter->stats.ptc511,
5510 			"256-511 byte frames transmitted");
5511 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5512 			CTLFLAG_RD, &adapter->stats.ptc1023,
5513 			"512-1023 byte frames transmitted");
5514 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5515 			CTLFLAG_RD, &adapter->stats.ptc1522,
5516 			"1024-1522 byte frames transmitted");
5517 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5518 			CTLFLAG_RD, &adapter->stats.tsctc,
5519 			"TSO Contexts Transmitted");
5520 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5521 			CTLFLAG_RD, &adapter->stats.tsctfc,
5522 			"TSO Contexts Failed");
5523 
5524 
5525 	/* Interrupt Stats */
5526 
5527 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5528 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5529 	int_list = SYSCTL_CHILDREN(int_node);
5530 
5531 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5532 			CTLFLAG_RD, &adapter->stats.iac,
5533 			"Interrupt Assertion Count");
5534 
5535 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5536 			CTLFLAG_RD, &adapter->stats.icrxptc,
5537 			"Interrupt Cause Rx Pkt Timer Expire Count");
5538 
5539 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5540 			CTLFLAG_RD, &adapter->stats.icrxatc,
5541 			"Interrupt Cause Rx Abs Timer Expire Count");
5542 
5543 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5544 			CTLFLAG_RD, &adapter->stats.ictxptc,
5545 			"Interrupt Cause Tx Pkt Timer Expire Count");
5546 
5547 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5548 			CTLFLAG_RD, &adapter->stats.ictxatc,
5549 			"Interrupt Cause Tx Abs Timer Expire Count");
5550 
5551 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5552 			CTLFLAG_RD, &adapter->stats.ictxqec,
5553 			"Interrupt Cause Tx Queue Empty Count");
5554 
5555 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5556 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5557 			"Interrupt Cause Tx Queue Min Thresh Count");
5558 
5559 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5560 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5561 			"Interrupt Cause Rx Desc Min Thresh Count");
5562 
5563 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5564 			CTLFLAG_RD, &adapter->stats.icrxoc,
5565 			"Interrupt Cause Receiver Overrun Count");
5566 }
5567 
5568 /**********************************************************************
5569  *
5570  *  This routine provides a way to dump out the adapter eeprom,
5571  *  often a useful debug/service tool. This only dumps the first
5572  *  32 words, stuff that matters is in that extent.
5573  *
5574  **********************************************************************/
5575 static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)5576 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5577 {
5578 	struct adapter *adapter = (struct adapter *)arg1;
5579 	int error;
5580 	int result;
5581 
5582 	result = -1;
5583 	error = sysctl_handle_int(oidp, &result, 0, req);
5584 
5585 	if (error || !req->newptr)
5586 		return (error);
5587 
5588 	/*
5589 	 * This value will cause a hex dump of the
5590 	 * first 32 16-bit words of the EEPROM to
5591 	 * the screen.
5592 	 */
5593 	if (result == 1)
5594 		em_print_nvm_info(adapter);
5595 
5596 	return (error);
5597 }
5598 
5599 static void
em_print_nvm_info(struct adapter * adapter)5600 em_print_nvm_info(struct adapter *adapter)
5601 {
5602 	u16	eeprom_data;
5603 	int	i, j, row = 0;
5604 
5605 	/* Its a bit crude, but it gets the job done */
5606 	printf("\nInterface EEPROM Dump:\n");
5607 	printf("Offset\n0x0000  ");
5608 	for (i = 0, j = 0; i < 32; i++, j++) {
5609 		if (j == 8) { /* Make the offset block */
5610 			j = 0; ++row;
5611 			printf("\n0x00%x0  ",row);
5612 		}
5613 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5614 		printf("%04x ", eeprom_data);
5615 	}
5616 	printf("\n");
5617 }
5618 
5619 static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)5620 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5621 {
5622 	struct em_int_delay_info *info;
5623 	struct adapter *adapter;
5624 	u32 regval;
5625 	int error, usecs, ticks;
5626 
5627 	info = (struct em_int_delay_info *)arg1;
5628 	usecs = info->value;
5629 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5630 	if (error != 0 || req->newptr == NULL)
5631 		return (error);
5632 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5633 		return (EINVAL);
5634 	info->value = usecs;
5635 	ticks = EM_USECS_TO_TICKS(usecs);
5636 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5637 		ticks *= 4;
5638 
5639 	adapter = info->adapter;
5640 
5641 	EM_CORE_LOCK(adapter);
5642 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5643 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5644 	/* Handle a few special cases. */
5645 	switch (info->offset) {
5646 	case E1000_RDTR:
5647 		break;
5648 	case E1000_TIDV:
5649 		if (ticks == 0) {
5650 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5651 			/* Don't write 0 into the TIDV register. */
5652 			regval++;
5653 		} else
5654 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5655 		break;
5656 	}
5657 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5658 	EM_CORE_UNLOCK(adapter);
5659 	return (0);
5660 }
5661 
5662 static void
em_add_int_delay_sysctl(struct adapter * adapter,const char * name,const char * description,struct em_int_delay_info * info,int offset,int value)5663 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5664 	const char *description, struct em_int_delay_info *info,
5665 	int offset, int value)
5666 {
5667 	info->adapter = adapter;
5668 	info->offset = offset;
5669 	info->value = value;
5670 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5671 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5672 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5673 	    info, 0, em_sysctl_int_delay, "I", description);
5674 }
5675 
5676 static void
em_set_sysctl_value(struct adapter * adapter,const char * name,const char * description,int * limit,int value)5677 em_set_sysctl_value(struct adapter *adapter, const char *name,
5678 	const char *description, int *limit, int value)
5679 {
5680 	*limit = value;
5681 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5682 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5683 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5684 }
5685 
5686 
5687 /*
5688 ** Set flow control using sysctl:
5689 ** Flow control values:
5690 **      0 - off
5691 **      1 - rx pause
5692 **      2 - tx pause
5693 **      3 - full
5694 */
5695 static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)5696 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5697 {
5698         int		error;
5699 	static int	input = 3; /* default is full */
5700         struct adapter	*adapter = (struct adapter *) arg1;
5701 
5702         error = sysctl_handle_int(oidp, &input, 0, req);
5703 
5704         if ((error) || (req->newptr == NULL))
5705                 return (error);
5706 
5707 	if (input == adapter->fc) /* no change? */
5708 		return (error);
5709 
5710         switch (input) {
5711                 case e1000_fc_rx_pause:
5712                 case e1000_fc_tx_pause:
5713                 case e1000_fc_full:
5714                 case e1000_fc_none:
5715                         adapter->hw.fc.requested_mode = input;
5716 			adapter->fc = input;
5717                         break;
5718                 default:
5719 			/* Do nothing */
5720 			return (error);
5721         }
5722 
5723         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5724         e1000_force_mac_fc(&adapter->hw);
5725         return (error);
5726 }
5727 
5728 /*
5729 ** Manage Energy Efficient Ethernet:
5730 ** Control values:
5731 **     0/1 - enabled/disabled
5732 */
5733 static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)5734 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5735 {
5736        struct adapter *adapter = (struct adapter *) arg1;
5737        int             error, value;
5738 
5739        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5740        error = sysctl_handle_int(oidp, &value, 0, req);
5741        if (error || req->newptr == NULL)
5742                return (error);
5743        EM_CORE_LOCK(adapter);
5744        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5745        em_init_locked(adapter);
5746        EM_CORE_UNLOCK(adapter);
5747        return (0);
5748 }
5749 
5750 static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)5751 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5752 {
5753 	struct adapter *adapter;
5754 	int error;
5755 	int result;
5756 
5757 	result = -1;
5758 	error = sysctl_handle_int(oidp, &result, 0, req);
5759 
5760 	if (error || !req->newptr)
5761 		return (error);
5762 
5763 	if (result == 1) {
5764 		adapter = (struct adapter *)arg1;
5765 		em_print_debug_info(adapter);
5766         }
5767 
5768 	return (error);
5769 }
5770 
5771 /*
5772 ** This routine is meant to be fluid, add whatever is
5773 ** needed for debugging a problem.  -jfv
5774 */
5775 static void
em_print_debug_info(struct adapter * adapter)5776 em_print_debug_info(struct adapter *adapter)
5777 {
5778 	device_t dev = adapter->dev;
5779 	struct tx_ring *txr = adapter->tx_rings;
5780 	struct rx_ring *rxr = adapter->rx_rings;
5781 
5782 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5783 		printf("Interface is RUNNING ");
5784 	else
5785 		printf("Interface is NOT RUNNING\n");
5786 
5787 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5788 		printf("and INACTIVE\n");
5789 	else
5790 		printf("and ACTIVE\n");
5791 
5792 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5793 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5794 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5795 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5796 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5797 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5798 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5799 	device_printf(dev, "TX descriptors avail = %d\n",
5800 	    txr->tx_avail);
5801 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5802 	    txr->no_desc_avail);
5803 	device_printf(dev, "RX discarded packets = %ld\n",
5804 	    rxr->rx_discarded);
5805 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5806 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5807 }
5808