xref: /freebsd-11-stable/sys/dev/e1000/if_em.c (revision 5d9cce74c2636cbf3bf818723939490cb0b7a24c)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Driver version:
100  *********************************************************************/
101 char em_driver_version[] = "7.6.1-k";
102 
103 /*********************************************************************
104  *  PCI Device ID Table
105  *
106  *  Used by probe to select devices to load on
107  *  Last field stores an index into e1000_strings
108  *  Last entry must be all 0s
109  *
110  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111  *********************************************************************/
112 
113 static em_vendor_info_t em_vendor_info_array[] =
114 {
115 	/* Intel(R) PRO/1000 Network Connection */
116 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128 						PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130 						PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 
136 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145 						PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147 						PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
182 						PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
184 						PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
187 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
189 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
192                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
194 	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
195 						PCI_ANY_ID, PCI_ANY_ID, 0},
196 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
197 						PCI_ANY_ID, PCI_ANY_ID, 0},
198 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
199 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
200 						PCI_ANY_ID, PCI_ANY_ID, 0},
201 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
202 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
203 						PCI_ANY_ID, PCI_ANY_ID, 0},
204 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
205 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
206 						PCI_ANY_ID, PCI_ANY_ID, 0},
207 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
208 	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
209 						PCI_ANY_ID, PCI_ANY_ID, 0},
210 	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
211 	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
212 						PCI_ANY_ID, PCI_ANY_ID, 0},
213 	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
214 	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
215 						PCI_ANY_ID, PCI_ANY_ID, 0},
216 	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
217 	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
218 						PCI_ANY_ID, PCI_ANY_ID, 0},
219 	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
220 	/* required last entry */
221 	{ 0, 0, 0, 0, 0}
222 };
223 
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *
 *  Indexed by the last ("String Index") field of the entries in
 *  em_vendor_info_array; every entry currently uses index 0.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
231 
232 /*********************************************************************
233  *  Function prototypes
234  *********************************************************************/
235 static int	em_probe(device_t);
236 static int	em_attach(device_t);
237 static int	em_detach(device_t);
238 static int	em_shutdown(device_t);
239 static int	em_suspend(device_t);
240 static int	em_resume(device_t);
241 #ifdef EM_MULTIQUEUE
242 static int	em_mq_start(if_t, struct mbuf *);
243 static int	em_mq_start_locked(if_t,
244 		    struct tx_ring *);
245 static void	em_qflush(if_t);
246 #else
247 static void	em_start(if_t);
248 static void	em_start_locked(if_t, struct tx_ring *);
249 #endif
250 static int	em_ioctl(if_t, u_long, caddr_t);
251 static uint64_t	em_get_counter(if_t, ift_counter);
252 static void	em_init(void *);
253 static void	em_init_locked(struct adapter *);
254 static void	em_stop(void *);
255 static void	em_media_status(if_t, struct ifmediareq *);
256 static int	em_media_change(if_t);
257 static void	em_identify_hardware(struct adapter *);
258 static int	em_allocate_pci_resources(struct adapter *);
259 static int	em_allocate_legacy(struct adapter *);
260 static int	em_allocate_msix(struct adapter *);
261 static int	em_allocate_queues(struct adapter *);
262 static int	em_setup_msix(struct adapter *);
263 static void	em_free_pci_resources(struct adapter *);
264 static void	em_local_timer(void *);
265 static void	em_reset(struct adapter *);
266 static int	em_setup_interface(device_t, struct adapter *);
267 static void	em_flush_desc_rings(struct adapter *);
268 
269 static void	em_setup_transmit_structures(struct adapter *);
270 static void	em_initialize_transmit_unit(struct adapter *);
271 static int	em_allocate_transmit_buffers(struct tx_ring *);
272 static void	em_free_transmit_structures(struct adapter *);
273 static void	em_free_transmit_buffers(struct tx_ring *);
274 
275 static int	em_setup_receive_structures(struct adapter *);
276 static int	em_allocate_receive_buffers(struct rx_ring *);
277 static void	em_initialize_receive_unit(struct adapter *);
278 static void	em_free_receive_structures(struct adapter *);
279 static void	em_free_receive_buffers(struct rx_ring *);
280 
281 static void	em_enable_intr(struct adapter *);
282 static void	em_disable_intr(struct adapter *);
283 static void	em_update_stats_counters(struct adapter *);
284 static void	em_add_hw_stats(struct adapter *adapter);
285 static void	em_txeof(struct tx_ring *);
286 static bool	em_rxeof(struct rx_ring *, int, int *);
287 #ifndef __NO_STRICT_ALIGNMENT
288 static int	em_fixup_rx(struct rx_ring *);
289 #endif
290 static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
291 		    const struct em_rxbuffer *rxbuf);
292 static void	em_receive_checksum(uint32_t status, struct mbuf *);
293 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
294 		    struct ip *, u32 *, u32 *);
295 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
296 		    struct tcphdr *, u32 *, u32 *);
297 static void	em_set_promisc(struct adapter *);
298 static void	em_disable_promisc(struct adapter *);
299 static void	em_set_multi(struct adapter *);
300 static void	em_update_link_status(struct adapter *);
301 static void	em_refresh_mbufs(struct rx_ring *, int);
302 static void	em_register_vlan(void *, if_t, u16);
303 static void	em_unregister_vlan(void *, if_t, u16);
304 static void	em_setup_vlan_hw_support(struct adapter *);
305 static int	em_xmit(struct tx_ring *, struct mbuf **);
306 static int	em_dma_malloc(struct adapter *, bus_size_t,
307 		    struct em_dma_alloc *, int);
308 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
309 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
310 static void	em_print_nvm_info(struct adapter *);
311 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
312 static void	em_print_debug_info(struct adapter *);
313 static int 	em_is_valid_ether_addr(u8 *);
314 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
315 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
316 		    const char *, struct em_int_delay_info *, int, int);
317 /* Management and WOL Support */
318 static void	em_init_manageability(struct adapter *);
319 static void	em_release_manageability(struct adapter *);
320 static void     em_get_hw_control(struct adapter *);
321 static void     em_release_hw_control(struct adapter *);
322 static void	em_get_wakeup(device_t);
323 static void     em_enable_wakeup(device_t);
324 static int	em_enable_phy_wakeup(struct adapter *);
325 static void	em_led_func(void *, int);
326 static void	em_disable_aspm(struct adapter *);
327 
328 static int	em_irq_fast(void *);
329 
330 /* MSIX handlers */
331 static void	em_msix_tx(void *);
332 static void	em_msix_rx(void *);
333 static void	em_msix_link(void *);
334 static void	em_handle_tx(void *context, int pending);
335 static void	em_handle_rx(void *context, int pending);
336 static void	em_handle_link(void *context, int pending);
337 
338 #ifdef EM_MULTIQUEUE
339 static void	em_enable_vectors_82574(struct adapter *);
340 #endif
341 
342 static void	em_set_sysctl_value(struct adapter *, const char *,
343 		    const char *, int *, int);
344 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
345 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
346 
347 static __inline void em_rx_discard(struct rx_ring *, int);
348 
349 #ifdef DEVICE_POLLING
350 static poll_handler_t em_poll;
351 #endif /* POLLING */
352 
353 /*********************************************************************
354  *  FreeBSD Device Interface Entry Points
355  *********************************************************************/
356 
357 static device_method_t em_methods[] = {
358 	/* Device interface */
359 	DEVMETHOD(device_probe, em_probe),
360 	DEVMETHOD(device_attach, em_attach),
361 	DEVMETHOD(device_detach, em_detach),
362 	DEVMETHOD(device_shutdown, em_shutdown),
363 	DEVMETHOD(device_suspend, em_suspend),
364 	DEVMETHOD(device_resume, em_resume),
365 	DEVMETHOD_END
366 };
367 
368 static driver_t em_driver = {
369 	"em", em_methods, sizeof(struct adapter),
370 };
371 
372 devclass_t em_devclass;
373 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
374 MODULE_DEPEND(em, pci, 1, 1, 1);
375 MODULE_DEPEND(em, ether, 1, 1, 1);
376 #ifdef DEV_NETMAP
377 MODULE_DEPEND(em, netmap, 1, 1, 1);
378 #endif /* DEV_NETMAP */
379 
380 /*********************************************************************
381  *  Tunable default values.
382  *********************************************************************/
383 
384 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
385 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
386 #define M_TSO_LEN			66
387 
388 #define MAX_INTS_PER_SEC	8000
389 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
390 
391 #define TSO_WORKAROUND	4
392 
393 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
394 
395 static int em_disable_crc_stripping = 0;
396 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
397     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
398 
399 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
400 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
401 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
402     0, "Default transmit interrupt delay in usecs");
403 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
404     0, "Default receive interrupt delay in usecs");
405 
406 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
407 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
408 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
409     &em_tx_abs_int_delay_dflt, 0,
410     "Default transmit interrupt delay limit in usecs");
411 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
412     &em_rx_abs_int_delay_dflt, 0,
413     "Default receive interrupt delay limit in usecs");
414 
415 static int em_rxd = EM_DEFAULT_RXD;
416 static int em_txd = EM_DEFAULT_TXD;
417 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
418     "Number of receive descriptors per queue");
419 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
420     "Number of transmit descriptors per queue");
421 
422 static int em_smart_pwr_down = FALSE;
423 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
424     0, "Set to true to leave smart power down enabled on newer adapters");
425 
426 /* Controls whether promiscuous also shows bad packets */
427 static int em_debug_sbp = FALSE;
428 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
429     "Show bad packets in promiscuous mode");
430 
431 static int em_enable_msix = TRUE;
432 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
433     "Enable MSI-X interrupts");
434 
435 #ifdef EM_MULTIQUEUE
436 static int em_num_queues = 1;
437 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
438     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
439 #endif
440 
441 /*
442 ** Global variable to store last used CPU when binding queues
443 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
444 ** queue is bound to a cpu.
445 */
446 static int em_last_bind_cpu = -1;
447 
448 /* How many packets rxeof tries to clean at a time */
449 static int em_rx_process_limit = 100;
450 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
451     &em_rx_process_limit, 0,
452     "Maximum number of received packets to process "
453     "at a time, -1 means unlimited");
454 
455 /* Energy efficient ethernet - default to OFF */
456 static int eee_setting = 1;
457 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
458     "Enable Energy Efficient Ethernet");
459 
460 /* Global used in WOL setup with multiport cards */
461 static int global_quad_port_a = 0;
462 
463 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
464 #include <dev/netmap/if_em_netmap.h>
465 #endif /* DEV_NETMAP */
466 
467 /*********************************************************************
468  *  Device identification routine
469  *
470  *  em_probe determines if the driver should be loaded on
471  *  adapter based on PCI vendor/device id of the adapter.
472  *
473  *  return BUS_PROBE_DEFAULT on success, positive on failure
474  *********************************************************************/
475 
476 static int
em_probe(device_t dev)477 em_probe(device_t dev)
478 {
479 	char		adapter_name[60];
480 	uint16_t	pci_vendor_id = 0;
481 	uint16_t	pci_device_id = 0;
482 	uint16_t	pci_subvendor_id = 0;
483 	uint16_t	pci_subdevice_id = 0;
484 	em_vendor_info_t *ent;
485 
486 	INIT_DEBUGOUT("em_probe: begin");
487 
488 	pci_vendor_id = pci_get_vendor(dev);
489 	if (pci_vendor_id != EM_VENDOR_ID)
490 		return (ENXIO);
491 
492 	pci_device_id = pci_get_device(dev);
493 	pci_subvendor_id = pci_get_subvendor(dev);
494 	pci_subdevice_id = pci_get_subdevice(dev);
495 
496 	ent = em_vendor_info_array;
497 	while (ent->vendor_id != 0) {
498 		if ((pci_vendor_id == ent->vendor_id) &&
499 		    (pci_device_id == ent->device_id) &&
500 
501 		    ((pci_subvendor_id == ent->subvendor_id) ||
502 		    (ent->subvendor_id == PCI_ANY_ID)) &&
503 
504 		    ((pci_subdevice_id == ent->subdevice_id) ||
505 		    (ent->subdevice_id == PCI_ANY_ID))) {
506 			sprintf(adapter_name, "%s %s",
507 				em_strings[ent->index],
508 				em_driver_version);
509 			device_set_desc_copy(dev, adapter_name);
510 			return (BUS_PROBE_DEFAULT);
511 		}
512 		ent++;
513 	}
514 
515 	return (ENXIO);
516 }
517 
518 /*********************************************************************
519  *  Device initialization routine
520  *
521  *  The attach entry point is called when the driver is being loaded.
522  *  This routine identifies the type of hardware, allocates all resources
523  *  and initializes the hardware.
524  *
525  *  return 0 on success, positive on failure
526  *********************************************************************/
527 
528 static int
em_attach(device_t dev)529 em_attach(device_t dev)
530 {
531 	struct adapter	*adapter;
532 	struct e1000_hw	*hw;
533 	int		error = 0;
534 
535 	INIT_DEBUGOUT("em_attach: begin");
536 
537 	if (resource_disabled("em", device_get_unit(dev))) {
538 		device_printf(dev, "Disabled by device hint\n");
539 		return (ENXIO);
540 	}
541 
542 	adapter = device_get_softc(dev);
543 	adapter->dev = adapter->osdep.dev = dev;
544 	hw = &adapter->hw;
545 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
546 
547 	/* SYSCTL stuff */
548 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
549 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
550 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
551 	    em_sysctl_nvm_info, "I", "NVM Information");
552 
553 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
554 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
555 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
556 	    em_sysctl_debug_info, "I", "Debug Information");
557 
558 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
559 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
560 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
561 	    em_set_flowcntl, "I", "Flow Control");
562 
563 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
564 
565 	/* Determine hardware and mac info */
566 	em_identify_hardware(adapter);
567 
568 	/* Setup PCI resources */
569 	if (em_allocate_pci_resources(adapter)) {
570 		device_printf(dev, "Allocation of PCI resources failed\n");
571 		error = ENXIO;
572 		goto err_pci;
573 	}
574 
575 	/*
576 	** For ICH8 and family we need to
577 	** map the flash memory, and this
578 	** must happen after the MAC is
579 	** identified
580 	*/
581 	if ((hw->mac.type == e1000_ich8lan) ||
582 	    (hw->mac.type == e1000_ich9lan) ||
583 	    (hw->mac.type == e1000_ich10lan) ||
584 	    (hw->mac.type == e1000_pchlan) ||
585 	    (hw->mac.type == e1000_pch2lan) ||
586 	    (hw->mac.type == e1000_pch_lpt)) {
587 		int rid = EM_BAR_TYPE_FLASH;
588 		adapter->flash = bus_alloc_resource_any(dev,
589 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
590 		if (adapter->flash == NULL) {
591 			device_printf(dev, "Mapping of Flash failed\n");
592 			error = ENXIO;
593 			goto err_pci;
594 		}
595 		/* This is used in the shared code */
596 		hw->flash_address = (u8 *)adapter->flash;
597 		adapter->osdep.flash_bus_space_tag =
598 		    rman_get_bustag(adapter->flash);
599 		adapter->osdep.flash_bus_space_handle =
600 		    rman_get_bushandle(adapter->flash);
601 	}
602 	/*
603 	** In the new SPT device flash is not  a
604 	** separate BAR, rather it is also in BAR0,
605 	** so use the same tag and an offset handle for the
606 	** FLASH read/write macros in the shared code.
607 	*/
608 	else if (hw->mac.type >= e1000_pch_spt) {
609 		adapter->osdep.flash_bus_space_tag =
610 		    adapter->osdep.mem_bus_space_tag;
611 		adapter->osdep.flash_bus_space_handle =
612 		    adapter->osdep.mem_bus_space_handle
613 		    + E1000_FLASH_BASE_ADDR;
614 	}
615 
616 	/* Do Shared Code initialization */
617 	error = e1000_setup_init_funcs(hw, TRUE);
618 	if (error) {
619 		device_printf(dev, "Setup of Shared code failed, error %d\n",
620 		    error);
621 		error = ENXIO;
622 		goto err_pci;
623 	}
624 
625 	/*
626 	 * Setup MSI/X or MSI if PCI Express
627 	 */
628 	adapter->msix = em_setup_msix(adapter);
629 
630 	e1000_get_bus_info(hw);
631 
632 	/* Set up some sysctls for the tunable interrupt delays */
633 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
634 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
635 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
636 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
637 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
638 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
639 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
640 	    "receive interrupt delay limit in usecs",
641 	    &adapter->rx_abs_int_delay,
642 	    E1000_REGISTER(hw, E1000_RADV),
643 	    em_rx_abs_int_delay_dflt);
644 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
645 	    "transmit interrupt delay limit in usecs",
646 	    &adapter->tx_abs_int_delay,
647 	    E1000_REGISTER(hw, E1000_TADV),
648 	    em_tx_abs_int_delay_dflt);
649 	em_add_int_delay_sysctl(adapter, "itr",
650 	    "interrupt delay limit in usecs/4",
651 	    &adapter->tx_itr,
652 	    E1000_REGISTER(hw, E1000_ITR),
653 	    DEFAULT_ITR);
654 
655 	/* Sysctl for limiting the amount of work done in the taskqueue */
656 	em_set_sysctl_value(adapter, "rx_processing_limit",
657 	    "max number of rx packets to process", &adapter->rx_process_limit,
658 	    em_rx_process_limit);
659 
660 	/*
661 	 * Validate number of transmit and receive descriptors. It
662 	 * must not exceed hardware maximum, and must be multiple
663 	 * of E1000_DBA_ALIGN.
664 	 */
665 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
666 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
667 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
668 		    EM_DEFAULT_TXD, em_txd);
669 		adapter->num_tx_desc = EM_DEFAULT_TXD;
670 	} else
671 		adapter->num_tx_desc = em_txd;
672 
673 	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
674 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
675 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
676 		    EM_DEFAULT_RXD, em_rxd);
677 		adapter->num_rx_desc = EM_DEFAULT_RXD;
678 	} else
679 		adapter->num_rx_desc = em_rxd;
680 
681 	hw->mac.autoneg = DO_AUTO_NEG;
682 	hw->phy.autoneg_wait_to_complete = FALSE;
683 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
684 
685 	/* Copper options */
686 	if (hw->phy.media_type == e1000_media_type_copper) {
687 		hw->phy.mdix = AUTO_ALL_MODES;
688 		hw->phy.disable_polarity_correction = FALSE;
689 		hw->phy.ms_type = EM_MASTER_SLAVE;
690 	}
691 
692 	/*
693 	 * Set the frame limits assuming
694 	 * standard ethernet sized frames.
695 	 */
696 	adapter->hw.mac.max_frame_size =
697 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
698 
699 	/*
700 	 * This controls when hardware reports transmit completion
701 	 * status.
702 	 */
703 	hw->mac.report_tx_early = 1;
704 
705 	/*
706 	** Get queue/ring memory
707 	*/
708 	if (em_allocate_queues(adapter)) {
709 		error = ENOMEM;
710 		goto err_pci;
711 	}
712 
713 	/* Allocate multicast array memory. */
714 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
715 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
716 	if (adapter->mta == NULL) {
717 		device_printf(dev, "Can not allocate multicast setup array\n");
718 		error = ENOMEM;
719 		goto err_late;
720 	}
721 
722 	/* Check SOL/IDER usage */
723 	if (e1000_check_reset_block(hw))
724 		device_printf(dev, "PHY reset is blocked"
725 		    " due to SOL/IDER session.\n");
726 
727 	/* Sysctl for setting Energy Efficient Ethernet */
728 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
729 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
730 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
731 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
732 	    adapter, 0, em_sysctl_eee, "I",
733 	    "Disable Energy Efficient Ethernet");
734 
735 	/*
736 	** Start from a known state, this is
737 	** important in reading the nvm and
738 	** mac from that.
739 	*/
740 	e1000_reset_hw(hw);
741 
742 
743 	/* Make sure we have a good EEPROM before we read from it */
744 	if (e1000_validate_nvm_checksum(hw) < 0) {
745 		/*
746 		** Some PCI-E parts fail the first check due to
747 		** the link being in sleep state, call it again,
748 		** if it fails a second time its a real issue.
749 		*/
750 		if (e1000_validate_nvm_checksum(hw) < 0) {
751 			device_printf(dev,
752 			    "The EEPROM Checksum Is Not Valid\n");
753 			error = EIO;
754 			goto err_late;
755 		}
756 	}
757 
758 	/* Copy the permanent MAC address out of the EEPROM */
759 	if (e1000_read_mac_addr(hw) < 0) {
760 		device_printf(dev, "EEPROM read error while reading MAC"
761 		    " address\n");
762 		error = EIO;
763 		goto err_late;
764 	}
765 
766 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
767 		device_printf(dev, "Invalid MAC address\n");
768 		error = EIO;
769 		goto err_late;
770 	}
771 
772 	/* Disable ULP support */
773 	e1000_disable_ulp_lpt_lp(hw, TRUE);
774 
775 	/*
776 	**  Do interrupt configuration
777 	*/
778 	if (adapter->msix > 1) /* Do MSIX */
779 		error = em_allocate_msix(adapter);
780 	else  /* MSI or Legacy */
781 		error = em_allocate_legacy(adapter);
782 	if (error)
783 		goto err_late;
784 
785 	/*
786 	 * Get Wake-on-Lan and Management info for later use
787 	 */
788 	em_get_wakeup(dev);
789 
790 	/* Setup OS specific network interface */
791 	if (em_setup_interface(dev, adapter) != 0)
792 		goto err_late;
793 
794 	em_reset(adapter);
795 
796 	/* Initialize statistics */
797 	em_update_stats_counters(adapter);
798 
799 	hw->mac.get_link_status = 1;
800 	em_update_link_status(adapter);
801 
802 	/* Register for VLAN events */
803 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
804 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
805 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
806 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
807 
808 	em_add_hw_stats(adapter);
809 
810 	/* Non-AMT based hardware can now take control from firmware */
811 	if (adapter->has_manage && !adapter->has_amt)
812 		em_get_hw_control(adapter);
813 
814 	/* Tell the stack that the interface is not active */
815 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
816 
817 	adapter->led_dev = led_create(em_led_func, adapter,
818 	    device_get_nameunit(dev));
819 #ifdef DEV_NETMAP
820 	em_netmap_attach(adapter);
821 #endif /* DEV_NETMAP */
822 
823 	INIT_DEBUGOUT("em_attach: end");
824 
825 	return (0);
826 
827 err_late:
828 	em_free_transmit_structures(adapter);
829 	em_free_receive_structures(adapter);
830 	em_release_hw_control(adapter);
831 	if (adapter->ifp != (void *)NULL)
832 		if_free(adapter->ifp);
833 err_pci:
834 	em_free_pci_resources(adapter);
835 	free(adapter->mta, M_DEVBUF);
836 	EM_CORE_LOCK_DESTROY(adapter);
837 
838 	return (error);
839 }
840 
841 /*********************************************************************
842  *  Device removal routine
843  *
844  *  The detach entry point is called when the driver is being removed.
845  *  This routine stops the adapter and deallocates all the resources
846  *  that were allocated for driver operation.
847  *
848  *  return 0 on success, positive on failure
849  *********************************************************************/
850 
851 static int
em_detach(device_t dev)852 em_detach(device_t dev)
853 {
854 	struct adapter	*adapter = device_get_softc(dev);
855 	if_t ifp = adapter->ifp;
856 
857 	INIT_DEBUGOUT("em_detach: begin");
858 
859 	/* Make sure VLANS are not using driver */
860 	if (if_vlantrunkinuse(ifp)) {
861 		device_printf(dev,"Vlan in use, detach first\n");
862 		return (EBUSY);
863 	}
864 
865 #ifdef DEVICE_POLLING
866 	if (if_getcapenable(ifp) & IFCAP_POLLING)
867 		ether_poll_deregister(ifp);
868 #endif
869 
870 	if (adapter->led_dev != NULL)
871 		led_destroy(adapter->led_dev);
872 
873 	EM_CORE_LOCK(adapter);
874 	adapter->in_detach = 1;
875 	em_stop(adapter);
876 	EM_CORE_UNLOCK(adapter);
877 	EM_CORE_LOCK_DESTROY(adapter);
878 
879 	e1000_phy_hw_reset(&adapter->hw);
880 
881 	em_release_manageability(adapter);
882 	em_release_hw_control(adapter);
883 
884 	/* Unregister VLAN events */
885 	if (adapter->vlan_attach != NULL)
886 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
887 	if (adapter->vlan_detach != NULL)
888 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
889 
890 	ether_ifdetach(adapter->ifp);
891 	callout_drain(&adapter->timer);
892 
893 #ifdef DEV_NETMAP
894 	netmap_detach(ifp);
895 #endif /* DEV_NETMAP */
896 
897 	em_free_pci_resources(adapter);
898 	bus_generic_detach(dev);
899 	if_free(ifp);
900 
901 	em_free_transmit_structures(adapter);
902 	em_free_receive_structures(adapter);
903 
904 	em_release_hw_control(adapter);
905 	free(adapter->mta, M_DEVBUF);
906 
907 	return (0);
908 }
909 
910 /*********************************************************************
911  *
912  *  Shutdown entry point
913  *
914  **********************************************************************/
915 
916 static int
em_shutdown(device_t dev)917 em_shutdown(device_t dev)
918 {
919 	return em_suspend(dev);
920 }
921 
922 /*
923  * Suspend/resume device methods.
924  */
925 static int
em_suspend(device_t dev)926 em_suspend(device_t dev)
927 {
928 	struct adapter *adapter = device_get_softc(dev);
929 
930 	EM_CORE_LOCK(adapter);
931 
932         em_release_manageability(adapter);
933 	em_release_hw_control(adapter);
934 	em_enable_wakeup(dev);
935 
936 	EM_CORE_UNLOCK(adapter);
937 
938 	return bus_generic_suspend(dev);
939 }
940 
941 static int
em_resume(device_t dev)942 em_resume(device_t dev)
943 {
944 	struct adapter *adapter = device_get_softc(dev);
945 	struct tx_ring	*txr = adapter->tx_rings;
946 	if_t ifp = adapter->ifp;
947 
948 	EM_CORE_LOCK(adapter);
949 	if (adapter->hw.mac.type == e1000_pch2lan)
950 		e1000_resume_workarounds_pchlan(&adapter->hw);
951 	em_init_locked(adapter);
952 	em_init_manageability(adapter);
953 
954 	if ((if_getflags(ifp) & IFF_UP) &&
955 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
956 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
957 			EM_TX_LOCK(txr);
958 #ifdef EM_MULTIQUEUE
959 			if (!drbr_empty(ifp, txr->br))
960 				em_mq_start_locked(ifp, txr);
961 #else
962 			if (!if_sendq_empty(ifp))
963 				em_start_locked(ifp, txr);
964 #endif
965 			EM_TX_UNLOCK(txr);
966 		}
967 	}
968 	EM_CORE_UNLOCK(adapter);
969 
970 	return bus_generic_resume(dev);
971 }
972 
973 
974 #ifndef EM_MULTIQUEUE
975 static void
em_start_locked(if_t ifp,struct tx_ring * txr)976 em_start_locked(if_t ifp, struct tx_ring *txr)
977 {
978 	struct adapter	*adapter = if_getsoftc(ifp);
979 	struct mbuf	*m_head;
980 
981 	EM_TX_LOCK_ASSERT(txr);
982 
983 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
984 	    IFF_DRV_RUNNING)
985 		return;
986 
987 	if (!adapter->link_active)
988 		return;
989 
990 	while (!if_sendq_empty(ifp)) {
991         	/* Call cleanup if number of TX descriptors low */
992 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
993 			em_txeof(txr);
994 		if (txr->tx_avail < EM_MAX_SCATTER) {
995 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
996 			break;
997 		}
998 		m_head = if_dequeue(ifp);
999 		if (m_head == NULL)
1000 			break;
1001 		/*
1002 		 *  Encapsulation can modify our pointer, and or make it
1003 		 *  NULL on failure.  In that event, we can't requeue.
1004 		 */
1005 		if (em_xmit(txr, &m_head)) {
1006 			if (m_head == NULL)
1007 				break;
1008 			if_sendq_prepend(ifp, m_head);
1009 			break;
1010 		}
1011 
1012 		/* Mark the queue as having work */
1013 		if (txr->busy == EM_TX_IDLE)
1014 			txr->busy = EM_TX_BUSY;
1015 
1016 		/* Send a copy of the frame to the BPF listener */
1017 		ETHER_BPF_MTAP(ifp, m_head);
1018 
1019 	}
1020 
1021 	return;
1022 }
1023 
1024 static void
em_start(if_t ifp)1025 em_start(if_t ifp)
1026 {
1027 	struct adapter	*adapter = if_getsoftc(ifp);
1028 	struct tx_ring	*txr = adapter->tx_rings;
1029 
1030 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1031 		EM_TX_LOCK(txr);
1032 		em_start_locked(ifp, txr);
1033 		EM_TX_UNLOCK(txr);
1034 	}
1035 	return;
1036 }
1037 #else /* EM_MULTIQUEUE */
1038 /*********************************************************************
1039  *  Multiqueue Transmit routines
1040  *
1041  *  em_mq_start is called by the stack to initiate a transmit.
1042  *  however, if busy the driver can queue the request rather
1043  *  than do an immediate send. It is this that is an advantage
1044  *  in this driver, rather than also having multiple tx queues.
1045  **********************************************************************/
1046 /*
1047 ** Multiqueue capable stack interface
1048 */
1049 static int
em_mq_start(if_t ifp,struct mbuf * m)1050 em_mq_start(if_t ifp, struct mbuf *m)
1051 {
1052 	struct adapter	*adapter = if_getsoftc(ifp);
1053 	struct tx_ring	*txr = adapter->tx_rings;
1054 	unsigned int	i, error;
1055 
1056 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1057 		i = m->m_pkthdr.flowid % adapter->num_queues;
1058 	else
1059 		i = curcpu % adapter->num_queues;
1060 
1061 	txr = &adapter->tx_rings[i];
1062 
1063 	error = drbr_enqueue(ifp, txr->br, m);
1064 	if (error)
1065 		return (error);
1066 
1067 	if (EM_TX_TRYLOCK(txr)) {
1068 		em_mq_start_locked(ifp, txr);
1069 		EM_TX_UNLOCK(txr);
1070 	} else
1071 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1072 
1073 	return (0);
1074 }
1075 
1076 static int
em_mq_start_locked(if_t ifp,struct tx_ring * txr)1077 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1078 {
1079 	struct adapter  *adapter = txr->adapter;
1080         struct mbuf     *next;
1081         int             err = 0, enq = 0;
1082 
1083 	EM_TX_LOCK_ASSERT(txr);
1084 
1085 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1086 	    adapter->link_active == 0) {
1087 		return (ENETDOWN);
1088 	}
1089 
1090 	/* Process the queue */
1091 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1092 		if ((err = em_xmit(txr, &next)) != 0) {
1093 			if (next == NULL) {
1094 				/* It was freed, move forward */
1095 				drbr_advance(ifp, txr->br);
1096 			} else {
1097 				/*
1098 				 * Still have one left, it may not be
1099 				 * the same since the transmit function
1100 				 * may have changed it.
1101 				 */
1102 				drbr_putback(ifp, txr->br, next);
1103 			}
1104 			break;
1105 		}
1106 		drbr_advance(ifp, txr->br);
1107 		enq++;
1108 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1109 		if (next->m_flags & M_MCAST)
1110 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1111 		ETHER_BPF_MTAP(ifp, next);
1112 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1113                         break;
1114 	}
1115 
1116 	/* Mark the queue as having work */
1117 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1118 		txr->busy = EM_TX_BUSY;
1119 
1120 	if (txr->tx_avail < EM_MAX_SCATTER)
1121 		em_txeof(txr);
1122 	if (txr->tx_avail < EM_MAX_SCATTER) {
1123 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1124 	}
1125 	return (err);
1126 }
1127 
1128 /*
1129 ** Flush all ring buffers
1130 */
1131 static void
em_qflush(if_t ifp)1132 em_qflush(if_t ifp)
1133 {
1134 	struct adapter  *adapter = if_getsoftc(ifp);
1135 	struct tx_ring  *txr = adapter->tx_rings;
1136 	struct mbuf     *m;
1137 
1138 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1139 		EM_TX_LOCK(txr);
1140 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1141 			m_freem(m);
1142 		EM_TX_UNLOCK(txr);
1143 	}
1144 	if_qflush(ifp);
1145 }
1146 #endif /* EM_MULTIQUEUE */
1147 
1148 /*********************************************************************
1149  *  Ioctl entry point
1150  *
1151  *  em_ioctl is called when the user wants to configure the
1152  *  interface.
1153  *
1154  *  return 0 on success, positive on failure
1155  **********************************************************************/
1156 
1157 static int
em_ioctl(if_t ifp,u_long command,caddr_t data)1158 em_ioctl(if_t ifp, u_long command, caddr_t data)
1159 {
1160 	struct adapter	*adapter = if_getsoftc(ifp);
1161 	struct ifreq	*ifr = (struct ifreq *)data;
1162 #if defined(INET) || defined(INET6)
1163 	struct ifaddr	*ifa = (struct ifaddr *)data;
1164 #endif
1165 	bool		avoid_reset = FALSE;
1166 	int		error = 0;
1167 
1168 	if (adapter->in_detach)
1169 		return (error);
1170 
1171 	switch (command) {
1172 	case SIOCSIFADDR:
1173 #ifdef INET
1174 		if (ifa->ifa_addr->sa_family == AF_INET)
1175 			avoid_reset = TRUE;
1176 #endif
1177 #ifdef INET6
1178 		if (ifa->ifa_addr->sa_family == AF_INET6)
1179 			avoid_reset = TRUE;
1180 #endif
1181 		/*
1182 		** Calling init results in link renegotiation,
1183 		** so we avoid doing it when possible.
1184 		*/
1185 		if (avoid_reset) {
1186 			if_setflagbits(ifp,IFF_UP,0);
1187 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1188 				em_init(adapter);
1189 #ifdef INET
1190 			if (!(if_getflags(ifp) & IFF_NOARP))
1191 				arp_ifinit(ifp, ifa);
1192 #endif
1193 		} else
1194 			error = ether_ioctl(ifp, command, data);
1195 		break;
1196 	case SIOCSIFMTU:
1197 	    {
1198 		int max_frame_size;
1199 
1200 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1201 
1202 		EM_CORE_LOCK(adapter);
1203 		switch (adapter->hw.mac.type) {
1204 		case e1000_82571:
1205 		case e1000_82572:
1206 		case e1000_ich9lan:
1207 		case e1000_ich10lan:
1208 		case e1000_pch2lan:
1209 		case e1000_pch_lpt:
1210 		case e1000_pch_spt:
1211 		case e1000_pch_cnp:
1212 		case e1000_82574:
1213 		case e1000_82583:
1214 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1215 			max_frame_size = 9234;
1216 			break;
1217 		case e1000_pchlan:
1218 			max_frame_size = 4096;
1219 			break;
1220 			/* Adapters that do not support jumbo frames */
1221 		case e1000_ich8lan:
1222 			max_frame_size = ETHER_MAX_LEN;
1223 			break;
1224 		default:
1225 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1226 		}
1227 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1228 		    ETHER_CRC_LEN) {
1229 			EM_CORE_UNLOCK(adapter);
1230 			error = EINVAL;
1231 			break;
1232 		}
1233 
1234 		if_setmtu(ifp, ifr->ifr_mtu);
1235 		adapter->hw.mac.max_frame_size =
1236 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1237 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1238 			em_init_locked(adapter);
1239 		EM_CORE_UNLOCK(adapter);
1240 		break;
1241 	    }
1242 	case SIOCSIFFLAGS:
1243 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1244 		    SIOCSIFFLAGS (Set Interface Flags)");
1245 		EM_CORE_LOCK(adapter);
1246 		if (if_getflags(ifp) & IFF_UP) {
1247 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1248 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1249 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1250 					em_disable_promisc(adapter);
1251 					em_set_promisc(adapter);
1252 				}
1253 			} else
1254 				em_init_locked(adapter);
1255 		} else
1256 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1257 				em_stop(adapter);
1258 		adapter->if_flags = if_getflags(ifp);
1259 		EM_CORE_UNLOCK(adapter);
1260 		break;
1261 	case SIOCADDMULTI:
1262 	case SIOCDELMULTI:
1263 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1264 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1265 			EM_CORE_LOCK(adapter);
1266 			em_disable_intr(adapter);
1267 			em_set_multi(adapter);
1268 #ifdef DEVICE_POLLING
1269 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1270 #endif
1271 				em_enable_intr(adapter);
1272 			EM_CORE_UNLOCK(adapter);
1273 		}
1274 		break;
1275 	case SIOCSIFMEDIA:
1276 		/* Check SOL/IDER usage */
1277 		EM_CORE_LOCK(adapter);
1278 		if (e1000_check_reset_block(&adapter->hw)) {
1279 			EM_CORE_UNLOCK(adapter);
1280 			device_printf(adapter->dev, "Media change is"
1281 			    " blocked due to SOL/IDER session.\n");
1282 			break;
1283 		}
1284 		EM_CORE_UNLOCK(adapter);
1285 		/* falls thru */
1286 	case SIOCGIFMEDIA:
1287 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1288 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1289 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1290 		break;
1291 	case SIOCSIFCAP:
1292 	    {
1293 		int mask, reinit;
1294 
1295 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1296 		reinit = 0;
1297 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1298 #ifdef DEVICE_POLLING
1299 		if (mask & IFCAP_POLLING) {
1300 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1301 				error = ether_poll_register(em_poll, ifp);
1302 				if (error)
1303 					return (error);
1304 				EM_CORE_LOCK(adapter);
1305 				em_disable_intr(adapter);
1306 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1307 				EM_CORE_UNLOCK(adapter);
1308 			} else {
1309 				error = ether_poll_deregister(ifp);
1310 				/* Enable interrupt even in error case */
1311 				EM_CORE_LOCK(adapter);
1312 				em_enable_intr(adapter);
1313 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1314 				EM_CORE_UNLOCK(adapter);
1315 			}
1316 		}
1317 #endif
1318 		if (mask & IFCAP_HWCSUM) {
1319 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1320 			reinit = 1;
1321 		}
1322 		if (mask & IFCAP_TSO4) {
1323 			if_togglecapenable(ifp,IFCAP_TSO4);
1324 			reinit = 1;
1325 		}
1326 		if (mask & IFCAP_VLAN_HWTAGGING) {
1327 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1328 			reinit = 1;
1329 		}
1330 		if (mask & IFCAP_VLAN_HWFILTER) {
1331 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1332 			reinit = 1;
1333 		}
1334 		if (mask & IFCAP_VLAN_HWTSO) {
1335 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1336 			reinit = 1;
1337 		}
1338 		if ((mask & IFCAP_WOL) &&
1339 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1340 			if (mask & IFCAP_WOL_MCAST)
1341 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1342 			if (mask & IFCAP_WOL_MAGIC)
1343 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1344 		}
1345 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1346 			em_init(adapter);
1347 		if_vlancap(ifp);
1348 		break;
1349 	    }
1350 
1351 	default:
1352 		error = ether_ioctl(ifp, command, data);
1353 		break;
1354 	}
1355 
1356 	return (error);
1357 }
1358 
1359 
1360 /*********************************************************************
1361  *  Init entry point
1362  *
1363  *  This routine is used in two ways. It is used by the stack as
1364  *  init entry point in network interface structure. It is also used
1365  *  by the driver as a hw/sw initialization routine to get to a
1366  *  consistent state.
1367  *
1368  *  return 0 on success, positive on failure
1369  **********************************************************************/
1370 
1371 static void
em_init_locked(struct adapter * adapter)1372 em_init_locked(struct adapter *adapter)
1373 {
1374 	if_t ifp = adapter->ifp;
1375 	device_t	dev = adapter->dev;
1376 
1377 	INIT_DEBUGOUT("em_init: begin");
1378 
1379 	EM_CORE_LOCK_ASSERT(adapter);
1380 
1381 	em_disable_intr(adapter);
1382 	callout_stop(&adapter->timer);
1383 
1384 	/* Get the latest mac address, User can use a LAA */
1385         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1386               ETHER_ADDR_LEN);
1387 
1388 	/* Put the address into the Receive Address Array */
1389 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1390 
1391 	/*
1392 	 * With the 82571 adapter, RAR[0] may be overwritten
1393 	 * when the other port is reset, we make a duplicate
1394 	 * in RAR[14] for that eventuality, this assures
1395 	 * the interface continues to function.
1396 	 */
1397 	if (adapter->hw.mac.type == e1000_82571) {
1398 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1399 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1400 		    E1000_RAR_ENTRIES - 1);
1401 	}
1402 
1403 	/* Initialize the hardware */
1404 	em_reset(adapter);
1405 	em_update_link_status(adapter);
1406 
1407 	/* Setup VLAN support, basic and offload if available */
1408 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1409 
1410 	/* Set hardware offload abilities */
1411 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1412 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1413 	else
1414 		if_sethwassistbits(ifp, 0, CSUM_TCP | CSUM_UDP);
1415 
1416 	/* Configure for OS presence */
1417 	em_init_manageability(adapter);
1418 
1419 	/* Prepare transmit descriptors and buffers */
1420 	em_setup_transmit_structures(adapter);
1421 	em_initialize_transmit_unit(adapter);
1422 
1423 	/* Setup Multicast table */
1424 	em_set_multi(adapter);
1425 
1426 	/*
1427 	** Figure out the desired mbuf
1428 	** pool for doing jumbos
1429 	*/
1430 	if (adapter->hw.mac.max_frame_size <= 2048)
1431 		adapter->rx_mbuf_sz = MCLBYTES;
1432        else
1433                adapter->rx_mbuf_sz = MJUMPAGESIZE;
1434 
1435 	/* Prepare receive descriptors and buffers */
1436 	if (em_setup_receive_structures(adapter)) {
1437 		device_printf(dev, "Could not setup receive structures\n");
1438 		em_stop(adapter);
1439 		return;
1440 	}
1441 	em_initialize_receive_unit(adapter);
1442 
1443 	/* Use real VLAN Filter support? */
1444 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1445 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1446 			/* Use real VLAN Filter support */
1447 			em_setup_vlan_hw_support(adapter);
1448 		else {
1449 			u32 ctrl;
1450 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1451 			ctrl |= E1000_CTRL_VME;
1452 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1453 		}
1454 	} else {
1455 		u32 ctrl;
1456 		ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1457 		ctrl &= ~E1000_CTRL_VME;
1458 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1459 	}
1460 
1461 	/* Don't lose promiscuous settings */
1462 	em_set_promisc(adapter);
1463 
1464 	/* Set the interface as ACTIVE */
1465 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1466 
1467 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1468 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1469 
1470 	/* MSI/X configuration for 82574 */
1471 	if (adapter->hw.mac.type == e1000_82574) {
1472 		int tmp;
1473 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1474 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1475 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1476 		/* Set the IVAR - interrupt vector routing. */
1477 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1478 	}
1479 
1480 #ifdef DEVICE_POLLING
1481 	/*
1482 	 * Only enable interrupts if we are not polling, make sure
1483 	 * they are off otherwise.
1484 	 */
1485 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1486 		em_disable_intr(adapter);
1487 	else
1488 #endif /* DEVICE_POLLING */
1489 		em_enable_intr(adapter);
1490 
1491 	/* AMT based hardware can now take control from firmware */
1492 	if (adapter->has_manage && adapter->has_amt)
1493 		em_get_hw_control(adapter);
1494 }
1495 
/*
 * Unlocked init wrapper: takes the core lock around em_init_locked().
 * arg is the adapter softc.
 */
static void
em_init(void *arg)
{
	struct adapter	*sc = arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1505 
1506 
1507 #ifdef DEVICE_POLLING
1508 /*********************************************************************
1509  *
1510  *  Legacy polling routine: note this only works with single queue
1511  *
1512  *********************************************************************/
1513 static int
em_poll(if_t ifp,enum poll_cmd cmd,int count)1514 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1515 {
1516 	struct adapter *adapter = if_getsoftc(ifp);
1517 	struct tx_ring	*txr = adapter->tx_rings;
1518 	struct rx_ring	*rxr = adapter->rx_rings;
1519 	u32		reg_icr;
1520 	int		rx_done;
1521 
1522 	EM_CORE_LOCK(adapter);
1523 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1524 		EM_CORE_UNLOCK(adapter);
1525 		return (0);
1526 	}
1527 
1528 	if (cmd == POLL_AND_CHECK_STATUS) {
1529 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1530 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1531 			callout_stop(&adapter->timer);
1532 			adapter->hw.mac.get_link_status = 1;
1533 			em_update_link_status(adapter);
1534 			callout_reset(&adapter->timer, hz,
1535 			    em_local_timer, adapter);
1536 		}
1537 	}
1538 	EM_CORE_UNLOCK(adapter);
1539 
1540 	em_rxeof(rxr, count, &rx_done);
1541 
1542 	EM_TX_LOCK(txr);
1543 	em_txeof(txr);
1544 #ifdef EM_MULTIQUEUE
1545 	if (!drbr_empty(ifp, txr->br))
1546 		em_mq_start_locked(ifp, txr);
1547 #else
1548 	if (!if_sendq_empty(ifp))
1549 		em_start_locked(ifp, txr);
1550 #endif
1551 	EM_TX_UNLOCK(txr);
1552 
1553 	return (rx_done);
1554 }
1555 #endif /* DEVICE_POLLING */
1556 
1557 
1558 /*********************************************************************
1559  *
1560  *  Fast Legacy/MSI Combined Interrupt Service routine
1561  *
1562  *********************************************************************/
1563 static int
em_irq_fast(void * arg)1564 em_irq_fast(void *arg)
1565 {
1566 	struct adapter	*adapter = arg;
1567 	if_t ifp;
1568 	u32		reg_icr;
1569 
1570 	ifp = adapter->ifp;
1571 
1572 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1573 
1574 	/* Hot eject?  */
1575 	if (reg_icr == 0xffffffff)
1576 		return FILTER_STRAY;
1577 
1578 	/* Definitely not our interrupt.  */
1579 	if (reg_icr == 0x0)
1580 		return FILTER_STRAY;
1581 
1582 	/*
1583 	 * Starting with the 82571 chip, bit 31 should be used to
1584 	 * determine whether the interrupt belongs to us.
1585 	 */
1586 	if (adapter->hw.mac.type >= e1000_82571 &&
1587 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1588 		return FILTER_STRAY;
1589 
1590 	em_disable_intr(adapter);
1591 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1592 
1593 	/* Link status change */
1594 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1595 		adapter->hw.mac.get_link_status = 1;
1596 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1597 	}
1598 
1599 	if (reg_icr & E1000_ICR_RXO)
1600 		adapter->rx_overruns++;
1601 	return FILTER_HANDLED;
1602 }
1603 
1604 /* Combined RX/TX handler, used by Legacy and MSI */
1605 static void
em_handle_que(void * context,int pending)1606 em_handle_que(void *context, int pending)
1607 {
1608 	struct adapter	*adapter = context;
1609 	if_t ifp = adapter->ifp;
1610 	struct tx_ring	*txr = adapter->tx_rings;
1611 	struct rx_ring	*rxr = adapter->rx_rings;
1612 
1613 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1614 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1615 
1616 		EM_TX_LOCK(txr);
1617 		em_txeof(txr);
1618 #ifdef EM_MULTIQUEUE
1619 		if (!drbr_empty(ifp, txr->br))
1620 			em_mq_start_locked(ifp, txr);
1621 #else
1622 		if (!if_sendq_empty(ifp))
1623 			em_start_locked(ifp, txr);
1624 #endif
1625 		EM_TX_UNLOCK(txr);
1626 		if (more) {
1627 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1628 			return;
1629 		}
1630 	}
1631 
1632 	em_enable_intr(adapter);
1633 	return;
1634 }
1635 
1636 
1637 /*********************************************************************
1638  *
1639  *  MSIX Interrupt Service Routines
1640  *
1641  **********************************************************************/
1642 static void
em_msix_tx(void * arg)1643 em_msix_tx(void *arg)
1644 {
1645 	struct tx_ring *txr = arg;
1646 	struct adapter *adapter = txr->adapter;
1647 	if_t ifp = adapter->ifp;
1648 
1649 	++txr->tx_irq;
1650 	EM_TX_LOCK(txr);
1651 	em_txeof(txr);
1652 #ifdef EM_MULTIQUEUE
1653 	if (!drbr_empty(ifp, txr->br))
1654 		em_mq_start_locked(ifp, txr);
1655 #else
1656 	if (!if_sendq_empty(ifp))
1657 		em_start_locked(ifp, txr);
1658 #endif
1659 
1660 	/* Reenable this interrupt */
1661 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1662 	EM_TX_UNLOCK(txr);
1663 	return;
1664 }
1665 
1666 /*********************************************************************
1667  *
1668  *  MSIX RX Interrupt Service routine
1669  *
1670  **********************************************************************/
1671 
1672 static void
em_msix_rx(void * arg)1673 em_msix_rx(void *arg)
1674 {
1675 	struct rx_ring	*rxr = arg;
1676 	struct adapter	*adapter = rxr->adapter;
1677 	bool		more;
1678 
1679 	++rxr->rx_irq;
1680 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1681 		return;
1682 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1683 	if (more)
1684 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1685 	else {
1686 		/* Reenable this interrupt */
1687 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1688 	}
1689 	return;
1690 }
1691 
1692 /*********************************************************************
1693  *
1694  *  MSIX Link Fast Interrupt Service routine
1695  *
1696  **********************************************************************/
1697 static void
em_msix_link(void * arg)1698 em_msix_link(void *arg)
1699 {
1700 	struct adapter	*adapter = arg;
1701 	u32		reg_icr;
1702 
1703 	++adapter->link_irq;
1704 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1705 
1706 	if (reg_icr & E1000_ICR_RXO)
1707 		adapter->rx_overruns++;
1708 
1709 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1710 		adapter->hw.mac.get_link_status = 1;
1711 		em_handle_link(adapter, 0);
1712 	} else
1713 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1714 		    EM_MSIX_LINK | E1000_IMS_LSC);
1715 	/*
1716  	** Because we must read the ICR for this interrupt
1717  	** it may clear other causes using autoclear, for
1718  	** this reason we simply create a soft interrupt
1719  	** for all these vectors.
1720  	*/
1721 	if (reg_icr) {
1722 		E1000_WRITE_REG(&adapter->hw,
1723 			E1000_ICS, adapter->ims);
1724 	}
1725 	return;
1726 }
1727 
1728 static void
em_handle_rx(void * context,int pending)1729 em_handle_rx(void *context, int pending)
1730 {
1731 	struct rx_ring	*rxr = context;
1732 	struct adapter	*adapter = rxr->adapter;
1733         bool            more;
1734 
1735 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1736 	if (more)
1737 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1738 	else {
1739 		/* Reenable this interrupt */
1740 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1741 	}
1742 }
1743 
1744 static void
em_handle_tx(void * context,int pending)1745 em_handle_tx(void *context, int pending)
1746 {
1747 	struct tx_ring	*txr = context;
1748 	struct adapter	*adapter = txr->adapter;
1749 	if_t ifp = adapter->ifp;
1750 
1751 	EM_TX_LOCK(txr);
1752 	em_txeof(txr);
1753 #ifdef EM_MULTIQUEUE
1754 	if (!drbr_empty(ifp, txr->br))
1755 		em_mq_start_locked(ifp, txr);
1756 #else
1757 	if (!if_sendq_empty(ifp))
1758 		em_start_locked(ifp, txr);
1759 #endif
1760 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1761 	EM_TX_UNLOCK(txr);
1762 }
1763 
1764 static void
em_handle_link(void * context,int pending)1765 em_handle_link(void *context, int pending)
1766 {
1767 	struct adapter	*adapter = context;
1768 	struct e1000_hw *hw = &adapter->hw;
1769 	struct tx_ring	*txr = adapter->tx_rings;
1770 	if_t ifp = adapter->ifp;
1771 
1772 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1773 		return;
1774 
1775 	EM_CORE_LOCK(adapter);
1776 	callout_stop(&adapter->timer);
1777 	em_update_link_status(adapter);
1778 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1779 	if (hw->mac.type == e1000_82574 && adapter->msix_mem != NULL)
1780 		E1000_WRITE_REG(hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
1781 	if (adapter->link_active) {
1782 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1783 			EM_TX_LOCK(txr);
1784 #ifdef EM_MULTIQUEUE
1785 			if (!drbr_empty(ifp, txr->br))
1786 				em_mq_start_locked(ifp, txr);
1787 #else
1788 			if (if_sendq_empty(ifp))
1789 				em_start_locked(ifp, txr);
1790 #endif
1791 			EM_TX_UNLOCK(txr);
1792 		}
1793 	}
1794 	EM_CORE_UNLOCK(adapter);
1795 }
1796 
1797 
1798 /*********************************************************************
1799  *
1800  *  Media Ioctl callback
1801  *
1802  *  This routine is called whenever the user queries the status of
1803  *  the interface using ifconfig.
1804  *
1805  **********************************************************************/
1806 static void
em_media_status(if_t ifp,struct ifmediareq * ifmr)1807 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1808 {
1809 	struct adapter *adapter = if_getsoftc(ifp);
1810 	u_char fiber_type = IFM_1000_SX;
1811 
1812 	INIT_DEBUGOUT("em_media_status: begin");
1813 
1814 	EM_CORE_LOCK(adapter);
1815 	em_update_link_status(adapter);
1816 
1817 	ifmr->ifm_status = IFM_AVALID;
1818 	ifmr->ifm_active = IFM_ETHER;
1819 
1820 	if (!adapter->link_active) {
1821 		EM_CORE_UNLOCK(adapter);
1822 		return;
1823 	}
1824 
1825 	ifmr->ifm_status |= IFM_ACTIVE;
1826 
1827 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1828 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1829 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1830 	} else {
1831 		switch (adapter->link_speed) {
1832 		case 10:
1833 			ifmr->ifm_active |= IFM_10_T;
1834 			break;
1835 		case 100:
1836 			ifmr->ifm_active |= IFM_100_TX;
1837 			break;
1838 		case 1000:
1839 			ifmr->ifm_active |= IFM_1000_T;
1840 			break;
1841 		}
1842 		if (adapter->link_duplex == FULL_DUPLEX)
1843 			ifmr->ifm_active |= IFM_FDX;
1844 		else
1845 			ifmr->ifm_active |= IFM_HDX;
1846 	}
1847 	EM_CORE_UNLOCK(adapter);
1848 }
1849 
1850 /*********************************************************************
1851  *
1852  *  Media Ioctl callback
1853  *
1854  *  This routine is called when the user changes speed/duplex using
1855  *  media/mediopt option with ifconfig.
1856  *
1857  **********************************************************************/
1858 static int
em_media_change(if_t ifp)1859 em_media_change(if_t ifp)
1860 {
1861 	struct adapter *adapter = if_getsoftc(ifp);
1862 	struct ifmedia  *ifm = &adapter->media;
1863 
1864 	INIT_DEBUGOUT("em_media_change: begin");
1865 
1866 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1867 		return (EINVAL);
1868 
1869 	EM_CORE_LOCK(adapter);
1870 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1871 	case IFM_AUTO:
1872 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1873 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1874 		break;
1875 	case IFM_1000_LX:
1876 	case IFM_1000_SX:
1877 	case IFM_1000_T:
1878 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1879 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1880 		break;
1881 	case IFM_100_TX:
1882 		adapter->hw.mac.autoneg = FALSE;
1883 		adapter->hw.phy.autoneg_advertised = 0;
1884 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1885 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1886 		else
1887 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1888 		break;
1889 	case IFM_10_T:
1890 		adapter->hw.mac.autoneg = FALSE;
1891 		adapter->hw.phy.autoneg_advertised = 0;
1892 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1893 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1894 		else
1895 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1896 		break;
1897 	default:
1898 		device_printf(adapter->dev, "Unsupported media type\n");
1899 	}
1900 
1901 	em_init_locked(adapter);
1902 	EM_CORE_UNLOCK(adapter);
1903 
1904 	return (0);
1905 }
1906 
1907 /*********************************************************************
1908  *
1909  *  This routine maps the mbufs to tx descriptors.
1910  *
1911  *  return 0 on success, positive on failure
1912  **********************************************************************/
1913 
1914 static int
em_xmit(struct tx_ring * txr,struct mbuf ** m_headp)1915 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1916 {
1917 	struct adapter		*adapter = txr->adapter;
1918 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1919 	bus_dmamap_t		map;
1920 	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1921 	struct e1000_tx_desc	*ctxd = NULL;
1922 	struct mbuf		*m_head;
1923 	struct ether_header	*eh;
1924 	struct ip		*ip = NULL;
1925 	struct tcphdr		*tp = NULL;
1926 	u32			txd_upper = 0, txd_lower = 0;
1927 	int			ip_off, poff;
1928 	int			nsegs, i, j, first, last = 0;
1929 	int			error;
1930 	bool			do_tso, tso_desc, remap = TRUE;
1931 
1932 	m_head = *m_headp;
1933 	do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1934 	tso_desc = FALSE;
1935 	ip_off = poff = 0;
1936 
1937 	/*
1938 	 * Intel recommends entire IP/TCP header length reside in a single
1939 	 * buffer. If multiple descriptors are used to describe the IP and
1940 	 * TCP header, each descriptor should describe one or more
1941 	 * complete headers; descriptors referencing only parts of headers
1942 	 * are not supported. If all layer headers are not coalesced into
1943 	 * a single buffer, each buffer should not cross a 4KB boundary,
1944 	 * or be larger than the maximum read request size.
1945 	 * Controller also requires modifing IP/TCP header to make TSO work
1946 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1947 	 * IP/TCP header into a single buffer to meet the requirement of
1948 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1949 	 * which also has similar restrictions.
1950 	 */
1951 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1952 		if (do_tso || (m_head->m_next != NULL &&
1953 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1954 			if (M_WRITABLE(*m_headp) == 0) {
1955 				m_head = m_dup(*m_headp, M_NOWAIT);
1956 				m_freem(*m_headp);
1957 				if (m_head == NULL) {
1958 					*m_headp = NULL;
1959 					return (ENOBUFS);
1960 				}
1961 				*m_headp = m_head;
1962 			}
1963 		}
1964 		/*
1965 		 * XXX
1966 		 * Assume IPv4, we don't have TSO/checksum offload support
1967 		 * for IPv6 yet.
1968 		 */
1969 		ip_off = sizeof(struct ether_header);
1970 		if (m_head->m_len < ip_off) {
1971 			m_head = m_pullup(m_head, ip_off);
1972 			if (m_head == NULL) {
1973 				*m_headp = NULL;
1974 				return (ENOBUFS);
1975 			}
1976 		}
1977 		eh = mtod(m_head, struct ether_header *);
1978 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1979 			ip_off = sizeof(struct ether_vlan_header);
1980 			if (m_head->m_len < ip_off) {
1981 				m_head = m_pullup(m_head, ip_off);
1982 				if (m_head == NULL) {
1983 					*m_headp = NULL;
1984 					return (ENOBUFS);
1985 				}
1986 			}
1987 		}
1988 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1989 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1990 			if (m_head == NULL) {
1991 				*m_headp = NULL;
1992 				return (ENOBUFS);
1993 			}
1994 		}
1995 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1996 		poff = ip_off + (ip->ip_hl << 2);
1997 
1998 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1999 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2000 				m_head = m_pullup(m_head, poff +
2001 				    sizeof(struct tcphdr));
2002 				if (m_head == NULL) {
2003 					*m_headp = NULL;
2004 					return (ENOBUFS);
2005 				}
2006 			}
2007 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2008 			/*
2009 			 * TSO workaround:
2010 			 *   pull 4 more bytes of data into it.
2011 			 */
2012 			if (m_head->m_len < poff + (tp->th_off << 2)) {
2013 				m_head = m_pullup(m_head, poff +
2014 				                 (tp->th_off << 2) +
2015 				                 TSO_WORKAROUND);
2016 				if (m_head == NULL) {
2017 					*m_headp = NULL;
2018 					return (ENOBUFS);
2019 				}
2020 			}
2021 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2022 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2023 			if (do_tso) {
2024 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2025 				                  (ip->ip_hl << 2) +
2026 				                  (tp->th_off << 2));
2027 				ip->ip_sum = 0;
2028 				/*
2029 				 * The pseudo TCP checksum does not include TCP
2030 				 * payload length so driver should recompute
2031 				 * the checksum here what hardware expect to
2032 				 * see. This is adherence of Microsoft's Large
2033 				 * Send specification.
2034 			 	*/
2035 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2036 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2037 			}
2038 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2039 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2040 				m_head = m_pullup(m_head, poff +
2041 				    sizeof(struct udphdr));
2042 				if (m_head == NULL) {
2043 					*m_headp = NULL;
2044 					return (ENOBUFS);
2045 				}
2046 			}
2047 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2048 		}
2049 		*m_headp = m_head;
2050 	}
2051 
2052 	/*
2053 	 * Map the packet for DMA
2054 	 *
2055 	 * Capture the first descriptor index,
2056 	 * this descriptor will have the index
2057 	 * of the EOP which is the only one that
2058 	 * now gets a DONE bit writeback.
2059 	 */
2060 	first = txr->next_avail_desc;
2061 	tx_buffer = &txr->tx_buffers[first];
2062 	tx_buffer_mapped = tx_buffer;
2063 	map = tx_buffer->map;
2064 
2065 retry:
2066 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2067 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2068 
2069 	/*
2070 	 * There are two types of errors we can (try) to handle:
2071 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2072 	 *   out of segments.  Defragment the mbuf chain and try again.
2073 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2074 	 *   at this point in time.  Defer sending and try again later.
2075 	 * All other errors, in particular EINVAL, are fatal and prevent the
2076 	 * mbuf chain from ever going through.  Drop it and report error.
2077 	 */
2078 	if (error == EFBIG && remap) {
2079 		struct mbuf *m;
2080 
2081 		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2082 		if (m == NULL) {
2083 			adapter->mbuf_defrag_failed++;
2084 			m_freem(*m_headp);
2085 			*m_headp = NULL;
2086 			return (ENOBUFS);
2087 		}
2088 		*m_headp = m;
2089 
2090 		/* Try it again, but only once */
2091 		remap = FALSE;
2092 		goto retry;
2093 	} else if (error != 0) {
2094 		adapter->no_tx_dma_setup++;
2095 		m_freem(*m_headp);
2096 		*m_headp = NULL;
2097 		return (error);
2098 	}
2099 
2100 	/*
2101 	 * TSO Hardware workaround, if this packet is not
2102 	 * TSO, and is only a single descriptor long, and
2103 	 * it follows a TSO burst, then we need to add a
2104 	 * sentinel descriptor to prevent premature writeback.
2105 	 */
2106 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2107 		if (nsegs == 1)
2108 			tso_desc = TRUE;
2109 		txr->tx_tso = FALSE;
2110 	}
2111 
2112         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2113                 txr->no_desc_avail++;
2114 		bus_dmamap_unload(txr->txtag, map);
2115 		return (ENOBUFS);
2116         }
2117 	m_head = *m_headp;
2118 
2119 	/* Do hardware assists */
2120 	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2121 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2122 		    &txd_upper, &txd_lower);
2123 		/* we need to make a final sentinel transmit desc */
2124 		tso_desc = TRUE;
2125 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2126 		em_transmit_checksum_setup(txr, m_head,
2127 		    ip_off, ip, &txd_upper, &txd_lower);
2128 
2129 	if (m_head->m_flags & M_VLANTAG) {
2130 		/* Set the vlan id. */
2131 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2132                 /* Tell hardware to add tag */
2133                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2134         }
2135 
2136 	i = txr->next_avail_desc;
2137 
2138 	/* Set up our transmit descriptors */
2139 	for (j = 0; j < nsegs; j++) {
2140 		bus_size_t seg_len;
2141 		bus_addr_t seg_addr;
2142 
2143 		tx_buffer = &txr->tx_buffers[i];
2144 		ctxd = &txr->tx_base[i];
2145 		seg_addr = segs[j].ds_addr;
2146 		seg_len  = segs[j].ds_len;
2147 		/*
2148 		** TSO Workaround:
2149 		** If this is the last descriptor, we want to
2150 		** split it so we have a small final sentinel
2151 		*/
2152 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2153 			seg_len -= TSO_WORKAROUND;
2154 			ctxd->buffer_addr = htole64(seg_addr);
2155 			ctxd->lower.data = htole32(
2156 				adapter->txd_cmd | txd_lower | seg_len);
2157 			ctxd->upper.data = htole32(txd_upper);
2158 			if (++i == adapter->num_tx_desc)
2159 				i = 0;
2160 
2161 			/* Now make the sentinel */
2162 			txr->tx_avail--;
2163 			ctxd = &txr->tx_base[i];
2164 			tx_buffer = &txr->tx_buffers[i];
2165 			ctxd->buffer_addr =
2166 			    htole64(seg_addr + seg_len);
2167 			ctxd->lower.data = htole32(
2168 			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2169 			ctxd->upper.data =
2170 			    htole32(txd_upper);
2171 			last = i;
2172 			if (++i == adapter->num_tx_desc)
2173 				i = 0;
2174 		} else {
2175 			ctxd->buffer_addr = htole64(seg_addr);
2176 			ctxd->lower.data = htole32(
2177 			adapter->txd_cmd | txd_lower | seg_len);
2178 			ctxd->upper.data = htole32(txd_upper);
2179 			last = i;
2180 			if (++i == adapter->num_tx_desc)
2181 				i = 0;
2182 		}
2183 		tx_buffer->m_head = NULL;
2184 		tx_buffer->next_eop = -1;
2185 	}
2186 
2187 	txr->next_avail_desc = i;
2188 	txr->tx_avail -= nsegs;
2189 
2190         tx_buffer->m_head = m_head;
2191 	/*
2192 	** Here we swap the map so the last descriptor,
2193 	** which gets the completion interrupt has the
2194 	** real map, and the first descriptor gets the
2195 	** unused map from this descriptor.
2196 	*/
2197 	tx_buffer_mapped->map = tx_buffer->map;
2198 	tx_buffer->map = map;
2199         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2200 
2201         /*
2202          * Last Descriptor of Packet
2203 	 * needs End Of Packet (EOP)
2204 	 * and Report Status (RS)
2205          */
2206         ctxd->lower.data |=
2207 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2208 	/*
2209 	 * Keep track in the first buffer which
2210 	 * descriptor will be written back
2211 	 */
2212 	tx_buffer = &txr->tx_buffers[first];
2213 	tx_buffer->next_eop = last;
2214 
2215 	/*
2216 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2217 	 * that this frame is available to transmit.
2218 	 */
2219 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2220 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2221 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2222 
2223 	return (0);
2224 }
2225 
2226 static void
em_set_promisc(struct adapter * adapter)2227 em_set_promisc(struct adapter *adapter)
2228 {
2229 	if_t ifp = adapter->ifp;
2230 	u32		reg_rctl;
2231 
2232 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2233 
2234 	if (if_getflags(ifp) & IFF_PROMISC) {
2235 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2236 		/* Turn this on if you want to see bad packets */
2237 		if (em_debug_sbp)
2238 			reg_rctl |= E1000_RCTL_SBP;
2239 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2240 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2241 		reg_rctl |= E1000_RCTL_MPE;
2242 		reg_rctl &= ~E1000_RCTL_UPE;
2243 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244 	}
2245 }
2246 
2247 static void
em_disable_promisc(struct adapter * adapter)2248 em_disable_promisc(struct adapter *adapter)
2249 {
2250 	if_t		ifp = adapter->ifp;
2251 	u32		reg_rctl;
2252 	int		mcnt = 0;
2253 
2254 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2255 	reg_rctl &=  (~E1000_RCTL_UPE);
2256 	if (if_getflags(ifp) & IFF_ALLMULTI)
2257 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2258 	else
2259 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2260 	/* Don't disable if in MAX groups */
2261 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2262 		reg_rctl &=  (~E1000_RCTL_MPE);
2263 	reg_rctl &=  (~E1000_RCTL_SBP);
2264 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2265 }
2266 
2267 
2268 /*********************************************************************
2269  *  Multicast Update
2270  *
2271  *  This routine is called whenever multicast address list is updated.
2272  *
2273  **********************************************************************/
2274 
2275 static void
em_set_multi(struct adapter * adapter)2276 em_set_multi(struct adapter *adapter)
2277 {
2278 	if_t ifp = adapter->ifp;
2279 	u32 reg_rctl = 0;
2280 	u8  *mta; /* Multicast array memory */
2281 	int mcnt = 0;
2282 
2283 	IOCTL_DEBUGOUT("em_set_multi: begin");
2284 
2285 	mta = adapter->mta;
2286 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2287 
2288 	if (adapter->hw.mac.type == e1000_82542 &&
2289 	    adapter->hw.revision_id == E1000_REVISION_2) {
2290 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2291 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2292 			e1000_pci_clear_mwi(&adapter->hw);
2293 		reg_rctl |= E1000_RCTL_RST;
2294 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295 		msec_delay(5);
2296 	}
2297 
2298 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2299 
2300 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2301 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2302 		reg_rctl |= E1000_RCTL_MPE;
2303 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2304 	} else
2305 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2306 
2307 	if (adapter->hw.mac.type == e1000_82542 &&
2308 	    adapter->hw.revision_id == E1000_REVISION_2) {
2309 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2310 		reg_rctl &= ~E1000_RCTL_RST;
2311 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2312 		msec_delay(5);
2313 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2314 			e1000_pci_set_mwi(&adapter->hw);
2315 	}
2316 }
2317 
2318 
2319 /*********************************************************************
2320  *  Timer routine
2321  *
2322  *  This routine checks for link status and updates statistics.
2323  *
2324  **********************************************************************/
2325 
2326 static void
em_local_timer(void * arg)2327 em_local_timer(void *arg)
2328 {
2329 	struct adapter	*adapter = arg;
2330 	if_t ifp = adapter->ifp;
2331 	struct tx_ring	*txr = adapter->tx_rings;
2332 	struct rx_ring	*rxr = adapter->rx_rings;
2333 	u32		trigger = 0;
2334 
2335 	EM_CORE_LOCK_ASSERT(adapter);
2336 
2337 	em_update_link_status(adapter);
2338 	em_update_stats_counters(adapter);
2339 
2340 	/* Reset LAA into RAR[0] on 82571 */
2341 	if ((adapter->hw.mac.type == e1000_82571) &&
2342 	    e1000_get_laa_state_82571(&adapter->hw))
2343 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2344 
2345 	/* Mask to use in the irq trigger */
2346 	if (adapter->msix_mem) {
2347 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2348 			trigger |= rxr->ims;
2349 		rxr = adapter->rx_rings;
2350 	} else
2351 		trigger = E1000_ICS_RXDMT0;
2352 
2353 	/*
2354 	** Check on the state of the TX queue(s), this
2355 	** can be done without the lock because its RO
2356 	** and the HUNG state will be static if set.
2357 	*/
2358 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2359 		if (txr->busy == EM_TX_HUNG)
2360 			goto hung;
2361 		if (txr->busy >= EM_TX_MAXTRIES)
2362 			txr->busy = EM_TX_HUNG;
2363 		/* Schedule a TX tasklet if needed */
2364 		if (txr->tx_avail <= EM_MAX_SCATTER)
2365 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2366 	}
2367 
2368 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2369 #ifndef DEVICE_POLLING
2370 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2371 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2372 #endif
2373 	return;
2374 hung:
2375 	/* Looks like we're hung */
2376 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2377 			txr->me);
2378 	em_print_debug_info(adapter);
2379 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2380 	adapter->watchdog_events++;
2381 	em_init_locked(adapter);
2382 }
2383 
2384 
2385 static void
em_update_link_status(struct adapter * adapter)2386 em_update_link_status(struct adapter *adapter)
2387 {
2388 	struct e1000_hw *hw = &adapter->hw;
2389 	if_t ifp = adapter->ifp;
2390 	device_t dev = adapter->dev;
2391 	struct tx_ring *txr = adapter->tx_rings;
2392 	u32 link_check = 0;
2393 
2394 	/* Get the cached link value or read phy for real */
2395 	switch (hw->phy.media_type) {
2396 	case e1000_media_type_copper:
2397 		if (hw->mac.get_link_status) {
2398 			if (hw->mac.type == e1000_pch_spt)
2399 				msec_delay(50);
2400 			/* Do the work to read phy */
2401 			e1000_check_for_link(hw);
2402 			link_check = !hw->mac.get_link_status;
2403 			if (link_check) /* ESB2 fix */
2404 				e1000_cfg_on_link_up(hw);
2405 		} else
2406 			link_check = TRUE;
2407 		break;
2408 	case e1000_media_type_fiber:
2409 		e1000_check_for_link(hw);
2410 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2411                                  E1000_STATUS_LU);
2412 		break;
2413 	case e1000_media_type_internal_serdes:
2414 		e1000_check_for_link(hw);
2415 		link_check = adapter->hw.mac.serdes_has_link;
2416 		break;
2417 	default:
2418 	case e1000_media_type_unknown:
2419 		break;
2420 	}
2421 
2422 	/* Now check for a transition */
2423 	if (link_check && (adapter->link_active == 0)) {
2424 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2425 		    &adapter->link_duplex);
2426 
2427 		/*
2428 		** There have proven to be problems with TSO when not at full
2429 		** gigabit speed, so disable the assist automatically when at
2430 		** lower speeds.  -jfv
2431 		*/
2432 		if (if_getcapenable(ifp) & IFCAP_TSO4) {
2433 			if (adapter->link_speed == SPEED_1000)
2434 				if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
2435 			else
2436 				if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
2437 		}
2438 
2439 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2440 		if ((adapter->link_speed != SPEED_1000) &&
2441 		    ((hw->mac.type == e1000_82571) ||
2442 		    (hw->mac.type == e1000_82572))) {
2443 			int tarc0;
2444 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2445 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2446 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2447 		}
2448 		if (bootverbose)
2449 			device_printf(dev, "Link is up %d Mbps %s\n",
2450 			    adapter->link_speed,
2451 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2452 			    "Full Duplex" : "Half Duplex"));
2453 		adapter->link_active = 1;
2454 		adapter->smartspeed = 0;
2455 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2456 		if_link_state_change(ifp, LINK_STATE_UP);
2457 	} else if (!link_check && (adapter->link_active == 1)) {
2458 		if_setbaudrate(ifp, 0);
2459 		adapter->link_speed = 0;
2460 		adapter->link_duplex = 0;
2461 		if (bootverbose)
2462 			device_printf(dev, "Link is Down\n");
2463 		adapter->link_active = 0;
2464 		/* Link down, disable hang detection */
2465 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2466 			txr->busy = EM_TX_IDLE;
2467 		if_link_state_change(ifp, LINK_STATE_DOWN);
2468 	}
2469 }
2470 
2471 /*********************************************************************
2472  *
2473  *  This routine disables all traffic on the adapter by issuing a
2474  *  global reset on the MAC and deallocates TX/RX buffers.
2475  *
2476  *  This routine should always be called with BOTH the CORE
2477  *  and TX locks.
2478  **********************************************************************/
2479 
2480 static void
em_stop(void * arg)2481 em_stop(void *arg)
2482 {
2483 	struct adapter	*adapter = arg;
2484 	if_t ifp = adapter->ifp;
2485 	struct tx_ring	*txr = adapter->tx_rings;
2486 
2487 	EM_CORE_LOCK_ASSERT(adapter);
2488 
2489 	INIT_DEBUGOUT("em_stop: begin");
2490 
2491 	em_disable_intr(adapter);
2492 	callout_stop(&adapter->timer);
2493 
2494 	/* Tell the stack that the interface is no longer active */
2495 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2496 
2497         /* Disarm Hang Detection. */
2498 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2499 		EM_TX_LOCK(txr);
2500 		txr->busy = EM_TX_IDLE;
2501 		EM_TX_UNLOCK(txr);
2502 	}
2503 
2504 	/* I219 needs some special flushing to avoid hangs */
2505 	if (adapter->hw.mac.type == e1000_pch_spt)
2506 		em_flush_desc_rings(adapter);
2507 
2508 	e1000_reset_hw(&adapter->hw);
2509 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2510 
2511 	e1000_led_off(&adapter->hw);
2512 	e1000_cleanup_led(&adapter->hw);
2513 }
2514 
2515 
2516 /*********************************************************************
2517  *
2518  *  Determine hardware revision.
2519  *
2520  **********************************************************************/
2521 static void
em_identify_hardware(struct adapter * adapter)2522 em_identify_hardware(struct adapter *adapter)
2523 {
2524 	device_t dev = adapter->dev;
2525 
2526 	/* Make sure our PCI config space has the necessary stuff set */
2527 	pci_enable_busmaster(dev);
2528 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2529 
2530 	/* Save off the information about this board */
2531 	adapter->hw.vendor_id = pci_get_vendor(dev);
2532 	adapter->hw.device_id = pci_get_device(dev);
2533 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2534 	adapter->hw.subsystem_vendor_id =
2535 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2536 	adapter->hw.subsystem_device_id =
2537 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2538 
2539 	/* Do Shared Code Init and Setup */
2540 	if (e1000_set_mac_type(&adapter->hw)) {
2541 		device_printf(dev, "Setup init failure\n");
2542 		return;
2543 	}
2544 }
2545 
2546 static int
em_allocate_pci_resources(struct adapter * adapter)2547 em_allocate_pci_resources(struct adapter *adapter)
2548 {
2549 	device_t	dev = adapter->dev;
2550 	int		rid;
2551 
2552 	rid = PCIR_BAR(0);
2553 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2554 	    &rid, RF_ACTIVE);
2555 	if (adapter->memory == NULL) {
2556 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2557 		return (ENXIO);
2558 	}
2559 	adapter->osdep.mem_bus_space_tag =
2560 	    rman_get_bustag(adapter->memory);
2561 	adapter->osdep.mem_bus_space_handle =
2562 	    rman_get_bushandle(adapter->memory);
2563 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2564 
2565 	adapter->hw.back = &adapter->osdep;
2566 
2567 	return (0);
2568 }
2569 
2570 /*********************************************************************
2571  *
2572  *  Setup the Legacy or MSI Interrupt handler
2573  *
2574  **********************************************************************/
2575 static int
em_allocate_legacy(struct adapter * adapter)2576 em_allocate_legacy(struct adapter *adapter)
2577 {
2578 	device_t dev = adapter->dev;
2579 	struct tx_ring	*txr = adapter->tx_rings;
2580 	int error, rid = 0;
2581 
2582 	/* Manually turn off all interrupts */
2583 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2584 
2585 	if (adapter->msix == 1) /* using MSI */
2586 		rid = 1;
2587 	/* We allocate a single interrupt resource */
2588 	adapter->res = bus_alloc_resource_any(dev,
2589 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2590 	if (adapter->res == NULL) {
2591 		device_printf(dev, "Unable to allocate bus resource: "
2592 		    "interrupt\n");
2593 		return (ENXIO);
2594 	}
2595 
2596 	/*
2597 	 * Allocate a fast interrupt and the associated
2598 	 * deferred processing contexts.
2599 	 */
2600 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2601 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2602 	    taskqueue_thread_enqueue, &adapter->tq);
2603 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2604 	    device_get_nameunit(adapter->dev));
2605 	/* Use a TX only tasklet for local timer */
2606 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2607 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2608 	    taskqueue_thread_enqueue, &txr->tq);
2609 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2610 	    device_get_nameunit(adapter->dev));
2611 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2612 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2613 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2614 		device_printf(dev, "Failed to register fast interrupt "
2615 			    "handler: %d\n", error);
2616 		taskqueue_free(adapter->tq);
2617 		adapter->tq = NULL;
2618 		return (error);
2619 	}
2620 
2621 	return (0);
2622 }
2623 
2624 /*********************************************************************
2625  *
2626  *  Setup the MSIX Interrupt handlers
2627  *   This is not really Multiqueue, rather
2628  *   its just separate interrupt vectors
2629  *   for TX, RX, and Link.
2630  *
2631  **********************************************************************/
2632 static int
em_allocate_msix(struct adapter * adapter)2633 em_allocate_msix(struct adapter *adapter)
2634 {
2635 	device_t	dev = adapter->dev;
2636 	struct		tx_ring *txr = adapter->tx_rings;
2637 	struct		rx_ring *rxr = adapter->rx_rings;
2638 	int		error, rid, vector = 0;
2639 	int		cpu_id = 0;
2640 
2641 
2642 	/* Make sure all interrupts are disabled */
2643 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2644 
2645 	/* First set up ring resources */
2646 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2647 
2648 		/* RX ring */
2649 		rid = vector + 1;
2650 
2651 		rxr->res = bus_alloc_resource_any(dev,
2652 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2653 		if (rxr->res == NULL) {
2654 			device_printf(dev,
2655 			    "Unable to allocate bus resource: "
2656 			    "RX MSIX Interrupt %d\n", i);
2657 			return (ENXIO);
2658 		}
2659 		if ((error = bus_setup_intr(dev, rxr->res,
2660 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2661 		    rxr, &rxr->tag)) != 0) {
2662 			device_printf(dev, "Failed to register RX handler");
2663 			return (error);
2664 		}
2665 #if __FreeBSD_version >= 800504
2666 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2667 #endif
2668 		rxr->msix = vector;
2669 
2670 		if (em_last_bind_cpu < 0)
2671 			em_last_bind_cpu = CPU_FIRST();
2672 		cpu_id = em_last_bind_cpu;
2673 		bus_bind_intr(dev, rxr->res, cpu_id);
2674 
2675 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2676 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2677 		    taskqueue_thread_enqueue, &rxr->tq);
2678 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2679 		    device_get_nameunit(adapter->dev), cpu_id);
2680 		/*
2681 		** Set the bit to enable interrupt
2682 		** in E1000_IMS -- bits 20 and 21
2683 		** are for RX0 and RX1, note this has
2684 		** NOTHING to do with the MSIX vector
2685 		*/
2686 		rxr->ims = 1 << (20 + i);
2687 		adapter->ims |= rxr->ims;
2688 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2689 
2690 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2691 	}
2692 
2693 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2694 		/* TX ring */
2695 		rid = vector + 1;
2696 		txr->res = bus_alloc_resource_any(dev,
2697 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2698 		if (txr->res == NULL) {
2699 			device_printf(dev,
2700 			    "Unable to allocate bus resource: "
2701 			    "TX MSIX Interrupt %d\n", i);
2702 			return (ENXIO);
2703 		}
2704 		if ((error = bus_setup_intr(dev, txr->res,
2705 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2706 		    txr, &txr->tag)) != 0) {
2707 			device_printf(dev, "Failed to register TX handler");
2708 			return (error);
2709 		}
2710 #if __FreeBSD_version >= 800504
2711 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2712 #endif
2713 		txr->msix = vector;
2714 
2715                 if (em_last_bind_cpu < 0)
2716                         em_last_bind_cpu = CPU_FIRST();
2717                 cpu_id = em_last_bind_cpu;
2718                 bus_bind_intr(dev, txr->res, cpu_id);
2719 
2720 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2721 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2722 		    taskqueue_thread_enqueue, &txr->tq);
2723 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2724 		    device_get_nameunit(adapter->dev), cpu_id);
2725 		/*
2726 		** Set the bit to enable interrupt
2727 		** in E1000_IMS -- bits 22 and 23
2728 		** are for TX0 and TX1, note this has
2729 		** NOTHING to do with the MSIX vector
2730 		*/
2731 		txr->ims = 1 << (22 + i);
2732 		adapter->ims |= txr->ims;
2733 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2734 
2735 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2736 	}
2737 
2738 	/* Link interrupt */
2739 	rid = vector + 1;
2740 	adapter->res = bus_alloc_resource_any(dev,
2741 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2742 	if (!adapter->res) {
2743 		device_printf(dev,"Unable to allocate "
2744 		    "bus resource: Link interrupt [%d]\n", rid);
2745 		return (ENXIO);
2746         }
2747 	/* Set the link handler function */
2748 	error = bus_setup_intr(dev, adapter->res,
2749 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2750 	    em_msix_link, adapter, &adapter->tag);
2751 	if (error) {
2752 		adapter->res = NULL;
2753 		device_printf(dev, "Failed to register LINK handler");
2754 		return (error);
2755 	}
2756 #if __FreeBSD_version >= 800504
2757 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2758 #endif
2759 	adapter->linkvec = vector;
2760 	adapter->ivars |=  (8 | vector) << 16;
2761 	adapter->ivars |= 0x80000000;
2762 
2763 	return (0);
2764 }
2765 
2766 
2767 static void
em_free_pci_resources(struct adapter * adapter)2768 em_free_pci_resources(struct adapter *adapter)
2769 {
2770 	device_t	dev = adapter->dev;
2771 	struct tx_ring	*txr;
2772 	struct rx_ring	*rxr;
2773 	int		rid;
2774 
2775 
2776 	/*
2777 	** Release all the queue interrupt resources:
2778 	*/
2779 	for (int i = 0; i < adapter->num_queues; i++) {
2780 		txr = &adapter->tx_rings[i];
2781 		/* an early abort? */
2782 		if (txr == NULL)
2783 			break;
2784 		rid = txr->msix +1;
2785 		if (txr->tag != NULL) {
2786 			bus_teardown_intr(dev, txr->res, txr->tag);
2787 			txr->tag = NULL;
2788 		}
2789 		if (txr->res != NULL)
2790 			bus_release_resource(dev, SYS_RES_IRQ,
2791 			    rid, txr->res);
2792 
2793 		rxr = &adapter->rx_rings[i];
2794 		/* an early abort? */
2795 		if (rxr == NULL)
2796 			break;
2797 		rid = rxr->msix +1;
2798 		if (rxr->tag != NULL) {
2799 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2800 			rxr->tag = NULL;
2801 		}
2802 		if (rxr->res != NULL)
2803 			bus_release_resource(dev, SYS_RES_IRQ,
2804 			    rid, rxr->res);
2805 	}
2806 
2807         if (adapter->linkvec) /* we are doing MSIX */
2808                 rid = adapter->linkvec + 1;
2809         else
2810                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2811 
2812 	if (adapter->tag != NULL) {
2813 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2814 		adapter->tag = NULL;
2815 	}
2816 
2817 	if (adapter->res != NULL)
2818 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2819 
2820 
2821 	if (adapter->msix)
2822 		pci_release_msi(dev);
2823 
2824 	if (adapter->msix_mem != NULL)
2825 		bus_release_resource(dev, SYS_RES_MEMORY,
2826 		    adapter->memrid, adapter->msix_mem);
2827 
2828 	if (adapter->memory != NULL)
2829 		bus_release_resource(dev, SYS_RES_MEMORY,
2830 		    PCIR_BAR(0), adapter->memory);
2831 
2832 	if (adapter->flash != NULL)
2833 		bus_release_resource(dev, SYS_RES_MEMORY,
2834 		    EM_FLASH, adapter->flash);
2835 }
2836 
2837 /*
2838  * Setup MSI or MSI/X
2839  */
2840 static int
em_setup_msix(struct adapter * adapter)2841 em_setup_msix(struct adapter *adapter)
2842 {
2843 	device_t dev = adapter->dev;
2844 	int val;
2845 
2846 	/* Nearly always going to use one queue */
2847 	adapter->num_queues = 1;
2848 
2849 	/*
2850 	** Try using MSI-X for Hartwell adapters
2851 	*/
2852 	if ((adapter->hw.mac.type == e1000_82574) &&
2853 	    (em_enable_msix == TRUE)) {
2854 #ifdef EM_MULTIQUEUE
2855 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2856 		if (adapter->num_queues > 1)
2857 			em_enable_vectors_82574(adapter);
2858 #endif
2859 		/* Map the MSIX BAR */
2860 		adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2861 		adapter->msix_mem = bus_alloc_resource_any(dev,
2862 		    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2863        		if (adapter->msix_mem == NULL) {
2864 			/* May not be enabled */
2865                		device_printf(adapter->dev,
2866 			    "Unable to map MSIX table \n");
2867 			goto msi;
2868        		}
2869 		val = pci_msix_count(dev);
2870 
2871 #ifdef EM_MULTIQUEUE
2872 		/* We need 5 vectors in the multiqueue case */
2873 		if (adapter->num_queues > 1 ) {
2874 			if (val >= 5)
2875 				val = 5;
2876 			else {
2877 				adapter->num_queues = 1;
2878 				device_printf(adapter->dev,
2879 				    "Insufficient MSIX vectors for >1 queue, "
2880 				    "using single queue...\n");
2881 				goto msix_one;
2882 			}
2883 		} else {
2884 msix_one:
2885 #endif
2886 			if (val >= 3)
2887 				val = 3;
2888 			else {
2889 				device_printf(adapter->dev,
2890 			    	"Insufficient MSIX vectors, using MSI\n");
2891 				goto msi;
2892 			}
2893 #ifdef EM_MULTIQUEUE
2894 		}
2895 #endif
2896 
2897 		if ((pci_alloc_msix(dev, &val) == 0)) {
2898 			device_printf(adapter->dev,
2899 			    "Using MSIX interrupts "
2900 			    "with %d vectors\n", val);
2901 			return (val);
2902 		}
2903 
2904 		/*
2905 		** If MSIX alloc failed or provided us with
2906 		** less than needed, free and fall through to MSI
2907 		*/
2908 		pci_release_msi(dev);
2909 	}
2910 msi:
2911 	if (adapter->msix_mem != NULL) {
2912 		bus_release_resource(dev, SYS_RES_MEMORY,
2913 		    adapter->memrid, adapter->msix_mem);
2914 		adapter->msix_mem = NULL;
2915 	}
2916        	val = 1;
2917        	if (pci_alloc_msi(dev, &val) == 0) {
2918                	device_printf(adapter->dev, "Using an MSI interrupt\n");
2919 		return (val);
2920 	}
2921 	/* Should only happen due to manual configuration */
2922 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2923 	return (0);
2924 }
2925 
2926 
2927 /*
2928 ** The 3 following flush routines are used as a workaround in the
2929 ** I219 client parts and only for them.
2930 **
2931 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2932 **
2933 ** We want to clear all pending descriptors from the TX ring.
2934 ** zeroing happens when the HW reads the regs. We  assign the ring itself as
2935 ** the data of the next descriptor. We don't care about the data we are about
2936 ** to reset the HW.
2937 */
2938 static void
em_flush_tx_ring(struct adapter * adapter)2939 em_flush_tx_ring(struct adapter *adapter)
2940 {
2941 	struct e1000_hw		*hw = &adapter->hw;
2942 	struct tx_ring		*txr = adapter->tx_rings;
2943 	struct e1000_tx_desc	*txd;
2944 	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2945 	u16			size = 512;
2946 
2947 	tctl = E1000_READ_REG(hw, E1000_TCTL);
2948 	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2949 
2950 	txd = &txr->tx_base[txr->next_avail_desc++];
2951 	if (txr->next_avail_desc == adapter->num_tx_desc)
2952 		txr->next_avail_desc = 0;
2953 
2954 	/* Just use the ring as a dummy buffer addr */
2955 	txd->buffer_addr = txr->txdma.dma_paddr;
2956 	txd->lower.data = htole32(txd_lower | size);
2957 	txd->upper.data = 0;
2958 
2959 	/* flush descriptors to memory before notifying the HW */
2960 	wmb();
2961 
2962 	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2963 	mb();
2964 	usec_delay(250);
2965 }
2966 
2967 /*
2968 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2969 **
2970 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2971 */
2972 static void
em_flush_rx_ring(struct adapter * adapter)2973 em_flush_rx_ring(struct adapter *adapter)
2974 {
2975 	struct e1000_hw	*hw = &adapter->hw;
2976 	u32		rctl, rxdctl;
2977 
2978 	rctl = E1000_READ_REG(hw, E1000_RCTL);
2979 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2980 	E1000_WRITE_FLUSH(hw);
2981 	usec_delay(150);
2982 
2983 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2984 	/* zero the lower 14 bits (prefetch and host thresholds) */
2985 	rxdctl &= 0xffffc000;
2986 	/*
2987 	 * update thresholds: prefetch threshold to 31, host threshold to 1
2988 	 * and make sure the granularity is "descriptors" and not "cache lines"
2989 	 */
2990 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2991 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2992 
2993 	/* momentarily enable the RX ring for the changes to take effect */
2994 	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2995 	E1000_WRITE_FLUSH(hw);
2996 	usec_delay(150);
2997 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2998 }
2999 
3000 /*
3001 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3002 **
3003 ** In i219, the descriptor rings must be emptied before resetting the HW
3004 ** or before changing the device state to D3 during runtime (runtime PM).
3005 **
3006 ** Failure to do this will cause the HW to enter a unit hang state which can
3007 ** only be released by PCI reset on the device
3008 **
3009 */
3010 static void
em_flush_desc_rings(struct adapter * adapter)3011 em_flush_desc_rings(struct adapter *adapter)
3012 {
3013 	struct e1000_hw	*hw = &adapter->hw;
3014 	device_t	dev = adapter->dev;
3015 	u16		hang_state;
3016 	u32		fext_nvm11, tdlen;
3017 
3018 	/* First, disable MULR fix in FEXTNVM11 */
3019 	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3020 	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3021 	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3022 
3023 	/* do nothing if we're not in faulty state, or if the queue is empty */
3024 	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3025 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3026 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3027 		return;
3028 	em_flush_tx_ring(adapter);
3029 
3030 	/* recheck, maybe the fault is caused by the rx ring */
3031 	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3032 	if (hang_state & FLUSH_DESC_REQUIRED)
3033 		em_flush_rx_ring(adapter);
3034 }
3035 
3036 
3037 /*********************************************************************
3038  *
3039  *  Initialize the hardware to a configuration
3040  *  as specified by the adapter structure.
3041  *
3042  **********************************************************************/
3043 static void
em_reset(struct adapter * adapter)3044 em_reset(struct adapter *adapter)
3045 {
3046 	device_t	dev = adapter->dev;
3047 	if_t ifp = adapter->ifp;
3048 	struct e1000_hw	*hw = &adapter->hw;
3049 	u16		rx_buffer_size;
3050 	u32		pba;
3051 
3052 	INIT_DEBUGOUT("em_reset: begin");
3053 
3054 	/* Set up smart power down as default off on newer adapters. */
3055 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3056 	    hw->mac.type == e1000_82572)) {
3057 		u16 phy_tmp = 0;
3058 
3059 		/* Speed up time to link by disabling smart power down. */
3060 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3061 		phy_tmp &= ~IGP02E1000_PM_SPD;
3062 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3063 	}
3064 
3065 	/*
3066 	 * Packet Buffer Allocation (PBA)
3067 	 * Writing PBA sets the receive portion of the buffer
3068 	 * the remainder is used for the transmit buffer.
3069 	 */
3070 	switch (hw->mac.type) {
3071 	/* Total Packet Buffer on these is 48K */
3072 	case e1000_82571:
3073 	case e1000_82572:
3074 	case e1000_80003es2lan:
3075 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3076 		break;
3077 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3078 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3079 		break;
3080 	case e1000_82574:
3081 	case e1000_82583:
3082 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3083 		break;
3084 	case e1000_ich8lan:
3085 		pba = E1000_PBA_8K;
3086 		break;
3087 	case e1000_ich9lan:
3088 	case e1000_ich10lan:
3089 		/* Boost Receive side for jumbo frames */
3090 		if (adapter->hw.mac.max_frame_size > 4096)
3091 			pba = E1000_PBA_14K;
3092 		else
3093 			pba = E1000_PBA_10K;
3094 		break;
3095 	case e1000_pchlan:
3096 	case e1000_pch2lan:
3097 	case e1000_pch_lpt:
3098 	case e1000_pch_spt:
3099 	case e1000_pch_cnp:
3100 		pba = E1000_PBA_26K;
3101 		break;
3102 	default:
3103 		if (adapter->hw.mac.max_frame_size > 8192)
3104 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3105 		else
3106 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3107 	}
3108 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3109 
3110 	/*
3111 	 * These parameters control the automatic generation (Tx) and
3112 	 * response (Rx) to Ethernet PAUSE frames.
3113 	 * - High water mark should allow for at least two frames to be
3114 	 *   received after sending an XOFF.
3115 	 * - Low water mark works best when it is very near the high water mark.
3116 	 *   This allows the receiver to restart by sending XON when it has
3117 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3118 	 *   restart after one full frame is pulled from the buffer. There
3119 	 *   could be several smaller frames in the buffer and if so they will
3120 	 *   not trigger the XON until their total number reduces the buffer
3121 	 *   by 1500.
3122 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3123 	 */
3124 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3125 	hw->fc.high_water = rx_buffer_size -
3126 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3127 	hw->fc.low_water = hw->fc.high_water - 1500;
3128 
3129 	if (adapter->fc) /* locally set flow control value? */
3130 		hw->fc.requested_mode = adapter->fc;
3131 	else
3132 		hw->fc.requested_mode = e1000_fc_full;
3133 
3134 	if (hw->mac.type == e1000_80003es2lan)
3135 		hw->fc.pause_time = 0xFFFF;
3136 	else
3137 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3138 
3139 	hw->fc.send_xon = TRUE;
3140 
3141 	/* Device specific overrides/settings */
3142 	switch (hw->mac.type) {
3143 	case e1000_pchlan:
3144 		/* Workaround: no TX flow ctrl for PCH */
3145                 hw->fc.requested_mode = e1000_fc_rx_pause;
3146 		hw->fc.pause_time = 0xFFFF; /* override */
3147 		if (if_getmtu(ifp) > ETHERMTU) {
3148 			hw->fc.high_water = 0x3500;
3149 			hw->fc.low_water = 0x1500;
3150 		} else {
3151 			hw->fc.high_water = 0x5000;
3152 			hw->fc.low_water = 0x3000;
3153 		}
3154 		hw->fc.refresh_time = 0x1000;
3155 		break;
3156 	case e1000_pch2lan:
3157 	case e1000_pch_lpt:
3158 	case e1000_pch_spt:
3159 	case e1000_pch_cnp:
3160 		hw->fc.high_water = 0x5C20;
3161 		hw->fc.low_water = 0x5048;
3162 		hw->fc.pause_time = 0x0650;
3163 		hw->fc.refresh_time = 0x0400;
3164 		/* Jumbos need adjusted PBA */
3165 		if (if_getmtu(ifp) > ETHERMTU)
3166 			E1000_WRITE_REG(hw, E1000_PBA, 12);
3167 		else
3168 			E1000_WRITE_REG(hw, E1000_PBA, 26);
3169 		break;
3170         case e1000_ich9lan:
3171         case e1000_ich10lan:
3172 		if (if_getmtu(ifp) > ETHERMTU) {
3173 			hw->fc.high_water = 0x2800;
3174 			hw->fc.low_water = hw->fc.high_water - 8;
3175 			break;
3176 		}
3177 		/* else fall thru */
3178 	default:
3179 		if (hw->mac.type == e1000_80003es2lan)
3180 			hw->fc.pause_time = 0xFFFF;
3181 		break;
3182 	}
3183 
3184 	/* I219 needs some special flushing to avoid hangs */
3185 	if (hw->mac.type == e1000_pch_spt)
3186 		em_flush_desc_rings(adapter);
3187 
3188 	/* Issue a global reset */
3189 	e1000_reset_hw(hw);
3190 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3191 	em_disable_aspm(adapter);
3192 	/* and a re-init */
3193 	if (e1000_init_hw(hw) < 0) {
3194 		device_printf(dev, "Hardware Initialization Failed\n");
3195 		return;
3196 	}
3197 
3198 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3199 	e1000_get_phy_info(hw);
3200 	e1000_check_for_link(hw);
3201 	return;
3202 }
3203 
3204 /*********************************************************************
3205  *
3206  *  Setup networking device structure and register an interface.
3207  *
3208  **********************************************************************/
3209 static int
em_setup_interface(device_t dev,struct adapter * adapter)3210 em_setup_interface(device_t dev, struct adapter *adapter)
3211 {
3212 	if_t ifp;
3213 
3214 	INIT_DEBUGOUT("em_setup_interface: begin");
3215 
3216 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3217 	if (ifp == 0) {
3218 		device_printf(dev, "can not allocate ifnet structure\n");
3219 		return (-1);
3220 	}
3221 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3222 	if_setdev(ifp, dev);
3223 	if_setinitfn(ifp, em_init);
3224 	if_setsoftc(ifp, adapter);
3225 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3226 	if_setioctlfn(ifp, em_ioctl);
3227 	if_setgetcounterfn(ifp, em_get_counter);
3228 
3229 	/* TSO parameters */
3230 	ifp->if_hw_tsomax = IP_MAXPACKET;
3231 	/* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3232 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3233 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3234 
3235 #ifdef EM_MULTIQUEUE
3236 	/* Multiqueue stack interface */
3237 	if_settransmitfn(ifp, em_mq_start);
3238 	if_setqflushfn(ifp, em_qflush);
3239 #else
3240 	if_setstartfn(ifp, em_start);
3241 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3242 	if_setsendqready(ifp);
3243 #endif
3244 
3245 	ether_ifattach(ifp, adapter->hw.mac.addr);
3246 
3247 	if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM);
3248 	if_setcapenable(ifp, if_getcapabilities(ifp));
3249 
3250 	/*
3251 	 * Tell the upper layer(s) we
3252 	 * support full VLAN capability
3253 	 */
3254 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3255 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3256 	    IFCAP_VLAN_MTU, 0);
3257 	if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
3258 
3259 	/*
3260 	 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3261 	 * - Although the silicon bug of TSO only working at gigabit speed is
3262 	 *   worked around in em_update_link_status() by selectively setting
3263 	 *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3264 	 *   descriptors.  Thus, such descriptors may still cause the MAC to
3265 	 *   hang and, consequently, TSO is only safe to be used in setups
3266 	 *   where the link isn't expected to switch from gigabit to lower
3267 	 *   speeds.
3268 	 * - Similarly, there's currently no way to trigger a reconfiguration
3269 	 *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3270 	 *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3271 	 *   when link speed changes are not to be expected.
3272 	 * - Despite all the workarounds for TSO-related silicon bugs, at
3273 	 *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3274 	 */
3275 	if_setcapabilitiesbit(ifp, IFCAP_TSO4 | IFCAP_VLAN_HWTSO, 0);
3276 
3277 	/*
3278 	** Don't turn this on by default, if vlans are
3279 	** created on another pseudo device (eg. lagg)
3280 	** then vlan events are not passed thru, breaking
3281 	** operation, but with HW FILTER off it works. If
3282 	** using vlans directly on the em driver you can
3283 	** enable this and get full hardware tag filtering.
3284 	*/
3285 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3286 
3287 #ifdef DEVICE_POLLING
3288 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3289 #endif
3290 
3291 	/* Enable only WOL MAGIC by default */
3292 	if (adapter->wol) {
3293 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3294 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3295 	}
3296 
3297 	/*
3298 	 * Specify the media types supported by this adapter and register
3299 	 * callbacks to update media and link information
3300 	 */
3301 	ifmedia_init(&adapter->media, IFM_IMASK,
3302 	    em_media_change, em_media_status);
3303 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3304 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3305 		u_char fiber_type = IFM_1000_SX;	/* default type */
3306 
3307 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3308 			    0, NULL);
3309 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3310 	} else {
3311 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3312 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3313 			    0, NULL);
3314 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3315 			    0, NULL);
3316 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3317 			    0, NULL);
3318 		if (adapter->hw.phy.type != e1000_phy_ife) {
3319 			ifmedia_add(&adapter->media,
3320 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3321 			ifmedia_add(&adapter->media,
3322 				IFM_ETHER | IFM_1000_T, 0, NULL);
3323 		}
3324 	}
3325 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3326 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3327 	return (0);
3328 }
3329 
3330 
3331 /*
3332  * Manage DMA'able memory.
3333  */
3334 static void
em_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)3335 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3336 {
3337 	if (error)
3338 		return;
3339 	*(bus_addr_t *) arg = segs[0].ds_addr;
3340 }
3341 
3342 static int
em_dma_malloc(struct adapter * adapter,bus_size_t size,struct em_dma_alloc * dma,int mapflags)3343 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3344         struct em_dma_alloc *dma, int mapflags)
3345 {
3346 	int error;
3347 
3348 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3349 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3350 				BUS_SPACE_MAXADDR,	/* lowaddr */
3351 				BUS_SPACE_MAXADDR,	/* highaddr */
3352 				NULL, NULL,		/* filter, filterarg */
3353 				size,			/* maxsize */
3354 				1,			/* nsegments */
3355 				size,			/* maxsegsize */
3356 				0,			/* flags */
3357 				NULL,			/* lockfunc */
3358 				NULL,			/* lockarg */
3359 				&dma->dma_tag);
3360 	if (error) {
3361 		device_printf(adapter->dev,
3362 		    "%s: bus_dma_tag_create failed: %d\n",
3363 		    __func__, error);
3364 		goto fail_0;
3365 	}
3366 
3367 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3368 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3369 	if (error) {
3370 		device_printf(adapter->dev,
3371 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3372 		    __func__, (uintmax_t)size, error);
3373 		goto fail_2;
3374 	}
3375 
3376 	dma->dma_paddr = 0;
3377 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3378 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3379 	if (error || dma->dma_paddr == 0) {
3380 		device_printf(adapter->dev,
3381 		    "%s: bus_dmamap_load failed: %d\n",
3382 		    __func__, error);
3383 		goto fail_3;
3384 	}
3385 
3386 	return (0);
3387 
3388 fail_3:
3389 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3390 fail_2:
3391 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3392 	bus_dma_tag_destroy(dma->dma_tag);
3393 fail_0:
3394 	dma->dma_tag = NULL;
3395 
3396 	return (error);
3397 }
3398 
3399 static void
em_dma_free(struct adapter * adapter,struct em_dma_alloc * dma)3400 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3401 {
3402 	if (dma->dma_tag == NULL)
3403 		return;
3404 	if (dma->dma_paddr != 0) {
3405 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3406 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3407 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3408 		dma->dma_paddr = 0;
3409 	}
3410 	if (dma->dma_vaddr != NULL) {
3411 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3412 		dma->dma_vaddr = NULL;
3413 	}
3414 	bus_dma_tag_destroy(dma->dma_tag);
3415 	dma->dma_tag = NULL;
3416 }
3417 
3418 
3419 /*********************************************************************
3420  *
3421  *  Allocate memory for the transmit and receive rings, and then
3422  *  the descriptors associated with each, called only once at attach.
3423  *
3424  **********************************************************************/
3425 static int
em_allocate_queues(struct adapter * adapter)3426 em_allocate_queues(struct adapter *adapter)
3427 {
3428 	device_t		dev = adapter->dev;
3429 	struct tx_ring		*txr = NULL;
3430 	struct rx_ring		*rxr = NULL;
3431 	int rsize, tsize, error = E1000_SUCCESS;
3432 	int txconf = 0, rxconf = 0;
3433 
3434 
3435 	/* Allocate the TX ring struct memory */
3436 	if (!(adapter->tx_rings =
3437 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3438 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3439 		device_printf(dev, "Unable to allocate TX ring memory\n");
3440 		error = ENOMEM;
3441 		goto fail;
3442 	}
3443 
3444 	/* Now allocate the RX */
3445 	if (!(adapter->rx_rings =
3446 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3447 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3448 		device_printf(dev, "Unable to allocate RX ring memory\n");
3449 		error = ENOMEM;
3450 		goto rx_fail;
3451 	}
3452 
3453 	tsize = roundup2(adapter->num_tx_desc *
3454 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3455 	/*
3456 	 * Now set up the TX queues, txconf is needed to handle the
3457 	 * possibility that things fail midcourse and we need to
3458 	 * undo memory gracefully
3459 	 */
3460 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3461 		/* Set up some basics */
3462 		txr = &adapter->tx_rings[i];
3463 		txr->adapter = adapter;
3464 		txr->me = i;
3465 
3466 		/* Initialize the TX lock */
3467 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3468 		    device_get_nameunit(dev), txr->me);
3469 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3470 
3471 		if (em_dma_malloc(adapter, tsize,
3472 			&txr->txdma, BUS_DMA_NOWAIT)) {
3473 			device_printf(dev,
3474 			    "Unable to allocate TX Descriptor memory\n");
3475 			error = ENOMEM;
3476 			goto err_tx_desc;
3477 		}
3478 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3479 		bzero((void *)txr->tx_base, tsize);
3480 
3481         	if (em_allocate_transmit_buffers(txr)) {
3482 			device_printf(dev,
3483 			    "Critical Failure setting up transmit buffers\n");
3484 			error = ENOMEM;
3485 			goto err_tx_desc;
3486         	}
3487 #if __FreeBSD_version >= 800000
3488 		/* Allocate a buf ring */
3489 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3490 		    M_WAITOK, &txr->tx_mtx);
3491 #endif
3492 	}
3493 
3494 	/*
3495 	 * Next the RX queues...
3496 	 */
3497 	rsize = roundup2(adapter->num_rx_desc *
3498 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3499 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3500 		rxr = &adapter->rx_rings[i];
3501 		rxr->adapter = adapter;
3502 		rxr->me = i;
3503 
3504 		/* Initialize the RX lock */
3505 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3506 		    device_get_nameunit(dev), txr->me);
3507 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3508 
3509 		if (em_dma_malloc(adapter, rsize,
3510 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3511 			device_printf(dev,
3512 			    "Unable to allocate RxDescriptor memory\n");
3513 			error = ENOMEM;
3514 			goto err_rx_desc;
3515 		}
3516 		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3517 		bzero((void *)rxr->rx_base, rsize);
3518 
3519         	/* Allocate receive buffers for the ring*/
3520 		if (em_allocate_receive_buffers(rxr)) {
3521 			device_printf(dev,
3522 			    "Critical Failure setting up receive buffers\n");
3523 			error = ENOMEM;
3524 			goto err_rx_desc;
3525 		}
3526 	}
3527 
3528 	return (0);
3529 
3530 err_rx_desc:
3531 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3532 		em_dma_free(adapter, &rxr->rxdma);
3533 err_tx_desc:
3534 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3535 		em_dma_free(adapter, &txr->txdma);
3536 	free(adapter->rx_rings, M_DEVBUF);
3537 rx_fail:
3538 #if __FreeBSD_version >= 800000
3539 	buf_ring_free(txr->br, M_DEVBUF);
3540 #endif
3541 	free(adapter->tx_rings, M_DEVBUF);
3542 fail:
3543 	return (error);
3544 }
3545 
3546 
3547 /*********************************************************************
3548  *
3549  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3550  *  the information needed to transmit a packet on the wire. This is
3551  *  called only once at attach, setup is done every reset.
3552  *
3553  **********************************************************************/
3554 static int
em_allocate_transmit_buffers(struct tx_ring * txr)3555 em_allocate_transmit_buffers(struct tx_ring *txr)
3556 {
3557 	struct adapter *adapter = txr->adapter;
3558 	device_t dev = adapter->dev;
3559 	struct em_txbuffer *txbuf;
3560 	int error, i;
3561 
3562 	/*
3563 	 * Setup DMA descriptor areas.
3564 	 */
3565 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3566 			       1, 0,			/* alignment, bounds */
3567 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3568 			       BUS_SPACE_MAXADDR,	/* highaddr */
3569 			       NULL, NULL,		/* filter, filterarg */
3570 			       EM_TSO_SIZE,		/* maxsize */
3571 			       EM_MAX_SCATTER,		/* nsegments */
3572 			       PAGE_SIZE,		/* maxsegsize */
3573 			       0,			/* flags */
3574 			       NULL,			/* lockfunc */
3575 			       NULL,			/* lockfuncarg */
3576 			       &txr->txtag))) {
3577 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3578 		goto fail;
3579 	}
3580 
3581 	if (!(txr->tx_buffers =
3582 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3583 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3584 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3585 		error = ENOMEM;
3586 		goto fail;
3587 	}
3588 
3589         /* Create the descriptor buffer dma maps */
3590 	txbuf = txr->tx_buffers;
3591 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3592 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3593 		if (error != 0) {
3594 			device_printf(dev, "Unable to create TX DMA map\n");
3595 			goto fail;
3596 		}
3597 	}
3598 
3599 	return 0;
3600 fail:
3601 	/* We free all, it handles case where we are in the middle */
3602 	em_free_transmit_structures(adapter);
3603 	return (error);
3604 }
3605 
3606 /*********************************************************************
3607  *
3608  *  Initialize a transmit ring.
3609  *
3610  **********************************************************************/
3611 static void
em_setup_transmit_ring(struct tx_ring * txr)3612 em_setup_transmit_ring(struct tx_ring *txr)
3613 {
3614 	struct adapter *adapter = txr->adapter;
3615 	struct em_txbuffer *txbuf;
3616 	int i;
3617 #ifdef DEV_NETMAP
3618 	struct netmap_slot *slot;
3619 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3620 #endif /* DEV_NETMAP */
3621 
3622 	/* Clear the old descriptor contents */
3623 	EM_TX_LOCK(txr);
3624 #ifdef DEV_NETMAP
3625 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3626 #endif /* DEV_NETMAP */
3627 
3628 	bzero((void *)txr->tx_base,
3629 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3630 	/* Reset indices */
3631 	txr->next_avail_desc = 0;
3632 	txr->next_to_clean = 0;
3633 
3634 	/* Free any existing tx buffers. */
3635         txbuf = txr->tx_buffers;
3636 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3637 		if (txbuf->m_head != NULL) {
3638 			bus_dmamap_sync(txr->txtag, txbuf->map,
3639 			    BUS_DMASYNC_POSTWRITE);
3640 			bus_dmamap_unload(txr->txtag, txbuf->map);
3641 			m_freem(txbuf->m_head);
3642 			txbuf->m_head = NULL;
3643 		}
3644 #ifdef DEV_NETMAP
3645 		if (slot) {
3646 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
3647 			uint64_t paddr;
3648 			void *addr;
3649 
3650 			addr = PNMB(na, slot + si, &paddr);
3651 			txr->tx_base[i].buffer_addr = htole64(paddr);
3652 			/* reload the map for netmap mode */
3653 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3654 		}
3655 #endif /* DEV_NETMAP */
3656 
3657 		/* clear the watch index */
3658 		txbuf->next_eop = -1;
3659         }
3660 
3661 	/* Set number of descriptors available */
3662 	txr->tx_avail = adapter->num_tx_desc;
3663 	txr->busy = EM_TX_IDLE;
3664 
3665 	/* Clear checksum offload context. */
3666 	txr->last_hw_offload = 0;
3667 	txr->last_hw_ipcss = 0;
3668 	txr->last_hw_ipcso = 0;
3669 	txr->last_hw_tucss = 0;
3670 	txr->last_hw_tucso = 0;
3671 
3672 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3673 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3674 	EM_TX_UNLOCK(txr);
3675 }
3676 
3677 /*********************************************************************
3678  *
3679  *  Initialize all transmit rings.
3680  *
3681  **********************************************************************/
3682 static void
em_setup_transmit_structures(struct adapter * adapter)3683 em_setup_transmit_structures(struct adapter *adapter)
3684 {
3685 	struct tx_ring *txr = adapter->tx_rings;
3686 
3687 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3688 		em_setup_transmit_ring(txr);
3689 
3690 	return;
3691 }
3692 
3693 /*********************************************************************
3694  *
3695  *  Enable transmit unit.
3696  *
3697  **********************************************************************/
3698 static void
em_initialize_transmit_unit(struct adapter * adapter)3699 em_initialize_transmit_unit(struct adapter *adapter)
3700 {
3701 	struct tx_ring	*txr = adapter->tx_rings;
3702 	struct e1000_hw	*hw = &adapter->hw;
3703 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3704 
3705 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3706 
3707 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3708 		u64 bus_addr = txr->txdma.dma_paddr;
3709 		/* Base and Len of TX Ring */
3710 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3711 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3712 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3713 	    	    (u32)(bus_addr >> 32));
3714 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3715 	    	    (u32)bus_addr);
3716 		/* Init the HEAD/TAIL indices */
3717 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3718 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3719 
3720 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3721 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3722 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3723 
3724 		txr->busy = EM_TX_IDLE;
3725 		txdctl = 0; /* clear txdctl */
3726                 txdctl |= 0x1f; /* PTHRESH */
3727                 txdctl |= 1 << 8; /* HTHRESH */
3728                 txdctl |= 1 << 16;/* WTHRESH */
3729 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3730 		txdctl |= E1000_TXDCTL_GRAN;
3731                 txdctl |= 1 << 25; /* LWTHRESH */
3732 
3733                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3734 	}
3735 
3736 	/* Set the default values for the Tx Inter Packet Gap timer */
3737 	switch (adapter->hw.mac.type) {
3738 	case e1000_80003es2lan:
3739 		tipg = DEFAULT_82543_TIPG_IPGR1;
3740 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3741 		    E1000_TIPG_IPGR2_SHIFT;
3742 		break;
3743 	default:
3744 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3745 		    (adapter->hw.phy.media_type ==
3746 		    e1000_media_type_internal_serdes))
3747 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3748 		else
3749 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3750 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3751 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3752 	}
3753 
3754 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3755 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3756 
3757 	if(adapter->hw.mac.type >= e1000_82540)
3758 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3759 		    adapter->tx_abs_int_delay.value);
3760 
3761 	if ((adapter->hw.mac.type == e1000_82571) ||
3762 	    (adapter->hw.mac.type == e1000_82572)) {
3763 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3764 		tarc |= TARC_SPEED_MODE_BIT;
3765 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3766 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3767 		/* errata: program both queues to unweighted RR */
3768 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3769 		tarc |= 1;
3770 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3771 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3772 		tarc |= 1;
3773 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3774 	} else if (adapter->hw.mac.type == e1000_82574) {
3775 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3776 		tarc |= TARC_ERRATA_BIT;
3777 		if ( adapter->num_queues > 1) {
3778 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3779 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3780 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3781 		} else
3782 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3783 	}
3784 
3785 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3786 	if (adapter->tx_int_delay.value > 0)
3787 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3788 
3789 	/* Program the Transmit Control Register */
3790 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3791 	tctl &= ~E1000_TCTL_CT;
3792 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3793 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3794 
3795 	if (adapter->hw.mac.type >= e1000_82571)
3796 		tctl |= E1000_TCTL_MULR;
3797 
3798 	/* This write will effectively turn on the transmit unit. */
3799 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3800 
3801 	/* SPT and KBL errata workarounds */
3802 	if (hw->mac.type == e1000_pch_spt) {
3803 		u32 reg;
3804 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3805 		reg |= E1000_RCTL_RDMTS_HEX;
3806 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3807 		/* i218-i219 Specification Update 1.5.4.5 */
3808 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3809 		reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3810 		reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3811 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3812 	}
3813 }
3814 
3815 
3816 /*********************************************************************
3817  *
3818  *  Free all transmit rings.
3819  *
3820  **********************************************************************/
3821 static void
em_free_transmit_structures(struct adapter * adapter)3822 em_free_transmit_structures(struct adapter *adapter)
3823 {
3824 	struct tx_ring *txr = adapter->tx_rings;
3825 
3826 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3827 		EM_TX_LOCK(txr);
3828 		em_free_transmit_buffers(txr);
3829 		em_dma_free(adapter, &txr->txdma);
3830 		EM_TX_UNLOCK(txr);
3831 		EM_TX_LOCK_DESTROY(txr);
3832 	}
3833 
3834 	free(adapter->tx_rings, M_DEVBUF);
3835 }
3836 
3837 /*********************************************************************
3838  *
3839  *  Free transmit ring related data structures.
3840  *
3841  **********************************************************************/
3842 static void
em_free_transmit_buffers(struct tx_ring * txr)3843 em_free_transmit_buffers(struct tx_ring *txr)
3844 {
3845 	struct adapter		*adapter = txr->adapter;
3846 	struct em_txbuffer	*txbuf;
3847 
3848 	INIT_DEBUGOUT("free_transmit_ring: begin");
3849 
3850 	if (txr->tx_buffers == NULL)
3851 		return;
3852 
3853 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3854 		txbuf = &txr->tx_buffers[i];
3855 		if (txbuf->m_head != NULL) {
3856 			bus_dmamap_sync(txr->txtag, txbuf->map,
3857 			    BUS_DMASYNC_POSTWRITE);
3858 			bus_dmamap_unload(txr->txtag,
3859 			    txbuf->map);
3860 			m_freem(txbuf->m_head);
3861 			txbuf->m_head = NULL;
3862 			if (txbuf->map != NULL) {
3863 				bus_dmamap_destroy(txr->txtag,
3864 				    txbuf->map);
3865 				txbuf->map = NULL;
3866 			}
3867 		} else if (txbuf->map != NULL) {
3868 			bus_dmamap_unload(txr->txtag,
3869 			    txbuf->map);
3870 			bus_dmamap_destroy(txr->txtag,
3871 			    txbuf->map);
3872 			txbuf->map = NULL;
3873 		}
3874 	}
3875 #if __FreeBSD_version >= 800000
3876 	if (txr->br != NULL)
3877 		buf_ring_free(txr->br, M_DEVBUF);
3878 #endif
3879 	if (txr->tx_buffers != NULL) {
3880 		free(txr->tx_buffers, M_DEVBUF);
3881 		txr->tx_buffers = NULL;
3882 	}
3883 	if (txr->txtag != NULL) {
3884 		bus_dma_tag_destroy(txr->txtag);
3885 		txr->txtag = NULL;
3886 	}
3887 	return;
3888 }
3889 
3890 
3891 /*********************************************************************
3892  *  The offload context is protocol specific (TCP/UDP) and thus
3893  *  only needs to be set when the protocol changes. The occasion
3894  *  of a context change can be a performance detriment, and
3895  *  might be better just disabled. The reason arises in the way
3896  *  in which the controller supports pipelined requests from the
3897  *  Tx data DMA. Up to four requests can be pipelined, and they may
3898  *  belong to the same packet or to multiple packets. However all
3899  *  requests for one packet are issued before a request is issued
3900  *  for a subsequent packet and if a request for the next packet
3901  *  requires a context change, that request will be stalled
3902  *  until the previous request completes. This means setting up
3903  *  a new context effectively disables pipelined Tx data DMA which
3904  *  in turn greatly slow down performance to send small sized
3905  *  frames.
3906  **********************************************************************/
3907 static void
em_transmit_checksum_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,u32 * txd_upper,u32 * txd_lower)3908 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3909     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3910 {
3911 	struct adapter			*adapter = txr->adapter;
3912 	struct e1000_context_desc	*TXD = NULL;
3913 	struct em_txbuffer		*tx_buffer;
3914 	int				cur, hdr_len;
3915 	u32				cmd = 0;
3916 	u16				offload = 0;
3917 	u8				ipcso, ipcss, tucso, tucss;
3918 
3919 	ipcss = ipcso = tucss = tucso = 0;
3920 	hdr_len = ip_off + (ip->ip_hl << 2);
3921 	cur = txr->next_avail_desc;
3922 
3923 	/* Setup of IP header checksum. */
3924 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3925 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3926 		offload |= CSUM_IP;
3927 		ipcss = ip_off;
3928 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3929 		/*
3930 		 * Start offset for header checksum calculation.
3931 		 * End offset for header checksum calculation.
3932 		 * Offset of place to put the checksum.
3933 		 */
3934 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3935 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3936 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3937 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3938 		cmd |= E1000_TXD_CMD_IP;
3939 	}
3940 
3941 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3942  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3943  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3944  		offload |= CSUM_TCP;
3945  		tucss = hdr_len;
3946  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3947 		/*
3948 		 * The 82574L can only remember the *last* context used
3949 		 * regardless of queue that it was use for.  We cannot reuse
3950 		 * contexts on this hardware platform and must generate a new
3951 		 * context every time.  82574L hardware spec, section 7.2.6,
3952 		 * second note.
3953 		 */
3954 		if (adapter->num_queues < 2) {
3955  			/*
3956  		 	* Setting up new checksum offload context for every
3957 			* frames takes a lot of processing time for hardware.
3958 			* This also reduces performance a lot for small sized
3959 			* frames so avoid it if driver can use previously
3960 			* configured checksum offload context.
3961  		 	*/
3962  			if (txr->last_hw_offload == offload) {
3963  				if (offload & CSUM_IP) {
3964  					if (txr->last_hw_ipcss == ipcss &&
3965  				    	txr->last_hw_ipcso == ipcso &&
3966  				    	txr->last_hw_tucss == tucss &&
3967  				    	txr->last_hw_tucso == tucso)
3968  						return;
3969  				} else {
3970  					if (txr->last_hw_tucss == tucss &&
3971  				    	txr->last_hw_tucso == tucso)
3972  						return;
3973  				}
3974   			}
3975  			txr->last_hw_offload = offload;
3976  			txr->last_hw_tucss = tucss;
3977  			txr->last_hw_tucso = tucso;
3978 		}
3979  		/*
3980  		 * Start offset for payload checksum calculation.
3981  		 * End offset for payload checksum calculation.
3982  		 * Offset of place to put the checksum.
3983  		 */
3984 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3985  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3986  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3987  		TXD->upper_setup.tcp_fields.tucso = tucso;
3988  		cmd |= E1000_TXD_CMD_TCP;
3989  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3990  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3991  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3992  		tucss = hdr_len;
3993  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3994 		/*
3995 		 * The 82574L can only remember the *last* context used
3996 		 * regardless of queue that it was use for.  We cannot reuse
3997 		 * contexts on this hardware platform and must generate a new
3998 		 * context every time.  82574L hardware spec, section 7.2.6,
3999 		 * second note.
4000 		 */
4001 		if (adapter->num_queues < 2) {
4002  			/*
4003  		 	* Setting up new checksum offload context for every
4004 			* frames takes a lot of processing time for hardware.
4005 			* This also reduces performance a lot for small sized
4006 			* frames so avoid it if driver can use previously
4007 			* configured checksum offload context.
4008  		 	*/
4009  			if (txr->last_hw_offload == offload) {
4010  				if (offload & CSUM_IP) {
4011  					if (txr->last_hw_ipcss == ipcss &&
4012  				    	txr->last_hw_ipcso == ipcso &&
4013  				    	txr->last_hw_tucss == tucss &&
4014  				    	txr->last_hw_tucso == tucso)
4015  						return;
4016  				} else {
4017  					if (txr->last_hw_tucss == tucss &&
4018  				    	txr->last_hw_tucso == tucso)
4019  						return;
4020  				}
4021  			}
4022  			txr->last_hw_offload = offload;
4023  			txr->last_hw_tucss = tucss;
4024  			txr->last_hw_tucso = tucso;
4025 		}
4026  		/*
4027  		 * Start offset for header checksum calculation.
4028  		 * End offset for header checksum calculation.
4029  		 * Offset of place to put the checksum.
4030  		 */
4031 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4032  		TXD->upper_setup.tcp_fields.tucss = tucss;
4033  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
4034  		TXD->upper_setup.tcp_fields.tucso = tucso;
4035   	}
4036 
4037  	if (offload & CSUM_IP) {
4038  		txr->last_hw_ipcss = ipcss;
4039  		txr->last_hw_ipcso = ipcso;
4040   	}
4041 
4042 	TXD->tcp_seg_setup.data = htole32(0);
4043 	TXD->cmd_and_length =
4044 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4045 	tx_buffer = &txr->tx_buffers[cur];
4046 	tx_buffer->m_head = NULL;
4047 	tx_buffer->next_eop = -1;
4048 
4049 	if (++cur == adapter->num_tx_desc)
4050 		cur = 0;
4051 
4052 	txr->tx_avail--;
4053 	txr->next_avail_desc = cur;
4054 }
4055 
4056 
4057 /**********************************************************************
4058  *
4059  *  Setup work for hardware segmentation offload (TSO)
4060  *
4061  **********************************************************************/
4062 static void
em_tso_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,struct tcphdr * tp,u32 * txd_upper,u32 * txd_lower)4063 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4064     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4065 {
4066 	struct adapter			*adapter = txr->adapter;
4067 	struct e1000_context_desc	*TXD;
4068 	struct em_txbuffer		*tx_buffer;
4069 	int cur, hdr_len;
4070 
4071 	/*
4072 	 * In theory we can use the same TSO context if and only if
4073 	 * frame is the same type(IP/TCP) and the same MSS. However
4074 	 * checking whether a frame has the same IP/TCP structure is
4075 	 * hard thing so just ignore that and always restablish a
4076 	 * new TSO context.
4077 	 */
4078 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4079 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4080 		      E1000_TXD_DTYP_D |	/* Data descr type */
4081 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4082 
4083 	/* IP and/or TCP header checksum calculation and insertion. */
4084 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4085 
4086 	cur = txr->next_avail_desc;
4087 	tx_buffer = &txr->tx_buffers[cur];
4088 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4089 
4090 	/*
4091 	 * Start offset for header checksum calculation.
4092 	 * End offset for header checksum calculation.
4093 	 * Offset of place put the checksum.
4094 	 */
4095 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4096 	TXD->lower_setup.ip_fields.ipcse =
4097 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4098 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4099 	/*
4100 	 * Start offset for payload checksum calculation.
4101 	 * End offset for payload checksum calculation.
4102 	 * Offset of place to put the checksum.
4103 	 */
4104 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4105 	TXD->upper_setup.tcp_fields.tucse = 0;
4106 	TXD->upper_setup.tcp_fields.tucso =
4107 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4108 	/*
4109 	 * Payload size per packet w/o any headers.
4110 	 * Length of all headers up to payload.
4111 	 */
4112 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4113 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4114 
4115 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4116 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4117 				E1000_TXD_CMD_TSE |	/* TSE context */
4118 				E1000_TXD_CMD_IP |	/* Do IP csum */
4119 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4120 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4121 
4122 	tx_buffer->m_head = NULL;
4123 	tx_buffer->next_eop = -1;
4124 
4125 	if (++cur == adapter->num_tx_desc)
4126 		cur = 0;
4127 
4128 	txr->tx_avail--;
4129 	txr->next_avail_desc = cur;
4130 	txr->tx_tso = TRUE;
4131 }
4132 
4133 
4134 /**********************************************************************
4135  *
4136  *  Examine each tx_buffer in the used queue. If the hardware is done
4137  *  processing the packet then free associated resources. The
4138  *  tx_buffer is put back on the free queue.
4139  *
4140  **********************************************************************/
4141 static void
em_txeof(struct tx_ring * txr)4142 em_txeof(struct tx_ring *txr)
4143 {
4144 	struct adapter	*adapter = txr->adapter;
4145         int first, last, done, processed;
4146         struct em_txbuffer *tx_buffer;
4147         struct e1000_tx_desc   *tx_desc, *eop_desc;
4148 	if_t ifp = adapter->ifp;
4149 
4150 	EM_TX_LOCK_ASSERT(txr);
4151 #ifdef DEV_NETMAP
4152 	if (netmap_tx_irq(ifp, txr->me))
4153 		return;
4154 #endif /* DEV_NETMAP */
4155 
4156 	/* No work, make sure hang detection is disabled */
4157         if (txr->tx_avail == adapter->num_tx_desc) {
4158 		txr->busy = EM_TX_IDLE;
4159                 return;
4160 	}
4161 
4162 	processed = 0;
4163         first = txr->next_to_clean;
4164         tx_desc = &txr->tx_base[first];
4165         tx_buffer = &txr->tx_buffers[first];
4166 	last = tx_buffer->next_eop;
4167         eop_desc = &txr->tx_base[last];
4168 
4169 	/*
4170 	 * What this does is get the index of the
4171 	 * first descriptor AFTER the EOP of the
4172 	 * first packet, that way we can do the
4173 	 * simple comparison on the inner while loop.
4174 	 */
4175 	if (++last == adapter->num_tx_desc)
4176  		last = 0;
4177 	done = last;
4178 
4179         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4180             BUS_DMASYNC_POSTREAD);
4181 
4182         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4183 		/* We clean the range of the packet */
4184 		while (first != done) {
4185                 	tx_desc->upper.data = 0;
4186                 	tx_desc->lower.data = 0;
4187                 	tx_desc->buffer_addr = 0;
4188                 	++txr->tx_avail;
4189 			++processed;
4190 
4191 			if (tx_buffer->m_head) {
4192 				bus_dmamap_sync(txr->txtag,
4193 				    tx_buffer->map,
4194 				    BUS_DMASYNC_POSTWRITE);
4195 				bus_dmamap_unload(txr->txtag,
4196 				    tx_buffer->map);
4197                         	m_freem(tx_buffer->m_head);
4198                         	tx_buffer->m_head = NULL;
4199                 	}
4200 			tx_buffer->next_eop = -1;
4201 
4202 	                if (++first == adapter->num_tx_desc)
4203 				first = 0;
4204 
4205 	                tx_buffer = &txr->tx_buffers[first];
4206 			tx_desc = &txr->tx_base[first];
4207 		}
4208 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4209 		/* See if we can continue to the next packet */
4210 		last = tx_buffer->next_eop;
4211 		if (last != -1) {
4212         		eop_desc = &txr->tx_base[last];
4213 			/* Get new done point */
4214 			if (++last == adapter->num_tx_desc) last = 0;
4215 			done = last;
4216 		} else
4217 			break;
4218         }
4219         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4220             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4221 
4222         txr->next_to_clean = first;
4223 
4224 	/*
4225 	** Hang detection: we know there's work outstanding
4226 	** or the entry return would have been taken, so no
4227 	** descriptor processed here indicates a potential hang.
4228 	** The local timer will examine this and do a reset if needed.
4229 	*/
4230 	if (processed == 0) {
4231 		if (txr->busy != EM_TX_HUNG)
4232 			++txr->busy;
4233 	} else /* At least one descriptor was cleaned */
4234 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4235 
4236         /*
4237          * If we have a minimum free, clear IFF_DRV_OACTIVE
4238          * to tell the stack that it is OK to send packets.
4239 	 * Notice that all writes of OACTIVE happen under the
4240 	 * TX lock which, with a single queue, guarantees
4241 	 * sanity.
4242          */
4243         if (txr->tx_avail >= EM_MAX_SCATTER) {
4244 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4245 	}
4246 
4247 	/* Disable hang detection if all clean */
4248 	if (txr->tx_avail == adapter->num_tx_desc)
4249 		txr->busy = EM_TX_IDLE;
4250 }
4251 
4252 /*********************************************************************
4253  *
4254  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4255  *
4256  **********************************************************************/
4257 static void
em_refresh_mbufs(struct rx_ring * rxr,int limit)4258 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4259 {
4260 	struct adapter		*adapter = rxr->adapter;
4261 	struct mbuf		*m;
4262 	bus_dma_segment_t	segs;
4263 	struct em_rxbuffer	*rxbuf;
4264 	int			i, j, error, nsegs;
4265 	bool			cleaned = FALSE;
4266 
4267 	i = j = rxr->next_to_refresh;
4268 	/*
4269 	** Get one descriptor beyond
4270 	** our work mark to control
4271 	** the loop.
4272 	*/
4273 	if (++j == adapter->num_rx_desc)
4274 		j = 0;
4275 
4276 	while (j != limit) {
4277 		rxbuf = &rxr->rx_buffers[i];
4278 		if (rxbuf->m_head == NULL) {
4279 			m = m_getjcl(M_NOWAIT, MT_DATA,
4280 			    M_PKTHDR, adapter->rx_mbuf_sz);
4281 			/*
4282 			** If we have a temporary resource shortage
4283 			** that causes a failure, just abort refresh
4284 			** for now, we will return to this point when
4285 			** reinvoked from em_rxeof.
4286 			*/
4287 			if (m == NULL)
4288 				goto update;
4289 		} else
4290 			m = rxbuf->m_head;
4291 
4292 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4293 		m->m_flags |= M_PKTHDR;
4294 		m->m_data = m->m_ext.ext_buf;
4295 
4296 		/* Use bus_dma machinery to setup the memory mapping  */
4297 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4298 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4299 		if (error != 0) {
4300 			printf("Refresh mbufs: hdr dmamap load"
4301 			    " failure - %d\n", error);
4302 			m_free(m);
4303 			rxbuf->m_head = NULL;
4304 			goto update;
4305 		}
4306 		rxbuf->m_head = m;
4307 		rxbuf->paddr = segs.ds_addr;
4308 		bus_dmamap_sync(rxr->rxtag,
4309 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4310 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4311 		cleaned = TRUE;
4312 
4313 		i = j; /* Next is precalulated for us */
4314 		rxr->next_to_refresh = i;
4315 		/* Calculate next controlling index */
4316 		if (++j == adapter->num_rx_desc)
4317 			j = 0;
4318 	}
4319 update:
4320 	/*
4321 	** Update the tail pointer only if,
4322 	** and as far as we have refreshed.
4323 	*/
4324 	if (cleaned)
4325 		E1000_WRITE_REG(&adapter->hw,
4326 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4327 
4328 	return;
4329 }
4330 
4331 
4332 /*********************************************************************
4333  *
4334  *  Allocate memory for rx_buffer structures. Since we use one
4335  *  rx_buffer per received packet, the maximum number of rx_buffer's
4336  *  that we'll need is equal to the number of receive descriptors
4337  *  that we've allocated.
4338  *
4339  **********************************************************************/
4340 static int
em_allocate_receive_buffers(struct rx_ring * rxr)4341 em_allocate_receive_buffers(struct rx_ring *rxr)
4342 {
4343 	struct adapter		*adapter = rxr->adapter;
4344 	device_t		dev = adapter->dev;
4345 	struct em_rxbuffer	*rxbuf;
4346 	int			error;
4347 
4348 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4349 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4350 	if (rxr->rx_buffers == NULL) {
4351 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4352 		return (ENOMEM);
4353 	}
4354 
4355 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4356 				1, 0,			/* alignment, bounds */
4357 				BUS_SPACE_MAXADDR,	/* lowaddr */
4358 				BUS_SPACE_MAXADDR,	/* highaddr */
4359 				NULL, NULL,		/* filter, filterarg */
4360 				MJUM9BYTES,		/* maxsize */
4361 				1,			/* nsegments */
4362 				MJUM9BYTES,		/* maxsegsize */
4363 				0,			/* flags */
4364 				NULL,			/* lockfunc */
4365 				NULL,			/* lockarg */
4366 				&rxr->rxtag);
4367 	if (error) {
4368 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4369 		    __func__, error);
4370 		goto fail;
4371 	}
4372 
4373 	rxbuf = rxr->rx_buffers;
4374 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4375 		rxbuf = &rxr->rx_buffers[i];
4376 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4377 		if (error) {
4378 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4379 			    __func__, error);
4380 			goto fail;
4381 		}
4382 	}
4383 
4384 	return (0);
4385 
4386 fail:
4387 	em_free_receive_structures(adapter);
4388 	return (error);
4389 }
4390 
4391 
4392 /*********************************************************************
4393  *
4394  *  Initialize a receive ring and its buffers.
4395  *
4396  **********************************************************************/
4397 static int
em_setup_receive_ring(struct rx_ring * rxr)4398 em_setup_receive_ring(struct rx_ring *rxr)
4399 {
4400 	struct	adapter 	*adapter = rxr->adapter;
4401 	struct em_rxbuffer	*rxbuf;
4402 	bus_dma_segment_t	seg[1];
4403 	int			rsize, nsegs, error = 0;
4404 #ifdef DEV_NETMAP
4405 	struct netmap_slot *slot;
4406 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4407 #endif
4408 
4409 
4410 	/* Clear the ring contents */
4411 	EM_RX_LOCK(rxr);
4412 	rsize = roundup2(adapter->num_rx_desc *
4413 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4414 	bzero((void *)rxr->rx_base, rsize);
4415 #ifdef DEV_NETMAP
4416 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4417 #endif
4418 
4419 	/*
4420 	** Free current RX buffer structs and their mbufs
4421 	*/
4422 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4423 		rxbuf = &rxr->rx_buffers[i];
4424 		if (rxbuf->m_head != NULL) {
4425 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4426 			    BUS_DMASYNC_POSTREAD);
4427 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4428 			m_freem(rxbuf->m_head);
4429 			rxbuf->m_head = NULL; /* mark as freed */
4430 		}
4431 	}
4432 
4433 	/* Now replenish the mbufs */
4434         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4435 		rxbuf = &rxr->rx_buffers[j];
4436 #ifdef DEV_NETMAP
4437 		if (slot) {
4438 			int si = netmap_idx_n2k(na->rx_rings[rxr->me], j);
4439 			uint64_t paddr;
4440 			void *addr;
4441 
4442 			addr = PNMB(na, slot + si, &paddr);
4443 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4444 			rxbuf->paddr = paddr;
4445 			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4446 			continue;
4447 		}
4448 #endif /* DEV_NETMAP */
4449 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4450 		    M_PKTHDR, adapter->rx_mbuf_sz);
4451 		if (rxbuf->m_head == NULL) {
4452 			error = ENOBUFS;
4453 			goto fail;
4454 		}
4455 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4456 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4457 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4458 
4459 		/* Get the memory mapping */
4460 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4461 		    rxbuf->map, rxbuf->m_head, seg,
4462 		    &nsegs, BUS_DMA_NOWAIT);
4463 		if (error != 0) {
4464 			m_freem(rxbuf->m_head);
4465 			rxbuf->m_head = NULL;
4466 			goto fail;
4467 		}
4468 		bus_dmamap_sync(rxr->rxtag,
4469 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4470 
4471 		rxbuf->paddr = seg[0].ds_addr;
4472 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4473 	}
4474 	rxr->next_to_check = 0;
4475 	rxr->next_to_refresh = 0;
4476 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4477 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4478 
4479 fail:
4480 	EM_RX_UNLOCK(rxr);
4481 	return (error);
4482 }
4483 
4484 /*********************************************************************
4485  *
4486  *  Initialize all receive rings.
4487  *
4488  **********************************************************************/
4489 static int
em_setup_receive_structures(struct adapter * adapter)4490 em_setup_receive_structures(struct adapter *adapter)
4491 {
4492 	struct rx_ring *rxr = adapter->rx_rings;
4493 	int q;
4494 
4495 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4496 		if (em_setup_receive_ring(rxr))
4497 			goto fail;
4498 
4499 	return (0);
4500 fail:
4501 	/*
4502 	 * Free RX buffers allocated so far, we will only handle
4503 	 * the rings that completed, the failing case will have
4504 	 * cleaned up for itself. 'q' failed, so its the terminus.
4505 	 */
4506 	for (int i = 0; i < q; ++i) {
4507 		rxr = &adapter->rx_rings[i];
4508 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4509 			struct em_rxbuffer *rxbuf;
4510 			rxbuf = &rxr->rx_buffers[n];
4511 			if (rxbuf->m_head != NULL) {
4512 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4513 			  	  BUS_DMASYNC_POSTREAD);
4514 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4515 				m_freem(rxbuf->m_head);
4516 				rxbuf->m_head = NULL;
4517 			}
4518 		}
4519 		rxr->next_to_check = 0;
4520 		rxr->next_to_refresh = 0;
4521 	}
4522 
4523 	return (ENOBUFS);
4524 }
4525 
4526 /*********************************************************************
4527  *
4528  *  Free all receive rings.
4529  *
4530  **********************************************************************/
4531 static void
em_free_receive_structures(struct adapter * adapter)4532 em_free_receive_structures(struct adapter *adapter)
4533 {
4534 	struct rx_ring *rxr = adapter->rx_rings;
4535 
4536 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4537 		em_free_receive_buffers(rxr);
4538 		/* Free the ring memory as well */
4539 		em_dma_free(adapter, &rxr->rxdma);
4540 		EM_RX_LOCK_DESTROY(rxr);
4541 	}
4542 
4543 	free(adapter->rx_rings, M_DEVBUF);
4544 }
4545 
4546 
4547 /*********************************************************************
4548  *
4549  *  Free receive ring data structures
4550  *
4551  **********************************************************************/
4552 static void
em_free_receive_buffers(struct rx_ring * rxr)4553 em_free_receive_buffers(struct rx_ring *rxr)
4554 {
4555 	struct adapter		*adapter = rxr->adapter;
4556 	struct em_rxbuffer	*rxbuf = NULL;
4557 
4558 	INIT_DEBUGOUT("free_receive_buffers: begin");
4559 
4560 	if (rxr->rx_buffers != NULL) {
4561 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4562 			rxbuf = &rxr->rx_buffers[i];
4563 			if (rxbuf->map != NULL) {
4564 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4565 				    BUS_DMASYNC_POSTREAD);
4566 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4567 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4568 			}
4569 			if (rxbuf->m_head != NULL) {
4570 				m_freem(rxbuf->m_head);
4571 				rxbuf->m_head = NULL;
4572 			}
4573 		}
4574 		free(rxr->rx_buffers, M_DEVBUF);
4575 		rxr->rx_buffers = NULL;
4576 		rxr->next_to_check = 0;
4577 		rxr->next_to_refresh = 0;
4578 	}
4579 
4580 	if (rxr->rxtag != NULL) {
4581 		bus_dma_tag_destroy(rxr->rxtag);
4582 		rxr->rxtag = NULL;
4583 	}
4584 
4585 	return;
4586 }
4587 
4588 
4589 /*********************************************************************
4590  *
4591  *  Enable receive unit.
4592  *
4593  **********************************************************************/
4594 
/*
 * Program the receive side of the MAC from the current adapter state.
 *
 * In order, this routine: optionally disables the receiver, builds the
 * new RCTL value in the local 'rctl', programs the interrupt delay and
 * throttling registers (RADV/RDTR/ITR, plus EITR on 82574), selects the
 * extended descriptor format in RFCTL, configures RXCSUM, sets up RSS
 * when built with EM_MULTIQUEUE and more than one queue is in use,
 * programs each ring's base/length/head/tail registers, adjusts RXDCTL
 * for some MAC types, and finally writes the accumulated 'rctl' value
 * back to RCTL.
 */
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	if_t ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u32	rctl, rxcsum, rfctl;

	INIT_DEBUGOUT("em_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Do not disable if ever enabled on this hardware */
	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/*
	 * Setup the Receive Control Register.  From here on 'rctl' is only
	 * a local working copy; it is written to the hardware at the very
	 * end of this function.
	 */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Do not store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Long Packet receive */
	if (if_getmtu(ifp) > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

        /* Strip the CRC */
        if (!em_disable_crc_stripping)
		rctl |= E1000_RCTL_SECRC;

	/* Receive interrupt delay values come from the adapter settings */
	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
	    adapter->rx_abs_int_delay.value);

	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
	    adapter->rx_int_delay.value);
	/*
	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
	 */
	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

	/* Use extended rx descriptor formats */
	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
	rfctl |= E1000_RFCTL_EXTEN;
	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (hw->mac.type == e1000_82574) {
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
			    DEFAULT_ITR);
		/* Disable accelerated acknowledge */
		rfctl |= E1000_RFCTL_ACK_DIS;
	}
	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);

	/*
	 * Receive checksum offload follows the interface's IFCAP_RXCSUM
	 * capability; the multiqueue build additionally sets IPOFL and PCSD.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
#ifdef EM_MULTIQUEUE
		rxcsum |= E1000_RXCSUM_TUOFL |
			  E1000_RXCSUM_IPOFL |
			  E1000_RXCSUM_PCSD;
#else
		rxcsum |= E1000_RXCSUM_TUOFL;
#endif
	} else
		rxcsum &= ~E1000_RXCSUM_TUOFL;

	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

#ifdef EM_MULTIQUEUE
#define RSSKEYLEN 10
	if (adapter->num_queues > 1) {
		uint8_t  rss_key[4 * RSSKEYLEN];
		uint32_t reta = 0;
		int i;

		/*
		* Configure RSS key
		* (a fresh random key is generated on every call)
		*/
		arc4rand(rss_key, sizeof(rss_key), 0);
		for (i = 0; i < RSSKEYLEN; ++i) {
			uint32_t rssrk = 0;

			rssrk = EM_RSSRK_VAL(rss_key, i);
			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
		}

		/*
		* Configure RSS redirect table in following fashion:
		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		*
		* One 32-bit word holding four byte-wide entries is built
		* here and then replicated into all 32 RETA registers.
		*/
		for (i = 0; i < sizeof(reta); ++i) {
			uint32_t q;

			q = (i % adapter->num_queues) << 7;
			reta |= q << (8 * i);
		}

		for (i = 0; i < 32; ++i) {
			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
		}

		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
				E1000_MRQC_RSS_FIELD_IPV4_TCP |
				E1000_MRQC_RSS_FIELD_IPV4 |
				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
				E1000_MRQC_RSS_FIELD_IPV6_EX |
				E1000_MRQC_RSS_FIELD_IPV6);
	}
#endif
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (hw->mac.type == e1000_82573)
		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

	/* Program one descriptor ring per queue; rxr walks the ring array */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		/* Setup the Base and Length of the Rx Descriptor Ring */
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rdt = adapter->num_rx_desc - 1; /* default */

		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
		/* Setup the Head and Tail Descriptor Pointers */
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 */
		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
			struct netmap_adapter *na = netmap_getna(adapter->ifp);
			rdt -= nm_kr_rxspace(na->rx_rings[i]);
		}
#endif /* DEV_NETMAP */
		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
	}

	/*
	 * Set PTHRESH for improved jumbo performance
	 * According to 10.2.5.11 of Intel 82574 Datasheet,
	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
	 * Only write to RXDCTL(1) if there is a need for different
	 * settings.
	 */
	if (((adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) &&
	    (if_getmtu(ifp) > ETHERMTU)) {
		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
	} else if (adapter->hw.mac.type == e1000_82574) {
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));

			rxdctl |= 0x20; /* PTHRESH */
			rxdctl |= 4 << 8; /* HTHRESH */
			rxdctl |= 4 << 16;/* WTHRESH */
			rxdctl |= 1 << 24; /* Switch to granularity */
			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
		}
	}

	/* pch2lan and later: toggle the jumbo workaround to match the MTU */
	if (adapter->hw.mac.type >= e1000_pch2lan) {
		if (if_getmtu(ifp) > ETHERMTU)
			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
		else
			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
	}

        /* Make sure VLAN Filters are off */
        rctl &= ~E1000_RCTL_VFE;

	/* Pick the hardware receive buffer size from the mbuf size in use */
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rctl |= E1000_RCTL_SZ_2048;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

	/* ensure we clear use DTYPE of 00 here */
	rctl &= ~0x00000C00;
	/* Write out the settings */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	return;
}
4798 
4799 
4800 /*********************************************************************
4801  *
4802  *  This routine executes in interrupt context. It replenishes
4803  *  the mbufs in the descriptor and sends data which has been
4804  *  dma'ed into host memory to upper layer.
4805  *
4806  *  We loop at most count times if count is > 0, or until done if
4807  *  count < 0.
4808  *
4809  *  For polling we also now return the number of cleaned packets
4810  *********************************************************************/
4811 static bool
em_rxeof(struct rx_ring * rxr,int count,int * done)4812 em_rxeof(struct rx_ring *rxr, int count, int *done)
4813 {
4814 	struct adapter		*adapter = rxr->adapter;
4815 	if_t ifp = adapter->ifp;
4816 	struct mbuf		*mp, *sendmp;
4817 	u32			status = 0;
4818 	u16 			len;
4819 	int			i, processed, rxdone = 0;
4820 	bool			eop;
4821 	union e1000_rx_desc_extended	*cur;
4822 
4823 	EM_RX_LOCK(rxr);
4824 
4825 	/* Sync the ring */
4826 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4827 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4828 
4829 
4830 #ifdef DEV_NETMAP
4831 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4832 		EM_RX_UNLOCK(rxr);
4833 		return (FALSE);
4834 	}
4835 #endif /* DEV_NETMAP */
4836 
4837 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4838 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4839 			break;
4840 
4841 		cur = &rxr->rx_base[i];
4842 		status = le32toh(cur->wb.upper.status_error);
4843 		mp = sendmp = NULL;
4844 
4845 		if ((status & E1000_RXD_STAT_DD) == 0)
4846 			break;
4847 
4848 		len = le16toh(cur->wb.upper.length);
4849 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4850 
4851 		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4852 		    (rxr->discard == TRUE)) {
4853 			adapter->dropped_pkts++;
4854 			++rxr->rx_discarded;
4855 			if (!eop) /* Catch subsequent segs */
4856 				rxr->discard = TRUE;
4857 			else
4858 				rxr->discard = FALSE;
4859 			em_rx_discard(rxr, i);
4860 			goto next_desc;
4861 		}
4862 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4863 
4864 		/* Assign correct length to the current fragment */
4865 		mp = rxr->rx_buffers[i].m_head;
4866 		mp->m_len = len;
4867 
4868 		/* Trigger for refresh */
4869 		rxr->rx_buffers[i].m_head = NULL;
4870 
4871 		/* First segment? */
4872 		if (rxr->fmp == NULL) {
4873 			mp->m_pkthdr.len = len;
4874 			rxr->fmp = rxr->lmp = mp;
4875 		} else {
4876 			/* Chain mbuf's together */
4877 			mp->m_flags &= ~M_PKTHDR;
4878 			rxr->lmp->m_next = mp;
4879 			rxr->lmp = mp;
4880 			rxr->fmp->m_pkthdr.len += len;
4881 		}
4882 
4883 		if (eop) {
4884 			--count;
4885 			sendmp = rxr->fmp;
4886 			if_setrcvif(sendmp, ifp);
4887 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4888 			em_receive_checksum(status, sendmp);
4889 #ifndef __NO_STRICT_ALIGNMENT
4890 			if (adapter->hw.mac.max_frame_size >
4891 			    (MCLBYTES - ETHER_ALIGN) &&
4892 			    em_fixup_rx(rxr) != 0)
4893 				goto skip;
4894 #endif
4895 			if (status & E1000_RXD_STAT_VP) {
4896 				if_setvtag(sendmp,
4897 				    le16toh(cur->wb.upper.vlan));
4898 				sendmp->m_flags |= M_VLANTAG;
4899 			}
4900 #ifndef __NO_STRICT_ALIGNMENT
4901 skip:
4902 #endif
4903 			rxr->fmp = rxr->lmp = NULL;
4904 		}
4905 next_desc:
4906 		/* Sync the ring */
4907 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4908 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4909 
4910 		/* Zero out the receive descriptors status. */
4911 		cur->wb.upper.status_error &= htole32(~0xFF);
4912 		++rxdone;	/* cumulative for POLL */
4913 		++processed;
4914 
4915 		/* Advance our pointers to the next descriptor. */
4916 		if (++i == adapter->num_rx_desc)
4917 			i = 0;
4918 
4919 		/* Send to the stack */
4920 		if (sendmp != NULL) {
4921 			rxr->next_to_check = i;
4922 			EM_RX_UNLOCK(rxr);
4923 			if_input(ifp, sendmp);
4924 			EM_RX_LOCK(rxr);
4925 			i = rxr->next_to_check;
4926 		}
4927 
4928 		/* Only refresh mbufs every 8 descriptors */
4929 		if (processed == 8) {
4930 			em_refresh_mbufs(rxr, i);
4931 			processed = 0;
4932 		}
4933 	}
4934 
4935 	/* Catch any remaining refresh work */
4936 	if (e1000_rx_unrefreshed(rxr))
4937 		em_refresh_mbufs(rxr, i);
4938 
4939 	rxr->next_to_check = i;
4940 	if (done != NULL)
4941 		*done = rxdone;
4942 	EM_RX_UNLOCK(rxr);
4943 
4944 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4945 }
4946 
4947 static __inline void
em_rx_discard(struct rx_ring * rxr,int i)4948 em_rx_discard(struct rx_ring *rxr, int i)
4949 {
4950 	struct em_rxbuffer	*rbuf;
4951 
4952 	rbuf = &rxr->rx_buffers[i];
4953 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4954 
4955 	/* Free any previous pieces */
4956 	if (rxr->fmp != NULL) {
4957 		rxr->fmp->m_flags |= M_PKTHDR;
4958 		m_freem(rxr->fmp);
4959 		rxr->fmp = NULL;
4960 		rxr->lmp = NULL;
4961 	}
4962 	/*
4963 	** Free buffer and allow em_refresh_mbufs()
4964 	** to clean up and recharge buffer.
4965 	*/
4966 	if (rbuf->m_head) {
4967 		m_free(rbuf->m_head);
4968 		rbuf->m_head = NULL;
4969 	}
4970 	return;
4971 }
4972 
4973 #ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies DMA operations. The 8254x only allows RX buffer
 * sizes of 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN
 * to align its payload. On architectures without strict alignment
 * restrictions the 8254x still performs unaligned memory access, which would
 * reduce the performance too. To avoid copying over an entire frame to align,
 * we allocate a new mbuf and copy the ethernet header to the new mbuf. The
 * new mbuf is prepended to the existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo frames
 * are not used at all on architectures with strict alignment.
 */
4988 static int
em_fixup_rx(struct rx_ring * rxr)4989 em_fixup_rx(struct rx_ring *rxr)
4990 {
4991 	struct adapter *adapter = rxr->adapter;
4992 	struct mbuf *m, *n;
4993 	int error;
4994 
4995 	error = 0;
4996 	m = rxr->fmp;
4997 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4998 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4999 		m->m_data += ETHER_HDR_LEN;
5000 	} else {
5001 		MGETHDR(n, M_NOWAIT, MT_DATA);
5002 		if (n != NULL) {
5003 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5004 			m->m_data += ETHER_HDR_LEN;
5005 			m->m_len -= ETHER_HDR_LEN;
5006 			n->m_len = ETHER_HDR_LEN;
5007 			M_MOVE_PKTHDR(n, m);
5008 			n->m_next = m;
5009 			rxr->fmp = n;
5010 		} else {
5011 			adapter->dropped_pkts++;
5012 			m_freem(rxr->fmp);
5013 			rxr->fmp = NULL;
5014 			error = ENOMEM;
5015 		}
5016 	}
5017 
5018 	return (error);
5019 }
5020 #endif
5021 
5022 static void
em_setup_rxdesc(union e1000_rx_desc_extended * rxd,const struct em_rxbuffer * rxbuf)5023 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5024 {
5025 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
5026 	/* DD bits must be cleared */
5027 	rxd->wb.upper.status_error= 0;
5028 }
5029 
5030 /*********************************************************************
5031  *
5032  *  Verify that the hardware indicated that the checksum is valid.
5033  *  Inform the stack about the status of checksum so that stack
5034  *  doesn't spend time verifying the checksum.
5035  *
5036  *********************************************************************/
5037 static void
em_receive_checksum(uint32_t status,struct mbuf * mp)5038 em_receive_checksum(uint32_t status, struct mbuf *mp)
5039 {
5040 	mp->m_pkthdr.csum_flags = 0;
5041 
5042 	/* Ignore Checksum bit is set */
5043 	if (status & E1000_RXD_STAT_IXSM)
5044 		return;
5045 
5046 	/* If the IP checksum exists and there is no IP Checksum error */
5047 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5048 		E1000_RXD_STAT_IPCS) {
5049 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5050 	}
5051 
5052 	/* TCP or UDP checksum */
5053 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5054 	    E1000_RXD_STAT_TCPCS) {
5055 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5056 		mp->m_pkthdr.csum_data = htons(0xffff);
5057 	}
5058 	if (status & E1000_RXD_STAT_UDPCS) {
5059 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5060 		mp->m_pkthdr.csum_data = htons(0xffff);
5061 	}
5062 }
5063 
/*
 * This routine is run via a vlan
 * config EVENT
 */
5068 static void
em_register_vlan(void * arg,if_t ifp,u16 vtag)5069 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5070 {
5071 	struct adapter	*adapter = if_getsoftc(ifp);
5072 	u32		index, bit;
5073 
5074 	if ((void*)adapter !=  arg)   /* Not our event */
5075 		return;
5076 
5077 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5078                 return;
5079 
5080 	EM_CORE_LOCK(adapter);
5081 	index = (vtag >> 5) & 0x7F;
5082 	bit = vtag & 0x1F;
5083 	adapter->shadow_vfta[index] |= (1 << bit);
5084 	++adapter->num_vlans;
5085 	/* Re-init to load the changes */
5086 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5087 		em_init_locked(adapter);
5088 	EM_CORE_UNLOCK(adapter);
5089 }
5090 
5091 /*
5092  * This routine is run via an vlan
5093  * unconfig EVENT
5094  */
5095 static void
em_unregister_vlan(void * arg,if_t ifp,u16 vtag)5096 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5097 {
5098 	struct adapter	*adapter = if_getsoftc(ifp);
5099 	u32		index, bit;
5100 
5101 	if (adapter != arg)
5102 		return;
5103 
5104 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5105                 return;
5106 
5107 	EM_CORE_LOCK(adapter);
5108 	index = (vtag >> 5) & 0x7F;
5109 	bit = vtag & 0x1F;
5110 	adapter->shadow_vfta[index] &= ~(1 << bit);
5111 	--adapter->num_vlans;
5112 	/* Re-init to load the changes */
5113 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5114 		em_init_locked(adapter);
5115 	EM_CORE_UNLOCK(adapter);
5116 }
5117 
5118 static void
em_setup_vlan_hw_support(struct adapter * adapter)5119 em_setup_vlan_hw_support(struct adapter *adapter)
5120 {
5121 	struct e1000_hw *hw = &adapter->hw;
5122 	u32             reg;
5123 
5124 	/*
5125 	** We get here thru init_locked, meaning
5126 	** a soft reset, this has already cleared
5127 	** the VFTA and other state, so if there
5128 	** have been no vlan's registered do nothing.
5129 	*/
5130 	if (adapter->num_vlans == 0)
5131                 return;
5132 
5133 	/*
5134 	** A soft reset zero's out the VFTA, so
5135 	** we need to repopulate it now.
5136 	*/
5137 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5138                 if (adapter->shadow_vfta[i] != 0)
5139 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5140                             i, adapter->shadow_vfta[i]);
5141 
5142 	reg = E1000_READ_REG(hw, E1000_CTRL);
5143 	reg |= E1000_CTRL_VME;
5144 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5145 
5146 	/* Enable the Filter Table */
5147 	reg = E1000_READ_REG(hw, E1000_RCTL);
5148 	reg &= ~E1000_RCTL_CFIEN;
5149 	reg |= E1000_RCTL_VFE;
5150 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5151 }
5152 
5153 static void
em_enable_intr(struct adapter * adapter)5154 em_enable_intr(struct adapter *adapter)
5155 {
5156 	struct e1000_hw *hw = &adapter->hw;
5157 	u32 ims_mask = IMS_ENABLE_MASK;
5158 
5159 	if (hw->mac.type == e1000_82574) {
5160 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5161 		ims_mask |= EM_MSIX_MASK;
5162 	}
5163 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5164 }
5165 
5166 static void
em_disable_intr(struct adapter * adapter)5167 em_disable_intr(struct adapter *adapter)
5168 {
5169 	struct e1000_hw *hw = &adapter->hw;
5170 
5171 	if (hw->mac.type == e1000_82574)
5172 		E1000_WRITE_REG(hw, EM_EIAC, 0);
5173 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5174 }
5175 
5176 /*
5177  * Bit of a misnomer, what this really means is
5178  * to enable OS management of the system... aka
5179  * to disable special hardware management features
5180  */
5181 static void
em_init_manageability(struct adapter * adapter)5182 em_init_manageability(struct adapter *adapter)
5183 {
5184 	/* A shared code workaround */
5185 #define E1000_82542_MANC2H E1000_MANC2H
5186 	if (adapter->has_manage) {
5187 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5188 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5189 
5190 		/* disable hardware interception of ARP */
5191 		manc &= ~(E1000_MANC_ARP_EN);
5192 
5193                 /* enable receiving management packets to the host */
5194 		manc |= E1000_MANC_EN_MNG2HOST;
5195 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5196 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5197 		manc2h |= E1000_MNG2HOST_PORT_623;
5198 		manc2h |= E1000_MNG2HOST_PORT_664;
5199 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5200 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5201 	}
5202 }
5203 
5204 /*
5205  * Give control back to hardware management
5206  * controller if there is one.
5207  */
5208 static void
em_release_manageability(struct adapter * adapter)5209 em_release_manageability(struct adapter *adapter)
5210 {
5211 	if (adapter->has_manage) {
5212 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5213 
5214 		/* re-enable hardware interception of ARP */
5215 		manc |= E1000_MANC_ARP_EN;
5216 		manc &= ~E1000_MANC_EN_MNG2HOST;
5217 
5218 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5219 	}
5220 }
5221 
5222 /*
5223  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5224  * For ASF and Pass Through versions of f/w this means
5225  * that the driver is loaded. For AMT version type f/w
5226  * this means that the network i/f is open.
5227  */
5228 static void
em_get_hw_control(struct adapter * adapter)5229 em_get_hw_control(struct adapter *adapter)
5230 {
5231 	u32 ctrl_ext, swsm;
5232 
5233 	if (adapter->hw.mac.type == e1000_82573) {
5234 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5235 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5236 		    swsm | E1000_SWSM_DRV_LOAD);
5237 		return;
5238 	}
5239 	/* else */
5240 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5241 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5242 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5243 	return;
5244 }
5245 
5246 /*
5247  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5248  * For ASF and Pass Through versions of f/w this means that
5249  * the driver is no longer loaded. For AMT versions of the
5250  * f/w this means that the network i/f is closed.
5251  */
5252 static void
em_release_hw_control(struct adapter * adapter)5253 em_release_hw_control(struct adapter *adapter)
5254 {
5255 	u32 ctrl_ext, swsm;
5256 
5257 	if (!adapter->has_manage)
5258 		return;
5259 
5260 	if (adapter->hw.mac.type == e1000_82573) {
5261 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5262 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5263 		    swsm & ~E1000_SWSM_DRV_LOAD);
5264 		return;
5265 	}
5266 	/* else */
5267 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5268 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5269 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5270 	return;
5271 }
5272 
5273 static int
em_is_valid_ether_addr(u8 * addr)5274 em_is_valid_ether_addr(u8 *addr)
5275 {
5276 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5277 
5278 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5279 		return (FALSE);
5280 	}
5281 
5282 	return (TRUE);
5283 }
5284 
5285 /*
5286 ** Parse the interface capabilities with regard
5287 ** to both system management and wake-on-lan for
5288 ** later use.
5289 */
5290 static void
em_get_wakeup(device_t dev)5291 em_get_wakeup(device_t dev)
5292 {
5293 	struct adapter	*adapter = device_get_softc(dev);
5294 	u16		eeprom_data = 0, device_id, apme_mask;
5295 
5296 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5297 	apme_mask = EM_EEPROM_APME;
5298 
5299 	switch (adapter->hw.mac.type) {
5300 	case e1000_82573:
5301 	case e1000_82583:
5302 		adapter->has_amt = TRUE;
5303 		/* Falls thru */
5304 	case e1000_82571:
5305 	case e1000_82572:
5306 	case e1000_80003es2lan:
5307 		if (adapter->hw.bus.func == 1) {
5308 			e1000_read_nvm(&adapter->hw,
5309 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5310 			break;
5311 		} else
5312 			e1000_read_nvm(&adapter->hw,
5313 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5314 		break;
5315 	case e1000_ich8lan:
5316 	case e1000_ich9lan:
5317 	case e1000_ich10lan:
5318 	case e1000_pchlan:
5319 	case e1000_pch2lan:
5320 	case e1000_pch_lpt:
5321 	case e1000_pch_spt:
5322 	case e1000_pch_cnp:
5323 		apme_mask = E1000_WUC_APME;
5324 		adapter->has_amt = TRUE;
5325 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5326 		break;
5327 	default:
5328 		e1000_read_nvm(&adapter->hw,
5329 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5330 		break;
5331 	}
5332 	if (eeprom_data & apme_mask)
5333 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5334 	/*
5335          * We have the eeprom settings, now apply the special cases
5336          * where the eeprom may be wrong or the board won't support
5337          * wake on lan on a particular port
5338 	 */
5339 	device_id = pci_get_device(dev);
5340         switch (device_id) {
5341 	case E1000_DEV_ID_82571EB_FIBER:
5342 		/* Wake events only supported on port A for dual fiber
5343 		 * regardless of eeprom setting */
5344 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5345 		    E1000_STATUS_FUNC_1)
5346 			adapter->wol = 0;
5347 		break;
5348 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5349 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5350 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5351                 /* if quad port adapter, disable WoL on all but port A */
5352 		if (global_quad_port_a != 0)
5353 			adapter->wol = 0;
5354 		/* Reset for multiple quad port adapters */
5355 		if (++global_quad_port_a == 4)
5356 			global_quad_port_a = 0;
5357                 break;
5358 	}
5359 	return;
5360 }
5361 
5362 
5363 /*
5364  * Enable PCI Wake On Lan capability
5365  */
/*
 * Arm the adapter for wake-on-LAN.
 *
 * Returns immediately if the device has no PCI power management
 * capability.  Otherwise adapter->wol is trimmed to the wake types
 * enabled on the interface, the MAC (or, on the listed pch* MAC types,
 * the PHY) is programmed for wakeup, and finally the PME bits in the
 * PCI power status register are set or cleared.
 */
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;
	int		error = 0;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16     	status;

	/* pmc receives the config-space offset of the PM capability */
	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
		return;

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake is wanted: set RCTL.MPE as well */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	/* No wake type left enabled: skip straight to the PME update */
	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
		goto pme;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	/* These MAC types are set up through the PHY; the rest via the MAC */
	if ((adapter->hw.mac.type == e1000_pchlan)  ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_pch_lpt) ||
	    (adapter->hw.mac.type == e1000_pch_spt) ||
	    (adapter->hw.mac.type == e1000_pch_cnp)) {
		error = em_enable_phy_wakeup(adapter);
		if (error)
			goto pme;
	} else {
		/* Enable wakeup by the MAC */
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

pme:
	/*
	 * Common exit: clear both PME bits in the power status register,
	 * then set them again only if no error occurred and the interface
	 * has a WOL capability enabled.
	 */
        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (!error && (if_getcapenable(ifp) & IFCAP_WOL))
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}
5441 
/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
*/
/*
 * Configure PHY-based wakeup: mirror the MAC's receive address,
 * multicast table and receive-control settings into the PHY, enable
 * PHY wakeup in the MAC's WUC/WUFC registers, and finally set the
 * wakeup enable and host wakeup bits in the PHY.
 *
 * Returns 0 on success, or the non-zero status from acquiring the PHY
 * or from the final PHY read/write.  The return values of the earlier
 * e1000_read_phy_reg()/e1000_write_phy_reg() calls are not checked.
 */
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		/* each 32-bit MAC entry is written as two 16-bit PHY writes */
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	/* the RFCE bit comes from CTRL rather than RCTL */
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	/* PHY is held from here on; every later exit goes through 'out' */
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	hw->phy.ops.release(hw);

	return ret;
}
5515 
5516 static void
em_led_func(void * arg,int onoff)5517 em_led_func(void *arg, int onoff)
5518 {
5519 	struct adapter	*adapter = arg;
5520 
5521 	EM_CORE_LOCK(adapter);
5522 	if (onoff) {
5523 		e1000_setup_led(&adapter->hw);
5524 		e1000_led_on(&adapter->hw);
5525 	} else {
5526 		e1000_led_off(&adapter->hw);
5527 		e1000_cleanup_led(&adapter->hw);
5528 	}
5529 	EM_CORE_UNLOCK(adapter);
5530 }
5531 
5532 /*
5533 ** Disable the L0S and L1 LINK states
5534 */
5535 static void
em_disable_aspm(struct adapter * adapter)5536 em_disable_aspm(struct adapter *adapter)
5537 {
5538 	int		base, reg;
5539 	u16		link_cap,link_ctrl;
5540 	device_t	dev = adapter->dev;
5541 
5542 	switch (adapter->hw.mac.type) {
5543 		case e1000_82573:
5544 		case e1000_82574:
5545 		case e1000_82583:
5546 			break;
5547 		default:
5548 			return;
5549 	}
5550 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5551 		return;
5552 	reg = base + PCIER_LINK_CAP;
5553 	link_cap = pci_read_config(dev, reg, 2);
5554 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5555 		return;
5556 	reg = base + PCIER_LINK_CTL;
5557 	link_ctrl = pci_read_config(dev, reg, 2);
5558 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5559 	pci_write_config(dev, reg, link_ctrl, 2);
5560 	return;
5561 }
5562 
5563 /**********************************************************************
5564  *
5565  *  Update the board statistics counters.
5566  *
5567  **********************************************************************/
5568 static void
em_update_stats_counters(struct adapter * adapter)5569 em_update_stats_counters(struct adapter *adapter)
5570 {
5571 
5572 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5573 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5574 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5575 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5576 	}
5577 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5578 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5579 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5580 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5581 
5582 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5583 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5584 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5585 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5586 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5587 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5588 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5589 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5590 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5591 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5592 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5593 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5594 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5595 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5596 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5597 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5598 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5599 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5600 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5601 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5602 
5603 	/* For the 64-bit byte counters the low dword must be read first. */
5604 	/* Both registers clear on the read of the high dword */
5605 
5606 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5607 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5608 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5609 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5610 
5611 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5612 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5613 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5614 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5615 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5616 
5617 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5618 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5619 
5620 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5621 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5622 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5623 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5624 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5625 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5626 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5627 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5628 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5629 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5630 
5631 	/* Interrupt Counts */
5632 
5633 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5634 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5635 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5636 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5637 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5638 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5639 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5640 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5641 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5642 
5643 	if (adapter->hw.mac.type >= e1000_82543) {
5644 		adapter->stats.algnerrc +=
5645 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5646 		adapter->stats.rxerrc +=
5647 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5648 		adapter->stats.tncrs +=
5649 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5650 		adapter->stats.cexterr +=
5651 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5652 		adapter->stats.tsctc +=
5653 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5654 		adapter->stats.tsctfc +=
5655 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5656 	}
5657 }
5658 
5659 static uint64_t
em_get_counter(if_t ifp,ift_counter cnt)5660 em_get_counter(if_t ifp, ift_counter cnt)
5661 {
5662 	struct adapter *adapter;
5663 
5664 	adapter = if_getsoftc(ifp);
5665 
5666 	switch (cnt) {
5667 	case IFCOUNTER_COLLISIONS:
5668 		return (adapter->stats.colc);
5669 	case IFCOUNTER_IERRORS:
5670 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5671 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5672 		    adapter->stats.ruc + adapter->stats.roc +
5673 		    adapter->stats.mpc + adapter->stats.cexterr);
5674 	case IFCOUNTER_OERRORS:
5675 		return (adapter->stats.ecol + adapter->stats.latecol +
5676 		    adapter->watchdog_events);
5677 	default:
5678 		return (if_get_counter_default(ifp, cnt));
5679 	}
5680 }
5681 
5682 /* Export a single 32-bit register via a read-only sysctl. */
5683 static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)5684 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5685 {
5686 	struct adapter *adapter;
5687 	u_int val;
5688 
5689 	adapter = oidp->oid_arg1;
5690 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5691 	return (sysctl_handle_int(oidp, &val, 0, req));
5692 }
5693 
5694 /*
5695  * Add sysctl variables, one per statistic, to the system.
5696  */
5697 static void
em_add_hw_stats(struct adapter * adapter)5698 em_add_hw_stats(struct adapter *adapter)
5699 {
5700 	device_t dev = adapter->dev;
5701 
5702 	struct tx_ring *txr = adapter->tx_rings;
5703 	struct rx_ring *rxr = adapter->rx_rings;
5704 
5705 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5706 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5707 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5708 	struct e1000_hw_stats *stats = &adapter->stats;
5709 
5710 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5711 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5712 
5713 #define QUEUE_NAME_LEN 32
5714 	char namebuf[QUEUE_NAME_LEN];
5715 
5716 	/* Driver Statistics */
5717 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5718 			CTLFLAG_RD, &adapter->dropped_pkts,
5719 			"Driver dropped packets");
5720 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5721 			CTLFLAG_RD, &adapter->link_irq,
5722 			"Link MSIX IRQ Handled");
5723 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5724 			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5725 			 "Defragmenting mbuf chain failed");
5726 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5727 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5728 			"Driver tx dma failure in xmit");
5729 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5730 			CTLFLAG_RD, &adapter->rx_overruns,
5731 			"RX overruns");
5732 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5733 			CTLFLAG_RD, &adapter->watchdog_events,
5734 			"Watchdog timeouts");
5735 
5736 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5737 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5738 			em_sysctl_reg_handler, "IU",
5739 			"Device Control Register");
5740 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5741 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5742 			em_sysctl_reg_handler, "IU",
5743 			"Receiver Control Register");
5744 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5745 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5746 			"Flow Control High Watermark");
5747 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5748 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5749 			"Flow Control Low Watermark");
5750 
5751 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5752 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5753 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5754 					    CTLFLAG_RD, NULL, "TX Queue Name");
5755 		queue_list = SYSCTL_CHILDREN(queue_node);
5756 
5757 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5758 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5759 				E1000_TDH(txr->me),
5760 				em_sysctl_reg_handler, "IU",
5761  				"Transmit Descriptor Head");
5762 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5763 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5764 				E1000_TDT(txr->me),
5765 				em_sysctl_reg_handler, "IU",
5766  				"Transmit Descriptor Tail");
5767 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5768 				CTLFLAG_RD, &txr->tx_irq,
5769 				"Queue MSI-X Transmit Interrupts");
5770 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5771 				CTLFLAG_RD, &txr->no_desc_avail,
5772 				"Queue No Descriptor Available");
5773 
5774 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5775 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5776 					    CTLFLAG_RD, NULL, "RX Queue Name");
5777 		queue_list = SYSCTL_CHILDREN(queue_node);
5778 
5779 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5780 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5781 				E1000_RDH(rxr->me),
5782 				em_sysctl_reg_handler, "IU",
5783 				"Receive Descriptor Head");
5784 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5785 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5786 				E1000_RDT(rxr->me),
5787 				em_sysctl_reg_handler, "IU",
5788 				"Receive Descriptor Tail");
5789 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5790 				CTLFLAG_RD, &rxr->rx_irq,
5791 				"Queue MSI-X Receive Interrupts");
5792 	}
5793 
5794 	/* MAC stats get their own sub node */
5795 
5796 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5797 				    CTLFLAG_RD, NULL, "Statistics");
5798 	stat_list = SYSCTL_CHILDREN(stat_node);
5799 
5800 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5801 			CTLFLAG_RD, &stats->ecol,
5802 			"Excessive collisions");
5803 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5804 			CTLFLAG_RD, &stats->scc,
5805 			"Single collisions");
5806 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5807 			CTLFLAG_RD, &stats->mcc,
5808 			"Multiple collisions");
5809 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5810 			CTLFLAG_RD, &stats->latecol,
5811 			"Late collisions");
5812 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5813 			CTLFLAG_RD, &stats->colc,
5814 			"Collision Count");
5815 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5816 			CTLFLAG_RD, &adapter->stats.symerrs,
5817 			"Symbol Errors");
5818 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5819 			CTLFLAG_RD, &adapter->stats.sec,
5820 			"Sequence Errors");
5821 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5822 			CTLFLAG_RD, &adapter->stats.dc,
5823 			"Defer Count");
5824 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5825 			CTLFLAG_RD, &adapter->stats.mpc,
5826 			"Missed Packets");
5827 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5828 			CTLFLAG_RD, &adapter->stats.rnbc,
5829 			"Receive No Buffers");
5830 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5831 			CTLFLAG_RD, &adapter->stats.ruc,
5832 			"Receive Undersize");
5833 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5834 			CTLFLAG_RD, &adapter->stats.rfc,
5835 			"Fragmented Packets Received ");
5836 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5837 			CTLFLAG_RD, &adapter->stats.roc,
5838 			"Oversized Packets Received");
5839 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5840 			CTLFLAG_RD, &adapter->stats.rjc,
5841 			"Recevied Jabber");
5842 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5843 			CTLFLAG_RD, &adapter->stats.rxerrc,
5844 			"Receive Errors");
5845 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5846 			CTLFLAG_RD, &adapter->stats.crcerrs,
5847 			"CRC errors");
5848 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5849 			CTLFLAG_RD, &adapter->stats.algnerrc,
5850 			"Alignment Errors");
5851 	/* On 82575 these are collision counts */
5852 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5853 			CTLFLAG_RD, &adapter->stats.cexterr,
5854 			"Collision/Carrier extension errors");
5855 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5856 			CTLFLAG_RD, &adapter->stats.xonrxc,
5857 			"XON Received");
5858 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5859 			CTLFLAG_RD, &adapter->stats.xontxc,
5860 			"XON Transmitted");
5861 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5862 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5863 			"XOFF Received");
5864 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5865 			CTLFLAG_RD, &adapter->stats.xofftxc,
5866 			"XOFF Transmitted");
5867 
5868 	/* Packet Reception Stats */
5869 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5870 			CTLFLAG_RD, &adapter->stats.tpr,
5871 			"Total Packets Received ");
5872 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5873 			CTLFLAG_RD, &adapter->stats.gprc,
5874 			"Good Packets Received");
5875 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5876 			CTLFLAG_RD, &adapter->stats.bprc,
5877 			"Broadcast Packets Received");
5878 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5879 			CTLFLAG_RD, &adapter->stats.mprc,
5880 			"Multicast Packets Received");
5881 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5882 			CTLFLAG_RD, &adapter->stats.prc64,
5883 			"64 byte frames received ");
5884 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5885 			CTLFLAG_RD, &adapter->stats.prc127,
5886 			"65-127 byte frames received");
5887 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5888 			CTLFLAG_RD, &adapter->stats.prc255,
5889 			"128-255 byte frames received");
5890 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5891 			CTLFLAG_RD, &adapter->stats.prc511,
5892 			"256-511 byte frames received");
5893 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5894 			CTLFLAG_RD, &adapter->stats.prc1023,
5895 			"512-1023 byte frames received");
5896 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5897 			CTLFLAG_RD, &adapter->stats.prc1522,
5898 			"1023-1522 byte frames received");
5899  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5900  			CTLFLAG_RD, &adapter->stats.gorc,
5901  			"Good Octets Received");
5902 
5903 	/* Packet Transmission Stats */
5904  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5905  			CTLFLAG_RD, &adapter->stats.gotc,
5906  			"Good Octets Transmitted");
5907 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5908 			CTLFLAG_RD, &adapter->stats.tpt,
5909 			"Total Packets Transmitted");
5910 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5911 			CTLFLAG_RD, &adapter->stats.gptc,
5912 			"Good Packets Transmitted");
5913 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5914 			CTLFLAG_RD, &adapter->stats.bptc,
5915 			"Broadcast Packets Transmitted");
5916 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5917 			CTLFLAG_RD, &adapter->stats.mptc,
5918 			"Multicast Packets Transmitted");
5919 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5920 			CTLFLAG_RD, &adapter->stats.ptc64,
5921 			"64 byte frames transmitted ");
5922 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5923 			CTLFLAG_RD, &adapter->stats.ptc127,
5924 			"65-127 byte frames transmitted");
5925 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5926 			CTLFLAG_RD, &adapter->stats.ptc255,
5927 			"128-255 byte frames transmitted");
5928 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5929 			CTLFLAG_RD, &adapter->stats.ptc511,
5930 			"256-511 byte frames transmitted");
5931 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5932 			CTLFLAG_RD, &adapter->stats.ptc1023,
5933 			"512-1023 byte frames transmitted");
5934 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5935 			CTLFLAG_RD, &adapter->stats.ptc1522,
5936 			"1024-1522 byte frames transmitted");
5937 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5938 			CTLFLAG_RD, &adapter->stats.tsctc,
5939 			"TSO Contexts Transmitted");
5940 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5941 			CTLFLAG_RD, &adapter->stats.tsctfc,
5942 			"TSO Contexts Failed");
5943 
5944 
5945 	/* Interrupt Stats */
5946 
5947 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5948 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5949 	int_list = SYSCTL_CHILDREN(int_node);
5950 
5951 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5952 			CTLFLAG_RD, &adapter->stats.iac,
5953 			"Interrupt Assertion Count");
5954 
5955 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5956 			CTLFLAG_RD, &adapter->stats.icrxptc,
5957 			"Interrupt Cause Rx Pkt Timer Expire Count");
5958 
5959 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5960 			CTLFLAG_RD, &adapter->stats.icrxatc,
5961 			"Interrupt Cause Rx Abs Timer Expire Count");
5962 
5963 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5964 			CTLFLAG_RD, &adapter->stats.ictxptc,
5965 			"Interrupt Cause Tx Pkt Timer Expire Count");
5966 
5967 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5968 			CTLFLAG_RD, &adapter->stats.ictxatc,
5969 			"Interrupt Cause Tx Abs Timer Expire Count");
5970 
5971 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5972 			CTLFLAG_RD, &adapter->stats.ictxqec,
5973 			"Interrupt Cause Tx Queue Empty Count");
5974 
5975 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5976 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5977 			"Interrupt Cause Tx Queue Min Thresh Count");
5978 
5979 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5980 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5981 			"Interrupt Cause Rx Desc Min Thresh Count");
5982 
5983 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5984 			CTLFLAG_RD, &adapter->stats.icrxoc,
5985 			"Interrupt Cause Receiver Overrun Count");
5986 }
5987 
5988 /**********************************************************************
5989  *
5990  *  This routine provides a way to dump out the adapter eeprom,
5991  *  often a useful debug/service tool. This only dumps the first
5992  *  32 words, stuff that matters is in that extent.
5993  *
5994  **********************************************************************/
5995 static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)5996 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5997 {
5998 	struct adapter *adapter = (struct adapter *)arg1;
5999 	int error;
6000 	int result;
6001 
6002 	result = -1;
6003 	error = sysctl_handle_int(oidp, &result, 0, req);
6004 
6005 	if (error || !req->newptr)
6006 		return (error);
6007 
6008 	/*
6009 	 * This value will cause a hex dump of the
6010 	 * first 32 16-bit words of the EEPROM to
6011 	 * the screen.
6012 	 */
6013 	if (result == 1)
6014 		em_print_nvm_info(adapter);
6015 
6016 	return (error);
6017 }
6018 
6019 static void
em_print_nvm_info(struct adapter * adapter)6020 em_print_nvm_info(struct adapter *adapter)
6021 {
6022 	u16	eeprom_data;
6023 	int	i, j, row = 0;
6024 
6025 	/* Its a bit crude, but it gets the job done */
6026 	printf("\nInterface EEPROM Dump:\n");
6027 	printf("Offset\n0x0000  ");
6028 	for (i = 0, j = 0; i < 32; i++, j++) {
6029 		if (j == 8) { /* Make the offset block */
6030 			j = 0; ++row;
6031 			printf("\n0x00%x0  ",row);
6032 		}
6033 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6034 		printf("%04x ", eeprom_data);
6035 	}
6036 	printf("\n");
6037 }
6038 
6039 static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)6040 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6041 {
6042 	struct em_int_delay_info *info;
6043 	struct adapter *adapter;
6044 	u32 regval;
6045 	int error, usecs, ticks;
6046 
6047 	info = (struct em_int_delay_info *)arg1;
6048 	usecs = info->value;
6049 	error = sysctl_handle_int(oidp, &usecs, 0, req);
6050 	if (error != 0 || req->newptr == NULL)
6051 		return (error);
6052 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6053 		return (EINVAL);
6054 	info->value = usecs;
6055 	ticks = EM_USECS_TO_TICKS(usecs);
6056 	if (info->offset == E1000_ITR)	/* units are 256ns here */
6057 		ticks *= 4;
6058 
6059 	adapter = info->adapter;
6060 
6061 	EM_CORE_LOCK(adapter);
6062 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6063 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6064 	/* Handle a few special cases. */
6065 	switch (info->offset) {
6066 	case E1000_RDTR:
6067 		break;
6068 	case E1000_TIDV:
6069 		if (ticks == 0) {
6070 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6071 			/* Don't write 0 into the TIDV register. */
6072 			regval++;
6073 		} else
6074 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6075 		break;
6076 	}
6077 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6078 	EM_CORE_UNLOCK(adapter);
6079 	return (0);
6080 }
6081 
6082 static void
em_add_int_delay_sysctl(struct adapter * adapter,const char * name,const char * description,struct em_int_delay_info * info,int offset,int value)6083 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6084 	const char *description, struct em_int_delay_info *info,
6085 	int offset, int value)
6086 {
6087 	info->adapter = adapter;
6088 	info->offset = offset;
6089 	info->value = value;
6090 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6091 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6092 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6093 	    info, 0, em_sysctl_int_delay, "I", description);
6094 }
6095 
6096 static void
em_set_sysctl_value(struct adapter * adapter,const char * name,const char * description,int * limit,int value)6097 em_set_sysctl_value(struct adapter *adapter, const char *name,
6098 	const char *description, int *limit, int value)
6099 {
6100 	*limit = value;
6101 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6102 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6103 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6104 }
6105 
6106 
6107 /*
6108 ** Set flow control using sysctl:
6109 ** Flow control values:
6110 **      0 - off
6111 **      1 - rx pause
6112 **      2 - tx pause
6113 **      3 - full
6114 */
6115 static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)6116 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6117 {
6118         int		error;
6119 	static int	input = 3; /* default is full */
6120         struct adapter	*adapter = (struct adapter *) arg1;
6121 
6122         error = sysctl_handle_int(oidp, &input, 0, req);
6123 
6124         if ((error) || (req->newptr == NULL))
6125                 return (error);
6126 
6127 	if (input == adapter->fc) /* no change? */
6128 		return (error);
6129 
6130         switch (input) {
6131                 case e1000_fc_rx_pause:
6132                 case e1000_fc_tx_pause:
6133                 case e1000_fc_full:
6134                 case e1000_fc_none:
6135                         adapter->hw.fc.requested_mode = input;
6136 			adapter->fc = input;
6137                         break;
6138                 default:
6139 			/* Do nothing */
6140 			return (error);
6141         }
6142 
6143         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6144         e1000_force_mac_fc(&adapter->hw);
6145         return (error);
6146 }
6147 
6148 /*
6149 ** Manage Energy Efficient Ethernet:
6150 ** Control values:
6151 **     0/1 - enabled/disabled
6152 */
6153 static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)6154 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6155 {
6156        struct adapter *adapter = (struct adapter *) arg1;
6157        int             error, value;
6158 
6159        value = adapter->hw.dev_spec.ich8lan.eee_disable;
6160        error = sysctl_handle_int(oidp, &value, 0, req);
6161        if (error || req->newptr == NULL)
6162                return (error);
6163        EM_CORE_LOCK(adapter);
6164        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6165        em_init_locked(adapter);
6166        EM_CORE_UNLOCK(adapter);
6167        return (0);
6168 }
6169 
6170 static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)6171 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6172 {
6173 	struct adapter *adapter;
6174 	int error;
6175 	int result;
6176 
6177 	result = -1;
6178 	error = sysctl_handle_int(oidp, &result, 0, req);
6179 
6180 	if (error || !req->newptr)
6181 		return (error);
6182 
6183 	if (result == 1) {
6184 		adapter = (struct adapter *)arg1;
6185 		em_print_debug_info(adapter);
6186         }
6187 
6188 	return (error);
6189 }
6190 
6191 /*
6192 ** This routine is meant to be fluid, add whatever is
6193 ** needed for debugging a problem.  -jfv
6194 */
6195 static void
em_print_debug_info(struct adapter * adapter)6196 em_print_debug_info(struct adapter *adapter)
6197 {
6198 	device_t dev = adapter->dev;
6199 	struct tx_ring *txr = adapter->tx_rings;
6200 	struct rx_ring *rxr = adapter->rx_rings;
6201 
6202 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6203 		printf("Interface is RUNNING ");
6204 	else
6205 		printf("Interface is NOT RUNNING\n");
6206 
6207 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6208 		printf("and INACTIVE\n");
6209 	else
6210 		printf("and ACTIVE\n");
6211 
6212 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6213 		device_printf(dev, "TX Queue %d ------\n", i);
6214 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6215 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6216 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6217 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6218 		device_printf(dev, "TX descriptors avail = %d\n",
6219 	    		txr->tx_avail);
6220 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6221 	    		txr->no_desc_avail);
6222 		device_printf(dev, "RX Queue %d ------\n", i);
6223 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6224 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6225 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6226 		device_printf(dev, "RX discarded packets = %ld\n",
6227 	    		rxr->rx_discarded);
6228 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6229 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6230 	}
6231 }
6232 
6233 #ifdef EM_MULTIQUEUE
6234 /*
6235  * 82574 only:
6236  * Write a new value to the EEPROM increasing the number of MSIX
6237  * vectors from 3 to 5, for proper multiqueue support.
6238  */
6239 static void
em_enable_vectors_82574(struct adapter * adapter)6240 em_enable_vectors_82574(struct adapter *adapter)
6241 {
6242 	struct e1000_hw *hw = &adapter->hw;
6243 	device_t dev = adapter->dev;
6244 	u16 edata;
6245 
6246 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6247 	printf("Current cap: %#06x\n", edata);
6248 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6249 		device_printf(dev, "Writing to eeprom: increasing "
6250 		    "reported MSIX vectors from 3 to 5...\n");
6251 		edata &= ~(EM_NVM_MSIX_N_MASK);
6252 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6253 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6254 		e1000_update_nvm_checksum(hw);
6255 		device_printf(dev, "Writing to eeprom: done\n");
6256 	}
6257 }
6258 #endif
6259 
6260 #ifdef DDB
DB_COMMAND(em_reset_dev,em_ddb_reset_dev)6261 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6262 {
6263 	devclass_t	dc;
6264 	int max_em;
6265 
6266 	dc = devclass_find("em");
6267 	max_em = devclass_get_maxunit(dc);
6268 
6269 	for (int index = 0; index < (max_em - 1); index++) {
6270 		device_t dev;
6271 		dev = devclass_get_device(dc, index);
6272 		if (device_get_driver(dev) == &em_driver) {
6273 			struct adapter *adapter = device_get_softc(dev);
6274 			EM_CORE_LOCK(adapter);
6275 			em_init_locked(adapter);
6276 			EM_CORE_UNLOCK(adapter);
6277 		}
6278 	}
6279 }
DB_COMMAND(em_dump_queue,em_ddb_dump_queue)6280 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6281 {
6282 	devclass_t	dc;
6283 	int max_em;
6284 
6285 	dc = devclass_find("em");
6286 	max_em = devclass_get_maxunit(dc);
6287 
6288 	for (int index = 0; index < (max_em - 1); index++) {
6289 		device_t dev;
6290 		dev = devclass_get_device(dc, index);
6291 		if (device_get_driver(dev) == &em_driver)
6292 			em_print_debug_info(device_get_softc(dev));
6293 	}
6294 
6295 }
6296 #endif
6297