1 /******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97
/*********************************************************************
 *  Driver version:
 *
 *  em_probe() appends this string to the branding string when it
 *  builds the device description.
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";
102
103 /*********************************************************************
104 * PCI Device ID Table
105 *
106 * Used by probe to select devices to load on
107 * Last field stores an index into e1000_strings
108 * Last entry must be all 0s
109 *
110 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111 *********************************************************************/
112
113 static em_vendor_info_t em_vendor_info_array[] =
114 {
115 /* Intel(R) PRO/1000 Network Connection */
116 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
119 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120 PCI_ANY_ID, PCI_ANY_ID, 0},
121 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122 PCI_ANY_ID, PCI_ANY_ID, 0},
123 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124 PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126 PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128 PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130 PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135
136 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
140 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141 PCI_ANY_ID, PCI_ANY_ID, 0},
142 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143 PCI_ANY_ID, PCI_ANY_ID, 0},
144 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145 PCI_ANY_ID, PCI_ANY_ID, 0},
146 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147 PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
181 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
182 PCI_ANY_ID, PCI_ANY_ID, 0},
183 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
184 PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
191 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
192 PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
194 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
195 PCI_ANY_ID, PCI_ANY_ID, 0},
196 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
197 PCI_ANY_ID, PCI_ANY_ID, 0},
198 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
199 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
200 PCI_ANY_ID, PCI_ANY_ID, 0},
201 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
202 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
203 PCI_ANY_ID, PCI_ANY_ID, 0},
204 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
205 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
206 PCI_ANY_ID, PCI_ANY_ID, 0},
207 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
208 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
209 PCI_ANY_ID, PCI_ANY_ID, 0},
210 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
211 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
212 PCI_ANY_ID, PCI_ANY_ID, 0},
213 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
214 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
215 PCI_ANY_ID, PCI_ANY_ID, 0},
216 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
217 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
218 PCI_ANY_ID, PCI_ANY_ID, 0},
219 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
220 /* required last entry */
221 { 0, 0, 0, 0, 0}
222 };
223
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *
 *  Indexed by the last ("String Index") field of the entries in
 *  em_vendor_info_array; every entry in that table currently uses
 *  index 0.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
231
/*********************************************************************
 *  Function prototypes
 *
 *  Forward declarations for the file-local routines of the driver.
 *********************************************************************/
/* Device interface methods (wired into em_methods) */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit entry points: multiqueue or legacy start, chosen at build time */
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

/* Transmit ring setup and teardown */
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup and teardown */
static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

/* Sysctl helpers and handlers */
static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
352
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *
 *  Method table, driver description and module registration for the
 *  "em" driver on the pci bus.
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

/* The softc size is that of struct adapter, which em_attach fetches
 * with device_get_softc(). */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
/* netmap is only a dependency when the kernel is built with DEV_NETMAP */
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
379
/*********************************************************************
 *  Tunable default values.
 *
 *  Each SYSCTL_INT below is declared CTLFLAG_RDTUN under hw.em and
 *  exposes the static variable that precedes it.
 *********************************************************************/

/*
 * Conversions between interrupt-delay ticks and microseconds, rounded
 * to nearest.  The formulas treat one tick as 1.024 usecs.
 */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
/* Default passed to the "itr" sysctl in em_attach: one second in ns
 * divided by (MAX_INTS_PER_SEC * 256).
 * NOTE(review): presumably the ITR register counts 256 ns units - confirm. */
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

/* Defaults for the per-device rx_int_delay / tx_int_delay sysctls */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

/* Defaults for the per-device rx_abs_int_delay / tx_abs_int_delay sysctls */
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Requested descriptor counts; em_attach validates them and falls back
 * to EM_DEFAULT_RXD / EM_DEFAULT_TXD when they are out of range or
 * misaligned. */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs.  Initialised to -1 here.
** NOTE(review): the previous comment named igb_allocate_msix and said
** the value starts at CPU_FIRST and increments when a queue is bound
** to a cpu; the binding code is presumably em_allocate_msix - confirm.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
454
455 /* Energy efficient ethernet - default to OFF */
456 static int eee_setting = 1;
457 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
458 "Enable Energy Efficient Ethernet");
459
/*
 * Global used in WOL setup with multiport cards.
 * Starts at 0; its readers and writers are outside this part of the file.
 */
static int global_quad_port_a = 0;
462
463 #ifdef DEV_NETMAP /* see ixgbe.c for details */
464 #include <dev/netmap/if_em_netmap.h>
465 #endif /* DEV_NETMAP */
466
467 /*********************************************************************
468 * Device identification routine
469 *
470 * em_probe determines if the driver should be loaded on
471 * adapter based on PCI vendor/device id of the adapter.
472 *
473 * return BUS_PROBE_DEFAULT on success, positive on failure
474 *********************************************************************/
475
476 static int
em_probe(device_t dev)477 em_probe(device_t dev)
478 {
479 char adapter_name[60];
480 uint16_t pci_vendor_id = 0;
481 uint16_t pci_device_id = 0;
482 uint16_t pci_subvendor_id = 0;
483 uint16_t pci_subdevice_id = 0;
484 em_vendor_info_t *ent;
485
486 INIT_DEBUGOUT("em_probe: begin");
487
488 pci_vendor_id = pci_get_vendor(dev);
489 if (pci_vendor_id != EM_VENDOR_ID)
490 return (ENXIO);
491
492 pci_device_id = pci_get_device(dev);
493 pci_subvendor_id = pci_get_subvendor(dev);
494 pci_subdevice_id = pci_get_subdevice(dev);
495
496 ent = em_vendor_info_array;
497 while (ent->vendor_id != 0) {
498 if ((pci_vendor_id == ent->vendor_id) &&
499 (pci_device_id == ent->device_id) &&
500
501 ((pci_subvendor_id == ent->subvendor_id) ||
502 (ent->subvendor_id == PCI_ANY_ID)) &&
503
504 ((pci_subdevice_id == ent->subdevice_id) ||
505 (ent->subdevice_id == PCI_ANY_ID))) {
506 sprintf(adapter_name, "%s %s",
507 em_strings[ent->index],
508 em_driver_version);
509 device_set_desc_copy(dev, adapter_name);
510 return (BUS_PROBE_DEFAULT);
511 }
512 ent++;
513 }
514
515 return (ENXIO);
516 }
517
518 /*********************************************************************
519 * Device initialization routine
520 *
521 * The attach entry point is called when the driver is being loaded.
522 * This routine identifies the type of hardware, allocates all resources
523 * and initializes the hardware.
524 *
525 * return 0 on success, positive on failure
526 *********************************************************************/
527
528 static int
em_attach(device_t dev)529 em_attach(device_t dev)
530 {
531 struct adapter *adapter;
532 struct e1000_hw *hw;
533 int error = 0;
534
535 INIT_DEBUGOUT("em_attach: begin");
536
537 if (resource_disabled("em", device_get_unit(dev))) {
538 device_printf(dev, "Disabled by device hint\n");
539 return (ENXIO);
540 }
541
542 adapter = device_get_softc(dev);
543 adapter->dev = adapter->osdep.dev = dev;
544 hw = &adapter->hw;
545 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
546
547 /* SYSCTL stuff */
548 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
549 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
550 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
551 em_sysctl_nvm_info, "I", "NVM Information");
552
553 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
554 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
555 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
556 em_sysctl_debug_info, "I", "Debug Information");
557
558 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
559 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
560 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
561 em_set_flowcntl, "I", "Flow Control");
562
563 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
564
565 /* Determine hardware and mac info */
566 em_identify_hardware(adapter);
567
568 /* Setup PCI resources */
569 if (em_allocate_pci_resources(adapter)) {
570 device_printf(dev, "Allocation of PCI resources failed\n");
571 error = ENXIO;
572 goto err_pci;
573 }
574
575 /*
576 ** For ICH8 and family we need to
577 ** map the flash memory, and this
578 ** must happen after the MAC is
579 ** identified
580 */
581 if ((hw->mac.type == e1000_ich8lan) ||
582 (hw->mac.type == e1000_ich9lan) ||
583 (hw->mac.type == e1000_ich10lan) ||
584 (hw->mac.type == e1000_pchlan) ||
585 (hw->mac.type == e1000_pch2lan) ||
586 (hw->mac.type == e1000_pch_lpt)) {
587 int rid = EM_BAR_TYPE_FLASH;
588 adapter->flash = bus_alloc_resource_any(dev,
589 SYS_RES_MEMORY, &rid, RF_ACTIVE);
590 if (adapter->flash == NULL) {
591 device_printf(dev, "Mapping of Flash failed\n");
592 error = ENXIO;
593 goto err_pci;
594 }
595 /* This is used in the shared code */
596 hw->flash_address = (u8 *)adapter->flash;
597 adapter->osdep.flash_bus_space_tag =
598 rman_get_bustag(adapter->flash);
599 adapter->osdep.flash_bus_space_handle =
600 rman_get_bushandle(adapter->flash);
601 }
602 /*
603 ** In the new SPT device flash is not a
604 ** separate BAR, rather it is also in BAR0,
605 ** so use the same tag and an offset handle for the
606 ** FLASH read/write macros in the shared code.
607 */
608 else if (hw->mac.type >= e1000_pch_spt) {
609 adapter->osdep.flash_bus_space_tag =
610 adapter->osdep.mem_bus_space_tag;
611 adapter->osdep.flash_bus_space_handle =
612 adapter->osdep.mem_bus_space_handle
613 + E1000_FLASH_BASE_ADDR;
614 }
615
616 /* Do Shared Code initialization */
617 error = e1000_setup_init_funcs(hw, TRUE);
618 if (error) {
619 device_printf(dev, "Setup of Shared code failed, error %d\n",
620 error);
621 error = ENXIO;
622 goto err_pci;
623 }
624
625 /*
626 * Setup MSI/X or MSI if PCI Express
627 */
628 adapter->msix = em_setup_msix(adapter);
629
630 e1000_get_bus_info(hw);
631
632 /* Set up some sysctls for the tunable interrupt delays */
633 em_add_int_delay_sysctl(adapter, "rx_int_delay",
634 "receive interrupt delay in usecs", &adapter->rx_int_delay,
635 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
636 em_add_int_delay_sysctl(adapter, "tx_int_delay",
637 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
638 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
639 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
640 "receive interrupt delay limit in usecs",
641 &adapter->rx_abs_int_delay,
642 E1000_REGISTER(hw, E1000_RADV),
643 em_rx_abs_int_delay_dflt);
644 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
645 "transmit interrupt delay limit in usecs",
646 &adapter->tx_abs_int_delay,
647 E1000_REGISTER(hw, E1000_TADV),
648 em_tx_abs_int_delay_dflt);
649 em_add_int_delay_sysctl(adapter, "itr",
650 "interrupt delay limit in usecs/4",
651 &adapter->tx_itr,
652 E1000_REGISTER(hw, E1000_ITR),
653 DEFAULT_ITR);
654
655 /* Sysctl for limiting the amount of work done in the taskqueue */
656 em_set_sysctl_value(adapter, "rx_processing_limit",
657 "max number of rx packets to process", &adapter->rx_process_limit,
658 em_rx_process_limit);
659
660 /*
661 * Validate number of transmit and receive descriptors. It
662 * must not exceed hardware maximum, and must be multiple
663 * of E1000_DBA_ALIGN.
664 */
665 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
666 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
667 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
668 EM_DEFAULT_TXD, em_txd);
669 adapter->num_tx_desc = EM_DEFAULT_TXD;
670 } else
671 adapter->num_tx_desc = em_txd;
672
673 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
674 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
675 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
676 EM_DEFAULT_RXD, em_rxd);
677 adapter->num_rx_desc = EM_DEFAULT_RXD;
678 } else
679 adapter->num_rx_desc = em_rxd;
680
681 hw->mac.autoneg = DO_AUTO_NEG;
682 hw->phy.autoneg_wait_to_complete = FALSE;
683 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
684
685 /* Copper options */
686 if (hw->phy.media_type == e1000_media_type_copper) {
687 hw->phy.mdix = AUTO_ALL_MODES;
688 hw->phy.disable_polarity_correction = FALSE;
689 hw->phy.ms_type = EM_MASTER_SLAVE;
690 }
691
692 /*
693 * Set the frame limits assuming
694 * standard ethernet sized frames.
695 */
696 adapter->hw.mac.max_frame_size =
697 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
698
699 /*
700 * This controls when hardware reports transmit completion
701 * status.
702 */
703 hw->mac.report_tx_early = 1;
704
705 /*
706 ** Get queue/ring memory
707 */
708 if (em_allocate_queues(adapter)) {
709 error = ENOMEM;
710 goto err_pci;
711 }
712
713 /* Allocate multicast array memory. */
714 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
715 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
716 if (adapter->mta == NULL) {
717 device_printf(dev, "Can not allocate multicast setup array\n");
718 error = ENOMEM;
719 goto err_late;
720 }
721
722 /* Check SOL/IDER usage */
723 if (e1000_check_reset_block(hw))
724 device_printf(dev, "PHY reset is blocked"
725 " due to SOL/IDER session.\n");
726
727 /* Sysctl for setting Energy Efficient Ethernet */
728 hw->dev_spec.ich8lan.eee_disable = eee_setting;
729 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
730 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
731 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
732 adapter, 0, em_sysctl_eee, "I",
733 "Disable Energy Efficient Ethernet");
734
735 /*
736 ** Start from a known state, this is
737 ** important in reading the nvm and
738 ** mac from that.
739 */
740 e1000_reset_hw(hw);
741
742
743 /* Make sure we have a good EEPROM before we read from it */
744 if (e1000_validate_nvm_checksum(hw) < 0) {
745 /*
746 ** Some PCI-E parts fail the first check due to
747 ** the link being in sleep state, call it again,
748 ** if it fails a second time its a real issue.
749 */
750 if (e1000_validate_nvm_checksum(hw) < 0) {
751 device_printf(dev,
752 "The EEPROM Checksum Is Not Valid\n");
753 error = EIO;
754 goto err_late;
755 }
756 }
757
758 /* Copy the permanent MAC address out of the EEPROM */
759 if (e1000_read_mac_addr(hw) < 0) {
760 device_printf(dev, "EEPROM read error while reading MAC"
761 " address\n");
762 error = EIO;
763 goto err_late;
764 }
765
766 if (!em_is_valid_ether_addr(hw->mac.addr)) {
767 device_printf(dev, "Invalid MAC address\n");
768 error = EIO;
769 goto err_late;
770 }
771
772 /* Disable ULP support */
773 e1000_disable_ulp_lpt_lp(hw, TRUE);
774
775 /*
776 ** Do interrupt configuration
777 */
778 if (adapter->msix > 1) /* Do MSIX */
779 error = em_allocate_msix(adapter);
780 else /* MSI or Legacy */
781 error = em_allocate_legacy(adapter);
782 if (error)
783 goto err_late;
784
785 /*
786 * Get Wake-on-Lan and Management info for later use
787 */
788 em_get_wakeup(dev);
789
790 /* Setup OS specific network interface */
791 if (em_setup_interface(dev, adapter) != 0)
792 goto err_late;
793
794 em_reset(adapter);
795
796 /* Initialize statistics */
797 em_update_stats_counters(adapter);
798
799 hw->mac.get_link_status = 1;
800 em_update_link_status(adapter);
801
802 /* Register for VLAN events */
803 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
804 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
805 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
806 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
807
808 em_add_hw_stats(adapter);
809
810 /* Non-AMT based hardware can now take control from firmware */
811 if (adapter->has_manage && !adapter->has_amt)
812 em_get_hw_control(adapter);
813
814 /* Tell the stack that the interface is not active */
815 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
816
817 adapter->led_dev = led_create(em_led_func, adapter,
818 device_get_nameunit(dev));
819 #ifdef DEV_NETMAP
820 em_netmap_attach(adapter);
821 #endif /* DEV_NETMAP */
822
823 INIT_DEBUGOUT("em_attach: end");
824
825 return (0);
826
827 err_late:
828 em_free_transmit_structures(adapter);
829 em_free_receive_structures(adapter);
830 em_release_hw_control(adapter);
831 if (adapter->ifp != (void *)NULL)
832 if_free(adapter->ifp);
833 err_pci:
834 em_free_pci_resources(adapter);
835 free(adapter->mta, M_DEVBUF);
836 EM_CORE_LOCK_DESTROY(adapter);
837
838 return (error);
839 }
840
841 /*********************************************************************
842 * Device removal routine
843 *
844 * The detach entry point is called when the driver is being removed.
845 * This routine stops the adapter and deallocates all the resources
846 * that were allocated for driver operation.
847 *
848 * return 0 on success, positive on failure
849 *********************************************************************/
850
851 static int
em_detach(device_t dev)852 em_detach(device_t dev)
853 {
854 struct adapter *adapter = device_get_softc(dev);
855 if_t ifp = adapter->ifp;
856
857 INIT_DEBUGOUT("em_detach: begin");
858
859 /* Make sure VLANS are not using driver */
860 if (if_vlantrunkinuse(ifp)) {
861 device_printf(dev,"Vlan in use, detach first\n");
862 return (EBUSY);
863 }
864
865 #ifdef DEVICE_POLLING
866 if (if_getcapenable(ifp) & IFCAP_POLLING)
867 ether_poll_deregister(ifp);
868 #endif
869
870 if (adapter->led_dev != NULL)
871 led_destroy(adapter->led_dev);
872
873 EM_CORE_LOCK(adapter);
874 adapter->in_detach = 1;
875 em_stop(adapter);
876 EM_CORE_UNLOCK(adapter);
877 EM_CORE_LOCK_DESTROY(adapter);
878
879 e1000_phy_hw_reset(&adapter->hw);
880
881 em_release_manageability(adapter);
882 em_release_hw_control(adapter);
883
884 /* Unregister VLAN events */
885 if (adapter->vlan_attach != NULL)
886 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
887 if (adapter->vlan_detach != NULL)
888 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
889
890 ether_ifdetach(adapter->ifp);
891 callout_drain(&adapter->timer);
892
893 #ifdef DEV_NETMAP
894 netmap_detach(ifp);
895 #endif /* DEV_NETMAP */
896
897 em_free_pci_resources(adapter);
898 bus_generic_detach(dev);
899 if_free(ifp);
900
901 em_free_transmit_structures(adapter);
902 em_free_receive_structures(adapter);
903
904 em_release_hw_control(adapter);
905 free(adapter->mta, M_DEVBUF);
906
907 return (0);
908 }
909
910 /*********************************************************************
911 *
912 * Shutdown entry point
913 *
914 **********************************************************************/
915
916 static int
em_shutdown(device_t dev)917 em_shutdown(device_t dev)
918 {
919 return em_suspend(dev);
920 }
921
922 /*
923 * Suspend/resume device methods.
924 */
925 static int
em_suspend(device_t dev)926 em_suspend(device_t dev)
927 {
928 struct adapter *adapter = device_get_softc(dev);
929
930 EM_CORE_LOCK(adapter);
931
932 em_release_manageability(adapter);
933 em_release_hw_control(adapter);
934 em_enable_wakeup(dev);
935
936 EM_CORE_UNLOCK(adapter);
937
938 return bus_generic_suspend(dev);
939 }
940
941 static int
em_resume(device_t dev)942 em_resume(device_t dev)
943 {
944 struct adapter *adapter = device_get_softc(dev);
945 struct tx_ring *txr = adapter->tx_rings;
946 if_t ifp = adapter->ifp;
947
948 EM_CORE_LOCK(adapter);
949 if (adapter->hw.mac.type == e1000_pch2lan)
950 e1000_resume_workarounds_pchlan(&adapter->hw);
951 em_init_locked(adapter);
952 em_init_manageability(adapter);
953
954 if ((if_getflags(ifp) & IFF_UP) &&
955 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
956 for (int i = 0; i < adapter->num_queues; i++, txr++) {
957 EM_TX_LOCK(txr);
958 #ifdef EM_MULTIQUEUE
959 if (!drbr_empty(ifp, txr->br))
960 em_mq_start_locked(ifp, txr);
961 #else
962 if (!if_sendq_empty(ifp))
963 em_start_locked(ifp, txr);
964 #endif
965 EM_TX_UNLOCK(txr);
966 }
967 }
968 EM_CORE_UNLOCK(adapter);
969
970 return bus_generic_resume(dev);
971 }
972
973
974 #ifndef EM_MULTIQUEUE
975 static void
em_start_locked(if_t ifp,struct tx_ring * txr)976 em_start_locked(if_t ifp, struct tx_ring *txr)
977 {
978 struct adapter *adapter = if_getsoftc(ifp);
979 struct mbuf *m_head;
980
981 EM_TX_LOCK_ASSERT(txr);
982
983 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
984 IFF_DRV_RUNNING)
985 return;
986
987 if (!adapter->link_active)
988 return;
989
990 while (!if_sendq_empty(ifp)) {
991 /* Call cleanup if number of TX descriptors low */
992 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
993 em_txeof(txr);
994 if (txr->tx_avail < EM_MAX_SCATTER) {
995 if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
996 break;
997 }
998 m_head = if_dequeue(ifp);
999 if (m_head == NULL)
1000 break;
1001 /*
1002 * Encapsulation can modify our pointer, and or make it
1003 * NULL on failure. In that event, we can't requeue.
1004 */
1005 if (em_xmit(txr, &m_head)) {
1006 if (m_head == NULL)
1007 break;
1008 if_sendq_prepend(ifp, m_head);
1009 break;
1010 }
1011
1012 /* Mark the queue as having work */
1013 if (txr->busy == EM_TX_IDLE)
1014 txr->busy = EM_TX_BUSY;
1015
1016 /* Send a copy of the frame to the BPF listener */
1017 ETHER_BPF_MTAP(ifp, m_head);
1018
1019 }
1020
1021 return;
1022 }
1023
1024 static void
em_start(if_t ifp)1025 em_start(if_t ifp)
1026 {
1027 struct adapter *adapter = if_getsoftc(ifp);
1028 struct tx_ring *txr = adapter->tx_rings;
1029
1030 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1031 EM_TX_LOCK(txr);
1032 em_start_locked(ifp, txr);
1033 EM_TX_UNLOCK(txr);
1034 }
1035 return;
1036 }
1037 #else /* EM_MULTIQUEUE */
1038 /*********************************************************************
1039 * Multiqueue Transmit routines
1040 *
1041 * em_mq_start is called by the stack to initiate a transmit.
1042 * however, if busy the driver can queue the request rather
1043 * than do an immediate send. It is this that is an advantage
1044 * in this driver, rather than also having multiple tx queues.
1045 **********************************************************************/
1046 /*
1047 ** Multiqueue capable stack interface
1048 */
1049 static int
em_mq_start(if_t ifp,struct mbuf * m)1050 em_mq_start(if_t ifp, struct mbuf *m)
1051 {
1052 struct adapter *adapter = if_getsoftc(ifp);
1053 struct tx_ring *txr = adapter->tx_rings;
1054 unsigned int i, error;
1055
1056 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1057 i = m->m_pkthdr.flowid % adapter->num_queues;
1058 else
1059 i = curcpu % adapter->num_queues;
1060
1061 txr = &adapter->tx_rings[i];
1062
1063 error = drbr_enqueue(ifp, txr->br, m);
1064 if (error)
1065 return (error);
1066
1067 if (EM_TX_TRYLOCK(txr)) {
1068 em_mq_start_locked(ifp, txr);
1069 EM_TX_UNLOCK(txr);
1070 } else
1071 taskqueue_enqueue(txr->tq, &txr->tx_task);
1072
1073 return (0);
1074 }
1075
1076 static int
em_mq_start_locked(if_t ifp,struct tx_ring * txr)1077 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1078 {
1079 struct adapter *adapter = txr->adapter;
1080 struct mbuf *next;
1081 int err = 0, enq = 0;
1082
1083 EM_TX_LOCK_ASSERT(txr);
1084
1085 if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1086 adapter->link_active == 0) {
1087 return (ENETDOWN);
1088 }
1089
1090 /* Process the queue */
1091 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1092 if ((err = em_xmit(txr, &next)) != 0) {
1093 if (next == NULL) {
1094 /* It was freed, move forward */
1095 drbr_advance(ifp, txr->br);
1096 } else {
1097 /*
1098 * Still have one left, it may not be
1099 * the same since the transmit function
1100 * may have changed it.
1101 */
1102 drbr_putback(ifp, txr->br, next);
1103 }
1104 break;
1105 }
1106 drbr_advance(ifp, txr->br);
1107 enq++;
1108 if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1109 if (next->m_flags & M_MCAST)
1110 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1111 ETHER_BPF_MTAP(ifp, next);
1112 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1113 break;
1114 }
1115
1116 /* Mark the queue as having work */
1117 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1118 txr->busy = EM_TX_BUSY;
1119
1120 if (txr->tx_avail < EM_MAX_SCATTER)
1121 em_txeof(txr);
1122 if (txr->tx_avail < EM_MAX_SCATTER) {
1123 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1124 }
1125 return (err);
1126 }
1127
1128 /*
1129 ** Flush all ring buffers
1130 */
1131 static void
em_qflush(if_t ifp)1132 em_qflush(if_t ifp)
1133 {
1134 struct adapter *adapter = if_getsoftc(ifp);
1135 struct tx_ring *txr = adapter->tx_rings;
1136 struct mbuf *m;
1137
1138 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1139 EM_TX_LOCK(txr);
1140 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1141 m_freem(m);
1142 EM_TX_UNLOCK(txr);
1143 }
1144 if_qflush(ifp);
1145 }
1146 #endif /* EM_MULTIQUEUE */
1147
1148 /*********************************************************************
1149 * Ioctl entry point
1150 *
1151 * em_ioctl is called when the user wants to configure the
1152 * interface.
1153 *
1154 * return 0 on success, positive on failure
1155 **********************************************************************/
1156
1157 static int
em_ioctl(if_t ifp,u_long command,caddr_t data)1158 em_ioctl(if_t ifp, u_long command, caddr_t data)
1159 {
1160 struct adapter *adapter = if_getsoftc(ifp);
1161 struct ifreq *ifr = (struct ifreq *)data;
1162 #if defined(INET) || defined(INET6)
1163 struct ifaddr *ifa = (struct ifaddr *)data;
1164 #endif
1165 bool avoid_reset = FALSE;
1166 int error = 0;
1167
1168 if (adapter->in_detach)
1169 return (error);
1170
1171 switch (command) {
1172 case SIOCSIFADDR:
1173 #ifdef INET
1174 if (ifa->ifa_addr->sa_family == AF_INET)
1175 avoid_reset = TRUE;
1176 #endif
1177 #ifdef INET6
1178 if (ifa->ifa_addr->sa_family == AF_INET6)
1179 avoid_reset = TRUE;
1180 #endif
1181 /*
1182 ** Calling init results in link renegotiation,
1183 ** so we avoid doing it when possible.
1184 */
1185 if (avoid_reset) {
1186 if_setflagbits(ifp,IFF_UP,0);
1187 if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1188 em_init(adapter);
1189 #ifdef INET
1190 if (!(if_getflags(ifp) & IFF_NOARP))
1191 arp_ifinit(ifp, ifa);
1192 #endif
1193 } else
1194 error = ether_ioctl(ifp, command, data);
1195 break;
1196 case SIOCSIFMTU:
1197 {
1198 int max_frame_size;
1199
1200 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1201
1202 EM_CORE_LOCK(adapter);
1203 switch (adapter->hw.mac.type) {
1204 case e1000_82571:
1205 case e1000_82572:
1206 case e1000_ich9lan:
1207 case e1000_ich10lan:
1208 case e1000_pch2lan:
1209 case e1000_pch_lpt:
1210 case e1000_pch_spt:
1211 case e1000_pch_cnp:
1212 case e1000_82574:
1213 case e1000_82583:
1214 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1215 max_frame_size = 9234;
1216 break;
1217 case e1000_pchlan:
1218 max_frame_size = 4096;
1219 break;
1220 /* Adapters that do not support jumbo frames */
1221 case e1000_ich8lan:
1222 max_frame_size = ETHER_MAX_LEN;
1223 break;
1224 default:
1225 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1226 }
1227 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1228 ETHER_CRC_LEN) {
1229 EM_CORE_UNLOCK(adapter);
1230 error = EINVAL;
1231 break;
1232 }
1233
1234 if_setmtu(ifp, ifr->ifr_mtu);
1235 adapter->hw.mac.max_frame_size =
1236 if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1237 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1238 em_init_locked(adapter);
1239 EM_CORE_UNLOCK(adapter);
1240 break;
1241 }
1242 case SIOCSIFFLAGS:
1243 IOCTL_DEBUGOUT("ioctl rcv'd:\
1244 SIOCSIFFLAGS (Set Interface Flags)");
1245 EM_CORE_LOCK(adapter);
1246 if (if_getflags(ifp) & IFF_UP) {
1247 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1248 if ((if_getflags(ifp) ^ adapter->if_flags) &
1249 (IFF_PROMISC | IFF_ALLMULTI)) {
1250 em_disable_promisc(adapter);
1251 em_set_promisc(adapter);
1252 }
1253 } else
1254 em_init_locked(adapter);
1255 } else
1256 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1257 em_stop(adapter);
1258 adapter->if_flags = if_getflags(ifp);
1259 EM_CORE_UNLOCK(adapter);
1260 break;
1261 case SIOCADDMULTI:
1262 case SIOCDELMULTI:
1263 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1264 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1265 EM_CORE_LOCK(adapter);
1266 em_disable_intr(adapter);
1267 em_set_multi(adapter);
1268 #ifdef DEVICE_POLLING
1269 if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1270 #endif
1271 em_enable_intr(adapter);
1272 EM_CORE_UNLOCK(adapter);
1273 }
1274 break;
1275 case SIOCSIFMEDIA:
1276 /* Check SOL/IDER usage */
1277 EM_CORE_LOCK(adapter);
1278 if (e1000_check_reset_block(&adapter->hw)) {
1279 EM_CORE_UNLOCK(adapter);
1280 device_printf(adapter->dev, "Media change is"
1281 " blocked due to SOL/IDER session.\n");
1282 break;
1283 }
1284 EM_CORE_UNLOCK(adapter);
1285 /* falls thru */
1286 case SIOCGIFMEDIA:
1287 IOCTL_DEBUGOUT("ioctl rcv'd: \
1288 SIOCxIFMEDIA (Get/Set Interface Media)");
1289 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1290 break;
1291 case SIOCSIFCAP:
1292 {
1293 int mask, reinit;
1294
1295 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1296 reinit = 0;
1297 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1298 #ifdef DEVICE_POLLING
1299 if (mask & IFCAP_POLLING) {
1300 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1301 error = ether_poll_register(em_poll, ifp);
1302 if (error)
1303 return (error);
1304 EM_CORE_LOCK(adapter);
1305 em_disable_intr(adapter);
1306 if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1307 EM_CORE_UNLOCK(adapter);
1308 } else {
1309 error = ether_poll_deregister(ifp);
1310 /* Enable interrupt even in error case */
1311 EM_CORE_LOCK(adapter);
1312 em_enable_intr(adapter);
1313 if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1314 EM_CORE_UNLOCK(adapter);
1315 }
1316 }
1317 #endif
1318 if (mask & IFCAP_HWCSUM) {
1319 if_togglecapenable(ifp,IFCAP_HWCSUM);
1320 reinit = 1;
1321 }
1322 if (mask & IFCAP_TSO4) {
1323 if_togglecapenable(ifp,IFCAP_TSO4);
1324 reinit = 1;
1325 }
1326 if (mask & IFCAP_VLAN_HWTAGGING) {
1327 if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1328 reinit = 1;
1329 }
1330 if (mask & IFCAP_VLAN_HWFILTER) {
1331 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1332 reinit = 1;
1333 }
1334 if (mask & IFCAP_VLAN_HWTSO) {
1335 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1336 reinit = 1;
1337 }
1338 if ((mask & IFCAP_WOL) &&
1339 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1340 if (mask & IFCAP_WOL_MCAST)
1341 if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1342 if (mask & IFCAP_WOL_MAGIC)
1343 if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1344 }
1345 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1346 em_init(adapter);
1347 if_vlancap(ifp);
1348 break;
1349 }
1350
1351 default:
1352 error = ether_ioctl(ifp, command, data);
1353 break;
1354 }
1355
1356 return (error);
1357 }
1358
1359
1360 /*********************************************************************
1361 * Init entry point
1362 *
1363 * This routine is used in two ways. It is used by the stack as
1364 * init entry point in network interface structure. It is also used
1365 * by the driver as a hw/sw initialization routine to get to a
1366 * consistent state.
1367 *
 *  No value is returned; if the receive structures cannot be set up
 *  the adapter is stopped.
1369 **********************************************************************/
1370
1371 static void
em_init_locked(struct adapter * adapter)1372 em_init_locked(struct adapter *adapter)
1373 {
1374 if_t ifp = adapter->ifp;
1375 device_t dev = adapter->dev;
1376
1377 INIT_DEBUGOUT("em_init: begin");
1378
1379 EM_CORE_LOCK_ASSERT(adapter);
1380
1381 em_disable_intr(adapter);
1382 callout_stop(&adapter->timer);
1383
1384 /* Get the latest mac address, User can use a LAA */
1385 bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1386 ETHER_ADDR_LEN);
1387
1388 /* Put the address into the Receive Address Array */
1389 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1390
1391 /*
1392 * With the 82571 adapter, RAR[0] may be overwritten
1393 * when the other port is reset, we make a duplicate
1394 * in RAR[14] for that eventuality, this assures
1395 * the interface continues to function.
1396 */
1397 if (adapter->hw.mac.type == e1000_82571) {
1398 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1399 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1400 E1000_RAR_ENTRIES - 1);
1401 }
1402
1403 /* Initialize the hardware */
1404 em_reset(adapter);
1405 em_update_link_status(adapter);
1406
1407 /* Setup VLAN support, basic and offload if available */
1408 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1409
1410 /* Set hardware offload abilities */
1411 if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1412 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1413 else
1414 if_sethwassistbits(ifp, 0, CSUM_TCP | CSUM_UDP);
1415
1416 /* Configure for OS presence */
1417 em_init_manageability(adapter);
1418
1419 /* Prepare transmit descriptors and buffers */
1420 em_setup_transmit_structures(adapter);
1421 em_initialize_transmit_unit(adapter);
1422
1423 /* Setup Multicast table */
1424 em_set_multi(adapter);
1425
1426 /*
1427 ** Figure out the desired mbuf
1428 ** pool for doing jumbos
1429 */
1430 if (adapter->hw.mac.max_frame_size <= 2048)
1431 adapter->rx_mbuf_sz = MCLBYTES;
1432 else
1433 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1434
1435 /* Prepare receive descriptors and buffers */
1436 if (em_setup_receive_structures(adapter)) {
1437 device_printf(dev, "Could not setup receive structures\n");
1438 em_stop(adapter);
1439 return;
1440 }
1441 em_initialize_receive_unit(adapter);
1442
1443 /* Use real VLAN Filter support? */
1444 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1445 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1446 /* Use real VLAN Filter support */
1447 em_setup_vlan_hw_support(adapter);
1448 else {
1449 u32 ctrl;
1450 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1451 ctrl |= E1000_CTRL_VME;
1452 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1453 }
1454 } else {
1455 u32 ctrl;
1456 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1457 ctrl &= ~E1000_CTRL_VME;
1458 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1459 }
1460
1461 /* Don't lose promiscuous settings */
1462 em_set_promisc(adapter);
1463
1464 /* Set the interface as ACTIVE */
1465 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1466
1467 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1468 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1469
1470 /* MSI/X configuration for 82574 */
1471 if (adapter->hw.mac.type == e1000_82574) {
1472 int tmp;
1473 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1474 tmp |= E1000_CTRL_EXT_PBA_CLR;
1475 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1476 /* Set the IVAR - interrupt vector routing. */
1477 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1478 }
1479
1480 #ifdef DEVICE_POLLING
1481 /*
1482 * Only enable interrupts if we are not polling, make sure
1483 * they are off otherwise.
1484 */
1485 if (if_getcapenable(ifp) & IFCAP_POLLING)
1486 em_disable_intr(adapter);
1487 else
1488 #endif /* DEVICE_POLLING */
1489 em_enable_intr(adapter);
1490
1491 /* AMT based hardware can now take control from firmware */
1492 if (adapter->has_manage && adapter->has_amt)
1493 em_get_hw_control(adapter);
1494 }
1495
/*
 * Lock-taking wrapper: runs em_init_locked with the core lock held.
 * arg is the adapter softc.
 */
static void
em_init(void *arg)
{
	struct adapter	*adapter;

	adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}
1505
1506
1507 #ifdef DEVICE_POLLING
1508 /*********************************************************************
1509 *
1510 * Legacy polling routine: note this only works with single queue
1511 *
1512 *********************************************************************/
1513 static int
em_poll(if_t ifp,enum poll_cmd cmd,int count)1514 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1515 {
1516 struct adapter *adapter = if_getsoftc(ifp);
1517 struct tx_ring *txr = adapter->tx_rings;
1518 struct rx_ring *rxr = adapter->rx_rings;
1519 u32 reg_icr;
1520 int rx_done;
1521
1522 EM_CORE_LOCK(adapter);
1523 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1524 EM_CORE_UNLOCK(adapter);
1525 return (0);
1526 }
1527
1528 if (cmd == POLL_AND_CHECK_STATUS) {
1529 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1530 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1531 callout_stop(&adapter->timer);
1532 adapter->hw.mac.get_link_status = 1;
1533 em_update_link_status(adapter);
1534 callout_reset(&adapter->timer, hz,
1535 em_local_timer, adapter);
1536 }
1537 }
1538 EM_CORE_UNLOCK(adapter);
1539
1540 em_rxeof(rxr, count, &rx_done);
1541
1542 EM_TX_LOCK(txr);
1543 em_txeof(txr);
1544 #ifdef EM_MULTIQUEUE
1545 if (!drbr_empty(ifp, txr->br))
1546 em_mq_start_locked(ifp, txr);
1547 #else
1548 if (!if_sendq_empty(ifp))
1549 em_start_locked(ifp, txr);
1550 #endif
1551 EM_TX_UNLOCK(txr);
1552
1553 return (rx_done);
1554 }
1555 #endif /* DEVICE_POLLING */
1556
1557
1558 /*********************************************************************
1559 *
1560 * Fast Legacy/MSI Combined Interrupt Service routine
1561 *
1562 *********************************************************************/
1563 static int
em_irq_fast(void * arg)1564 em_irq_fast(void *arg)
1565 {
1566 struct adapter *adapter = arg;
1567 if_t ifp;
1568 u32 reg_icr;
1569
1570 ifp = adapter->ifp;
1571
1572 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1573
1574 /* Hot eject? */
1575 if (reg_icr == 0xffffffff)
1576 return FILTER_STRAY;
1577
1578 /* Definitely not our interrupt. */
1579 if (reg_icr == 0x0)
1580 return FILTER_STRAY;
1581
1582 /*
1583 * Starting with the 82571 chip, bit 31 should be used to
1584 * determine whether the interrupt belongs to us.
1585 */
1586 if (adapter->hw.mac.type >= e1000_82571 &&
1587 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1588 return FILTER_STRAY;
1589
1590 em_disable_intr(adapter);
1591 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1592
1593 /* Link status change */
1594 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1595 adapter->hw.mac.get_link_status = 1;
1596 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1597 }
1598
1599 if (reg_icr & E1000_ICR_RXO)
1600 adapter->rx_overruns++;
1601 return FILTER_HANDLED;
1602 }
1603
1604 /* Combined RX/TX handler, used by Legacy and MSI */
1605 static void
em_handle_que(void * context,int pending)1606 em_handle_que(void *context, int pending)
1607 {
1608 struct adapter *adapter = context;
1609 if_t ifp = adapter->ifp;
1610 struct tx_ring *txr = adapter->tx_rings;
1611 struct rx_ring *rxr = adapter->rx_rings;
1612
1613 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1614 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1615
1616 EM_TX_LOCK(txr);
1617 em_txeof(txr);
1618 #ifdef EM_MULTIQUEUE
1619 if (!drbr_empty(ifp, txr->br))
1620 em_mq_start_locked(ifp, txr);
1621 #else
1622 if (!if_sendq_empty(ifp))
1623 em_start_locked(ifp, txr);
1624 #endif
1625 EM_TX_UNLOCK(txr);
1626 if (more) {
1627 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1628 return;
1629 }
1630 }
1631
1632 em_enable_intr(adapter);
1633 return;
1634 }
1635
1636
1637 /*********************************************************************
1638 *
1639 * MSIX Interrupt Service Routines
1640 *
1641 **********************************************************************/
1642 static void
em_msix_tx(void * arg)1643 em_msix_tx(void *arg)
1644 {
1645 struct tx_ring *txr = arg;
1646 struct adapter *adapter = txr->adapter;
1647 if_t ifp = adapter->ifp;
1648
1649 ++txr->tx_irq;
1650 EM_TX_LOCK(txr);
1651 em_txeof(txr);
1652 #ifdef EM_MULTIQUEUE
1653 if (!drbr_empty(ifp, txr->br))
1654 em_mq_start_locked(ifp, txr);
1655 #else
1656 if (!if_sendq_empty(ifp))
1657 em_start_locked(ifp, txr);
1658 #endif
1659
1660 /* Reenable this interrupt */
1661 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1662 EM_TX_UNLOCK(txr);
1663 return;
1664 }
1665
1666 /*********************************************************************
1667 *
1668 * MSIX RX Interrupt Service routine
1669 *
1670 **********************************************************************/
1671
1672 static void
em_msix_rx(void * arg)1673 em_msix_rx(void *arg)
1674 {
1675 struct rx_ring *rxr = arg;
1676 struct adapter *adapter = rxr->adapter;
1677 bool more;
1678
1679 ++rxr->rx_irq;
1680 if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1681 return;
1682 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1683 if (more)
1684 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1685 else {
1686 /* Reenable this interrupt */
1687 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1688 }
1689 return;
1690 }
1691
1692 /*********************************************************************
1693 *
1694 * MSIX Link Fast Interrupt Service routine
1695 *
1696 **********************************************************************/
1697 static void
em_msix_link(void * arg)1698 em_msix_link(void *arg)
1699 {
1700 struct adapter *adapter = arg;
1701 u32 reg_icr;
1702
1703 ++adapter->link_irq;
1704 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1705
1706 if (reg_icr & E1000_ICR_RXO)
1707 adapter->rx_overruns++;
1708
1709 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1710 adapter->hw.mac.get_link_status = 1;
1711 em_handle_link(adapter, 0);
1712 } else
1713 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1714 EM_MSIX_LINK | E1000_IMS_LSC);
1715 /*
1716 ** Because we must read the ICR for this interrupt
1717 ** it may clear other causes using autoclear, for
1718 ** this reason we simply create a soft interrupt
1719 ** for all these vectors.
1720 */
1721 if (reg_icr) {
1722 E1000_WRITE_REG(&adapter->hw,
1723 E1000_ICS, adapter->ims);
1724 }
1725 return;
1726 }
1727
1728 static void
em_handle_rx(void * context,int pending)1729 em_handle_rx(void *context, int pending)
1730 {
1731 struct rx_ring *rxr = context;
1732 struct adapter *adapter = rxr->adapter;
1733 bool more;
1734
1735 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1736 if (more)
1737 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1738 else {
1739 /* Reenable this interrupt */
1740 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1741 }
1742 }
1743
1744 static void
em_handle_tx(void * context,int pending)1745 em_handle_tx(void *context, int pending)
1746 {
1747 struct tx_ring *txr = context;
1748 struct adapter *adapter = txr->adapter;
1749 if_t ifp = adapter->ifp;
1750
1751 EM_TX_LOCK(txr);
1752 em_txeof(txr);
1753 #ifdef EM_MULTIQUEUE
1754 if (!drbr_empty(ifp, txr->br))
1755 em_mq_start_locked(ifp, txr);
1756 #else
1757 if (!if_sendq_empty(ifp))
1758 em_start_locked(ifp, txr);
1759 #endif
1760 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1761 EM_TX_UNLOCK(txr);
1762 }
1763
1764 static void
em_handle_link(void * context,int pending)1765 em_handle_link(void *context, int pending)
1766 {
1767 struct adapter *adapter = context;
1768 struct e1000_hw *hw = &adapter->hw;
1769 struct tx_ring *txr = adapter->tx_rings;
1770 if_t ifp = adapter->ifp;
1771
1772 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1773 return;
1774
1775 EM_CORE_LOCK(adapter);
1776 callout_stop(&adapter->timer);
1777 em_update_link_status(adapter);
1778 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1779 if (hw->mac.type == e1000_82574 && adapter->msix_mem != NULL)
1780 E1000_WRITE_REG(hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
1781 if (adapter->link_active) {
1782 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1783 EM_TX_LOCK(txr);
1784 #ifdef EM_MULTIQUEUE
1785 if (!drbr_empty(ifp, txr->br))
1786 em_mq_start_locked(ifp, txr);
1787 #else
1788 if (if_sendq_empty(ifp))
1789 em_start_locked(ifp, txr);
1790 #endif
1791 EM_TX_UNLOCK(txr);
1792 }
1793 }
1794 EM_CORE_UNLOCK(adapter);
1795 }
1796
1797
1798 /*********************************************************************
1799 *
1800 * Media Ioctl callback
1801 *
1802 * This routine is called whenever the user queries the status of
1803 * the interface using ifconfig.
1804 *
1805 **********************************************************************/
1806 static void
em_media_status(if_t ifp,struct ifmediareq * ifmr)1807 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1808 {
1809 struct adapter *adapter = if_getsoftc(ifp);
1810 u_char fiber_type = IFM_1000_SX;
1811
1812 INIT_DEBUGOUT("em_media_status: begin");
1813
1814 EM_CORE_LOCK(adapter);
1815 em_update_link_status(adapter);
1816
1817 ifmr->ifm_status = IFM_AVALID;
1818 ifmr->ifm_active = IFM_ETHER;
1819
1820 if (!adapter->link_active) {
1821 EM_CORE_UNLOCK(adapter);
1822 return;
1823 }
1824
1825 ifmr->ifm_status |= IFM_ACTIVE;
1826
1827 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1828 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1829 ifmr->ifm_active |= fiber_type | IFM_FDX;
1830 } else {
1831 switch (adapter->link_speed) {
1832 case 10:
1833 ifmr->ifm_active |= IFM_10_T;
1834 break;
1835 case 100:
1836 ifmr->ifm_active |= IFM_100_TX;
1837 break;
1838 case 1000:
1839 ifmr->ifm_active |= IFM_1000_T;
1840 break;
1841 }
1842 if (adapter->link_duplex == FULL_DUPLEX)
1843 ifmr->ifm_active |= IFM_FDX;
1844 else
1845 ifmr->ifm_active |= IFM_HDX;
1846 }
1847 EM_CORE_UNLOCK(adapter);
1848 }
1849
1850 /*********************************************************************
1851 *
1852 * Media Ioctl callback
1853 *
1854 * This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
1856 *
1857 **********************************************************************/
1858 static int
em_media_change(if_t ifp)1859 em_media_change(if_t ifp)
1860 {
1861 struct adapter *adapter = if_getsoftc(ifp);
1862 struct ifmedia *ifm = &adapter->media;
1863
1864 INIT_DEBUGOUT("em_media_change: begin");
1865
1866 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1867 return (EINVAL);
1868
1869 EM_CORE_LOCK(adapter);
1870 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1871 case IFM_AUTO:
1872 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1873 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1874 break;
1875 case IFM_1000_LX:
1876 case IFM_1000_SX:
1877 case IFM_1000_T:
1878 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1879 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1880 break;
1881 case IFM_100_TX:
1882 adapter->hw.mac.autoneg = FALSE;
1883 adapter->hw.phy.autoneg_advertised = 0;
1884 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1885 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1886 else
1887 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1888 break;
1889 case IFM_10_T:
1890 adapter->hw.mac.autoneg = FALSE;
1891 adapter->hw.phy.autoneg_advertised = 0;
1892 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1893 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1894 else
1895 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1896 break;
1897 default:
1898 device_printf(adapter->dev, "Unsupported media type\n");
1899 }
1900
1901 em_init_locked(adapter);
1902 EM_CORE_UNLOCK(adapter);
1903
1904 return (0);
1905 }
1906
1907 /*********************************************************************
1908 *
1909 * This routine maps the mbufs to tx descriptors.
1910 *
1911 * return 0 on success, positive on failure
1912 **********************************************************************/
1913
1914 static int
em_xmit(struct tx_ring * txr,struct mbuf ** m_headp)1915 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1916 {
1917 struct adapter *adapter = txr->adapter;
1918 bus_dma_segment_t segs[EM_MAX_SCATTER];
1919 bus_dmamap_t map;
1920 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1921 struct e1000_tx_desc *ctxd = NULL;
1922 struct mbuf *m_head;
1923 struct ether_header *eh;
1924 struct ip *ip = NULL;
1925 struct tcphdr *tp = NULL;
1926 u32 txd_upper = 0, txd_lower = 0;
1927 int ip_off, poff;
1928 int nsegs, i, j, first, last = 0;
1929 int error;
1930 bool do_tso, tso_desc, remap = TRUE;
1931
1932 m_head = *m_headp;
1933 do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1934 tso_desc = FALSE;
1935 ip_off = poff = 0;
1936
1937 /*
1938 * Intel recommends entire IP/TCP header length reside in a single
1939 * buffer. If multiple descriptors are used to describe the IP and
1940 * TCP header, each descriptor should describe one or more
1941 * complete headers; descriptors referencing only parts of headers
1942 * are not supported. If all layer headers are not coalesced into
1943 * a single buffer, each buffer should not cross a 4KB boundary,
1944 * or be larger than the maximum read request size.
1945 * Controller also requires modifing IP/TCP header to make TSO work
1946 * so we firstly get a writable mbuf chain then coalesce ethernet/
1947 * IP/TCP header into a single buffer to meet the requirement of
1948 * controller. This also simplifies IP/TCP/UDP checksum offloading
1949 * which also has similar restrictions.
1950 */
1951 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1952 if (do_tso || (m_head->m_next != NULL &&
1953 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1954 if (M_WRITABLE(*m_headp) == 0) {
1955 m_head = m_dup(*m_headp, M_NOWAIT);
1956 m_freem(*m_headp);
1957 if (m_head == NULL) {
1958 *m_headp = NULL;
1959 return (ENOBUFS);
1960 }
1961 *m_headp = m_head;
1962 }
1963 }
1964 /*
1965 * XXX
1966 * Assume IPv4, we don't have TSO/checksum offload support
1967 * for IPv6 yet.
1968 */
1969 ip_off = sizeof(struct ether_header);
1970 if (m_head->m_len < ip_off) {
1971 m_head = m_pullup(m_head, ip_off);
1972 if (m_head == NULL) {
1973 *m_headp = NULL;
1974 return (ENOBUFS);
1975 }
1976 }
1977 eh = mtod(m_head, struct ether_header *);
1978 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1979 ip_off = sizeof(struct ether_vlan_header);
1980 if (m_head->m_len < ip_off) {
1981 m_head = m_pullup(m_head, ip_off);
1982 if (m_head == NULL) {
1983 *m_headp = NULL;
1984 return (ENOBUFS);
1985 }
1986 }
1987 }
1988 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1989 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1990 if (m_head == NULL) {
1991 *m_headp = NULL;
1992 return (ENOBUFS);
1993 }
1994 }
1995 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1996 poff = ip_off + (ip->ip_hl << 2);
1997
1998 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1999 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2000 m_head = m_pullup(m_head, poff +
2001 sizeof(struct tcphdr));
2002 if (m_head == NULL) {
2003 *m_headp = NULL;
2004 return (ENOBUFS);
2005 }
2006 }
2007 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2008 /*
2009 * TSO workaround:
2010 * pull 4 more bytes of data into it.
2011 */
2012 if (m_head->m_len < poff + (tp->th_off << 2)) {
2013 m_head = m_pullup(m_head, poff +
2014 (tp->th_off << 2) +
2015 TSO_WORKAROUND);
2016 if (m_head == NULL) {
2017 *m_headp = NULL;
2018 return (ENOBUFS);
2019 }
2020 }
2021 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2022 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2023 if (do_tso) {
2024 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2025 (ip->ip_hl << 2) +
2026 (tp->th_off << 2));
2027 ip->ip_sum = 0;
2028 /*
2029 * The pseudo TCP checksum does not include TCP
2030 * payload length so driver should recompute
2031 * the checksum here what hardware expect to
2032 * see. This is adherence of Microsoft's Large
2033 * Send specification.
2034 */
2035 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2036 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2037 }
2038 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2039 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2040 m_head = m_pullup(m_head, poff +
2041 sizeof(struct udphdr));
2042 if (m_head == NULL) {
2043 *m_headp = NULL;
2044 return (ENOBUFS);
2045 }
2046 }
2047 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2048 }
2049 *m_headp = m_head;
2050 }
2051
2052 /*
2053 * Map the packet for DMA
2054 *
2055 * Capture the first descriptor index,
2056 * this descriptor will have the index
2057 * of the EOP which is the only one that
2058 * now gets a DONE bit writeback.
2059 */
2060 first = txr->next_avail_desc;
2061 tx_buffer = &txr->tx_buffers[first];
2062 tx_buffer_mapped = tx_buffer;
2063 map = tx_buffer->map;
2064
2065 retry:
2066 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2067 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2068
2069 /*
2070 * There are two types of errors we can (try) to handle:
2071 * - EFBIG means the mbuf chain was too long and bus_dma ran
2072 * out of segments. Defragment the mbuf chain and try again.
2073 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2074 * at this point in time. Defer sending and try again later.
2075 * All other errors, in particular EINVAL, are fatal and prevent the
2076 * mbuf chain from ever going through. Drop it and report error.
2077 */
2078 if (error == EFBIG && remap) {
2079 struct mbuf *m;
2080
2081 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2082 if (m == NULL) {
2083 adapter->mbuf_defrag_failed++;
2084 m_freem(*m_headp);
2085 *m_headp = NULL;
2086 return (ENOBUFS);
2087 }
2088 *m_headp = m;
2089
2090 /* Try it again, but only once */
2091 remap = FALSE;
2092 goto retry;
2093 } else if (error != 0) {
2094 adapter->no_tx_dma_setup++;
2095 m_freem(*m_headp);
2096 *m_headp = NULL;
2097 return (error);
2098 }
2099
2100 /*
2101 * TSO Hardware workaround, if this packet is not
2102 * TSO, and is only a single descriptor long, and
2103 * it follows a TSO burst, then we need to add a
2104 * sentinel descriptor to prevent premature writeback.
2105 */
2106 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2107 if (nsegs == 1)
2108 tso_desc = TRUE;
2109 txr->tx_tso = FALSE;
2110 }
2111
2112 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2113 txr->no_desc_avail++;
2114 bus_dmamap_unload(txr->txtag, map);
2115 return (ENOBUFS);
2116 }
2117 m_head = *m_headp;
2118
2119 /* Do hardware assists */
2120 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2121 em_tso_setup(txr, m_head, ip_off, ip, tp,
2122 &txd_upper, &txd_lower);
2123 /* we need to make a final sentinel transmit desc */
2124 tso_desc = TRUE;
2125 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2126 em_transmit_checksum_setup(txr, m_head,
2127 ip_off, ip, &txd_upper, &txd_lower);
2128
2129 if (m_head->m_flags & M_VLANTAG) {
2130 /* Set the vlan id. */
2131 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2132 /* Tell hardware to add tag */
2133 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2134 }
2135
2136 i = txr->next_avail_desc;
2137
2138 /* Set up our transmit descriptors */
2139 for (j = 0; j < nsegs; j++) {
2140 bus_size_t seg_len;
2141 bus_addr_t seg_addr;
2142
2143 tx_buffer = &txr->tx_buffers[i];
2144 ctxd = &txr->tx_base[i];
2145 seg_addr = segs[j].ds_addr;
2146 seg_len = segs[j].ds_len;
2147 /*
2148 ** TSO Workaround:
2149 ** If this is the last descriptor, we want to
2150 ** split it so we have a small final sentinel
2151 */
2152 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2153 seg_len -= TSO_WORKAROUND;
2154 ctxd->buffer_addr = htole64(seg_addr);
2155 ctxd->lower.data = htole32(
2156 adapter->txd_cmd | txd_lower | seg_len);
2157 ctxd->upper.data = htole32(txd_upper);
2158 if (++i == adapter->num_tx_desc)
2159 i = 0;
2160
2161 /* Now make the sentinel */
2162 txr->tx_avail--;
2163 ctxd = &txr->tx_base[i];
2164 tx_buffer = &txr->tx_buffers[i];
2165 ctxd->buffer_addr =
2166 htole64(seg_addr + seg_len);
2167 ctxd->lower.data = htole32(
2168 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2169 ctxd->upper.data =
2170 htole32(txd_upper);
2171 last = i;
2172 if (++i == adapter->num_tx_desc)
2173 i = 0;
2174 } else {
2175 ctxd->buffer_addr = htole64(seg_addr);
2176 ctxd->lower.data = htole32(
2177 adapter->txd_cmd | txd_lower | seg_len);
2178 ctxd->upper.data = htole32(txd_upper);
2179 last = i;
2180 if (++i == adapter->num_tx_desc)
2181 i = 0;
2182 }
2183 tx_buffer->m_head = NULL;
2184 tx_buffer->next_eop = -1;
2185 }
2186
2187 txr->next_avail_desc = i;
2188 txr->tx_avail -= nsegs;
2189
2190 tx_buffer->m_head = m_head;
2191 /*
2192 ** Here we swap the map so the last descriptor,
2193 ** which gets the completion interrupt has the
2194 ** real map, and the first descriptor gets the
2195 ** unused map from this descriptor.
2196 */
2197 tx_buffer_mapped->map = tx_buffer->map;
2198 tx_buffer->map = map;
2199 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2200
2201 /*
2202 * Last Descriptor of Packet
2203 * needs End Of Packet (EOP)
2204 * and Report Status (RS)
2205 */
2206 ctxd->lower.data |=
2207 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2208 /*
2209 * Keep track in the first buffer which
2210 * descriptor will be written back
2211 */
2212 tx_buffer = &txr->tx_buffers[first];
2213 tx_buffer->next_eop = last;
2214
2215 /*
2216 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2217 * that this frame is available to transmit.
2218 */
2219 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2220 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2221 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2222
2223 return (0);
2224 }
2225
2226 static void
em_set_promisc(struct adapter * adapter)2227 em_set_promisc(struct adapter *adapter)
2228 {
2229 if_t ifp = adapter->ifp;
2230 u32 reg_rctl;
2231
2232 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2233
2234 if (if_getflags(ifp) & IFF_PROMISC) {
2235 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2236 /* Turn this on if you want to see bad packets */
2237 if (em_debug_sbp)
2238 reg_rctl |= E1000_RCTL_SBP;
2239 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2240 } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2241 reg_rctl |= E1000_RCTL_MPE;
2242 reg_rctl &= ~E1000_RCTL_UPE;
2243 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244 }
2245 }
2246
2247 static void
em_disable_promisc(struct adapter * adapter)2248 em_disable_promisc(struct adapter *adapter)
2249 {
2250 if_t ifp = adapter->ifp;
2251 u32 reg_rctl;
2252 int mcnt = 0;
2253
2254 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2255 reg_rctl &= (~E1000_RCTL_UPE);
2256 if (if_getflags(ifp) & IFF_ALLMULTI)
2257 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2258 else
2259 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2260 /* Don't disable if in MAX groups */
2261 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2262 reg_rctl &= (~E1000_RCTL_MPE);
2263 reg_rctl &= (~E1000_RCTL_SBP);
2264 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2265 }
2266
2267
2268 /*********************************************************************
2269 * Multicast Update
2270 *
2271 * This routine is called whenever multicast address list is updated.
2272 *
2273 **********************************************************************/
2274
2275 static void
em_set_multi(struct adapter * adapter)2276 em_set_multi(struct adapter *adapter)
2277 {
2278 if_t ifp = adapter->ifp;
2279 u32 reg_rctl = 0;
2280 u8 *mta; /* Multicast array memory */
2281 int mcnt = 0;
2282
2283 IOCTL_DEBUGOUT("em_set_multi: begin");
2284
2285 mta = adapter->mta;
2286 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2287
2288 if (adapter->hw.mac.type == e1000_82542 &&
2289 adapter->hw.revision_id == E1000_REVISION_2) {
2290 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2291 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2292 e1000_pci_clear_mwi(&adapter->hw);
2293 reg_rctl |= E1000_RCTL_RST;
2294 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295 msec_delay(5);
2296 }
2297
2298 if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2299
2300 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2301 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2302 reg_rctl |= E1000_RCTL_MPE;
2303 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2304 } else
2305 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2306
2307 if (adapter->hw.mac.type == e1000_82542 &&
2308 adapter->hw.revision_id == E1000_REVISION_2) {
2309 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2310 reg_rctl &= ~E1000_RCTL_RST;
2311 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2312 msec_delay(5);
2313 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2314 e1000_pci_set_mwi(&adapter->hw);
2315 }
2316 }
2317
2318
2319 /*********************************************************************
2320 * Timer routine
2321 *
2322 * This routine checks for link status and updates statistics.
2323 *
2324 **********************************************************************/
2325
2326 static void
em_local_timer(void * arg)2327 em_local_timer(void *arg)
2328 {
2329 struct adapter *adapter = arg;
2330 if_t ifp = adapter->ifp;
2331 struct tx_ring *txr = adapter->tx_rings;
2332 struct rx_ring *rxr = adapter->rx_rings;
2333 u32 trigger = 0;
2334
2335 EM_CORE_LOCK_ASSERT(adapter);
2336
2337 em_update_link_status(adapter);
2338 em_update_stats_counters(adapter);
2339
2340 /* Reset LAA into RAR[0] on 82571 */
2341 if ((adapter->hw.mac.type == e1000_82571) &&
2342 e1000_get_laa_state_82571(&adapter->hw))
2343 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2344
2345 /* Mask to use in the irq trigger */
2346 if (adapter->msix_mem) {
2347 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2348 trigger |= rxr->ims;
2349 rxr = adapter->rx_rings;
2350 } else
2351 trigger = E1000_ICS_RXDMT0;
2352
2353 /*
2354 ** Check on the state of the TX queue(s), this
2355 ** can be done without the lock because its RO
2356 ** and the HUNG state will be static if set.
2357 */
2358 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2359 if (txr->busy == EM_TX_HUNG)
2360 goto hung;
2361 if (txr->busy >= EM_TX_MAXTRIES)
2362 txr->busy = EM_TX_HUNG;
2363 /* Schedule a TX tasklet if needed */
2364 if (txr->tx_avail <= EM_MAX_SCATTER)
2365 taskqueue_enqueue(txr->tq, &txr->tx_task);
2366 }
2367
2368 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2369 #ifndef DEVICE_POLLING
2370 /* Trigger an RX interrupt to guarantee mbuf refresh */
2371 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2372 #endif
2373 return;
2374 hung:
2375 /* Looks like we're hung */
2376 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2377 txr->me);
2378 em_print_debug_info(adapter);
2379 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2380 adapter->watchdog_events++;
2381 em_init_locked(adapter);
2382 }
2383
2384
2385 static void
em_update_link_status(struct adapter * adapter)2386 em_update_link_status(struct adapter *adapter)
2387 {
2388 struct e1000_hw *hw = &adapter->hw;
2389 if_t ifp = adapter->ifp;
2390 device_t dev = adapter->dev;
2391 struct tx_ring *txr = adapter->tx_rings;
2392 u32 link_check = 0;
2393
2394 /* Get the cached link value or read phy for real */
2395 switch (hw->phy.media_type) {
2396 case e1000_media_type_copper:
2397 if (hw->mac.get_link_status) {
2398 if (hw->mac.type == e1000_pch_spt)
2399 msec_delay(50);
2400 /* Do the work to read phy */
2401 e1000_check_for_link(hw);
2402 link_check = !hw->mac.get_link_status;
2403 if (link_check) /* ESB2 fix */
2404 e1000_cfg_on_link_up(hw);
2405 } else
2406 link_check = TRUE;
2407 break;
2408 case e1000_media_type_fiber:
2409 e1000_check_for_link(hw);
2410 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2411 E1000_STATUS_LU);
2412 break;
2413 case e1000_media_type_internal_serdes:
2414 e1000_check_for_link(hw);
2415 link_check = adapter->hw.mac.serdes_has_link;
2416 break;
2417 default:
2418 case e1000_media_type_unknown:
2419 break;
2420 }
2421
2422 /* Now check for a transition */
2423 if (link_check && (adapter->link_active == 0)) {
2424 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2425 &adapter->link_duplex);
2426
2427 /*
2428 ** There have proven to be problems with TSO when not at full
2429 ** gigabit speed, so disable the assist automatically when at
2430 ** lower speeds. -jfv
2431 */
2432 if (if_getcapenable(ifp) & IFCAP_TSO4) {
2433 if (adapter->link_speed == SPEED_1000)
2434 if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
2435 else
2436 if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
2437 }
2438
2439 /* Check if we must disable SPEED_MODE bit on PCI-E */
2440 if ((adapter->link_speed != SPEED_1000) &&
2441 ((hw->mac.type == e1000_82571) ||
2442 (hw->mac.type == e1000_82572))) {
2443 int tarc0;
2444 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2445 tarc0 &= ~TARC_SPEED_MODE_BIT;
2446 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2447 }
2448 if (bootverbose)
2449 device_printf(dev, "Link is up %d Mbps %s\n",
2450 adapter->link_speed,
2451 ((adapter->link_duplex == FULL_DUPLEX) ?
2452 "Full Duplex" : "Half Duplex"));
2453 adapter->link_active = 1;
2454 adapter->smartspeed = 0;
2455 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2456 if_link_state_change(ifp, LINK_STATE_UP);
2457 } else if (!link_check && (adapter->link_active == 1)) {
2458 if_setbaudrate(ifp, 0);
2459 adapter->link_speed = 0;
2460 adapter->link_duplex = 0;
2461 if (bootverbose)
2462 device_printf(dev, "Link is Down\n");
2463 adapter->link_active = 0;
2464 /* Link down, disable hang detection */
2465 for (int i = 0; i < adapter->num_queues; i++, txr++)
2466 txr->busy = EM_TX_IDLE;
2467 if_link_state_change(ifp, LINK_STATE_DOWN);
2468 }
2469 }
2470
2471 /*********************************************************************
2472 *
2473 * This routine disables all traffic on the adapter by issuing a
2474 * global reset on the MAC and deallocates TX/RX buffers.
2475 *
2476 * This routine should always be called with BOTH the CORE
2477 * and TX locks.
2478 **********************************************************************/
2479
2480 static void
em_stop(void * arg)2481 em_stop(void *arg)
2482 {
2483 struct adapter *adapter = arg;
2484 if_t ifp = adapter->ifp;
2485 struct tx_ring *txr = adapter->tx_rings;
2486
2487 EM_CORE_LOCK_ASSERT(adapter);
2488
2489 INIT_DEBUGOUT("em_stop: begin");
2490
2491 em_disable_intr(adapter);
2492 callout_stop(&adapter->timer);
2493
2494 /* Tell the stack that the interface is no longer active */
2495 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2496
2497 /* Disarm Hang Detection. */
2498 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2499 EM_TX_LOCK(txr);
2500 txr->busy = EM_TX_IDLE;
2501 EM_TX_UNLOCK(txr);
2502 }
2503
2504 /* I219 needs some special flushing to avoid hangs */
2505 if (adapter->hw.mac.type == e1000_pch_spt)
2506 em_flush_desc_rings(adapter);
2507
2508 e1000_reset_hw(&adapter->hw);
2509 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2510
2511 e1000_led_off(&adapter->hw);
2512 e1000_cleanup_led(&adapter->hw);
2513 }
2514
2515
2516 /*********************************************************************
2517 *
2518 * Determine hardware revision.
2519 *
2520 **********************************************************************/
2521 static void
em_identify_hardware(struct adapter * adapter)2522 em_identify_hardware(struct adapter *adapter)
2523 {
2524 device_t dev = adapter->dev;
2525
2526 /* Make sure our PCI config space has the necessary stuff set */
2527 pci_enable_busmaster(dev);
2528 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2529
2530 /* Save off the information about this board */
2531 adapter->hw.vendor_id = pci_get_vendor(dev);
2532 adapter->hw.device_id = pci_get_device(dev);
2533 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2534 adapter->hw.subsystem_vendor_id =
2535 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2536 adapter->hw.subsystem_device_id =
2537 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2538
2539 /* Do Shared Code Init and Setup */
2540 if (e1000_set_mac_type(&adapter->hw)) {
2541 device_printf(dev, "Setup init failure\n");
2542 return;
2543 }
2544 }
2545
2546 static int
em_allocate_pci_resources(struct adapter * adapter)2547 em_allocate_pci_resources(struct adapter *adapter)
2548 {
2549 device_t dev = adapter->dev;
2550 int rid;
2551
2552 rid = PCIR_BAR(0);
2553 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2554 &rid, RF_ACTIVE);
2555 if (adapter->memory == NULL) {
2556 device_printf(dev, "Unable to allocate bus resource: memory\n");
2557 return (ENXIO);
2558 }
2559 adapter->osdep.mem_bus_space_tag =
2560 rman_get_bustag(adapter->memory);
2561 adapter->osdep.mem_bus_space_handle =
2562 rman_get_bushandle(adapter->memory);
2563 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2564
2565 adapter->hw.back = &adapter->osdep;
2566
2567 return (0);
2568 }
2569
2570 /*********************************************************************
2571 *
2572 * Setup the Legacy or MSI Interrupt handler
2573 *
2574 **********************************************************************/
2575 static int
em_allocate_legacy(struct adapter * adapter)2576 em_allocate_legacy(struct adapter *adapter)
2577 {
2578 device_t dev = adapter->dev;
2579 struct tx_ring *txr = adapter->tx_rings;
2580 int error, rid = 0;
2581
2582 /* Manually turn off all interrupts */
2583 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2584
2585 if (adapter->msix == 1) /* using MSI */
2586 rid = 1;
2587 /* We allocate a single interrupt resource */
2588 adapter->res = bus_alloc_resource_any(dev,
2589 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2590 if (adapter->res == NULL) {
2591 device_printf(dev, "Unable to allocate bus resource: "
2592 "interrupt\n");
2593 return (ENXIO);
2594 }
2595
2596 /*
2597 * Allocate a fast interrupt and the associated
2598 * deferred processing contexts.
2599 */
2600 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2601 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2602 taskqueue_thread_enqueue, &adapter->tq);
2603 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2604 device_get_nameunit(adapter->dev));
2605 /* Use a TX only tasklet for local timer */
2606 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2607 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2608 taskqueue_thread_enqueue, &txr->tq);
2609 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2610 device_get_nameunit(adapter->dev));
2611 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2612 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2613 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2614 device_printf(dev, "Failed to register fast interrupt "
2615 "handler: %d\n", error);
2616 taskqueue_free(adapter->tq);
2617 adapter->tq = NULL;
2618 return (error);
2619 }
2620
2621 return (0);
2622 }
2623
2624 /*********************************************************************
2625 *
2626 * Setup the MSIX Interrupt handlers
2627 * This is not really Multiqueue, rather
2628 * its just separate interrupt vectors
2629 * for TX, RX, and Link.
2630 *
2631 **********************************************************************/
2632 static int
em_allocate_msix(struct adapter * adapter)2633 em_allocate_msix(struct adapter *adapter)
2634 {
2635 device_t dev = adapter->dev;
2636 struct tx_ring *txr = adapter->tx_rings;
2637 struct rx_ring *rxr = adapter->rx_rings;
2638 int error, rid, vector = 0;
2639 int cpu_id = 0;
2640
2641
2642 /* Make sure all interrupts are disabled */
2643 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2644
2645 /* First set up ring resources */
2646 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2647
2648 /* RX ring */
2649 rid = vector + 1;
2650
2651 rxr->res = bus_alloc_resource_any(dev,
2652 SYS_RES_IRQ, &rid, RF_ACTIVE);
2653 if (rxr->res == NULL) {
2654 device_printf(dev,
2655 "Unable to allocate bus resource: "
2656 "RX MSIX Interrupt %d\n", i);
2657 return (ENXIO);
2658 }
2659 if ((error = bus_setup_intr(dev, rxr->res,
2660 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2661 rxr, &rxr->tag)) != 0) {
2662 device_printf(dev, "Failed to register RX handler");
2663 return (error);
2664 }
2665 #if __FreeBSD_version >= 800504
2666 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2667 #endif
2668 rxr->msix = vector;
2669
2670 if (em_last_bind_cpu < 0)
2671 em_last_bind_cpu = CPU_FIRST();
2672 cpu_id = em_last_bind_cpu;
2673 bus_bind_intr(dev, rxr->res, cpu_id);
2674
2675 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2676 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2677 taskqueue_thread_enqueue, &rxr->tq);
2678 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2679 device_get_nameunit(adapter->dev), cpu_id);
2680 /*
2681 ** Set the bit to enable interrupt
2682 ** in E1000_IMS -- bits 20 and 21
2683 ** are for RX0 and RX1, note this has
2684 ** NOTHING to do with the MSIX vector
2685 */
2686 rxr->ims = 1 << (20 + i);
2687 adapter->ims |= rxr->ims;
2688 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2689
2690 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2691 }
2692
2693 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2694 /* TX ring */
2695 rid = vector + 1;
2696 txr->res = bus_alloc_resource_any(dev,
2697 SYS_RES_IRQ, &rid, RF_ACTIVE);
2698 if (txr->res == NULL) {
2699 device_printf(dev,
2700 "Unable to allocate bus resource: "
2701 "TX MSIX Interrupt %d\n", i);
2702 return (ENXIO);
2703 }
2704 if ((error = bus_setup_intr(dev, txr->res,
2705 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2706 txr, &txr->tag)) != 0) {
2707 device_printf(dev, "Failed to register TX handler");
2708 return (error);
2709 }
2710 #if __FreeBSD_version >= 800504
2711 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2712 #endif
2713 txr->msix = vector;
2714
2715 if (em_last_bind_cpu < 0)
2716 em_last_bind_cpu = CPU_FIRST();
2717 cpu_id = em_last_bind_cpu;
2718 bus_bind_intr(dev, txr->res, cpu_id);
2719
2720 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2721 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2722 taskqueue_thread_enqueue, &txr->tq);
2723 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2724 device_get_nameunit(adapter->dev), cpu_id);
2725 /*
2726 ** Set the bit to enable interrupt
2727 ** in E1000_IMS -- bits 22 and 23
2728 ** are for TX0 and TX1, note this has
2729 ** NOTHING to do with the MSIX vector
2730 */
2731 txr->ims = 1 << (22 + i);
2732 adapter->ims |= txr->ims;
2733 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2734
2735 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2736 }
2737
2738 /* Link interrupt */
2739 rid = vector + 1;
2740 adapter->res = bus_alloc_resource_any(dev,
2741 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2742 if (!adapter->res) {
2743 device_printf(dev,"Unable to allocate "
2744 "bus resource: Link interrupt [%d]\n", rid);
2745 return (ENXIO);
2746 }
2747 /* Set the link handler function */
2748 error = bus_setup_intr(dev, adapter->res,
2749 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2750 em_msix_link, adapter, &adapter->tag);
2751 if (error) {
2752 adapter->res = NULL;
2753 device_printf(dev, "Failed to register LINK handler");
2754 return (error);
2755 }
2756 #if __FreeBSD_version >= 800504
2757 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2758 #endif
2759 adapter->linkvec = vector;
2760 adapter->ivars |= (8 | vector) << 16;
2761 adapter->ivars |= 0x80000000;
2762
2763 return (0);
2764 }
2765
2766
2767 static void
em_free_pci_resources(struct adapter * adapter)2768 em_free_pci_resources(struct adapter *adapter)
2769 {
2770 device_t dev = adapter->dev;
2771 struct tx_ring *txr;
2772 struct rx_ring *rxr;
2773 int rid;
2774
2775
2776 /*
2777 ** Release all the queue interrupt resources:
2778 */
2779 for (int i = 0; i < adapter->num_queues; i++) {
2780 txr = &adapter->tx_rings[i];
2781 /* an early abort? */
2782 if (txr == NULL)
2783 break;
2784 rid = txr->msix +1;
2785 if (txr->tag != NULL) {
2786 bus_teardown_intr(dev, txr->res, txr->tag);
2787 txr->tag = NULL;
2788 }
2789 if (txr->res != NULL)
2790 bus_release_resource(dev, SYS_RES_IRQ,
2791 rid, txr->res);
2792
2793 rxr = &adapter->rx_rings[i];
2794 /* an early abort? */
2795 if (rxr == NULL)
2796 break;
2797 rid = rxr->msix +1;
2798 if (rxr->tag != NULL) {
2799 bus_teardown_intr(dev, rxr->res, rxr->tag);
2800 rxr->tag = NULL;
2801 }
2802 if (rxr->res != NULL)
2803 bus_release_resource(dev, SYS_RES_IRQ,
2804 rid, rxr->res);
2805 }
2806
2807 if (adapter->linkvec) /* we are doing MSIX */
2808 rid = adapter->linkvec + 1;
2809 else
2810 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2811
2812 if (adapter->tag != NULL) {
2813 bus_teardown_intr(dev, adapter->res, adapter->tag);
2814 adapter->tag = NULL;
2815 }
2816
2817 if (adapter->res != NULL)
2818 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2819
2820
2821 if (adapter->msix)
2822 pci_release_msi(dev);
2823
2824 if (adapter->msix_mem != NULL)
2825 bus_release_resource(dev, SYS_RES_MEMORY,
2826 adapter->memrid, adapter->msix_mem);
2827
2828 if (adapter->memory != NULL)
2829 bus_release_resource(dev, SYS_RES_MEMORY,
2830 PCIR_BAR(0), adapter->memory);
2831
2832 if (adapter->flash != NULL)
2833 bus_release_resource(dev, SYS_RES_MEMORY,
2834 EM_FLASH, adapter->flash);
2835 }
2836
2837 /*
2838 * Setup MSI or MSI/X
2839 */
2840 static int
em_setup_msix(struct adapter * adapter)2841 em_setup_msix(struct adapter *adapter)
2842 {
2843 device_t dev = adapter->dev;
2844 int val;
2845
2846 /* Nearly always going to use one queue */
2847 adapter->num_queues = 1;
2848
2849 /*
2850 ** Try using MSI-X for Hartwell adapters
2851 */
2852 if ((adapter->hw.mac.type == e1000_82574) &&
2853 (em_enable_msix == TRUE)) {
2854 #ifdef EM_MULTIQUEUE
2855 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2856 if (adapter->num_queues > 1)
2857 em_enable_vectors_82574(adapter);
2858 #endif
2859 /* Map the MSIX BAR */
2860 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2861 adapter->msix_mem = bus_alloc_resource_any(dev,
2862 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2863 if (adapter->msix_mem == NULL) {
2864 /* May not be enabled */
2865 device_printf(adapter->dev,
2866 "Unable to map MSIX table \n");
2867 goto msi;
2868 }
2869 val = pci_msix_count(dev);
2870
2871 #ifdef EM_MULTIQUEUE
2872 /* We need 5 vectors in the multiqueue case */
2873 if (adapter->num_queues > 1 ) {
2874 if (val >= 5)
2875 val = 5;
2876 else {
2877 adapter->num_queues = 1;
2878 device_printf(adapter->dev,
2879 "Insufficient MSIX vectors for >1 queue, "
2880 "using single queue...\n");
2881 goto msix_one;
2882 }
2883 } else {
2884 msix_one:
2885 #endif
2886 if (val >= 3)
2887 val = 3;
2888 else {
2889 device_printf(adapter->dev,
2890 "Insufficient MSIX vectors, using MSI\n");
2891 goto msi;
2892 }
2893 #ifdef EM_MULTIQUEUE
2894 }
2895 #endif
2896
2897 if ((pci_alloc_msix(dev, &val) == 0)) {
2898 device_printf(adapter->dev,
2899 "Using MSIX interrupts "
2900 "with %d vectors\n", val);
2901 return (val);
2902 }
2903
2904 /*
2905 ** If MSIX alloc failed or provided us with
2906 ** less than needed, free and fall through to MSI
2907 */
2908 pci_release_msi(dev);
2909 }
2910 msi:
2911 if (adapter->msix_mem != NULL) {
2912 bus_release_resource(dev, SYS_RES_MEMORY,
2913 adapter->memrid, adapter->msix_mem);
2914 adapter->msix_mem = NULL;
2915 }
2916 val = 1;
2917 if (pci_alloc_msi(dev, &val) == 0) {
2918 device_printf(adapter->dev, "Using an MSI interrupt\n");
2919 return (val);
2920 }
2921 /* Should only happen due to manual configuration */
2922 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2923 return (0);
2924 }
2925
2926
2927 /*
2928 ** The 3 following flush routines are used as a workaround in the
2929 ** I219 client parts and only for them.
2930 **
2931 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2932 **
2933 ** We want to clear all pending descriptors from the TX ring.
2934 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2935 ** the data of the next descriptor. We don't care about the data we are about
2936 ** to reset the HW.
2937 */
2938 static void
em_flush_tx_ring(struct adapter * adapter)2939 em_flush_tx_ring(struct adapter *adapter)
2940 {
2941 struct e1000_hw *hw = &adapter->hw;
2942 struct tx_ring *txr = adapter->tx_rings;
2943 struct e1000_tx_desc *txd;
2944 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
2945 u16 size = 512;
2946
2947 tctl = E1000_READ_REG(hw, E1000_TCTL);
2948 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2949
2950 txd = &txr->tx_base[txr->next_avail_desc++];
2951 if (txr->next_avail_desc == adapter->num_tx_desc)
2952 txr->next_avail_desc = 0;
2953
2954 /* Just use the ring as a dummy buffer addr */
2955 txd->buffer_addr = txr->txdma.dma_paddr;
2956 txd->lower.data = htole32(txd_lower | size);
2957 txd->upper.data = 0;
2958
2959 /* flush descriptors to memory before notifying the HW */
2960 wmb();
2961
2962 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2963 mb();
2964 usec_delay(250);
2965 }
2966
2967 /*
2968 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2969 **
2970 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2971 */
2972 static void
em_flush_rx_ring(struct adapter * adapter)2973 em_flush_rx_ring(struct adapter *adapter)
2974 {
2975 struct e1000_hw *hw = &adapter->hw;
2976 u32 rctl, rxdctl;
2977
2978 rctl = E1000_READ_REG(hw, E1000_RCTL);
2979 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2980 E1000_WRITE_FLUSH(hw);
2981 usec_delay(150);
2982
2983 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2984 /* zero the lower 14 bits (prefetch and host thresholds) */
2985 rxdctl &= 0xffffc000;
2986 /*
2987 * update thresholds: prefetch threshold to 31, host threshold to 1
2988 * and make sure the granularity is "descriptors" and not "cache lines"
2989 */
2990 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2991 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2992
2993 /* momentarily enable the RX ring for the changes to take effect */
2994 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2995 E1000_WRITE_FLUSH(hw);
2996 usec_delay(150);
2997 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2998 }
2999
3000 /*
3001 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3002 **
3003 ** In i219, the descriptor rings must be emptied before resetting the HW
3004 ** or before changing the device state to D3 during runtime (runtime PM).
3005 **
3006 ** Failure to do this will cause the HW to enter a unit hang state which can
3007 ** only be released by PCI reset on the device
3008 **
3009 */
3010 static void
em_flush_desc_rings(struct adapter * adapter)3011 em_flush_desc_rings(struct adapter *adapter)
3012 {
3013 struct e1000_hw *hw = &adapter->hw;
3014 device_t dev = adapter->dev;
3015 u16 hang_state;
3016 u32 fext_nvm11, tdlen;
3017
3018 /* First, disable MULR fix in FEXTNVM11 */
3019 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3020 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3021 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3022
3023 /* do nothing if we're not in faulty state, or if the queue is empty */
3024 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3025 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3026 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3027 return;
3028 em_flush_tx_ring(adapter);
3029
3030 /* recheck, maybe the fault is caused by the rx ring */
3031 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3032 if (hang_state & FLUSH_DESC_REQUIRED)
3033 em_flush_rx_ring(adapter);
3034 }
3035
3036
3037 /*********************************************************************
3038 *
3039 * Initialize the hardware to a configuration
3040 * as specified by the adapter structure.
3041 *
3042 **********************************************************************/
3043 static void
em_reset(struct adapter * adapter)3044 em_reset(struct adapter *adapter)
3045 {
3046 device_t dev = adapter->dev;
3047 if_t ifp = adapter->ifp;
3048 struct e1000_hw *hw = &adapter->hw;
3049 u16 rx_buffer_size;
3050 u32 pba;
3051
3052 INIT_DEBUGOUT("em_reset: begin");
3053
3054 /* Set up smart power down as default off on newer adapters. */
3055 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3056 hw->mac.type == e1000_82572)) {
3057 u16 phy_tmp = 0;
3058
3059 /* Speed up time to link by disabling smart power down. */
3060 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3061 phy_tmp &= ~IGP02E1000_PM_SPD;
3062 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3063 }
3064
3065 /*
3066 * Packet Buffer Allocation (PBA)
3067 * Writing PBA sets the receive portion of the buffer
3068 * the remainder is used for the transmit buffer.
3069 */
3070 switch (hw->mac.type) {
3071 /* Total Packet Buffer on these is 48K */
3072 case e1000_82571:
3073 case e1000_82572:
3074 case e1000_80003es2lan:
3075 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3076 break;
3077 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3078 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3079 break;
3080 case e1000_82574:
3081 case e1000_82583:
3082 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3083 break;
3084 case e1000_ich8lan:
3085 pba = E1000_PBA_8K;
3086 break;
3087 case e1000_ich9lan:
3088 case e1000_ich10lan:
3089 /* Boost Receive side for jumbo frames */
3090 if (adapter->hw.mac.max_frame_size > 4096)
3091 pba = E1000_PBA_14K;
3092 else
3093 pba = E1000_PBA_10K;
3094 break;
3095 case e1000_pchlan:
3096 case e1000_pch2lan:
3097 case e1000_pch_lpt:
3098 case e1000_pch_spt:
3099 case e1000_pch_cnp:
3100 pba = E1000_PBA_26K;
3101 break;
3102 default:
3103 if (adapter->hw.mac.max_frame_size > 8192)
3104 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3105 else
3106 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3107 }
3108 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3109
3110 /*
3111 * These parameters control the automatic generation (Tx) and
3112 * response (Rx) to Ethernet PAUSE frames.
3113 * - High water mark should allow for at least two frames to be
3114 * received after sending an XOFF.
3115 * - Low water mark works best when it is very near the high water mark.
3116 * This allows the receiver to restart by sending XON when it has
3117 * drained a bit. Here we use an arbitrary value of 1500 which will
3118 * restart after one full frame is pulled from the buffer. There
3119 * could be several smaller frames in the buffer and if so they will
3120 * not trigger the XON until their total number reduces the buffer
3121 * by 1500.
3122 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3123 */
3124 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3125 hw->fc.high_water = rx_buffer_size -
3126 roundup2(adapter->hw.mac.max_frame_size, 1024);
3127 hw->fc.low_water = hw->fc.high_water - 1500;
3128
3129 if (adapter->fc) /* locally set flow control value? */
3130 hw->fc.requested_mode = adapter->fc;
3131 else
3132 hw->fc.requested_mode = e1000_fc_full;
3133
3134 if (hw->mac.type == e1000_80003es2lan)
3135 hw->fc.pause_time = 0xFFFF;
3136 else
3137 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3138
3139 hw->fc.send_xon = TRUE;
3140
3141 /* Device specific overrides/settings */
3142 switch (hw->mac.type) {
3143 case e1000_pchlan:
3144 /* Workaround: no TX flow ctrl for PCH */
3145 hw->fc.requested_mode = e1000_fc_rx_pause;
3146 hw->fc.pause_time = 0xFFFF; /* override */
3147 if (if_getmtu(ifp) > ETHERMTU) {
3148 hw->fc.high_water = 0x3500;
3149 hw->fc.low_water = 0x1500;
3150 } else {
3151 hw->fc.high_water = 0x5000;
3152 hw->fc.low_water = 0x3000;
3153 }
3154 hw->fc.refresh_time = 0x1000;
3155 break;
3156 case e1000_pch2lan:
3157 case e1000_pch_lpt:
3158 case e1000_pch_spt:
3159 case e1000_pch_cnp:
3160 hw->fc.high_water = 0x5C20;
3161 hw->fc.low_water = 0x5048;
3162 hw->fc.pause_time = 0x0650;
3163 hw->fc.refresh_time = 0x0400;
3164 /* Jumbos need adjusted PBA */
3165 if (if_getmtu(ifp) > ETHERMTU)
3166 E1000_WRITE_REG(hw, E1000_PBA, 12);
3167 else
3168 E1000_WRITE_REG(hw, E1000_PBA, 26);
3169 break;
3170 case e1000_ich9lan:
3171 case e1000_ich10lan:
3172 if (if_getmtu(ifp) > ETHERMTU) {
3173 hw->fc.high_water = 0x2800;
3174 hw->fc.low_water = hw->fc.high_water - 8;
3175 break;
3176 }
3177 /* else fall thru */
3178 default:
3179 if (hw->mac.type == e1000_80003es2lan)
3180 hw->fc.pause_time = 0xFFFF;
3181 break;
3182 }
3183
3184 /* I219 needs some special flushing to avoid hangs */
3185 if (hw->mac.type == e1000_pch_spt)
3186 em_flush_desc_rings(adapter);
3187
3188 /* Issue a global reset */
3189 e1000_reset_hw(hw);
3190 E1000_WRITE_REG(hw, E1000_WUC, 0);
3191 em_disable_aspm(adapter);
3192 /* and a re-init */
3193 if (e1000_init_hw(hw) < 0) {
3194 device_printf(dev, "Hardware Initialization Failed\n");
3195 return;
3196 }
3197
3198 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3199 e1000_get_phy_info(hw);
3200 e1000_check_for_link(hw);
3201 return;
3202 }
3203
3204 /*********************************************************************
3205 *
3206 * Setup networking device structure and register an interface.
3207 *
3208 **********************************************************************/
3209 static int
em_setup_interface(device_t dev,struct adapter * adapter)3210 em_setup_interface(device_t dev, struct adapter *adapter)
3211 {
3212 if_t ifp;
3213
3214 INIT_DEBUGOUT("em_setup_interface: begin");
3215
3216 ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3217 if (ifp == 0) {
3218 device_printf(dev, "can not allocate ifnet structure\n");
3219 return (-1);
3220 }
3221 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3222 if_setdev(ifp, dev);
3223 if_setinitfn(ifp, em_init);
3224 if_setsoftc(ifp, adapter);
3225 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3226 if_setioctlfn(ifp, em_ioctl);
3227 if_setgetcounterfn(ifp, em_get_counter);
3228
3229 /* TSO parameters */
3230 ifp->if_hw_tsomax = IP_MAXPACKET;
3231 /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3232 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3233 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3234
3235 #ifdef EM_MULTIQUEUE
3236 /* Multiqueue stack interface */
3237 if_settransmitfn(ifp, em_mq_start);
3238 if_setqflushfn(ifp, em_qflush);
3239 #else
3240 if_setstartfn(ifp, em_start);
3241 if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3242 if_setsendqready(ifp);
3243 #endif
3244
3245 ether_ifattach(ifp, adapter->hw.mac.addr);
3246
3247 if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM);
3248 if_setcapenable(ifp, if_getcapabilities(ifp));
3249
3250 /*
3251 * Tell the upper layer(s) we
3252 * support full VLAN capability
3253 */
3254 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3255 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3256 IFCAP_VLAN_MTU, 0);
3257 if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
3258
3259 /*
3260 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3261 * - Although the silicon bug of TSO only working at gigabit speed is
3262 * worked around in em_update_link_status() by selectively setting
3263 * CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3264 * descriptors. Thus, such descriptors may still cause the MAC to
3265 * hang and, consequently, TSO is only safe to be used in setups
3266 * where the link isn't expected to switch from gigabit to lower
3267 * speeds.
3268 * - Similarly, there's currently no way to trigger a reconfiguration
3269 * of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3270 * runtime. Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3271 * when link speed changes are not to be expected.
3272 * - Despite all the workarounds for TSO-related silicon bugs, at
3273 * least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3274 */
3275 if_setcapabilitiesbit(ifp, IFCAP_TSO4 | IFCAP_VLAN_HWTSO, 0);
3276
3277 /*
3278 ** Don't turn this on by default, if vlans are
3279 ** created on another pseudo device (eg. lagg)
3280 ** then vlan events are not passed thru, breaking
3281 ** operation, but with HW FILTER off it works. If
3282 ** using vlans directly on the em driver you can
3283 ** enable this and get full hardware tag filtering.
3284 */
3285 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3286
3287 #ifdef DEVICE_POLLING
3288 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3289 #endif
3290
3291 /* Enable only WOL MAGIC by default */
3292 if (adapter->wol) {
3293 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3294 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3295 }
3296
3297 /*
3298 * Specify the media types supported by this adapter and register
3299 * callbacks to update media and link information
3300 */
3301 ifmedia_init(&adapter->media, IFM_IMASK,
3302 em_media_change, em_media_status);
3303 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3304 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3305 u_char fiber_type = IFM_1000_SX; /* default type */
3306
3307 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3308 0, NULL);
3309 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3310 } else {
3311 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3312 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3313 0, NULL);
3314 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3315 0, NULL);
3316 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3317 0, NULL);
3318 if (adapter->hw.phy.type != e1000_phy_ife) {
3319 ifmedia_add(&adapter->media,
3320 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3321 ifmedia_add(&adapter->media,
3322 IFM_ETHER | IFM_1000_T, 0, NULL);
3323 }
3324 }
3325 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3326 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3327 return (0);
3328 }
3329
3330
3331 /*
3332 * Manage DMA'able memory.
3333 */
3334 static void
em_dmamap_cb(void * arg,bus_dma_segment_t * segs,int nseg,int error)3335 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3336 {
3337 if (error)
3338 return;
3339 *(bus_addr_t *) arg = segs[0].ds_addr;
3340 }
3341
3342 static int
em_dma_malloc(struct adapter * adapter,bus_size_t size,struct em_dma_alloc * dma,int mapflags)3343 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3344 struct em_dma_alloc *dma, int mapflags)
3345 {
3346 int error;
3347
3348 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3349 EM_DBA_ALIGN, 0, /* alignment, bounds */
3350 BUS_SPACE_MAXADDR, /* lowaddr */
3351 BUS_SPACE_MAXADDR, /* highaddr */
3352 NULL, NULL, /* filter, filterarg */
3353 size, /* maxsize */
3354 1, /* nsegments */
3355 size, /* maxsegsize */
3356 0, /* flags */
3357 NULL, /* lockfunc */
3358 NULL, /* lockarg */
3359 &dma->dma_tag);
3360 if (error) {
3361 device_printf(adapter->dev,
3362 "%s: bus_dma_tag_create failed: %d\n",
3363 __func__, error);
3364 goto fail_0;
3365 }
3366
3367 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3368 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3369 if (error) {
3370 device_printf(adapter->dev,
3371 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3372 __func__, (uintmax_t)size, error);
3373 goto fail_2;
3374 }
3375
3376 dma->dma_paddr = 0;
3377 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3378 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3379 if (error || dma->dma_paddr == 0) {
3380 device_printf(adapter->dev,
3381 "%s: bus_dmamap_load failed: %d\n",
3382 __func__, error);
3383 goto fail_3;
3384 }
3385
3386 return (0);
3387
3388 fail_3:
3389 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3390 fail_2:
3391 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3392 bus_dma_tag_destroy(dma->dma_tag);
3393 fail_0:
3394 dma->dma_tag = NULL;
3395
3396 return (error);
3397 }
3398
3399 static void
em_dma_free(struct adapter * adapter,struct em_dma_alloc * dma)3400 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3401 {
3402 if (dma->dma_tag == NULL)
3403 return;
3404 if (dma->dma_paddr != 0) {
3405 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3406 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3407 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3408 dma->dma_paddr = 0;
3409 }
3410 if (dma->dma_vaddr != NULL) {
3411 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3412 dma->dma_vaddr = NULL;
3413 }
3414 bus_dma_tag_destroy(dma->dma_tag);
3415 dma->dma_tag = NULL;
3416 }
3417
3418
3419 /*********************************************************************
3420 *
3421 * Allocate memory for the transmit and receive rings, and then
3422 * the descriptors associated with each, called only once at attach.
3423 *
3424 **********************************************************************/
3425 static int
em_allocate_queues(struct adapter * adapter)3426 em_allocate_queues(struct adapter *adapter)
3427 {
3428 device_t dev = adapter->dev;
3429 struct tx_ring *txr = NULL;
3430 struct rx_ring *rxr = NULL;
3431 int rsize, tsize, error = E1000_SUCCESS;
3432 int txconf = 0, rxconf = 0;
3433
3434
3435 /* Allocate the TX ring struct memory */
3436 if (!(adapter->tx_rings =
3437 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3438 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3439 device_printf(dev, "Unable to allocate TX ring memory\n");
3440 error = ENOMEM;
3441 goto fail;
3442 }
3443
3444 /* Now allocate the RX */
3445 if (!(adapter->rx_rings =
3446 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3447 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3448 device_printf(dev, "Unable to allocate RX ring memory\n");
3449 error = ENOMEM;
3450 goto rx_fail;
3451 }
3452
3453 tsize = roundup2(adapter->num_tx_desc *
3454 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3455 /*
3456 * Now set up the TX queues, txconf is needed to handle the
3457 * possibility that things fail midcourse and we need to
3458 * undo memory gracefully
3459 */
3460 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3461 /* Set up some basics */
3462 txr = &adapter->tx_rings[i];
3463 txr->adapter = adapter;
3464 txr->me = i;
3465
3466 /* Initialize the TX lock */
3467 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3468 device_get_nameunit(dev), txr->me);
3469 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3470
3471 if (em_dma_malloc(adapter, tsize,
3472 &txr->txdma, BUS_DMA_NOWAIT)) {
3473 device_printf(dev,
3474 "Unable to allocate TX Descriptor memory\n");
3475 error = ENOMEM;
3476 goto err_tx_desc;
3477 }
3478 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3479 bzero((void *)txr->tx_base, tsize);
3480
3481 if (em_allocate_transmit_buffers(txr)) {
3482 device_printf(dev,
3483 "Critical Failure setting up transmit buffers\n");
3484 error = ENOMEM;
3485 goto err_tx_desc;
3486 }
3487 #if __FreeBSD_version >= 800000
3488 /* Allocate a buf ring */
3489 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3490 M_WAITOK, &txr->tx_mtx);
3491 #endif
3492 }
3493
3494 /*
3495 * Next the RX queues...
3496 */
3497 rsize = roundup2(adapter->num_rx_desc *
3498 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3499 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3500 rxr = &adapter->rx_rings[i];
3501 rxr->adapter = adapter;
3502 rxr->me = i;
3503
3504 /* Initialize the RX lock */
3505 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3506 device_get_nameunit(dev), txr->me);
3507 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3508
3509 if (em_dma_malloc(adapter, rsize,
3510 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3511 device_printf(dev,
3512 "Unable to allocate RxDescriptor memory\n");
3513 error = ENOMEM;
3514 goto err_rx_desc;
3515 }
3516 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3517 bzero((void *)rxr->rx_base, rsize);
3518
3519 /* Allocate receive buffers for the ring*/
3520 if (em_allocate_receive_buffers(rxr)) {
3521 device_printf(dev,
3522 "Critical Failure setting up receive buffers\n");
3523 error = ENOMEM;
3524 goto err_rx_desc;
3525 }
3526 }
3527
3528 return (0);
3529
3530 err_rx_desc:
3531 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3532 em_dma_free(adapter, &rxr->rxdma);
3533 err_tx_desc:
3534 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3535 em_dma_free(adapter, &txr->txdma);
3536 free(adapter->rx_rings, M_DEVBUF);
3537 rx_fail:
3538 #if __FreeBSD_version >= 800000
3539 buf_ring_free(txr->br, M_DEVBUF);
3540 #endif
3541 free(adapter->tx_rings, M_DEVBUF);
3542 fail:
3543 return (error);
3544 }
3545
3546
3547 /*********************************************************************
3548 *
3549 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3550 * the information needed to transmit a packet on the wire. This is
3551 * called only once at attach, setup is done every reset.
3552 *
3553 **********************************************************************/
3554 static int
em_allocate_transmit_buffers(struct tx_ring * txr)3555 em_allocate_transmit_buffers(struct tx_ring *txr)
3556 {
3557 struct adapter *adapter = txr->adapter;
3558 device_t dev = adapter->dev;
3559 struct em_txbuffer *txbuf;
3560 int error, i;
3561
3562 /*
3563 * Setup DMA descriptor areas.
3564 */
3565 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3566 1, 0, /* alignment, bounds */
3567 BUS_SPACE_MAXADDR, /* lowaddr */
3568 BUS_SPACE_MAXADDR, /* highaddr */
3569 NULL, NULL, /* filter, filterarg */
3570 EM_TSO_SIZE, /* maxsize */
3571 EM_MAX_SCATTER, /* nsegments */
3572 PAGE_SIZE, /* maxsegsize */
3573 0, /* flags */
3574 NULL, /* lockfunc */
3575 NULL, /* lockfuncarg */
3576 &txr->txtag))) {
3577 device_printf(dev,"Unable to allocate TX DMA tag\n");
3578 goto fail;
3579 }
3580
3581 if (!(txr->tx_buffers =
3582 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3583 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3584 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3585 error = ENOMEM;
3586 goto fail;
3587 }
3588
3589 /* Create the descriptor buffer dma maps */
3590 txbuf = txr->tx_buffers;
3591 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3592 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3593 if (error != 0) {
3594 device_printf(dev, "Unable to create TX DMA map\n");
3595 goto fail;
3596 }
3597 }
3598
3599 return 0;
3600 fail:
3601 /* We free all, it handles case where we are in the middle */
3602 em_free_transmit_structures(adapter);
3603 return (error);
3604 }
3605
3606 /*********************************************************************
3607 *
3608 * Initialize a transmit ring.
3609 *
3610 **********************************************************************/
3611 static void
em_setup_transmit_ring(struct tx_ring * txr)3612 em_setup_transmit_ring(struct tx_ring *txr)
3613 {
3614 struct adapter *adapter = txr->adapter;
3615 struct em_txbuffer *txbuf;
3616 int i;
3617 #ifdef DEV_NETMAP
3618 struct netmap_slot *slot;
3619 struct netmap_adapter *na = netmap_getna(adapter->ifp);
3620 #endif /* DEV_NETMAP */
3621
3622 /* Clear the old descriptor contents */
3623 EM_TX_LOCK(txr);
3624 #ifdef DEV_NETMAP
3625 slot = netmap_reset(na, NR_TX, txr->me, 0);
3626 #endif /* DEV_NETMAP */
3627
3628 bzero((void *)txr->tx_base,
3629 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3630 /* Reset indices */
3631 txr->next_avail_desc = 0;
3632 txr->next_to_clean = 0;
3633
3634 /* Free any existing tx buffers. */
3635 txbuf = txr->tx_buffers;
3636 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3637 if (txbuf->m_head != NULL) {
3638 bus_dmamap_sync(txr->txtag, txbuf->map,
3639 BUS_DMASYNC_POSTWRITE);
3640 bus_dmamap_unload(txr->txtag, txbuf->map);
3641 m_freem(txbuf->m_head);
3642 txbuf->m_head = NULL;
3643 }
3644 #ifdef DEV_NETMAP
3645 if (slot) {
3646 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
3647 uint64_t paddr;
3648 void *addr;
3649
3650 addr = PNMB(na, slot + si, &paddr);
3651 txr->tx_base[i].buffer_addr = htole64(paddr);
3652 /* reload the map for netmap mode */
3653 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3654 }
3655 #endif /* DEV_NETMAP */
3656
3657 /* clear the watch index */
3658 txbuf->next_eop = -1;
3659 }
3660
3661 /* Set number of descriptors available */
3662 txr->tx_avail = adapter->num_tx_desc;
3663 txr->busy = EM_TX_IDLE;
3664
3665 /* Clear checksum offload context. */
3666 txr->last_hw_offload = 0;
3667 txr->last_hw_ipcss = 0;
3668 txr->last_hw_ipcso = 0;
3669 txr->last_hw_tucss = 0;
3670 txr->last_hw_tucso = 0;
3671
3672 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3673 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3674 EM_TX_UNLOCK(txr);
3675 }
3676
3677 /*********************************************************************
3678 *
3679 * Initialize all transmit rings.
3680 *
3681 **********************************************************************/
3682 static void
em_setup_transmit_structures(struct adapter * adapter)3683 em_setup_transmit_structures(struct adapter *adapter)
3684 {
3685 struct tx_ring *txr = adapter->tx_rings;
3686
3687 for (int i = 0; i < adapter->num_queues; i++, txr++)
3688 em_setup_transmit_ring(txr);
3689
3690 return;
3691 }
3692
3693 /*********************************************************************
3694 *
3695 * Enable transmit unit.
3696 *
3697 **********************************************************************/
3698 static void
em_initialize_transmit_unit(struct adapter * adapter)3699 em_initialize_transmit_unit(struct adapter *adapter)
3700 {
3701 struct tx_ring *txr = adapter->tx_rings;
3702 struct e1000_hw *hw = &adapter->hw;
3703 u32 tctl, txdctl = 0, tarc, tipg = 0;
3704
3705 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3706
3707 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3708 u64 bus_addr = txr->txdma.dma_paddr;
3709 /* Base and Len of TX Ring */
3710 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3711 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3712 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3713 (u32)(bus_addr >> 32));
3714 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3715 (u32)bus_addr);
3716 /* Init the HEAD/TAIL indices */
3717 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3718 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3719
3720 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3721 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3722 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3723
3724 txr->busy = EM_TX_IDLE;
3725 txdctl = 0; /* clear txdctl */
3726 txdctl |= 0x1f; /* PTHRESH */
3727 txdctl |= 1 << 8; /* HTHRESH */
3728 txdctl |= 1 << 16;/* WTHRESH */
3729 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3730 txdctl |= E1000_TXDCTL_GRAN;
3731 txdctl |= 1 << 25; /* LWTHRESH */
3732
3733 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3734 }
3735
3736 /* Set the default values for the Tx Inter Packet Gap timer */
3737 switch (adapter->hw.mac.type) {
3738 case e1000_80003es2lan:
3739 tipg = DEFAULT_82543_TIPG_IPGR1;
3740 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3741 E1000_TIPG_IPGR2_SHIFT;
3742 break;
3743 default:
3744 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3745 (adapter->hw.phy.media_type ==
3746 e1000_media_type_internal_serdes))
3747 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3748 else
3749 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3750 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3751 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3752 }
3753
3754 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3755 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3756
3757 if(adapter->hw.mac.type >= e1000_82540)
3758 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3759 adapter->tx_abs_int_delay.value);
3760
3761 if ((adapter->hw.mac.type == e1000_82571) ||
3762 (adapter->hw.mac.type == e1000_82572)) {
3763 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3764 tarc |= TARC_SPEED_MODE_BIT;
3765 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3766 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3767 /* errata: program both queues to unweighted RR */
3768 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3769 tarc |= 1;
3770 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3771 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3772 tarc |= 1;
3773 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3774 } else if (adapter->hw.mac.type == e1000_82574) {
3775 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3776 tarc |= TARC_ERRATA_BIT;
3777 if ( adapter->num_queues > 1) {
3778 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3779 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3780 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3781 } else
3782 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3783 }
3784
3785 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3786 if (adapter->tx_int_delay.value > 0)
3787 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3788
3789 /* Program the Transmit Control Register */
3790 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3791 tctl &= ~E1000_TCTL_CT;
3792 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3793 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3794
3795 if (adapter->hw.mac.type >= e1000_82571)
3796 tctl |= E1000_TCTL_MULR;
3797
3798 /* This write will effectively turn on the transmit unit. */
3799 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3800
3801 /* SPT and KBL errata workarounds */
3802 if (hw->mac.type == e1000_pch_spt) {
3803 u32 reg;
3804 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3805 reg |= E1000_RCTL_RDMTS_HEX;
3806 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3807 /* i218-i219 Specification Update 1.5.4.5 */
3808 reg = E1000_READ_REG(hw, E1000_TARC(0));
3809 reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3810 reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3811 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3812 }
3813 }
3814
3815
3816 /*********************************************************************
3817 *
3818 * Free all transmit rings.
3819 *
3820 **********************************************************************/
3821 static void
em_free_transmit_structures(struct adapter * adapter)3822 em_free_transmit_structures(struct adapter *adapter)
3823 {
3824 struct tx_ring *txr = adapter->tx_rings;
3825
3826 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3827 EM_TX_LOCK(txr);
3828 em_free_transmit_buffers(txr);
3829 em_dma_free(adapter, &txr->txdma);
3830 EM_TX_UNLOCK(txr);
3831 EM_TX_LOCK_DESTROY(txr);
3832 }
3833
3834 free(adapter->tx_rings, M_DEVBUF);
3835 }
3836
3837 /*********************************************************************
3838 *
3839 * Free transmit ring related data structures.
3840 *
3841 **********************************************************************/
3842 static void
em_free_transmit_buffers(struct tx_ring * txr)3843 em_free_transmit_buffers(struct tx_ring *txr)
3844 {
3845 struct adapter *adapter = txr->adapter;
3846 struct em_txbuffer *txbuf;
3847
3848 INIT_DEBUGOUT("free_transmit_ring: begin");
3849
3850 if (txr->tx_buffers == NULL)
3851 return;
3852
3853 for (int i = 0; i < adapter->num_tx_desc; i++) {
3854 txbuf = &txr->tx_buffers[i];
3855 if (txbuf->m_head != NULL) {
3856 bus_dmamap_sync(txr->txtag, txbuf->map,
3857 BUS_DMASYNC_POSTWRITE);
3858 bus_dmamap_unload(txr->txtag,
3859 txbuf->map);
3860 m_freem(txbuf->m_head);
3861 txbuf->m_head = NULL;
3862 if (txbuf->map != NULL) {
3863 bus_dmamap_destroy(txr->txtag,
3864 txbuf->map);
3865 txbuf->map = NULL;
3866 }
3867 } else if (txbuf->map != NULL) {
3868 bus_dmamap_unload(txr->txtag,
3869 txbuf->map);
3870 bus_dmamap_destroy(txr->txtag,
3871 txbuf->map);
3872 txbuf->map = NULL;
3873 }
3874 }
3875 #if __FreeBSD_version >= 800000
3876 if (txr->br != NULL)
3877 buf_ring_free(txr->br, M_DEVBUF);
3878 #endif
3879 if (txr->tx_buffers != NULL) {
3880 free(txr->tx_buffers, M_DEVBUF);
3881 txr->tx_buffers = NULL;
3882 }
3883 if (txr->txtag != NULL) {
3884 bus_dma_tag_destroy(txr->txtag);
3885 txr->txtag = NULL;
3886 }
3887 return;
3888 }
3889
3890
3891 /*********************************************************************
3892 * The offload context is protocol specific (TCP/UDP) and thus
3893 * only needs to be set when the protocol changes. The occasion
3894 * of a context change can be a performance detriment, and
3895 * might be better just disabled. The reason arises in the way
3896 * in which the controller supports pipelined requests from the
3897 * Tx data DMA. Up to four requests can be pipelined, and they may
3898 * belong to the same packet or to multiple packets. However all
3899 * requests for one packet are issued before a request is issued
3900 * for a subsequent packet and if a request for the next packet
3901 * requires a context change, that request will be stalled
3902 * until the previous request completes. This means setting up
3903 * a new context effectively disables pipelined Tx data DMA which
3904 * in turn greatly slow down performance to send small sized
3905 * frames.
3906 **********************************************************************/
3907 static void
em_transmit_checksum_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,u32 * txd_upper,u32 * txd_lower)3908 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3909 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3910 {
3911 struct adapter *adapter = txr->adapter;
3912 struct e1000_context_desc *TXD = NULL;
3913 struct em_txbuffer *tx_buffer;
3914 int cur, hdr_len;
3915 u32 cmd = 0;
3916 u16 offload = 0;
3917 u8 ipcso, ipcss, tucso, tucss;
3918
3919 ipcss = ipcso = tucss = tucso = 0;
3920 hdr_len = ip_off + (ip->ip_hl << 2);
3921 cur = txr->next_avail_desc;
3922
3923 /* Setup of IP header checksum. */
3924 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3925 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3926 offload |= CSUM_IP;
3927 ipcss = ip_off;
3928 ipcso = ip_off + offsetof(struct ip, ip_sum);
3929 /*
3930 * Start offset for header checksum calculation.
3931 * End offset for header checksum calculation.
3932 * Offset of place to put the checksum.
3933 */
3934 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3935 TXD->lower_setup.ip_fields.ipcss = ipcss;
3936 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3937 TXD->lower_setup.ip_fields.ipcso = ipcso;
3938 cmd |= E1000_TXD_CMD_IP;
3939 }
3940
3941 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3942 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3943 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3944 offload |= CSUM_TCP;
3945 tucss = hdr_len;
3946 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3947 /*
3948 * The 82574L can only remember the *last* context used
3949 * regardless of queue that it was use for. We cannot reuse
3950 * contexts on this hardware platform and must generate a new
3951 * context every time. 82574L hardware spec, section 7.2.6,
3952 * second note.
3953 */
3954 if (adapter->num_queues < 2) {
3955 /*
3956 * Setting up new checksum offload context for every
3957 * frames takes a lot of processing time for hardware.
3958 * This also reduces performance a lot for small sized
3959 * frames so avoid it if driver can use previously
3960 * configured checksum offload context.
3961 */
3962 if (txr->last_hw_offload == offload) {
3963 if (offload & CSUM_IP) {
3964 if (txr->last_hw_ipcss == ipcss &&
3965 txr->last_hw_ipcso == ipcso &&
3966 txr->last_hw_tucss == tucss &&
3967 txr->last_hw_tucso == tucso)
3968 return;
3969 } else {
3970 if (txr->last_hw_tucss == tucss &&
3971 txr->last_hw_tucso == tucso)
3972 return;
3973 }
3974 }
3975 txr->last_hw_offload = offload;
3976 txr->last_hw_tucss = tucss;
3977 txr->last_hw_tucso = tucso;
3978 }
3979 /*
3980 * Start offset for payload checksum calculation.
3981 * End offset for payload checksum calculation.
3982 * Offset of place to put the checksum.
3983 */
3984 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3985 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3986 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3987 TXD->upper_setup.tcp_fields.tucso = tucso;
3988 cmd |= E1000_TXD_CMD_TCP;
3989 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3990 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3991 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3992 tucss = hdr_len;
3993 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3994 /*
3995 * The 82574L can only remember the *last* context used
3996 * regardless of queue that it was use for. We cannot reuse
3997 * contexts on this hardware platform and must generate a new
3998 * context every time. 82574L hardware spec, section 7.2.6,
3999 * second note.
4000 */
4001 if (adapter->num_queues < 2) {
4002 /*
4003 * Setting up new checksum offload context for every
4004 * frames takes a lot of processing time for hardware.
4005 * This also reduces performance a lot for small sized
4006 * frames so avoid it if driver can use previously
4007 * configured checksum offload context.
4008 */
4009 if (txr->last_hw_offload == offload) {
4010 if (offload & CSUM_IP) {
4011 if (txr->last_hw_ipcss == ipcss &&
4012 txr->last_hw_ipcso == ipcso &&
4013 txr->last_hw_tucss == tucss &&
4014 txr->last_hw_tucso == tucso)
4015 return;
4016 } else {
4017 if (txr->last_hw_tucss == tucss &&
4018 txr->last_hw_tucso == tucso)
4019 return;
4020 }
4021 }
4022 txr->last_hw_offload = offload;
4023 txr->last_hw_tucss = tucss;
4024 txr->last_hw_tucso = tucso;
4025 }
4026 /*
4027 * Start offset for header checksum calculation.
4028 * End offset for header checksum calculation.
4029 * Offset of place to put the checksum.
4030 */
4031 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4032 TXD->upper_setup.tcp_fields.tucss = tucss;
4033 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4034 TXD->upper_setup.tcp_fields.tucso = tucso;
4035 }
4036
4037 if (offload & CSUM_IP) {
4038 txr->last_hw_ipcss = ipcss;
4039 txr->last_hw_ipcso = ipcso;
4040 }
4041
4042 TXD->tcp_seg_setup.data = htole32(0);
4043 TXD->cmd_and_length =
4044 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4045 tx_buffer = &txr->tx_buffers[cur];
4046 tx_buffer->m_head = NULL;
4047 tx_buffer->next_eop = -1;
4048
4049 if (++cur == adapter->num_tx_desc)
4050 cur = 0;
4051
4052 txr->tx_avail--;
4053 txr->next_avail_desc = cur;
4054 }
4055
4056
4057 /**********************************************************************
4058 *
4059 * Setup work for hardware segmentation offload (TSO)
4060 *
4061 **********************************************************************/
4062 static void
em_tso_setup(struct tx_ring * txr,struct mbuf * mp,int ip_off,struct ip * ip,struct tcphdr * tp,u32 * txd_upper,u32 * txd_lower)4063 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4064 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4065 {
4066 struct adapter *adapter = txr->adapter;
4067 struct e1000_context_desc *TXD;
4068 struct em_txbuffer *tx_buffer;
4069 int cur, hdr_len;
4070
4071 /*
4072 * In theory we can use the same TSO context if and only if
4073 * frame is the same type(IP/TCP) and the same MSS. However
4074 * checking whether a frame has the same IP/TCP structure is
4075 * hard thing so just ignore that and always restablish a
4076 * new TSO context.
4077 */
4078 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4079 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
4080 E1000_TXD_DTYP_D | /* Data descr type */
4081 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
4082
4083 /* IP and/or TCP header checksum calculation and insertion. */
4084 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4085
4086 cur = txr->next_avail_desc;
4087 tx_buffer = &txr->tx_buffers[cur];
4088 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4089
4090 /*
4091 * Start offset for header checksum calculation.
4092 * End offset for header checksum calculation.
4093 * Offset of place put the checksum.
4094 */
4095 TXD->lower_setup.ip_fields.ipcss = ip_off;
4096 TXD->lower_setup.ip_fields.ipcse =
4097 htole16(ip_off + (ip->ip_hl << 2) - 1);
4098 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4099 /*
4100 * Start offset for payload checksum calculation.
4101 * End offset for payload checksum calculation.
4102 * Offset of place to put the checksum.
4103 */
4104 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4105 TXD->upper_setup.tcp_fields.tucse = 0;
4106 TXD->upper_setup.tcp_fields.tucso =
4107 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4108 /*
4109 * Payload size per packet w/o any headers.
4110 * Length of all headers up to payload.
4111 */
4112 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4113 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4114
4115 TXD->cmd_and_length = htole32(adapter->txd_cmd |
4116 E1000_TXD_CMD_DEXT | /* Extended descr */
4117 E1000_TXD_CMD_TSE | /* TSE context */
4118 E1000_TXD_CMD_IP | /* Do IP csum */
4119 E1000_TXD_CMD_TCP | /* Do TCP checksum */
4120 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4121
4122 tx_buffer->m_head = NULL;
4123 tx_buffer->next_eop = -1;
4124
4125 if (++cur == adapter->num_tx_desc)
4126 cur = 0;
4127
4128 txr->tx_avail--;
4129 txr->next_avail_desc = cur;
4130 txr->tx_tso = TRUE;
4131 }
4132
4133
4134 /**********************************************************************
4135 *
4136 * Examine each tx_buffer in the used queue. If the hardware is done
4137 * processing the packet then free associated resources. The
4138 * tx_buffer is put back on the free queue.
4139 *
4140 **********************************************************************/
4141 static void
em_txeof(struct tx_ring * txr)4142 em_txeof(struct tx_ring *txr)
4143 {
4144 struct adapter *adapter = txr->adapter;
4145 int first, last, done, processed;
4146 struct em_txbuffer *tx_buffer;
4147 struct e1000_tx_desc *tx_desc, *eop_desc;
4148 if_t ifp = adapter->ifp;
4149
4150 EM_TX_LOCK_ASSERT(txr);
4151 #ifdef DEV_NETMAP
4152 if (netmap_tx_irq(ifp, txr->me))
4153 return;
4154 #endif /* DEV_NETMAP */
4155
4156 /* No work, make sure hang detection is disabled */
4157 if (txr->tx_avail == adapter->num_tx_desc) {
4158 txr->busy = EM_TX_IDLE;
4159 return;
4160 }
4161
4162 processed = 0;
4163 first = txr->next_to_clean;
4164 tx_desc = &txr->tx_base[first];
4165 tx_buffer = &txr->tx_buffers[first];
4166 last = tx_buffer->next_eop;
4167 eop_desc = &txr->tx_base[last];
4168
4169 /*
4170 * What this does is get the index of the
4171 * first descriptor AFTER the EOP of the
4172 * first packet, that way we can do the
4173 * simple comparison on the inner while loop.
4174 */
4175 if (++last == adapter->num_tx_desc)
4176 last = 0;
4177 done = last;
4178
4179 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4180 BUS_DMASYNC_POSTREAD);
4181
4182 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4183 /* We clean the range of the packet */
4184 while (first != done) {
4185 tx_desc->upper.data = 0;
4186 tx_desc->lower.data = 0;
4187 tx_desc->buffer_addr = 0;
4188 ++txr->tx_avail;
4189 ++processed;
4190
4191 if (tx_buffer->m_head) {
4192 bus_dmamap_sync(txr->txtag,
4193 tx_buffer->map,
4194 BUS_DMASYNC_POSTWRITE);
4195 bus_dmamap_unload(txr->txtag,
4196 tx_buffer->map);
4197 m_freem(tx_buffer->m_head);
4198 tx_buffer->m_head = NULL;
4199 }
4200 tx_buffer->next_eop = -1;
4201
4202 if (++first == adapter->num_tx_desc)
4203 first = 0;
4204
4205 tx_buffer = &txr->tx_buffers[first];
4206 tx_desc = &txr->tx_base[first];
4207 }
4208 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4209 /* See if we can continue to the next packet */
4210 last = tx_buffer->next_eop;
4211 if (last != -1) {
4212 eop_desc = &txr->tx_base[last];
4213 /* Get new done point */
4214 if (++last == adapter->num_tx_desc) last = 0;
4215 done = last;
4216 } else
4217 break;
4218 }
4219 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4220 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4221
4222 txr->next_to_clean = first;
4223
4224 /*
4225 ** Hang detection: we know there's work outstanding
4226 ** or the entry return would have been taken, so no
4227 ** descriptor processed here indicates a potential hang.
4228 ** The local timer will examine this and do a reset if needed.
4229 */
4230 if (processed == 0) {
4231 if (txr->busy != EM_TX_HUNG)
4232 ++txr->busy;
4233 } else /* At least one descriptor was cleaned */
4234 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4235
4236 /*
4237 * If we have a minimum free, clear IFF_DRV_OACTIVE
4238 * to tell the stack that it is OK to send packets.
4239 * Notice that all writes of OACTIVE happen under the
4240 * TX lock which, with a single queue, guarantees
4241 * sanity.
4242 */
4243 if (txr->tx_avail >= EM_MAX_SCATTER) {
4244 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4245 }
4246
4247 /* Disable hang detection if all clean */
4248 if (txr->tx_avail == adapter->num_tx_desc)
4249 txr->busy = EM_TX_IDLE;
4250 }
4251
4252 /*********************************************************************
4253 *
4254 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4255 *
4256 **********************************************************************/
4257 static void
em_refresh_mbufs(struct rx_ring * rxr,int limit)4258 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4259 {
4260 struct adapter *adapter = rxr->adapter;
4261 struct mbuf *m;
4262 bus_dma_segment_t segs;
4263 struct em_rxbuffer *rxbuf;
4264 int i, j, error, nsegs;
4265 bool cleaned = FALSE;
4266
4267 i = j = rxr->next_to_refresh;
4268 /*
4269 ** Get one descriptor beyond
4270 ** our work mark to control
4271 ** the loop.
4272 */
4273 if (++j == adapter->num_rx_desc)
4274 j = 0;
4275
4276 while (j != limit) {
4277 rxbuf = &rxr->rx_buffers[i];
4278 if (rxbuf->m_head == NULL) {
4279 m = m_getjcl(M_NOWAIT, MT_DATA,
4280 M_PKTHDR, adapter->rx_mbuf_sz);
4281 /*
4282 ** If we have a temporary resource shortage
4283 ** that causes a failure, just abort refresh
4284 ** for now, we will return to this point when
4285 ** reinvoked from em_rxeof.
4286 */
4287 if (m == NULL)
4288 goto update;
4289 } else
4290 m = rxbuf->m_head;
4291
4292 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4293 m->m_flags |= M_PKTHDR;
4294 m->m_data = m->m_ext.ext_buf;
4295
4296 /* Use bus_dma machinery to setup the memory mapping */
4297 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4298 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4299 if (error != 0) {
4300 printf("Refresh mbufs: hdr dmamap load"
4301 " failure - %d\n", error);
4302 m_free(m);
4303 rxbuf->m_head = NULL;
4304 goto update;
4305 }
4306 rxbuf->m_head = m;
4307 rxbuf->paddr = segs.ds_addr;
4308 bus_dmamap_sync(rxr->rxtag,
4309 rxbuf->map, BUS_DMASYNC_PREREAD);
4310 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4311 cleaned = TRUE;
4312
4313 i = j; /* Next is precalulated for us */
4314 rxr->next_to_refresh = i;
4315 /* Calculate next controlling index */
4316 if (++j == adapter->num_rx_desc)
4317 j = 0;
4318 }
4319 update:
4320 /*
4321 ** Update the tail pointer only if,
4322 ** and as far as we have refreshed.
4323 */
4324 if (cleaned)
4325 E1000_WRITE_REG(&adapter->hw,
4326 E1000_RDT(rxr->me), rxr->next_to_refresh);
4327
4328 return;
4329 }
4330
4331
4332 /*********************************************************************
4333 *
4334 * Allocate memory for rx_buffer structures. Since we use one
4335 * rx_buffer per received packet, the maximum number of rx_buffer's
4336 * that we'll need is equal to the number of receive descriptors
4337 * that we've allocated.
4338 *
4339 **********************************************************************/
4340 static int
em_allocate_receive_buffers(struct rx_ring * rxr)4341 em_allocate_receive_buffers(struct rx_ring *rxr)
4342 {
4343 struct adapter *adapter = rxr->adapter;
4344 device_t dev = adapter->dev;
4345 struct em_rxbuffer *rxbuf;
4346 int error;
4347
4348 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4349 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4350 if (rxr->rx_buffers == NULL) {
4351 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4352 return (ENOMEM);
4353 }
4354
4355 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4356 1, 0, /* alignment, bounds */
4357 BUS_SPACE_MAXADDR, /* lowaddr */
4358 BUS_SPACE_MAXADDR, /* highaddr */
4359 NULL, NULL, /* filter, filterarg */
4360 MJUM9BYTES, /* maxsize */
4361 1, /* nsegments */
4362 MJUM9BYTES, /* maxsegsize */
4363 0, /* flags */
4364 NULL, /* lockfunc */
4365 NULL, /* lockarg */
4366 &rxr->rxtag);
4367 if (error) {
4368 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4369 __func__, error);
4370 goto fail;
4371 }
4372
4373 rxbuf = rxr->rx_buffers;
4374 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4375 rxbuf = &rxr->rx_buffers[i];
4376 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4377 if (error) {
4378 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4379 __func__, error);
4380 goto fail;
4381 }
4382 }
4383
4384 return (0);
4385
4386 fail:
4387 em_free_receive_structures(adapter);
4388 return (error);
4389 }
4390
4391
4392 /*********************************************************************
4393 *
4394 * Initialize a receive ring and its buffers.
4395 *
4396 **********************************************************************/
4397 static int
em_setup_receive_ring(struct rx_ring * rxr)4398 em_setup_receive_ring(struct rx_ring *rxr)
4399 {
4400 struct adapter *adapter = rxr->adapter;
4401 struct em_rxbuffer *rxbuf;
4402 bus_dma_segment_t seg[1];
4403 int rsize, nsegs, error = 0;
4404 #ifdef DEV_NETMAP
4405 struct netmap_slot *slot;
4406 struct netmap_adapter *na = netmap_getna(adapter->ifp);
4407 #endif
4408
4409
4410 /* Clear the ring contents */
4411 EM_RX_LOCK(rxr);
4412 rsize = roundup2(adapter->num_rx_desc *
4413 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4414 bzero((void *)rxr->rx_base, rsize);
4415 #ifdef DEV_NETMAP
4416 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4417 #endif
4418
4419 /*
4420 ** Free current RX buffer structs and their mbufs
4421 */
4422 for (int i = 0; i < adapter->num_rx_desc; i++) {
4423 rxbuf = &rxr->rx_buffers[i];
4424 if (rxbuf->m_head != NULL) {
4425 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4426 BUS_DMASYNC_POSTREAD);
4427 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4428 m_freem(rxbuf->m_head);
4429 rxbuf->m_head = NULL; /* mark as freed */
4430 }
4431 }
4432
4433 /* Now replenish the mbufs */
4434 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4435 rxbuf = &rxr->rx_buffers[j];
4436 #ifdef DEV_NETMAP
4437 if (slot) {
4438 int si = netmap_idx_n2k(na->rx_rings[rxr->me], j);
4439 uint64_t paddr;
4440 void *addr;
4441
4442 addr = PNMB(na, slot + si, &paddr);
4443 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4444 rxbuf->paddr = paddr;
4445 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4446 continue;
4447 }
4448 #endif /* DEV_NETMAP */
4449 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4450 M_PKTHDR, adapter->rx_mbuf_sz);
4451 if (rxbuf->m_head == NULL) {
4452 error = ENOBUFS;
4453 goto fail;
4454 }
4455 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4456 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4457 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4458
4459 /* Get the memory mapping */
4460 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4461 rxbuf->map, rxbuf->m_head, seg,
4462 &nsegs, BUS_DMA_NOWAIT);
4463 if (error != 0) {
4464 m_freem(rxbuf->m_head);
4465 rxbuf->m_head = NULL;
4466 goto fail;
4467 }
4468 bus_dmamap_sync(rxr->rxtag,
4469 rxbuf->map, BUS_DMASYNC_PREREAD);
4470
4471 rxbuf->paddr = seg[0].ds_addr;
4472 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4473 }
4474 rxr->next_to_check = 0;
4475 rxr->next_to_refresh = 0;
4476 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4477 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4478
4479 fail:
4480 EM_RX_UNLOCK(rxr);
4481 return (error);
4482 }
4483
4484 /*********************************************************************
4485 *
4486 * Initialize all receive rings.
4487 *
4488 **********************************************************************/
4489 static int
em_setup_receive_structures(struct adapter * adapter)4490 em_setup_receive_structures(struct adapter *adapter)
4491 {
4492 struct rx_ring *rxr = adapter->rx_rings;
4493 int q;
4494
4495 for (q = 0; q < adapter->num_queues; q++, rxr++)
4496 if (em_setup_receive_ring(rxr))
4497 goto fail;
4498
4499 return (0);
4500 fail:
4501 /*
4502 * Free RX buffers allocated so far, we will only handle
4503 * the rings that completed, the failing case will have
4504 * cleaned up for itself. 'q' failed, so its the terminus.
4505 */
4506 for (int i = 0; i < q; ++i) {
4507 rxr = &adapter->rx_rings[i];
4508 for (int n = 0; n < adapter->num_rx_desc; n++) {
4509 struct em_rxbuffer *rxbuf;
4510 rxbuf = &rxr->rx_buffers[n];
4511 if (rxbuf->m_head != NULL) {
4512 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4513 BUS_DMASYNC_POSTREAD);
4514 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4515 m_freem(rxbuf->m_head);
4516 rxbuf->m_head = NULL;
4517 }
4518 }
4519 rxr->next_to_check = 0;
4520 rxr->next_to_refresh = 0;
4521 }
4522
4523 return (ENOBUFS);
4524 }
4525
4526 /*********************************************************************
4527 *
4528 * Free all receive rings.
4529 *
4530 **********************************************************************/
4531 static void
em_free_receive_structures(struct adapter * adapter)4532 em_free_receive_structures(struct adapter *adapter)
4533 {
4534 struct rx_ring *rxr = adapter->rx_rings;
4535
4536 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4537 em_free_receive_buffers(rxr);
4538 /* Free the ring memory as well */
4539 em_dma_free(adapter, &rxr->rxdma);
4540 EM_RX_LOCK_DESTROY(rxr);
4541 }
4542
4543 free(adapter->rx_rings, M_DEVBUF);
4544 }
4545
4546
4547 /*********************************************************************
4548 *
4549 * Free receive ring data structures
4550 *
4551 **********************************************************************/
4552 static void
em_free_receive_buffers(struct rx_ring * rxr)4553 em_free_receive_buffers(struct rx_ring *rxr)
4554 {
4555 struct adapter *adapter = rxr->adapter;
4556 struct em_rxbuffer *rxbuf = NULL;
4557
4558 INIT_DEBUGOUT("free_receive_buffers: begin");
4559
4560 if (rxr->rx_buffers != NULL) {
4561 for (int i = 0; i < adapter->num_rx_desc; i++) {
4562 rxbuf = &rxr->rx_buffers[i];
4563 if (rxbuf->map != NULL) {
4564 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4565 BUS_DMASYNC_POSTREAD);
4566 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4567 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4568 }
4569 if (rxbuf->m_head != NULL) {
4570 m_freem(rxbuf->m_head);
4571 rxbuf->m_head = NULL;
4572 }
4573 }
4574 free(rxr->rx_buffers, M_DEVBUF);
4575 rxr->rx_buffers = NULL;
4576 rxr->next_to_check = 0;
4577 rxr->next_to_refresh = 0;
4578 }
4579
4580 if (rxr->rxtag != NULL) {
4581 bus_dma_tag_destroy(rxr->rxtag);
4582 rxr->rxtag = NULL;
4583 }
4584
4585 return;
4586 }
4587
4588
/*********************************************************************
 *
 *  Enable receive unit.
 *
 *  Programs the receive side of the controller from the adapter's
 *  software state, in this order: builds the RCTL value (written last),
 *  sets the interrupt delay/throttle registers, selects extended RX
 *  descriptors in RFCTL, configures RX checksum offload, optionally
 *  sets up RSS (EM_MULTIQUEUE with more than one queue), programs the
 *  base/length/head/tail registers of every RX ring, applies
 *  MAC-specific RXDCTL and jumbo-frame adjustments, and finally writes
 *  RCTL, which carries E1000_RCTL_EN.
 *
 **********************************************************************/

static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	if_t ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u32 rctl, rxcsum, rfctl;

	INIT_DEBUGOUT("em_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Do not disable if ever enabled on this hardware */
	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/*
	 * Setup the Receive Control Register.  The value is only
	 * accumulated in 'rctl' here; it is written to the hardware at
	 * the very end of this function.
	 */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Do not store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Long Packet receive */
	if (if_getmtu(ifp) > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

	/* Strip the CRC */
	if (!em_disable_crc_stripping)
		rctl |= E1000_RCTL_SECRC;

	/* Receive interrupt delay registers, from the adapter's settings. */
	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
	    adapter->rx_abs_int_delay.value);

	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
	    adapter->rx_int_delay.value);
	/*
	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
	 */
	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

	/* Use extended rx descriptor formats */
	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
	rfctl |= E1000_RFCTL_EXTEN;
	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (hw->mac.type == e1000_82574) {
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
			    DEFAULT_ITR);
		/* Disable accelerated acknowledge */
		rfctl |= E1000_RFCTL_ACK_DIS;
	}
	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);

	/*
	 * Receive checksum offload follows IFCAP_RXCSUM.  The
	 * EM_MULTIQUEUE build additionally sets the IPOFL and PCSD bits.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
#ifdef EM_MULTIQUEUE
		rxcsum |= E1000_RXCSUM_TUOFL |
			  E1000_RXCSUM_IPOFL |
			  E1000_RXCSUM_PCSD;
#else
		rxcsum |= E1000_RXCSUM_TUOFL;
#endif
	} else
		rxcsum &= ~E1000_RXCSUM_TUOFL;

	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

#ifdef EM_MULTIQUEUE
#define RSSKEYLEN 10
	if (adapter->num_queues > 1) {
		uint8_t  rss_key[4 * RSSKEYLEN];
		uint32_t reta = 0;
		int i;

		/*
		* Configure RSS key: random bytes, loaded into the
		* RSSKEYLEN RSSRK registers.
		*/
		arc4rand(rss_key, sizeof(rss_key), 0);
		for (i = 0; i < RSSKEYLEN; ++i) {
			uint32_t rssrk = 0;

			rssrk = EM_RSSRK_VAL(rss_key, i);
			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
		}

		/*
		* Configure RSS redirect table in following fashion:
		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		*
		* One 32-bit word is built, one byte per entry, with the
		* queue index (entry number modulo num_queues) placed in
		* bit 7 of each byte.  The same word is then written to
		* all 32 RETA registers.
		*/
		for (i = 0; i < sizeof(reta); ++i) {
			uint32_t q;

			q = (i % adapter->num_queues) << 7;
			reta |= q << (8 * i);
		}

		for (i = 0; i < 32; ++i) {
			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
		}

		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
				E1000_MRQC_RSS_FIELD_IPV4_TCP |
				E1000_MRQC_RSS_FIELD_IPV4 |
				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
				E1000_MRQC_RSS_FIELD_IPV6_EX |
				E1000_MRQC_RSS_FIELD_IPV6);
	}
#endif
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	**
	** Note this overrides the RDTR value written above.
	*/
	if (hw->mac.type == e1000_82573)
		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		/* Setup the Base and Length of the Rx Descriptor Ring */
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rdt = adapter->num_rx_desc - 1; /* default */

		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
		/* Setup the Head and Tail Descriptor Pointers */
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 */
		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
			struct netmap_adapter *na = netmap_getna(adapter->ifp);
			rdt -= nm_kr_rxspace(na->rx_rings[i]);
		}
#endif /* DEV_NETMAP */
		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
	}

	/*
	 * Set PTHRESH for improved jumbo performance
	 * According to 10.2.5.11 of Intel 82574 Datasheet,
	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
	 * Only write to RXDCTL(1) if there is a need for different
	 * settings.
	 */
	if (((adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) &&
	    (if_getmtu(ifp) > ETHERMTU)) {
		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
	} else if (adapter->hw.mac.type == e1000_82574) {
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));

			rxdctl |= 0x20; /* PTHRESH */
			rxdctl |= 4 << 8; /* HTHRESH */
			rxdctl |= 4 << 16;/* WTHRESH */
			rxdctl |= 1 << 24; /* Switch to granularity */
			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
		}
	}

	/* Jumbo workaround is switched on or off to follow the MTU. */
	if (adapter->hw.mac.type >= e1000_pch2lan) {
		if (if_getmtu(ifp) > ETHERMTU)
			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
		else
			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
	}

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;

	/*
	 * Receive buffer size in RCTL follows the mbuf cluster size in
	 * use; the sizes above 2048 also need the BSEX bit.
	 */
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rctl |= E1000_RCTL_SZ_2048;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

	/* Ensure the DTYPE field is cleared (descriptor type 00) */
	rctl &= ~0x00000C00;
	/* Write out the settings */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	return;
}
4798
4799
4800 /*********************************************************************
4801 *
4802 * This routine executes in interrupt context. It replenishes
4803 * the mbufs in the descriptor and sends data which has been
4804 * dma'ed into host memory to upper layer.
4805 *
4806 * We loop at most count times if count is > 0, or until done if
4807 * count < 0.
4808 *
4809 * For polling we also now return the number of cleaned packets
4810 *********************************************************************/
4811 static bool
em_rxeof(struct rx_ring * rxr,int count,int * done)4812 em_rxeof(struct rx_ring *rxr, int count, int *done)
4813 {
4814 struct adapter *adapter = rxr->adapter;
4815 if_t ifp = adapter->ifp;
4816 struct mbuf *mp, *sendmp;
4817 u32 status = 0;
4818 u16 len;
4819 int i, processed, rxdone = 0;
4820 bool eop;
4821 union e1000_rx_desc_extended *cur;
4822
4823 EM_RX_LOCK(rxr);
4824
4825 /* Sync the ring */
4826 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4827 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4828
4829
4830 #ifdef DEV_NETMAP
4831 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4832 EM_RX_UNLOCK(rxr);
4833 return (FALSE);
4834 }
4835 #endif /* DEV_NETMAP */
4836
4837 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4838 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4839 break;
4840
4841 cur = &rxr->rx_base[i];
4842 status = le32toh(cur->wb.upper.status_error);
4843 mp = sendmp = NULL;
4844
4845 if ((status & E1000_RXD_STAT_DD) == 0)
4846 break;
4847
4848 len = le16toh(cur->wb.upper.length);
4849 eop = (status & E1000_RXD_STAT_EOP) != 0;
4850
4851 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4852 (rxr->discard == TRUE)) {
4853 adapter->dropped_pkts++;
4854 ++rxr->rx_discarded;
4855 if (!eop) /* Catch subsequent segs */
4856 rxr->discard = TRUE;
4857 else
4858 rxr->discard = FALSE;
4859 em_rx_discard(rxr, i);
4860 goto next_desc;
4861 }
4862 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4863
4864 /* Assign correct length to the current fragment */
4865 mp = rxr->rx_buffers[i].m_head;
4866 mp->m_len = len;
4867
4868 /* Trigger for refresh */
4869 rxr->rx_buffers[i].m_head = NULL;
4870
4871 /* First segment? */
4872 if (rxr->fmp == NULL) {
4873 mp->m_pkthdr.len = len;
4874 rxr->fmp = rxr->lmp = mp;
4875 } else {
4876 /* Chain mbuf's together */
4877 mp->m_flags &= ~M_PKTHDR;
4878 rxr->lmp->m_next = mp;
4879 rxr->lmp = mp;
4880 rxr->fmp->m_pkthdr.len += len;
4881 }
4882
4883 if (eop) {
4884 --count;
4885 sendmp = rxr->fmp;
4886 if_setrcvif(sendmp, ifp);
4887 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4888 em_receive_checksum(status, sendmp);
4889 #ifndef __NO_STRICT_ALIGNMENT
4890 if (adapter->hw.mac.max_frame_size >
4891 (MCLBYTES - ETHER_ALIGN) &&
4892 em_fixup_rx(rxr) != 0)
4893 goto skip;
4894 #endif
4895 if (status & E1000_RXD_STAT_VP) {
4896 if_setvtag(sendmp,
4897 le16toh(cur->wb.upper.vlan));
4898 sendmp->m_flags |= M_VLANTAG;
4899 }
4900 #ifndef __NO_STRICT_ALIGNMENT
4901 skip:
4902 #endif
4903 rxr->fmp = rxr->lmp = NULL;
4904 }
4905 next_desc:
4906 /* Sync the ring */
4907 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4908 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4909
4910 /* Zero out the receive descriptors status. */
4911 cur->wb.upper.status_error &= htole32(~0xFF);
4912 ++rxdone; /* cumulative for POLL */
4913 ++processed;
4914
4915 /* Advance our pointers to the next descriptor. */
4916 if (++i == adapter->num_rx_desc)
4917 i = 0;
4918
4919 /* Send to the stack */
4920 if (sendmp != NULL) {
4921 rxr->next_to_check = i;
4922 EM_RX_UNLOCK(rxr);
4923 if_input(ifp, sendmp);
4924 EM_RX_LOCK(rxr);
4925 i = rxr->next_to_check;
4926 }
4927
4928 /* Only refresh mbufs every 8 descriptors */
4929 if (processed == 8) {
4930 em_refresh_mbufs(rxr, i);
4931 processed = 0;
4932 }
4933 }
4934
4935 /* Catch any remaining refresh work */
4936 if (e1000_rx_unrefreshed(rxr))
4937 em_refresh_mbufs(rxr, i);
4938
4939 rxr->next_to_check = i;
4940 if (done != NULL)
4941 *done = rxdone;
4942 EM_RX_UNLOCK(rxr);
4943
4944 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4945 }
4946
4947 static __inline void
em_rx_discard(struct rx_ring * rxr,int i)4948 em_rx_discard(struct rx_ring *rxr, int i)
4949 {
4950 struct em_rxbuffer *rbuf;
4951
4952 rbuf = &rxr->rx_buffers[i];
4953 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4954
4955 /* Free any previous pieces */
4956 if (rxr->fmp != NULL) {
4957 rxr->fmp->m_flags |= M_PKTHDR;
4958 m_freem(rxr->fmp);
4959 rxr->fmp = NULL;
4960 rxr->lmp = NULL;
4961 }
4962 /*
4963 ** Free buffer and allow em_refresh_mbufs()
4964 ** to clean up and recharge buffer.
4965 */
4966 if (rbuf->m_head) {
4967 m_free(rbuf->m_head);
4968 rbuf->m_head = NULL;
4969 }
4970 return;
4971 }
4972
4973 #ifndef __NO_STRICT_ALIGNMENT
4974 /*
4975 * When jumbo frames are enabled we should realign entire payload on
4976 * architecures with strict alignment. This is serious design mistake of 8254x
4977 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4978 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4979 * payload. On architecures without strict alignment restrictions 8254x still
4980 * performs unaligned memory access which would reduce the performance too.
4981 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4982 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4983 * existing mbuf chain.
4984 *
4985 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4986 * not used at all on architectures with strict alignment.
4987 */
4988 static int
em_fixup_rx(struct rx_ring * rxr)4989 em_fixup_rx(struct rx_ring *rxr)
4990 {
4991 struct adapter *adapter = rxr->adapter;
4992 struct mbuf *m, *n;
4993 int error;
4994
4995 error = 0;
4996 m = rxr->fmp;
4997 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4998 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4999 m->m_data += ETHER_HDR_LEN;
5000 } else {
5001 MGETHDR(n, M_NOWAIT, MT_DATA);
5002 if (n != NULL) {
5003 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5004 m->m_data += ETHER_HDR_LEN;
5005 m->m_len -= ETHER_HDR_LEN;
5006 n->m_len = ETHER_HDR_LEN;
5007 M_MOVE_PKTHDR(n, m);
5008 n->m_next = m;
5009 rxr->fmp = n;
5010 } else {
5011 adapter->dropped_pkts++;
5012 m_freem(rxr->fmp);
5013 rxr->fmp = NULL;
5014 error = ENOMEM;
5015 }
5016 }
5017
5018 return (error);
5019 }
5020 #endif
5021
5022 static void
em_setup_rxdesc(union e1000_rx_desc_extended * rxd,const struct em_rxbuffer * rxbuf)5023 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5024 {
5025 rxd->read.buffer_addr = htole64(rxbuf->paddr);
5026 /* DD bits must be cleared */
5027 rxd->wb.upper.status_error= 0;
5028 }
5029
5030 /*********************************************************************
5031 *
5032 * Verify that the hardware indicated that the checksum is valid.
5033 * Inform the stack about the status of checksum so that stack
5034 * doesn't spend time verifying the checksum.
5035 *
5036 *********************************************************************/
5037 static void
em_receive_checksum(uint32_t status,struct mbuf * mp)5038 em_receive_checksum(uint32_t status, struct mbuf *mp)
5039 {
5040 mp->m_pkthdr.csum_flags = 0;
5041
5042 /* Ignore Checksum bit is set */
5043 if (status & E1000_RXD_STAT_IXSM)
5044 return;
5045
5046 /* If the IP checksum exists and there is no IP Checksum error */
5047 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5048 E1000_RXD_STAT_IPCS) {
5049 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5050 }
5051
5052 /* TCP or UDP checksum */
5053 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5054 E1000_RXD_STAT_TCPCS) {
5055 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5056 mp->m_pkthdr.csum_data = htons(0xffff);
5057 }
5058 if (status & E1000_RXD_STAT_UDPCS) {
5059 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5060 mp->m_pkthdr.csum_data = htons(0xffff);
5061 }
5062 }
5063
5064 /*
5065 * This routine is run via an vlan
5066 * config EVENT
5067 */
5068 static void
em_register_vlan(void * arg,if_t ifp,u16 vtag)5069 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5070 {
5071 struct adapter *adapter = if_getsoftc(ifp);
5072 u32 index, bit;
5073
5074 if ((void*)adapter != arg) /* Not our event */
5075 return;
5076
5077 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5078 return;
5079
5080 EM_CORE_LOCK(adapter);
5081 index = (vtag >> 5) & 0x7F;
5082 bit = vtag & 0x1F;
5083 adapter->shadow_vfta[index] |= (1 << bit);
5084 ++adapter->num_vlans;
5085 /* Re-init to load the changes */
5086 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5087 em_init_locked(adapter);
5088 EM_CORE_UNLOCK(adapter);
5089 }
5090
5091 /*
5092 * This routine is run via an vlan
5093 * unconfig EVENT
5094 */
5095 static void
em_unregister_vlan(void * arg,if_t ifp,u16 vtag)5096 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5097 {
5098 struct adapter *adapter = if_getsoftc(ifp);
5099 u32 index, bit;
5100
5101 if (adapter != arg)
5102 return;
5103
5104 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5105 return;
5106
5107 EM_CORE_LOCK(adapter);
5108 index = (vtag >> 5) & 0x7F;
5109 bit = vtag & 0x1F;
5110 adapter->shadow_vfta[index] &= ~(1 << bit);
5111 --adapter->num_vlans;
5112 /* Re-init to load the changes */
5113 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5114 em_init_locked(adapter);
5115 EM_CORE_UNLOCK(adapter);
5116 }
5117
5118 static void
em_setup_vlan_hw_support(struct adapter * adapter)5119 em_setup_vlan_hw_support(struct adapter *adapter)
5120 {
5121 struct e1000_hw *hw = &adapter->hw;
5122 u32 reg;
5123
5124 /*
5125 ** We get here thru init_locked, meaning
5126 ** a soft reset, this has already cleared
5127 ** the VFTA and other state, so if there
5128 ** have been no vlan's registered do nothing.
5129 */
5130 if (adapter->num_vlans == 0)
5131 return;
5132
5133 /*
5134 ** A soft reset zero's out the VFTA, so
5135 ** we need to repopulate it now.
5136 */
5137 for (int i = 0; i < EM_VFTA_SIZE; i++)
5138 if (adapter->shadow_vfta[i] != 0)
5139 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5140 i, adapter->shadow_vfta[i]);
5141
5142 reg = E1000_READ_REG(hw, E1000_CTRL);
5143 reg |= E1000_CTRL_VME;
5144 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5145
5146 /* Enable the Filter Table */
5147 reg = E1000_READ_REG(hw, E1000_RCTL);
5148 reg &= ~E1000_RCTL_CFIEN;
5149 reg |= E1000_RCTL_VFE;
5150 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5151 }
5152
5153 static void
em_enable_intr(struct adapter * adapter)5154 em_enable_intr(struct adapter *adapter)
5155 {
5156 struct e1000_hw *hw = &adapter->hw;
5157 u32 ims_mask = IMS_ENABLE_MASK;
5158
5159 if (hw->mac.type == e1000_82574) {
5160 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5161 ims_mask |= EM_MSIX_MASK;
5162 }
5163 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5164 }
5165
5166 static void
em_disable_intr(struct adapter * adapter)5167 em_disable_intr(struct adapter *adapter)
5168 {
5169 struct e1000_hw *hw = &adapter->hw;
5170
5171 if (hw->mac.type == e1000_82574)
5172 E1000_WRITE_REG(hw, EM_EIAC, 0);
5173 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5174 }
5175
5176 /*
5177 * Bit of a misnomer, what this really means is
5178 * to enable OS management of the system... aka
5179 * to disable special hardware management features
5180 */
5181 static void
em_init_manageability(struct adapter * adapter)5182 em_init_manageability(struct adapter *adapter)
5183 {
5184 /* A shared code workaround */
5185 #define E1000_82542_MANC2H E1000_MANC2H
5186 if (adapter->has_manage) {
5187 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5188 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5189
5190 /* disable hardware interception of ARP */
5191 manc &= ~(E1000_MANC_ARP_EN);
5192
5193 /* enable receiving management packets to the host */
5194 manc |= E1000_MANC_EN_MNG2HOST;
5195 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5196 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5197 manc2h |= E1000_MNG2HOST_PORT_623;
5198 manc2h |= E1000_MNG2HOST_PORT_664;
5199 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5200 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5201 }
5202 }
5203
5204 /*
5205 * Give control back to hardware management
5206 * controller if there is one.
5207 */
5208 static void
em_release_manageability(struct adapter * adapter)5209 em_release_manageability(struct adapter *adapter)
5210 {
5211 if (adapter->has_manage) {
5212 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5213
5214 /* re-enable hardware interception of ARP */
5215 manc |= E1000_MANC_ARP_EN;
5216 manc &= ~E1000_MANC_EN_MNG2HOST;
5217
5218 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5219 }
5220 }
5221
5222 /*
5223 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5224 * For ASF and Pass Through versions of f/w this means
5225 * that the driver is loaded. For AMT version type f/w
5226 * this means that the network i/f is open.
5227 */
5228 static void
em_get_hw_control(struct adapter * adapter)5229 em_get_hw_control(struct adapter *adapter)
5230 {
5231 u32 ctrl_ext, swsm;
5232
5233 if (adapter->hw.mac.type == e1000_82573) {
5234 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5235 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5236 swsm | E1000_SWSM_DRV_LOAD);
5237 return;
5238 }
5239 /* else */
5240 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5241 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5242 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5243 return;
5244 }
5245
5246 /*
5247 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5248 * For ASF and Pass Through versions of f/w this means that
5249 * the driver is no longer loaded. For AMT versions of the
5250 * f/w this means that the network i/f is closed.
5251 */
5252 static void
em_release_hw_control(struct adapter * adapter)5253 em_release_hw_control(struct adapter *adapter)
5254 {
5255 u32 ctrl_ext, swsm;
5256
5257 if (!adapter->has_manage)
5258 return;
5259
5260 if (adapter->hw.mac.type == e1000_82573) {
5261 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5262 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5263 swsm & ~E1000_SWSM_DRV_LOAD);
5264 return;
5265 }
5266 /* else */
5267 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5268 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5269 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5270 return;
5271 }
5272
5273 static int
em_is_valid_ether_addr(u8 * addr)5274 em_is_valid_ether_addr(u8 *addr)
5275 {
5276 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5277
5278 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5279 return (FALSE);
5280 }
5281
5282 return (TRUE);
5283 }
5284
5285 /*
5286 ** Parse the interface capabilities with regard
5287 ** to both system management and wake-on-lan for
5288 ** later use.
5289 */
5290 static void
em_get_wakeup(device_t dev)5291 em_get_wakeup(device_t dev)
5292 {
5293 struct adapter *adapter = device_get_softc(dev);
5294 u16 eeprom_data = 0, device_id, apme_mask;
5295
5296 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5297 apme_mask = EM_EEPROM_APME;
5298
5299 switch (adapter->hw.mac.type) {
5300 case e1000_82573:
5301 case e1000_82583:
5302 adapter->has_amt = TRUE;
5303 /* Falls thru */
5304 case e1000_82571:
5305 case e1000_82572:
5306 case e1000_80003es2lan:
5307 if (adapter->hw.bus.func == 1) {
5308 e1000_read_nvm(&adapter->hw,
5309 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5310 break;
5311 } else
5312 e1000_read_nvm(&adapter->hw,
5313 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5314 break;
5315 case e1000_ich8lan:
5316 case e1000_ich9lan:
5317 case e1000_ich10lan:
5318 case e1000_pchlan:
5319 case e1000_pch2lan:
5320 case e1000_pch_lpt:
5321 case e1000_pch_spt:
5322 case e1000_pch_cnp:
5323 apme_mask = E1000_WUC_APME;
5324 adapter->has_amt = TRUE;
5325 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5326 break;
5327 default:
5328 e1000_read_nvm(&adapter->hw,
5329 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5330 break;
5331 }
5332 if (eeprom_data & apme_mask)
5333 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5334 /*
5335 * We have the eeprom settings, now apply the special cases
5336 * where the eeprom may be wrong or the board won't support
5337 * wake on lan on a particular port
5338 */
5339 device_id = pci_get_device(dev);
5340 switch (device_id) {
5341 case E1000_DEV_ID_82571EB_FIBER:
5342 /* Wake events only supported on port A for dual fiber
5343 * regardless of eeprom setting */
5344 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5345 E1000_STATUS_FUNC_1)
5346 adapter->wol = 0;
5347 break;
5348 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5349 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5350 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5351 /* if quad port adapter, disable WoL on all but port A */
5352 if (global_quad_port_a != 0)
5353 adapter->wol = 0;
5354 /* Reset for multiple quad port adapters */
5355 if (++global_quad_port_a == 4)
5356 global_quad_port_a = 0;
5357 break;
5358 }
5359 return;
5360 }
5361
5362
5363 /*
5364 * Enable PCI Wake On Lan capability
5365 */
5366 static void
em_enable_wakeup(device_t dev)5367 em_enable_wakeup(device_t dev)
5368 {
5369 struct adapter *adapter = device_get_softc(dev);
5370 if_t ifp = adapter->ifp;
5371 int error = 0;
5372 u32 pmc, ctrl, ctrl_ext, rctl;
5373 u16 status;
5374
5375 if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5376 return;
5377
5378 /*
5379 ** Determine type of Wakeup: note that wol
5380 ** is set with all bits on by default.
5381 */
5382 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5383 adapter->wol &= ~E1000_WUFC_MAG;
5384
5385 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5386 adapter->wol &= ~E1000_WUFC_MC;
5387 else {
5388 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5389 rctl |= E1000_RCTL_MPE;
5390 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5391 }
5392
5393 if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5394 goto pme;
5395
5396 /* Advertise the wakeup capability */
5397 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5398 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5399 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5400
5401 /* Keep the laser running on Fiber adapters */
5402 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5403 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5404 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5405 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5406 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5407 }
5408
5409 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5410 (adapter->hw.mac.type == e1000_pchlan) ||
5411 (adapter->hw.mac.type == e1000_ich9lan) ||
5412 (adapter->hw.mac.type == e1000_ich10lan))
5413 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5414
5415 if ((adapter->hw.mac.type == e1000_pchlan) ||
5416 (adapter->hw.mac.type == e1000_pch2lan) ||
5417 (adapter->hw.mac.type == e1000_pch_lpt) ||
5418 (adapter->hw.mac.type == e1000_pch_spt) ||
5419 (adapter->hw.mac.type == e1000_pch_cnp)) {
5420 error = em_enable_phy_wakeup(adapter);
5421 if (error)
5422 goto pme;
5423 } else {
5424 /* Enable wakeup by the MAC */
5425 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5426 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5427 }
5428
5429 if (adapter->hw.phy.type == e1000_phy_igp_3)
5430 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5431
5432 pme:
5433 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5434 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5435 if (!error && (if_getcapenable(ifp) & IFCAP_WOL))
5436 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5437 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5438
5439 return;
5440 }
5441
5442 /*
5443 ** WOL in the newer chipset interfaces (pchlan)
5444 ** require thing to be copied into the phy
5445 */
5446 static int
em_enable_phy_wakeup(struct adapter * adapter)5447 em_enable_phy_wakeup(struct adapter *adapter)
5448 {
5449 struct e1000_hw *hw = &adapter->hw;
5450 u32 mreg, ret = 0;
5451 u16 preg;
5452
5453 /* copy MAC RARs to PHY RARs */
5454 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5455
5456 /* copy MAC MTA to PHY MTA */
5457 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5458 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5459 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5460 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5461 (u16)((mreg >> 16) & 0xFFFF));
5462 }
5463
5464 /* configure PHY Rx Control register */
5465 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5466 mreg = E1000_READ_REG(hw, E1000_RCTL);
5467 if (mreg & E1000_RCTL_UPE)
5468 preg |= BM_RCTL_UPE;
5469 if (mreg & E1000_RCTL_MPE)
5470 preg |= BM_RCTL_MPE;
5471 preg &= ~(BM_RCTL_MO_MASK);
5472 if (mreg & E1000_RCTL_MO_3)
5473 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5474 << BM_RCTL_MO_SHIFT);
5475 if (mreg & E1000_RCTL_BAM)
5476 preg |= BM_RCTL_BAM;
5477 if (mreg & E1000_RCTL_PMCF)
5478 preg |= BM_RCTL_PMCF;
5479 mreg = E1000_READ_REG(hw, E1000_CTRL);
5480 if (mreg & E1000_CTRL_RFCE)
5481 preg |= BM_RCTL_RFCE;
5482 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5483
5484 /* enable PHY wakeup in MAC register */
5485 E1000_WRITE_REG(hw, E1000_WUC,
5486 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5487 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5488
5489 /* configure and enable PHY wakeup in PHY registers */
5490 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5491 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5492
5493 /* activate PHY wakeup */
5494 ret = hw->phy.ops.acquire(hw);
5495 if (ret) {
5496 printf("Could not acquire PHY\n");
5497 return ret;
5498 }
5499 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5500 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5501 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5502 if (ret) {
5503 printf("Could not read PHY page 769\n");
5504 goto out;
5505 }
5506 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5507 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5508 if (ret)
5509 printf("Could not set PHY Host Wakeup bit\n");
5510 out:
5511 hw->phy.ops.release(hw);
5512
5513 return ret;
5514 }
5515
5516 static void
em_led_func(void * arg,int onoff)5517 em_led_func(void *arg, int onoff)
5518 {
5519 struct adapter *adapter = arg;
5520
5521 EM_CORE_LOCK(adapter);
5522 if (onoff) {
5523 e1000_setup_led(&adapter->hw);
5524 e1000_led_on(&adapter->hw);
5525 } else {
5526 e1000_led_off(&adapter->hw);
5527 e1000_cleanup_led(&adapter->hw);
5528 }
5529 EM_CORE_UNLOCK(adapter);
5530 }
5531
5532 /*
5533 ** Disable the L0S and L1 LINK states
5534 */
5535 static void
em_disable_aspm(struct adapter * adapter)5536 em_disable_aspm(struct adapter *adapter)
5537 {
5538 int base, reg;
5539 u16 link_cap,link_ctrl;
5540 device_t dev = adapter->dev;
5541
5542 switch (adapter->hw.mac.type) {
5543 case e1000_82573:
5544 case e1000_82574:
5545 case e1000_82583:
5546 break;
5547 default:
5548 return;
5549 }
5550 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5551 return;
5552 reg = base + PCIER_LINK_CAP;
5553 link_cap = pci_read_config(dev, reg, 2);
5554 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5555 return;
5556 reg = base + PCIER_LINK_CTL;
5557 link_ctrl = pci_read_config(dev, reg, 2);
5558 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5559 pci_write_config(dev, reg, link_ctrl, 2);
5560 return;
5561 }
5562
5563 /**********************************************************************
5564 *
5565 * Update the board statistics counters.
5566 *
5567 **********************************************************************/
5568 static void
em_update_stats_counters(struct adapter * adapter)5569 em_update_stats_counters(struct adapter *adapter)
5570 {
5571
5572 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5573 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5574 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5575 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5576 }
5577 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5578 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5579 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5580 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5581
5582 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5583 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5584 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5585 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5586 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5587 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5588 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5589 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5590 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5591 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5592 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5593 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5594 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5595 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5596 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5597 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5598 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5599 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5600 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5601 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5602
5603 /* For the 64-bit byte counters the low dword must be read first. */
5604 /* Both registers clear on the read of the high dword */
5605
5606 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5607 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5608 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5609 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5610
5611 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5612 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5613 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5614 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5615 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5616
5617 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5618 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5619
5620 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5621 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5622 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5623 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5624 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5625 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5626 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5627 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5628 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5629 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5630
5631 /* Interrupt Counts */
5632
5633 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5634 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5635 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5636 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5637 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5638 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5639 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5640 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5641 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5642
5643 if (adapter->hw.mac.type >= e1000_82543) {
5644 adapter->stats.algnerrc +=
5645 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5646 adapter->stats.rxerrc +=
5647 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5648 adapter->stats.tncrs +=
5649 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5650 adapter->stats.cexterr +=
5651 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5652 adapter->stats.tsctc +=
5653 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5654 adapter->stats.tsctfc +=
5655 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5656 }
5657 }
5658
5659 static uint64_t
em_get_counter(if_t ifp,ift_counter cnt)5660 em_get_counter(if_t ifp, ift_counter cnt)
5661 {
5662 struct adapter *adapter;
5663
5664 adapter = if_getsoftc(ifp);
5665
5666 switch (cnt) {
5667 case IFCOUNTER_COLLISIONS:
5668 return (adapter->stats.colc);
5669 case IFCOUNTER_IERRORS:
5670 return (adapter->dropped_pkts + adapter->stats.rxerrc +
5671 adapter->stats.crcerrs + adapter->stats.algnerrc +
5672 adapter->stats.ruc + adapter->stats.roc +
5673 adapter->stats.mpc + adapter->stats.cexterr);
5674 case IFCOUNTER_OERRORS:
5675 return (adapter->stats.ecol + adapter->stats.latecol +
5676 adapter->watchdog_events);
5677 default:
5678 return (if_get_counter_default(ifp, cnt));
5679 }
5680 }
5681
5682 /* Export a single 32-bit register via a read-only sysctl. */
5683 static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)5684 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5685 {
5686 struct adapter *adapter;
5687 u_int val;
5688
5689 adapter = oidp->oid_arg1;
5690 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5691 return (sysctl_handle_int(oidp, &val, 0, req));
5692 }
5693
5694 /*
5695 * Add sysctl variables, one per statistic, to the system.
5696 */
5697 static void
em_add_hw_stats(struct adapter * adapter)5698 em_add_hw_stats(struct adapter *adapter)
5699 {
5700 device_t dev = adapter->dev;
5701
5702 struct tx_ring *txr = adapter->tx_rings;
5703 struct rx_ring *rxr = adapter->rx_rings;
5704
5705 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5706 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5707 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5708 struct e1000_hw_stats *stats = &adapter->stats;
5709
5710 struct sysctl_oid *stat_node, *queue_node, *int_node;
5711 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5712
5713 #define QUEUE_NAME_LEN 32
5714 char namebuf[QUEUE_NAME_LEN];
5715
5716 /* Driver Statistics */
5717 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5718 CTLFLAG_RD, &adapter->dropped_pkts,
5719 "Driver dropped packets");
5720 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5721 CTLFLAG_RD, &adapter->link_irq,
5722 "Link MSIX IRQ Handled");
5723 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5724 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5725 "Defragmenting mbuf chain failed");
5726 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5727 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5728 "Driver tx dma failure in xmit");
5729 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5730 CTLFLAG_RD, &adapter->rx_overruns,
5731 "RX overruns");
5732 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5733 CTLFLAG_RD, &adapter->watchdog_events,
5734 "Watchdog timeouts");
5735
5736 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5737 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5738 em_sysctl_reg_handler, "IU",
5739 "Device Control Register");
5740 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5741 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5742 em_sysctl_reg_handler, "IU",
5743 "Receiver Control Register");
5744 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5745 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5746 "Flow Control High Watermark");
5747 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5748 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5749 "Flow Control Low Watermark");
5750
5751 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5752 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5753 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5754 CTLFLAG_RD, NULL, "TX Queue Name");
5755 queue_list = SYSCTL_CHILDREN(queue_node);
5756
5757 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5758 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5759 E1000_TDH(txr->me),
5760 em_sysctl_reg_handler, "IU",
5761 "Transmit Descriptor Head");
5762 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5763 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5764 E1000_TDT(txr->me),
5765 em_sysctl_reg_handler, "IU",
5766 "Transmit Descriptor Tail");
5767 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5768 CTLFLAG_RD, &txr->tx_irq,
5769 "Queue MSI-X Transmit Interrupts");
5770 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5771 CTLFLAG_RD, &txr->no_desc_avail,
5772 "Queue No Descriptor Available");
5773
5774 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5775 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5776 CTLFLAG_RD, NULL, "RX Queue Name");
5777 queue_list = SYSCTL_CHILDREN(queue_node);
5778
5779 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5780 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5781 E1000_RDH(rxr->me),
5782 em_sysctl_reg_handler, "IU",
5783 "Receive Descriptor Head");
5784 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5785 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5786 E1000_RDT(rxr->me),
5787 em_sysctl_reg_handler, "IU",
5788 "Receive Descriptor Tail");
5789 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5790 CTLFLAG_RD, &rxr->rx_irq,
5791 "Queue MSI-X Receive Interrupts");
5792 }
5793
5794 /* MAC stats get their own sub node */
5795
5796 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5797 CTLFLAG_RD, NULL, "Statistics");
5798 stat_list = SYSCTL_CHILDREN(stat_node);
5799
5800 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5801 CTLFLAG_RD, &stats->ecol,
5802 "Excessive collisions");
5803 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5804 CTLFLAG_RD, &stats->scc,
5805 "Single collisions");
5806 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5807 CTLFLAG_RD, &stats->mcc,
5808 "Multiple collisions");
5809 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5810 CTLFLAG_RD, &stats->latecol,
5811 "Late collisions");
5812 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5813 CTLFLAG_RD, &stats->colc,
5814 "Collision Count");
5815 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5816 CTLFLAG_RD, &adapter->stats.symerrs,
5817 "Symbol Errors");
5818 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5819 CTLFLAG_RD, &adapter->stats.sec,
5820 "Sequence Errors");
5821 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5822 CTLFLAG_RD, &adapter->stats.dc,
5823 "Defer Count");
5824 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5825 CTLFLAG_RD, &adapter->stats.mpc,
5826 "Missed Packets");
5827 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5828 CTLFLAG_RD, &adapter->stats.rnbc,
5829 "Receive No Buffers");
5830 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5831 CTLFLAG_RD, &adapter->stats.ruc,
5832 "Receive Undersize");
5833 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5834 CTLFLAG_RD, &adapter->stats.rfc,
5835 "Fragmented Packets Received ");
5836 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5837 CTLFLAG_RD, &adapter->stats.roc,
5838 "Oversized Packets Received");
5839 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5840 CTLFLAG_RD, &adapter->stats.rjc,
5841 "Recevied Jabber");
5842 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5843 CTLFLAG_RD, &adapter->stats.rxerrc,
5844 "Receive Errors");
5845 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5846 CTLFLAG_RD, &adapter->stats.crcerrs,
5847 "CRC errors");
5848 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5849 CTLFLAG_RD, &adapter->stats.algnerrc,
5850 "Alignment Errors");
5851 /* On 82575 these are collision counts */
5852 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5853 CTLFLAG_RD, &adapter->stats.cexterr,
5854 "Collision/Carrier extension errors");
5855 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5856 CTLFLAG_RD, &adapter->stats.xonrxc,
5857 "XON Received");
5858 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5859 CTLFLAG_RD, &adapter->stats.xontxc,
5860 "XON Transmitted");
5861 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5862 CTLFLAG_RD, &adapter->stats.xoffrxc,
5863 "XOFF Received");
5864 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5865 CTLFLAG_RD, &adapter->stats.xofftxc,
5866 "XOFF Transmitted");
5867
5868 /* Packet Reception Stats */
5869 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5870 CTLFLAG_RD, &adapter->stats.tpr,
5871 "Total Packets Received ");
5872 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5873 CTLFLAG_RD, &adapter->stats.gprc,
5874 "Good Packets Received");
5875 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5876 CTLFLAG_RD, &adapter->stats.bprc,
5877 "Broadcast Packets Received");
5878 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5879 CTLFLAG_RD, &adapter->stats.mprc,
5880 "Multicast Packets Received");
5881 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5882 CTLFLAG_RD, &adapter->stats.prc64,
5883 "64 byte frames received ");
5884 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5885 CTLFLAG_RD, &adapter->stats.prc127,
5886 "65-127 byte frames received");
5887 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5888 CTLFLAG_RD, &adapter->stats.prc255,
5889 "128-255 byte frames received");
5890 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5891 CTLFLAG_RD, &adapter->stats.prc511,
5892 "256-511 byte frames received");
5893 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5894 CTLFLAG_RD, &adapter->stats.prc1023,
5895 "512-1023 byte frames received");
5896 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5897 CTLFLAG_RD, &adapter->stats.prc1522,
5898 "1023-1522 byte frames received");
5899 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5900 CTLFLAG_RD, &adapter->stats.gorc,
5901 "Good Octets Received");
5902
5903 /* Packet Transmission Stats */
5904 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5905 CTLFLAG_RD, &adapter->stats.gotc,
5906 "Good Octets Transmitted");
5907 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5908 CTLFLAG_RD, &adapter->stats.tpt,
5909 "Total Packets Transmitted");
5910 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5911 CTLFLAG_RD, &adapter->stats.gptc,
5912 "Good Packets Transmitted");
5913 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5914 CTLFLAG_RD, &adapter->stats.bptc,
5915 "Broadcast Packets Transmitted");
5916 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5917 CTLFLAG_RD, &adapter->stats.mptc,
5918 "Multicast Packets Transmitted");
5919 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5920 CTLFLAG_RD, &adapter->stats.ptc64,
5921 "64 byte frames transmitted ");
5922 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5923 CTLFLAG_RD, &adapter->stats.ptc127,
5924 "65-127 byte frames transmitted");
5925 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5926 CTLFLAG_RD, &adapter->stats.ptc255,
5927 "128-255 byte frames transmitted");
5928 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5929 CTLFLAG_RD, &adapter->stats.ptc511,
5930 "256-511 byte frames transmitted");
5931 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5932 CTLFLAG_RD, &adapter->stats.ptc1023,
5933 "512-1023 byte frames transmitted");
5934 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5935 CTLFLAG_RD, &adapter->stats.ptc1522,
5936 "1024-1522 byte frames transmitted");
5937 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5938 CTLFLAG_RD, &adapter->stats.tsctc,
5939 "TSO Contexts Transmitted");
5940 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5941 CTLFLAG_RD, &adapter->stats.tsctfc,
5942 "TSO Contexts Failed");
5943
5944
5945 /* Interrupt Stats */
5946
5947 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5948 CTLFLAG_RD, NULL, "Interrupt Statistics");
5949 int_list = SYSCTL_CHILDREN(int_node);
5950
5951 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5952 CTLFLAG_RD, &adapter->stats.iac,
5953 "Interrupt Assertion Count");
5954
5955 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5956 CTLFLAG_RD, &adapter->stats.icrxptc,
5957 "Interrupt Cause Rx Pkt Timer Expire Count");
5958
5959 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5960 CTLFLAG_RD, &adapter->stats.icrxatc,
5961 "Interrupt Cause Rx Abs Timer Expire Count");
5962
5963 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5964 CTLFLAG_RD, &adapter->stats.ictxptc,
5965 "Interrupt Cause Tx Pkt Timer Expire Count");
5966
5967 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5968 CTLFLAG_RD, &adapter->stats.ictxatc,
5969 "Interrupt Cause Tx Abs Timer Expire Count");
5970
5971 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5972 CTLFLAG_RD, &adapter->stats.ictxqec,
5973 "Interrupt Cause Tx Queue Empty Count");
5974
5975 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5976 CTLFLAG_RD, &adapter->stats.ictxqmtc,
5977 "Interrupt Cause Tx Queue Min Thresh Count");
5978
5979 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5980 CTLFLAG_RD, &adapter->stats.icrxdmtc,
5981 "Interrupt Cause Rx Desc Min Thresh Count");
5982
5983 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5984 CTLFLAG_RD, &adapter->stats.icrxoc,
5985 "Interrupt Cause Receiver Overrun Count");
5986 }
5987
5988 /**********************************************************************
5989 *
5990 * This routine provides a way to dump out the adapter eeprom,
5991 * often a useful debug/service tool. This only dumps the first
5992 * 32 words, stuff that matters is in that extent.
5993 *
5994 **********************************************************************/
5995 static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)5996 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5997 {
5998 struct adapter *adapter = (struct adapter *)arg1;
5999 int error;
6000 int result;
6001
6002 result = -1;
6003 error = sysctl_handle_int(oidp, &result, 0, req);
6004
6005 if (error || !req->newptr)
6006 return (error);
6007
6008 /*
6009 * This value will cause a hex dump of the
6010 * first 32 16-bit words of the EEPROM to
6011 * the screen.
6012 */
6013 if (result == 1)
6014 em_print_nvm_info(adapter);
6015
6016 return (error);
6017 }
6018
6019 static void
em_print_nvm_info(struct adapter * adapter)6020 em_print_nvm_info(struct adapter *adapter)
6021 {
6022 u16 eeprom_data;
6023 int i, j, row = 0;
6024
6025 /* Its a bit crude, but it gets the job done */
6026 printf("\nInterface EEPROM Dump:\n");
6027 printf("Offset\n0x0000 ");
6028 for (i = 0, j = 0; i < 32; i++, j++) {
6029 if (j == 8) { /* Make the offset block */
6030 j = 0; ++row;
6031 printf("\n0x00%x0 ",row);
6032 }
6033 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6034 printf("%04x ", eeprom_data);
6035 }
6036 printf("\n");
6037 }
6038
6039 static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)6040 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6041 {
6042 struct em_int_delay_info *info;
6043 struct adapter *adapter;
6044 u32 regval;
6045 int error, usecs, ticks;
6046
6047 info = (struct em_int_delay_info *)arg1;
6048 usecs = info->value;
6049 error = sysctl_handle_int(oidp, &usecs, 0, req);
6050 if (error != 0 || req->newptr == NULL)
6051 return (error);
6052 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6053 return (EINVAL);
6054 info->value = usecs;
6055 ticks = EM_USECS_TO_TICKS(usecs);
6056 if (info->offset == E1000_ITR) /* units are 256ns here */
6057 ticks *= 4;
6058
6059 adapter = info->adapter;
6060
6061 EM_CORE_LOCK(adapter);
6062 regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6063 regval = (regval & ~0xffff) | (ticks & 0xffff);
6064 /* Handle a few special cases. */
6065 switch (info->offset) {
6066 case E1000_RDTR:
6067 break;
6068 case E1000_TIDV:
6069 if (ticks == 0) {
6070 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6071 /* Don't write 0 into the TIDV register. */
6072 regval++;
6073 } else
6074 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6075 break;
6076 }
6077 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6078 EM_CORE_UNLOCK(adapter);
6079 return (0);
6080 }
6081
6082 static void
em_add_int_delay_sysctl(struct adapter * adapter,const char * name,const char * description,struct em_int_delay_info * info,int offset,int value)6083 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6084 const char *description, struct em_int_delay_info *info,
6085 int offset, int value)
6086 {
6087 info->adapter = adapter;
6088 info->offset = offset;
6089 info->value = value;
6090 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6091 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6092 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6093 info, 0, em_sysctl_int_delay, "I", description);
6094 }
6095
6096 static void
em_set_sysctl_value(struct adapter * adapter,const char * name,const char * description,int * limit,int value)6097 em_set_sysctl_value(struct adapter *adapter, const char *name,
6098 const char *description, int *limit, int value)
6099 {
6100 *limit = value;
6101 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6102 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6103 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6104 }
6105
6106
6107 /*
6108 ** Set flow control using sysctl:
6109 ** Flow control values:
6110 ** 0 - off
6111 ** 1 - rx pause
6112 ** 2 - tx pause
6113 ** 3 - full
6114 */
6115 static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)6116 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6117 {
6118 int error;
6119 static int input = 3; /* default is full */
6120 struct adapter *adapter = (struct adapter *) arg1;
6121
6122 error = sysctl_handle_int(oidp, &input, 0, req);
6123
6124 if ((error) || (req->newptr == NULL))
6125 return (error);
6126
6127 if (input == adapter->fc) /* no change? */
6128 return (error);
6129
6130 switch (input) {
6131 case e1000_fc_rx_pause:
6132 case e1000_fc_tx_pause:
6133 case e1000_fc_full:
6134 case e1000_fc_none:
6135 adapter->hw.fc.requested_mode = input;
6136 adapter->fc = input;
6137 break;
6138 default:
6139 /* Do nothing */
6140 return (error);
6141 }
6142
6143 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6144 e1000_force_mac_fc(&adapter->hw);
6145 return (error);
6146 }
6147
6148 /*
6149 ** Manage Energy Efficient Ethernet:
6150 ** Control values:
6151 ** 0/1 - enabled/disabled
6152 */
6153 static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)6154 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6155 {
6156 struct adapter *adapter = (struct adapter *) arg1;
6157 int error, value;
6158
6159 value = adapter->hw.dev_spec.ich8lan.eee_disable;
6160 error = sysctl_handle_int(oidp, &value, 0, req);
6161 if (error || req->newptr == NULL)
6162 return (error);
6163 EM_CORE_LOCK(adapter);
6164 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6165 em_init_locked(adapter);
6166 EM_CORE_UNLOCK(adapter);
6167 return (0);
6168 }
6169
6170 static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)6171 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6172 {
6173 struct adapter *adapter;
6174 int error;
6175 int result;
6176
6177 result = -1;
6178 error = sysctl_handle_int(oidp, &result, 0, req);
6179
6180 if (error || !req->newptr)
6181 return (error);
6182
6183 if (result == 1) {
6184 adapter = (struct adapter *)arg1;
6185 em_print_debug_info(adapter);
6186 }
6187
6188 return (error);
6189 }
6190
6191 /*
6192 ** This routine is meant to be fluid, add whatever is
6193 ** needed for debugging a problem. -jfv
6194 */
6195 static void
em_print_debug_info(struct adapter * adapter)6196 em_print_debug_info(struct adapter *adapter)
6197 {
6198 device_t dev = adapter->dev;
6199 struct tx_ring *txr = adapter->tx_rings;
6200 struct rx_ring *rxr = adapter->rx_rings;
6201
6202 if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6203 printf("Interface is RUNNING ");
6204 else
6205 printf("Interface is NOT RUNNING\n");
6206
6207 if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6208 printf("and INACTIVE\n");
6209 else
6210 printf("and ACTIVE\n");
6211
6212 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6213 device_printf(dev, "TX Queue %d ------\n", i);
6214 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6215 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6216 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6217 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6218 device_printf(dev, "TX descriptors avail = %d\n",
6219 txr->tx_avail);
6220 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6221 txr->no_desc_avail);
6222 device_printf(dev, "RX Queue %d ------\n", i);
6223 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6224 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6225 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6226 device_printf(dev, "RX discarded packets = %ld\n",
6227 rxr->rx_discarded);
6228 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6229 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6230 }
6231 }
6232
6233 #ifdef EM_MULTIQUEUE
6234 /*
6235 * 82574 only:
6236 * Write a new value to the EEPROM increasing the number of MSIX
6237 * vectors from 3 to 5, for proper multiqueue support.
6238 */
6239 static void
em_enable_vectors_82574(struct adapter * adapter)6240 em_enable_vectors_82574(struct adapter *adapter)
6241 {
6242 struct e1000_hw *hw = &adapter->hw;
6243 device_t dev = adapter->dev;
6244 u16 edata;
6245
6246 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6247 printf("Current cap: %#06x\n", edata);
6248 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6249 device_printf(dev, "Writing to eeprom: increasing "
6250 "reported MSIX vectors from 3 to 5...\n");
6251 edata &= ~(EM_NVM_MSIX_N_MASK);
6252 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6253 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6254 e1000_update_nvm_checksum(hw);
6255 device_printf(dev, "Writing to eeprom: done\n");
6256 }
6257 }
6258 #endif
6259
6260 #ifdef DDB
DB_COMMAND(em_reset_dev,em_ddb_reset_dev)6261 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6262 {
6263 devclass_t dc;
6264 int max_em;
6265
6266 dc = devclass_find("em");
6267 max_em = devclass_get_maxunit(dc);
6268
6269 for (int index = 0; index < (max_em - 1); index++) {
6270 device_t dev;
6271 dev = devclass_get_device(dc, index);
6272 if (device_get_driver(dev) == &em_driver) {
6273 struct adapter *adapter = device_get_softc(dev);
6274 EM_CORE_LOCK(adapter);
6275 em_init_locked(adapter);
6276 EM_CORE_UNLOCK(adapter);
6277 }
6278 }
6279 }
DB_COMMAND(em_dump_queue,em_ddb_dump_queue)6280 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6281 {
6282 devclass_t dc;
6283 int max_em;
6284
6285 dc = devclass_find("em");
6286 max_em = devclass_get_maxunit(dc);
6287
6288 for (int index = 0; index < (max_em - 1); index++) {
6289 device_t dev;
6290 dev = devclass_get_device(dc, index);
6291 if (device_get_driver(dev) == &em_driver)
6292 em_print_debug_info(device_get_softc(dev));
6293 }
6294
6295 }
6296 #endif
6297