1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3
4 Copyright (c) 2006-2013, Myricom Inc.
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28
29 ***************************************************************************/
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: stable/12/sys/dev/mxge/if_mxge.c 371940 2022-04-10 05:02:22Z git2svn $");
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
40 #include <sys/mbuf.h>
41 #include <sys/malloc.h>
42 #include <sys/kdb.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/sx.h>
49 #include <sys/taskqueue.h>
50 #include <contrib/zlib/zlib.h>
51 #include <dev/zlib/zcalloc.h>
52
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
59
60 #include <net/bpf.h>
61
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
64
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_lro.h>
71 #include <netinet6/ip6_var.h>
72
73 #include <machine/bus.h>
74 #include <machine/in_cksum.h>
75 #include <machine/resource.h>
76 #include <sys/bus.h>
77 #include <sys/rman.h>
78 #include <sys/smp.h>
79
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
83
84 #include <vm/vm.h> /* for pmap_mapdev() */
85 #include <vm/pmap.h>
86
87 #if defined(__i386) || defined(__amd64)
88 #include <machine/specialreg.h>
89 #endif
90
91 #include <dev/mxge/mxge_mcp.h>
92 #include <dev/mxge/mcp_gen_header.h>
93 /*#define MXGE_FAKE_IFP*/
94 #include <dev/mxge/if_mxge_var.h>
95 #ifdef IFNET_BUF_RING
96 #include <sys/buf_ring.h>
97 #endif
98
99 #include "opt_inet.h"
100 #include "opt_inet6.h"
101
102 /* tunable params */
103 static int mxge_nvidia_ecrc_enable = 1;
104 static int mxge_force_firmware = 0;
105 static int mxge_intr_coal_delay = 30;
106 static int mxge_deassert_wait = 1;
107 static int mxge_flow_control = 1;
108 static int mxge_verbose = 0;
109 static int mxge_ticks;
110 static int mxge_max_slices = 1;
111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
112 static int mxge_always_promisc = 0;
113 static int mxge_initial_mtu = ETHERMTU_JUMBO;
114 static int mxge_throttle = 0;
115 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
116 static char *mxge_fw_aligned = "mxge_eth_z8e";
117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
119
120 static int mxge_probe(device_t dev);
121 static int mxge_attach(device_t dev);
122 static int mxge_detach(device_t dev);
123 static int mxge_shutdown(device_t dev);
124 static void mxge_intr(void *arg);
125
126 static device_method_t mxge_methods[] =
127 {
128 /* Device interface */
129 DEVMETHOD(device_probe, mxge_probe),
130 DEVMETHOD(device_attach, mxge_attach),
131 DEVMETHOD(device_detach, mxge_detach),
132 DEVMETHOD(device_shutdown, mxge_shutdown),
133
134 DEVMETHOD_END
135 };
136
137 static driver_t mxge_driver =
138 {
139 "mxge",
140 mxge_methods,
141 sizeof(mxge_softc_t),
142 };
143
144 static devclass_t mxge_devclass;
145
146 /* Declare ourselves to be a child of the PCI bus.*/
147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
148 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
149 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
150
151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
153 static int mxge_close(mxge_softc_t *sc, int down);
154 static int mxge_open(mxge_softc_t *sc);
155 static void mxge_tick(void *arg);
156
157 static int
mxge_probe(device_t dev)158 mxge_probe(device_t dev)
159 {
160 int rev;
161
162
163 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
164 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
165 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
166 rev = pci_get_revid(dev);
167 switch (rev) {
168 case MXGE_PCI_REV_Z8E:
169 device_set_desc(dev, "Myri10G-PCIE-8A");
170 break;
171 case MXGE_PCI_REV_Z8ES:
172 device_set_desc(dev, "Myri10G-PCIE-8B");
173 break;
174 default:
175 device_set_desc(dev, "Myri10G-PCIE-8??");
176 device_printf(dev, "Unrecognized rev %d NIC\n",
177 rev);
178 break;
179 }
180 return 0;
181 }
182 return ENXIO;
183 }
184
185 static void
mxge_enable_wc(mxge_softc_t * sc)186 mxge_enable_wc(mxge_softc_t *sc)
187 {
188 #if defined(__i386) || defined(__amd64)
189 vm_offset_t len;
190 int err;
191
192 sc->wc = 1;
193 len = rman_get_size(sc->mem_res);
194 err = pmap_change_attr((vm_offset_t) sc->sram,
195 len, PAT_WRITE_COMBINING);
196 if (err != 0) {
197 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
198 err);
199 sc->wc = 0;
200 }
201 #endif
202 }
203
204
205 /* callback to get our DMA address */
206 static void
mxge_dmamap_callback(void * arg,bus_dma_segment_t * segs,int nsegs,int error)207 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
208 int error)
209 {
210 if (error == 0) {
211 *(bus_addr_t *) arg = segs->ds_addr;
212 }
213 }
214
215 static int
mxge_dma_alloc(mxge_softc_t * sc,mxge_dma_t * dma,size_t bytes,bus_size_t alignment)216 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
217 bus_size_t alignment)
218 {
219 int err;
220 device_t dev = sc->dev;
221 bus_size_t boundary, maxsegsize;
222
223 if (bytes > 4096 && alignment == 4096) {
224 boundary = 0;
225 maxsegsize = bytes;
226 } else {
227 boundary = 4096;
228 maxsegsize = 4096;
229 }
230
231 /* allocate DMAable memory tags */
232 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
233 alignment, /* alignment */
234 boundary, /* boundary */
235 BUS_SPACE_MAXADDR, /* low */
236 BUS_SPACE_MAXADDR, /* high */
237 NULL, NULL, /* filter */
238 bytes, /* maxsize */
239 1, /* num segs */
240 maxsegsize, /* maxsegsize */
241 BUS_DMA_COHERENT, /* flags */
242 NULL, NULL, /* lock */
243 &dma->dmat); /* tag */
244 if (err != 0) {
245 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
246 return err;
247 }
248
249 /* allocate DMAable memory & map */
250 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
251 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
252 | BUS_DMA_ZERO), &dma->map);
253 if (err != 0) {
254 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
255 goto abort_with_dmat;
256 }
257
258 /* load the memory */
259 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
260 mxge_dmamap_callback,
261 (void *)&dma->bus_addr, 0);
262 if (err != 0) {
263 device_printf(dev, "couldn't load map (err = %d)\n", err);
264 goto abort_with_mem;
265 }
266 return 0;
267
268 abort_with_mem:
269 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
270 abort_with_dmat:
271 (void)bus_dma_tag_destroy(dma->dmat);
272 return err;
273 }
274
275
276 static void
mxge_dma_free(mxge_dma_t * dma)277 mxge_dma_free(mxge_dma_t *dma)
278 {
279 bus_dmamap_unload(dma->dmat, dma->map);
280 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
281 (void)bus_dma_tag_destroy(dma->dmat);
282 }
283
284 /*
285 * The eeprom strings on the lanaiX have the format
286 * SN=x\0
287 * MAC=x:x:x:x:x:x\0
288 * PC=text\0
289 */
290
291 static int
mxge_parse_strings(mxge_softc_t * sc)292 mxge_parse_strings(mxge_softc_t *sc)
293 {
294 char *ptr;
295 int i, found_mac, found_sn2;
296 char *endptr;
297
298 ptr = sc->eeprom_strings;
299 found_mac = 0;
300 found_sn2 = 0;
301 while (*ptr != '\0') {
302 if (strncmp(ptr, "MAC=", 4) == 0) {
303 ptr += 4;
304 for (i = 0;;) {
305 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
306 if (endptr - ptr != 2)
307 goto abort;
308 ptr = endptr;
309 if (++i == 6)
310 break;
311 if (*ptr++ != ':')
312 goto abort;
313 }
314 found_mac = 1;
315 } else if (strncmp(ptr, "PC=", 3) == 0) {
316 ptr += 3;
317 strlcpy(sc->product_code_string, ptr,
318 sizeof(sc->product_code_string));
319 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
320 ptr += 3;
321 strlcpy(sc->serial_number_string, ptr,
322 sizeof(sc->serial_number_string));
323 } else if (strncmp(ptr, "SN2=", 4) == 0) {
324 /* SN2 takes precedence over SN */
325 ptr += 4;
326 found_sn2 = 1;
327 strlcpy(sc->serial_number_string, ptr,
328 sizeof(sc->serial_number_string));
329 }
330 while (*ptr++ != '\0') {}
331 }
332
333 if (found_mac)
334 return 0;
335
336 abort:
337 device_printf(sc->dev, "failed to parse eeprom_strings\n");
338
339 return ENXIO;
340 }
341
342 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
343 static void
mxge_enable_nvidia_ecrc(mxge_softc_t * sc)344 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
345 {
346 uint32_t val;
347 unsigned long base, off;
348 char *va, *cfgptr;
349 device_t pdev, mcp55;
350 uint16_t vendor_id, device_id, word;
351 uintptr_t bus, slot, func, ivend, idev;
352 uint32_t *ptr32;
353
354
355 if (!mxge_nvidia_ecrc_enable)
356 return;
357
358 pdev = device_get_parent(device_get_parent(sc->dev));
359 if (pdev == NULL) {
360 device_printf(sc->dev, "could not find parent?\n");
361 return;
362 }
363 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
364 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
365
366 if (vendor_id != 0x10de)
367 return;
368
369 base = 0;
370
371 if (device_id == 0x005d) {
372 /* ck804, base address is magic */
373 base = 0xe0000000UL;
374 } else if (device_id >= 0x0374 && device_id <= 0x378) {
375 /* mcp55, base address stored in chipset */
376 mcp55 = pci_find_bsf(0, 0, 0);
377 if (mcp55 &&
378 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
379 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
380 word = pci_read_config(mcp55, 0x90, 2);
381 base = ((unsigned long)word & 0x7ffeU) << 25;
382 }
383 }
384 if (!base)
385 return;
386
387 /* XXXX
388 Test below is commented because it is believed that doing
389 config read/write beyond 0xff will access the config space
390 for the next larger function. Uncomment this and remove
391 the hacky pmap_mapdev() way of accessing config space when
392 FreeBSD grows support for extended pcie config space access
393 */
394 #if 0
395 /* See if we can, by some miracle, access the extended
396 config space */
397 val = pci_read_config(pdev, 0x178, 4);
398 if (val != 0xffffffff) {
399 val |= 0x40;
400 pci_write_config(pdev, 0x178, val, 4);
401 return;
402 }
403 #endif
404 /* Rather than using normal pci config space writes, we must
405 * map the Nvidia config space ourselves. This is because on
406 * opteron/nvidia class machine the 0xe000000 mapping is
407 * handled by the nvidia chipset, that means the internal PCI
408 * device (the on-chip northbridge), or the amd-8131 bridge
409 * and things behind them are not visible by this method.
410 */
411
412 BUS_READ_IVAR(device_get_parent(pdev), pdev,
413 PCI_IVAR_BUS, &bus);
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_SLOT, &slot);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_FUNCTION, &func);
418 BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 PCI_IVAR_VENDOR, &ivend);
420 BUS_READ_IVAR(device_get_parent(pdev), pdev,
421 PCI_IVAR_DEVICE, &idev);
422
423 off = base
424 + 0x00100000UL * (unsigned long)bus
425 + 0x00001000UL * (unsigned long)(func
426 + 8 * slot);
427
428 /* map it into the kernel */
429 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
430
431
432 if (va == NULL) {
433 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
434 return;
435 }
436 /* get a pointer to the config space mapped into the kernel */
437 cfgptr = va + (off & PAGE_MASK);
438
439 /* make sure that we can really access it */
440 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
441 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
442 if (! (vendor_id == ivend && device_id == idev)) {
443 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
444 vendor_id, device_id);
445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
446 return;
447 }
448
449 ptr32 = (uint32_t*)(cfgptr + 0x178);
450 val = *ptr32;
451
452 if (val == 0xffffffff) {
453 device_printf(sc->dev, "extended mapping failed\n");
454 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
455 return;
456 }
457 *ptr32 = val | 0x40;
458 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
459 if (mxge_verbose)
460 device_printf(sc->dev,
461 "Enabled ECRC on upstream Nvidia bridge "
462 "at %d:%d:%d\n",
463 (int)bus, (int)slot, (int)func);
464 return;
465 }
466 #else
467 static void
mxge_enable_nvidia_ecrc(mxge_softc_t * sc)468 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
469 {
470 device_printf(sc->dev,
471 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
472 return;
473 }
474 #endif
475
476
477 static int
mxge_dma_test(mxge_softc_t * sc,int test_type)478 mxge_dma_test(mxge_softc_t *sc, int test_type)
479 {
480 mxge_cmd_t cmd;
481 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
482 int status;
483 uint32_t len;
484 char *test = " ";
485
486
487 /* Run a small DMA test.
488 * The magic multipliers to the length tell the firmware
489 * to do DMA read, write, or read+write tests. The
490 * results are returned in cmd.data0. The upper 16
491 * bits of the return is the number of transfers completed.
492 * The lower 16 bits is the time in 0.5us ticks that the
493 * transfers took to complete.
494 */
495
496 len = sc->tx_boundary;
497
498 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
499 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
500 cmd.data2 = len * 0x10000;
501 status = mxge_send_cmd(sc, test_type, &cmd);
502 if (status != 0) {
503 test = "read";
504 goto abort;
505 }
506 sc->read_dma = ((cmd.data0>>16) * len * 2) /
507 (cmd.data0 & 0xffff);
508 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
509 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
510 cmd.data2 = len * 0x1;
511 status = mxge_send_cmd(sc, test_type, &cmd);
512 if (status != 0) {
513 test = "write";
514 goto abort;
515 }
516 sc->write_dma = ((cmd.data0>>16) * len * 2) /
517 (cmd.data0 & 0xffff);
518
519 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
520 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
521 cmd.data2 = len * 0x10001;
522 status = mxge_send_cmd(sc, test_type, &cmd);
523 if (status != 0) {
524 test = "read/write";
525 goto abort;
526 }
527 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
528 (cmd.data0 & 0xffff);
529
530 abort:
531 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
532 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
533 test, status);
534
535 return status;
536 }
537
538 /*
539 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
540 * when the PCI-E Completion packets are aligned on an 8-byte
541 * boundary. Some PCI-E chip sets always align Completion packets; on
542 * the ones that do not, the alignment can be enforced by enabling
543 * ECRC generation (if supported).
544 *
545 * When PCI-E Completion packets are not aligned, it is actually more
546 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
547 *
548 * If the driver can neither enable ECRC nor verify that it has
549 * already been enabled, then it must use a firmware image which works
550 * around unaligned completion packets (ethp_z8e.dat), and it should
551 * also ensure that it never gives the device a Read-DMA which is
552 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
553 * enabled, then the driver should use the aligned (eth_z8e.dat)
554 * firmware image, and set tx_boundary to 4KB.
555 */
556
557 static int
mxge_firmware_probe(mxge_softc_t * sc)558 mxge_firmware_probe(mxge_softc_t *sc)
559 {
560 device_t dev = sc->dev;
561 int reg, status;
562 uint16_t pectl;
563
564 sc->tx_boundary = 4096;
565 /*
566 * Verify the max read request size was set to 4KB
567 * before trying the test with 4KB.
568 */
569 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
570 pectl = pci_read_config(dev, reg + 0x8, 2);
571 if ((pectl & (5 << 12)) != (5 << 12)) {
572 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
573 pectl);
574 sc->tx_boundary = 2048;
575 }
576 }
577
578 /*
579 * load the optimized firmware (which assumes aligned PCIe
580 * completions) in order to see if it works on this host.
581 */
582 sc->fw_name = mxge_fw_aligned;
583 status = mxge_load_firmware(sc, 1);
584 if (status != 0) {
585 return status;
586 }
587
588 /*
589 * Enable ECRC if possible
590 */
591 mxge_enable_nvidia_ecrc(sc);
592
593 /*
594 * Run a DMA test which watches for unaligned completions and
595 * aborts on the first one seen. Not required on Z8ES or newer.
596 */
597 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
598 return 0;
599 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
600 if (status == 0)
601 return 0; /* keep the aligned firmware */
602
603 if (status != E2BIG)
604 device_printf(dev, "DMA test failed: %d\n", status);
605 if (status == ENOSYS)
606 device_printf(dev, "Falling back to ethp! "
607 "Please install up to date fw\n");
608 return status;
609 }
610
611 static int
mxge_select_firmware(mxge_softc_t * sc)612 mxge_select_firmware(mxge_softc_t *sc)
613 {
614 int aligned = 0;
615 int force_firmware = mxge_force_firmware;
616
617 if (sc->throttle)
618 force_firmware = sc->throttle;
619
620 if (force_firmware != 0) {
621 if (force_firmware == 1)
622 aligned = 1;
623 else
624 aligned = 0;
625 if (mxge_verbose)
626 device_printf(sc->dev,
627 "Assuming %s completions (forced)\n",
628 aligned ? "aligned" : "unaligned");
629 goto abort;
630 }
631
632 /* if the PCIe link width is 4 or less, we can use the aligned
633 firmware and skip any checks */
634 if (sc->link_width != 0 && sc->link_width <= 4) {
635 device_printf(sc->dev,
636 "PCIe x%d Link, expect reduced performance\n",
637 sc->link_width);
638 aligned = 1;
639 goto abort;
640 }
641
642 if (0 == mxge_firmware_probe(sc))
643 return 0;
644
645 abort:
646 if (aligned) {
647 sc->fw_name = mxge_fw_aligned;
648 sc->tx_boundary = 4096;
649 } else {
650 sc->fw_name = mxge_fw_unaligned;
651 sc->tx_boundary = 2048;
652 }
653 return (mxge_load_firmware(sc, 0));
654 }
655
656 static int
mxge_validate_firmware(mxge_softc_t * sc,const mcp_gen_header_t * hdr)657 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
658 {
659
660
661 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
662 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
663 be32toh(hdr->mcp_type));
664 return EIO;
665 }
666
667 /* save firmware version for sysctl */
668 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
669 if (mxge_verbose)
670 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
671
672 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
673 &sc->fw_ver_minor, &sc->fw_ver_tiny);
674
675 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
676 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
677 device_printf(sc->dev, "Found firmware version %s\n",
678 sc->fw_version);
679 device_printf(sc->dev, "Driver needs %d.%d\n",
680 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
681 return EINVAL;
682 }
683 return 0;
684
685 }
686
687 static int
mxge_load_firmware_helper(mxge_softc_t * sc,uint32_t * limit)688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
689 {
690 z_stream zs;
691 char *inflate_buffer;
692 const struct firmware *fw;
693 const mcp_gen_header_t *hdr;
694 unsigned hdr_offset;
695 int status;
696 unsigned int i;
697 char dummy;
698 size_t fw_len;
699
700 fw = firmware_get(sc->fw_name);
701 if (fw == NULL) {
702 device_printf(sc->dev, "Could not find firmware image %s\n",
703 sc->fw_name);
704 return ENOENT;
705 }
706
707
708
709 /* setup zlib and decompress f/w */
710 bzero(&zs, sizeof (zs));
711 zs.zalloc = zcalloc_nowait;
712 zs.zfree = zcfree;
713 status = inflateInit(&zs);
714 if (status != Z_OK) {
715 status = EIO;
716 goto abort_with_fw;
717 }
718
719 /* the uncompressed size is stored as the firmware version,
720 which would otherwise go unused */
721 fw_len = (size_t) fw->version;
722 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
723 if (inflate_buffer == NULL)
724 goto abort_with_zs;
725 zs.avail_in = fw->datasize;
726 zs.next_in = __DECONST(char *, fw->data);
727 zs.avail_out = fw_len;
728 zs.next_out = inflate_buffer;
729 status = inflate(&zs, Z_FINISH);
730 if (status != Z_STREAM_END) {
731 device_printf(sc->dev, "zlib %d\n", status);
732 status = EIO;
733 goto abort_with_buffer;
734 }
735
736 /* check id */
737 hdr_offset = htobe32(*(const uint32_t *)
738 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
739 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
740 device_printf(sc->dev, "Bad firmware file");
741 status = EIO;
742 goto abort_with_buffer;
743 }
744 hdr = (const void*)(inflate_buffer + hdr_offset);
745
746 status = mxge_validate_firmware(sc, hdr);
747 if (status != 0)
748 goto abort_with_buffer;
749
750 /* Copy the inflated firmware to NIC SRAM. */
751 for (i = 0; i < fw_len; i += 256) {
752 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
753 inflate_buffer + i,
754 min(256U, (unsigned)(fw_len - i)));
755 wmb();
756 dummy = *sc->sram;
757 wmb();
758 }
759
760 *limit = fw_len;
761 status = 0;
762 abort_with_buffer:
763 free(inflate_buffer, M_TEMP);
764 abort_with_zs:
765 inflateEnd(&zs);
766 abort_with_fw:
767 firmware_put(fw, FIRMWARE_UNLOAD);
768 return status;
769 }
770
771 /*
772 * Enable or disable periodic RDMAs from the host to make certain
773 * chipsets resend dropped PCIe messages
774 */
775
776 static void
mxge_dummy_rdma(mxge_softc_t * sc,int enable)777 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
778 {
779 char buf_bytes[72];
780 volatile uint32_t *confirm;
781 volatile char *submit;
782 uint32_t *buf, dma_low, dma_high;
783 int i;
784
785 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
786
787 /* clear confirmation addr */
788 confirm = (volatile uint32_t *)sc->cmd;
789 *confirm = 0;
790 wmb();
791
792 /* send an rdma command to the PCIe engine, and wait for the
793 response in the confirmation address. The firmware should
794 write a -1 there to indicate it is alive and well
795 */
796
797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
799 buf[0] = htobe32(dma_high); /* confirm addr MSW */
800 buf[1] = htobe32(dma_low); /* confirm addr LSW */
801 buf[2] = htobe32(0xffffffff); /* confirm data */
802 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
803 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
804 buf[3] = htobe32(dma_high); /* dummy addr MSW */
805 buf[4] = htobe32(dma_low); /* dummy addr LSW */
806 buf[5] = htobe32(enable); /* enable? */
807
808
809 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
810
811 mxge_pio_copy(submit, buf, 64);
812 wmb();
813 DELAY(1000);
814 wmb();
815 i = 0;
816 while (*confirm != 0xffffffff && i < 20) {
817 DELAY(1000);
818 i++;
819 }
820 if (*confirm != 0xffffffff) {
821 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
822 (enable ? "enable" : "disable"), confirm,
823 *confirm);
824 }
825 return;
826 }
827
828 static int
mxge_send_cmd(mxge_softc_t * sc,uint32_t cmd,mxge_cmd_t * data)829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
830 {
831 mcp_cmd_t *buf;
832 char buf_bytes[sizeof(*buf) + 8];
833 volatile mcp_cmd_response_t *response = sc->cmd;
834 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
835 uint32_t dma_low, dma_high;
836 int err, sleep_total = 0;
837
838 /* ensure buf is aligned to 8 bytes */
839 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
840
841 buf->data0 = htobe32(data->data0);
842 buf->data1 = htobe32(data->data1);
843 buf->data2 = htobe32(data->data2);
844 buf->cmd = htobe32(cmd);
845 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
846 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
847
848 buf->response_addr.low = htobe32(dma_low);
849 buf->response_addr.high = htobe32(dma_high);
850 mtx_lock(&sc->cmd_mtx);
851 response->result = 0xffffffff;
852 wmb();
853 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
854
855 /* wait up to 20ms */
856 err = EAGAIN;
857 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
858 bus_dmamap_sync(sc->cmd_dma.dmat,
859 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
860 wmb();
861 switch (be32toh(response->result)) {
862 case 0:
863 data->data0 = be32toh(response->data);
864 err = 0;
865 break;
866 case 0xffffffff:
867 DELAY(1000);
868 break;
869 case MXGEFW_CMD_UNKNOWN:
870 err = ENOSYS;
871 break;
872 case MXGEFW_CMD_ERROR_UNALIGNED:
873 err = E2BIG;
874 break;
875 case MXGEFW_CMD_ERROR_BUSY:
876 err = EBUSY;
877 break;
878 case MXGEFW_CMD_ERROR_I2C_ABSENT:
879 err = ENXIO;
880 break;
881 default:
882 device_printf(sc->dev,
883 "mxge: command %d "
884 "failed, result = %d\n",
885 cmd, be32toh(response->result));
886 err = ENXIO;
887 break;
888 }
889 if (err != EAGAIN)
890 break;
891 }
892 if (err == EAGAIN)
893 device_printf(sc->dev, "mxge: command %d timed out"
894 "result = %d\n",
895 cmd, be32toh(response->result));
896 mtx_unlock(&sc->cmd_mtx);
897 return err;
898 }
899
900 static int
mxge_adopt_running_firmware(mxge_softc_t * sc)901 mxge_adopt_running_firmware(mxge_softc_t *sc)
902 {
903 struct mcp_gen_header *hdr;
904 const size_t bytes = sizeof (struct mcp_gen_header);
905 size_t hdr_offset;
906 int status;
907
908 /* find running firmware header */
909 hdr_offset = htobe32(*(volatile uint32_t *)
910 (sc->sram + MCP_HEADER_PTR_OFFSET));
911
912 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
913 device_printf(sc->dev,
914 "Running firmware has bad header offset (%d)\n",
915 (int)hdr_offset);
916 return EIO;
917 }
918
919 /* copy header of running firmware from SRAM to host memory to
920 * validate firmware */
921 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
922 if (hdr == NULL) {
923 device_printf(sc->dev, "could not malloc firmware hdr\n");
924 return ENOMEM;
925 }
926 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
927 rman_get_bushandle(sc->mem_res),
928 hdr_offset, (char *)hdr, bytes);
929 status = mxge_validate_firmware(sc, hdr);
930 free(hdr, M_DEVBUF);
931
932 /*
933 * check to see if adopted firmware has bug where adopting
934 * it will cause broadcasts to be filtered unless the NIC
935 * is kept in ALLMULTI mode
936 */
937 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
938 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
939 sc->adopted_rx_filter_bug = 1;
940 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
941 "working around rx filter bug\n",
942 sc->fw_ver_major, sc->fw_ver_minor,
943 sc->fw_ver_tiny);
944 }
945
946 return status;
947 }
948
949
950 static int
mxge_load_firmware(mxge_softc_t * sc,int adopt)951 mxge_load_firmware(mxge_softc_t *sc, int adopt)
952 {
953 volatile uint32_t *confirm;
954 volatile char *submit;
955 char buf_bytes[72];
956 uint32_t *buf, size, dma_low, dma_high;
957 int status, i;
958
959 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
960
961 size = sc->sram_size;
962 status = mxge_load_firmware_helper(sc, &size);
963 if (status) {
964 if (!adopt)
965 return status;
966 /* Try to use the currently running firmware, if
967 it is new enough */
968 status = mxge_adopt_running_firmware(sc);
969 if (status) {
970 device_printf(sc->dev,
971 "failed to adopt running firmware\n");
972 return status;
973 }
974 device_printf(sc->dev,
975 "Successfully adopted running firmware\n");
976 if (sc->tx_boundary == 4096) {
977 device_printf(sc->dev,
978 "Using firmware currently running on NIC"
979 ". For optimal\n");
980 device_printf(sc->dev,
981 "performance consider loading optimized "
982 "firmware\n");
983 }
984 sc->fw_name = mxge_fw_unaligned;
985 sc->tx_boundary = 2048;
986 return 0;
987 }
988 /* clear confirmation addr */
989 confirm = (volatile uint32_t *)sc->cmd;
990 *confirm = 0;
991 wmb();
992 /* send a reload command to the bootstrap MCP, and wait for the
993 response in the confirmation address. The firmware should
994 write a -1 there to indicate it is alive and well
995 */
996
997 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
998 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
999
1000 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1001 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1002 buf[2] = htobe32(0xffffffff); /* confirm data */
1003
1004 /* FIX: All newest firmware should un-protect the bottom of
1005 the sram before handoff. However, the very first interfaces
1006 do not. Therefore the handoff copy must skip the first 8 bytes
1007 */
1008 /* where the code starts*/
1009 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1010 buf[4] = htobe32(size - 8); /* length of code */
1011 buf[5] = htobe32(8); /* where to copy to */
1012 buf[6] = htobe32(0); /* where to jump to */
1013
1014 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1015 mxge_pio_copy(submit, buf, 64);
1016 wmb();
1017 DELAY(1000);
1018 wmb();
1019 i = 0;
1020 while (*confirm != 0xffffffff && i < 20) {
1021 DELAY(1000*10);
1022 i++;
1023 bus_dmamap_sync(sc->cmd_dma.dmat,
1024 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1025 }
1026 if (*confirm != 0xffffffff) {
1027 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1028 confirm, *confirm);
1029
1030 return ENXIO;
1031 }
1032 return 0;
1033 }
1034
1035 static int
mxge_update_mac_address(mxge_softc_t * sc)1036 mxge_update_mac_address(mxge_softc_t *sc)
1037 {
1038 mxge_cmd_t cmd;
1039 uint8_t *addr = sc->mac_addr;
1040 int status;
1041
1042
1043 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1044 | (addr[2] << 8) | addr[3]);
1045
1046 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1047
1048 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1049 return status;
1050 }
1051
1052 static int
mxge_change_pause(mxge_softc_t * sc,int pause)1053 mxge_change_pause(mxge_softc_t *sc, int pause)
1054 {
1055 mxge_cmd_t cmd;
1056 int status;
1057
1058 if (pause)
1059 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1060 &cmd);
1061 else
1062 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1063 &cmd);
1064
1065 if (status) {
1066 device_printf(sc->dev, "Failed to set flow control mode\n");
1067 return ENXIO;
1068 }
1069 sc->pause = pause;
1070 return 0;
1071 }
1072
1073 static void
mxge_change_promisc(mxge_softc_t * sc,int promisc)1074 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1075 {
1076 mxge_cmd_t cmd;
1077 int status;
1078
1079 if (mxge_always_promisc)
1080 promisc = 1;
1081
1082 if (promisc)
1083 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1084 &cmd);
1085 else
1086 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1087 &cmd);
1088
1089 if (status) {
1090 device_printf(sc->dev, "Failed to set promisc mode\n");
1091 }
1092 }
1093
1094 static void
mxge_set_multicast_list(mxge_softc_t * sc)1095 mxge_set_multicast_list(mxge_softc_t *sc)
1096 {
1097 mxge_cmd_t cmd;
1098 struct ifmultiaddr *ifma;
1099 struct ifnet *ifp = sc->ifp;
1100 int err;
1101
1102 /* This firmware is known to not support multicast */
1103 if (!sc->fw_multicast_support)
1104 return;
1105
1106 /* Disable multicast filtering while we play with the lists*/
1107 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1108 if (err != 0) {
1109 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1110 " error status: %d\n", err);
1111 return;
1112 }
1113
1114 if (sc->adopted_rx_filter_bug)
1115 return;
1116
1117 if (ifp->if_flags & IFF_ALLMULTI)
1118 /* request to disable multicast filtering, so quit here */
1119 return;
1120
1121 /* Flush all the filters */
1122
1123 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1124 if (err != 0) {
1125 device_printf(sc->dev,
1126 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1127 ", error status: %d\n", err);
1128 return;
1129 }
1130
1131 /* Walk the multicast list, and add each address */
1132
1133 if_maddr_rlock(ifp);
1134 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1135 if (ifma->ifma_addr->sa_family != AF_LINK)
1136 continue;
1137 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1138 &cmd.data0, 4);
1139 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1140 &cmd.data1, 2);
1141 cmd.data0 = htonl(cmd.data0);
1142 cmd.data1 = htonl(cmd.data1);
1143 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1144 if (err != 0) {
1145 device_printf(sc->dev, "Failed "
1146 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1147 "%d\t", err);
1148 /* abort, leaving multicast filtering off */
1149 if_maddr_runlock(ifp);
1150 return;
1151 }
1152 }
1153 if_maddr_runlock(ifp);
1154 /* Enable multicast filtering */
1155 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1156 if (err != 0) {
1157 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1158 ", error status: %d\n", err);
1159 }
1160 }
1161
1162 static int
mxge_max_mtu(mxge_softc_t * sc)1163 mxge_max_mtu(mxge_softc_t *sc)
1164 {
1165 mxge_cmd_t cmd;
1166 int status;
1167
1168 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1169 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1170
1171 /* try to set nbufs to see if it we can
1172 use virtually contiguous jumbos */
1173 cmd.data0 = 0;
1174 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1175 &cmd);
1176 if (status == 0)
1177 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1178
1179 /* otherwise, we're limited to MJUMPAGESIZE */
1180 return MJUMPAGESIZE - MXGEFW_PAD;
1181 }
1182
1183 static int
mxge_reset(mxge_softc_t * sc,int interrupts_setup)1184 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1185 {
1186 struct mxge_slice_state *ss;
1187 mxge_rx_done_t *rx_done;
1188 volatile uint32_t *irq_claim;
1189 mxge_cmd_t cmd;
1190 int slice, status;
1191
1192 /* try to send a reset command to the card to see if it
1193 is alive */
1194 memset(&cmd, 0, sizeof (cmd));
1195 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1196 if (status != 0) {
1197 device_printf(sc->dev, "failed reset\n");
1198 return ENXIO;
1199 }
1200
1201 mxge_dummy_rdma(sc, 1);
1202
1203
1204 /* set the intrq size */
1205 cmd.data0 = sc->rx_ring_size;
1206 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1207
1208 /*
1209 * Even though we already know how many slices are supported
1210 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1211 * has magic side effects, and must be called after a reset.
1212 * It must be called prior to calling any RSS related cmds,
1213 * including assigning an interrupt queue for anything but
1214 * slice 0. It must also be called *after*
1215 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1216 * the firmware to compute offsets.
1217 */
1218
1219 if (sc->num_slices > 1) {
1220 /* ask the maximum number of slices it supports */
1221 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1222 &cmd);
1223 if (status != 0) {
1224 device_printf(sc->dev,
1225 "failed to get number of slices\n");
1226 return status;
1227 }
1228 /*
1229 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1230 * to setting up the interrupt queue DMA
1231 */
1232 cmd.data0 = sc->num_slices;
1233 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1234 #ifdef IFNET_BUF_RING
1235 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1236 #endif
1237 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1238 &cmd);
1239 if (status != 0) {
1240 device_printf(sc->dev,
1241 "failed to set number of slices\n");
1242 return status;
1243 }
1244 }
1245
1246
1247 if (interrupts_setup) {
1248 /* Now exchange information about interrupts */
1249 for (slice = 0; slice < sc->num_slices; slice++) {
1250 rx_done = &sc->ss[slice].rx_done;
1251 memset(rx_done->entry, 0, sc->rx_ring_size);
1252 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1253 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1254 cmd.data2 = slice;
1255 status |= mxge_send_cmd(sc,
1256 MXGEFW_CMD_SET_INTRQ_DMA,
1257 &cmd);
1258 }
1259 }
1260
1261 status |= mxge_send_cmd(sc,
1262 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1263
1264
1265 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1266
1267 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1268 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1269
1270
1271 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1272 &cmd);
1273 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1274 if (status != 0) {
1275 device_printf(sc->dev, "failed set interrupt parameters\n");
1276 return status;
1277 }
1278
1279
1280 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1281
1282
1283 /* run a DMA benchmark */
1284 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1285
1286 for (slice = 0; slice < sc->num_slices; slice++) {
1287 ss = &sc->ss[slice];
1288
1289 ss->irq_claim = irq_claim + (2 * slice);
1290 /* reset mcp/driver shared state back to 0 */
1291 ss->rx_done.idx = 0;
1292 ss->rx_done.cnt = 0;
1293 ss->tx.req = 0;
1294 ss->tx.done = 0;
1295 ss->tx.pkt_done = 0;
1296 ss->tx.queue_active = 0;
1297 ss->tx.activate = 0;
1298 ss->tx.deactivate = 0;
1299 ss->tx.wake = 0;
1300 ss->tx.defrag = 0;
1301 ss->tx.stall = 0;
1302 ss->rx_big.cnt = 0;
1303 ss->rx_small.cnt = 0;
1304 ss->lc.lro_bad_csum = 0;
1305 ss->lc.lro_queued = 0;
1306 ss->lc.lro_flushed = 0;
1307 if (ss->fw_stats != NULL) {
1308 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1309 }
1310 }
1311 sc->rdma_tags_available = 15;
1312 status = mxge_update_mac_address(sc);
1313 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1314 mxge_change_pause(sc, sc->pause);
1315 mxge_set_multicast_list(sc);
1316 if (sc->throttle) {
1317 cmd.data0 = sc->throttle;
1318 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1319 &cmd)) {
1320 device_printf(sc->dev,
1321 "can't enable throttle\n");
1322 }
1323 }
1324 return status;
1325 }
1326
1327 static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)1328 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1329 {
1330 mxge_cmd_t cmd;
1331 mxge_softc_t *sc;
1332 int err;
1333 unsigned int throttle;
1334
1335 sc = arg1;
1336 throttle = sc->throttle;
1337 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1338 if (err != 0) {
1339 return err;
1340 }
1341
1342 if (throttle == sc->throttle)
1343 return 0;
1344
1345 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1346 return EINVAL;
1347
1348 mtx_lock(&sc->driver_mtx);
1349 cmd.data0 = throttle;
1350 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1351 if (err == 0)
1352 sc->throttle = throttle;
1353 mtx_unlock(&sc->driver_mtx);
1354 return err;
1355 }
1356
1357 static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)1358 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1359 {
1360 mxge_softc_t *sc;
1361 unsigned int intr_coal_delay;
1362 int err;
1363
1364 sc = arg1;
1365 intr_coal_delay = sc->intr_coal_delay;
1366 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1367 if (err != 0) {
1368 return err;
1369 }
1370 if (intr_coal_delay == sc->intr_coal_delay)
1371 return 0;
1372
1373 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1374 return EINVAL;
1375
1376 mtx_lock(&sc->driver_mtx);
1377 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1378 sc->intr_coal_delay = intr_coal_delay;
1379
1380 mtx_unlock(&sc->driver_mtx);
1381 return err;
1382 }
1383
1384 static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)1385 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1386 {
1387 mxge_softc_t *sc;
1388 unsigned int enabled;
1389 int err;
1390
1391 sc = arg1;
1392 enabled = sc->pause;
1393 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1394 if (err != 0) {
1395 return err;
1396 }
1397 if (enabled == sc->pause)
1398 return 0;
1399
1400 mtx_lock(&sc->driver_mtx);
1401 err = mxge_change_pause(sc, enabled);
1402 mtx_unlock(&sc->driver_mtx);
1403 return err;
1404 }
1405
1406 static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)1407 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1408 {
1409 int err;
1410
1411 if (arg1 == NULL)
1412 return EFAULT;
1413 arg2 = be32toh(*(int *)arg1);
1414 arg1 = NULL;
1415 err = sysctl_handle_int(oidp, arg1, arg2, req);
1416
1417 return err;
1418 }
1419
1420 static void
mxge_rem_sysctls(mxge_softc_t * sc)1421 mxge_rem_sysctls(mxge_softc_t *sc)
1422 {
1423 struct mxge_slice_state *ss;
1424 int slice;
1425
1426 if (sc->slice_sysctl_tree == NULL)
1427 return;
1428
1429 for (slice = 0; slice < sc->num_slices; slice++) {
1430 ss = &sc->ss[slice];
1431 if (ss == NULL || ss->sysctl_tree == NULL)
1432 continue;
1433 sysctl_ctx_free(&ss->sysctl_ctx);
1434 ss->sysctl_tree = NULL;
1435 }
1436 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1437 sc->slice_sysctl_tree = NULL;
1438 }
1439
1440 static void
mxge_add_sysctls(mxge_softc_t * sc)1441 mxge_add_sysctls(mxge_softc_t *sc)
1442 {
1443 struct sysctl_ctx_list *ctx;
1444 struct sysctl_oid_list *children;
1445 mcp_irq_data_t *fw;
1446 struct mxge_slice_state *ss;
1447 int slice;
1448 char slice_num[8];
1449
1450 ctx = device_get_sysctl_ctx(sc->dev);
1451 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1452 fw = sc->ss[0].fw_stats;
1453
1454 /* random information */
1455 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1456 "firmware_version",
1457 CTLFLAG_RD, sc->fw_version,
1458 0, "firmware version");
1459 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1460 "serial_number",
1461 CTLFLAG_RD, sc->serial_number_string,
1462 0, "serial number");
1463 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1464 "product_code",
1465 CTLFLAG_RD, sc->product_code_string,
1466 0, "product_code");
1467 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1468 "pcie_link_width",
1469 CTLFLAG_RD, &sc->link_width,
1470 0, "tx_boundary");
1471 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1472 "tx_boundary",
1473 CTLFLAG_RD, &sc->tx_boundary,
1474 0, "tx_boundary");
1475 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1476 "write_combine",
1477 CTLFLAG_RD, &sc->wc,
1478 0, "write combining PIO?");
1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1480 "read_dma_MBs",
1481 CTLFLAG_RD, &sc->read_dma,
1482 0, "DMA Read speed in MB/s");
1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1484 "write_dma_MBs",
1485 CTLFLAG_RD, &sc->write_dma,
1486 0, "DMA Write speed in MB/s");
1487 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1488 "read_write_dma_MBs",
1489 CTLFLAG_RD, &sc->read_write_dma,
1490 0, "DMA concurrent Read/Write speed in MB/s");
1491 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1492 "watchdog_resets",
1493 CTLFLAG_RD, &sc->watchdog_resets,
1494 0, "Number of times NIC was reset");
1495
1496
1497 /* performance related tunables */
1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1499 "intr_coal_delay",
1500 CTLTYPE_INT|CTLFLAG_RW, sc,
1501 0, mxge_change_intr_coal,
1502 "I", "interrupt coalescing delay in usecs");
1503
1504 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1505 "throttle",
1506 CTLTYPE_INT|CTLFLAG_RW, sc,
1507 0, mxge_change_throttle,
1508 "I", "transmit throttling");
1509
1510 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1511 "flow_control_enabled",
1512 CTLTYPE_INT|CTLFLAG_RW, sc,
1513 0, mxge_change_flow_control,
1514 "I", "interrupt coalescing delay in usecs");
1515
1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1517 "deassert_wait",
1518 CTLFLAG_RW, &mxge_deassert_wait,
1519 0, "Wait for IRQ line to go low in ihandler");
1520
1521 /* stats block from firmware is in network byte order.
1522 Need to swap it */
1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1524 "link_up",
1525 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1526 0, mxge_handle_be32,
1527 "I", "link up");
1528 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1529 "rdma_tags_available",
1530 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1531 0, mxge_handle_be32,
1532 "I", "rdma_tags_available");
1533 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1534 "dropped_bad_crc32",
1535 CTLTYPE_INT|CTLFLAG_RD,
1536 &fw->dropped_bad_crc32,
1537 0, mxge_handle_be32,
1538 "I", "dropped_bad_crc32");
1539 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1540 "dropped_bad_phy",
1541 CTLTYPE_INT|CTLFLAG_RD,
1542 &fw->dropped_bad_phy,
1543 0, mxge_handle_be32,
1544 "I", "dropped_bad_phy");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_link_error_or_filtered",
1547 CTLTYPE_INT|CTLFLAG_RD,
1548 &fw->dropped_link_error_or_filtered,
1549 0, mxge_handle_be32,
1550 "I", "dropped_link_error_or_filtered");
1551 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1552 "dropped_link_overflow",
1553 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1554 0, mxge_handle_be32,
1555 "I", "dropped_link_overflow");
1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1557 "dropped_multicast_filtered",
1558 CTLTYPE_INT|CTLFLAG_RD,
1559 &fw->dropped_multicast_filtered,
1560 0, mxge_handle_be32,
1561 "I", "dropped_multicast_filtered");
1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 "dropped_no_big_buffer",
1564 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1565 0, mxge_handle_be32,
1566 "I", "dropped_no_big_buffer");
1567 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1568 "dropped_no_small_buffer",
1569 CTLTYPE_INT|CTLFLAG_RD,
1570 &fw->dropped_no_small_buffer,
1571 0, mxge_handle_be32,
1572 "I", "dropped_no_small_buffer");
1573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1574 "dropped_overrun",
1575 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1576 0, mxge_handle_be32,
1577 "I", "dropped_overrun");
1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1579 "dropped_pause",
1580 CTLTYPE_INT|CTLFLAG_RD,
1581 &fw->dropped_pause,
1582 0, mxge_handle_be32,
1583 "I", "dropped_pause");
1584 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1585 "dropped_runt",
1586 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1587 0, mxge_handle_be32,
1588 "I", "dropped_runt");
1589
1590 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1591 "dropped_unicast_filtered",
1592 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1593 0, mxge_handle_be32,
1594 "I", "dropped_unicast_filtered");
1595
1596 /* verbose printing? */
1597 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1598 "verbose",
1599 CTLFLAG_RW, &mxge_verbose,
1600 0, "verbose printing");
1601
1602 /* add counters exported for debugging from all slices */
1603 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1604 sc->slice_sysctl_tree =
1605 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1606 "slice", CTLFLAG_RD, 0, "");
1607
1608 for (slice = 0; slice < sc->num_slices; slice++) {
1609 ss = &sc->ss[slice];
1610 sysctl_ctx_init(&ss->sysctl_ctx);
1611 ctx = &ss->sysctl_ctx;
1612 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1613 sprintf(slice_num, "%d", slice);
1614 ss->sysctl_tree =
1615 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1616 CTLFLAG_RD, 0, "");
1617 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1618 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1619 "rx_small_cnt",
1620 CTLFLAG_RD, &ss->rx_small.cnt,
1621 0, "rx_small_cnt");
1622 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1623 "rx_big_cnt",
1624 CTLFLAG_RD, &ss->rx_big.cnt,
1625 0, "rx_small_cnt");
1626 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1627 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1628 0, "number of lro merge queues flushed");
1629
1630 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1631 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1632 0, "number of bad csums preventing LRO");
1633
1634 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1635 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1636 0, "number of frames appended to lro merge"
1637 "queues");
1638
1639 #ifndef IFNET_BUF_RING
1640 /* only transmit from slice 0 for now */
1641 if (slice > 0)
1642 continue;
1643 #endif
1644 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1645 "tx_req",
1646 CTLFLAG_RD, &ss->tx.req,
1647 0, "tx_req");
1648
1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1650 "tx_done",
1651 CTLFLAG_RD, &ss->tx.done,
1652 0, "tx_done");
1653 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1654 "tx_pkt_done",
1655 CTLFLAG_RD, &ss->tx.pkt_done,
1656 0, "tx_done");
1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1658 "tx_stall",
1659 CTLFLAG_RD, &ss->tx.stall,
1660 0, "tx_stall");
1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1662 "tx_wake",
1663 CTLFLAG_RD, &ss->tx.wake,
1664 0, "tx_wake");
1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1666 "tx_defrag",
1667 CTLFLAG_RD, &ss->tx.defrag,
1668 0, "tx_defrag");
1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1670 "tx_queue_active",
1671 CTLFLAG_RD, &ss->tx.queue_active,
1672 0, "tx_queue_active");
1673 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1674 "tx_activate",
1675 CTLFLAG_RD, &ss->tx.activate,
1676 0, "tx_activate");
1677 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1678 "tx_deactivate",
1679 CTLFLAG_RD, &ss->tx.deactivate,
1680 0, "tx_deactivate");
1681 }
1682 }
1683
1684 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1685 backwards one at a time and handle ring wraps */
1686
1687 static inline void
mxge_submit_req_backwards(mxge_tx_ring_t * tx,mcp_kreq_ether_send_t * src,int cnt)1688 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1689 mcp_kreq_ether_send_t *src, int cnt)
1690 {
1691 int idx, starting_slot;
1692 starting_slot = tx->req;
1693 while (cnt > 1) {
1694 cnt--;
1695 idx = (starting_slot + cnt) & tx->mask;
1696 mxge_pio_copy(&tx->lanai[idx],
1697 &src[cnt], sizeof(*src));
1698 wmb();
1699 }
1700 }
1701
1702 /*
1703 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1704 * at most 32 bytes at a time, so as to avoid involving the software
1705 * pio handler in the nic. We re-write the first segment's flags
1706 * to mark them valid only after writing the entire chain
1707 */
1708
1709 static inline void
mxge_submit_req(mxge_tx_ring_t * tx,mcp_kreq_ether_send_t * src,int cnt)1710 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1711 int cnt)
1712 {
1713 int idx, i;
1714 uint32_t *src_ints;
1715 volatile uint32_t *dst_ints;
1716 mcp_kreq_ether_send_t *srcp;
1717 volatile mcp_kreq_ether_send_t *dstp, *dst;
1718 uint8_t last_flags;
1719
1720 idx = tx->req & tx->mask;
1721
1722 last_flags = src->flags;
1723 src->flags = 0;
1724 wmb();
1725 dst = dstp = &tx->lanai[idx];
1726 srcp = src;
1727
1728 if ((idx + cnt) < tx->mask) {
1729 for (i = 0; i < (cnt - 1); i += 2) {
1730 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1731 wmb(); /* force write every 32 bytes */
1732 srcp += 2;
1733 dstp += 2;
1734 }
1735 } else {
1736 /* submit all but the first request, and ensure
1737 that it is submitted below */
1738 mxge_submit_req_backwards(tx, src, cnt);
1739 i = 0;
1740 }
1741 if (i < cnt) {
1742 /* submit the first request */
1743 mxge_pio_copy(dstp, srcp, sizeof(*src));
1744 wmb(); /* barrier before setting valid flag */
1745 }
1746
1747 /* re-write the last 32-bits with the valid flags */
1748 src->flags = last_flags;
1749 src_ints = (uint32_t *)src;
1750 src_ints+=3;
1751 dst_ints = (volatile uint32_t *)dst;
1752 dst_ints+=3;
1753 *dst_ints = *src_ints;
1754 tx->req += cnt;
1755 wmb();
1756 }
1757
1758 static int
mxge_parse_tx(struct mxge_slice_state * ss,struct mbuf * m,struct mxge_pkt_info * pi)1759 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1760 struct mxge_pkt_info *pi)
1761 {
1762 struct ether_vlan_header *eh;
1763 uint16_t etype;
1764 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1765 #if IFCAP_TSO6 && defined(INET6)
1766 int nxt;
1767 #endif
1768
1769 eh = mtod(m, struct ether_vlan_header *);
1770 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1771 etype = ntohs(eh->evl_proto);
1772 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1773 } else {
1774 etype = ntohs(eh->evl_encap_proto);
1775 pi->ip_off = ETHER_HDR_LEN;
1776 }
1777
1778 switch (etype) {
1779 case ETHERTYPE_IP:
1780 /*
1781 * ensure ip header is in first mbuf, copy it to a
1782 * scratch buffer if not
1783 */
1784 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1785 pi->ip6 = NULL;
1786 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1787 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1788 ss->scratch);
1789 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1790 }
1791 pi->ip_hlen = pi->ip->ip_hl << 2;
1792 if (!tso)
1793 return 0;
1794
1795 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1796 sizeof(struct tcphdr))) {
1797 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1798 sizeof(struct tcphdr), ss->scratch);
1799 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1800 }
1801 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1802 break;
1803 #if IFCAP_TSO6 && defined(INET6)
1804 case ETHERTYPE_IPV6:
1805 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1806 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1807 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1808 ss->scratch);
1809 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1810 }
1811 nxt = 0;
1812 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1813 pi->ip_hlen -= pi->ip_off;
1814 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1815 return EINVAL;
1816
1817 if (!tso)
1818 return 0;
1819
1820 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1821 return EINVAL;
1822
1823 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1824 sizeof(struct tcphdr))) {
1825 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1826 sizeof(struct tcphdr), ss->scratch);
1827 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1828 }
1829 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
1830 break;
1831 #endif
1832 default:
1833 return EINVAL;
1834 }
1835 return 0;
1836 }
1837
#if IFCAP_TSO4

/*
 * Build and submit the send requests for a TSO packet.
 *
 * ss             - slice whose tx ring is used
 * m              - the packet; already DMA-mapped by the caller, with
 *                  the segments stored in ss->tx.seg_list
 * busdma_seg_cnt - number of valid entries in ss->tx.seg_list
 * pi             - header offsets/pointers filled in by mxge_parse_tx()
 *
 * Each busdma segment is split where the headers end, producing
 * requests in tx->req_list that are flagged so the firmware can tell
 * header from payload and where each mss-sized piece starts and ends.
 * The finished list is handed to mxge_submit_req().
 *
 * If more than tx->max_desc requests would be needed, the DMA map is
 * unloaded, the mbuf is freed, ss->oerrors is incremented and (once
 * only) a diagnostic is printed.
 */
static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
    int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	/* limits the "max_desc exceeded" diagnostic to one print */
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* start with minus the total ethernet + IP + TCP header length */
	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
			    cksum_offset)));
#endif
		}
		/* overwrite th_sum in the packet with the pseudo hdr sum */
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;


	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/*
			 * retroactively patch the request that started
			 * the current run of RDMAs
			 */
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
				    MXGEFW_FLAGS_FIRST;
				/*
				 * Branch-free reset: if this piece is
				 * chopped or ends exactly on an mss
				 * boundary, rdma_count becomes -1; one is
				 * added back when it is chopped without
				 * ending on a boundary.  Otherwise it is
				 * left unchanged.  The rdma_count++ further
				 * down then applies on top of this.
				 */
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				/* this request covers only the rest of the headers */
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
			    MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			/*
			 * for IPv4, count the checksum offset down by the
			 * bytes just consumed (for IPv6 it holds the TCP
			 * header length and is left alone)
			 */
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	/* patch the rdma_count of the final run */
	(req-rdma_count)->rdma_count = rdma_count;

	/*
	 * walk backwards from the last request, setting TSO_LAST on
	 * each one, and stop after the first request that carries
	 * TSO_CHOP or FIRST
	 */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	/* set the flag on the ring slot of the packet's last request */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	/* tx->req was not advanced, so this is the map the caller loaded */
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		    (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
2025
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 *
 * Takes the tag from m->m_pkthdr.ether_vtag, inserts an 802.1Q header
 * into the frame and clears M_VLANTAG.  Returns the (possibly new)
 * mbuf, or NULL if M_PREPEND() or m_pullup() left no mbuf to return.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *vh;

	/* make room for the tag in front of the frame */
	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;

	/* the whole tagged header must be in the first mbuf */
	if (m->m_len < sizeof(*vh) &&
	    __predict_false((m = m_pullup(m, sizeof(*vh))) == NULL))
		return NULL;

	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation: slide the two MAC addresses
	 * forward into the new space, then fill in the tag fields.
	 */
	vh = mtod(m, struct ether_vlan_header *);
	bcopy((char *)vh + ETHER_VLAN_ENCAP_LEN, (char *)vh,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	vh->evl_encap_proto = htons(ETHERTYPE_VLAN);
	vh->evl_tag = htons(m->m_pkthdr.ether_vtag);

	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
2059
2060 static void
mxge_encap(struct mxge_slice_state * ss,struct mbuf * m)2061 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2062 {
2063 struct mxge_pkt_info pi = {0,0,0,0};
2064 mxge_softc_t *sc;
2065 mcp_kreq_ether_send_t *req;
2066 bus_dma_segment_t *seg;
2067 struct mbuf *m_tmp;
2068 struct ifnet *ifp;
2069 mxge_tx_ring_t *tx;
2070 int cnt, cum_len, err, i, idx, odd_flag;
2071 uint16_t pseudo_hdr_offset;
2072 uint8_t flags, cksum_offset;
2073
2074
2075 sc = ss->sc;
2076 ifp = sc->ifp;
2077 tx = &ss->tx;
2078
2079 #ifdef MXGE_NEW_VLAN_API
2080 if (m->m_flags & M_VLANTAG) {
2081 m = mxge_vlan_tag_insert(m);
2082 if (__predict_false(m == NULL))
2083 goto drop_without_m;
2084 }
2085 #endif
2086 if (m->m_pkthdr.csum_flags &
2087 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2088 if (mxge_parse_tx(ss, m, &pi))
2089 goto drop;
2090 }
2091
2092 /* (try to) map the frame for DMA */
2093 idx = tx->req & tx->mask;
2094 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2095 m, tx->seg_list, &cnt,
2096 BUS_DMA_NOWAIT);
2097 if (__predict_false(err == EFBIG)) {
2098 /* Too many segments in the chain. Try
2099 to defrag */
2100 m_tmp = m_defrag(m, M_NOWAIT);
2101 if (m_tmp == NULL) {
2102 goto drop;
2103 }
2104 ss->tx.defrag++;
2105 m = m_tmp;
2106 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2107 tx->info[idx].map,
2108 m, tx->seg_list, &cnt,
2109 BUS_DMA_NOWAIT);
2110 }
2111 if (__predict_false(err != 0)) {
2112 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2113 " packet len = %d\n", err, m->m_pkthdr.len);
2114 goto drop;
2115 }
2116 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2117 BUS_DMASYNC_PREWRITE);
2118 tx->info[idx].m = m;
2119
2120 #if IFCAP_TSO4
2121 /* TSO is different enough, we handle it in another routine */
2122 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2123 mxge_encap_tso(ss, m, cnt, &pi);
2124 return;
2125 }
2126 #endif
2127
2128 req = tx->req_list;
2129 cksum_offset = 0;
2130 pseudo_hdr_offset = 0;
2131 flags = MXGEFW_FLAGS_NO_TSO;
2132
2133 /* checksum offloading? */
2134 if (m->m_pkthdr.csum_flags &
2135 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2136 /* ensure ip header is in first mbuf, copy
2137 it to a scratch buffer if not */
2138 cksum_offset = pi.ip_off + pi.ip_hlen;
2139 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2140 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2141 req->cksum_offset = cksum_offset;
2142 flags |= MXGEFW_FLAGS_CKSUM;
2143 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2144 } else {
2145 odd_flag = 0;
2146 }
2147 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2148 flags |= MXGEFW_FLAGS_SMALL;
2149
2150 /* convert segments into a request list */
2151 cum_len = 0;
2152 seg = tx->seg_list;
2153 req->flags = MXGEFW_FLAGS_FIRST;
2154 for (i = 0; i < cnt; i++) {
2155 req->addr_low =
2156 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2157 req->addr_high =
2158 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2159 req->length = htobe16(seg->ds_len);
2160 req->cksum_offset = cksum_offset;
2161 if (cksum_offset > seg->ds_len)
2162 cksum_offset -= seg->ds_len;
2163 else
2164 cksum_offset = 0;
2165 req->pseudo_hdr_offset = pseudo_hdr_offset;
2166 req->pad = 0; /* complete solid 16-byte block */
2167 req->rdma_count = 1;
2168 req->flags |= flags | ((cum_len & 1) * odd_flag);
2169 cum_len += seg->ds_len;
2170 seg++;
2171 req++;
2172 req->flags = 0;
2173 }
2174 req--;
2175 /* pad runts to 60 bytes */
2176 if (cum_len < 60) {
2177 req++;
2178 req->addr_low =
2179 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2180 req->addr_high =
2181 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2182 req->length = htobe16(60 - cum_len);
2183 req->cksum_offset = 0;
2184 req->pseudo_hdr_offset = pseudo_hdr_offset;
2185 req->pad = 0; /* complete solid 16-byte block */
2186 req->rdma_count = 1;
2187 req->flags |= flags | ((cum_len & 1) * odd_flag);
2188 cnt++;
2189 }
2190
2191 tx->req_list[0].rdma_count = cnt;
2192 #if 0
2193 /* print what the firmware will see */
2194 for (i = 0; i < cnt; i++) {
2195 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2196 "cso:%d, flags:0x%x, rdma:%d\n",
2197 i, (int)ntohl(tx->req_list[i].addr_high),
2198 (int)ntohl(tx->req_list[i].addr_low),
2199 (int)ntohs(tx->req_list[i].length),
2200 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2201 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2202 tx->req_list[i].rdma_count);
2203 }
2204 printf("--------------\n");
2205 #endif
2206 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2207 mxge_submit_req(tx, tx->req_list, cnt);
2208 #ifdef IFNET_BUF_RING
2209 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2210 /* tell the NIC to start polling this slice */
2211 *tx->send_go = 1;
2212 tx->queue_active = 1;
2213 tx->activate++;
2214 wmb();
2215 }
2216 #endif
2217 return;
2218
2219 drop:
2220 m_freem(m);
2221 drop_without_m:
2222 ss->oerrors++;
2223 return;
2224 }
2225
2226 #ifdef IFNET_BUF_RING
2227 static void
mxge_qflush(struct ifnet * ifp)2228 mxge_qflush(struct ifnet *ifp)
2229 {
2230 mxge_softc_t *sc = ifp->if_softc;
2231 mxge_tx_ring_t *tx;
2232 struct mbuf *m;
2233 int slice;
2234
2235 for (slice = 0; slice < sc->num_slices; slice++) {
2236 tx = &sc->ss[slice].tx;
2237 mtx_lock(&tx->mtx);
2238 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2239 m_freem(m);
2240 mtx_unlock(&tx->mtx);
2241 }
2242 if_qflush(ifp);
2243 }
2244
2245 static inline void
mxge_start_locked(struct mxge_slice_state * ss)2246 mxge_start_locked(struct mxge_slice_state *ss)
2247 {
2248 mxge_softc_t *sc;
2249 struct mbuf *m;
2250 struct ifnet *ifp;
2251 mxge_tx_ring_t *tx;
2252
2253 sc = ss->sc;
2254 ifp = sc->ifp;
2255 tx = &ss->tx;
2256
2257 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2258 m = drbr_dequeue(ifp, tx->br);
2259 if (m == NULL) {
2260 return;
2261 }
2262 /* let BPF see it */
2263 BPF_MTAP(ifp, m);
2264
2265 /* give it to the nic */
2266 mxge_encap(ss, m);
2267 }
2268 /* ran out of transmit slots */
2269 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2270 && (!drbr_empty(ifp, tx->br))) {
2271 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2272 tx->stall++;
2273 }
2274 }
2275
2276 static int
mxge_transmit_locked(struct mxge_slice_state * ss,struct mbuf * m)2277 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2278 {
2279 mxge_softc_t *sc;
2280 struct ifnet *ifp;
2281 mxge_tx_ring_t *tx;
2282 int err;
2283
2284 sc = ss->sc;
2285 ifp = sc->ifp;
2286 tx = &ss->tx;
2287
2288 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2289 IFF_DRV_RUNNING) {
2290 err = drbr_enqueue(ifp, tx->br, m);
2291 return (err);
2292 }
2293
2294 if (!drbr_needs_enqueue(ifp, tx->br) &&
2295 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2296 /* let BPF see it */
2297 BPF_MTAP(ifp, m);
2298 /* give it to the nic */
2299 mxge_encap(ss, m);
2300 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2301 return (err);
2302 }
2303 if (!drbr_empty(ifp, tx->br))
2304 mxge_start_locked(ss);
2305 return (0);
2306 }
2307
2308 static int
mxge_transmit(struct ifnet * ifp,struct mbuf * m)2309 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2310 {
2311 mxge_softc_t *sc = ifp->if_softc;
2312 struct mxge_slice_state *ss;
2313 mxge_tx_ring_t *tx;
2314 int err = 0;
2315 int slice;
2316
2317 slice = m->m_pkthdr.flowid;
2318 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2319
2320 ss = &sc->ss[slice];
2321 tx = &ss->tx;
2322
2323 if (mtx_trylock(&tx->mtx)) {
2324 err = mxge_transmit_locked(ss, m);
2325 mtx_unlock(&tx->mtx);
2326 } else {
2327 err = drbr_enqueue(ifp, tx->br, m);
2328 }
2329
2330 return (err);
2331 }
2332
2333 #else
2334
2335 static inline void
mxge_start_locked(struct mxge_slice_state * ss)2336 mxge_start_locked(struct mxge_slice_state *ss)
2337 {
2338 mxge_softc_t *sc;
2339 struct mbuf *m;
2340 struct ifnet *ifp;
2341 mxge_tx_ring_t *tx;
2342
2343 sc = ss->sc;
2344 ifp = sc->ifp;
2345 tx = &ss->tx;
2346 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2347 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2348 if (m == NULL) {
2349 return;
2350 }
2351 /* let BPF see it */
2352 BPF_MTAP(ifp, m);
2353
2354 /* give it to the nic */
2355 mxge_encap(ss, m);
2356 }
2357 /* ran out of transmit slots */
2358 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2359 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2360 tx->stall++;
2361 }
2362 }
2363 #endif
2364 static void
mxge_start(struct ifnet * ifp)2365 mxge_start(struct ifnet *ifp)
2366 {
2367 mxge_softc_t *sc = ifp->if_softc;
2368 struct mxge_slice_state *ss;
2369
2370 /* only use the first slice for now */
2371 ss = &sc->ss[0];
2372 mtx_lock(&ss->tx.mtx);
2373 mxge_start_locked(ss);
2374 mtx_unlock(&ss->tx.mtx);
2375 }
2376
2377 /*
2378 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2379 * at most 32 bytes at a time, so as to avoid involving the software
2380 * pio handler in the nic. We re-write the first segment's low
2381 * DMA address to mark it valid only after we write the entire chunk
2382 * in a burst
2383 */
2384 static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t * dst,mcp_kreq_ether_recv_t * src)2385 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2386 mcp_kreq_ether_recv_t *src)
2387 {
2388 uint32_t low;
2389
2390 low = src->addr_low;
2391 src->addr_low = 0xffffffff;
2392 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2393 wmb();
2394 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2395 wmb();
2396 src->addr_low = low;
2397 dst->addr_low = low;
2398 wmb();
2399 }
2400
2401 static int
mxge_get_buf_small(struct mxge_slice_state * ss,bus_dmamap_t map,int idx)2402 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2403 {
2404 bus_dma_segment_t seg;
2405 struct mbuf *m;
2406 mxge_rx_ring_t *rx = &ss->rx_small;
2407 int cnt, err;
2408
2409 m = m_gethdr(M_NOWAIT, MT_DATA);
2410 if (m == NULL) {
2411 rx->alloc_fail++;
2412 err = ENOBUFS;
2413 goto done;
2414 }
2415 m->m_len = MHLEN;
2416 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2417 &seg, &cnt, BUS_DMA_NOWAIT);
2418 if (err != 0) {
2419 m_free(m);
2420 goto done;
2421 }
2422 rx->info[idx].m = m;
2423 rx->shadow[idx].addr_low =
2424 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2425 rx->shadow[idx].addr_high =
2426 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2427
2428 done:
2429 if ((idx & 7) == 7)
2430 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2431 return err;
2432 }
2433
2434 static int
mxge_get_buf_big(struct mxge_slice_state * ss,bus_dmamap_t map,int idx)2435 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2436 {
2437 bus_dma_segment_t seg[3];
2438 struct mbuf *m;
2439 mxge_rx_ring_t *rx = &ss->rx_big;
2440 int cnt, err, i;
2441
2442 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2443 if (m == NULL) {
2444 rx->alloc_fail++;
2445 err = ENOBUFS;
2446 goto done;
2447 }
2448 m->m_len = rx->mlen;
2449 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2450 seg, &cnt, BUS_DMA_NOWAIT);
2451 if (err != 0) {
2452 m_free(m);
2453 goto done;
2454 }
2455 rx->info[idx].m = m;
2456 rx->shadow[idx].addr_low =
2457 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2458 rx->shadow[idx].addr_high =
2459 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2460
2461 #if MXGE_VIRT_JUMBOS
2462 for (i = 1; i < cnt; i++) {
2463 rx->shadow[idx + i].addr_low =
2464 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2465 rx->shadow[idx + i].addr_high =
2466 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2467 }
2468 #endif
2469
2470 done:
2471 for (i = 0; i < rx->nbufs; i++) {
2472 if ((idx & 7) == 7) {
2473 mxge_submit_8rx(&rx->lanai[idx - 7],
2474 &rx->shadow[idx - 7]);
2475 }
2476 idx++;
2477 }
2478 return err;
2479 }
2480
2481 #ifdef INET6
2482
/*
 * Sum len bytes starting at raw as native 16-bit words and fold the
 * 32-bit total down to 16 bits (end-around carry, applied twice).
 * The result is NOT complemented.
 *
 * len is consumed two bytes at a time, so an odd len still reads one
 * whole trailing word.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int left;

	for (left = len; left > 0; left -= 2)
		sum += *raw++;

	/* fold the carries back in; the second pass absorbs a new carry */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}
2499
2500 static inline uint16_t
mxge_rx_csum6(void * p,struct mbuf * m,uint32_t csum)2501 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2502 {
2503 uint32_t partial;
2504 int nxt, cksum_offset;
2505 struct ip6_hdr *ip6 = p;
2506 uint16_t c;
2507
2508 nxt = ip6->ip6_nxt;
2509 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2510 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2511 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2512 IPPROTO_IPV6, &nxt);
2513 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2514 return (1);
2515 }
2516
2517 /*
2518 * IPv6 headers do not contain a checksum, and hence
2519 * do not checksum to zero, so they don't "fall out"
2520 * of the partial checksum calculation like IPv4
2521 * headers do. We need to fix the partial checksum by
2522 * subtracting the checksum of the IPv6 header.
2523 */
2524
2525 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2526 ETHER_HDR_LEN);
2527 csum += ~partial;
2528 csum += (csum < ~partial);
2529 csum = (csum >> 16) + (csum & 0xFFFF);
2530 csum = (csum >> 16) + (csum & 0xFFFF);
2531 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2532 csum);
2533 c ^= 0xffff;
2534 return (c);
2535 }
2536 #endif /* INET6 */
2537 /*
2538 * Myri10GE hardware checksums are not valid if the sender
2539 * padded the frame with non-zero padding. This is because
2540 * the firmware just does a simple 16-bit 1s complement
2541 * checksum across the entire frame, excluding the first 14
2542 * bytes. It is best to simply to check the checksum and
2543 * tell the stack about it only if the checksum is good
2544 */
2545
2546 static inline uint16_t
mxge_rx_csum(struct mbuf * m,int csum)2547 mxge_rx_csum(struct mbuf *m, int csum)
2548 {
2549 struct ether_header *eh;
2550 #ifdef INET
2551 struct ip *ip;
2552 #endif
2553 #if defined(INET) || defined(INET6)
2554 int cap = m->m_pkthdr.rcvif->if_capenable;
2555 #endif
2556 uint16_t c, etype;
2557
2558
2559 eh = mtod(m, struct ether_header *);
2560 etype = ntohs(eh->ether_type);
2561 switch (etype) {
2562 #ifdef INET
2563 case ETHERTYPE_IP:
2564 if ((cap & IFCAP_RXCSUM) == 0)
2565 return (1);
2566 ip = (struct ip *)(eh + 1);
2567 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2568 return (1);
2569 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2570 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2571 (ip->ip_hl << 2) + ip->ip_p));
2572 c ^= 0xffff;
2573 break;
2574 #endif
2575 #ifdef INET6
2576 case ETHERTYPE_IPV6:
2577 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2578 return (1);
2579 c = mxge_rx_csum6((eh + 1), m, csum);
2580 break;
2581 #endif
2582 default:
2583 c = 1;
2584 }
2585 return (c);
2586 }
2587
2588 static void
mxge_vlan_tag_remove(struct mbuf * m,uint32_t * csum)2589 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2590 {
2591 struct ether_vlan_header *evl;
2592 struct ether_header *eh;
2593 uint32_t partial;
2594
2595 evl = mtod(m, struct ether_vlan_header *);
2596 eh = mtod(m, struct ether_header *);
2597
2598 /*
2599 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2600 * after what the firmware thought was the end of the ethernet
2601 * header.
2602 */
2603
2604 /* put checksum into host byte order */
2605 *csum = ntohs(*csum);
2606 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2607 (*csum) += ~partial;
2608 (*csum) += ((*csum) < ~partial);
2609 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2610 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2611
2612 /* restore checksum to network byte order;
2613 later consumers expect this */
2614 *csum = htons(*csum);
2615
2616 /* save the tag */
2617 #ifdef MXGE_NEW_VLAN_API
2618 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2619 #else
2620 {
2621 struct m_tag *mtag;
2622 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2623 M_NOWAIT);
2624 if (mtag == NULL)
2625 return;
2626 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2627 m_tag_prepend(m, mtag);
2628 }
2629
2630 #endif
2631 m->m_flags |= M_VLANTAG;
2632
2633 /*
2634 * Remove the 802.1q header by copying the Ethernet
2635 * addresses over it and adjusting the beginning of
2636 * the data in the mbuf. The encapsulated Ethernet
2637 * type field is already in place.
2638 */
2639 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2640 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2641 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2642 }
2643
2644
2645 static inline void
mxge_rx_done_big(struct mxge_slice_state * ss,uint32_t len,uint32_t csum,int lro)2646 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2647 uint32_t csum, int lro)
2648 {
2649 mxge_softc_t *sc;
2650 struct ifnet *ifp;
2651 struct mbuf *m;
2652 struct ether_header *eh;
2653 mxge_rx_ring_t *rx;
2654 bus_dmamap_t old_map;
2655 int idx;
2656
2657 sc = ss->sc;
2658 ifp = sc->ifp;
2659 rx = &ss->rx_big;
2660 idx = rx->cnt & rx->mask;
2661 rx->cnt += rx->nbufs;
2662 /* save a pointer to the received mbuf */
2663 m = rx->info[idx].m;
2664 /* try to replace the received mbuf */
2665 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2666 /* drop the frame -- the old mbuf is re-cycled */
2667 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2668 return;
2669 }
2670
2671 /* unmap the received buffer */
2672 old_map = rx->info[idx].map;
2673 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2674 bus_dmamap_unload(rx->dmat, old_map);
2675
2676 /* swap the bus_dmamap_t's */
2677 rx->info[idx].map = rx->extra_map;
2678 rx->extra_map = old_map;
2679
2680 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2681 * aligned */
2682 m->m_data += MXGEFW_PAD;
2683
2684 m->m_pkthdr.rcvif = ifp;
2685 m->m_len = m->m_pkthdr.len = len;
2686 ss->ipackets++;
2687 eh = mtod(m, struct ether_header *);
2688 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2689 mxge_vlan_tag_remove(m, &csum);
2690 }
2691 /* flowid only valid if RSS hashing is enabled */
2692 if (sc->num_slices > 1) {
2693 m->m_pkthdr.flowid = (ss - sc->ss);
2694 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2695 }
2696 /* if the checksum is valid, mark it in the mbuf header */
2697 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2698 (0 == mxge_rx_csum(m, csum))) {
2699 /* Tell the stack that the checksum is good */
2700 m->m_pkthdr.csum_data = 0xffff;
2701 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2702 CSUM_DATA_VALID;
2703
2704 #if defined(INET) || defined (INET6)
2705 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2706 return;
2707 #endif
2708 }
2709 /* pass the frame up the stack */
2710 (*ifp->if_input)(ifp, m);
2711 }
2712
2713 static inline void
mxge_rx_done_small(struct mxge_slice_state * ss,uint32_t len,uint32_t csum,int lro)2714 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2715 uint32_t csum, int lro)
2716 {
2717 mxge_softc_t *sc;
2718 struct ifnet *ifp;
2719 struct ether_header *eh;
2720 struct mbuf *m;
2721 mxge_rx_ring_t *rx;
2722 bus_dmamap_t old_map;
2723 int idx;
2724
2725 sc = ss->sc;
2726 ifp = sc->ifp;
2727 rx = &ss->rx_small;
2728 idx = rx->cnt & rx->mask;
2729 rx->cnt++;
2730 /* save a pointer to the received mbuf */
2731 m = rx->info[idx].m;
2732 /* try to replace the received mbuf */
2733 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2734 /* drop the frame -- the old mbuf is re-cycled */
2735 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2736 return;
2737 }
2738
2739 /* unmap the received buffer */
2740 old_map = rx->info[idx].map;
2741 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2742 bus_dmamap_unload(rx->dmat, old_map);
2743
2744 /* swap the bus_dmamap_t's */
2745 rx->info[idx].map = rx->extra_map;
2746 rx->extra_map = old_map;
2747
2748 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2749 * aligned */
2750 m->m_data += MXGEFW_PAD;
2751
2752 m->m_pkthdr.rcvif = ifp;
2753 m->m_len = m->m_pkthdr.len = len;
2754 ss->ipackets++;
2755 eh = mtod(m, struct ether_header *);
2756 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2757 mxge_vlan_tag_remove(m, &csum);
2758 }
2759 /* flowid only valid if RSS hashing is enabled */
2760 if (sc->num_slices > 1) {
2761 m->m_pkthdr.flowid = (ss - sc->ss);
2762 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2763 }
2764 /* if the checksum is valid, mark it in the mbuf header */
2765 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2766 (0 == mxge_rx_csum(m, csum))) {
2767 /* Tell the stack that the checksum is good */
2768 m->m_pkthdr.csum_data = 0xffff;
2769 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2770 CSUM_DATA_VALID;
2771
2772 #if defined(INET) || defined (INET6)
2773 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2774 return;
2775 #endif
2776 }
2777 /* pass the frame up the stack */
2778 (*ifp->if_input)(ifp, m);
2779 }
2780
2781 static inline void
mxge_clean_rx_done(struct mxge_slice_state * ss)2782 mxge_clean_rx_done(struct mxge_slice_state *ss)
2783 {
2784 mxge_rx_done_t *rx_done = &ss->rx_done;
2785 int limit = 0;
2786 uint16_t length;
2787 uint16_t checksum;
2788 int lro;
2789
2790 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2791 while (rx_done->entry[rx_done->idx].length != 0) {
2792 length = ntohs(rx_done->entry[rx_done->idx].length);
2793 rx_done->entry[rx_done->idx].length = 0;
2794 checksum = rx_done->entry[rx_done->idx].checksum;
2795 if (length <= (MHLEN - MXGEFW_PAD))
2796 mxge_rx_done_small(ss, length, checksum, lro);
2797 else
2798 mxge_rx_done_big(ss, length, checksum, lro);
2799 rx_done->cnt++;
2800 rx_done->idx = rx_done->cnt & rx_done->mask;
2801
2802 /* limit potential for livelock */
2803 if (__predict_false(++limit > rx_done->mask / 2))
2804 break;
2805 }
2806 #if defined(INET) || defined (INET6)
2807 tcp_lro_flush_all(&ss->lc);
2808 #endif
2809 }
2810
2811
2812 static inline void
mxge_tx_done(struct mxge_slice_state * ss,uint32_t mcp_idx)2813 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2814 {
2815 struct ifnet *ifp;
2816 mxge_tx_ring_t *tx;
2817 struct mbuf *m;
2818 bus_dmamap_t map;
2819 int idx;
2820 int *flags;
2821
2822 tx = &ss->tx;
2823 ifp = ss->sc->ifp;
2824 while (tx->pkt_done != mcp_idx) {
2825 idx = tx->done & tx->mask;
2826 tx->done++;
2827 m = tx->info[idx].m;
2828 /* mbuf and DMA map only attached to the first
2829 segment per-mbuf */
2830 if (m != NULL) {
2831 ss->obytes += m->m_pkthdr.len;
2832 if (m->m_flags & M_MCAST)
2833 ss->omcasts++;
2834 ss->opackets++;
2835 tx->info[idx].m = NULL;
2836 map = tx->info[idx].map;
2837 bus_dmamap_unload(tx->dmat, map);
2838 m_freem(m);
2839 }
2840 if (tx->info[idx].flag) {
2841 tx->info[idx].flag = 0;
2842 tx->pkt_done++;
2843 }
2844 }
2845
2846 /* If we have space, clear IFF_OACTIVE to tell the stack that
2847 its OK to send packets */
2848 #ifdef IFNET_BUF_RING
2849 flags = &ss->if_drv_flags;
2850 #else
2851 flags = &ifp->if_drv_flags;
2852 #endif
2853 mtx_lock(&ss->tx.mtx);
2854 if ((*flags) & IFF_DRV_OACTIVE &&
2855 tx->req - tx->done < (tx->mask + 1)/4) {
2856 *(flags) &= ~IFF_DRV_OACTIVE;
2857 ss->tx.wake++;
2858 mxge_start_locked(ss);
2859 }
2860 #ifdef IFNET_BUF_RING
2861 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2862 /* let the NIC stop polling this queue, since there
2863 * are no more transmits pending */
2864 if (tx->req == tx->done) {
2865 *tx->send_stop = 1;
2866 tx->queue_active = 0;
2867 tx->deactivate++;
2868 wmb();
2869 }
2870 }
2871 #endif
2872 mtx_unlock(&ss->tx.mtx);
2873
2874 }
2875
2876 static struct mxge_media_type mxge_xfp_media_types[] =
2877 {
2878 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2879 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2880 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2881 {0, (1 << 5), "10GBASE-ER"},
2882 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2883 {0, (1 << 3), "10GBASE-SW"},
2884 {0, (1 << 2), "10GBASE-LW"},
2885 {0, (1 << 1), "10GBASE-EW"},
2886 {0, (1 << 0), "Reserved"}
2887 };
2888 static struct mxge_media_type mxge_sfp_media_types[] =
2889 {
2890 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2891 {0, (1 << 7), "Reserved"},
2892 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2893 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2894 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2895 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2896 };
2897
2898 static void
mxge_media_set(mxge_softc_t * sc,int media_type)2899 mxge_media_set(mxge_softc_t *sc, int media_type)
2900 {
2901
2902
2903 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2904 0, NULL);
2905 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2906 sc->current_media = media_type;
2907 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2908 }
2909
2910 static void
mxge_media_init(mxge_softc_t * sc)2911 mxge_media_init(mxge_softc_t *sc)
2912 {
2913 char *ptr;
2914 int i;
2915
2916 ifmedia_removeall(&sc->media);
2917 mxge_media_set(sc, IFM_AUTO);
2918
2919 /*
2920 * parse the product code to deterimine the interface type
2921 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2922 * after the 3rd dash in the driver's cached copy of the
2923 * EEPROM's product code string.
2924 */
2925 ptr = sc->product_code_string;
2926 if (ptr == NULL) {
2927 device_printf(sc->dev, "Missing product code\n");
2928 return;
2929 }
2930
2931 for (i = 0; i < 3; i++, ptr++) {
2932 ptr = strchr(ptr, '-');
2933 if (ptr == NULL) {
2934 device_printf(sc->dev,
2935 "only %d dashes in PC?!?\n", i);
2936 return;
2937 }
2938 }
2939 if (*ptr == 'C' || *(ptr +1) == 'C') {
2940 /* -C is CX4 */
2941 sc->connector = MXGE_CX4;
2942 mxge_media_set(sc, IFM_10G_CX4);
2943 } else if (*ptr == 'Q') {
2944 /* -Q is Quad Ribbon Fiber */
2945 sc->connector = MXGE_QRF;
2946 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2947 /* FreeBSD has no media type for Quad ribbon fiber */
2948 } else if (*ptr == 'R') {
2949 /* -R is XFP */
2950 sc->connector = MXGE_XFP;
2951 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2952 /* -S or -2S is SFP+ */
2953 sc->connector = MXGE_SFP;
2954 } else {
2955 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2956 }
2957 }
2958
2959 /*
2960 * Determine the media type for a NIC. Some XFPs will identify
2961 * themselves only when their link is up, so this is initiated via a
2962 * link up interrupt. However, this can potentially take up to
2963 * several milliseconds, so it is run via the watchdog routine, rather
2964 * than in the interrupt handler itself.
2965 */
2966 static void
mxge_media_probe(mxge_softc_t * sc)2967 mxge_media_probe(mxge_softc_t *sc)
2968 {
2969 mxge_cmd_t cmd;
2970 char *cage_type;
2971
2972 struct mxge_media_type *mxge_media_types = NULL;
2973 int i, err, ms, mxge_media_type_entries;
2974 uint32_t byte;
2975
2976 sc->need_media_probe = 0;
2977
2978 if (sc->connector == MXGE_XFP) {
2979 /* -R is XFP */
2980 mxge_media_types = mxge_xfp_media_types;
2981 mxge_media_type_entries =
2982 nitems(mxge_xfp_media_types);
2983 byte = MXGE_XFP_COMPLIANCE_BYTE;
2984 cage_type = "XFP";
2985 } else if (sc->connector == MXGE_SFP) {
2986 /* -S or -2S is SFP+ */
2987 mxge_media_types = mxge_sfp_media_types;
2988 mxge_media_type_entries =
2989 nitems(mxge_sfp_media_types);
2990 cage_type = "SFP+";
2991 byte = 3;
2992 } else {
2993 /* nothing to do; media type cannot change */
2994 return;
2995 }
2996
2997 /*
2998 * At this point we know the NIC has an XFP cage, so now we
2999 * try to determine what is in the cage by using the
3000 * firmware's XFP I2C commands to read the XFP 10GbE compilance
3001 * register. We read just one byte, which may take over
3002 * a millisecond
3003 */
3004
3005 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3006 cmd.data1 = byte;
3007 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3008 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3009 device_printf(sc->dev, "failed to read XFP\n");
3010 }
3011 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3012 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3013 }
3014 if (err != MXGEFW_CMD_OK) {
3015 return;
3016 }
3017
3018 /* now we wait for the data to be cached */
3019 cmd.data0 = byte;
3020 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3021 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3022 DELAY(1000);
3023 cmd.data0 = byte;
3024 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3025 }
3026 if (err != MXGEFW_CMD_OK) {
3027 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3028 cage_type, err, ms);
3029 return;
3030 }
3031
3032 if (cmd.data0 == mxge_media_types[0].bitmask) {
3033 if (mxge_verbose)
3034 device_printf(sc->dev, "%s:%s\n", cage_type,
3035 mxge_media_types[0].name);
3036 if (sc->current_media != mxge_media_types[0].flag) {
3037 mxge_media_init(sc);
3038 mxge_media_set(sc, mxge_media_types[0].flag);
3039 }
3040 return;
3041 }
3042 for (i = 1; i < mxge_media_type_entries; i++) {
3043 if (cmd.data0 & mxge_media_types[i].bitmask) {
3044 if (mxge_verbose)
3045 device_printf(sc->dev, "%s:%s\n",
3046 cage_type,
3047 mxge_media_types[i].name);
3048
3049 if (sc->current_media != mxge_media_types[i].flag) {
3050 mxge_media_init(sc);
3051 mxge_media_set(sc, mxge_media_types[i].flag);
3052 }
3053 return;
3054 }
3055 }
3056 if (mxge_verbose)
3057 device_printf(sc->dev, "%s media 0x%x unknown\n",
3058 cage_type, cmd.data0);
3059
3060 return;
3061 }
3062
3063 static void
mxge_intr(void * arg)3064 mxge_intr(void *arg)
3065 {
3066 struct mxge_slice_state *ss = arg;
3067 mxge_softc_t *sc = ss->sc;
3068 mcp_irq_data_t *stats = ss->fw_stats;
3069 mxge_tx_ring_t *tx = &ss->tx;
3070 mxge_rx_done_t *rx_done = &ss->rx_done;
3071 uint32_t send_done_count;
3072 uint8_t valid;
3073
3074
3075 #ifndef IFNET_BUF_RING
3076 /* an interrupt on a non-zero slice is implicitly valid
3077 since MSI-X irqs are not shared */
3078 if (ss != sc->ss) {
3079 mxge_clean_rx_done(ss);
3080 *ss->irq_claim = be32toh(3);
3081 return;
3082 }
3083 #endif
3084
3085 /* make sure the DMA has finished */
3086 if (!stats->valid) {
3087 return;
3088 }
3089 valid = stats->valid;
3090
3091 if (sc->legacy_irq) {
3092 /* lower legacy IRQ */
3093 *sc->irq_deassert = 0;
3094 if (!mxge_deassert_wait)
3095 /* don't wait for conf. that irq is low */
3096 stats->valid = 0;
3097 } else {
3098 stats->valid = 0;
3099 }
3100
3101 /* loop while waiting for legacy irq deassertion */
3102 do {
3103 /* check for transmit completes and receives */
3104 send_done_count = be32toh(stats->send_done_count);
3105 while ((send_done_count != tx->pkt_done) ||
3106 (rx_done->entry[rx_done->idx].length != 0)) {
3107 if (send_done_count != tx->pkt_done)
3108 mxge_tx_done(ss, (int)send_done_count);
3109 mxge_clean_rx_done(ss);
3110 send_done_count = be32toh(stats->send_done_count);
3111 }
3112 if (sc->legacy_irq && mxge_deassert_wait)
3113 wmb();
3114 } while (*((volatile uint8_t *) &stats->valid));
3115
3116 /* fw link & error stats meaningful only on the first slice */
3117 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3118 if (sc->link_state != stats->link_up) {
3119 sc->link_state = stats->link_up;
3120 if (sc->link_state) {
3121 if_link_state_change(sc->ifp, LINK_STATE_UP);
3122 if (mxge_verbose)
3123 device_printf(sc->dev, "link up\n");
3124 } else {
3125 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3126 if (mxge_verbose)
3127 device_printf(sc->dev, "link down\n");
3128 }
3129 sc->need_media_probe = 1;
3130 }
3131 if (sc->rdma_tags_available !=
3132 be32toh(stats->rdma_tags_available)) {
3133 sc->rdma_tags_available =
3134 be32toh(stats->rdma_tags_available);
3135 device_printf(sc->dev, "RDMA timed out! %d tags "
3136 "left\n", sc->rdma_tags_available);
3137 }
3138
3139 if (stats->link_down) {
3140 sc->down_cnt += stats->link_down;
3141 sc->link_state = 0;
3142 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3143 }
3144 }
3145
3146 /* check to see if we have rx token to pass back */
3147 if (valid & 0x1)
3148 *ss->irq_claim = be32toh(3);
3149 *(ss->irq_claim + 1) = be32toh(3);
3150 }
3151
3152 static void
mxge_init(void * arg)3153 mxge_init(void *arg)
3154 {
3155 mxge_softc_t *sc = arg;
3156 struct ifnet *ifp = sc->ifp;
3157
3158
3159 mtx_lock(&sc->driver_mtx);
3160 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3161 (void) mxge_open(sc);
3162 mtx_unlock(&sc->driver_mtx);
3163 }
3164
3165
3166
3167 static void
mxge_free_slice_mbufs(struct mxge_slice_state * ss)3168 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3169 {
3170 int i;
3171
3172 #if defined(INET) || defined(INET6)
3173 tcp_lro_free(&ss->lc);
3174 #endif
3175 for (i = 0; i <= ss->rx_big.mask; i++) {
3176 if (ss->rx_big.info[i].m == NULL)
3177 continue;
3178 bus_dmamap_unload(ss->rx_big.dmat,
3179 ss->rx_big.info[i].map);
3180 m_freem(ss->rx_big.info[i].m);
3181 ss->rx_big.info[i].m = NULL;
3182 }
3183
3184 for (i = 0; i <= ss->rx_small.mask; i++) {
3185 if (ss->rx_small.info[i].m == NULL)
3186 continue;
3187 bus_dmamap_unload(ss->rx_small.dmat,
3188 ss->rx_small.info[i].map);
3189 m_freem(ss->rx_small.info[i].m);
3190 ss->rx_small.info[i].m = NULL;
3191 }
3192
3193 /* transmit ring used only on the first slice */
3194 if (ss->tx.info == NULL)
3195 return;
3196
3197 for (i = 0; i <= ss->tx.mask; i++) {
3198 ss->tx.info[i].flag = 0;
3199 if (ss->tx.info[i].m == NULL)
3200 continue;
3201 bus_dmamap_unload(ss->tx.dmat,
3202 ss->tx.info[i].map);
3203 m_freem(ss->tx.info[i].m);
3204 ss->tx.info[i].m = NULL;
3205 }
3206 }
3207
3208 static void
mxge_free_mbufs(mxge_softc_t * sc)3209 mxge_free_mbufs(mxge_softc_t *sc)
3210 {
3211 int slice;
3212
3213 for (slice = 0; slice < sc->num_slices; slice++)
3214 mxge_free_slice_mbufs(&sc->ss[slice]);
3215 }
3216
3217 static void
mxge_free_slice_rings(struct mxge_slice_state * ss)3218 mxge_free_slice_rings(struct mxge_slice_state *ss)
3219 {
3220 int i;
3221
3222
3223 if (ss->rx_done.entry != NULL)
3224 mxge_dma_free(&ss->rx_done.dma);
3225 ss->rx_done.entry = NULL;
3226
3227 if (ss->tx.req_bytes != NULL)
3228 free(ss->tx.req_bytes, M_DEVBUF);
3229 ss->tx.req_bytes = NULL;
3230
3231 if (ss->tx.seg_list != NULL)
3232 free(ss->tx.seg_list, M_DEVBUF);
3233 ss->tx.seg_list = NULL;
3234
3235 if (ss->rx_small.shadow != NULL)
3236 free(ss->rx_small.shadow, M_DEVBUF);
3237 ss->rx_small.shadow = NULL;
3238
3239 if (ss->rx_big.shadow != NULL)
3240 free(ss->rx_big.shadow, M_DEVBUF);
3241 ss->rx_big.shadow = NULL;
3242
3243 if (ss->tx.info != NULL) {
3244 if (ss->tx.dmat != NULL) {
3245 for (i = 0; i <= ss->tx.mask; i++) {
3246 bus_dmamap_destroy(ss->tx.dmat,
3247 ss->tx.info[i].map);
3248 }
3249 bus_dma_tag_destroy(ss->tx.dmat);
3250 }
3251 free(ss->tx.info, M_DEVBUF);
3252 }
3253 ss->tx.info = NULL;
3254
3255 if (ss->rx_small.info != NULL) {
3256 if (ss->rx_small.dmat != NULL) {
3257 for (i = 0; i <= ss->rx_small.mask; i++) {
3258 bus_dmamap_destroy(ss->rx_small.dmat,
3259 ss->rx_small.info[i].map);
3260 }
3261 bus_dmamap_destroy(ss->rx_small.dmat,
3262 ss->rx_small.extra_map);
3263 bus_dma_tag_destroy(ss->rx_small.dmat);
3264 }
3265 free(ss->rx_small.info, M_DEVBUF);
3266 }
3267 ss->rx_small.info = NULL;
3268
3269 if (ss->rx_big.info != NULL) {
3270 if (ss->rx_big.dmat != NULL) {
3271 for (i = 0; i <= ss->rx_big.mask; i++) {
3272 bus_dmamap_destroy(ss->rx_big.dmat,
3273 ss->rx_big.info[i].map);
3274 }
3275 bus_dmamap_destroy(ss->rx_big.dmat,
3276 ss->rx_big.extra_map);
3277 bus_dma_tag_destroy(ss->rx_big.dmat);
3278 }
3279 free(ss->rx_big.info, M_DEVBUF);
3280 }
3281 ss->rx_big.info = NULL;
3282 }
3283
3284 static void
mxge_free_rings(mxge_softc_t * sc)3285 mxge_free_rings(mxge_softc_t *sc)
3286 {
3287 int slice;
3288
3289 for (slice = 0; slice < sc->num_slices; slice++)
3290 mxge_free_slice_rings(&sc->ss[slice]);
3291 }
3292
/*
 * Allocate the host-side ring state for one slice.
 *
 * Receive side (every slice): the rx_small/rx_big shadow and info arrays,
 * one busdma tag per rx ring, and one DMA map per ring slot plus one spare
 * map ("extra_map") per ring.
 *
 * Transmit side: the request copy block, the busdma segment list, the info
 * array, a busdma tag and one DMA map per slot.  Unless IFNET_BUF_RING is
 * defined this is done for the first slice only.
 *
 * rx_ring_entries and tx_ring_entries are slot counts.  The ring masks are
 * set to (entries - 1), which assumes the counts are powers of two --
 * TODO confirm against the firmware interface.
 *
 * Returns 0 on success or the error returned by the failing bus_dma call.
 * Nothing is released here on failure; mxge_alloc_rings() calls
 * mxge_free_rings() to clean up a partial allocation.
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
    int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	/* the completion queue mask covers twice the rx ring entry count */
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/*
	 * allocate the rx shadow rings; all allocations in this function
	 * use M_WAITOK and their results are used without a NULL check
	 */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx busdma resources */
	/* small buffers: a single segment of at most MHLEN bytes */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	/*
	 * big buffers: with MXGE_VIRT_JUMBOS up to three 4096-byte
	 * segments, otherwise one segment of up to MJUM9BYTES
	 */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	/* one DMA map per rx_small slot, plus the spare map */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	/* one DMA map per rx_big slot, plus the spare map */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	/* cap the descriptors per send at a quarter of the ring */
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/*
	 * allocate the tx request copy block; 8 extra bytes leave room to
	 * round the start of req_list up to an 8-byte boundary below
	 */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use this tag to set up a dmamap for each slot in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}
3462
3463 static int
mxge_alloc_rings(mxge_softc_t * sc)3464 mxge_alloc_rings(mxge_softc_t *sc)
3465 {
3466 mxge_cmd_t cmd;
3467 int tx_ring_size;
3468 int tx_ring_entries, rx_ring_entries;
3469 int err, slice;
3470
3471 /* get ring sizes */
3472 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3473 tx_ring_size = cmd.data0;
3474 if (err != 0) {
3475 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3476 goto abort;
3477 }
3478
3479 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3480 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3481 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3482 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3483 IFQ_SET_READY(&sc->ifp->if_snd);
3484
3485 for (slice = 0; slice < sc->num_slices; slice++) {
3486 err = mxge_alloc_slice_rings(&sc->ss[slice],
3487 rx_ring_entries,
3488 tx_ring_entries);
3489 if (err != 0)
3490 goto abort;
3491 }
3492 return 0;
3493
3494 abort:
3495 mxge_free_rings(sc);
3496 return err;
3497
3498 }
3499
3500
/*
 * Pick the receive "big buffer" geometry for a given MTU.
 *
 * The frame length considered is mtu plus the Ethernet header, a VLAN
 * encapsulation and MXGEFW_PAD.  Outputs:
 *   *big_buf_size - size of each big buffer
 *   *cl_size      - cluster size to allocate from
 *   *nbufs        - number of big buffers per frame
 *
 * Frames shorter than MCLBYTES use a standard cluster, frames shorter than
 * MJUMPAGESIZE use a page-sized jumbo cluster, and anything larger uses a
 * 9k cluster (split into 4096-byte pieces when MXGE_VIRT_JUMBOS is set).
 *
 * NOTE(review): the MXGE_VIRT_JUMBOS branch derives nbufs from mtu rather
 * than from the padded frame length -- confirm that is what the firmware
 * expects.
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	const int frame_len = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
	    MXGEFW_PAD;

	if (frame_len < MCLBYTES) {
		/* a regular cluster holds the whole frame */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
	} else if (frame_len < MJUMPAGESIZE) {
		/* a page-sized jumbo cluster holds the whole frame */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
	} else {
#if MXGE_VIRT_JUMBOS
		/* spread the frame over virtually contiguous 4k pieces */
		*cl_size = MJUM9BYTES;
		*big_buf_size = 4096;
		*nbufs = mtu / 4096 + 1;
		/* the count must be a power of two, so 3 becomes 4 */
		if (*nbufs == 3)
			*nbufs = 4;
#else
		*cl_size = MJUM9BYTES;
		*big_buf_size = MJUM9BYTES;
		*nbufs = 1;
#endif
	}
}
3535
/*
 * Prepare one slice for operation: initialise its LRO state, ask the
 * firmware where the slice's send and receive rings live in NIC SRAM, and
 * stock the small and big receive rings with buffers.
 *
 * nbufs and cl_size are stored in ss->rx_big; the big ring is filled in
 * steps of nbufs slots.
 *
 * Returns 0 on success, EIO if any firmware query failed, or ENOMEM if a
 * receive buffer could not be obtained.  Buffers stocked before a failure
 * are not released here; mxge_open() calls mxge_free_mbufs() on its abort
 * path.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	int err, i, slice;

	sc = ss->sc;
	/* index of this slice within sc->ss[] */
	slice = ss - sc->ss;

#if defined(INET) || defined(INET6)
	/* the result of tcp_lro_init() is deliberately ignored */
	(void)tcp_lro_init(&ss->lc);
#endif
	ss->lc.ifp = sc->ifp;

	/* get the lanai pointers to the send and receive rings */

	err = 0;
#ifndef IFNET_BUF_RING
	/* We currently only send from the first slice */
	if (slice == 0) {
#endif
		/* the firmware replies in cmd.data0 with an offset into SRAM */
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
		/* send_go/send_stop registers are spaced 64 bytes per slice */
		ss->tx.send_go = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
		(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
	}
#endif
	/*
	 * the results of the three queries are OR-ed together; only
	 * zero/non-zero is examined below
	 */
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/*
	 * preset every big shadow entry to all-ones before stocking
	 * (presumably an "unused" marker for slots that get no buffer of
	 * their own -- TODO confirm)
	 */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	/* one buffer request per group of nbufs slots */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}
3616
/*
 * Bring the interface up: reset the NIC, program the RSS indirection table
 * when more than one slice is in use, tell the firmware the MTU and
 * receive buffer sizes, hand it the stats DMA block(s), open every slice
 * and finally issue MXGEFW_CMD_ETHERNET_UP.  On success IFF_DRV_RUNNING is
 * set and IFF_DRV_OACTIVE cleared.
 *
 * Returns 0 on success or an error code.  Failures after the buffer-size
 * commands go through the abort label, which releases any stocked receive
 * mbufs with mxge_free_mbufs(); earlier failures return directly.
 *
 * NOTE(review): several steps accumulate command results with "err |=",
 * so the value returned on those paths is the bitwise OR of the individual
 * results and need not be a single meaningful errno.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		/* the reply in cmd.data0 is the table's offset in SRAM */
		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}

	/* derive the big-buffer geometry from the current MTU */
	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/*
	 * Now give the firmware the pointer to the stats block: one per
	 * slice with IFNET_BUF_RING, otherwise only for slice 0.  The
	 * slice number is carried in the upper 16 bits of data2.
	 */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		/*
		 * The V2 command failed: fall back to the obsolete command,
		 * pointing the firmware at send_done_count within slice
		 * 0's stats block.
		 */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* stock the rings of every slice */
	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags |= IFF_DRV_RUNNING;
		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
#endif
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

abort:
	/* release whatever receive buffers were stocked before the failure */
	mxge_free_mbufs(sc);

	return err;
}
3763
3764 static int
mxge_close(mxge_softc_t * sc,int down)3765 mxge_close(mxge_softc_t *sc, int down)
3766 {
3767 mxge_cmd_t cmd;
3768 int err, old_down_cnt;
3769 #ifdef IFNET_BUF_RING
3770 struct mxge_slice_state *ss;
3771 int slice;
3772 #endif
3773
3774 #ifdef IFNET_BUF_RING
3775 for (slice = 0; slice < sc->num_slices; slice++) {
3776 ss = &sc->ss[slice];
3777 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3778 }
3779 #endif
3780 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3781 if (!down) {
3782 old_down_cnt = sc->down_cnt;
3783 wmb();
3784 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3785 if (err) {
3786 device_printf(sc->dev,
3787 "Couldn't bring down link\n");
3788 }
3789 if (old_down_cnt == sc->down_cnt) {
3790 /* wait for down irq */
3791 DELAY(10 * sc->intr_coal_delay);
3792 }
3793 wmb();
3794 if (old_down_cnt == sc->down_cnt) {
3795 device_printf(sc->dev, "never got down irq\n");
3796 }
3797 }
3798 mxge_free_mbufs(sc);
3799
3800 return 0;
3801 }
3802
3803 static void
mxge_setup_cfg_space(mxge_softc_t * sc)3804 mxge_setup_cfg_space(mxge_softc_t *sc)
3805 {
3806 device_t dev = sc->dev;
3807 int reg;
3808 uint16_t lnk, pectl;
3809
3810 /* find the PCIe link width and set max read request to 4KB*/
3811 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
3812 lnk = pci_read_config(dev, reg + 0x12, 2);
3813 sc->link_width = (lnk >> 4) & 0x3f;
3814
3815 if (sc->pectl == 0) {
3816 pectl = pci_read_config(dev, reg + 0x8, 2);
3817 pectl = (pectl & ~0x7000) | (5 << 12);
3818 pci_write_config(dev, reg + 0x8, pectl, 2);
3819 sc->pectl = pectl;
3820 } else {
3821 /* restore saved pectl after watchdog reset */
3822 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3823 }
3824 }
3825
3826 /* Enable DMA and Memory space access */
3827 pci_enable_busmaster(dev);
3828 }
3829
3830 static uint32_t
mxge_read_reboot(mxge_softc_t * sc)3831 mxge_read_reboot(mxge_softc_t *sc)
3832 {
3833 device_t dev = sc->dev;
3834 uint32_t vs;
3835
3836 /* find the vendor specific offset */
3837 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3838 device_printf(sc->dev,
3839 "could not find vendor specific offset\n");
3840 return (uint32_t)-1;
3841 }
3842 /* enable read32 mode */
3843 pci_write_config(dev, vs + 0x10, 0x3, 1);
3844 /* tell NIC which register to read */
3845 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3846 return (pci_read_config(dev, vs + 0x14, 4));
3847 }
3848
/*
 * Recover from a suspected NIC hang.
 *
 * If the PCI command register shows bus mastering disabled, the NIC is
 * assumed to have rebooted: the interface (if running) is closed with all
 * TX locks held, PCI config space is restored, the firmware is reloaded
 * and the interface reopened.  If bus mastering is still enabled nothing
 * is reset.
 *
 * When the reset succeeds, or no reset was needed, the periodic mxge_tick
 * callout is re-armed and a "dying" state of 2 is cleared.  On failure a
 * message is logged and the callout is left stopped.
 *
 * Called from mxge_watchdog_task() with sc->driver_mtx held.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			/*
			 * NOTE(review): only a message is printed here;
			 * execution continues to the busmaster test below
			 * with cmd still 0xffff.
			 */
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* "running" is only assigned and used inside this branch */
		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
		if (running) {

			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}
#ifdef IFNET_BUF_RING
			num_tx_slices = sc->num_slices;
#endif
			/* grab all TX locks to ensure no tx */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			/* down == 1: skip the firmware "down" handshake */
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			/* only reopen if the firmware reload succeeded */
			if (!err)
				err = mxge_open(sc);
			/*
			 * release all TX locks; they are dropped whether
			 * or not the reopen succeeded
			 */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
				mxge_start_locked(ss);
#endif
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		/* mxge_tick() sets dying to 2 when it queues this task */
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}
3950
3951 static void
mxge_watchdog_task(void * arg,int pending)3952 mxge_watchdog_task(void *arg, int pending)
3953 {
3954 mxge_softc_t *sc = arg;
3955
3956
3957 mtx_lock(&sc->driver_mtx);
3958 mxge_watchdog_reset(sc);
3959 mtx_unlock(&sc->driver_mtx);
3960 }
3961
3962 static void
mxge_warn_stuck(mxge_softc_t * sc,mxge_tx_ring_t * tx,int slice)3963 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3964 {
3965 tx = &sc->ss[slice].tx;
3966 device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3967 device_printf(sc->dev,
3968 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3969 tx->req, tx->done, tx->queue_active);
3970 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3971 tx->activate, tx->deactivate);
3972 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3973 tx->pkt_done,
3974 be32toh(sc->ss->fw_stats->send_done_count));
3975 }
3976
3977 static int
mxge_watchdog(mxge_softc_t * sc)3978 mxge_watchdog(mxge_softc_t *sc)
3979 {
3980 mxge_tx_ring_t *tx;
3981 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3982 int i, err = 0;
3983
3984 /* see if we have outstanding transmits, which
3985 have been pending for more than mxge_ticks */
3986 for (i = 0;
3987 #ifdef IFNET_BUF_RING
3988 (i < sc->num_slices) && (err == 0);
3989 #else
3990 (i < 1) && (err == 0);
3991 #endif
3992 i++) {
3993 tx = &sc->ss[i].tx;
3994 if (tx->req != tx->done &&
3995 tx->watchdog_req != tx->watchdog_done &&
3996 tx->done == tx->watchdog_done) {
3997 /* check for pause blocking before resetting */
3998 if (tx->watchdog_rx_pause == rx_pause) {
3999 mxge_warn_stuck(sc, tx, i);
4000 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4001 return (ENXIO);
4002 }
4003 else
4004 device_printf(sc->dev, "Flow control blocking "
4005 "xmits, check link partner\n");
4006 }
4007
4008 tx->watchdog_req = tx->req;
4009 tx->watchdog_done = tx->done;
4010 tx->watchdog_rx_pause = rx_pause;
4011 }
4012
4013 if (sc->need_media_probe)
4014 mxge_media_probe(sc);
4015 return (err);
4016 }
4017
4018 static uint64_t
mxge_get_counter(struct ifnet * ifp,ift_counter cnt)4019 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4020 {
4021 struct mxge_softc *sc;
4022 uint64_t rv;
4023
4024 sc = if_getsoftc(ifp);
4025 rv = 0;
4026
4027 switch (cnt) {
4028 case IFCOUNTER_IPACKETS:
4029 for (int s = 0; s < sc->num_slices; s++)
4030 rv += sc->ss[s].ipackets;
4031 return (rv);
4032 case IFCOUNTER_OPACKETS:
4033 for (int s = 0; s < sc->num_slices; s++)
4034 rv += sc->ss[s].opackets;
4035 return (rv);
4036 case IFCOUNTER_OERRORS:
4037 for (int s = 0; s < sc->num_slices; s++)
4038 rv += sc->ss[s].oerrors;
4039 return (rv);
4040 #ifdef IFNET_BUF_RING
4041 case IFCOUNTER_OBYTES:
4042 for (int s = 0; s < sc->num_slices; s++)
4043 rv += sc->ss[s].obytes;
4044 return (rv);
4045 case IFCOUNTER_OMCASTS:
4046 for (int s = 0; s < sc->num_slices; s++)
4047 rv += sc->ss[s].omcasts;
4048 return (rv);
4049 case IFCOUNTER_OQDROPS:
4050 for (int s = 0; s < sc->num_slices; s++)
4051 rv += sc->ss[s].tx.br->br_drops;
4052 return (rv);
4053 #endif
4054 default:
4055 return (if_get_counter_default(ifp, cnt));
4056 }
4057 }
4058
4059 static void
mxge_tick(void * arg)4060 mxge_tick(void *arg)
4061 {
4062 mxge_softc_t *sc = arg;
4063 u_long pkts = 0;
4064 int err = 0;
4065 int running, ticks;
4066 uint16_t cmd;
4067
4068 ticks = mxge_ticks;
4069 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4070 if (running) {
4071 if (!sc->watchdog_countdown) {
4072 err = mxge_watchdog(sc);
4073 sc->watchdog_countdown = 4;
4074 }
4075 sc->watchdog_countdown--;
4076 }
4077 if (pkts == 0) {
4078 /* ensure NIC did not suffer h/w fault while idle */
4079 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4080 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4081 sc->dying = 2;
4082 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4083 err = ENXIO;
4084 }
4085 /* look less often if NIC is idle */
4086 ticks *= 4;
4087 }
4088
4089 if (err == 0)
4090 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4091
4092 }
4093
/*
 * Media change callback.  Changing the media is not supported, so every
 * request is rejected with EINVAL.
 */
static int
mxge_media_change(struct ifnet *ifp)
{
	return (EINVAL);
}
4099
4100 static int
mxge_change_mtu(mxge_softc_t * sc,int mtu)4101 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4102 {
4103 struct ifnet *ifp = sc->ifp;
4104 int real_mtu, old_mtu;
4105 int err = 0;
4106
4107
4108 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4109 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4110 return EINVAL;
4111 mtx_lock(&sc->driver_mtx);
4112 old_mtu = ifp->if_mtu;
4113 ifp->if_mtu = mtu;
4114 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4115 mxge_close(sc, 0);
4116 err = mxge_open(sc);
4117 if (err != 0) {
4118 ifp->if_mtu = old_mtu;
4119 mxge_close(sc, 0);
4120 (void) mxge_open(sc);
4121 }
4122 }
4123 mtx_unlock(&sc->driver_mtx);
4124 return err;
4125 }
4126
4127 static void
mxge_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)4128 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4129 {
4130 mxge_softc_t *sc = ifp->if_softc;
4131
4132
4133 if (sc == NULL)
4134 return;
4135 ifmr->ifm_status = IFM_AVALID;
4136 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4137 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4138 ifmr->ifm_active |= sc->current_media;
4139 }
4140
4141 static int
mxge_fetch_i2c(mxge_softc_t * sc,struct ifi2creq * i2c)4142 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
4143 {
4144 mxge_cmd_t cmd;
4145 uint32_t i2c_args;
4146 int i, ms, err;
4147
4148
4149 if (i2c->dev_addr != 0xA0 &&
4150 i2c->dev_addr != 0xA2)
4151 return (EINVAL);
4152 if (i2c->len > sizeof(i2c->data))
4153 return (EINVAL);
4154
4155 for (i = 0; i < i2c->len; i++) {
4156 i2c_args = i2c->dev_addr << 0x8;
4157 i2c_args |= i2c->offset + i;
4158 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
4159 cmd.data1 = i2c_args;
4160 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
4161
4162 if (err != MXGEFW_CMD_OK)
4163 return (EIO);
4164 /* now we wait for the data to be cached */
4165 cmd.data0 = i2c_args & 0xff;
4166 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4167 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
4168 cmd.data0 = i2c_args & 0xff;
4169 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4170 if (err == EBUSY)
4171 DELAY(1000);
4172 }
4173 if (err != MXGEFW_CMD_OK)
4174 return (EIO);
4175 i2c->data[i] = cmd.data0;
4176 }
4177 return (0);
4178 }
4179
4180 static int
mxge_ioctl(struct ifnet * ifp,u_long command,caddr_t data)4181 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4182 {
4183 mxge_softc_t *sc = ifp->if_softc;
4184 struct ifreq *ifr = (struct ifreq *)data;
4185 struct ifi2creq i2c;
4186 int err, mask;
4187
4188 err = 0;
4189 switch (command) {
4190 case SIOCSIFMTU:
4191 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4192 break;
4193
4194 case SIOCSIFFLAGS:
4195 mtx_lock(&sc->driver_mtx);
4196 if (sc->dying) {
4197 mtx_unlock(&sc->driver_mtx);
4198 return EINVAL;
4199 }
4200 if (ifp->if_flags & IFF_UP) {
4201 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4202 err = mxge_open(sc);
4203 } else {
4204 /* take care of promis can allmulti
4205 flag chages */
4206 mxge_change_promisc(sc,
4207 ifp->if_flags & IFF_PROMISC);
4208 mxge_set_multicast_list(sc);
4209 }
4210 } else {
4211 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4212 mxge_close(sc, 0);
4213 }
4214 }
4215 mtx_unlock(&sc->driver_mtx);
4216 break;
4217
4218 case SIOCADDMULTI:
4219 case SIOCDELMULTI:
4220 mtx_lock(&sc->driver_mtx);
4221 if (sc->dying) {
4222 mtx_unlock(&sc->driver_mtx);
4223 return (EINVAL);
4224 }
4225 mxge_set_multicast_list(sc);
4226 mtx_unlock(&sc->driver_mtx);
4227 break;
4228
4229 case SIOCSIFCAP:
4230 mtx_lock(&sc->driver_mtx);
4231 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4232 if (mask & IFCAP_TXCSUM) {
4233 if (IFCAP_TXCSUM & ifp->if_capenable) {
4234 mask &= ~IFCAP_TSO4;
4235 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4236 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4237 } else {
4238 ifp->if_capenable |= IFCAP_TXCSUM;
4239 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4240 }
4241 }
4242 if (mask & IFCAP_RXCSUM) {
4243 if (IFCAP_RXCSUM & ifp->if_capenable) {
4244 ifp->if_capenable &= ~IFCAP_RXCSUM;
4245 } else {
4246 ifp->if_capenable |= IFCAP_RXCSUM;
4247 }
4248 }
4249 if (mask & IFCAP_TSO4) {
4250 if (IFCAP_TSO4 & ifp->if_capenable) {
4251 ifp->if_capenable &= ~IFCAP_TSO4;
4252 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4253 ifp->if_capenable |= IFCAP_TSO4;
4254 ifp->if_hwassist |= CSUM_TSO;
4255 } else {
4256 printf("mxge requires tx checksum offload"
4257 " be enabled to use TSO\n");
4258 err = EINVAL;
4259 }
4260 }
4261 #if IFCAP_TSO6
4262 if (mask & IFCAP_TXCSUM_IPV6) {
4263 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4264 mask &= ~IFCAP_TSO6;
4265 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4266 | IFCAP_TSO6);
4267 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4268 | CSUM_UDP);
4269 } else {
4270 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4271 ifp->if_hwassist |= (CSUM_TCP_IPV6
4272 | CSUM_UDP_IPV6);
4273 }
4274 }
4275 if (mask & IFCAP_RXCSUM_IPV6) {
4276 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4277 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4278 } else {
4279 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4280 }
4281 }
4282 if (mask & IFCAP_TSO6) {
4283 if (IFCAP_TSO6 & ifp->if_capenable) {
4284 ifp->if_capenable &= ~IFCAP_TSO6;
4285 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4286 ifp->if_capenable |= IFCAP_TSO6;
4287 ifp->if_hwassist |= CSUM_TSO;
4288 } else {
4289 printf("mxge requires tx checksum offload"
4290 " be enabled to use TSO\n");
4291 err = EINVAL;
4292 }
4293 }
4294 #endif /*IFCAP_TSO6 */
4295
4296 if (mask & IFCAP_LRO)
4297 ifp->if_capenable ^= IFCAP_LRO;
4298 if (mask & IFCAP_VLAN_HWTAGGING)
4299 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4300 if (mask & IFCAP_VLAN_HWTSO)
4301 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4302
4303 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4304 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4305 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4306
4307 mtx_unlock(&sc->driver_mtx);
4308 VLAN_CAPABILITIES(ifp);
4309
4310 break;
4311
4312 case SIOCGIFMEDIA:
4313 mtx_lock(&sc->driver_mtx);
4314 if (sc->dying) {
4315 mtx_unlock(&sc->driver_mtx);
4316 return (EINVAL);
4317 }
4318 mxge_media_probe(sc);
4319 mtx_unlock(&sc->driver_mtx);
4320 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4321 &sc->media, command);
4322 break;
4323
4324 case SIOCGI2C:
4325 if (sc->connector != MXGE_XFP &&
4326 sc->connector != MXGE_SFP) {
4327 err = ENXIO;
4328 break;
4329 }
4330 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4331 if (err != 0)
4332 break;
4333 mtx_lock(&sc->driver_mtx);
4334 if (sc->dying) {
4335 mtx_unlock(&sc->driver_mtx);
4336 return (EINVAL);
4337 }
4338 err = mxge_fetch_i2c(sc, &i2c);
4339 mtx_unlock(&sc->driver_mtx);
4340 if (err == 0)
4341 err = copyout(&i2c, ifr_data_get_ptr(ifr),
4342 sizeof(i2c));
4343 break;
4344 default:
4345 err = ether_ioctl(ifp, command, data);
4346 break;
4347 }
4348 return err;
4349 }
4350
4351 static void
mxge_fetch_tunables(mxge_softc_t * sc)4352 mxge_fetch_tunables(mxge_softc_t *sc)
4353 {
4354
4355 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4356 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4357 &mxge_flow_control);
4358 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4359 &mxge_intr_coal_delay);
4360 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4361 &mxge_nvidia_ecrc_enable);
4362 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4363 &mxge_force_firmware);
4364 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4365 &mxge_deassert_wait);
4366 TUNABLE_INT_FETCH("hw.mxge.verbose",
4367 &mxge_verbose);
4368 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4369 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4370 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4371 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4372 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4373 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4374
4375 if (bootverbose)
4376 mxge_verbose = 1;
4377 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4378 mxge_intr_coal_delay = 30;
4379 if (mxge_ticks == 0)
4380 mxge_ticks = hz / 2;
4381 sc->pause = mxge_flow_control;
4382 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4383 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4384 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4385 }
4386 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4387 mxge_initial_mtu < ETHER_MIN_LEN)
4388 mxge_initial_mtu = ETHERMTU_JUMBO;
4389
4390 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4391 mxge_throttle = MXGE_MAX_THROTTLE;
4392 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4393 mxge_throttle = MXGE_MIN_THROTTLE;
4394 sc->throttle = mxge_throttle;
4395 }
4396
4397
4398 static void
mxge_free_slices(mxge_softc_t * sc)4399 mxge_free_slices(mxge_softc_t *sc)
4400 {
4401 struct mxge_slice_state *ss;
4402 int i;
4403
4404
4405 if (sc->ss == NULL)
4406 return;
4407
4408 for (i = 0; i < sc->num_slices; i++) {
4409 ss = &sc->ss[i];
4410 if (ss->fw_stats != NULL) {
4411 mxge_dma_free(&ss->fw_stats_dma);
4412 ss->fw_stats = NULL;
4413 #ifdef IFNET_BUF_RING
4414 if (ss->tx.br != NULL) {
4415 drbr_free(ss->tx.br, M_DEVBUF);
4416 ss->tx.br = NULL;
4417 }
4418 #endif
4419 mtx_destroy(&ss->tx.mtx);
4420 }
4421 if (ss->rx_done.entry != NULL) {
4422 mxge_dma_free(&ss->rx_done.dma);
4423 ss->rx_done.entry = NULL;
4424 }
4425 }
4426 free(sc->ss, M_DEVBUF);
4427 sc->ss = NULL;
4428 }
4429
4430 static int
mxge_alloc_slices(mxge_softc_t * sc)4431 mxge_alloc_slices(mxge_softc_t *sc)
4432 {
4433 mxge_cmd_t cmd;
4434 struct mxge_slice_state *ss;
4435 size_t bytes;
4436 int err, i, max_intr_slots;
4437
4438 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4439 if (err != 0) {
4440 device_printf(sc->dev, "Cannot determine rx ring size\n");
4441 return err;
4442 }
4443 sc->rx_ring_size = cmd.data0;
4444 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4445
4446 bytes = sizeof (*sc->ss) * sc->num_slices;
4447 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4448 if (sc->ss == NULL)
4449 return (ENOMEM);
4450 for (i = 0; i < sc->num_slices; i++) {
4451 ss = &sc->ss[i];
4452
4453 ss->sc = sc;
4454
4455 /* allocate per-slice rx interrupt queues */
4456
4457 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4458 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4459 if (err != 0)
4460 goto abort;
4461 ss->rx_done.entry = ss->rx_done.dma.addr;
4462 bzero(ss->rx_done.entry, bytes);
4463
4464 /*
4465 * allocate the per-slice firmware stats; stats
4466 * (including tx) are used used only on the first
4467 * slice for now
4468 */
4469 #ifndef IFNET_BUF_RING
4470 if (i > 0)
4471 continue;
4472 #endif
4473
4474 bytes = sizeof (*ss->fw_stats);
4475 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4476 sizeof (*ss->fw_stats), 64);
4477 if (err != 0)
4478 goto abort;
4479 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4480 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4481 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4482 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4483 #ifdef IFNET_BUF_RING
4484 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4485 &ss->tx.mtx);
4486 #endif
4487 }
4488
4489 return (0);
4490
4491 abort:
4492 mxge_free_slices(sc);
4493 return (ENOMEM);
4494 }
4495
4496 static void
mxge_slice_probe(mxge_softc_t * sc)4497 mxge_slice_probe(mxge_softc_t *sc)
4498 {
4499 mxge_cmd_t cmd;
4500 char *old_fw;
4501 int msix_cnt, status, max_intr_slots;
4502
4503 sc->num_slices = 1;
4504 /*
4505 * don't enable multiple slices if they are not enabled,
4506 * or if this is not an SMP system
4507 */
4508
4509 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4510 return;
4511
4512 /* see how many MSI-X interrupts are available */
4513 msix_cnt = pci_msix_count(sc->dev);
4514 if (msix_cnt < 2)
4515 return;
4516
4517 /* now load the slice aware firmware see what it supports */
4518 old_fw = sc->fw_name;
4519 if (old_fw == mxge_fw_aligned)
4520 sc->fw_name = mxge_fw_rss_aligned;
4521 else
4522 sc->fw_name = mxge_fw_rss_unaligned;
4523 status = mxge_load_firmware(sc, 0);
4524 if (status != 0) {
4525 device_printf(sc->dev, "Falling back to a single slice\n");
4526 return;
4527 }
4528
4529 /* try to send a reset command to the card to see if it
4530 is alive */
4531 memset(&cmd, 0, sizeof (cmd));
4532 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4533 if (status != 0) {
4534 device_printf(sc->dev, "failed reset\n");
4535 goto abort_with_fw;
4536 }
4537
4538 /* get rx ring size */
4539 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4540 if (status != 0) {
4541 device_printf(sc->dev, "Cannot determine rx ring size\n");
4542 goto abort_with_fw;
4543 }
4544 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4545
4546 /* tell it the size of the interrupt queues */
4547 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4548 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4549 if (status != 0) {
4550 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4551 goto abort_with_fw;
4552 }
4553
4554 /* ask the maximum number of slices it supports */
4555 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4556 if (status != 0) {
4557 device_printf(sc->dev,
4558 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4559 goto abort_with_fw;
4560 }
4561 sc->num_slices = cmd.data0;
4562 if (sc->num_slices > msix_cnt)
4563 sc->num_slices = msix_cnt;
4564
4565 if (mxge_max_slices == -1) {
4566 /* cap to number of CPUs in system */
4567 if (sc->num_slices > mp_ncpus)
4568 sc->num_slices = mp_ncpus;
4569 } else {
4570 if (sc->num_slices > mxge_max_slices)
4571 sc->num_slices = mxge_max_slices;
4572 }
4573 /* make sure it is a power of two */
4574 while (sc->num_slices & (sc->num_slices - 1))
4575 sc->num_slices--;
4576
4577 if (mxge_verbose)
4578 device_printf(sc->dev, "using %d slices\n",
4579 sc->num_slices);
4580
4581 return;
4582
4583 abort_with_fw:
4584 sc->fw_name = old_fw;
4585 (void) mxge_load_firmware(sc, 0);
4586 }
4587
4588 static int
mxge_add_msix_irqs(mxge_softc_t * sc)4589 mxge_add_msix_irqs(mxge_softc_t *sc)
4590 {
4591 size_t bytes;
4592 int count, err, i, rid;
4593
4594 rid = PCIR_BAR(2);
4595 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4596 &rid, RF_ACTIVE);
4597
4598 if (sc->msix_table_res == NULL) {
4599 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4600 return ENXIO;
4601 }
4602
4603 count = sc->num_slices;
4604 err = pci_alloc_msix(sc->dev, &count);
4605 if (err != 0) {
4606 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4607 "err = %d \n", sc->num_slices, err);
4608 goto abort_with_msix_table;
4609 }
4610 if (count < sc->num_slices) {
4611 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4612 count, sc->num_slices);
4613 device_printf(sc->dev,
4614 "Try setting hw.mxge.max_slices to %d\n",
4615 count);
4616 err = ENOSPC;
4617 goto abort_with_msix;
4618 }
4619 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4620 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4621 if (sc->msix_irq_res == NULL) {
4622 err = ENOMEM;
4623 goto abort_with_msix;
4624 }
4625
4626 for (i = 0; i < sc->num_slices; i++) {
4627 rid = i + 1;
4628 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4629 SYS_RES_IRQ,
4630 &rid, RF_ACTIVE);
4631 if (sc->msix_irq_res[i] == NULL) {
4632 device_printf(sc->dev, "couldn't allocate IRQ res"
4633 " for message %d\n", i);
4634 err = ENXIO;
4635 goto abort_with_res;
4636 }
4637 }
4638
4639 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4640 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4641
4642 for (i = 0; i < sc->num_slices; i++) {
4643 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4644 INTR_TYPE_NET | INTR_MPSAFE,
4645 #if __FreeBSD_version > 700030
4646 NULL,
4647 #endif
4648 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4649 if (err != 0) {
4650 device_printf(sc->dev, "couldn't setup intr for "
4651 "message %d\n", i);
4652 goto abort_with_intr;
4653 }
4654 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4655 sc->msix_ih[i], "s%d", i);
4656 }
4657
4658 if (mxge_verbose) {
4659 device_printf(sc->dev, "using %d msix IRQs:",
4660 sc->num_slices);
4661 for (i = 0; i < sc->num_slices; i++)
4662 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4663 printf("\n");
4664 }
4665 return (0);
4666
4667 abort_with_intr:
4668 for (i = 0; i < sc->num_slices; i++) {
4669 if (sc->msix_ih[i] != NULL) {
4670 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4671 sc->msix_ih[i]);
4672 sc->msix_ih[i] = NULL;
4673 }
4674 }
4675 free(sc->msix_ih, M_DEVBUF);
4676
4677
4678 abort_with_res:
4679 for (i = 0; i < sc->num_slices; i++) {
4680 rid = i + 1;
4681 if (sc->msix_irq_res[i] != NULL)
4682 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4683 sc->msix_irq_res[i]);
4684 sc->msix_irq_res[i] = NULL;
4685 }
4686 free(sc->msix_irq_res, M_DEVBUF);
4687
4688
4689 abort_with_msix:
4690 pci_release_msi(sc->dev);
4691
4692 abort_with_msix_table:
4693 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4694 sc->msix_table_res);
4695
4696 return err;
4697 }
4698
4699 static int
mxge_add_single_irq(mxge_softc_t * sc)4700 mxge_add_single_irq(mxge_softc_t *sc)
4701 {
4702 int count, err, rid;
4703
4704 count = pci_msi_count(sc->dev);
4705 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4706 rid = 1;
4707 } else {
4708 rid = 0;
4709 sc->legacy_irq = 1;
4710 }
4711 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4712 RF_SHAREABLE | RF_ACTIVE);
4713 if (sc->irq_res == NULL) {
4714 device_printf(sc->dev, "could not alloc interrupt\n");
4715 return ENXIO;
4716 }
4717 if (mxge_verbose)
4718 device_printf(sc->dev, "using %s irq %jd\n",
4719 sc->legacy_irq ? "INTx" : "MSI",
4720 rman_get_start(sc->irq_res));
4721 err = bus_setup_intr(sc->dev, sc->irq_res,
4722 INTR_TYPE_NET | INTR_MPSAFE,
4723 #if __FreeBSD_version > 700030
4724 NULL,
4725 #endif
4726 mxge_intr, &sc->ss[0], &sc->ih);
4727 if (err != 0) {
4728 bus_release_resource(sc->dev, SYS_RES_IRQ,
4729 sc->legacy_irq ? 0 : 1, sc->irq_res);
4730 if (!sc->legacy_irq)
4731 pci_release_msi(sc->dev);
4732 }
4733 return err;
4734 }
4735
4736 static void
mxge_rem_msix_irqs(mxge_softc_t * sc)4737 mxge_rem_msix_irqs(mxge_softc_t *sc)
4738 {
4739 int i, rid;
4740
4741 for (i = 0; i < sc->num_slices; i++) {
4742 if (sc->msix_ih[i] != NULL) {
4743 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4744 sc->msix_ih[i]);
4745 sc->msix_ih[i] = NULL;
4746 }
4747 }
4748 free(sc->msix_ih, M_DEVBUF);
4749
4750 for (i = 0; i < sc->num_slices; i++) {
4751 rid = i + 1;
4752 if (sc->msix_irq_res[i] != NULL)
4753 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4754 sc->msix_irq_res[i]);
4755 sc->msix_irq_res[i] = NULL;
4756 }
4757 free(sc->msix_irq_res, M_DEVBUF);
4758
4759 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4760 sc->msix_table_res);
4761
4762 pci_release_msi(sc->dev);
4763 return;
4764 }
4765
4766 static void
mxge_rem_single_irq(mxge_softc_t * sc)4767 mxge_rem_single_irq(mxge_softc_t *sc)
4768 {
4769 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4770 bus_release_resource(sc->dev, SYS_RES_IRQ,
4771 sc->legacy_irq ? 0 : 1, sc->irq_res);
4772 if (!sc->legacy_irq)
4773 pci_release_msi(sc->dev);
4774 }
4775
4776 static void
mxge_rem_irq(mxge_softc_t * sc)4777 mxge_rem_irq(mxge_softc_t *sc)
4778 {
4779 if (sc->num_slices > 1)
4780 mxge_rem_msix_irqs(sc);
4781 else
4782 mxge_rem_single_irq(sc);
4783 }
4784
4785 static int
mxge_add_irq(mxge_softc_t * sc)4786 mxge_add_irq(mxge_softc_t *sc)
4787 {
4788 int err;
4789
4790 if (sc->num_slices > 1)
4791 err = mxge_add_msix_irqs(sc);
4792 else
4793 err = mxge_add_single_irq(sc);
4794
4795 if (0 && err == 0 && sc->num_slices > 1) {
4796 mxge_rem_msix_irqs(sc);
4797 err = mxge_add_msix_irqs(sc);
4798 }
4799 return err;
4800 }
4801
4802
4803 static int
mxge_attach(device_t dev)4804 mxge_attach(device_t dev)
4805 {
4806 mxge_cmd_t cmd;
4807 mxge_softc_t *sc = device_get_softc(dev);
4808 struct ifnet *ifp;
4809 int err, rid;
4810
4811 sc->dev = dev;
4812 mxge_fetch_tunables(sc);
4813
4814 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4815 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4816 taskqueue_thread_enqueue, &sc->tq);
4817 if (sc->tq == NULL) {
4818 err = ENOMEM;
4819 goto abort_with_nothing;
4820 }
4821
4822 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4823 1, /* alignment */
4824 0, /* boundary */
4825 BUS_SPACE_MAXADDR, /* low */
4826 BUS_SPACE_MAXADDR, /* high */
4827 NULL, NULL, /* filter */
4828 65536 + 256, /* maxsize */
4829 MXGE_MAX_SEND_DESC, /* num segs */
4830 65536, /* maxsegsize */
4831 0, /* flags */
4832 NULL, NULL, /* lock */
4833 &sc->parent_dmat); /* tag */
4834
4835 if (err != 0) {
4836 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4837 err);
4838 goto abort_with_tq;
4839 }
4840
4841 ifp = sc->ifp = if_alloc(IFT_ETHER);
4842 if (ifp == NULL) {
4843 device_printf(dev, "can not if_alloc()\n");
4844 err = ENOSPC;
4845 goto abort_with_parent_dmat;
4846 }
4847 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4848
4849 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4850 device_get_nameunit(dev));
4851 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4852 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4853 "%s:drv", device_get_nameunit(dev));
4854 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4855 MTX_NETWORK_LOCK, MTX_DEF);
4856
4857 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4858
4859 mxge_setup_cfg_space(sc);
4860
4861 /* Map the board into the kernel */
4862 rid = PCIR_BARS;
4863 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4864 RF_ACTIVE);
4865 if (sc->mem_res == NULL) {
4866 device_printf(dev, "could not map memory\n");
4867 err = ENXIO;
4868 goto abort_with_lock;
4869 }
4870 sc->sram = rman_get_virtual(sc->mem_res);
4871 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4872 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4873 device_printf(dev, "impossible memory region size %jd\n",
4874 rman_get_size(sc->mem_res));
4875 err = ENXIO;
4876 goto abort_with_mem_res;
4877 }
4878
4879 /* make NULL terminated copy of the EEPROM strings section of
4880 lanai SRAM */
4881 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4882 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4883 rman_get_bushandle(sc->mem_res),
4884 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4885 sc->eeprom_strings,
4886 MXGE_EEPROM_STRINGS_SIZE - 2);
4887 err = mxge_parse_strings(sc);
4888 if (err != 0)
4889 goto abort_with_mem_res;
4890
4891 /* Enable write combining for efficient use of PCIe bus */
4892 mxge_enable_wc(sc);
4893
4894 /* Allocate the out of band dma memory */
4895 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4896 sizeof (mxge_cmd_t), 64);
4897 if (err != 0)
4898 goto abort_with_mem_res;
4899 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4900 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4901 if (err != 0)
4902 goto abort_with_cmd_dma;
4903
4904 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4905 if (err != 0)
4906 goto abort_with_zeropad_dma;
4907
4908 /* select & load the firmware */
4909 err = mxge_select_firmware(sc);
4910 if (err != 0)
4911 goto abort_with_dmabench;
4912 sc->intr_coal_delay = mxge_intr_coal_delay;
4913
4914 mxge_slice_probe(sc);
4915 err = mxge_alloc_slices(sc);
4916 if (err != 0)
4917 goto abort_with_dmabench;
4918
4919 err = mxge_reset(sc, 0);
4920 if (err != 0)
4921 goto abort_with_slices;
4922
4923 err = mxge_alloc_rings(sc);
4924 if (err != 0) {
4925 device_printf(sc->dev, "failed to allocate rings\n");
4926 goto abort_with_slices;
4927 }
4928
4929 err = mxge_add_irq(sc);
4930 if (err != 0) {
4931 device_printf(sc->dev, "failed to add irq\n");
4932 goto abort_with_rings;
4933 }
4934
4935 ifp->if_baudrate = IF_Gbps(10);
4936 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4937 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4938 IFCAP_RXCSUM_IPV6;
4939 #if defined(INET) || defined(INET6)
4940 ifp->if_capabilities |= IFCAP_LRO;
4941 #endif
4942
4943 #ifdef MXGE_NEW_VLAN_API
4944 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4945
4946 /* Only FW 1.4.32 and newer can do TSO over vlans */
4947 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4948 sc->fw_ver_tiny >= 32)
4949 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4950 #endif
4951 sc->max_mtu = mxge_max_mtu(sc);
4952 if (sc->max_mtu >= 9000)
4953 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4954 else
4955 device_printf(dev, "MTU limited to %d. Install "
4956 "latest firmware for 9000 byte jumbo support\n",
4957 sc->max_mtu - ETHER_HDR_LEN);
4958 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4959 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4960 /* check to see if f/w supports TSO for IPv6 */
4961 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4962 if (CSUM_TCP_IPV6)
4963 ifp->if_capabilities |= IFCAP_TSO6;
4964 sc->max_tso6_hlen = min(cmd.data0,
4965 sizeof (sc->ss[0].scratch));
4966 }
4967 ifp->if_capenable = ifp->if_capabilities;
4968 if (sc->lro_cnt == 0)
4969 ifp->if_capenable &= ~IFCAP_LRO;
4970 ifp->if_init = mxge_init;
4971 ifp->if_softc = sc;
4972 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4973 ifp->if_ioctl = mxge_ioctl;
4974 ifp->if_start = mxge_start;
4975 ifp->if_get_counter = mxge_get_counter;
4976 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4977 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
4978 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
4979 /* Initialise the ifmedia structure */
4980 ifmedia_init(&sc->media, 0, mxge_media_change,
4981 mxge_media_status);
4982 mxge_media_init(sc);
4983 mxge_media_probe(sc);
4984 sc->dying = 0;
4985 ether_ifattach(ifp, sc->mac_addr);
4986 /* ether_ifattach sets mtu to ETHERMTU */
4987 if (mxge_initial_mtu != ETHERMTU)
4988 mxge_change_mtu(sc, mxge_initial_mtu);
4989
4990 mxge_add_sysctls(sc);
4991 #ifdef IFNET_BUF_RING
4992 ifp->if_transmit = mxge_transmit;
4993 ifp->if_qflush = mxge_qflush;
4994 #endif
4995 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4996 device_get_nameunit(sc->dev));
4997 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4998 return 0;
4999
5000 abort_with_rings:
5001 mxge_free_rings(sc);
5002 abort_with_slices:
5003 mxge_free_slices(sc);
5004 abort_with_dmabench:
5005 mxge_dma_free(&sc->dmabench_dma);
5006 abort_with_zeropad_dma:
5007 mxge_dma_free(&sc->zeropad_dma);
5008 abort_with_cmd_dma:
5009 mxge_dma_free(&sc->cmd_dma);
5010 abort_with_mem_res:
5011 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5012 abort_with_lock:
5013 pci_disable_busmaster(dev);
5014 mtx_destroy(&sc->cmd_mtx);
5015 mtx_destroy(&sc->driver_mtx);
5016 if_free(ifp);
5017 abort_with_parent_dmat:
5018 bus_dma_tag_destroy(sc->parent_dmat);
5019 abort_with_tq:
5020 if (sc->tq != NULL) {
5021 taskqueue_drain(sc->tq, &sc->watchdog_task);
5022 taskqueue_free(sc->tq);
5023 sc->tq = NULL;
5024 }
5025 abort_with_nothing:
5026 return err;
5027 }
5028
5029 static int
mxge_detach(device_t dev)5030 mxge_detach(device_t dev)
5031 {
5032 mxge_softc_t *sc = device_get_softc(dev);
5033
5034 if (mxge_vlans_active(sc)) {
5035 device_printf(sc->dev,
5036 "Detach vlans before removing module\n");
5037 return EBUSY;
5038 }
5039 mtx_lock(&sc->driver_mtx);
5040 sc->dying = 1;
5041 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
5042 mxge_close(sc, 0);
5043 mtx_unlock(&sc->driver_mtx);
5044 ether_ifdetach(sc->ifp);
5045 if (sc->tq != NULL) {
5046 taskqueue_drain(sc->tq, &sc->watchdog_task);
5047 taskqueue_free(sc->tq);
5048 sc->tq = NULL;
5049 }
5050 callout_drain(&sc->co_hdl);
5051 ifmedia_removeall(&sc->media);
5052 mxge_dummy_rdma(sc, 0);
5053 mxge_rem_sysctls(sc);
5054 mxge_rem_irq(sc);
5055 mxge_free_rings(sc);
5056 mxge_free_slices(sc);
5057 mxge_dma_free(&sc->dmabench_dma);
5058 mxge_dma_free(&sc->zeropad_dma);
5059 mxge_dma_free(&sc->cmd_dma);
5060 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5061 pci_disable_busmaster(dev);
5062 mtx_destroy(&sc->cmd_mtx);
5063 mtx_destroy(&sc->driver_mtx);
5064 if_free(sc->ifp);
5065 bus_dma_tag_destroy(sc->parent_dmat);
5066 return 0;
5067 }
5068
5069 static int
mxge_shutdown(device_t dev)5070 mxge_shutdown(device_t dev)
5071 {
5072 return 0;
5073 }
5074
5075 /*
5076 This file uses Myri10GE driver indentation.
5077
5078 Local Variables:
5079 c-file-style:"linux"
5080 tab-width:8
5081 End:
5082 */
5083