/*	$OpenBSD: if_cad.c,v 1.14 2024/03/24 22:34:06 patrick Exp $	*/

/*
 * Copyright (c) 2021-2022 Visa Hankala
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Driver for Cadence 10/100/Gigabit Ethernet device.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/mutex.h>
#include <sys/kstat.h>
#include <sys/rwlock.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <net/if.h>
#include <net/if_media.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>
#include <dev/mii/miidevs.h>

#include <machine/bus.h>
#include <machine/fdt.h>

#include <dev/ofw/fdt.h>
#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_clock.h>
#include <dev/ofw/ofw_gpio.h>

/*
 * Register map.
 *
 * Names that follow "#define" with a single space are register offsets
 * and are used with HREAD4()/HWRITE4().  Names that follow "#define"
 * with two spaces are bit fields, masks or shift counts belonging to
 * the register defined immediately above them.
 */
#define GEM_NETCTL			0x0000
#define  GEM_NETCTL_DPRAM			(1 << 18)
#define  GEM_NETCTL_STARTTX			(1 << 9)
#define  GEM_NETCTL_STATCLR			(1 << 5)
#define  GEM_NETCTL_MDEN			(1 << 4)
#define  GEM_NETCTL_TXEN			(1 << 3)
#define  GEM_NETCTL_RXEN			(1 << 2)
#define GEM_NETCFG			0x0004
#define  GEM_NETCFG_SGMIIEN			(1 << 27)
#define  GEM_NETCFG_RXCSUMEN			(1 << 24)
#define  GEM_NETCFG_MDCCLKDIV_MASK		(0x7 << 18)
#define  GEM_NETCFG_MDCCLKDIV_SHIFT		18
#define  GEM_NETCFG_FCSREM			(1 << 17)
#define  GEM_NETCFG_RXOFFS_MASK			(0x3 << 14)
#define  GEM_NETCFG_RXOFFS_SHIFT		14
#define  GEM_NETCFG_PCSSEL			(1 << 11)
#define  GEM_NETCFG_1000			(1 << 10)
#define  GEM_NETCFG_1536RXEN			(1 << 8)
#define  GEM_NETCFG_UCASTHASHEN			(1 << 7)
#define  GEM_NETCFG_MCASTHASHEN			(1 << 6)
#define  GEM_NETCFG_BCASTDI			(1 << 5)
#define  GEM_NETCFG_COPYALL			(1 << 4)
#define  GEM_NETCFG_FDEN			(1 << 1)
#define  GEM_NETCFG_100				(1 << 0)
#define GEM_NETSR			0x0008
#define  GEM_NETSR_PHY_MGMT_IDLE		(1 << 2)
#define GEM_DMACR			0x0010
#define  GEM_DMACR_DMA64			(1 << 30)
#define  GEM_DMACR_AHBDISC			(1 << 24)
#define  GEM_DMACR_RXBUF_MASK			(0xff << 16)
#define  GEM_DMACR_RXBUF_SHIFT			16
#define  GEM_DMACR_TXCSUMEN			(1 << 11)
#define  GEM_DMACR_TXSIZE			(1 << 10)
#define  GEM_DMACR_RXSIZE_MASK			(0x3 << 8)
#define  GEM_DMACR_RXSIZE_8K			(0x3 << 8)
#define  GEM_DMACR_ES_PDATA			(1 << 7)
#define  GEM_DMACR_ES_DESCR			(1 << 6)
#define  GEM_DMACR_BLEN_MASK			(0x1f << 0)
#define  GEM_DMACR_BLEN_16			(0x10 << 0)
#define GEM_TXSR			0x0014
#define  GEM_TXSR_TXGO				(1 << 3)
#define GEM_RXQBASE			0x0018
#define GEM_TXQBASE			0x001c
#define GEM_RXSR			0x0020
#define  GEM_RXSR_RXOVR				(1 << 2)
#define GEM_ISR				0x0024
#define GEM_IER				0x0028
#define GEM_IDR				0x002c
/*
 * GEM_IXR_* are interrupt bits; cad_up() writes them to GEM_IER.
 * NOTE(review): presumably the same layout applies to GEM_ISR and
 * GEM_IDR -- confirm against the interrupt handler and the datasheet.
 */
#define  GEM_IXR_HRESP				(1 << 11)
#define  GEM_IXR_RXOVR				(1 << 10)
#define  GEM_IXR_TXDONE				(1 << 7)
#define  GEM_IXR_TXURUN				(1 << 6)
#define  GEM_IXR_RETRY				(1 << 5)
#define  GEM_IXR_TXUSED				(1 << 3)
#define  GEM_IXR_RXUSED				(1 << 2)
#define  GEM_IXR_RXDONE				(1 << 1)
#define GEM_PHYMNTNC			0x0034
#define  GEM_PHYMNTNC_CLAUSE_22			(1 << 30)
#define  GEM_PHYMNTNC_OP_READ			(0x2 << 28)
#define  GEM_PHYMNTNC_OP_WRITE			(0x1 << 28)
#define  GEM_PHYMNTNC_ADDR_MASK			(0x1f << 23)
#define  GEM_PHYMNTNC_ADDR_SHIFT		23
#define  GEM_PHYMNTNC_REG_MASK			(0x1f << 18)
#define  GEM_PHYMNTNC_REG_SHIFT			18
#define  GEM_PHYMNTNC_MUST_10			(0x2 << 16)
#define  GEM_PHYMNTNC_DATA_MASK			0xffff
#define GEM_HASHL			0x0080
#define GEM_HASHH			0x0084
/* Specific address filter registers, GEM_LADDRNUM pairs of low/high words. */
#define GEM_LADDRL(i)			(0x0088 + (i) * 8)
#define GEM_LADDRH(i)			(0x008c + (i) * 8)
#define GEM_LADDRNUM			4
#define GEM_MID				0x00fc
#define  GEM_MID_VERSION_MASK			(0xfff << 16)
#define  GEM_MID_VERSION_SHIFT			16
/* Statistics counter registers. */
#define GEM_OCTTXL			0x0100
#define GEM_OCTTXH			0x0104
#define GEM_TXCNT			0x0108
#define GEM_TXBCCNT			0x010c
#define GEM_TXMCCNT			0x0110
#define GEM_TXPAUSECNT			0x0114
#define GEM_TX64CNT			0x0118
#define GEM_TX65CNT			0x011c
#define GEM_TX128CNT			0x0120
#define GEM_TX256CNT			0x0124
#define GEM_TX512CNT			0x0128
#define GEM_TX1024CNT			0x012c
#define GEM_TXURUNCNT			0x0134
#define GEM_SNGLCOLLCNT			0x0138
#define GEM_MULTICOLLCNT		0x013c
#define GEM_EXCESSCOLLCNT		0x0140
#define GEM_LATECOLLCNT			0x0144
#define GEM_TXDEFERCNT			0x0148
#define GEM_TXCSENSECNT			0x014c
#define GEM_OCTRXL			0x0150
#define GEM_OCTRXH			0x0154
#define GEM_RXCNT			0x0158
#define GEM_RXBROADCNT			0x015c
#define GEM_RXMULTICNT			0x0160
#define GEM_RXPAUSECNT			0x0164
#define GEM_RX64CNT			0x0168
#define GEM_RX65CNT			0x016c
#define GEM_RX128CNT			0x0170
#define GEM_RX256CNT			0x0174
#define GEM_RX512CNT			0x0178
#define GEM_RX1024CNT			0x017c
#define GEM_RXUNDRCNT			0x0184
#define GEM_RXOVRCNT			0x0188
#define GEM_RXJABCNT			0x018c
#define GEM_RXFCSCNT			0x0190
#define GEM_RXLENGTHCNT			0x0194
#define GEM_RXSYMBCNT			0x0198
#define GEM_RXALIGNCNT			0x019c
#define GEM_RXRESERRCNT			0x01a0
#define GEM_RXORCNT			0x01a4
#define GEM_RXIPCCNT			0x01a8
#define GEM_RXTCPCCNT			0x01ac
#define GEM_RXUDPCCNT			0x01b0
/* Design configuration registers; cad_attach() reads them only if ver >= 0x7. */
#define GEM_CFG6			0x0294
#define  GEM_CFG6_DMA64				(1 << 23)
#define  GEM_CFG6_PRIQ_MASK(x)			((x) & 0xffff)
#define GEM_CFG8			0x029c
#define  GEM_CFG8_NUM_TYPE1_SCR(x)		(((x) >> 24) & 0xff)
#define  GEM_CFG8_NUM_TYPE2_SCR(x)		(((x) >> 16) & 0xff)
/* Base address registers of the additional (priority) queues. */
#define GEM_TXQ1BASE(i)			(0x0440 + (i) * 4)
#define  GEM_TXQ1BASE_DISABLE			(1 << 0)
#define GEM_RXQ1BASE(i)			(0x0480 + (i) * 4)
#define  GEM_RXQ1BASE_DISABLE			(1 << 0)
/* Upper 32 bits of the queue base addresses, written when sc_dma64 is set. */
#define GEM_TXQBASEHI			0x04c8
#define GEM_RXQBASEHI			0x04d4
#define GEM_SCR_TYPE1(i)		(0x0500 + (i) * 4)
#define GEM_SCR_TYPE2(i)		(0x0540 + (i) * 4)
#define GEM_RXQ8BASE(i)			(0x05c0 + (i) * 4)
#define  GEM_RXQ8BASE_DISABLE			(1 << 0)

/* Number of queue indices (0 .. GEM_MAX_PRIQ - 1) scanned in sc_qmask. */
#define GEM_MAX_PRIQ		16

/* Clock name passed to clock_set_frequency()/clock_enable()/clock_disable(). */
#define GEM_CLK_TX		"tx_clk"

/*
 * Software state of one Tx or Rx ring slot.
 */
struct cad_buf {
	bus_dmamap_t		bf_map;		/* DMA map created in cad_up() */
	struct mbuf		*bf_m;		/* loaded mbuf, NULL if none */
};

/*
 * A DMA memory area as returned by cad_dmamem_alloc(); used for the
 * Tx and Rx descriptor rings.
 */
struct cad_dmamem {
	bus_dmamap_t		cdm_map;	/* map; dm_segs[0] gives bus addr */
	bus_dma_segment_t	cdm_seg;	/* presumably the backing segment */
	size_t			cdm_size;	/* length used when syncing */
	caddr_t			cdm_kva;	/* kernel address of the memory */
};

/*
 * Descriptor layout used when sc_dma64 is not set.
 */
struct cad_desc32 {
	uint32_t		d_addr;		/* buffer address (+ Rx flag bits) */
	uint32_t		d_status;	/* status/control word */
};

/*
 * Descriptor layout used when sc_dma64 is set.
 */
struct cad_desc64 {
	uint32_t		d_addrlo;	/* low address word (+ Rx flag bits) */
	uint32_t		d_status;	/* status/control word */
	uint32_t		d_addrhi;	/* upper 32 bits of buffer address */
	uint32_t		d_unused;
};

/* Flag bits kept in the address word (d_addr/d_addrlo) of an Rx descriptor. */
#define GEM_RXD_ADDR_WRAP	(1 << 1)
#define GEM_RXD_ADDR_USED	(1 << 0)

/*
 * Remaining GEM_RXD_* bits.
 * NOTE(review): presumably these live in the Rx descriptor's d_status
 * word; their users are outside this part of the file -- confirm.
 */
#define GEM_RXD_BCAST		(1U << 31)
#define GEM_RXD_MCAST		(1 << 30)
#define GEM_RXD_UCAST		(1 << 29)
#define GEM_RXD_SPEC		(1 << 27)
#define GEM_RXD_SPEC_MASK	(0x3 << 25)
#define GEM_RXD_CSUM_MASK	(0x3 << 22)
#define GEM_RXD_CSUM_UDP_OK	(0x3 << 22)
#define GEM_RXD_CSUM_TCP_OK	(0x2 << 22)
#define GEM_RXD_CSUM_IP_OK	(0x1 << 22)
#define GEM_RXD_VLANTAG		(1 << 21)
#define GEM_RXD_PRIOTAG		(1 << 20)
#define GEM_RXD_CFI		(1 << 16)
#define GEM_RXD_EOF		(1 << 15)
#define GEM_RXD_SOF		(1 << 14)
#define GEM_RXD_BADFCS		(1 << 13)
#define GEM_RXD_LEN_MASK	0x1fff

/* Bits of the d_status word of a Tx descriptor. */
#define GEM_TXD_USED		(1U << 31)
#define GEM_TXD_WRAP		(1 << 30)
#define GEM_TXD_RLIMIT		(1 << 29)
#define GEM_TXD_CORRUPT		(1 << 27)
#define GEM_TXD_LCOLL		(1 << 26)
#define GEM_TXD_CSUMERR_MASK	(0x7 << 20)
#define GEM_TXD_NOFCS		(1 << 16)
#define GEM_TXD_LAST		(1 << 15)
#define GEM_TXD_LEN_MASK	0x3fff

/* Number of descriptors in the Rx ring. */
#define CAD_NRXDESC		256

/* Number of descriptors in the Tx ring and max DMA segments per packet. */
#define CAD_NTXDESC		256
#define CAD_NTXSEGS		16

/*
 * PHY interface modes.  cad_attach() selects one by looking up the
 * device tree "phy-mode" string in cad_phy_modes[].
 */
enum cad_phy_mode {
	CAD_PHY_MODE_GMII,
	CAD_PHY_MODE_RGMII,
	CAD_PHY_MODE_RGMII_ID,
	CAD_PHY_MODE_RGMII_RXID,
	CAD_PHY_MODE_RGMII_TXID,
	CAD_PHY_MODE_SGMII,
};

/*
 * Per-device driver state.
 */
struct cad_softc {
	/* Base device; cad_attach() casts its struct device * to the softc. */
	struct device		sc_dev;
	struct arpcom		sc_ac;		/* ifnet and MAC address */

	bus_dma_tag_t		sc_dmat;	/* DMA tag from attach args */
	bus_space_tag_t		sc_iot;		/* register space tag */
	bus_space_handle_t	sc_ioh;		/* mapped registers */
	void			*sc_ih;		/* interrupt handle */
	int			sc_node;	/* device tree node */
	int			sc_phy_loc;	/* PHY address or MII_PHY_ANY */
	enum cad_phy_mode	sc_phy_mode;	/* from "phy-mode" property */
	/* Set for "cdns,zynq-gem"; makes cad_up() enable GEM_IXR_RXUSED. */
	unsigned char		sc_rxhang_erratum;
	unsigned char		sc_rxdone;
	unsigned char		sc_dma64;	/* 64-bit descriptors in use */
	size_t			sc_descsize;	/* size of one descriptor */
	uint32_t		sc_qmask;	/* bit i set: queue i present */
	uint8_t			sc_ntype1scr;	/* type 1 screener count (CFG8) */
	uint8_t			sc_ntype2scr;	/* type 2 screener count (CFG8) */

	struct mii_data		sc_mii;
#define sc_media	sc_mii.mii_media
	struct timeout		sc_tick;	/* runs cad_tick() */

	struct cad_dmamem	*sc_txring;	/* Tx descriptor memory */
	struct cad_buf		*sc_txbuf;	/* CAD_NTXDESC slot states */
	caddr_t			sc_txdesc;	/* sc_txring->cdm_kva */
	unsigned int		sc_tx_prod;	/* next slot cad_encap() fills */
	unsigned int		sc_tx_cons;	/* oldest slot not yet reclaimed */

	struct if_rxring	sc_rx_ring;	/* set up by if_rxr_init() */
	struct cad_dmamem	*sc_rxring;	/* Rx descriptor memory */
	struct cad_buf		*sc_rxbuf;	/* CAD_NRXDESC slot states */
	caddr_t			sc_rxdesc;	/* sc_rxring->cdm_kva */
	unsigned int		sc_rx_prod;
	unsigned int		sc_rx_cons;
	uint32_t		sc_netctl;	/* value last set in GEM_NETCTL */

	/* Write-held by cad_reset(), cad_up(), cad_down() and cad_iff(). */
	struct rwlock		sc_cfg_lock;
	struct task		sc_statchg_task; /* runs cad_statchg_task() */
	uint32_t		sc_tx_freq;

	struct mutex		sc_kstat_mtx;
	struct kstat		*sc_kstat;
};

/*
 * 32-bit register accessors; reg is a byte offset into the register
 * window mapped at sc_ioh.
 */
#define HREAD4(sc, reg) \
	(bus_space_read_4((sc)->sc_iot, (sc)->sc_ioh, (reg)))
#define HWRITE4(sc, reg, val) \
	bus_space_write_4((sc)->sc_iot, (sc)->sc_ioh, (reg), (val))

/* Autoconf entry points, referenced from cad_ca. */
int	cad_match(struct device *, void *, void *);
void	cad_attach(struct device *, struct device *, void *);

/* ifnet callbacks installed by cad_attach(). */
int	cad_ioctl(struct ifnet *, u_long, caddr_t);
void	cad_start(struct ifqueue *);
void	cad_watchdog(struct ifnet *);

/* Controller control; interrupt, timeout and task handlers. */
void	cad_reset(struct cad_softc *);
int	cad_up(struct cad_softc *);
void	cad_down(struct cad_softc *);
void	cad_iff(struct cad_softc *);
int	cad_intr(void *);
void	cad_tick(void *);
void	cad_statchg_task(void *);

/* Media and MII callbacks installed by cad_attach(). */
int	cad_media_change(struct ifnet *);
void	cad_media_status(struct ifnet *, struct ifmediareq *);
int	cad_mii_readreg(struct device *, int, int);
void	cad_mii_writereg(struct device *, int, int, int);
void	cad_mii_statchg(struct device *);

/* DMA memory and ring helpers. */
struct cad_dmamem *cad_dmamem_alloc(struct cad_softc *, bus_size_t, bus_size_t);
void	cad_dmamem_free(struct cad_softc *, struct cad_dmamem *);
void	cad_rxfill(struct cad_softc *);
void	cad_rxeof(struct cad_softc *);
void	cad_txeof(struct cad_softc *);
unsigned int cad_encap(struct cad_softc *, struct mbuf *);
struct mbuf *cad_alloc_mbuf(struct cad_softc *, bus_dmamap_t);

/* Kernel statistics support, compiled only when kstat is configured. */
#if NKSTAT > 0
void	cad_kstat_attach(struct cad_softc *);
int	cad_kstat_read(struct kstat *);
void	cad_kstat_tick(void *);
#endif

/*
 * With DDB, keep softc pointers indexed by unit number; cad_attach()
 * fills in the entry when the unit number fits the array.
 */
#ifdef DDB
struct cad_softc *cad_sc[4];
#endif

/* Autoconf attachment: softc size plus the match and attach routines. */
const struct cfattach cad_ca = {
	sizeof(struct cad_softc), cad_match, cad_attach
};

/* Autoconf driver record: device name "cad", class DV_IFNET. */
struct cfdriver cad_cd = {
	NULL, "cad", DV_IFNET
};

/*
 * Maps device tree "phy-mode" strings to enum cad_phy_mode values.
 * cad_attach() searches it with strcmp() and keeps its default when
 * no entry matches.
 */
const struct {
	const char		*name;
	enum cad_phy_mode	mode;
} cad_phy_modes[] = {
	{ "gmii",	CAD_PHY_MODE_GMII },
	{ "rgmii",	CAD_PHY_MODE_RGMII },
	{ "rgmii-id",	CAD_PHY_MODE_RGMII_ID },
	{ "rgmii-rxid",	CAD_PHY_MODE_RGMII_RXID },
	{ "rgmii-txid",	CAD_PHY_MODE_RGMII_TXID },
	{ "sgmii",	CAD_PHY_MODE_SGMII },
};

/*
 * Autoconf match routine.
 *
 * aux is the struct fdt_attach_args of the candidate node.  Returns 1
 * if the node is compatible with one of the supported controller
 * variants, 0 otherwise.  parent and match are unused.
 */
int
cad_match(struct device *parent, void *match, void *aux)
{
	static const char *const compats[] = {
		"cdns,gem",
		"cdns,macb",
		"sifive,fu540-c000-gem",
		"sifive,fu740-c000-gem",
	};
	struct fdt_attach_args *faa = aux;
	size_t n;

	/* Try the names in order and stop at the first hit. */
	for (n = 0; n < sizeof(compats) / sizeof(compats[0]); n++) {
		if (OF_is_compatible(faa->fa_node, compats[n]))
			return 1;
	}

	return 0;
}

/*
 * Autoconf attach routine.
 *
 * Maps the registers, determines the MAC address, optionally pulses the
 * PHY reset GPIO, reads the PHY location and mode from the device tree,
 * probes optional hardware features (64-bit DMA, additional queues,
 * screeners), resets the controller, establishes the interrupt and
 * attaches the MII bus and the network interface.
 *
 * parent: unused.
 * self:   device being attached; also the start of the softc.
 * aux:    struct fdt_attach_args describing the device tree node.
 *
 * On failure a message is printed and the function returns without
 * attaching the interface.
 */
void
cad_attach(struct device *parent, struct device *self, void *aux)
{
	char phy_mode[16];
	struct fdt_attach_args *faa = aux;
	struct cad_softc *sc = (struct cad_softc *)self;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t phy_reset_gpio[3];
	uint32_t phy_reset_duration;
	uint32_t hi, lo;
	uint32_t rev, ver;
	uint32_t val;
	unsigned int i;
	int node, phy;

	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	sc->sc_node = faa->fa_node;
	sc->sc_dmat = faa->fa_dmat;
	sc->sc_iot = faa->fa_iot;
	if (bus_space_map(sc->sc_iot, faa->fa_reg[0].addr,
	    faa->fa_reg[0].size, 0, &sc->sc_ioh) != 0) {
		printf(": can't map registers\n");
		return;
	}

	/*
	 * MAC address: prefer the device tree property, then the first
	 * non-zero address filter left in the hardware, and finally
	 * a generated address.
	 */
	if (OF_getprop(faa->fa_node, "local-mac-address", sc->sc_ac.ac_enaddr,
	    sizeof(sc->sc_ac.ac_enaddr)) != sizeof(sc->sc_ac.ac_enaddr)) {
		for (i = 0; i < GEM_LADDRNUM; i++) {
			lo = HREAD4(sc, GEM_LADDRL(i));
			hi = HREAD4(sc, GEM_LADDRH(i));
			if (lo != 0 || hi != 0) {
				sc->sc_ac.ac_enaddr[0] = lo;
				sc->sc_ac.ac_enaddr[1] = lo >> 8;
				sc->sc_ac.ac_enaddr[2] = lo >> 16;
				sc->sc_ac.ac_enaddr[3] = lo >> 24;
				sc->sc_ac.ac_enaddr[4] = hi;
				sc->sc_ac.ac_enaddr[5] = hi >> 8;
				break;
			}
		}
		if (i == GEM_LADDRNUM)
			ether_fakeaddr(ifp);
	}

	/* Pulse the PHY reset line if the device tree describes one. */
	if (OF_getpropintarray(faa->fa_node, "phy-reset-gpios", phy_reset_gpio,
	    sizeof(phy_reset_gpio)) == sizeof(phy_reset_gpio)) {
		phy_reset_duration = OF_getpropint(faa->fa_node,
		    "phy-reset-duration", 1);
		/* Out-of-range durations fall back to the default of 1. */
		if (phy_reset_duration > 1000)
			phy_reset_duration = 1;

		gpio_controller_config_pin(phy_reset_gpio, GPIO_CONFIG_OUTPUT);
		gpio_controller_set_pin(phy_reset_gpio, 1);
		delay((phy_reset_duration + 1) * 1000);
		gpio_controller_set_pin(phy_reset_gpio, 0);
		delay(1000);
	}

	phy = OF_getpropint(faa->fa_node, "phy-handle", 0);
	node = OF_getnodebyphandle(phy);
	if (node != 0)
		sc->sc_phy_loc = OF_getpropint(node, "reg", MII_PHY_ANY);
	else
		sc->sc_phy_loc = MII_PHY_ANY;

	/*
	 * Default to RGMII unless "phy-mode" names a known mode.
	 *
	 * The result of OF_getprop() is not checked, so do not rely on it
	 * having written or NUL-terminated the buffer (the property may be
	 * absent or longer than the buffer).  Clear the buffer first and
	 * keep the last byte as terminator; otherwise strcmp() below could
	 * read indeterminate stack contents.
	 */
	sc->sc_phy_mode = CAD_PHY_MODE_RGMII;
	memset(phy_mode, 0, sizeof(phy_mode));
	OF_getprop(faa->fa_node, "phy-mode", phy_mode, sizeof(phy_mode) - 1);
	for (i = 0; i < nitems(cad_phy_modes); i++) {
		if (strcmp(phy_mode, cad_phy_modes[i].name) == 0) {
			sc->sc_phy_mode = cad_phy_modes[i].mode;
			break;
		}
	}

	rev = HREAD4(sc, GEM_MID);
	ver = (rev & GEM_MID_VERSION_MASK) >> GEM_MID_VERSION_SHIFT;

	sc->sc_descsize = sizeof(struct cad_desc32);
	/* Queue 0 is always present. */
	sc->sc_qmask = 0x1;
	/*
	 * Registers CFG1 and CFG6-10 are not present
	 * on Zynq-7000 / GEM version 0x2.
	 */
	if (ver >= 0x7) {
		val = HREAD4(sc, GEM_CFG6);
		if (val & GEM_CFG6_DMA64) {
			sc->sc_descsize = sizeof(struct cad_desc64);
			sc->sc_dma64 = 1;
		}
		sc->sc_qmask |= GEM_CFG6_PRIQ_MASK(val);

		val = HREAD4(sc, GEM_CFG8);
		sc->sc_ntype1scr = GEM_CFG8_NUM_TYPE1_SCR(val);
		sc->sc_ntype2scr = GEM_CFG8_NUM_TYPE2_SCR(val);
	}

	if (OF_is_compatible(faa->fa_node, "cdns,zynq-gem"))
		sc->sc_rxhang_erratum = 1;

	rw_init(&sc->sc_cfg_lock, "cadcfg");
	timeout_set(&sc->sc_tick, cad_tick, sc);
	task_set(&sc->sc_statchg_task, cad_statchg_task, sc);

	/* cad_reset() asserts that the configuration lock is write-held. */
	rw_enter_write(&sc->sc_cfg_lock);
	cad_reset(sc);
	rw_exit_write(&sc->sc_cfg_lock);

	sc->sc_ih = fdt_intr_establish(faa->fa_node, IPL_NET | IPL_MPSAFE,
	    cad_intr, sc, sc->sc_dev.dv_xname);
	if (sc->sc_ih == NULL) {
		printf(": can't establish interrupt\n");
		goto fail;
	}

	ifp->if_softc = sc;
	strlcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_xflags |= IFXF_MPSAFE;
	ifp->if_ioctl = cad_ioctl;
	ifp->if_qstart = cad_start;
	ifp->if_watchdog = cad_watchdog;
	ifp->if_hardmtu = ETHER_MAX_DIX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN;
	ifp->if_capabilities = IFCAP_VLAN_MTU;

	/*
	 * Enable transmit checksum offload only on reliable hardware.
	 * At least Zynq-7000 appears to generate bad UDP header checksum if
	 * the checksum field has not been initialized to zero and
	 * UDP payload size is less than three octets.
	 */
	if (0) {
		ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
		    IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
		    IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
	}

	printf(": rev 0x%x, address %s\n", rev,
	    ether_sprintf(sc->sc_ac.ac_enaddr));

	sc->sc_mii.mii_ifp = ifp;
	sc->sc_mii.mii_readreg = cad_mii_readreg;
	sc->sc_mii.mii_writereg = cad_mii_writereg;
	sc->sc_mii.mii_statchg = cad_mii_statchg;
	ifmedia_init(&sc->sc_media, 0, cad_media_change, cad_media_status);

	mii_attach(&sc->sc_dev, &sc->sc_mii, 0xffffffff, sc->sc_phy_loc,
	    MII_OFFSET_ANY, MIIF_NOISOLATE);

	/* Without a PHY, offer only a manual media entry. */
	if (LIST_EMPTY(&sc->sc_mii.mii_phys)) {
		printf("%s: no PHY found\n", sc->sc_dev.dv_xname);
		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_MANUAL, 0, NULL);
		ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_MANUAL);
	} else {
		ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
	}

	if_attach(ifp);
	ether_ifattach(ifp);

#if NKSTAT > 0
	cad_kstat_attach(sc);
#endif

#ifdef DDB
	if (sc->sc_dev.dv_unit < nitems(cad_sc))
		cad_sc[sc->sc_dev.dv_unit] = sc;
#endif

	return;

fail:
	if (sc->sc_ioh != 0)
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, faa->fa_reg[0].size);
}

/*
 * Interface ioctl handler.
 *
 * Takes the configuration lock in write mode and raises the IPL to
 * splnet for the duration of the request.  For every command except
 * SIOCGIFMEDIA, SIOCSIFMEDIA and SIOCGIFSFFPAGE the net lock is dropped
 * while the configuration lock is acquired and retaken afterwards.
 *
 * Returns 0 or an errno value; ENETRESET is consumed here by
 * reprogramming the receive filter when the interface is up and running.
 */
int
cad_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct cad_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	const int up_running = IFF_UP | IFF_RUNNING;
	int cycle_netlock, rv = 0;
	int s;

	cycle_netlock = !(cmd == SIOCGIFMEDIA || cmd == SIOCSIFMEDIA ||
	    cmd == SIOCGIFSFFPAGE);

	if (cycle_netlock) {
		NET_UNLOCK();
		rw_enter_write(&sc->sc_cfg_lock);
		NET_LOCK();
	} else {
		rw_enter_write(&sc->sc_cfg_lock);
	}
	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */

	case SIOCSIFFLAGS:
		if (!ISSET(ifp->if_flags, IFF_UP)) {
			/* Administratively down: stop if still running. */
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				cad_down(sc);
		} else if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			/* Already running: only refresh the filter. */
			rv = ENETRESET;
		} else {
			rv = cad_up(sc);
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		rv = ifmedia_ioctl(ifp, ifr, &sc->sc_mii.mii_media, cmd);
		break;

	case SIOCGIFRXR:
		rv = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
		    NULL, MCLBYTES, &sc->sc_rx_ring);
		break;

	default:
		rv = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (rv == ENETRESET) {
		if ((ifp->if_flags & up_running) == up_running)
			cad_iff(sc);
		rv = 0;
	}

	splx(s);
	rw_exit_write(&sc->sc_cfg_lock);

	return rv;
}

/*
 * Bring the controller to a quiescent state: stop Rx/Tx, mask all
 * interrupts, clear the status and queue base registers, zero the
 * screeners, program the MDIO clock divider from "pclk" and leave only
 * the MDIO bus enabled.
 *
 * The caller must hold sc_cfg_lock in write mode.
 */
void
cad_reset(struct cad_softc *sc)
{
	static const unsigned int mdcclk_divs[] = {
		8, 16, 32, 48, 64, 96, 128, 224
	};
	unsigned int pclk, n;
	uint32_t div, cfg;

	rw_assert_wrlock(&sc->sc_cfg_lock);

	HWRITE4(sc, GEM_NETCTL, 0);
	HWRITE4(sc, GEM_IDR, ~0U);
	HWRITE4(sc, GEM_RXSR, 0);
	HWRITE4(sc, GEM_TXSR, 0);
	if (sc->sc_dma64) {
		HWRITE4(sc, GEM_RXQBASEHI, 0);
		HWRITE4(sc, GEM_TXQBASEHI, 0);
	}
	HWRITE4(sc, GEM_RXQBASE, 0);
	HWRITE4(sc, GEM_TXQBASE, 0);

	/* Clear the base registers of every additional queue present. */
	for (n = 1; n < GEM_MAX_PRIQ; n++) {
		if ((sc->sc_qmask & (1U << n)) == 0)
			continue;
		if (n >= 8)
			HWRITE4(sc, GEM_RXQ8BASE(n - 8), 0);
		else
			HWRITE4(sc, GEM_RXQ1BASE(n - 1), 0);
		HWRITE4(sc, GEM_TXQ1BASE(n - 1), 0);
	}

	/* Disable all screeners so that Rx goes through queue 0. */
	for (n = 0; n < sc->sc_ntype1scr; n++)
		HWRITE4(sc, GEM_SCR_TYPE1(n), 0);
	for (n = 0; n < sc->sc_ntype2scr; n++)
		HWRITE4(sc, GEM_SCR_TYPE2(n), 0);

	/*
	 * MDIO clock rate must not exceed 2.5 MHz.  Pick the first divider
	 * that satisfies this; settle for the largest one otherwise.
	 */
	pclk = clock_get_frequency(sc->sc_node, "pclk");
	div = 0;
	while (div < nitems(mdcclk_divs) - 1 &&
	    pclk / mdcclk_divs[div] > 2500000)
		div++;
	KASSERT(div < nitems(mdcclk_divs));

	cfg = (HREAD4(sc, GEM_NETCFG) & ~GEM_NETCFG_MDCCLKDIV_MASK) |
	    (div << GEM_NETCFG_MDCCLKDIV_SHIFT);
	HWRITE4(sc, GEM_NETCFG, cfg);

	/* Enable MDIO bus. */
	sc->sc_netctl = GEM_NETCTL_MDEN;
	HWRITE4(sc, GEM_NETCTL, sc->sc_netctl);
}

/*
 * Bring the interface up.
 *
 * Allocates and initializes the Tx and Rx descriptor rings and their
 * per-slot DMA maps, programs the queue base registers, the MAC address
 * filters, NETCFG and DMACR, enables Rx/Tx and interrupts, marks the
 * interface running and starts the tick timeout.
 *
 * The caller must hold sc_cfg_lock in write mode.  The net lock is
 * released while memory is allocated and retaken before the MAC
 * filters are programmed.
 *
 * Returns 0; no failure path exists in this function.
 *
 * NOTE(review): the results of cad_dmamem_alloc() and
 * bus_dmamap_create() are used without being checked -- confirm that
 * they cannot fail with the flags used here.
 */
int
cad_up(struct cad_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct cad_buf *rxb, *txb;
	struct cad_desc32 *desc32;
	struct cad_desc64 *desc64;
	uint64_t addr;
	int flags = BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW;
	unsigned int i, nrxd, ntxd;
	uint32_t val;

	rw_assert_wrlock(&sc->sc_cfg_lock);

	/* Release lock for memory allocation. */
	NET_UNLOCK();

	if (sc->sc_dma64)
		flags |= BUS_DMA_64BIT;

	ntxd = CAD_NTXDESC;
	nrxd = CAD_NRXDESC;

	/*
	 * Allocate a dummy descriptor for unused priority queues.
	 * This is necessary with GEM revisions that have no option
	 * to disable queues.
	 */
	if (sc->sc_qmask & ~1U) {
		ntxd++;
		nrxd++;
	}

	/*
	 * Set up Tx descriptor ring.
	 */

	sc->sc_txring = cad_dmamem_alloc(sc,
	    ntxd * sc->sc_descsize, sc->sc_descsize);
	sc->sc_txdesc = sc->sc_txring->cdm_kva;

	/* Both views alias the ring; sc_dma64 decides which one is used. */
	desc32 = (struct cad_desc32 *)sc->sc_txdesc;
	desc64 = (struct cad_desc64 *)sc->sc_txdesc;

	sc->sc_txbuf = malloc(sizeof(*sc->sc_txbuf) * CAD_NTXDESC,
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < CAD_NTXDESC; i++) {
		txb = &sc->sc_txbuf[i];
		bus_dmamap_create(sc->sc_dmat, MCLBYTES, CAD_NTXSEGS,
		    MCLBYTES, 0, flags, &txb->bf_map);
		txb->bf_m = NULL;

		/* Start with every descriptor marked used; last one wraps. */
		if (sc->sc_dma64) {
			desc64[i].d_addrhi = 0;
			desc64[i].d_addrlo = 0;
			desc64[i].d_status = GEM_TXD_USED;
			if (i == CAD_NTXDESC - 1)
				desc64[i].d_status |= GEM_TXD_WRAP;
		} else {
			desc32[i].d_addr = 0;
			desc32[i].d_status = GEM_TXD_USED;
			if (i == CAD_NTXDESC - 1)
				desc32[i].d_status |= GEM_TXD_WRAP;
		}
	}

	/* The remaining descriptors are dummies. */
	for (; i < ntxd; i++) {
		if (sc->sc_dma64) {
			desc64[i].d_addrhi = 0;
			desc64[i].d_addrlo = 0;
			desc64[i].d_status = GEM_TXD_USED | GEM_TXD_WRAP;
		} else {
			desc32[i].d_addr = 0;
			desc32[i].d_status = GEM_TXD_USED | GEM_TXD_WRAP;
		}
	}

	sc->sc_tx_prod = 0;
	sc->sc_tx_cons = 0;

	bus_dmamap_sync(sc->sc_dmat, sc->sc_txring->cdm_map,
	    0, sc->sc_txring->cdm_size,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	addr = sc->sc_txring->cdm_map->dm_segs[0].ds_addr;
	if (sc->sc_dma64)
		HWRITE4(sc, GEM_TXQBASEHI, addr >> 32);
	HWRITE4(sc, GEM_TXQBASE, addr);

	/* Initialize unused queues. Disable them if possible. */
	/* addr now refers to the dummy descriptor after the real ring. */
	addr += CAD_NTXDESC * sc->sc_descsize;
	for (i = 1; i < GEM_MAX_PRIQ; i++) {
		if (sc->sc_qmask & (1U << i)) {
			HWRITE4(sc, GEM_TXQ1BASE(i - 1),
			    addr | GEM_TXQ1BASE_DISABLE);
		}
	}

	/*
	 * Set up Rx descriptor ring.
	 */

	sc->sc_rxring = cad_dmamem_alloc(sc,
	    nrxd * sc->sc_descsize, sc->sc_descsize);
	sc->sc_rxdesc = sc->sc_rxring->cdm_kva;

	desc32 = (struct cad_desc32 *)sc->sc_rxdesc;
	desc64 = (struct cad_desc64 *)sc->sc_rxdesc;

	sc->sc_rxbuf = malloc(sizeof(struct cad_buf) * CAD_NRXDESC,
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < CAD_NRXDESC; i++) {
		rxb = &sc->sc_rxbuf[i];
		bus_dmamap_create(sc->sc_dmat, MCLBYTES, 1,
		    MCLBYTES, 0, flags, &rxb->bf_map);
		rxb->bf_m = NULL;

		/* Mark all descriptors as used so that driver owns them. */
		if (sc->sc_dma64) {
			desc64[i].d_addrhi = 0;
			desc64[i].d_addrlo = GEM_RXD_ADDR_USED;
			if (i == CAD_NRXDESC - 1)
				desc64[i].d_addrlo |= GEM_RXD_ADDR_WRAP;
		} else {
			desc32[i].d_addr = GEM_RXD_ADDR_USED;
			if (i == CAD_NRXDESC - 1)
				desc32[i].d_addr |= GEM_RXD_ADDR_WRAP;
		}
	}

	/* The remaining descriptors are dummies. */
	for (; i < nrxd; i++) {
		if (sc->sc_dma64) {
			desc64[i].d_addrhi = 0;
			desc64[i].d_addrlo =
			    GEM_RXD_ADDR_USED | GEM_RXD_ADDR_WRAP;
		} else {
			desc32[i].d_addr =
			    GEM_RXD_ADDR_USED | GEM_RXD_ADDR_WRAP;
		}
	}

	if_rxr_init(&sc->sc_rx_ring, 2, CAD_NRXDESC);

	sc->sc_rx_prod = 0;
	sc->sc_rx_cons = 0;
	cad_rxfill(sc);

	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring->cdm_map,
	    0, sc->sc_rxring->cdm_size,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	addr = sc->sc_rxring->cdm_map->dm_segs[0].ds_addr;
	if (sc->sc_dma64)
		HWRITE4(sc, GEM_RXQBASEHI, addr >> 32);
	HWRITE4(sc, GEM_RXQBASE, addr);

	/* Initialize unused queues. Disable them if possible. */
	addr += sc->sc_descsize * CAD_NRXDESC;
	for (i = 1; i < GEM_MAX_PRIQ; i++) {
		if (sc->sc_qmask & (1U << i)) {
			if (i < 8) {
				HWRITE4(sc, GEM_RXQ1BASE(i - 1),
				    addr | GEM_RXQ1BASE_DISABLE);
			} else {
				HWRITE4(sc, GEM_RXQ8BASE(i - 8),
				    addr | GEM_RXQ8BASE_DISABLE);
			}
		}
	}

	NET_LOCK();

	/*
	 * Set MAC address filters.
	 */

	/* Filter 0 holds the interface address; octet 0 in the low byte. */
	HWRITE4(sc, GEM_LADDRL(0), sc->sc_ac.ac_enaddr[0] |
	    ((uint32_t)sc->sc_ac.ac_enaddr[1] << 8) |
	    ((uint32_t)sc->sc_ac.ac_enaddr[2] << 16) |
	    ((uint32_t)sc->sc_ac.ac_enaddr[3] << 24));
	HWRITE4(sc, GEM_LADDRH(0), sc->sc_ac.ac_enaddr[4] |
	    ((uint32_t)sc->sc_ac.ac_enaddr[5] << 8));

	/* The other specific address filters are cleared. */
	for (i = 1; i < GEM_LADDRNUM; i++) {
		HWRITE4(sc, GEM_LADDRL(i), 0);
		HWRITE4(sc, GEM_LADDRH(i), 0);
	}

	cad_iff(sc);

	clock_set_frequency(sc->sc_node, GEM_CLK_TX, 2500000);
	clock_enable(sc->sc_node, GEM_CLK_TX);
	delay(1000);

	val = HREAD4(sc, GEM_NETCFG);

	val |= GEM_NETCFG_FCSREM | GEM_NETCFG_RXCSUMEN | GEM_NETCFG_1000 |
	    GEM_NETCFG_100 | GEM_NETCFG_FDEN | GEM_NETCFG_1536RXEN;
	val &= ~GEM_NETCFG_RXOFFS_MASK;
	val |= ETHER_ALIGN << GEM_NETCFG_RXOFFS_SHIFT;
	val &= ~GEM_NETCFG_BCASTDI;

	if (sc->sc_phy_mode == CAD_PHY_MODE_SGMII)
		val |= GEM_NETCFG_SGMIIEN | GEM_NETCFG_PCSSEL;
	else
		val &= ~(GEM_NETCFG_SGMIIEN | GEM_NETCFG_PCSSEL);

	HWRITE4(sc, GEM_NETCFG, val);

	val = HREAD4(sc, GEM_DMACR);

	if (sc->sc_dma64)
		val |= GEM_DMACR_DMA64;
	else
		val &= ~GEM_DMACR_DMA64;
	/* Use CPU's native byte order with descriptor words. */
#if BYTE_ORDER == BIG_ENDIAN
	val |= GEM_DMACR_ES_DESCR;
#else
	val &= ~GEM_DMACR_ES_DESCR;
#endif
	val &= ~GEM_DMACR_ES_PDATA;
	val |= GEM_DMACR_AHBDISC | GEM_DMACR_TXSIZE;
	val &= ~GEM_DMACR_RXSIZE_MASK;
	val |= GEM_DMACR_RXSIZE_8K;
	val &= ~GEM_DMACR_RXBUF_MASK;
	/* The RXBUF field is programmed with MCLBYTES / 64. */
	val |= (MCLBYTES / 64) << GEM_DMACR_RXBUF_SHIFT;
	val &= ~GEM_DMACR_BLEN_MASK;
	val |= GEM_DMACR_BLEN_16;

	if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
		val |= GEM_DMACR_TXCSUMEN;

	HWRITE4(sc, GEM_DMACR, val);

	/* Clear statistics. */
	HWRITE4(sc, GEM_NETCTL, sc->sc_netctl | GEM_NETCTL_STATCLR);

	/* Enable Rx and Tx. */
	sc->sc_netctl |= GEM_NETCTL_RXEN | GEM_NETCTL_TXEN;
	HWRITE4(sc, GEM_NETCTL, sc->sc_netctl);

	/* Enable interrupts. */
	HWRITE4(sc, GEM_IER, GEM_IXR_HRESP | GEM_IXR_RXOVR | GEM_IXR_RXDONE |
	    GEM_IXR_TXDONE);

	/* Controllers flagged with the Rx hang erratum also get RXUSED. */
	if (sc->sc_rxhang_erratum)
		HWRITE4(sc, GEM_IER, GEM_IXR_RXUSED);

	if (!LIST_EMPTY(&sc->sc_mii.mii_phys))
		mii_mediachg(&sc->sc_mii);

	ifp->if_flags |= IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);

	timeout_add_sec(&sc->sc_tick, 1);

	return 0;
}

/*
 * Stop the interface and release the resources set up by cad_up().
 *
 * Clears IFF_RUNNING, stops the tick timeout, disables Rx/Tx and all
 * interrupts, waits for the transmitter and for in-flight interrupt,
 * transmit-queue and task activity to finish, disables the Tx clock,
 * resets the controller and frees both descriptor rings together with
 * any mbufs still attached to them.
 *
 * The caller must hold sc_cfg_lock in write mode.  The net lock is
 * released for the duration of the teardown and retaken on return.
 */
void
cad_down(struct cad_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct cad_buf *rxb, *txb;
	unsigned int i, timeout;

	rw_assert_wrlock(&sc->sc_cfg_lock);

	ifp->if_flags &= ~IFF_RUNNING;

	ifq_clr_oactive(&ifp->if_snd);
	ifp->if_timer = 0;

	/* Avoid lock order issues with barriers. */
	NET_UNLOCK();

	timeout_del_barrier(&sc->sc_tick);

	/* Disable data transfer. */
	sc->sc_netctl &= ~(GEM_NETCTL_TXEN | GEM_NETCTL_RXEN);
	HWRITE4(sc, GEM_NETCTL, sc->sc_netctl);

	/* Disable all interrupts. */
	HWRITE4(sc, GEM_IDR, ~0U);

	/* Wait for transmitter to become idle (up to 1000 polls, 10us apart). */
	for (timeout = 1000; timeout > 0; timeout--) {
		if ((HREAD4(sc, GEM_TXSR) & GEM_TXSR_TXGO) == 0)
			break;
		delay(10);
	}
	if (timeout == 0)
		printf("%s: transmitter not idle\n", sc->sc_dev.dv_xname);

	mii_down(&sc->sc_mii);

	/* Wait for activity to cease. */
	intr_barrier(sc->sc_ih);
	ifq_barrier(&ifp->if_snd);
	taskq_del_barrier(systq, &sc->sc_statchg_task);

	/* Disable the packet clock as it is not needed any longer. */
	clock_disable(sc->sc_node, GEM_CLK_TX);

	cad_reset(sc);

	/*
	 * Tear down the Tx descriptor ring.
	 */

	for (i = 0; i < CAD_NTXDESC; i++) {
		txb = &sc->sc_txbuf[i];
		if (txb->bf_m != NULL) {
			bus_dmamap_sync(sc->sc_dmat, txb->bf_map, 0,
			    txb->bf_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(sc->sc_dmat, txb->bf_map);
			m_freem(txb->bf_m);
		}
		bus_dmamap_destroy(sc->sc_dmat, txb->bf_map);
	}
	free(sc->sc_txbuf, M_DEVBUF, sizeof(*sc->sc_txbuf) * CAD_NTXDESC);
	sc->sc_txbuf = NULL;

	cad_dmamem_free(sc, sc->sc_txring);
	sc->sc_txring = NULL;
	sc->sc_txdesc = NULL;

	/*
	 * Tear down the Rx descriptor ring.
	 */

	for (i = 0; i < CAD_NRXDESC; i++) {
		rxb = &sc->sc_rxbuf[i];
		if (rxb->bf_m != NULL) {
			bus_dmamap_sync(sc->sc_dmat, rxb->bf_map, 0,
			    rxb->bf_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(sc->sc_dmat, rxb->bf_map);
			m_freem(rxb->bf_m);
		}
		bus_dmamap_destroy(sc->sc_dmat, rxb->bf_map);
	}
	/* Size the free from the Rx array itself, not from sc_txbuf. */
	free(sc->sc_rxbuf, M_DEVBUF, sizeof(*sc->sc_rxbuf) * CAD_NRXDESC);
	sc->sc_rxbuf = NULL;

	cad_dmamem_free(sc, sc->sc_rxring);
	sc->sc_rxring = NULL;
	sc->sc_rxdesc = NULL;

	NET_LOCK();
}

/*
 * Compute the 6-bit hash filter index of an Ethernet address.
 *
 * The ETHER_ADDR_LEN octets are packed into one integer with eaddr[0]
 * in the least significant byte; the eight consecutive 6-bit groups of
 * that value are then XORed together.
 *
 * eaddr: pointer to ETHER_ADDR_LEN address octets.
 * Returns a value in the range 0..63.
 */
uint8_t
cad_hash_mac(const uint8_t *eaddr)
{
	uint64_t packed = 0;
	uint8_t folded = 0;
	int n;

	n = ETHER_ADDR_LEN;
	while (n-- > 0)
		packed = (packed << 8) | eaddr[n];

	/* Bits above the low six are discarded by the final mask. */
	for (n = 8; n > 0; n--, packed >>= 6)
		folded ^= (uint8_t)packed;

	return folded & 0x3f;
}

/*
 * Program the receive filter from the interface flags and the
 * multicast list.
 *
 * In promiscuous mode the controller copies all frames and multicast
 * hashing is switched off.  Otherwise multicast hashing is enabled and
 * the 64-bit hash table is written: all ones if any multicast range is
 * configured (IFF_ALLMULTI is set in that case), else one bit per
 * listed address as selected by cad_hash_mac().  Unicast hashing is
 * always disabled.
 *
 * The caller must hold sc_cfg_lock in write mode.
 */
void
cad_iff(struct cad_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct arpcom *ac = &sc->sc_ac;
	struct ether_multistep step;
	struct ether_multi *enm;
	uint64_t table;
	uint32_t cfg;

	rw_assert_wrlock(&sc->sc_cfg_lock);

	cfg = HREAD4(sc, GEM_NETCFG) & ~GEM_NETCFG_UCASTHASHEN;

	ifp->if_flags &= ~IFF_ALLMULTI;

	if (ifp->if_flags & IFF_PROMISC) {
		/* The hash registers are left untouched in this mode. */
		cfg |= GEM_NETCFG_COPYALL;
		cfg &= ~GEM_NETCFG_MCASTHASHEN;
		HWRITE4(sc, GEM_NETCFG, cfg);
		return;
	}

	cfg &= ~GEM_NETCFG_COPYALL;
	cfg |= GEM_NETCFG_MCASTHASHEN;

	if (ac->ac_multirangecnt > 0)
		ifp->if_flags |= IFF_ALLMULTI;

	if ((ifp->if_flags & IFF_ALLMULTI) == 0) {
		table = 0;
		ETHER_FIRST_MULTI(step, ac, enm);
		while (enm != NULL) {
			table |= 1ULL << cad_hash_mac(enm->enm_addrlo);
			ETHER_NEXT_MULTI(step, enm);
		}
	} else {
		table = ~0ULL;
	}

	HWRITE4(sc, GEM_HASHL, table);
	HWRITE4(sc, GEM_HASHH, table >> 32);

	HWRITE4(sc, GEM_NETCFG, cfg);
}

/*
 * Transmit start routine (installed as if_qstart).
 *
 * Moves packets from the send queue into the Tx ring for as long as
 * more than CAD_NTXSEGS descriptors are free, and marks the queue
 * active-full when that limit is reached.  Packets that cad_encap()
 * cannot map are dropped.  The transmitter is kicked before returning,
 * whether or not anything was queued.
 */
void
cad_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct cad_softc *sc = ifp->if_softc;
	struct mbuf *m;
	unsigned int cons, prod;
	unsigned int free, used;

	/* Free descriptor count; equal indices mean the ring is empty. */
	cons = sc->sc_tx_cons;
	prod = sc->sc_tx_prod;
	free = (cons <= prod) ? cons + CAD_NTXDESC - prod : cons - prod;

	while (free > CAD_NTXSEGS && (m = ifq_dequeue(ifq)) != NULL) {
		used = cad_encap(sc, m);
		if (used == 0) {
			m_freem(m);
			continue;
		}

#if NBPFILTER > 0
		if (ifp->if_bpf != NULL)
			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		ifp->if_timer = 5;

		KASSERT(free >= used);
		free -= used;
	}

	/* The loop ended for lack of descriptors, not for lack of packets. */
	if (free <= CAD_NTXSEGS)
		ifq_set_oactive(ifq);

	HWRITE4(sc, GEM_NETCTL, sc->sc_netctl | GEM_NETCTL_STARTTX);
}

/*
 * Transmit watchdog (installed as if_watchdog).
 *
 * Disarms the timer and, if the interface is running with descriptors
 * still outstanding, asks the controller to start transmitting again.
 */
void
cad_watchdog(struct ifnet *ifp)
{
	struct cad_softc *sc = ifp->if_softc;

	ifp->if_timer = 0;

	if ((ifp->if_flags & IFF_RUNNING) != 0 &&
	    sc->sc_tx_cons != sc->sc_tx_prod) {
		/* XXX */
		HWRITE4(sc, GEM_NETCTL, sc->sc_netctl | GEM_NETCTL_STARTTX);
	}
}

/*
 * Map an outgoing mbuf chain for DMA and publish it on the Tx descriptor
 * ring, starting at the current producer index.
 *
 * If the first load fails with EFBIG, the chain is defragmented once and
 * the load is retried.
 *
 * Returns the number of descriptors consumed, or 0 if the mbuf could not
 * be loaded.  On failure the mbuf is not freed here; it remains the
 * caller's to dispose of.
 */
unsigned int
cad_encap(struct cad_softc *sc, struct mbuf *m)
{
	bus_dmamap_t map;
	struct cad_buf *txb;
	/* Two views of the same ring; sc_dma64 selects which one is used. */
	struct cad_desc32 *desc32 = (struct cad_desc32 *)sc->sc_txdesc;
	struct cad_desc64 *desc64 = (struct cad_desc64 *)sc->sc_txdesc;
	unsigned int head, idx, nsegs;
	uint32_t status;
	int i;

	head = sc->sc_tx_prod;

	/* The slot at the head index holds the map and mbuf of the packet. */
	txb = &sc->sc_txbuf[head];
	map = txb->bf_map;

	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
	case 0:
		break;
	case EFBIG:
		/* Too many segments: compact the chain and try once more. */
		if (m_defrag(m, M_DONTWAIT) != 0)
			return 0;
		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
		    BUS_DMA_NOWAIT) != 0)
			return 0;
		break;
	default:
		return 0;
	}

	bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
	    BUS_DMASYNC_PREWRITE);

	nsegs = map->dm_nsegs;
	KASSERT(nsegs > 0);

	txb->bf_m = m;

	/*
	 * Fill descriptors in reverse order so that all the descriptors
	 * are ready when the first descriptor's GEM_TXD_USED bit is cleared.
	 */
	for (i = nsegs - 1; i >= 0; i--) {
		idx = (head + i) % CAD_NTXDESC;

		/*
		 * The status word carries the segment length plus the
		 * end-of-packet and end-of-ring markers.  GEM_TXD_USED is
		 * not included, so storing it clears that bit.
		 */
		status = map->dm_segs[i].ds_len & GEM_TXD_LEN_MASK;
		if (i == nsegs - 1)
			status |= GEM_TXD_LAST;
		if (idx == CAD_NTXDESC - 1)
			status |= GEM_TXD_WRAP;

		if (sc->sc_dma64) {
			uint64_t addr = map->dm_segs[i].ds_addr;

			desc64[idx].d_addrlo = addr;
			desc64[idx].d_addrhi = addr >> 32;
		} else {
			desc32[idx].d_addr = map->dm_segs[i].ds_addr;
		}

		/* Make d_addr visible before GEM_TXD_USED is cleared
		 * in d_status. */
		bus_dmamap_sync(sc->sc_dmat, sc->sc_txring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		if (sc->sc_dma64)
			desc64[idx].d_status = status;
		else
			desc32[idx].d_status = status;

		/* Push the status word out as well. */
		bus_dmamap_sync(sc->sc_dmat, sc->sc_txring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	/* Advance the producer index past the descriptors just filled. */
	sc->sc_tx_prod = (head + nsegs) % CAD_NTXDESC;

	return nsegs;
}

/*
 * Interrupt handler.
 *
 * Reads the pending causes from GEM_ISR and writes the same value back
 * (presumably to acknowledge them; confirm against the controller
 * manual), then services Rx and Tx completions, counts Rx overruns as
 * input errors, applies the Rx hang workaround when the erratum flag is
 * set, and stops the interface after an HRESP (DMA) error.
 *
 * `arg' is the driver softc.  Always returns 1.
 */
int
cad_intr(void *arg)
{
	struct cad_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t isr;

	isr = HREAD4(sc, GEM_ISR);
	HWRITE4(sc, GEM_ISR, isr);

	if (isr & GEM_IXR_RXDONE)
		cad_rxeof(sc);
	if (isr & GEM_IXR_TXDONE)
		cad_txeof(sc);

	if (isr & GEM_IXR_RXOVR)
		ifp->if_ierrors++;

	if (sc->sc_rxhang_erratum && (isr & GEM_IXR_RXUSED)) {
		/*
		 * Try to flush a packet from the Rx SRAM to avoid triggering
		 * the Rx hang.
		 */
		HWRITE4(sc, GEM_NETCTL, sc->sc_netctl | GEM_NETCTL_DPRAM);
		cad_rxfill(sc);
	}

	/* If there has been a DMA error, stop the interface to limit damage. */
	if (isr & GEM_IXR_HRESP) {
		/*
		 * Drop the Tx/Rx enable bits from the cached control value
		 * so that later NETCTL writes keep the controller stopped.
		 */
		sc->sc_netctl &= ~(GEM_NETCTL_TXEN | GEM_NETCTL_RXEN);
		HWRITE4(sc, GEM_NETCTL, sc->sc_netctl);
		/* NOTE(review): presumably masks every interrupt source. */
		HWRITE4(sc, GEM_IDR, ~0U);

		printf("%s: hresp error, interface stopped\n",
		    sc->sc_dev.dv_xname);
	}

	return 1;
}

/*
 * Harvest received frames from the Rx descriptor ring.
 *
 * Starting at sc_rx_cons, descriptors are consumed while the ring
 * accounting reports buffers in use and the descriptor has
 * GEM_RXD_ADDR_USED set.  Each buffer is unloaded and its slot returned
 * to the ring accounting.  Frames that do not carry both the start- and
 * end-of-frame flags are dropped and counted as input errors.  Good
 * frames are trimmed, tagged with checksum offload results and collected
 * on a list.
 *
 * Afterwards the ring is refilled and the list is passed to the input
 * queue; a nonzero return from ifiq_input() is reported to the ring
 * accounting via if_rxr_livelocked().
 */
void
cad_rxeof(struct cad_softc *sc)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	struct cad_buf *rxb;
	/* Two views of the same ring; sc_dma64 selects which one is used. */
	struct cad_desc32 *desc32 = (struct cad_desc32 *)sc->sc_rxdesc;
	struct cad_desc64 *desc64 = (struct cad_desc64 *)sc->sc_rxdesc;
	size_t len;
	unsigned int idx;
	uint32_t addr, status;

	idx = sc->sc_rx_cons;

	while (if_rxr_inuse(&sc->sc_rx_ring) > 0) {
		bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		/* The used flag is kept in the (low) address word. */
		if (sc->sc_dma64)
			addr = desc64[idx].d_addrlo;
		else
			addr = desc32[idx].d_addr;
		if ((addr & GEM_RXD_ADDR_USED) == 0)
			break;

		/* Prevent premature read of d_status. */
		bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_POSTREAD);

		if (sc->sc_dma64)
			status = desc64[idx].d_status;
		else
			status = desc32[idx].d_status;
		len = status & GEM_RXD_LEN_MASK;

		rxb = &sc->sc_rxbuf[idx];

		/* Sync only the received bytes, which follow the alignment pad. */
		bus_dmamap_sync(sc->sc_dmat, rxb->bf_map, ETHER_ALIGN, len,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->sc_dmat, rxb->bf_map);

		/* Take ownership of the mbuf away from the ring slot. */
		m = rxb->bf_m;
		rxb->bf_m = NULL;
		KASSERT(m != NULL);

		if_rxr_put(&sc->sc_rx_ring, 1);
		idx = (idx + 1) % CAD_NRXDESC;

		/* Only frames contained in a single buffer are accepted. */
		if ((status & (GEM_RXD_SOF | GEM_RXD_EOF)) !=
		    (GEM_RXD_SOF | GEM_RXD_EOF)) {
			m_freem(m);
			ifp->if_ierrors++;
			continue;
		}

		/* Strip the alignment pad and set the real frame length. */
		m_adj(m, ETHER_ALIGN);
		m->m_len = m->m_pkthdr.len = len;

		/* Translate the checksum offload result into mbuf flags. */
		m->m_pkthdr.csum_flags = 0;
		switch (status & GEM_RXD_CSUM_MASK) {
		case GEM_RXD_CSUM_IP_OK:
			m->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
			break;
		case GEM_RXD_CSUM_TCP_OK:
		case GEM_RXD_CSUM_UDP_OK:
			m->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK |
			    M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
			break;
		}

		ml_enqueue(&ml, m);

		/* Tell cad_tick() that the receiver made progress. */
		sc->sc_rxdone = 1;
	}

	sc->sc_rx_cons = idx;

	cad_rxfill(sc);

	if (ifiq_input(&ifp->if_rcv, &ml))
		if_rxr_livelocked(&sc->sc_rx_ring);
}

/*
 * Refill the Rx descriptor ring with fresh receive buffers.
 *
 * Asks the ring accounting for up to CAD_NRXDESC slots and, starting at
 * sc_rx_prod, attaches a newly allocated cluster mbuf to each one.  A
 * descriptor is handed over by writing its address word, which has
 * GEM_RXD_ADDR_USED clear, after the other fields have been made
 * visible.  If an allocation fails, filling stops and the slots that
 * were not used are returned to the accounting.
 */
void
cad_rxfill(struct cad_softc *sc)
{
	struct cad_buf *rxb;
	/* Two views of the same ring; sc_dma64 selects which one is used. */
	struct cad_desc32 *desc32 = (struct cad_desc32 *)sc->sc_rxdesc;
	struct cad_desc64 *desc64 = (struct cad_desc64 *)sc->sc_rxdesc;
	uint64_t addr;
	unsigned int idx;
	u_int slots;

	idx = sc->sc_rx_prod;

	for (slots = if_rxr_get(&sc->sc_rx_ring, CAD_NRXDESC);
	    slots > 0; slots--) {
		rxb = &sc->sc_rxbuf[idx];
		rxb->bf_m = cad_alloc_mbuf(sc, rxb->bf_map);
		if (rxb->bf_m == NULL)
			break;

		/*
		 * The low address bits double as descriptor flags, so the
		 * buffer address must not have them set.
		 */
		addr = rxb->bf_map->dm_segs[0].ds_addr;
		KASSERT((addr & (GEM_RXD_ADDR_WRAP | GEM_RXD_ADDR_USED)) == 0);
		if (idx == CAD_NRXDESC - 1)
			addr |= GEM_RXD_ADDR_WRAP;

		if (sc->sc_dma64) {
			desc64[idx].d_addrhi = addr >> 32;
			desc64[idx].d_status = 0;
		} else {
			desc32[idx].d_status = 0;
		}

		/* Make d_addrhi and d_status visible before clearing
		 * GEM_RXD_ADDR_USED in d_addr or d_addrlo. */
		bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		if (sc->sc_dma64)
			desc64[idx].d_addrlo = addr;
		else
			desc32[idx].d_addr = addr;

		/* Push the address word out as well. */
		bus_dmamap_sync(sc->sc_dmat, sc->sc_rxring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		idx = (idx + 1) % CAD_NRXDESC;
	}
	/* Give back whatever was reserved but not filled. */
	if_rxr_put(&sc->sc_rx_ring, slots);

	sc->sc_rx_prod = idx;
}

/*
 * Reclaim completed packets from the Tx descriptor ring.
 *
 * Walks from sc_tx_cons towards sc_tx_prod.  A packet is complete when
 * the status word of its first descriptor has GEM_TXD_USED set.  Error
 * bits in that word are counted as output errors, the DMA map is
 * unloaded and the mbuf freed.  If at least one packet was reclaimed,
 * the consumer index is advanced and the send queue is restarted when
 * it had been marked oactive.
 */
void
cad_txeof(struct cad_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct cad_buf *txb;
	/* Two views of the same ring; sc_dma64 selects which one is used. */
	struct cad_desc32 *desc32 = (struct cad_desc32 *)sc->sc_txdesc;
	struct cad_desc64 *desc64 = (struct cad_desc64 *)sc->sc_txdesc;
	unsigned int free = 0;	/* number of packets reclaimed */
	unsigned int idx, nsegs;
	uint32_t status;

	idx = sc->sc_tx_cons;

	while (idx != sc->sc_tx_prod) {
		bus_dmamap_sync(sc->sc_dmat, sc->sc_txring->cdm_map,
		    idx * sc->sc_descsize, sc->sc_descsize,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		if (sc->sc_dma64)
			status = desc64[idx].d_status;
		else
			status = desc32[idx].d_status;
		/* Stop at the first packet that is not marked used yet. */
		if ((status & GEM_TXD_USED) == 0)
			break;

		if (status & (GEM_TXD_RLIMIT | GEM_TXD_CORRUPT |
		    GEM_TXD_LCOLL | GEM_TXD_CSUMERR_MASK))
			ifp->if_oerrors++;

		/* The slot of the first descriptor owns the map and mbuf. */
		txb = &sc->sc_txbuf[idx];
		nsegs = txb->bf_map->dm_nsegs;
		KASSERT(nsegs > 0);

		bus_dmamap_sync(sc->sc_dmat, txb->bf_map, 0,
		    txb->bf_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, txb->bf_map);

		m_freem(txb->bf_m);
		txb->bf_m = NULL;

		/* Step over the packet's descriptors, one per segment. */
		for (;;) {
			idx = (idx + 1) % CAD_NTXDESC;

			nsegs--;
			if (nsegs == 0)
				break;

			/*
			 * The controller marks only the initial segment used.
			 * Mark the remaining segments used manually, so that
			 * the controller will not accidentally use them later.
			 *
			 * This could be done lazily on the Tx ring producer
			 * side by ensuring that the subsequent descriptor
			 * after the actual segments is marked used.
			 * However, this would make the ring trickier to debug.
			 */

			bus_dmamap_sync(sc->sc_dmat, sc->sc_txring->cdm_map,
			    idx * sc->sc_descsize, sc->sc_descsize,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

			if (sc->sc_dma64)
				desc64[idx].d_status |= GEM_TXD_USED;
			else
				desc32[idx].d_status |= GEM_TXD_USED;

			bus_dmamap_sync(sc->sc_dmat, sc->sc_txring->cdm_map,
			    idx * sc->sc_descsize, sc->sc_descsize,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		}

		free++;
	}

	/* Nothing completed: leave the consumer index and queue alone. */
	if (free == 0)
		return;

	sc->sc_tx_cons = idx;

	if (ifq_is_oactive(&ifp->if_snd))
		ifq_restart(&ifp->if_snd);
}

/*
 * Once-per-second housekeeping timeout.
 *
 * Does nothing, and does not reschedule itself, when the interface is
 * not running.  Otherwise, at splnet, it drives the MII tick and applies
 * the Rx hang recovery, then re-arms itself for one second later.
 */
void
cad_tick(void *arg)
{
	struct cad_softc *sc = arg;
	int s;

	if (!(sc->sc_ac.ac_if.if_flags & IFF_RUNNING))
		return;

	s = splnet();

	mii_tick(&sc->sc_mii);

	if (sc->sc_rxhang_erratum && !sc->sc_rxdone) {
		/*
		 * Nothing has been received since the previous tick, so
		 * the Rx DMA might be stuck.  Attempt recovery by turning
		 * the receiver off and back on; the read in between
		 * separates the two writes.
		 */
		HWRITE4(sc, GEM_NETCTL, sc->sc_netctl & ~GEM_NETCTL_RXEN);
		(void)HREAD4(sc, GEM_NETCTL);
		HWRITE4(sc, GEM_NETCTL, sc->sc_netctl);
	}

	/* Start a new observation period; cad_rxeof() sets the flag. */
	sc->sc_rxdone = 0;

	splx(s);

	timeout_add_sec(&sc->sc_tick, 1);
}

/*
 * Media change callback.
 *
 * Forwards the request to the MII layer when at least one PHY is
 * attached.  The result of mii_mediachg() is ignored; always returns 0.
 */
int
cad_media_change(struct ifnet *ifp)
{
	struct cad_softc *sc = ifp->if_softc;

	if (LIST_EMPTY(&sc->sc_mii.mii_phys))
		return 0;

	mii_mediachg(&sc->sc_mii);
	return 0;
}

/*
 * Media status callback.
 *
 * When a PHY is attached, polls it and copies the active media and
 * status words into `imr'.  Without a PHY, `imr' is left unmodified.
 */
void
cad_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct cad_softc *sc = ifp->if_softc;

	if (LIST_EMPTY(&sc->sc_mii.mii_phys))
		return;

	mii_pollstat(&sc->sc_mii);
	imr->ifm_active = sc->sc_mii.mii_media_active;
	imr->ifm_status = sc->sc_mii.mii_media_status;
}

/*
 * Wait for the PHY management interface to become idle.
 *
 * Polls GEM_NETSR up to 10000 times, delaying 10 microseconds after
 * each unsuccessful check.
 *
 * Returns 0 once the idle bit is seen, or ETIMEDOUT if it never is.
 */
int
cad_mii_wait(struct cad_softc *sc)
{
	int tries;

	for (tries = 10000; tries > 0; tries--) {
		if (HREAD4(sc, GEM_NETSR) & GEM_NETSR_PHY_MGMT_IDLE)
			return 0;
		delay(10);
	}

	return ETIMEDOUT;
}

/*
 * Perform one clause 22 PHY management operation.
 *
 * `oper' supplies the operation code and, for writes, the data; the PHY
 * address, register number and fixed frame bits are merged in here.
 * The bus must be idle before the command is issued, and the function
 * waits for it to become idle again afterwards.  Timeouts are logged;
 * nothing is reported back to the caller.
 */
void
cad_mii_oper(struct cad_softc *sc, int phy_no, int reg, uint32_t oper)
{
	/* Assemble the complete management frame. */
	oper |= GEM_PHYMNTNC_CLAUSE_22 | GEM_PHYMNTNC_MUST_10 |
	    ((phy_no << GEM_PHYMNTNC_ADDR_SHIFT) & GEM_PHYMNTNC_ADDR_MASK) |
	    ((reg << GEM_PHYMNTNC_REG_SHIFT) & GEM_PHYMNTNC_REG_MASK);

	if (cad_mii_wait(sc) != 0) {
		/* A previous operation never finished; do not start one. */
		printf("%s: MII bus idle timeout\n", sc->sc_dev.dv_xname);
		return;
	}

	HWRITE4(sc, GEM_PHYMNTNC, oper);

	if (cad_mii_wait(sc) != 0)
		printf("%s: MII bus operation timeout\n", sc->sc_dev.dv_xname);
}

/*
 * MII read callback: read PHY register `reg' of PHY `phy_no'.
 *
 * Returns the data field of the management register after the read
 * operation.  For MII_EXTSR the 1000baseT half-duplex capability bit is
 * masked out of the result.
 */
int
cad_mii_readreg(struct device *self, int phy_no, int reg)
{
	struct cad_softc *sc = (struct cad_softc *)self;
	int data;

	cad_mii_oper(sc, phy_no, reg, GEM_PHYMNTNC_OP_READ);

	data = HREAD4(sc, GEM_PHYMNTNC) & GEM_PHYMNTNC_DATA_MASK;

	/* The MAC does not handle 1000baseT in half duplex mode. */
	return (reg == MII_EXTSR) ? (data & ~EXTSR_1000THDX) : data;
}

void
cad_mii_writereg(struct device *self, int phy_no, int reg, int val)
{
	struct cad_softc *sc = (struct cad_softc *)self;

	cad_mii_oper(sc, phy_no, reg, GEM_PHYMNTNC_OP_WRITE |
	    (val & GEM_PHYMNTNC_DATA_MASK));
}

/*
 * MII status change callback.
 *
 * Updates the duplex and speed bits of the network configuration
 * register to match the negotiated media and records the matching Tx
 * clock frequency.  The clock itself is set later from a task.
 */
void
cad_mii_statchg(struct device *self)
{
	struct cad_softc *sc = (struct cad_softc *)self;
	uint32_t netcfg;

	/* Start with duplex and speed selection cleared. */
	netcfg = HREAD4(sc, GEM_NETCFG);
	netcfg &= ~(GEM_NETCFG_FDEN | GEM_NETCFG_100 | GEM_NETCFG_1000);

	if ((sc->sc_mii.mii_media_active & IFM_FDX) != 0)
		netcfg |= GEM_NETCFG_FDEN;

	switch (IFM_SUBTYPE(sc->sc_mii.mii_media_active)) {
	case IFM_1000_T:
		netcfg |= GEM_NETCFG_100 | GEM_NETCFG_1000;
		sc->sc_tx_freq = 125000000;
		break;
	case IFM_100_TX:
		netcfg |= GEM_NETCFG_100;
		sc->sc_tx_freq = 25000000;
		break;
	default:
		/* Any other subtype: no speed bit, lowest Tx clock. */
		sc->sc_tx_freq = 2500000;
		break;
	}

	HWRITE4(sc, GEM_NETCFG, netcfg);

	/* Defer clock setting because it allocates memory with M_WAITOK. */
	task_add(systq, &sc->sc_statchg_task);
}

/*
 * Deferred part of a link state change: set the GEM_CLK_TX clock of the
 * device node to the frequency recorded in sc_tx_freq by
 * cad_mii_statchg().  `arg' is the driver softc.
 */
void
cad_statchg_task(void *arg)
{
	struct cad_softc *sc = arg;

	clock_set_frequency(sc->sc_node, GEM_CLK_TX, sc->sc_tx_freq);
}

/*
 * Allocate a zeroed, single-segment DMA memory area of `size' bytes
 * aligned to `align', mapped into kernel virtual memory and loaded into
 * a DMA map.
 *
 * With 64-bit DMA the area is additionally kept from crossing a 4 GB
 * boundary.  All allocations may sleep.
 *
 * Returns the descriptor of the area, or NULL if any step fails, in
 * which case everything acquired so far is released again.
 */
struct cad_dmamem *
cad_dmamem_alloc(struct cad_softc *sc, bus_size_t size, bus_size_t align)
{
	struct cad_dmamem *cdm;
	bus_size_t boundary = 0;
	int flags = BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW;
	int nsegs;

	cdm = malloc(sizeof(*cdm), M_DEVBUF, M_WAITOK | M_ZERO);
	cdm->cdm_size = size;

	if (sc->sc_dma64) {
		/*
		 * The segment contains an actual ring and possibly
		 * a dummy ring for unused priority queues.
		 * The segment must not cross a 32-bit boundary so that
		 * the rings have the same base address bits 63:32.
		 */
		boundary = 1ULL << 32;
		flags |= BUS_DMA_64BIT;
	}

	/* Each failure unwinds the steps that succeeded before it. */
	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, boundary,
	    flags, &cdm->cdm_map) != 0)
		goto cdmfree;
	if (bus_dmamem_alloc(sc->sc_dmat, size, align, boundary,
	    &cdm->cdm_seg, 1, &nsegs, BUS_DMA_WAITOK) != 0)
		goto destroy;
	if (bus_dmamem_map(sc->sc_dmat, &cdm->cdm_seg, nsegs, size,
	    &cdm->cdm_kva, BUS_DMA_WAITOK | BUS_DMA_COHERENT) != 0)
		goto free;
	if (bus_dmamap_load(sc->sc_dmat, cdm->cdm_map, cdm->cdm_kva, size,
	    NULL, BUS_DMA_WAITOK) != 0)
		goto unmap;
	memset(cdm->cdm_kva, 0, size);
	return cdm;

unmap:
	bus_dmamem_unmap(sc->sc_dmat, cdm->cdm_kva, size);
free:
	bus_dmamem_free(sc->sc_dmat, &cdm->cdm_seg, 1);
destroy:
	bus_dmamap_destroy(sc->sc_dmat, cdm->cdm_map);
cdmfree:
	free(cdm, M_DEVBUF, sizeof(*cdm));
	return NULL;
}

/*
 * Release a DMA memory area obtained from cad_dmamem_alloc().
 *
 * Teardown mirrors the allocation steps in reverse order: the DMA map
 * is unloaded, the kernel mapping removed, the segment freed, the map
 * destroyed and finally the descriptor itself freed.  `cdm' must not be
 * used after this call.
 */
void
cad_dmamem_free(struct cad_softc *sc, struct cad_dmamem *cdm)
{
	/*
	 * cad_dmamem_alloc() only returns areas whose map has been loaded.
	 * Unload it explicitly so that no DMA mapping still refers to the
	 * memory when it is unmapped and handed back below.
	 */
	bus_dmamap_unload(sc->sc_dmat, cdm->cdm_map);
	bus_dmamem_unmap(sc->sc_dmat, cdm->cdm_kva, cdm->cdm_size);
	bus_dmamem_free(sc->sc_dmat, &cdm->cdm_seg, 1);
	bus_dmamap_destroy(sc->sc_dmat, cdm->cdm_map);
	free(cdm, M_DEVBUF, sizeof(*cdm));
}

/*
 * Allocate a receive buffer and load it into `map'.
 *
 * Obtains an MCLBYTES cluster mbuf without sleeping, sets its length to
 * the whole cluster, loads it into the DMA map and syncs the map for a
 * device-to-memory transfer.
 *
 * Returns the mbuf, or NULL if the allocation or the load fails; nothing
 * stays allocated in that case.
 */
struct mbuf *
cad_alloc_mbuf(struct cad_softc *sc, bus_dmamap_t map)
{
	struct mbuf *m;

	m = MCLGETL(NULL, M_DONTWAIT, MCLBYTES);
	if (m == NULL)
		return NULL;

	/* Expose the entire cluster to the DMA map. */
	m->m_pkthdr.len = MCLBYTES;
	m->m_len = MCLBYTES;

	if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT) == 0) {
		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
		    BUS_DMASYNC_PREREAD);
		return m;
	}

	m_freem(m);
	return NULL;
}

#if NKSTAT > 0
/*
 * Indices of the statistics exported through kstat.
 *
 * Each enumerator selects one entry of cad_counters[] and the matching
 * slot of the kstat_kv array.  The _toto and _totp entries are the byte
 * and packet totals respectively (see their units in cad_counters[]).
 * cad_stat_count is not a counter; it is the number of entries.
 */
enum cad_stat {
	/* Transmit side. */
	cad_stat_tx_toto,
	cad_stat_tx_totp,
	cad_stat_tx_bcast,
	cad_stat_tx_mcast,
	cad_stat_tx_pause,
	cad_stat_tx_h64,
	cad_stat_tx_h65,
	cad_stat_tx_h128,
	cad_stat_tx_h256,
	cad_stat_tx_h512,
	cad_stat_tx_h1024,
	cad_stat_tx_underrun,
	cad_stat_tx_scoll,
	cad_stat_tx_mcoll,
	cad_stat_tx_ecoll,
	cad_stat_tx_lcoll,
	cad_stat_tx_defer,
	cad_stat_tx_sense,
	/* Receive side. */
	cad_stat_rx_toto,
	cad_stat_rx_totp,
	cad_stat_rx_bcast,
	cad_stat_rx_mcast,
	cad_stat_rx_pause,
	cad_stat_rx_h64,
	cad_stat_rx_h65,
	cad_stat_rx_h128,
	cad_stat_rx_h256,
	cad_stat_rx_h512,
	cad_stat_rx_h1024,
	cad_stat_rx_undersz,
	cad_stat_rx_oversz,
	cad_stat_rx_jabber,
	cad_stat_rx_fcs,
	cad_stat_rx_symberr,
	cad_stat_rx_align,
	cad_stat_rx_reserr,
	cad_stat_rx_overrun,
	cad_stat_rx_ipcsum,
	cad_stat_rx_tcpcsum,
	cad_stat_rx_udpcsum,
	cad_stat_count
};

/*
 * Description of one exported statistic.
 */
struct cad_counter {
	/* Name under which the value is published via kstat. */
	const char		*c_name;
	/* Unit of the value (bytes or packets in cad_counters[]). */
	enum kstat_kv_unit	c_unit;
	/*
	 * Offset of the 32-bit hardware counter register.  An offset of 0
	 * makes the generic loop in cad_kstat_read() skip the entry.
	 */
	uint32_t		c_reg;
};

/*
 * Table of exported statistics, indexed by enum cad_stat.
 *
 * The two byte totals have no register listed (c_reg is 0) because
 * cad_kstat_read() assembles them from separate low/high register
 * pairs; every other entry names the single register it is read from.
 */
const struct cad_counter cad_counters[cad_stat_count] = {
	[cad_stat_tx_toto] =
	    { "tx total",	KSTAT_KV_U_BYTES, 0 },
	[cad_stat_tx_totp] =
	    { "tx total",	KSTAT_KV_U_PACKETS, GEM_TXCNT },
	[cad_stat_tx_bcast] =
	    { "tx bcast",	KSTAT_KV_U_PACKETS, GEM_TXBCCNT },
	[cad_stat_tx_mcast] =
	    { "tx mcast",	KSTAT_KV_U_PACKETS, GEM_TXMCCNT },
	[cad_stat_tx_pause] =
	    { "tx pause",	KSTAT_KV_U_PACKETS, GEM_TXPAUSECNT },
	[cad_stat_tx_h64] =
	    { "tx 64B",		KSTAT_KV_U_PACKETS, GEM_TX64CNT },
	[cad_stat_tx_h65] =
	    { "tx 65-127B",	KSTAT_KV_U_PACKETS, GEM_TX65CNT },
	[cad_stat_tx_h128] =
	    { "tx 128-255B",	KSTAT_KV_U_PACKETS, GEM_TX128CNT },
	[cad_stat_tx_h256] =
	    { "tx 256-511B",	KSTAT_KV_U_PACKETS, GEM_TX256CNT },
	[cad_stat_tx_h512] =
	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS, GEM_TX512CNT },
	[cad_stat_tx_h1024] =
	    { "tx 1024-1518B",	KSTAT_KV_U_PACKETS, GEM_TX1024CNT },
	[cad_stat_tx_underrun] =
	    { "tx underrun",	KSTAT_KV_U_PACKETS, GEM_TXURUNCNT },
	[cad_stat_tx_scoll] =
	    { "tx scoll",	KSTAT_KV_U_PACKETS, GEM_SNGLCOLLCNT },
	[cad_stat_tx_mcoll] =
	    { "tx mcoll",	KSTAT_KV_U_PACKETS, GEM_MULTICOLLCNT },
	[cad_stat_tx_ecoll] =
	    { "tx excess coll",	KSTAT_KV_U_PACKETS, GEM_EXCESSCOLLCNT },
	[cad_stat_tx_lcoll] =
	    { "tx late coll",	KSTAT_KV_U_PACKETS, GEM_LATECOLLCNT },
	[cad_stat_tx_defer] =
	    { "tx defer",	KSTAT_KV_U_PACKETS, GEM_TXDEFERCNT },
	[cad_stat_tx_sense] =
	    { "tx csense",	KSTAT_KV_U_PACKETS, GEM_TXCSENSECNT },
	[cad_stat_rx_toto] =
	    { "rx total",	KSTAT_KV_U_BYTES, 0 },
	[cad_stat_rx_totp] =
	    { "rx total",	KSTAT_KV_U_PACKETS, GEM_RXCNT },
	[cad_stat_rx_bcast] =
	    { "rx bcast",	KSTAT_KV_U_PACKETS, GEM_RXBROADCNT },
	[cad_stat_rx_mcast] =
	    { "rx mcast",	KSTAT_KV_U_PACKETS, GEM_RXMULTICNT },
	[cad_stat_rx_pause] =
	    { "rx pause",	KSTAT_KV_U_PACKETS, GEM_RXPAUSECNT },
	[cad_stat_rx_h64] =
	    { "rx 64B",		KSTAT_KV_U_PACKETS, GEM_RX64CNT },
	[cad_stat_rx_h65] =
	    { "rx 65-127B",	KSTAT_KV_U_PACKETS, GEM_RX65CNT },
	[cad_stat_rx_h128] =
	    { "rx 128-255B",	KSTAT_KV_U_PACKETS, GEM_RX128CNT },
	[cad_stat_rx_h256] =
	    { "rx 256-511B",	KSTAT_KV_U_PACKETS, GEM_RX256CNT },
	[cad_stat_rx_h512] =
	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS, GEM_RX512CNT },
	[cad_stat_rx_h1024] =
	    { "rx 1024-1518B",	KSTAT_KV_U_PACKETS, GEM_RX1024CNT },
	[cad_stat_rx_undersz] =
	    { "rx undersz",	KSTAT_KV_U_PACKETS, GEM_RXUNDRCNT },
	[cad_stat_rx_oversz] =
	    { "rx oversz",	KSTAT_KV_U_PACKETS, GEM_RXOVRCNT },
	[cad_stat_rx_jabber] =
	    { "rx jabber",	KSTAT_KV_U_PACKETS, GEM_RXJABCNT },
	[cad_stat_rx_fcs] =
	    { "rx fcs",		KSTAT_KV_U_PACKETS, GEM_RXFCSCNT },
	[cad_stat_rx_symberr] =
	    { "rx symberr",	KSTAT_KV_U_PACKETS, GEM_RXSYMBCNT },
	[cad_stat_rx_align] =
	    { "rx align",	KSTAT_KV_U_PACKETS, GEM_RXALIGNCNT },
	[cad_stat_rx_reserr] =
	    { "rx reserr",	KSTAT_KV_U_PACKETS, GEM_RXRESERRCNT },
	[cad_stat_rx_overrun] =
	    { "rx overrun",	KSTAT_KV_U_PACKETS, GEM_RXORCNT },
	[cad_stat_rx_ipcsum] =
	    { "rx ip csum",	KSTAT_KV_U_PACKETS, GEM_RXIPCCNT },
	[cad_stat_rx_tcpcsum] =
	    { "rx tcp csum",	KSTAT_KV_U_PACKETS, GEM_RXTCPCCNT },
	[cad_stat_rx_udpcsum] =
	    { "rx udp csum",	KSTAT_KV_U_PACKETS, GEM_RXUDPCCNT },
};

void
cad_kstat_attach(struct cad_softc *sc)
{
	const struct cad_counter *c;
	struct kstat *ks;
	struct kstat_kv *kvs;
	int i;

	mtx_init(&sc->sc_kstat_mtx, IPL_SOFTCLOCK);

	ks = kstat_create(sc->sc_dev.dv_xname, 0, "cad-stats", 0,
	    KSTAT_T_KV, 0);
	if (ks == NULL)
		return;

	kvs = mallocarray(nitems(cad_counters), sizeof(*kvs),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < nitems(cad_counters); i++) {
		c = &cad_counters[i];
		kstat_kv_unit_init(&kvs[i], c->c_name, KSTAT_KV_T_COUNTER64,
		    c->c_unit);
	}

	kstat_set_mutex(ks, &sc->sc_kstat_mtx);
	ks->ks_softc = sc;
	ks->ks_data = kvs;
	ks->ks_datalen = nitems(cad_counters) * sizeof(*kvs);
	ks->ks_read = cad_kstat_read;

	sc->sc_kstat = ks;
	kstat_install(ks);
}

int
cad_kstat_read(struct kstat *ks)
{
	const struct cad_counter *c;
	struct kstat_kv *kvs = ks->ks_data;
	struct cad_softc *sc = ks->ks_softc;
	uint64_t v64;
	int i;

	v64 = HREAD4(sc, GEM_OCTTXL);
	v64 |= (uint64_t)HREAD4(sc, GEM_OCTTXH) << 32;
	kstat_kv_u64(&kvs[cad_stat_tx_toto]) += v64;

	v64 = HREAD4(sc, GEM_OCTRXL);
	v64 |= (uint64_t)HREAD4(sc, GEM_OCTRXH) << 32;
	kstat_kv_u64(&kvs[cad_stat_rx_toto]) += v64;

	for (i = 0; i < nitems(cad_counters); i++) {
		c = &cad_counters[i];
		if (c->c_reg == 0)
			continue;
		kstat_kv_u64(&kvs[i]) += HREAD4(sc, c->c_reg);
	}

	getnanouptime(&ks->ks_updated);

	return 0;
}

/*
 * Periodic statistics harvest.  `arg' is the driver softc.
 *
 * Collects the hardware counters via cad_kstat_read() under the
 * statistics mutex.  The mutex is only tried, never waited for: if it
 * cannot be taken, this round is skipped.
 */
void
cad_kstat_tick(void *arg)
{
	struct cad_softc *sc = arg;

	if (!mtx_enter_try(&sc->sc_kstat_mtx))
		return;

	cad_kstat_read(sc->sc_kstat);
	mtx_leave(&sc->sc_kstat_mtx);
}
#endif /* NKSTAT > 0 */

#ifdef DDB
/*
 * Debugger helper: print the interrupt and Tx/Rx status registers, the
 * ring base registers and every Tx and Rx descriptor.
 *
 * In the descriptor listings, ">" marks the consumer index and "<" the
 * producer index, and "m" is the mbuf pointer held in the buffer slot.
 * A ring's descriptors are only listed when its buffer array exists.
 */
void
cad_dump(struct cad_softc *sc)
{
	struct cad_buf *rxb, *txb;
	struct cad_desc32 *desc32;
	struct cad_desc64 *desc64;
	int i;

	printf("isr 0x%x txsr 0x%x rxsr 0x%x\n", HREAD4(sc, GEM_ISR),
	    HREAD4(sc, GEM_TXSR), HREAD4(sc, GEM_RXSR));

	/* Tx ring base address, including the high word with 64-bit DMA. */
	if (sc->sc_dma64) {
		printf("tx q 0x%08x%08x\n",
		    HREAD4(sc, GEM_TXQBASEHI),
		    HREAD4(sc, GEM_TXQBASE));
	} else {
		printf("tx q 0x%08x\n",
		    HREAD4(sc, GEM_TXQBASE));
	}
	/* Two views of the Tx ring; sc_dma64 selects which one is used. */
	desc32 = (struct cad_desc32 *)sc->sc_txdesc;
	desc64 = (struct cad_desc64 *)sc->sc_txdesc;
	if (sc->sc_txbuf != NULL) {
		for (i = 0; i < CAD_NTXDESC; i++) {
			txb = &sc->sc_txbuf[i];
			if (sc->sc_dma64) {
				printf(" %3i %p 0x%08x%08x 0x%08x %s%s "
				    "m %p\n", i,
				    &desc64[i],
				    desc64[i].d_addrhi, desc64[i].d_addrlo,
				    desc64[i].d_status,
				    sc->sc_tx_cons == i ? ">" : " ",
				    sc->sc_tx_prod == i ? "<" : " ",
				    txb->bf_m);
			} else {
				printf(" %3i %p 0x%08x 0x%08x %s%s m %p\n", i,
				    &desc32[i],
				    desc32[i].d_addr,
				    desc32[i].d_status,
				    sc->sc_tx_cons == i ? ">" : " ",
				    sc->sc_tx_prod == i ? "<" : " ",
				    txb->bf_m);
			}
		}
	}
	/* Base registers of the additional Tx queues present in sc_qmask. */
	for (i = 1; i < GEM_MAX_PRIQ; i++) {
		if (sc->sc_qmask & (1U << i)) {
			printf("tx q%d 0x%08x\n", i,
			    HREAD4(sc, GEM_TXQ1BASE(i - 1)));
		}
	}

	/* Rx ring base address, including the high word with 64-bit DMA. */
	if (sc->sc_dma64) {
		printf("rx q 0x%08x%08x\n",
		    HREAD4(sc, GEM_RXQBASEHI),
		    HREAD4(sc, GEM_RXQBASE));
	} else {
		printf("rx q 0x%08x\n",
		    HREAD4(sc, GEM_RXQBASE));
	}
	/* Two views of the Rx ring; sc_dma64 selects which one is used. */
	desc32 = (struct cad_desc32 *)sc->sc_rxdesc;
	desc64 = (struct cad_desc64 *)sc->sc_rxdesc;
	if (sc->sc_rxbuf != NULL) {
		for (i = 0; i < CAD_NRXDESC; i++) {
			rxb = &sc->sc_rxbuf[i];
			if (sc->sc_dma64) {
				printf(" %3i %p 0x%08x%08x 0x%08x %s%s "
				    "m %p\n", i,
				    &desc64[i],
				    desc64[i].d_addrhi, desc64[i].d_addrlo,
				    desc64[i].d_status,
				    sc->sc_rx_cons == i ? ">" : " ",
				    sc->sc_rx_prod == i ? "<" : " ",
				    rxb->bf_m);
			} else {
				printf(" %3i %p 0x%08x 0x%08x %s%s m %p\n", i,
				    &desc32[i],
				    desc32[i].d_addr,
				    desc32[i].d_status,
				    sc->sc_rx_cons == i ? ">" : " ",
				    sc->sc_rx_prod == i ? "<" : " ",
				    rxb->bf_m);
			}
		}
	}
	/*
	 * Base registers of the additional Rx queues present in sc_qmask.
	 * Queues 1-7 and queues 8 and up use different register macros.
	 */
	for (i = 1; i < GEM_MAX_PRIQ; i++) {
		if (sc->sc_qmask & (1U << i)) {
			printf("rx q%d 0x%08x\n", i,
			    HREAD4(sc, (i < 8) ? GEM_RXQ1BASE(i - 1)
			      : GEM_RXQ8BASE(i - 8)));
		}
	}
}
#endif
