xref: /freebsd-11-stable/sys/dev/cxgbe/t4_sge.c (revision 17de9e9a183c63970fdbe7d9fbf51b999f36a22c)
1 /*-
2  * Copyright (c) 2011 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/types.h>
35 #include <sys/eventhandler.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/sbuf.h>
42 #include <sys/taskqueue.h>
43 #include <sys/time.h>
44 #include <sys/sglist.h>
45 #include <sys/sysctl.h>
46 #include <sys/smp.h>
47 #include <sys/counter.h>
48 #include <net/bpf.h>
49 #include <net/ethernet.h>
50 #include <net/if.h>
51 #include <net/if_vlan_var.h>
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet/tcp.h>
56 #include <machine/in_cksum.h>
57 #include <machine/md_var.h>
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #ifdef DEV_NETMAP
61 #include <machine/bus.h>
62 #include <sys/selinfo.h>
63 #include <net/if_var.h>
64 #include <net/netmap.h>
65 #include <dev/netmap/netmap_kern.h>
66 #endif
67 
68 #include "common/common.h"
69 #include "common/t4_regs.h"
70 #include "common/t4_regs_values.h"
71 #include "common/t4_msg.h"
72 #include "t4_l2t.h"
73 #include "t4_mp_ring.h"
74 
75 #ifdef T4_PKT_TIMESTAMP
76 #define RX_COPY_THRESHOLD (MINCLSIZE - 8)
77 #else
78 #define RX_COPY_THRESHOLD MINCLSIZE
79 #endif
80 
81 /*
82  * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
83  * 0-7 are valid values.
84  */
85 static int fl_pktshift = 2;
86 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0,
87     "payload DMA offset in rx buffer (bytes)");
88 
89 /*
90  * Pad ethernet payload up to this boundary.
91  * -1: driver should figure out a good value.
92  *  0: disable padding.
93  *  Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
94  */
95 int fl_pad = -1;
96 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0,
97     "payload pad boundary (bytes)");
98 
99 /*
100  * Status page length.
101  * -1: driver should figure out a good value.
102  *  64 or 128 are the only other valid values.
103  */
104 static int spg_len = -1;
105 SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
106     "status page size (bytes)");
107 
108 /*
109  * Congestion drops.
110  * -1: no congestion feedback (not recommended).
111  *  0: backpressure the channel instead of dropping packets right away.
112  *  1: no backpressure, drop packets for the congested queue immediately.
113  */
114 static int cong_drop = 0;
115 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
116     "Congestion control for RX queues (0 = backpressure, 1 = drop");
117 
118 /*
119  * Deliver multiple frames in the same free list buffer if they fit.
120  * -1: let the driver decide whether to enable buffer packing or not.
121  *  0: disable buffer packing.
122  *  1: enable buffer packing.
123  */
124 static int buffer_packing = -1;
125 SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing,
126     0, "Enable buffer packing");
127 
128 /*
129  * Start next frame in a packed buffer at this boundary.
130  * -1: driver should figure out a good value.
131  * T4: driver will ignore this and use the same value as fl_pad above.
132  * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
133  */
134 static int fl_pack = -1;
135 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0,
136     "payload pack boundary (bytes)");
137 
138 /*
139  * Allow the driver to create mbuf(s) in a cluster allocated for rx.
140  * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
141  * 1: ok to create mbuf(s) within a cluster if there is room.
142  */
143 static int allow_mbufs_in_cluster = 1;
144 SYSCTL_INT(_hw_cxgbe, OID_AUTO, allow_mbufs_in_cluster, CTLFLAG_RDTUN,
145     &allow_mbufs_in_cluster, 0,
146     "Allow driver to create mbufs within a rx cluster");
147 
148 /*
149  * Largest rx cluster size that the driver is allowed to allocate.
150  */
151 static int largest_rx_cluster = MJUM16BYTES;
152 SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN,
153     &largest_rx_cluster, 0, "Largest rx cluster (bytes)");
154 
155 /*
156  * Size of cluster allocation that's most likely to succeed.  The driver will
157  * fall back to this size if it fails to allocate clusters larger than this.
158  */
159 static int safest_rx_cluster = PAGE_SIZE;
160 SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN,
161     &safest_rx_cluster, 0, "Safe rx cluster (bytes)");
162 
163 /*
164  * The interrupt holdoff timers are multiplied by this value on T6+.
165  * 1 and 3-17 (both inclusive) are legal values.
166  */
167 static int tscale = 1;
168 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0,
169     "Interrupt holdoff timer scale on T6+");
170 
171 /*
172  * Number of LRO entries in the lro_ctrl structure per rx queue.
173  */
174 static int lro_entries = TCP_LRO_ENTRIES;
175 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0,
176     "Number of LRO entries per RX queue");
177 
178 /*
179  * This enables presorting of frames before they're fed into tcp_lro_rx.
180  */
181 static int lro_mbufs = 0;
182 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0,
183     "Enable presorting of LRO frames");
184 
185 struct txpkts {
186 	u_int wr_type;		/* type 0 or type 1 */
187 	u_int npkt;		/* # of packets in this work request */
188 	u_int plen;		/* total payload (sum of all packets) */
189 	u_int len16;		/* # of 16B pieces used by this work request */
190 };
191 
192 /* A packet's SGL.  This + m_pkthdr has all info needed for tx */
193 struct sgl {
194 	struct sglist sg;
195 	struct sglist_seg seg[TX_SGL_SEGS];
196 };
197 
198 static int service_iq(struct sge_iq *, int);
199 static int service_iq_fl(struct sge_iq *, int);
200 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
201 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
202 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
203 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
204 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t,
205     uint16_t, char *);
206 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
207     bus_addr_t *, void **);
208 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
209     void *);
210 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *,
211     int, int);
212 static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *);
213 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
214     struct sge_iq *);
215 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *,
216     struct sysctl_oid *, struct sge_fl *);
217 static int alloc_fwq(struct adapter *);
218 static int free_fwq(struct adapter *);
219 static int alloc_ctrlq(struct adapter *, struct sge_wrq *, int,
220     struct sysctl_oid *);
221 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int,
222     struct sysctl_oid *);
223 static int free_rxq(struct vi_info *, struct sge_rxq *);
224 #ifdef TCP_OFFLOAD
225 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int,
226     struct sysctl_oid *);
227 static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *);
228 #endif
229 #ifdef DEV_NETMAP
230 static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int,
231     struct sysctl_oid *);
232 static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *);
233 static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int,
234     struct sysctl_oid *);
235 static int free_nm_txq(struct vi_info *, struct sge_nm_txq *);
236 #endif
237 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
238 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
239 #ifdef TCP_OFFLOAD
240 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
241 #endif
242 static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *);
243 static int free_eq(struct adapter *, struct sge_eq *);
244 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *,
245     struct sysctl_oid *);
246 static int free_wrq(struct adapter *, struct sge_wrq *);
247 static int alloc_txq(struct vi_info *, struct sge_txq *, int,
248     struct sysctl_oid *);
249 static int free_txq(struct vi_info *, struct sge_txq *);
250 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
251 static inline void ring_fl_db(struct adapter *, struct sge_fl *);
252 static int refill_fl(struct adapter *, struct sge_fl *, int);
253 static void refill_sfl(void *);
254 static int alloc_fl_sdesc(struct sge_fl *);
255 static void free_fl_sdesc(struct adapter *, struct sge_fl *);
256 static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
257 static void find_safe_refill_source(struct adapter *, struct sge_fl *);
258 static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
259 
260 static inline void get_pkt_gl(struct mbuf *, struct sglist *);
261 static inline u_int txpkt_len16(u_int, u_int);
262 static inline u_int txpkt_vm_len16(u_int, u_int);
263 static inline u_int txpkts0_len16(u_int);
264 static inline u_int txpkts1_len16(void);
265 static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *,
266     struct mbuf *, u_int);
267 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *,
268     struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int);
269 static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int);
270 static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int);
271 static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *,
272     struct mbuf *, const struct txpkts *, u_int);
273 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int);
274 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
275 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int);
276 static inline uint16_t read_hw_cidx(struct sge_eq *);
277 static inline u_int reclaimable_tx_desc(struct sge_eq *);
278 static inline u_int total_available_tx_desc(struct sge_eq *);
279 static u_int reclaim_tx_descs(struct sge_txq *, u_int);
280 static void tx_reclaim(void *, int);
281 static __be64 get_flit(struct sglist_seg *, int, int);
282 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
283     struct mbuf *);
284 static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
285     struct mbuf *);
286 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *);
287 static void wrq_tx_drain(void *, int);
288 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *);
289 
290 static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
291 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
292 
293 static counter_u64_t extfree_refs;
294 static counter_u64_t extfree_rels;
295 
296 an_handler_t t4_an_handler;
297 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES];
298 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS];
299 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES];
300 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES];
301 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES];
302 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES];
303 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES];
304 
305 void
t4_register_an_handler(an_handler_t h)306 t4_register_an_handler(an_handler_t h)
307 {
308 	uintptr_t *loc;
309 
310 	MPASS(h == NULL || t4_an_handler == NULL);
311 
312 	loc = (uintptr_t *)&t4_an_handler;
313 	atomic_store_rel_ptr(loc, (uintptr_t)h);
314 }
315 
316 void
t4_register_fw_msg_handler(int type,fw_msg_handler_t h)317 t4_register_fw_msg_handler(int type, fw_msg_handler_t h)
318 {
319 	uintptr_t *loc;
320 
321 	MPASS(type < nitems(t4_fw_msg_handler));
322 	MPASS(h == NULL || t4_fw_msg_handler[type] == NULL);
323 	/*
324 	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
325 	 * handler dispatch table.  Reject any attempt to install a handler for
326 	 * this subtype.
327 	 */
328 	MPASS(type != FW_TYPE_RSSCPL);
329 	MPASS(type != FW6_TYPE_RSSCPL);
330 
331 	loc = (uintptr_t *)&t4_fw_msg_handler[type];
332 	atomic_store_rel_ptr(loc, (uintptr_t)h);
333 }
334 
335 void
t4_register_cpl_handler(int opcode,cpl_handler_t h)336 t4_register_cpl_handler(int opcode, cpl_handler_t h)
337 {
338 	uintptr_t *loc;
339 
340 	MPASS(opcode < nitems(t4_cpl_handler));
341 	MPASS(h == NULL || t4_cpl_handler[opcode] == NULL);
342 
343 	loc = (uintptr_t *)&t4_cpl_handler[opcode];
344 	atomic_store_rel_ptr(loc, (uintptr_t)h);
345 }
346 
347 static int
set_tcb_rpl_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)348 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
349     struct mbuf *m)
350 {
351 	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
352 	u_int tid;
353 	int cookie;
354 
355 	MPASS(m == NULL);
356 
357 	tid = GET_TID(cpl);
358 	if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) {
359 		/*
360 		 * The return code for filter-write is put in the CPL cookie so
361 		 * we have to rely on the hardware tid (is_ftid) to determine
362 		 * that this is a response to a filter.
363 		 */
364 		cookie = CPL_COOKIE_FILTER;
365 	} else {
366 		cookie = G_COOKIE(cpl->cookie);
367 	}
368 	MPASS(cookie > CPL_COOKIE_RESERVED);
369 	MPASS(cookie < nitems(set_tcb_rpl_handlers));
370 
371 	return (set_tcb_rpl_handlers[cookie](iq, rss, m));
372 }
373 
374 static int
l2t_write_rpl_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)375 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
376     struct mbuf *m)
377 {
378 	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
379 	unsigned int cookie;
380 
381 	MPASS(m == NULL);
382 
383 	cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER;
384 	return (l2t_write_rpl_handlers[cookie](iq, rss, m));
385 }
386 
387 static int
act_open_rpl_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)388 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
389     struct mbuf *m)
390 {
391 	const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1);
392 	u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status)));
393 
394 	MPASS(m == NULL);
395 	MPASS(cookie != CPL_COOKIE_RESERVED);
396 
397 	return (act_open_rpl_handlers[cookie](iq, rss, m));
398 }
399 
400 static int
abort_rpl_rss_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)401 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss,
402     struct mbuf *m)
403 {
404 	struct adapter *sc = iq->adapter;
405 	u_int cookie;
406 
407 	MPASS(m == NULL);
408 	if (is_hashfilter(sc))
409 		cookie = CPL_COOKIE_HASHFILTER;
410 	else
411 		cookie = CPL_COOKIE_TOM;
412 
413 	return (abort_rpl_rss_handlers[cookie](iq, rss, m));
414 }
415 
416 static int
fw4_ack_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)417 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
418 {
419 	struct adapter *sc = iq->adapter;
420 	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
421 	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
422 	u_int cookie;
423 
424 	MPASS(m == NULL);
425 	if (is_etid(sc, tid))
426 		cookie = CPL_COOKIE_ETHOFLD;
427 	else
428 		cookie = CPL_COOKIE_TOM;
429 
430 	return (fw4_ack_handlers[cookie](iq, rss, m));
431 }
432 
433 static void
t4_init_shared_cpl_handlers(void)434 t4_init_shared_cpl_handlers(void)
435 {
436 
437 	t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler);
438 	t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler);
439 	t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler);
440 	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler);
441 	t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler);
442 }
443 
444 void
t4_register_shared_cpl_handler(int opcode,cpl_handler_t h,int cookie)445 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie)
446 {
447 	uintptr_t *loc;
448 
449 	MPASS(opcode < nitems(t4_cpl_handler));
450 	MPASS(cookie > CPL_COOKIE_RESERVED);
451 	MPASS(cookie < NUM_CPL_COOKIES);
452 	MPASS(t4_cpl_handler[opcode] != NULL);
453 
454 	switch (opcode) {
455 	case CPL_SET_TCB_RPL:
456 		loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie];
457 		break;
458 	case CPL_L2T_WRITE_RPL:
459 		loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie];
460 		break;
461 	case CPL_ACT_OPEN_RPL:
462 		loc = (uintptr_t *)&act_open_rpl_handlers[cookie];
463 		break;
464 	case CPL_ABORT_RPL_RSS:
465 		loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie];
466 		break;
467 	case CPL_FW4_ACK:
468 		loc = (uintptr_t *)&fw4_ack_handlers[cookie];
469 		break;
470 	default:
471 		MPASS(0);
472 		return;
473 	}
474 	MPASS(h == NULL || *loc == (uintptr_t)NULL);
475 	atomic_store_rel_ptr(loc, (uintptr_t)h);
476 }
477 
478 /*
479  * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
480  */
481 void
t4_sge_modload(void)482 t4_sge_modload(void)
483 {
484 
485 	if (fl_pktshift < 0 || fl_pktshift > 7) {
486 		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
487 		    " using 2 instead.\n", fl_pktshift);
488 		fl_pktshift = 2;
489 	}
490 
491 	if (spg_len != 64 && spg_len != 128) {
492 		int len;
493 
494 #if defined(__i386__) || defined(__amd64__)
495 		len = cpu_clflush_line_size > 64 ? 128 : 64;
496 #else
497 		len = 64;
498 #endif
499 		if (spg_len != -1) {
500 			printf("Invalid hw.cxgbe.spg_len value (%d),"
501 			    " using %d instead.\n", spg_len, len);
502 		}
503 		spg_len = len;
504 	}
505 
506 	if (cong_drop < -1 || cong_drop > 1) {
507 		printf("Invalid hw.cxgbe.cong_drop value (%d),"
508 		    " using 0 instead.\n", cong_drop);
509 		cong_drop = 0;
510 	}
511 
512 	if (tscale != 1 && (tscale < 3 || tscale > 17)) {
513 		printf("Invalid hw.cxgbe.tscale value (%d),"
514 		    " using 1 instead.\n", tscale);
515 		tscale = 1;
516 	}
517 
518 	extfree_refs = counter_u64_alloc(M_WAITOK);
519 	extfree_rels = counter_u64_alloc(M_WAITOK);
520 	counter_u64_zero(extfree_refs);
521 	counter_u64_zero(extfree_rels);
522 
523 	t4_init_shared_cpl_handlers();
524 	t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg);
525 	t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg);
526 	t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
527 	t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx);
528 	t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
529 	t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl);
530 }
531 
532 void
t4_sge_modunload(void)533 t4_sge_modunload(void)
534 {
535 
536 	counter_u64_free(extfree_refs);
537 	counter_u64_free(extfree_rels);
538 }
539 
540 uint64_t
t4_sge_extfree_refs(void)541 t4_sge_extfree_refs(void)
542 {
543 	uint64_t refs, rels;
544 
545 	rels = counter_u64_fetch(extfree_rels);
546 	refs = counter_u64_fetch(extfree_refs);
547 
548 	return (refs - rels);
549 }
550 
551 static inline void
setup_pad_and_pack_boundaries(struct adapter * sc)552 setup_pad_and_pack_boundaries(struct adapter *sc)
553 {
554 	uint32_t v, m;
555 	int pad, pack, pad_shift;
556 
557 	pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT :
558 	    X_INGPADBOUNDARY_SHIFT;
559 	pad = fl_pad;
560 	if (fl_pad < (1 << pad_shift) ||
561 	    fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) ||
562 	    !powerof2(fl_pad)) {
563 		/*
564 		 * If there is any chance that we might use buffer packing and
565 		 * the chip is a T4, then pick 64 as the pad/pack boundary.  Set
566 		 * it to the minimum allowed in all other cases.
567 		 */
568 		pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift;
569 
570 		/*
571 		 * For fl_pad = 0 we'll still write a reasonable value to the
572 		 * register but all the freelists will opt out of padding.
573 		 * We'll complain here only if the user tried to set it to a
574 		 * value greater than 0 that was invalid.
575 		 */
576 		if (fl_pad > 0) {
577 			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
578 			    " (%d), using %d instead.\n", fl_pad, pad);
579 		}
580 	}
581 	m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
582 	v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift);
583 	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
584 
585 	if (is_t4(sc)) {
586 		if (fl_pack != -1 && fl_pack != pad) {
587 			/* Complain but carry on. */
588 			device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
589 			    " using %d instead.\n", fl_pack, pad);
590 		}
591 		return;
592 	}
593 
594 	pack = fl_pack;
595 	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
596 	    !powerof2(fl_pack)) {
597 		pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
598 		MPASS(powerof2(pack));
599 		if (pack < 16)
600 			pack = 16;
601 		if (pack == 32)
602 			pack = 64;
603 		if (pack > 4096)
604 			pack = 4096;
605 		if (fl_pack != -1) {
606 			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
607 			    " (%d), using %d instead.\n", fl_pack, pack);
608 		}
609 	}
610 	m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
611 	if (pack == 16)
612 		v = V_INGPACKBOUNDARY(0);
613 	else
614 		v = V_INGPACKBOUNDARY(ilog2(pack) - 5);
615 
616 	MPASS(!is_t4(sc));	/* T4 doesn't have SGE_CONTROL2 */
617 	t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
618 }
619 
620 /*
621  * adap->params.vpd.cclk must be set up before this is called.
622  */
623 void
t4_tweak_chip_settings(struct adapter * sc)624 t4_tweak_chip_settings(struct adapter *sc)
625 {
626 	int i;
627 	uint32_t v, m;
628 	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
629 	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
630 	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
631 	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
632 	static int sge_flbuf_sizes[] = {
633 		MCLBYTES,
634 #if MJUMPAGESIZE != MCLBYTES
635 		MJUMPAGESIZE,
636 		MJUMPAGESIZE - CL_METADATA_SIZE,
637 		MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
638 #endif
639 		MJUM9BYTES,
640 		MJUM16BYTES,
641 		MCLBYTES - MSIZE - CL_METADATA_SIZE,
642 		MJUM9BYTES - CL_METADATA_SIZE,
643 		MJUM16BYTES - CL_METADATA_SIZE,
644 	};
645 
646 	KASSERT(sc->flags & MASTER_PF,
647 	    ("%s: trying to change chip settings when not master.", __func__));
648 
649 	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
650 	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
651 	    V_EGRSTATUSPAGESIZE(spg_len == 128);
652 	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
653 
654 	setup_pad_and_pack_boundaries(sc);
655 
656 	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
657 	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
658 	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
659 	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
660 	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
661 	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
662 	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
663 	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
664 	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);
665 
666 	KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
667 	    ("%s: hw buffer size table too big", __func__));
668 	t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096);
669 	t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536);
670 	for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
671 		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE15 - (4 * i),
672 		    sge_flbuf_sizes[i]);
673 	}
674 
675 	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
676 	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
677 	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);
678 
679 	KASSERT(intr_timer[0] <= timer_max,
680 	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
681 	    timer_max));
682 	for (i = 1; i < nitems(intr_timer); i++) {
683 		KASSERT(intr_timer[i] >= intr_timer[i - 1],
684 		    ("%s: timers not listed in increasing order (%d)",
685 		    __func__, i));
686 
687 		while (intr_timer[i] > timer_max) {
688 			if (i == nitems(intr_timer) - 1) {
689 				intr_timer[i] = timer_max;
690 				break;
691 			}
692 			intr_timer[i] += intr_timer[i - 1];
693 			intr_timer[i] /= 2;
694 		}
695 	}
696 
697 	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
698 	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
699 	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
700 	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
701 	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
702 	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
703 	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
704 	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
705 	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);
706 
707 	if (chip_id(sc) >= CHELSIO_T6) {
708 		m = V_TSCALE(M_TSCALE);
709 		if (tscale == 1)
710 			v = 0;
711 		else
712 			v = V_TSCALE(tscale - 2);
713 		t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v);
714 
715 		if (sc->debug_flags & DF_DISABLE_TCB_CACHE) {
716 			m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN |
717 			    V_WRTHRTHRESH(M_WRTHRTHRESH);
718 			t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1);
719 			v &= ~m;
720 			v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN |
721 			    V_WRTHRTHRESH(16);
722 			t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1);
723 		}
724 	}
725 
726 	/* 4K, 16K, 64K, 256K DDP "page sizes" */
727 	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
728 	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);
729 
730 	m = v = F_TDDPTAGTCB;
731 	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);
732 
733 	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
734 	    F_RESETDDPOFFSET;
735 	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
736 	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
737 }
738 
739 /*
740  * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
741  * padding is in use, the buffer's start and end need to be aligned to the pad
742  * boundary as well.  We'll just make sure that the size is a multiple of the
743  * boundary here, it is up to the buffer allocation code to make sure the start
744  * of the buffer is aligned as well.
745  */
746 static inline int
hwsz_ok(struct adapter * sc,int hwsz)747 hwsz_ok(struct adapter *sc, int hwsz)
748 {
749 	int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1;
750 
751 	return (hwsz >= 64 && (hwsz & mask) == 0);
752 }
753 
754 /*
755  * XXX: driver really should be able to deal with unexpected settings.
756  */
757 int
t4_read_chip_settings(struct adapter * sc)758 t4_read_chip_settings(struct adapter *sc)
759 {
760 	struct sge *s = &sc->sge;
761 	struct sge_params *sp = &sc->params.sge;
762 	int i, j, n, rc = 0;
763 	uint32_t m, v, r;
764 	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
765 	static int sw_buf_sizes[] = {	/* Sorted by size */
766 		MCLBYTES,
767 #if MJUMPAGESIZE != MCLBYTES
768 		MJUMPAGESIZE,
769 #endif
770 		MJUM9BYTES,
771 		MJUM16BYTES
772 	};
773 	struct sw_zone_info *swz, *safe_swz;
774 	struct hw_buf_info *hwb;
775 
776 	m = F_RXPKTCPLMODE;
777 	v = F_RXPKTCPLMODE;
778 	r = sc->params.sge.sge_control;
779 	if ((r & m) != v) {
780 		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
781 		rc = EINVAL;
782 	}
783 
784 	/*
785 	 * If this changes then every single use of PAGE_SHIFT in the driver
786 	 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift.
787 	 */
788 	if (sp->page_shift != PAGE_SHIFT) {
789 		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
790 		rc = EINVAL;
791 	}
792 
793 	/* Filter out unusable hw buffer sizes entirely (mark with -2). */
794 	hwb = &s->hw_buf_info[0];
795 	for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
796 		r = sc->params.sge.sge_fl_buffer_size[i];
797 		hwb->size = r;
798 		hwb->zidx = hwsz_ok(sc, r) ? -1 : -2;
799 		hwb->next = -1;
800 	}
801 
802 	/*
803 	 * Create a sorted list in decreasing order of hw buffer sizes (and so
804 	 * increasing order of spare area) for each software zone.
805 	 *
806 	 * If padding is enabled then the start and end of the buffer must align
807 	 * to the pad boundary; if packing is enabled then they must align with
808 	 * the pack boundary as well.  Allocations from the cluster zones are
809 	 * aligned to min(size, 4K), so the buffer starts at that alignment and
810 	 * ends at hwb->size alignment.  If mbuf inlining is allowed the
811 	 * starting alignment will be reduced to MSIZE and the driver will
812 	 * exercise appropriate caution when deciding on the best buffer layout
813 	 * to use.
814 	 */
815 	n = 0;	/* no usable buffer size to begin with */
816 	swz = &s->sw_zone_info[0];
817 	safe_swz = NULL;
818 	for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
819 		int8_t head = -1, tail = -1;
820 
821 		swz->size = sw_buf_sizes[i];
822 		swz->zone = m_getzone(swz->size);
823 		swz->type = m_gettype(swz->size);
824 
825 		if (swz->size < PAGE_SIZE) {
826 			MPASS(powerof2(swz->size));
827 			if (fl_pad && (swz->size % sp->pad_boundary != 0))
828 				continue;
829 		}
830 
831 		if (swz->size == safest_rx_cluster)
832 			safe_swz = swz;
833 
834 		hwb = &s->hw_buf_info[0];
835 		for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
836 			if (hwb->zidx != -1 || hwb->size > swz->size)
837 				continue;
838 #ifdef INVARIANTS
839 			if (fl_pad)
840 				MPASS(hwb->size % sp->pad_boundary == 0);
841 #endif
842 			hwb->zidx = i;
843 			if (head == -1)
844 				head = tail = j;
845 			else if (hwb->size < s->hw_buf_info[tail].size) {
846 				s->hw_buf_info[tail].next = j;
847 				tail = j;
848 			} else {
849 				int8_t *cur;
850 				struct hw_buf_info *t;
851 
852 				for (cur = &head; *cur != -1; cur = &t->next) {
853 					t = &s->hw_buf_info[*cur];
854 					if (hwb->size == t->size) {
855 						hwb->zidx = -2;
856 						break;
857 					}
858 					if (hwb->size > t->size) {
859 						hwb->next = *cur;
860 						*cur = j;
861 						break;
862 					}
863 				}
864 			}
865 		}
866 		swz->head_hwidx = head;
867 		swz->tail_hwidx = tail;
868 
869 		if (tail != -1) {
870 			n++;
871 			if (swz->size - s->hw_buf_info[tail].size >=
872 			    CL_METADATA_SIZE)
873 				sc->flags |= BUF_PACKING_OK;
874 		}
875 	}
876 	if (n == 0) {
877 		device_printf(sc->dev, "no usable SGE FL buffer size.\n");
878 		rc = EINVAL;
879 	}
880 
881 	s->safe_hwidx1 = -1;
882 	s->safe_hwidx2 = -1;
883 	if (safe_swz != NULL) {
884 		s->safe_hwidx1 = safe_swz->head_hwidx;
885 		for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
886 			int spare;
887 
888 			hwb = &s->hw_buf_info[i];
889 #ifdef INVARIANTS
890 			if (fl_pad)
891 				MPASS(hwb->size % sp->pad_boundary == 0);
892 #endif
893 			spare = safe_swz->size - hwb->size;
894 			if (spare >= CL_METADATA_SIZE) {
895 				s->safe_hwidx2 = i;
896 				break;
897 			}
898 		}
899 	}
900 
901 	if (sc->flags & IS_VF)
902 		return (0);
903 
904 	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
905 	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
906 	if (r != v) {
907 		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
908 		rc = EINVAL;
909 	}
910 
911 	m = v = F_TDDPTAGTCB;
912 	r = t4_read_reg(sc, A_ULP_RX_CTL);
913 	if ((r & m) != v) {
914 		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
915 		rc = EINVAL;
916 	}
917 
918 	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
919 	    F_RESETDDPOFFSET;
920 	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
921 	r = t4_read_reg(sc, A_TP_PARA_REG5);
922 	if ((r & m) != v) {
923 		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
924 		rc = EINVAL;
925 	}
926 
927 	t4_init_tp_params(sc, 1);
928 
929 	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
930 	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);
931 
932 	return (rc);
933 }
934 
935 int
t4_create_dma_tag(struct adapter * sc)936 t4_create_dma_tag(struct adapter *sc)
937 {
938 	int rc;
939 
940 	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
941 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
942 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
943 	    NULL, &sc->dmat);
944 	if (rc != 0) {
945 		device_printf(sc->dev,
946 		    "failed to create main DMA tag: %d\n", rc);
947 	}
948 
949 	return (rc);
950 }
951 
952 void
t4_sge_sysctls(struct adapter * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid_list * children)953 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
954     struct sysctl_oid_list *children)
955 {
956 	struct sge_params *sp = &sc->params.sge;
957 
958 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
959 	    CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
960 	    "freelist buffer sizes");
961 
962 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
963 	    NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)");
964 
965 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
966 	    NULL, sp->pad_boundary, "payload pad boundary (bytes)");
967 
968 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
969 	    NULL, sp->spg_len, "status page size (bytes)");
970 
971 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
972 	    NULL, cong_drop, "congestion drop setting");
973 
974 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
975 	    NULL, sp->pack_boundary, "payload pack boundary (bytes)");
976 }
977 
978 int
t4_destroy_dma_tag(struct adapter * sc)979 t4_destroy_dma_tag(struct adapter *sc)
980 {
981 	if (sc->dmat)
982 		bus_dma_tag_destroy(sc->dmat);
983 
984 	return (0);
985 }
986 
987 /*
988  * Allocate and initialize the firmware event queue, control queues, and special
989  * purpose rx queues owned by the adapter.
990  *
991  * Returns errno on failure.  Resources allocated up to that point may still be
992  * allocated.  Caller is responsible for cleanup in case this function fails.
993  */
994 int
t4_setup_adapter_queues(struct adapter * sc)995 t4_setup_adapter_queues(struct adapter *sc)
996 {
997 	struct sysctl_oid *oid;
998 	struct sysctl_oid_list *children;
999 	int rc, i;
1000 
1001 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1002 
1003 	sysctl_ctx_init(&sc->ctx);
1004 	sc->flags |= ADAP_SYSCTL_CTX;
1005 
1006 	/*
1007 	 * Firmware event queue
1008 	 */
1009 	rc = alloc_fwq(sc);
1010 	if (rc != 0)
1011 		return (rc);
1012 
1013 	/*
1014 	 * That's all for the VF driver.
1015 	 */
1016 	if (sc->flags & IS_VF)
1017 		return (rc);
1018 
1019 	oid = device_get_sysctl_tree(sc->dev);
1020 	children = SYSCTL_CHILDREN(oid);
1021 
1022 	/*
1023 	 * XXX: General purpose rx queues, one per port.
1024 	 */
1025 
1026 	/*
1027 	 * Control queues, one per port.
1028 	 */
1029 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "ctrlq",
1030 	    CTLFLAG_RD, NULL, "control queues");
1031 	for_each_port(sc, i) {
1032 		struct sge_wrq *ctrlq = &sc->sge.ctrlq[i];
1033 
1034 		rc = alloc_ctrlq(sc, ctrlq, i, oid);
1035 		if (rc != 0)
1036 			return (rc);
1037 	}
1038 
1039 	return (rc);
1040 }
1041 
1042 /*
1043  * Idempotent
1044  */
1045 int
t4_teardown_adapter_queues(struct adapter * sc)1046 t4_teardown_adapter_queues(struct adapter *sc)
1047 {
1048 	int i;
1049 
1050 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1051 
1052 	/* Do this before freeing the queue */
1053 	if (sc->flags & ADAP_SYSCTL_CTX) {
1054 		sysctl_ctx_free(&sc->ctx);
1055 		sc->flags &= ~ADAP_SYSCTL_CTX;
1056 	}
1057 
1058 	if (!(sc->flags & IS_VF)) {
1059 		for_each_port(sc, i)
1060 			free_wrq(sc, &sc->sge.ctrlq[i]);
1061 	}
1062 	free_fwq(sc);
1063 
1064 	return (0);
1065 }
1066 
1067 /* Maximum payload that can be delivered with a single iq descriptor */
1068 static inline int
mtu_to_max_payload(struct adapter * sc,int mtu,const int toe)1069 mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
1070 {
1071 	int payload;
1072 
1073 #ifdef TCP_OFFLOAD
1074 	if (toe) {
1075 		int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
1076 
1077 		/* Note that COP can set rx_coalesce on/off per connection. */
1078 		payload = max(mtu, rxcs);
1079 	} else {
1080 #endif
1081 		/* large enough even when hw VLAN extraction is disabled */
1082 		payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
1083 		    ETHER_VLAN_ENCAP_LEN + mtu;
1084 #ifdef TCP_OFFLOAD
1085 	}
1086 #endif
1087 
1088 	return (payload);
1089 }
1090 
/*
 * Allocate all the queues that belong to a virtual interface and add a sysctl
 * node (in vi->ctx, under the VI's device sysctl tree) for each group.  The
 * groups are set up in this order:
 *
 *  - netmap rx and tx queues (DEV_NETMAP kernels, and only if the ifnet has
 *    IFCAP_NETMAP in if_capabilities)
 *  - NIC rx queues
 *  - offload rx queues (TCP_OFFLOAD kernels)
 *  - NIC tx queues
 *  - offload tx queues (TCP_OFFLOAD kernels)
 *
 * Returns 0 on success.  If any allocation fails, t4_teardown_vi_queues() is
 * called and the errno from the failed allocation is returned.
 */
int
t4_setup_vi_queues(struct vi_info *vi)
{
	int rc = 0, i, intr_idx, iqidx;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	int saved_idx;
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif
	/* Scratch for queue names; snprintf truncates anything longer. */
	char name[16];
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = vi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int maxp, mtu = ifp->if_mtu;

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = vi->first_intr;

#ifdef DEV_NETMAP
	/* Remembered so that the NIC rx queues can start at the same vector. */
	saved_idx = intr_idx;
	if (ifp->if_capabilities & IFCAP_NETMAP) {

		/* netmap is supported with direct interrupts only. */
		MPASS(!forwarding_intr_to_fwq(sc));

		/*
		 * We don't have buffers to back the netmap rx queues
		 * right now so we create the queues in a way that
		 * doesn't set off any congestion signal in the chip.
		 */
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq",
		    CTLFLAG_RD, NULL, "rx queues");
		for_each_nm_rxq(vi, i, nm_rxq) {
			rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}

		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq",
		    CTLFLAG_RD, NULL, "tx queues");
		for_each_nm_txq(vi, i, nm_txq) {
			/* netmap tx queues are spread over the netmap rx queues. */
			iqidx = vi->first_nm_rxq + (i % vi->nnmrxq);
			rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid);
			if (rc != 0)
				goto done;
		}
	}

	/* Normal rx queues and netmap rx queues share the same interrupts. */
	intr_idx = saved_idx;
#endif

	/*
	 * Allocate rx queues first because a default iqid is required when
	 * creating a tx queue.
	 */
	maxp = mtu_to_max_payload(sc, mtu, 0);
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq",
	    CTLFLAG_RD, NULL, "rx queues");
	for_each_rxq(vi, i, rxq) {

		init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(vi->dev), i);
		init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name);

		/* -1 is passed instead of a vector when interrupts are forwarded. */
		rc = alloc_rxq(vi, rxq,
		    forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid);
		if (rc != 0)
			goto done;
		intr_idx++;
	}
#ifdef DEV_NETMAP
	/*
	 * The NIC and netmap rx queues overlapped on the same vectors, so the
	 * next free vector is past whichever group is larger.
	 */
	if (ifp->if_capabilities & IFCAP_NETMAP)
		intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
#endif
#ifdef TCP_OFFLOAD
	maxp = mtu_to_max_payload(sc, mtu, 1);
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq",
	    CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections");
	for_each_ofld_rxq(vi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx,
		    vi->qsize_rxq);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(vi->dev), i);
		init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name);

		rc = alloc_ofld_rxq(vi, ofld_rxq,
		    forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid);
		if (rc != 0)
			goto done;
		intr_idx++;
	}
#endif

	/*
	 * Now the tx queues.
	 */
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	for_each_txq(vi, i, txq) {
		/*
		 * Each tx queue is given the context id of one of this VI's rx
		 * queues, assigned round-robin.
		 */
		iqidx = vi->first_rxq + (i % vi->nrxq);
		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(vi->dev), i);
		init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan,
		    sc->sge.rxq[iqidx].iq.cntxt_id, name);

		rc = alloc_txq(vi, txq, i, oid);
		if (rc != 0)
			goto done;
	}
#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(vi, i, ofld_txq) {
		struct sysctl_oid *oid2;

		/* Same round-robin scheme, over the offload rx queues. */
		iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq);
		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(vi->dev), i);
		init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan,
		    sc->sge.ofld_rxq[iqidx].iq.cntxt_id, name);

		/* name is reused: the per-queue sysctl node is the queue index. */
		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, vi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
	}
#endif
done:
	/* Release whatever was set up before the failure. */
	if (rc)
		t4_teardown_vi_queues(vi);

	return (rc);
}
1241 
1242 /*
1243  * Idempotent
1244  */
1245 int
t4_teardown_vi_queues(struct vi_info * vi)1246 t4_teardown_vi_queues(struct vi_info *vi)
1247 {
1248 	int i;
1249 	struct sge_rxq *rxq;
1250 	struct sge_txq *txq;
1251 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1252 	struct port_info *pi = vi->pi;
1253 	struct adapter *sc = pi->adapter;
1254 	struct sge_wrq *ofld_txq;
1255 #endif
1256 #ifdef TCP_OFFLOAD
1257 	struct sge_ofld_rxq *ofld_rxq;
1258 #endif
1259 #ifdef DEV_NETMAP
1260 	struct sge_nm_rxq *nm_rxq;
1261 	struct sge_nm_txq *nm_txq;
1262 #endif
1263 
1264 	/* Do this before freeing the queues */
1265 	if (vi->flags & VI_SYSCTL_CTX) {
1266 		sysctl_ctx_free(&vi->ctx);
1267 		vi->flags &= ~VI_SYSCTL_CTX;
1268 	}
1269 
1270 #ifdef DEV_NETMAP
1271 	if (vi->ifp->if_capabilities & IFCAP_NETMAP) {
1272 		for_each_nm_txq(vi, i, nm_txq) {
1273 			free_nm_txq(vi, nm_txq);
1274 		}
1275 
1276 		for_each_nm_rxq(vi, i, nm_rxq) {
1277 			free_nm_rxq(vi, nm_rxq);
1278 		}
1279 	}
1280 #endif
1281 
1282 	/*
1283 	 * Take down all the tx queues first, as they reference the rx queues
1284 	 * (for egress updates, etc.).
1285 	 */
1286 
1287 	for_each_txq(vi, i, txq) {
1288 		free_txq(vi, txq);
1289 	}
1290 #ifdef TCP_OFFLOAD
1291 	for_each_ofld_txq(vi, i, ofld_txq) {
1292 		free_wrq(sc, ofld_txq);
1293 	}
1294 #endif
1295 
1296 	/*
1297 	 * Then take down the rx queues.
1298 	 */
1299 
1300 	for_each_rxq(vi, i, rxq) {
1301 		free_rxq(vi, rxq);
1302 	}
1303 #ifdef TCP_OFFLOAD
1304 	for_each_ofld_rxq(vi, i, ofld_rxq) {
1305 		free_ofld_rxq(vi, ofld_rxq);
1306 	}
1307 #endif
1308 
1309 	return (0);
1310 }
1311 
1312 /*
1313  * Interrupt handler when the driver is using only 1 interrupt.  This is a very
1314  * unusual scenario.
1315  *
1316  * a) Deals with errors, if any.
1317  * b) Services firmware event queue, which is taking interrupts for all other
1318  *    queues.
1319  */
1320 void
t4_intr_all(void * arg)1321 t4_intr_all(void *arg)
1322 {
1323 	struct adapter *sc = arg;
1324 	struct sge_iq *fwq = &sc->sge.fwq;
1325 
1326 	MPASS(sc->intr_count == 1);
1327 
1328 	if (sc->intr_type == INTR_INTX)
1329 		t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
1330 
1331 	t4_intr_err(arg);
1332 	t4_intr_evt(fwq);
1333 }
1334 
1335 /*
1336  * Interrupt handler for errors (installed directly when multiple interrupts are
1337  * being used, or called by t4_intr_all).
1338  */
1339 void
t4_intr_err(void * arg)1340 t4_intr_err(void *arg)
1341 {
1342 	struct adapter *sc = arg;
1343 	uint32_t v;
1344 	const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;
1345 
1346 	if (sc->flags & ADAP_ERR)
1347 		return;
1348 
1349 	v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE));
1350 	if (v & F_PFSW) {
1351 		sc->swintr++;
1352 		t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v);
1353 	}
1354 
1355 	t4_slow_intr_handler(sc, verbose);
1356 }
1357 
1358 /*
1359  * Interrupt handler for iq-only queues.  The firmware event queue is the only
1360  * such queue right now.
1361  */
1362 void
t4_intr_evt(void * arg)1363 t4_intr_evt(void *arg)
1364 {
1365 	struct sge_iq *iq = arg;
1366 
1367 	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
1368 		service_iq(iq, 0);
1369 		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
1370 	}
1371 }
1372 
1373 /*
1374  * Interrupt handler for iq+fl queues.
1375  */
1376 void
t4_intr(void * arg)1377 t4_intr(void *arg)
1378 {
1379 	struct sge_iq *iq = arg;
1380 
1381 	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
1382 		service_iq_fl(iq, 0);
1383 		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
1384 	}
1385 }
1386 
1387 #ifdef DEV_NETMAP
1388 /*
1389  * Interrupt handler for netmap rx queues.
1390  */
1391 void
t4_nm_intr(void * arg)1392 t4_nm_intr(void *arg)
1393 {
1394 	struct sge_nm_rxq *nm_rxq = arg;
1395 
1396 	if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) {
1397 		service_nm_rxq(nm_rxq);
1398 		(void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON);
1399 	}
1400 }
1401 
1402 /*
1403  * Interrupt handler for vectors shared between NIC and netmap rx queues.
1404  */
1405 void
t4_vi_intr(void * arg)1406 t4_vi_intr(void *arg)
1407 {
1408 	struct irq *irq = arg;
1409 
1410 	MPASS(irq->nm_rxq != NULL);
1411 	t4_nm_intr(irq->nm_rxq);
1412 
1413 	MPASS(irq->rxq != NULL);
1414 	t4_intr(irq->rxq);
1415 }
1416 #endif
1417 
/*
 * Deals with interrupts on an iq-only (no freelist) queue.
 *
 * The descriptor ring is walked from iq->cidx for as long as the generation
 * bit in the descriptor matches iq->gen, and each response is dispatched by
 * its type.  A forwarded interrupt for another iq causes that iq to be
 * serviced from here; if it is not done after one pass it is parked on a
 * local list and revisited, one queue at a time, between checks of this
 * queue's own ring.
 *
 * budget: if non-zero, the number of descriptors after which the function
 * gives up and returns EINPROGRESS.  If zero, the ring is drained and the
 * consumer index is pushed to the GTS register every iq->qsize / 16
 * descriptors.
 *
 * Returns 0 when the ring (and the local list) is empty, in which case the
 * final GTS write also applies iq->intr_params.  Returns EINPROGRESS if the
 * budget ran out.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs = 0, limit;
	int rsp_type;
	uint32_t lq;
	/* iqs (with freelists) that need more servicing. */
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
	KASSERT((iq->flags & IQ_HAS_FL) == 0,
	    ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq,
	    iq->flags));
	MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
	MPASS((iq->flags & IQ_LRO_ENABLED) == 0);

	limit = budget ? budget : iq->qsize / 16;

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

			/* Read the rest of the descriptor only after the gen check. */
			rmb();

			rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
			lq = be32toh(d->rsp.pldbuflen_qid);

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:
				panic("%s: data for an iq (%p) with no freelist",
				    __func__, iq);

				/* NOTREACHED */

			case X_RSPD_TYPE_CPL:
				KASSERT(d->rss.opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    d->rss.opcode));
				/* No payload (NULL mbuf) on a queue without a freelist. */
				t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL);
				break;

			case X_RSPD_TYPE_INTR:
				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (__predict_true(lq >= 1024)) {
					t4_an_handler(iq, &d->rsp);
					break;
				}

				/* Forwarded interrupt: look up the iq it is for. */
				q = sc->sge.iqmap[lq - sc->sge.iq_start -
				    sc->sge.iq_base];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq_fl(q, q->qsize / 16) == 0) {
						(void) atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						/* Not done; q stays BUSY on the list. */
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			/* Advance; the expected generation flips on wraparound. */
			d++;
			if (__predict_false(++iq->cidx == iq->sidx)) {
				iq->cidx = 0;
				iq->gen ^= F_RSPD_GEN;
				d = &iq->desc[0];
			}
			if (__predict_false(++ndescs == limit)) {
				/* Report the descriptors consumed so far. */
				t4_write_reg(sc, sc->sge_gts_reg,
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

				if (budget) {
					return (EINPROGRESS);
				}
			}
		}

		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq_fl(q, q->qsize / 8) == 0)
			(void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

	/* Report what is left and apply the queue's interrupt parameters. */
	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	return (0);
}
1542 
1543 static inline int
sort_before_lro(struct lro_ctrl * lro)1544 sort_before_lro(struct lro_ctrl *lro)
1545 {
1546 
1547 	return (lro->lro_mbuf_max != 0);
1548 }
1549 
1550 /*
1551  * Deals with interrupts on an iq+fl queue.
1552  */
1553 static int
service_iq_fl(struct sge_iq * iq,int budget)1554 service_iq_fl(struct sge_iq *iq, int budget)
1555 {
1556 	struct sge_rxq *rxq = iq_to_rxq(iq);
1557 	struct sge_fl *fl;
1558 	struct adapter *sc = iq->adapter;
1559 	struct iq_desc *d = &iq->desc[iq->cidx];
1560 	int ndescs = 0, limit;
1561 	int rsp_type, refill, starved;
1562 	uint32_t lq;
1563 	uint16_t fl_hw_cidx;
1564 	struct mbuf *m0;
1565 #if defined(INET) || defined(INET6)
1566 	const struct timeval lro_timeout = {0, sc->lro_timeout};
1567 	struct lro_ctrl *lro = &rxq->lro;
1568 #endif
1569 
1570 	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
1571 	MPASS(iq->flags & IQ_HAS_FL);
1572 
1573 	limit = budget ? budget : iq->qsize / 16;
1574 	fl = &rxq->fl;
1575 	fl_hw_cidx = fl->hw_cidx;	/* stable snapshot */
1576 
1577 #if defined(INET) || defined(INET6)
1578 	if (iq->flags & IQ_ADJ_CREDIT) {
1579 		MPASS(sort_before_lro(lro));
1580 		iq->flags &= ~IQ_ADJ_CREDIT;
1581 		if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) {
1582 			tcp_lro_flush_all(lro);
1583 			t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) |
1584 			    V_INGRESSQID((u32)iq->cntxt_id) |
1585 			    V_SEINTARM(iq->intr_params));
1586 			return (0);
1587 		}
1588 		ndescs = 1;
1589 	}
1590 #else
1591 	MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
1592 #endif
1593 
1594 	while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {
1595 
1596 		rmb();
1597 
1598 		refill = 0;
1599 		m0 = NULL;
1600 		rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
1601 		lq = be32toh(d->rsp.pldbuflen_qid);
1602 
1603 		switch (rsp_type) {
1604 		case X_RSPD_TYPE_FLBUF:
1605 
1606 			m0 = get_fl_payload(sc, fl, lq);
1607 			if (__predict_false(m0 == NULL))
1608 				goto out;
1609 			refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2;
1610 #ifdef T4_PKT_TIMESTAMP
1611 			/*
1612 			 * 60 bit timestamp for the payload is
1613 			 * *(uint64_t *)m0->m_pktdat.  Note that it is
1614 			 * in the leading free-space in the mbuf.  The
1615 			 * kernel can clobber it during a pullup,
1616 			 * m_copymdata, etc.  You need to make sure that
1617 			 * the mbuf reaches you unmolested if you care
1618 			 * about the timestamp.
1619 			 */
1620 			*(uint64_t *)m0->m_pktdat =
1621 			    be64toh(ctrl->u.last_flit) & 0xfffffffffffffff;
1622 #endif
1623 
1624 			/* fall through */
1625 
1626 		case X_RSPD_TYPE_CPL:
1627 			KASSERT(d->rss.opcode < NUM_CPL_CMDS,
1628 			    ("%s: bad opcode %02x.", __func__, d->rss.opcode));
1629 			t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0);
1630 			break;
1631 
1632 		case X_RSPD_TYPE_INTR:
1633 
1634 			/*
1635 			 * There are 1K interrupt-capable queues (qids 0
1636 			 * through 1023).  A response type indicating a
1637 			 * forwarded interrupt with a qid >= 1K is an
1638 			 * iWARP async notification.  That is the only
1639 			 * acceptable indirect interrupt on this queue.
1640 			 */
1641 			if (__predict_false(lq < 1024)) {
1642 				panic("%s: indirect interrupt on iq_fl %p "
1643 				    "with qid %u", __func__, iq, lq);
1644 			}
1645 
1646 			t4_an_handler(iq, &d->rsp);
1647 			break;
1648 
1649 		default:
1650 			KASSERT(0, ("%s: illegal response type %d on iq %p",
1651 			    __func__, rsp_type, iq));
1652 			log(LOG_ERR, "%s: illegal response type %d on iq %p",
1653 			    device_get_nameunit(sc->dev), rsp_type, iq);
1654 			break;
1655 		}
1656 
1657 		d++;
1658 		if (__predict_false(++iq->cidx == iq->sidx)) {
1659 			iq->cidx = 0;
1660 			iq->gen ^= F_RSPD_GEN;
1661 			d = &iq->desc[0];
1662 		}
1663 		if (__predict_false(++ndescs == limit)) {
1664 			t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
1665 			    V_INGRESSQID(iq->cntxt_id) |
1666 			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1667 			ndescs = 0;
1668 
1669 #if defined(INET) || defined(INET6)
1670 			if (iq->flags & IQ_LRO_ENABLED &&
1671 			    !sort_before_lro(lro) &&
1672 			    sc->lro_timeout != 0) {
1673 				tcp_lro_flush_inactive(lro, &lro_timeout);
1674 			}
1675 #endif
1676 			if (budget) {
1677 				FL_LOCK(fl);
1678 				refill_fl(sc, fl, 32);
1679 				FL_UNLOCK(fl);
1680 
1681 				return (EINPROGRESS);
1682 			}
1683 		}
1684 		if (refill) {
1685 			FL_LOCK(fl);
1686 			refill_fl(sc, fl, 32);
1687 			FL_UNLOCK(fl);
1688 			fl_hw_cidx = fl->hw_cidx;
1689 		}
1690 	}
1691 out:
1692 #if defined(INET) || defined(INET6)
1693 	if (iq->flags & IQ_LRO_ENABLED) {
1694 		if (ndescs > 0 && lro->lro_mbuf_count > 8) {
1695 			MPASS(sort_before_lro(lro));
1696 			/* hold back one credit and don't flush LRO state */
1697 			iq->flags |= IQ_ADJ_CREDIT;
1698 			ndescs--;
1699 		} else {
1700 			tcp_lro_flush_all(lro);
1701 		}
1702 	}
1703 #endif
1704 
1705 	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
1706 	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
1707 
1708 	FL_LOCK(fl);
1709 	starved = refill_fl(sc, fl, 64);
1710 	FL_UNLOCK(fl);
1711 	if (__predict_false(starved != 0))
1712 		add_fl_to_sfl(sc, fl);
1713 
1714 	return (0);
1715 }
1716 
1717 static inline int
cl_has_metadata(struct sge_fl * fl,struct cluster_layout * cll)1718 cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
1719 {
1720 	int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;
1721 
1722 	if (rc)
1723 		MPASS(cll->region3 >= CL_METADATA_SIZE);
1724 
1725 	return (rc);
1726 }
1727 
1728 static inline struct cluster_metadata *
cl_metadata(struct adapter * sc,struct sge_fl * fl,struct cluster_layout * cll,caddr_t cl)1729 cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
1730     caddr_t cl)
1731 {
1732 
1733 	if (cl_has_metadata(fl, cll)) {
1734 		struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
1735 
1736 		return ((struct cluster_metadata *)(cl + swz->size) - 1);
1737 	}
1738 	return (NULL);
1739 }
1740 
1741 static void
rxb_free(struct mbuf * m,void * arg1,void * arg2)1742 rxb_free(struct mbuf *m, void *arg1, void *arg2)
1743 {
1744 	uma_zone_t zone = arg1;
1745 	caddr_t cl = arg2;
1746 
1747 	uma_zfree(zone, cl);
1748 	counter_u64_add(extfree_rels, 1);
1749 }
1750 
/*
 * Returns one mbuf that covers the next piece of a frame, taken from the
 * freelist buffer at fl->cidx (starting fl->rx_offset bytes into it).
 *
 * fr_offset is how much of the frame has been handed out already (0 for the
 * first segment, which gets a packet header and m_pkthdr.len = remaining).
 * remaining is how much of the frame is still to be handed out, this segment
 * included.  The segment's m_len is the smaller of 'remaining' and what is
 * left in the hardware buffer.
 *
 * Returns NULL if an mbuf could not be obtained; the freelist state
 * (cidx, rx_offset) is not advanced in that case.
 *
 * The mbuf returned by this function could be allocated from zone_mbuf or
 * constructed in spare room in the cluster.
 *
 * The mbuf carries the payload in one of these ways
 * a) frame inside the mbuf (mbuf from zone_mbuf)
 * b) m_cljset (for clusters without metadata) zone_mbuf
 * c) m_extaddref (cluster with metadata) inline mbuf
 * d) m_extaddref (cluster with metadata) zone_mbuf
 */
static struct mbuf *
get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct mbuf *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct cluster_layout *cll = &sd->cll;
	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
	struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
	struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
	int len, blen;
	caddr_t payload;

	blen = hwb->size - fl->rx_offset;	/* max possible in this buf */
	len = min(remaining, blen);
	payload = sd->cl + cll->region1 + fl->rx_offset;
	if (fl->flags & FL_BUF_PACKING) {
		/* Padding that takes the end of this piece to fl->buf_boundary. */
		const u_int l = fr_offset + len;
		const u_int pad = roundup2(l, fl->buf_boundary) - l;

		/*
		 * If the padded piece ends before the end of the hw buffer then
		 * only len + pad bytes are consumed; otherwise the rest of the
		 * buffer is.
		 */
		if (fl->rx_offset + len + pad < hwb->size)
			blen = len + pad;
		MPASS(fl->rx_offset + blen <= hwb->size);
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
	}


	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {

		/*
		 * Copy payload into a freshly allocated mbuf.
		 */

		m = fr_offset == 0 ?
		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (NULL);
		fl->mbuf_allocated++;
#ifdef T4_PKT_TIMESTAMP
		/* Leave room for a timestamp */
		m->m_data += 8;
#endif
		/* copy data to mbuf */
		bcopy(payload, mtod(m, caddr_t), len);

	} else if (sd->nmbuf * MSIZE < cll->region1) {

		/*
		 * There's spare room in the cluster for an mbuf.  Create one
		 * and associate it with the payload that's in the cluster.
		 */

		MPASS(clm != NULL);
		/* The nmbuf'th MSIZE-sized slot at the start of the cluster. */
		m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
		/* No bzero required */
		if (m_init(m, M_NOWAIT, MT_DATA,
		    fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE))
			return (NULL);
		fl->mbuf_inlined++;
		m_extaddref(m, payload, blen, &clm->refcount, rxb_free,
		    swz->zone, sd->cl);
		/* extfree_refs counts clusters, not mbufs: first reference only. */
		if (sd->nmbuf++ == 0)
			counter_u64_add(extfree_refs, 1);

	} else {

		/*
		 * Grab an mbuf from zone_mbuf and associate it with the
		 * payload in the cluster.
		 */

		m = fr_offset == 0 ?
		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (NULL);
		fl->mbuf_allocated++;
		if (clm != NULL) {
			m_extaddref(m, payload, blen, &clm->refcount,
			    rxb_free, swz->zone, sd->cl);
			if (sd->nmbuf++ == 0)
				counter_u64_add(extfree_refs, 1);
		} else {
			/* No metadata: the whole cluster is given to the mbuf. */
			m_cljset(m, sd->cl, swz->type);
			sd->cl = NULL;	/* consumed, not a recycle candidate */
		}
	}
	if (fr_offset == 0)
		m->m_pkthdr.len = remaining;
	m->m_len = len;

	if (fl->flags & FL_BUF_PACKING) {
		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= hwb->size);
		if (fl->rx_offset < hwb->size)
			return (m);	/* without advancing the cidx */
	}

	/*
	 * Done with this buffer.  fl->cidx counts individual buffers; every
	 * 8th step fl->hw_cidx is set to cidx / 8, and both wrap to 0 when
	 * that value reaches fl->sidx.
	 */
	if (__predict_false(++fl->cidx % 8 == 0)) {
		uint16_t cidx = fl->cidx / 8;

		if (__predict_false(cidx == fl->sidx))
			fl->cidx = cidx = 0;
		fl->hw_cidx = cidx;
	}
	fl->rx_offset = 0;

	return (m);
}
1870 
/*
 * Assemble the mbuf chain for one frame out of the freelist.
 *
 * len_newbuf is the descriptor's length/newbuf word: G_RSPD_LEN() of it is
 * the total frame length and F_RSPD_NEWBUF is tested to decide whether a
 * partially used buffer has to be skipped first.
 *
 * Returns the head of the chain (always a packet header mbuf) or NULL if an
 * mbuf could not be obtained.  If the failure happens after the first
 * segment, the partial chain and the amount still to be collected are saved
 * in the freelist (fl->m0, fl->pnext, fl->remaining) and FL_BUF_RESUME is
 * set; the next call then picks up where this one left off instead of
 * starting a new frame.
 */
static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf)
{
	struct mbuf *m0, *m, **pnext;
	u_int remaining;
	const u_int total = G_RSPD_LEN(len_newbuf);

	if (__predict_false(fl->flags & FL_BUF_RESUME)) {
		/* Continue the frame that an earlier call could not finish. */
		M_ASSERTPKTHDR(fl->m0);
		MPASS(fl->m0->m_pkthdr.len == total);
		MPASS(fl->remaining < total);

		m0 = fl->m0;
		pnext = fl->pnext;
		remaining = fl->remaining;
		fl->flags &= ~FL_BUF_RESUME;
		goto get_segment;
	}

	/*
	 * The frame starts in a new buffer but the current one is partially
	 * used: abandon the rest of it and move to the next buffer (same cidx
	 * and hw_cidx bookkeeping as in get_scatter_segment).
	 */
	if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
		fl->rx_offset = 0;
		if (__predict_false(++fl->cidx % 8 == 0)) {
			uint16_t cidx = fl->cidx / 8;

			if (__predict_false(cidx == fl->sidx))
				fl->cidx = cidx = 0;
			fl->hw_cidx = cidx;
		}
	}

	/*
	 * Payload starts at rx_offset in the current hw buffer.  Its length is
	 * 'total' and it may span multiple hw buffers.
	 */

	m0 = get_scatter_segment(sc, fl, 0, total);
	if (m0 == NULL)
		return (NULL);
	remaining = total - m0->m_len;
	pnext = &m0->m_next;
	while (remaining > 0) {
get_segment:
		/* Every segment after the first starts at the top of a buffer. */
		MPASS(fl->rx_offset == 0);
		m = get_scatter_segment(sc, fl, total - remaining, remaining);
		if (__predict_false(m == NULL)) {
			/* Save progress so that the next call can resume. */
			fl->m0 = m0;
			fl->pnext = pnext;
			fl->remaining = remaining;
			fl->flags |= FL_BUF_RESUME;
			return (NULL);
		}
		*pnext = m;
		pnext = &m->m_next;
		remaining -= m->m_len;
	}
	/* Terminate the chain. */
	*pnext = NULL;

	M_ASSERTPKTHDR(m0);
	return (m0);
}
1931 
1932 static int
t4_eth_rx(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m0)1933 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
1934 {
1935 	struct sge_rxq *rxq = iq_to_rxq(iq);
1936 	struct ifnet *ifp = rxq->ifp;
1937 	struct adapter *sc = iq->adapter;
1938 	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
1939 #if defined(INET) || defined(INET6)
1940 	struct lro_ctrl *lro = &rxq->lro;
1941 #endif
1942 	static const int sw_hashtype[4][2] = {
1943 		{M_HASHTYPE_NONE, M_HASHTYPE_NONE},
1944 		{M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
1945 		{M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
1946 		{M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
1947 	};
1948 
1949 	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
1950 	    rss->opcode));
1951 
1952 	m0->m_pkthdr.len -= sc->params.sge.fl_pktshift;
1953 	m0->m_len -= sc->params.sge.fl_pktshift;
1954 	m0->m_data += sc->params.sge.fl_pktshift;
1955 
1956 	m0->m_pkthdr.rcvif = ifp;
1957 	M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]);
1958 	m0->m_pkthdr.flowid = be32toh(rss->hash_val);
1959 
1960 	if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) {
1961 		if (ifp->if_capenable & IFCAP_RXCSUM &&
1962 		    cpl->l2info & htobe32(F_RXF_IP)) {
1963 			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
1964 			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1965 			rxq->rxcsum++;
1966 		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
1967 		    cpl->l2info & htobe32(F_RXF_IP6)) {
1968 			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
1969 			    CSUM_PSEUDO_HDR);
1970 			rxq->rxcsum++;
1971 		}
1972 
1973 		if (__predict_false(cpl->ip_frag))
1974 			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
1975 		else
1976 			m0->m_pkthdr.csum_data = 0xffff;
1977 	}
1978 
1979 	if (cpl->vlan_ex) {
1980 		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
1981 		m0->m_flags |= M_VLANTAG;
1982 		rxq->vlan_extraction++;
1983 	}
1984 
1985 #if defined(INET) || defined(INET6)
1986 	if (iq->flags & IQ_LRO_ENABLED) {
1987 		if (sort_before_lro(lro)) {
1988 			tcp_lro_queue_mbuf(lro, m0);
1989 			return (0); /* queued for sort, then LRO */
1990 		}
1991 		if (tcp_lro_rx(lro, m0, 0) == 0)
1992 			return (0); /* queued for LRO */
1993 	}
1994 #endif
1995 	ifp->if_input(ifp, m0);
1996 
1997 	return (0);
1998 }
1999 
2000 /*
2001  * Must drain the wrq or make sure that someone else will.
2002  */
2003 static void
wrq_tx_drain(void * arg,int n)2004 wrq_tx_drain(void *arg, int n)
2005 {
2006 	struct sge_wrq *wrq = arg;
2007 	struct sge_eq *eq = &wrq->eq;
2008 
2009 	EQ_LOCK(eq);
2010 	if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
2011 		drain_wrq_wr_list(wrq->adapter, wrq);
2012 	EQ_UNLOCK(eq);
2013 }
2014 
2015 static void
drain_wrq_wr_list(struct adapter * sc,struct sge_wrq * wrq)2016 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq)
2017 {
2018 	struct sge_eq *eq = &wrq->eq;
2019 	u_int available, dbdiff;	/* # of hardware descriptors */
2020 	u_int n;
2021 	struct wrqe *wr;
2022 	struct fw_eth_tx_pkt_wr *dst;	/* any fw WR struct will do */
2023 
2024 	EQ_LOCK_ASSERT_OWNED(eq);
2025 	MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
2026 	wr = STAILQ_FIRST(&wrq->wr_list);
2027 	MPASS(wr != NULL);	/* Must be called with something useful to do */
2028 	MPASS(eq->pidx == eq->dbidx);
2029 	dbdiff = 0;
2030 
2031 	do {
2032 		eq->cidx = read_hw_cidx(eq);
2033 		if (eq->pidx == eq->cidx)
2034 			available = eq->sidx - 1;
2035 		else
2036 			available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
2037 
2038 		MPASS(wr->wrq == wrq);
2039 		n = howmany(wr->wr_len, EQ_ESIZE);
2040 		if (available < n)
2041 			break;
2042 
2043 		dst = (void *)&eq->desc[eq->pidx];
2044 		if (__predict_true(eq->sidx - eq->pidx > n)) {
2045 			/* Won't wrap, won't end exactly at the status page. */
2046 			bcopy(&wr->wr[0], dst, wr->wr_len);
2047 			eq->pidx += n;
2048 		} else {
2049 			int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE;
2050 
2051 			bcopy(&wr->wr[0], dst, first_portion);
2052 			if (wr->wr_len > first_portion) {
2053 				bcopy(&wr->wr[first_portion], &eq->desc[0],
2054 				    wr->wr_len - first_portion);
2055 			}
2056 			eq->pidx = n - (eq->sidx - eq->pidx);
2057 		}
2058 		wrq->tx_wrs_copied++;
2059 
2060 		if (available < eq->sidx / 4 &&
2061 		    atomic_cmpset_int(&eq->equiq, 0, 1)) {
2062 				/*
2063 				 * XXX: This is not 100% reliable with some
2064 				 * types of WRs.  But this is a very unusual
2065 				 * situation for an ofld/ctrl queue anyway.
2066 				 */
2067 			dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
2068 			    F_FW_WR_EQUEQ);
2069 		}
2070 
2071 		dbdiff += n;
2072 		if (dbdiff >= 16) {
2073 			ring_eq_db(sc, eq, dbdiff);
2074 			dbdiff = 0;
2075 		}
2076 
2077 		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
2078 		free_wrqe(wr);
2079 		MPASS(wrq->nwr_pending > 0);
2080 		wrq->nwr_pending--;
2081 		MPASS(wrq->ndesc_needed >= n);
2082 		wrq->ndesc_needed -= n;
2083 	} while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL);
2084 
2085 	if (dbdiff)
2086 		ring_eq_db(sc, eq, dbdiff);
2087 }
2088 
2089 /*
2090  * Doesn't fail.  Holds on to work requests it can't send right away.
2091  */
2092 void
t4_wrq_tx_locked(struct adapter * sc,struct sge_wrq * wrq,struct wrqe * wr)2093 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
2094 {
2095 #ifdef INVARIANTS
2096 	struct sge_eq *eq = &wrq->eq;
2097 #endif
2098 
2099 	EQ_LOCK_ASSERT_OWNED(eq);
2100 	MPASS(wr != NULL);
2101 	MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN);
2102 	MPASS((wr->wr_len & 0x7) == 0);
2103 
2104 	STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
2105 	wrq->nwr_pending++;
2106 	wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE);
2107 
2108 	if (!TAILQ_EMPTY(&wrq->incomplete_wrs))
2109 		return;	/* commit_wrq_wr will drain wr_list as well. */
2110 
2111 	drain_wrq_wr_list(sc, wrq);
2112 
2113 	/* Doorbell must have caught up to the pidx. */
2114 	MPASS(eq->pidx == eq->dbidx);
2115 }
2116 
2117 void
t4_update_fl_bufsize(struct ifnet * ifp)2118 t4_update_fl_bufsize(struct ifnet *ifp)
2119 {
2120 	struct vi_info *vi = ifp->if_softc;
2121 	struct adapter *sc = vi->pi->adapter;
2122 	struct sge_rxq *rxq;
2123 #ifdef TCP_OFFLOAD
2124 	struct sge_ofld_rxq *ofld_rxq;
2125 #endif
2126 	struct sge_fl *fl;
2127 	int i, maxp, mtu = ifp->if_mtu;
2128 
2129 	maxp = mtu_to_max_payload(sc, mtu, 0);
2130 	for_each_rxq(vi, i, rxq) {
2131 		fl = &rxq->fl;
2132 
2133 		FL_LOCK(fl);
2134 		find_best_refill_source(sc, fl, maxp);
2135 		FL_UNLOCK(fl);
2136 	}
2137 #ifdef TCP_OFFLOAD
2138 	maxp = mtu_to_max_payload(sc, mtu, 1);
2139 	for_each_ofld_rxq(vi, i, ofld_rxq) {
2140 		fl = &ofld_rxq->fl;
2141 
2142 		FL_LOCK(fl);
2143 		find_best_refill_source(sc, fl, maxp);
2144 		FL_UNLOCK(fl);
2145 	}
2146 #endif
2147 }
2148 
2149 static inline int
mbuf_nsegs(struct mbuf * m)2150 mbuf_nsegs(struct mbuf *m)
2151 {
2152 
2153 	M_ASSERTPKTHDR(m);
2154 	KASSERT(m->m_pkthdr.l5hlen > 0,
2155 	    ("%s: mbuf %p missing information on # of segments.", __func__, m));
2156 
2157 	return (m->m_pkthdr.l5hlen);
2158 }
2159 
2160 static inline void
set_mbuf_nsegs(struct mbuf * m,uint8_t nsegs)2161 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
2162 {
2163 
2164 	M_ASSERTPKTHDR(m);
2165 	m->m_pkthdr.l5hlen = nsegs;
2166 }
2167 
2168 static inline int
mbuf_len16(struct mbuf * m)2169 mbuf_len16(struct mbuf *m)
2170 {
2171 	int n;
2172 
2173 	M_ASSERTPKTHDR(m);
2174 	n = m->m_pkthdr.PH_loc.eight[0];
2175 	MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16);
2176 
2177 	return (n);
2178 }
2179 
2180 static inline void
set_mbuf_len16(struct mbuf * m,uint8_t len16)2181 set_mbuf_len16(struct mbuf *m, uint8_t len16)
2182 {
2183 
2184 	M_ASSERTPKTHDR(m);
2185 	m->m_pkthdr.PH_loc.eight[0] = len16;
2186 }
2187 
2188 static inline int
needs_tso(struct mbuf * m)2189 needs_tso(struct mbuf *m)
2190 {
2191 
2192 	M_ASSERTPKTHDR(m);
2193 
2194 	return (m->m_pkthdr.csum_flags & CSUM_TSO);
2195 }
2196 
2197 static inline int
needs_l3_csum(struct mbuf * m)2198 needs_l3_csum(struct mbuf *m)
2199 {
2200 
2201 	M_ASSERTPKTHDR(m);
2202 
2203 	return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO));
2204 }
2205 
2206 static inline int
needs_l4_csum(struct mbuf * m)2207 needs_l4_csum(struct mbuf *m)
2208 {
2209 
2210 	M_ASSERTPKTHDR(m);
2211 
2212 	return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
2213 	    CSUM_TCP_IPV6 | CSUM_TSO));
2214 }
2215 
2216 static inline int
needs_vlan_insertion(struct mbuf * m)2217 needs_vlan_insertion(struct mbuf *m)
2218 {
2219 
2220 	M_ASSERTPKTHDR(m);
2221 
2222 	return (m->m_flags & M_VLANTAG);
2223 }
2224 
2225 static void *
m_advance(struct mbuf ** pm,int * poffset,int len)2226 m_advance(struct mbuf **pm, int *poffset, int len)
2227 {
2228 	struct mbuf *m = *pm;
2229 	int offset = *poffset;
2230 	uintptr_t p = 0;
2231 
2232 	MPASS(len > 0);
2233 
2234 	for (;;) {
2235 		if (offset + len < m->m_len) {
2236 			offset += len;
2237 			p = mtod(m, uintptr_t) + offset;
2238 			break;
2239 		}
2240 		len -= m->m_len - offset;
2241 		m = m->m_next;
2242 		offset = 0;
2243 		MPASS(m != NULL);
2244 	}
2245 	*poffset = offset;
2246 	*pm = m;
2247 	return ((void *)p);
2248 }
2249 
2250 /*
2251  * Can deal with empty mbufs in the chain that have m_len = 0, but the chain
2252  * must have at least one mbuf that's not empty.
2253  */
2254 static inline int
count_mbuf_nsegs(struct mbuf * m)2255 count_mbuf_nsegs(struct mbuf *m)
2256 {
2257 	vm_paddr_t lastb, next;
2258 	vm_offset_t va;
2259 	int len, nsegs;
2260 
2261 	MPASS(m != NULL);
2262 
2263 	nsegs = 0;
2264 	lastb = 0;
2265 	for (; m; m = m->m_next) {
2266 
2267 		len = m->m_len;
2268 		if (__predict_false(len == 0))
2269 			continue;
2270 		va = mtod(m, vm_offset_t);
2271 		next = pmap_kextract(va);
2272 		nsegs += sglist_count(m->m_data, len);
2273 		if (lastb + 1 == next)
2274 			nsegs--;
2275 		lastb = pmap_kextract(va + len - 1);
2276 	}
2277 
2278 	MPASS(nsegs > 0);
2279 	return (nsegs);
2280 }
2281 
2282 /*
2283  * Analyze the mbuf to determine its tx needs.  The mbuf passed in may change:
2284  * a) caller can assume it's been freed if this function returns with an error.
2285  * b) it may get defragged up if the gather list is too long for the hardware.
2286  */
2287 int
parse_pkt(struct adapter * sc,struct mbuf ** mp)2288 parse_pkt(struct adapter *sc, struct mbuf **mp)
2289 {
2290 	struct mbuf *m0 = *mp, *m;
2291 	int rc, nsegs, defragged = 0, offset;
2292 	struct ether_header *eh;
2293 	void *l3hdr;
2294 #if defined(INET) || defined(INET6)
2295 	struct tcphdr *tcp;
2296 #endif
2297 	uint16_t eh_type;
2298 
2299 	M_ASSERTPKTHDR(m0);
2300 	if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) {
2301 		rc = EINVAL;
2302 fail:
2303 		m_freem(m0);
2304 		*mp = NULL;
2305 		return (rc);
2306 	}
2307 restart:
2308 	/*
2309 	 * First count the number of gather list segments in the payload.
2310 	 * Defrag the mbuf if nsegs exceeds the hardware limit.
2311 	 */
2312 	M_ASSERTPKTHDR(m0);
2313 	MPASS(m0->m_pkthdr.len > 0);
2314 	nsegs = count_mbuf_nsegs(m0);
2315 	if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) {
2316 		if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) {
2317 			rc = EFBIG;
2318 			goto fail;
2319 		}
2320 		*mp = m0 = m;	/* update caller's copy after defrag */
2321 		goto restart;
2322 	}
2323 
2324 	if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) {
2325 		m0 = m_pullup(m0, m0->m_pkthdr.len);
2326 		if (m0 == NULL) {
2327 			/* Should have left well enough alone. */
2328 			rc = EFBIG;
2329 			goto fail;
2330 		}
2331 		*mp = m0;	/* update caller's copy after pullup */
2332 		goto restart;
2333 	}
2334 	set_mbuf_nsegs(m0, nsegs);
2335 	if (sc->flags & IS_VF)
2336 		set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0)));
2337 	else
2338 		set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0)));
2339 
2340 	if (!needs_tso(m0) &&
2341 	    !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0))))
2342 		return (0);
2343 
2344 	m = m0;
2345 	eh = mtod(m, struct ether_header *);
2346 	eh_type = ntohs(eh->ether_type);
2347 	if (eh_type == ETHERTYPE_VLAN) {
2348 		struct ether_vlan_header *evh = (void *)eh;
2349 
2350 		eh_type = ntohs(evh->evl_proto);
2351 		m0->m_pkthdr.l2hlen = sizeof(*evh);
2352 	} else
2353 		m0->m_pkthdr.l2hlen = sizeof(*eh);
2354 
2355 	offset = 0;
2356 	l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen);
2357 
2358 	switch (eh_type) {
2359 #ifdef INET6
2360 	case ETHERTYPE_IPV6:
2361 	{
2362 		struct ip6_hdr *ip6 = l3hdr;
2363 
2364 		MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP);
2365 
2366 		m0->m_pkthdr.l3hlen = sizeof(*ip6);
2367 		break;
2368 	}
2369 #endif
2370 #ifdef INET
2371 	case ETHERTYPE_IP:
2372 	{
2373 		struct ip *ip = l3hdr;
2374 
2375 		m0->m_pkthdr.l3hlen = ip->ip_hl * 4;
2376 		break;
2377 	}
2378 #endif
2379 	default:
2380 		panic("%s: ethertype 0x%04x unknown.  if_cxgbe must be compiled"
2381 		    " with the same INET/INET6 options as the kernel.",
2382 		    __func__, eh_type);
2383 	}
2384 
2385 #if defined(INET) || defined(INET6)
2386 	if (needs_tso(m0)) {
2387 		tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
2388 		m0->m_pkthdr.l4hlen = tcp->th_off * 4;
2389 	}
2390 #endif
2391 	MPASS(m0 == *mp);
2392 	return (0);
2393 }
2394 
2395 void *
start_wrq_wr(struct sge_wrq * wrq,int len16,struct wrq_cookie * cookie)2396 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie)
2397 {
2398 	struct sge_eq *eq = &wrq->eq;
2399 	struct adapter *sc = wrq->adapter;
2400 	int ndesc, available;
2401 	struct wrqe *wr;
2402 	void *w;
2403 
2404 	MPASS(len16 > 0);
2405 	ndesc = howmany(len16, EQ_ESIZE / 16);
2406 	MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC);
2407 
2408 	EQ_LOCK(eq);
2409 
2410 	if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
2411 		drain_wrq_wr_list(sc, wrq);
2412 
2413 	if (!STAILQ_EMPTY(&wrq->wr_list)) {
2414 slowpath:
2415 		EQ_UNLOCK(eq);
2416 		wr = alloc_wrqe(len16 * 16, wrq);
2417 		if (__predict_false(wr == NULL))
2418 			return (NULL);
2419 		cookie->pidx = -1;
2420 		cookie->ndesc = ndesc;
2421 		return (&wr->wr);
2422 	}
2423 
2424 	eq->cidx = read_hw_cidx(eq);
2425 	if (eq->pidx == eq->cidx)
2426 		available = eq->sidx - 1;
2427 	else
2428 		available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
2429 	if (available < ndesc)
2430 		goto slowpath;
2431 
2432 	cookie->pidx = eq->pidx;
2433 	cookie->ndesc = ndesc;
2434 	TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link);
2435 
2436 	w = &eq->desc[eq->pidx];
2437 	IDXINCR(eq->pidx, ndesc, eq->sidx);
2438 	if (__predict_false(cookie->pidx + ndesc > eq->sidx)) {
2439 		w = &wrq->ss[0];
2440 		wrq->ss_pidx = cookie->pidx;
2441 		wrq->ss_len = len16 * 16;
2442 	}
2443 
2444 	EQ_UNLOCK(eq);
2445 
2446 	return (w);
2447 }
2448 
2449 void
commit_wrq_wr(struct sge_wrq * wrq,void * w,struct wrq_cookie * cookie)2450 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie)
2451 {
2452 	struct sge_eq *eq = &wrq->eq;
2453 	struct adapter *sc = wrq->adapter;
2454 	int ndesc, pidx;
2455 	struct wrq_cookie *prev, *next;
2456 
2457 	if (cookie->pidx == -1) {
2458 		struct wrqe *wr = __containerof(w, struct wrqe, wr);
2459 
2460 		t4_wrq_tx(sc, wr);
2461 		return;
2462 	}
2463 
2464 	if (__predict_false(w == &wrq->ss[0])) {
2465 		int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE;
2466 
2467 		MPASS(wrq->ss_len > n);	/* WR had better wrap around. */
2468 		bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n);
2469 		bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n);
2470 		wrq->tx_wrs_ss++;
2471 	} else
2472 		wrq->tx_wrs_direct++;
2473 
2474 	EQ_LOCK(eq);
2475 	ndesc = cookie->ndesc;	/* Can be more than SGE_MAX_WR_NDESC here. */
2476 	pidx = cookie->pidx;
2477 	MPASS(pidx >= 0 && pidx < eq->sidx);
2478 	prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link);
2479 	next = TAILQ_NEXT(cookie, link);
2480 	if (prev == NULL) {
2481 		MPASS(pidx == eq->dbidx);
2482 		if (next == NULL || ndesc >= 16) {
2483 			int available;
2484 			struct fw_eth_tx_pkt_wr *dst;	/* any fw WR struct will do */
2485 
2486 			/*
2487 			 * Note that the WR via which we'll request tx updates
2488 			 * is at pidx and not eq->pidx, which has moved on
2489 			 * already.
2490 			 */
2491 			dst = (void *)&eq->desc[pidx];
2492 			available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
2493 			if (available < eq->sidx / 4 &&
2494 			    atomic_cmpset_int(&eq->equiq, 0, 1)) {
2495 				/*
2496 				 * XXX: This is not 100% reliable with some
2497 				 * types of WRs.  But this is a very unusual
2498 				 * situation for an ofld/ctrl queue anyway.
2499 				 */
2500 				dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
2501 				    F_FW_WR_EQUEQ);
2502 			}
2503 
2504 			ring_eq_db(wrq->adapter, eq, ndesc);
2505 		} else {
2506 			MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc);
2507 			next->pidx = pidx;
2508 			next->ndesc += ndesc;
2509 		}
2510 	} else {
2511 		MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc);
2512 		prev->ndesc += ndesc;
2513 	}
2514 	TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link);
2515 
2516 	if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
2517 		drain_wrq_wr_list(sc, wrq);
2518 
2519 #ifdef INVARIANTS
2520 	if (TAILQ_EMPTY(&wrq->incomplete_wrs)) {
2521 		/* Doorbell must have caught up to the pidx. */
2522 		MPASS(wrq->eq.pidx == wrq->eq.dbidx);
2523 	}
2524 #endif
2525 	EQ_UNLOCK(eq);
2526 }
2527 
2528 static u_int
can_resume_eth_tx(struct mp_ring * r)2529 can_resume_eth_tx(struct mp_ring *r)
2530 {
2531 	struct sge_eq *eq = r->cookie;
2532 
2533 	return (total_available_tx_desc(eq) > eq->sidx / 8);
2534 }
2535 
/*
 * Non-zero if the packet needs TSO, which is the only condition checked here.
 */
static inline int
cannot_use_txpkts(struct mbuf *m)
{
	/* maybe put a GL limit too, to avoid silliness? */

	return (needs_tso(m));
}
2543 
2544 static inline int
discard_tx(struct sge_eq * eq)2545 discard_tx(struct sge_eq *eq)
2546 {
2547 
2548 	return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED);
2549 }
2550 
2551 /*
2552  * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to
2553  * be consumed.  Return the actual number consumed.  0 indicates a stall.
2554  */
2555 static u_int
eth_tx(struct mp_ring * r,u_int cidx,u_int pidx)2556 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx)
2557 {
2558 	struct sge_txq *txq = r->cookie;
2559 	struct sge_eq *eq = &txq->eq;
2560 	struct ifnet *ifp = txq->ifp;
2561 	struct vi_info *vi = ifp->if_softc;
2562 	struct port_info *pi = vi->pi;
2563 	struct adapter *sc = pi->adapter;
2564 	u_int total, remaining;		/* # of packets */
2565 	u_int available, dbdiff;	/* # of hardware descriptors */
2566 	u_int n, next_cidx;
2567 	struct mbuf *m0, *tail;
2568 	struct txpkts txp;
2569 	struct fw_eth_tx_pkts_wr *wr;	/* any fw WR struct will do */
2570 
2571 	remaining = IDXDIFF(pidx, cidx, r->size);
2572 	MPASS(remaining > 0);	/* Must not be called without work to do. */
2573 	total = 0;
2574 
2575 	TXQ_LOCK(txq);
2576 	if (__predict_false(discard_tx(eq))) {
2577 		while (cidx != pidx) {
2578 			m0 = r->items[cidx];
2579 			m_freem(m0);
2580 			if (++cidx == r->size)
2581 				cidx = 0;
2582 		}
2583 		reclaim_tx_descs(txq, 2048);
2584 		total = remaining;
2585 		goto done;
2586 	}
2587 
2588 	/* How many hardware descriptors do we have readily available. */
2589 	if (eq->pidx == eq->cidx)
2590 		available = eq->sidx - 1;
2591 	else
2592 		available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
2593 	dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx);
2594 
2595 	while (remaining > 0) {
2596 
2597 		m0 = r->items[cidx];
2598 		M_ASSERTPKTHDR(m0);
2599 		MPASS(m0->m_nextpkt == NULL);
2600 
2601 		if (available < SGE_MAX_WR_NDESC) {
2602 			available += reclaim_tx_descs(txq, 64);
2603 			if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16))
2604 				break;	/* out of descriptors */
2605 		}
2606 
2607 		next_cidx = cidx + 1;
2608 		if (__predict_false(next_cidx == r->size))
2609 			next_cidx = 0;
2610 
2611 		wr = (void *)&eq->desc[eq->pidx];
2612 		if (sc->flags & IS_VF) {
2613 			total++;
2614 			remaining--;
2615 			ETHER_BPF_MTAP(ifp, m0);
2616 			n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0,
2617 			    available);
2618 		} else if (remaining > 1 &&
2619 		    try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) {
2620 
2621 			/* pkts at cidx, next_cidx should both be in txp. */
2622 			MPASS(txp.npkt == 2);
2623 			tail = r->items[next_cidx];
2624 			MPASS(tail->m_nextpkt == NULL);
2625 			ETHER_BPF_MTAP(ifp, m0);
2626 			ETHER_BPF_MTAP(ifp, tail);
2627 			m0->m_nextpkt = tail;
2628 
2629 			if (__predict_false(++next_cidx == r->size))
2630 				next_cidx = 0;
2631 
2632 			while (next_cidx != pidx) {
2633 				if (add_to_txpkts(r->items[next_cidx], &txp,
2634 				    available) != 0)
2635 					break;
2636 				tail->m_nextpkt = r->items[next_cidx];
2637 				tail = tail->m_nextpkt;
2638 				ETHER_BPF_MTAP(ifp, tail);
2639 				if (__predict_false(++next_cidx == r->size))
2640 					next_cidx = 0;
2641 			}
2642 
2643 			n = write_txpkts_wr(txq, wr, m0, &txp, available);
2644 			total += txp.npkt;
2645 			remaining -= txp.npkt;
2646 		} else {
2647 			total++;
2648 			remaining--;
2649 			ETHER_BPF_MTAP(ifp, m0);
2650 			n = write_txpkt_wr(txq, (void *)wr, m0, available);
2651 		}
2652 		MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC);
2653 
2654 		available -= n;
2655 		dbdiff += n;
2656 		IDXINCR(eq->pidx, n, eq->sidx);
2657 
2658 		if (total_available_tx_desc(eq) < eq->sidx / 4 &&
2659 		    atomic_cmpset_int(&eq->equiq, 0, 1)) {
2660 			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
2661 			    F_FW_WR_EQUEQ);
2662 			eq->equeqidx = eq->pidx;
2663 		} else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) {
2664 			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
2665 			eq->equeqidx = eq->pidx;
2666 		}
2667 
2668 		if (dbdiff >= 16 && remaining >= 4) {
2669 			ring_eq_db(sc, eq, dbdiff);
2670 			available += reclaim_tx_descs(txq, 4 * dbdiff);
2671 			dbdiff = 0;
2672 		}
2673 
2674 		cidx = next_cidx;
2675 	}
2676 	if (dbdiff != 0) {
2677 		ring_eq_db(sc, eq, dbdiff);
2678 		reclaim_tx_descs(txq, 32);
2679 	}
2680 done:
2681 	TXQ_UNLOCK(txq);
2682 
2683 	return (total);
2684 }
2685 
2686 static inline void
init_iq(struct sge_iq * iq,struct adapter * sc,int tmr_idx,int pktc_idx,int qsize)2687 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
2688     int qsize)
2689 {
2690 
2691 	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
2692 	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
2693 	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
2694 	    ("%s: bad pktc_idx %d", __func__, pktc_idx));
2695 
2696 	iq->flags = 0;
2697 	iq->adapter = sc;
2698 	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
2699 	iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
2700 	if (pktc_idx >= 0) {
2701 		iq->intr_params |= F_QINTR_CNT_EN;
2702 		iq->intr_pktc_idx = pktc_idx;
2703 	}
2704 	iq->qsize = roundup2(qsize, 16);	/* See FW_IQ_CMD/iqsize */
2705 	iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE;
2706 }
2707 
2708 static inline void
init_fl(struct adapter * sc,struct sge_fl * fl,int qsize,int maxp,char * name)2709 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name)
2710 {
2711 
2712 	fl->qsize = qsize;
2713 	fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE;
2714 	strlcpy(fl->lockname, name, sizeof(fl->lockname));
2715 	if (sc->flags & BUF_PACKING_OK &&
2716 	    ((!is_t4(sc) && buffer_packing) ||	/* T5+: enabled unless 0 */
2717 	    (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */
2718 		fl->flags |= FL_BUF_PACKING;
2719 	find_best_refill_source(sc, fl, maxp);
2720 	find_safe_refill_source(sc, fl);
2721 }
2722 
2723 static inline void
init_eq(struct adapter * sc,struct sge_eq * eq,int eqtype,int qsize,uint8_t tx_chan,uint16_t iqid,char * name)2724 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize,
2725     uint8_t tx_chan, uint16_t iqid, char *name)
2726 {
2727 	KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
2728 
2729 	eq->flags = eqtype & EQ_TYPEMASK;
2730 	eq->tx_chan = tx_chan;
2731 	eq->iqid = iqid;
2732 	eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE;
2733 	strlcpy(eq->lockname, name, sizeof(eq->lockname));
2734 }
2735 
2736 static int
alloc_ring(struct adapter * sc,size_t len,bus_dma_tag_t * tag,bus_dmamap_t * map,bus_addr_t * pa,void ** va)2737 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
2738     bus_dmamap_t *map, bus_addr_t *pa, void **va)
2739 {
2740 	int rc;
2741 
2742 	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
2743 	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
2744 	if (rc != 0) {
2745 		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
2746 		goto done;
2747 	}
2748 
2749 	rc = bus_dmamem_alloc(*tag, va,
2750 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
2751 	if (rc != 0) {
2752 		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
2753 		goto done;
2754 	}
2755 
2756 	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
2757 	if (rc != 0) {
2758 		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
2759 		goto done;
2760 	}
2761 done:
2762 	if (rc)
2763 		free_ring(sc, *tag, *map, *pa, *va);
2764 
2765 	return (rc);
2766 }
2767 
2768 static int
free_ring(struct adapter * sc,bus_dma_tag_t tag,bus_dmamap_t map,bus_addr_t pa,void * va)2769 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
2770     bus_addr_t pa, void *va)
2771 {
2772 	if (pa)
2773 		bus_dmamap_unload(tag, map);
2774 	if (va)
2775 		bus_dmamem_free(tag, va, map);
2776 	if (tag)
2777 		bus_dma_tag_destroy(tag);
2778 
2779 	return (0);
2780 }
2781 
2782 /*
2783  * Allocates the ring for an ingress queue and an optional freelist.  If the
2784  * freelist is specified it will be allocated and then associated with the
2785  * ingress queue.
2786  *
2787  * Returns errno on failure.  Resources allocated up to that point may still be
2788  * allocated.  Caller is responsible for cleanup in case this function fails.
2789  *
2790  * If the ingress queue will take interrupts directly then the intr_idx
2791  * specifies the vector, starting from 0.  -1 means the interrupts for this
2792  * queue should be forwarded to the fwq.
2793  */
2794 static int
alloc_iq_fl(struct vi_info * vi,struct sge_iq * iq,struct sge_fl * fl,int intr_idx,int cong)2795 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl,
2796     int intr_idx, int cong)
2797 {
2798 	int rc, i, cntxt_id;
2799 	size_t len;
2800 	struct fw_iq_cmd c;
2801 	struct port_info *pi = vi->pi;
2802 	struct adapter *sc = iq->adapter;
2803 	struct sge_params *sp = &sc->params.sge;
2804 	__be32 v = 0;
2805 
2806 	len = iq->qsize * IQ_ESIZE;
2807 	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
2808 	    (void **)&iq->desc);
2809 	if (rc != 0)
2810 		return (rc);
2811 
2812 	bzero(&c, sizeof(c));
2813 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
2814 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
2815 	    V_FW_IQ_CMD_VFN(0));
2816 
2817 	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
2818 	    FW_LEN16(c));
2819 
2820 	/* Special handling for firmware event queue */
2821 	if (iq == &sc->sge.fwq)
2822 		v |= F_FW_IQ_CMD_IQASYNCH;
2823 
2824 	if (intr_idx < 0) {
2825 		/* Forwarded interrupts, all headed to fwq */
2826 		v |= F_FW_IQ_CMD_IQANDST;
2827 		v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id);
2828 	} else {
2829 		KASSERT(intr_idx < sc->intr_count,
2830 		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
2831 		v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
2832 	}
2833 
2834 	c.type_to_iqandstindex = htobe32(v |
2835 	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
2836 	    V_FW_IQ_CMD_VIID(vi->viid) |
2837 	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
2838 	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
2839 	    F_FW_IQ_CMD_IQGTSMODE |
2840 	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
2841 	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
2842 	c.iqsize = htobe16(iq->qsize);
2843 	c.iqaddr = htobe64(iq->ba);
2844 	if (cong >= 0)
2845 		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
2846 
2847 	if (fl) {
2848 		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
2849 
2850 		len = fl->qsize * EQ_ESIZE;
2851 		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
2852 		    &fl->ba, (void **)&fl->desc);
2853 		if (rc)
2854 			return (rc);
2855 
2856 		/* Allocate space for one software descriptor per buffer. */
2857 		rc = alloc_fl_sdesc(fl);
2858 		if (rc != 0) {
2859 			device_printf(sc->dev,
2860 			    "failed to setup fl software descriptors: %d\n",
2861 			    rc);
2862 			return (rc);
2863 		}
2864 
2865 		if (fl->flags & FL_BUF_PACKING) {
2866 			fl->lowat = roundup2(sp->fl_starve_threshold2, 8);
2867 			fl->buf_boundary = sp->pack_boundary;
2868 		} else {
2869 			fl->lowat = roundup2(sp->fl_starve_threshold, 8);
2870 			fl->buf_boundary = 16;
2871 		}
2872 		if (fl_pad && fl->buf_boundary < sp->pad_boundary)
2873 			fl->buf_boundary = sp->pad_boundary;
2874 
2875 		c.iqns_to_fl0congen |=
2876 		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
2877 			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
2878 			(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
2879 			(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
2880 			    0));
2881 		if (cong >= 0) {
2882 			c.iqns_to_fl0congen |=
2883 				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
2884 				    F_FW_IQ_CMD_FL0CONGCIF |
2885 				    F_FW_IQ_CMD_FL0CONGEN);
2886 		}
2887 		c.fl0dcaen_to_fl0cidxfthresh =
2888 		    htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ?
2889 			X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) |
2890 			V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ?
2891 			X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
2892 		c.fl0size = htobe16(fl->qsize);
2893 		c.fl0addr = htobe64(fl->ba);
2894 	}
2895 
2896 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2897 	if (rc != 0) {
2898 		device_printf(sc->dev,
2899 		    "failed to create ingress queue: %d\n", rc);
2900 		return (rc);
2901 	}
2902 
2903 	iq->cidx = 0;
2904 	iq->gen = F_RSPD_GEN;
2905 	iq->intr_next = iq->intr_params;
2906 	iq->cntxt_id = be16toh(c.iqid);
2907 	iq->abs_id = be16toh(c.physiqid);
2908 	iq->flags |= IQ_ALLOCATED;
2909 
2910 	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
2911 	if (cntxt_id >= sc->sge.niq) {
2912 		panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
2913 		    cntxt_id, sc->sge.niq - 1);
2914 	}
2915 	sc->sge.iqmap[cntxt_id] = iq;
2916 
2917 	if (fl) {
2918 		u_int qid;
2919 
2920 		iq->flags |= IQ_HAS_FL;
2921 		fl->cntxt_id = be16toh(c.fl0id);
2922 		fl->pidx = fl->cidx = 0;
2923 
2924 		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
2925 		if (cntxt_id >= sc->sge.neq) {
2926 			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
2927 			    __func__, cntxt_id, sc->sge.neq - 1);
2928 		}
2929 		sc->sge.eqmap[cntxt_id] = (void *)fl;
2930 
2931 		qid = fl->cntxt_id;
2932 		if (isset(&sc->doorbells, DOORBELL_UDB)) {
2933 			uint32_t s_qpp = sc->params.sge.eq_s_qpp;
2934 			uint32_t mask = (1 << s_qpp) - 1;
2935 			volatile uint8_t *udb;
2936 
2937 			udb = sc->udbs_base + UDBS_DB_OFFSET;
2938 			udb += (qid >> s_qpp) << PAGE_SHIFT;
2939 			qid &= mask;
2940 			if (qid < PAGE_SIZE / UDBS_SEG_SIZE) {
2941 				udb += qid << UDBS_SEG_SHIFT;
2942 				qid = 0;
2943 			}
2944 			fl->udb = (volatile void *)udb;
2945 		}
2946 		fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db;
2947 
2948 		FL_LOCK(fl);
2949 		/* Enough to make sure the SGE doesn't think it's starved */
2950 		refill_fl(sc, fl, fl->lowat);
2951 		FL_UNLOCK(fl);
2952 	}
2953 
2954 	if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) {
2955 		uint32_t param, val;
2956 
2957 		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
2958 		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
2959 		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
2960 		if (cong == 0)
2961 			val = 1 << 19;
2962 		else {
2963 			val = 2 << 19;
2964 			for (i = 0; i < 4; i++) {
2965 				if (cong & (1 << i))
2966 					val |= 1 << (i << 2);
2967 			}
2968 		}
2969 
2970 		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2971 		if (rc != 0) {
2972 			/* report error but carry on */
2973 			device_printf(sc->dev,
2974 			    "failed to set congestion manager context for "
2975 			    "ingress queue %d: %d\n", iq->cntxt_id, rc);
2976 		}
2977 	}
2978 
2979 	/* Enable IQ interrupts */
2980 	atomic_store_rel_int(&iq->state, IQS_IDLE);
2981 	t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) |
2982 	    V_INGRESSQID(iq->cntxt_id));
2983 
2984 	return (0);
2985 }
2986 
2987 static int
free_iq_fl(struct vi_info * vi,struct sge_iq * iq,struct sge_fl * fl)2988 free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
2989 {
2990 	int rc;
2991 	struct adapter *sc = iq->adapter;
2992 	device_t dev;
2993 
2994 	if (sc == NULL)
2995 		return (0);	/* nothing to do */
2996 
2997 	dev = vi ? vi->dev : sc->dev;
2998 
2999 	if (iq->flags & IQ_ALLOCATED) {
3000 		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
3001 		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
3002 		    fl ? fl->cntxt_id : 0xffff, 0xffff);
3003 		if (rc != 0) {
3004 			device_printf(dev,
3005 			    "failed to free queue %p: %d\n", iq, rc);
3006 			return (rc);
3007 		}
3008 		iq->flags &= ~IQ_ALLOCATED;
3009 	}
3010 
3011 	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
3012 
3013 	bzero(iq, sizeof(*iq));
3014 
3015 	if (fl) {
3016 		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
3017 		    fl->desc);
3018 
3019 		if (fl->sdesc)
3020 			free_fl_sdesc(sc, fl);
3021 
3022 		if (mtx_initialized(&fl->fl_lock))
3023 			mtx_destroy(&fl->fl_lock);
3024 
3025 		bzero(fl, sizeof(*fl));
3026 	}
3027 
3028 	return (0);
3029 }
3030 
3031 static void
add_iq_sysctls(struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_iq * iq)3032 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
3033     struct sge_iq *iq)
3034 {
3035 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3036 
3037 	SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba,
3038 	    "bus address of descriptor ring");
3039 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
3040 	    iq->qsize * IQ_ESIZE, "descriptor ring size in bytes");
3041 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id",
3042 	    CTLTYPE_INT | CTLFLAG_RD, &iq->abs_id, 0, sysctl_uint16, "I",
3043 	    "absolute id of the queue");
3044 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
3045 	    CTLTYPE_INT | CTLFLAG_RD, &iq->cntxt_id, 0, sysctl_uint16, "I",
3046 	    "SGE context id of the queue");
3047 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
3048 	    CTLTYPE_INT | CTLFLAG_RD, &iq->cidx, 0, sysctl_uint16, "I",
3049 	    "consumer index");
3050 }
3051 
3052 static void
add_fl_sysctls(struct adapter * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_fl * fl)3053 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
3054     struct sysctl_oid *oid, struct sge_fl *fl)
3055 {
3056 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3057 
3058 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
3059 	    "freelist");
3060 	children = SYSCTL_CHILDREN(oid);
3061 
3062 	SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD,
3063 	    &fl->ba, "bus address of descriptor ring");
3064 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
3065 	    fl->sidx * EQ_ESIZE + sc->params.sge.spg_len,
3066 	    "desc ring size in bytes");
3067 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
3068 	    CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
3069 	    "SGE context id of the freelist");
3070 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL,
3071 	    fl_pad ? 1 : 0, "padding enabled");
3072 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL,
3073 	    fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled");
3074 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
3075 	    0, "consumer index");
3076 	if (fl->flags & FL_BUF_PACKING) {
3077 		SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset",
3078 		    CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset");
3079 	}
3080 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx,
3081 	    0, "producer index");
3082 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated",
3083 	    CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated");
3084 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined",
3085 	    CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters");
3086 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated",
3087 	    CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated");
3088 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled",
3089 	    CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled");
3090 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled",
3091 	    CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)");
3092 }
3093 
3094 static int
alloc_fwq(struct adapter * sc)3095 alloc_fwq(struct adapter *sc)
3096 {
3097 	int rc, intr_idx;
3098 	struct sge_iq *fwq = &sc->sge.fwq;
3099 	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
3100 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3101 
3102 	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE);
3103 	if (sc->flags & IS_VF)
3104 		intr_idx = 0;
3105 	else
3106 		intr_idx = sc->intr_count > 1 ? 1 : 0;
3107 	rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1);
3108 	if (rc != 0) {
3109 		device_printf(sc->dev,
3110 		    "failed to create firmware event queue: %d\n", rc);
3111 		return (rc);
3112 	}
3113 
3114 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
3115 	    NULL, "firmware event queue");
3116 	add_iq_sysctls(&sc->ctx, oid, fwq);
3117 
3118 	return (0);
3119 }
3120 
3121 static int
free_fwq(struct adapter * sc)3122 free_fwq(struct adapter *sc)
3123 {
3124 	return free_iq_fl(NULL, &sc->sge.fwq, NULL);
3125 }
3126 
3127 static int
alloc_ctrlq(struct adapter * sc,struct sge_wrq * ctrlq,int idx,struct sysctl_oid * oid)3128 alloc_ctrlq(struct adapter *sc, struct sge_wrq *ctrlq, int idx,
3129     struct sysctl_oid *oid)
3130 {
3131 	int rc;
3132 	char name[16];
3133 	struct sysctl_oid_list *children;
3134 
3135 	snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev),
3136 	    idx);
3137 	init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[idx]->tx_chan,
3138 	    sc->sge.fwq.cntxt_id, name);
3139 
3140 	children = SYSCTL_CHILDREN(oid);
3141 	snprintf(name, sizeof(name), "%d", idx);
3142 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
3143 	    NULL, "ctrl queue");
3144 	rc = alloc_wrq(sc, NULL, ctrlq, oid);
3145 
3146 	return (rc);
3147 }
3148 
3149 int
tnl_cong(struct port_info * pi,int drop)3150 tnl_cong(struct port_info *pi, int drop)
3151 {
3152 
3153 	if (drop == -1)
3154 		return (-1);
3155 	else if (drop == 1)
3156 		return (0);
3157 	else
3158 		return (pi->rx_e_chan_map);
3159 }
3160 
3161 static int
alloc_rxq(struct vi_info * vi,struct sge_rxq * rxq,int intr_idx,int idx,struct sysctl_oid * oid)3162 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx,
3163     struct sysctl_oid *oid)
3164 {
3165 	int rc;
3166 	struct adapter *sc = vi->pi->adapter;
3167 	struct sysctl_oid_list *children;
3168 	char name[16];
3169 
3170 	rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx,
3171 	    tnl_cong(vi->pi, cong_drop));
3172 	if (rc != 0)
3173 		return (rc);
3174 
3175 	if (idx == 0)
3176 		sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id;
3177 	else
3178 		KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id,
3179 		    ("iq_base mismatch"));
3180 	KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF,
3181 	    ("PF with non-zero iq_base"));
3182 
3183 	/*
3184 	 * The freelist is just barely above the starvation threshold right now,
3185 	 * fill it up a bit more.
3186 	 */
3187 	FL_LOCK(&rxq->fl);
3188 	refill_fl(sc, &rxq->fl, 128);
3189 	FL_UNLOCK(&rxq->fl);
3190 
3191 #if defined(INET) || defined(INET6)
3192 	rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs);
3193 	if (rc != 0)
3194 		return (rc);
3195 	MPASS(rxq->lro.ifp == vi->ifp);	/* also indicates LRO init'ed */
3196 
3197 	if (vi->ifp->if_capenable & IFCAP_LRO)
3198 		rxq->iq.flags |= IQ_LRO_ENABLED;
3199 #endif
3200 	rxq->ifp = vi->ifp;
3201 
3202 	children = SYSCTL_CHILDREN(oid);
3203 
3204 	snprintf(name, sizeof(name), "%d", idx);
3205 	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
3206 	    NULL, "rx queue");
3207 	children = SYSCTL_CHILDREN(oid);
3208 
3209 	add_iq_sysctls(&vi->ctx, oid, &rxq->iq);
3210 #if defined(INET) || defined(INET6)
3211 	SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
3212 	    &rxq->lro.lro_queued, 0, NULL);
3213 	SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
3214 	    &rxq->lro.lro_flushed, 0, NULL);
3215 #endif
3216 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
3217 	    &rxq->rxcsum, "# of times hardware assisted with checksum");
3218 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction",
3219 	    CTLFLAG_RD, &rxq->vlan_extraction,
3220 	    "# of times hardware extracted 802.1Q tag");
3221 
3222 	add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl);
3223 
3224 	return (rc);
3225 }
3226 
3227 static int
free_rxq(struct vi_info * vi,struct sge_rxq * rxq)3228 free_rxq(struct vi_info *vi, struct sge_rxq *rxq)
3229 {
3230 	int rc;
3231 
3232 #if defined(INET) || defined(INET6)
3233 	if (rxq->lro.ifp) {
3234 		tcp_lro_free(&rxq->lro);
3235 		rxq->lro.ifp = NULL;
3236 	}
3237 #endif
3238 
3239 	rc = free_iq_fl(vi, &rxq->iq, &rxq->fl);
3240 	if (rc == 0)
3241 		bzero(rxq, sizeof(*rxq));
3242 
3243 	return (rc);
3244 }
3245 
3246 #ifdef TCP_OFFLOAD
3247 static int
alloc_ofld_rxq(struct vi_info * vi,struct sge_ofld_rxq * ofld_rxq,int intr_idx,int idx,struct sysctl_oid * oid)3248 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq,
3249     int intr_idx, int idx, struct sysctl_oid *oid)
3250 {
3251 	struct port_info *pi = vi->pi;
3252 	int rc;
3253 	struct sysctl_oid_list *children;
3254 	char name[16];
3255 
3256 	rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0);
3257 	if (rc != 0)
3258 		return (rc);
3259 
3260 	children = SYSCTL_CHILDREN(oid);
3261 
3262 	snprintf(name, sizeof(name), "%d", idx);
3263 	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
3264 	    NULL, "rx queue");
3265 	add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq);
3266 	add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl);
3267 
3268 	return (rc);
3269 }
3270 
3271 static int
free_ofld_rxq(struct vi_info * vi,struct sge_ofld_rxq * ofld_rxq)3272 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq)
3273 {
3274 	int rc;
3275 
3276 	rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl);
3277 	if (rc == 0)
3278 		bzero(ofld_rxq, sizeof(*ofld_rxq));
3279 
3280 	return (rc);
3281 }
3282 #endif
3283 
3284 #ifdef DEV_NETMAP
3285 static int
alloc_nm_rxq(struct vi_info * vi,struct sge_nm_rxq * nm_rxq,int intr_idx,int idx,struct sysctl_oid * oid)3286 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx,
3287     int idx, struct sysctl_oid *oid)
3288 {
3289 	int rc;
3290 	struct sysctl_oid_list *children;
3291 	struct sysctl_ctx_list *ctx;
3292 	char name[16];
3293 	size_t len;
3294 	struct adapter *sc = vi->pi->adapter;
3295 	struct netmap_adapter *na = NA(vi->ifp);
3296 
3297 	MPASS(na != NULL);
3298 
3299 	len = vi->qsize_rxq * IQ_ESIZE;
3300 	rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map,
3301 	    &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc);
3302 	if (rc != 0)
3303 		return (rc);
3304 
3305 	len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len;
3306 	rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map,
3307 	    &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc);
3308 	if (rc != 0)
3309 		return (rc);
3310 
3311 	nm_rxq->vi = vi;
3312 	nm_rxq->nid = idx;
3313 	nm_rxq->iq_cidx = 0;
3314 	nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE;
3315 	nm_rxq->iq_gen = F_RSPD_GEN;
3316 	nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
3317 	nm_rxq->fl_sidx = na->num_rx_desc;
3318 	nm_rxq->intr_idx = intr_idx;
3319 
3320 	ctx = &vi->ctx;
3321 	children = SYSCTL_CHILDREN(oid);
3322 
3323 	snprintf(name, sizeof(name), "%d", idx);
3324 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL,
3325 	    "rx queue");
3326 	children = SYSCTL_CHILDREN(oid);
3327 
3328 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id",
3329 	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16,
3330 	    "I", "absolute id of the queue");
3331 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
3332 	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16,
3333 	    "I", "SGE context id of the queue");
3334 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
3335 	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I",
3336 	    "consumer index");
3337 
3338 	children = SYSCTL_CHILDREN(oid);
3339 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
3340 	    "freelist");
3341 	children = SYSCTL_CHILDREN(oid);
3342 
3343 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
3344 	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16,
3345 	    "I", "SGE context id of the freelist");
3346 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
3347 	    &nm_rxq->fl_cidx, 0, "consumer index");
3348 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
3349 	    &nm_rxq->fl_pidx, 0, "producer index");
3350 
3351 	return (rc);
3352 }
3353 
3354 
3355 static int
free_nm_rxq(struct vi_info * vi,struct sge_nm_rxq * nm_rxq)3356 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
3357 {
3358 	struct adapter *sc = vi->pi->adapter;
3359 
3360 	free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba,
3361 	    nm_rxq->iq_desc);
3362 	free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba,
3363 	    nm_rxq->fl_desc);
3364 
3365 	return (0);
3366 }
3367 
3368 static int
alloc_nm_txq(struct vi_info * vi,struct sge_nm_txq * nm_txq,int iqidx,int idx,struct sysctl_oid * oid)3369 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx,
3370     struct sysctl_oid *oid)
3371 {
3372 	int rc;
3373 	size_t len;
3374 	struct port_info *pi = vi->pi;
3375 	struct adapter *sc = pi->adapter;
3376 	struct netmap_adapter *na = NA(vi->ifp);
3377 	char name[16];
3378 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3379 
3380 	len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len;
3381 	rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map,
3382 	    &nm_txq->ba, (void **)&nm_txq->desc);
3383 	if (rc)
3384 		return (rc);
3385 
3386 	nm_txq->pidx = nm_txq->cidx = 0;
3387 	nm_txq->sidx = na->num_tx_desc;
3388 	nm_txq->nid = idx;
3389 	nm_txq->iqidx = iqidx;
3390 	nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3391 	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
3392 	    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
3393 
3394 	snprintf(name, sizeof(name), "%d", idx);
3395 	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
3396 	    NULL, "netmap tx queue");
3397 	children = SYSCTL_CHILDREN(oid);
3398 
3399 	SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3400 	    &nm_txq->cntxt_id, 0, "SGE context id of the queue");
3401 	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx",
3402 	    CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I",
3403 	    "consumer index");
3404 	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx",
3405 	    CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I",
3406 	    "producer index");
3407 
3408 	return (rc);
3409 }
3410 
3411 static int
free_nm_txq(struct vi_info * vi,struct sge_nm_txq * nm_txq)3412 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
3413 {
3414 	struct adapter *sc = vi->pi->adapter;
3415 
3416 	free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba,
3417 	    nm_txq->desc);
3418 
3419 	return (0);
3420 }
3421 #endif
3422 
3423 /*
3424  * Returns a reasonable automatic cidx flush threshold for a given queue size.
3425  */
3426 static u_int
qsize_to_fthresh(int qsize)3427 qsize_to_fthresh(int qsize)
3428 {
3429 	u_int fthresh;
3430 
3431 	while (!powerof2(qsize))
3432 		qsize++;
3433 	fthresh = ilog2(qsize);
3434 	if (fthresh > X_CIDXFLUSHTHRESH_128)
3435 		fthresh = X_CIDXFLUSHTHRESH_128;
3436 
3437 	return (fthresh);
3438 }
3439 
3440 static int
ctrl_eq_alloc(struct adapter * sc,struct sge_eq * eq)3441 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
3442 {
3443 	int rc, cntxt_id;
3444 	struct fw_eq_ctrl_cmd c;
3445 	int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
3446 
3447 	bzero(&c, sizeof(c));
3448 
3449 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
3450 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
3451 	    V_FW_EQ_CTRL_CMD_VFN(0));
3452 	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
3453 	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
3454 	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid));
3455 	c.physeqid_pkd = htobe32(0);
3456 	c.fetchszm_to_iqid =
3457 	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
3458 		V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
3459 		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
3460 	c.dcaen_to_eqsize =
3461 	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
3462 		X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
3463 		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
3464 		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) |
3465 		V_FW_EQ_CTRL_CMD_EQSIZE(qsize));
3466 	c.eqaddr = htobe64(eq->ba);
3467 
3468 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
3469 	if (rc != 0) {
3470 		device_printf(sc->dev,
3471 		    "failed to create control queue %d: %d\n", eq->tx_chan, rc);
3472 		return (rc);
3473 	}
3474 	eq->flags |= EQ_ALLOCATED;
3475 
3476 	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
3477 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
3478 	if (cntxt_id >= sc->sge.neq)
3479 	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
3480 		cntxt_id, sc->sge.neq - 1);
3481 	sc->sge.eqmap[cntxt_id] = eq;
3482 
3483 	return (rc);
3484 }
3485 
3486 static int
eth_eq_alloc(struct adapter * sc,struct vi_info * vi,struct sge_eq * eq)3487 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
3488 {
3489 	int rc, cntxt_id;
3490 	struct fw_eq_eth_cmd c;
3491 	int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
3492 
3493 	bzero(&c, sizeof(c));
3494 
3495 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
3496 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
3497 	    V_FW_EQ_ETH_CMD_VFN(0));
3498 	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
3499 	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
3500 	c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
3501 	    F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid));
3502 	c.fetchszm_to_iqid =
3503 	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
3504 		V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
3505 		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
3506 	c.dcaen_to_eqsize =
3507 	    htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
3508 		X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
3509 		V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
3510 		V_FW_EQ_ETH_CMD_EQSIZE(qsize));
3511 	c.eqaddr = htobe64(eq->ba);
3512 
3513 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
3514 	if (rc != 0) {
3515 		device_printf(vi->dev,
3516 		    "failed to create Ethernet egress queue: %d\n", rc);
3517 		return (rc);
3518 	}
3519 	eq->flags |= EQ_ALLOCATED;
3520 
3521 	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
3522 	eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd));
3523 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
3524 	if (cntxt_id >= sc->sge.neq)
3525 	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
3526 		cntxt_id, sc->sge.neq - 1);
3527 	sc->sge.eqmap[cntxt_id] = eq;
3528 
3529 	return (rc);
3530 }
3531 
3532 #ifdef TCP_OFFLOAD
3533 static int
ofld_eq_alloc(struct adapter * sc,struct vi_info * vi,struct sge_eq * eq)3534 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
3535 {
3536 	int rc, cntxt_id;
3537 	struct fw_eq_ofld_cmd c;
3538 	int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
3539 
3540 	bzero(&c, sizeof(c));
3541 
3542 	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
3543 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
3544 	    V_FW_EQ_OFLD_CMD_VFN(0));
3545 	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
3546 	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
3547 	c.fetchszm_to_iqid =
3548 		htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
3549 		    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
3550 		    F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
3551 	c.dcaen_to_eqsize =
3552 	    htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
3553 		X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
3554 		V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
3555 		V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) |
3556 		V_FW_EQ_OFLD_CMD_EQSIZE(qsize));
3557 	c.eqaddr = htobe64(eq->ba);
3558 
3559 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
3560 	if (rc != 0) {
3561 		device_printf(vi->dev,
3562 		    "failed to create egress queue for TCP offload: %d\n", rc);
3563 		return (rc);
3564 	}
3565 	eq->flags |= EQ_ALLOCATED;
3566 
3567 	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
3568 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
3569 	if (cntxt_id >= sc->sge.neq)
3570 	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
3571 		cntxt_id, sc->sge.neq - 1);
3572 	sc->sge.eqmap[cntxt_id] = eq;
3573 
3574 	return (rc);
3575 }
3576 #endif
3577 
3578 static int
alloc_eq(struct adapter * sc,struct vi_info * vi,struct sge_eq * eq)3579 alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
3580 {
3581 	int rc, qsize;
3582 	size_t len;
3583 
3584 	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
3585 
3586 	qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
3587 	len = qsize * EQ_ESIZE;
3588 	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
3589 	    &eq->ba, (void **)&eq->desc);
3590 	if (rc)
3591 		return (rc);
3592 
3593 	eq->pidx = eq->cidx = eq->dbidx = 0;
3594 	/* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. */
3595 	eq->equeqidx = 0;
3596 	eq->doorbells = sc->doorbells;
3597 
3598 	switch (eq->flags & EQ_TYPEMASK) {
3599 	case EQ_CTRL:
3600 		rc = ctrl_eq_alloc(sc, eq);
3601 		break;
3602 
3603 	case EQ_ETH:
3604 		rc = eth_eq_alloc(sc, vi, eq);
3605 		break;
3606 
3607 #ifdef TCP_OFFLOAD
3608 	case EQ_OFLD:
3609 		rc = ofld_eq_alloc(sc, vi, eq);
3610 		break;
3611 #endif
3612 
3613 	default:
3614 		panic("%s: invalid eq type %d.", __func__,
3615 		    eq->flags & EQ_TYPEMASK);
3616 	}
3617 	if (rc != 0) {
3618 		device_printf(sc->dev,
3619 		    "failed to allocate egress queue(%d): %d\n",
3620 		    eq->flags & EQ_TYPEMASK, rc);
3621 	}
3622 
3623 	if (isset(&eq->doorbells, DOORBELL_UDB) ||
3624 	    isset(&eq->doorbells, DOORBELL_UDBWC) ||
3625 	    isset(&eq->doorbells, DOORBELL_WCWR)) {
3626 		uint32_t s_qpp = sc->params.sge.eq_s_qpp;
3627 		uint32_t mask = (1 << s_qpp) - 1;
3628 		volatile uint8_t *udb;
3629 
3630 		udb = sc->udbs_base + UDBS_DB_OFFSET;
3631 		udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT;	/* pg offset */
3632 		eq->udb_qid = eq->cntxt_id & mask;		/* id in page */
3633 		if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
3634 	    		clrbit(&eq->doorbells, DOORBELL_WCWR);
3635 		else {
3636 			udb += eq->udb_qid << UDBS_SEG_SHIFT;	/* seg offset */
3637 			eq->udb_qid = 0;
3638 		}
3639 		eq->udb = (volatile void *)udb;
3640 	}
3641 
3642 	return (rc);
3643 }
3644 
3645 static int
free_eq(struct adapter * sc,struct sge_eq * eq)3646 free_eq(struct adapter *sc, struct sge_eq *eq)
3647 {
3648 	int rc;
3649 
3650 	if (eq->flags & EQ_ALLOCATED) {
3651 		switch (eq->flags & EQ_TYPEMASK) {
3652 		case EQ_CTRL:
3653 			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
3654 			    eq->cntxt_id);
3655 			break;
3656 
3657 		case EQ_ETH:
3658 			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
3659 			    eq->cntxt_id);
3660 			break;
3661 
3662 #ifdef TCP_OFFLOAD
3663 		case EQ_OFLD:
3664 			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
3665 			    eq->cntxt_id);
3666 			break;
3667 #endif
3668 
3669 		default:
3670 			panic("%s: invalid eq type %d.", __func__,
3671 			    eq->flags & EQ_TYPEMASK);
3672 		}
3673 		if (rc != 0) {
3674 			device_printf(sc->dev,
3675 			    "failed to free egress queue (%d): %d\n",
3676 			    eq->flags & EQ_TYPEMASK, rc);
3677 			return (rc);
3678 		}
3679 		eq->flags &= ~EQ_ALLOCATED;
3680 	}
3681 
3682 	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
3683 
3684 	if (mtx_initialized(&eq->eq_lock))
3685 		mtx_destroy(&eq->eq_lock);
3686 
3687 	bzero(eq, sizeof(*eq));
3688 	return (0);
3689 }
3690 
3691 static int
alloc_wrq(struct adapter * sc,struct vi_info * vi,struct sge_wrq * wrq,struct sysctl_oid * oid)3692 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq,
3693     struct sysctl_oid *oid)
3694 {
3695 	int rc;
3696 	struct sysctl_ctx_list *ctx = vi ? &vi->ctx : &sc->ctx;
3697 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3698 
3699 	rc = alloc_eq(sc, vi, &wrq->eq);
3700 	if (rc)
3701 		return (rc);
3702 
3703 	wrq->adapter = sc;
3704 	TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq);
3705 	TAILQ_INIT(&wrq->incomplete_wrs);
3706 	STAILQ_INIT(&wrq->wr_list);
3707 	wrq->nwr_pending = 0;
3708 	wrq->ndesc_needed = 0;
3709 
3710 	SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD,
3711 	    &wrq->eq.ba, "bus address of descriptor ring");
3712 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
3713 	    wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len,
3714 	    "desc ring size in bytes");
3715 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3716 	    &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
3717 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
3718 	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
3719 	    "consumer index");
3720 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
3721 	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
3722 	    "producer index");
3723 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL,
3724 	    wrq->eq.sidx, "status page index");
3725 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD,
3726 	    &wrq->tx_wrs_direct, "# of work requests (direct)");
3727 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD,
3728 	    &wrq->tx_wrs_copied, "# of work requests (copied)");
3729 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD,
3730 	    &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)");
3731 
3732 	return (rc);
3733 }
3734 
3735 static int
free_wrq(struct adapter * sc,struct sge_wrq * wrq)3736 free_wrq(struct adapter *sc, struct sge_wrq *wrq)
3737 {
3738 	int rc;
3739 
3740 	rc = free_eq(sc, &wrq->eq);
3741 	if (rc)
3742 		return (rc);
3743 
3744 	bzero(wrq, sizeof(*wrq));
3745 	return (0);
3746 }
3747 
3748 static int
alloc_txq(struct vi_info * vi,struct sge_txq * txq,int idx,struct sysctl_oid * oid)3749 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
3750     struct sysctl_oid *oid)
3751 {
3752 	int rc;
3753 	struct port_info *pi = vi->pi;
3754 	struct adapter *sc = pi->adapter;
3755 	struct sge_eq *eq = &txq->eq;
3756 	char name[16];
3757 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3758 
3759 	rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx,
3760 	    M_CXGBE, M_WAITOK);
3761 	if (rc != 0) {
3762 		device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc);
3763 		return (rc);
3764 	}
3765 
3766 	rc = alloc_eq(sc, vi, eq);
3767 	if (rc != 0) {
3768 		mp_ring_free(txq->r);
3769 		txq->r = NULL;
3770 		return (rc);
3771 	}
3772 
3773 	/* Can't fail after this point. */
3774 
3775 	if (idx == 0)
3776 		sc->sge.eq_base = eq->abs_id - eq->cntxt_id;
3777 	else
3778 		KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id,
3779 		    ("eq_base mismatch"));
3780 	KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF,
3781 	    ("PF with non-zero eq_base"));
3782 
3783 	TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq);
3784 	txq->ifp = vi->ifp;
3785 	txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK);
3786 	if (sc->flags & IS_VF)
3787 		txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
3788 		    V_TXPKT_INTF(pi->tx_chan));
3789 	else
3790 		txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3791 		    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
3792 		    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
3793 	txq->tc_idx = -1;
3794 	txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE,
3795 	    M_ZERO | M_WAITOK);
3796 
3797 	snprintf(name, sizeof(name), "%d", idx);
3798 	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
3799 	    NULL, "tx queue");
3800 	children = SYSCTL_CHILDREN(oid);
3801 
3802 	SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD,
3803 	    &eq->ba, "bus address of descriptor ring");
3804 	SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
3805 	    eq->sidx * EQ_ESIZE + sc->params.sge.spg_len,
3806 	    "desc ring size in bytes");
3807 	SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD,
3808 	    &eq->abs_id, 0, "absolute id of the queue");
3809 	SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3810 	    &eq->cntxt_id, 0, "SGE context id of the queue");
3811 	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx",
3812 	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
3813 	    "consumer index");
3814 	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx",
3815 	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
3816 	    "producer index");
3817 	SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL,
3818 	    eq->sidx, "status page index");
3819 
3820 	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc",
3821 	    CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I",
3822 	    "traffic class (-1 means none)");
3823 
3824 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
3825 	    &txq->txcsum, "# of times hardware assisted with checksum");
3826 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion",
3827 	    CTLFLAG_RD, &txq->vlan_insertion,
3828 	    "# of times hardware inserted 802.1Q tag");
3829 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
3830 	    &txq->tso_wrs, "# of TSO work requests");
3831 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
3832 	    &txq->imm_wrs, "# of work requests with immediate data");
3833 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
3834 	    &txq->sgl_wrs, "# of work requests with direct SGL");
3835 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
3836 	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
3837 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs",
3838 	    CTLFLAG_RD, &txq->txpkts0_wrs,
3839 	    "# of txpkts (type 0) work requests");
3840 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs",
3841 	    CTLFLAG_RD, &txq->txpkts1_wrs,
3842 	    "# of txpkts (type 1) work requests");
3843 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts",
3844 	    CTLFLAG_RD, &txq->txpkts0_pkts,
3845 	    "# of frames tx'd using type0 txpkts work requests");
3846 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts",
3847 	    CTLFLAG_RD, &txq->txpkts1_pkts,
3848 	    "# of frames tx'd using type1 txpkts work requests");
3849 
3850 	SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues",
3851 	    CTLFLAG_RD, &txq->r->enqueues,
3852 	    "# of enqueues to the mp_ring for this queue");
3853 	SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops",
3854 	    CTLFLAG_RD, &txq->r->drops,
3855 	    "# of drops in the mp_ring for this queue");
3856 	SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts",
3857 	    CTLFLAG_RD, &txq->r->starts,
3858 	    "# of normal consumer starts in the mp_ring for this queue");
3859 	SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls",
3860 	    CTLFLAG_RD, &txq->r->stalls,
3861 	    "# of consumer stalls in the mp_ring for this queue");
3862 	SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts",
3863 	    CTLFLAG_RD, &txq->r->restarts,
3864 	    "# of consumer restarts in the mp_ring for this queue");
3865 	SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications",
3866 	    CTLFLAG_RD, &txq->r->abdications,
3867 	    "# of consumer abdications in the mp_ring for this queue");
3868 
3869 	return (0);
3870 }
3871 
3872 static int
free_txq(struct vi_info * vi,struct sge_txq * txq)3873 free_txq(struct vi_info *vi, struct sge_txq *txq)
3874 {
3875 	int rc;
3876 	struct adapter *sc = vi->pi->adapter;
3877 	struct sge_eq *eq = &txq->eq;
3878 
3879 	rc = free_eq(sc, eq);
3880 	if (rc)
3881 		return (rc);
3882 
3883 	sglist_free(txq->gl);
3884 	free(txq->sdesc, M_CXGBE);
3885 	mp_ring_free(txq->r);
3886 
3887 	bzero(txq, sizeof(*txq));
3888 	return (0);
3889 }
3890 
3891 static void
oneseg_dma_callback(void * arg,bus_dma_segment_t * segs,int nseg,int error)3892 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3893 {
3894 	bus_addr_t *ba = arg;
3895 
3896 	KASSERT(nseg == 1,
3897 	    ("%s meant for single segment mappings only.", __func__));
3898 
3899 	*ba = error ? 0 : segs->ds_addr;
3900 }
3901 
3902 static inline void
ring_fl_db(struct adapter * sc,struct sge_fl * fl)3903 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
3904 {
3905 	uint32_t n, v;
3906 
3907 	n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx);
3908 	MPASS(n > 0);
3909 
3910 	wmb();
3911 	v = fl->dbval | V_PIDX(n);
3912 	if (fl->udb)
3913 		*fl->udb = htole32(v);
3914 	else
3915 		t4_write_reg(sc, sc->sge_kdoorbell_reg, v);
3916 	IDXINCR(fl->dbidx, n, fl->sidx);
3917 }
3918 
3919 /*
3920  * Fills up the freelist by allocating up to 'n' buffers.  Buffers that are
3921  * recycled do not count towards this allocation budget.
3922  *
3923  * Returns non-zero to indicate that this freelist should be added to the list
3924  * of starving freelists.
3925  */
3926 static int
refill_fl(struct adapter * sc,struct sge_fl * fl,int n)3927 refill_fl(struct adapter *sc, struct sge_fl *fl, int n)
3928 {
3929 	__be64 *d;
3930 	struct fl_sdesc *sd;
3931 	uintptr_t pa;
3932 	caddr_t cl;
3933 	struct cluster_layout *cll;
3934 	struct sw_zone_info *swz;
3935 	struct cluster_metadata *clm;
3936 	uint16_t max_pidx;
3937 	uint16_t hw_cidx = fl->hw_cidx;		/* stable snapshot */
3938 
3939 	FL_LOCK_ASSERT_OWNED(fl);
3940 
3941 	/*
3942 	 * We always stop at the beginning of the hardware descriptor that's just
3943 	 * before the one with the hw cidx.  This is to avoid hw pidx = hw cidx,
3944 	 * which would mean an empty freelist to the chip.
3945 	 */
3946 	max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1;
3947 	if (fl->pidx == max_pidx * 8)
3948 		return (0);
3949 
3950 	d = &fl->desc[fl->pidx];
3951 	sd = &fl->sdesc[fl->pidx];
3952 	cll = &fl->cll_def;	/* default layout */
3953 	swz = &sc->sge.sw_zone_info[cll->zidx];
3954 
3955 	while (n > 0) {
3956 
3957 		if (sd->cl != NULL) {
3958 
3959 			if (sd->nmbuf == 0) {
3960 				/*
3961 				 * Fast recycle without involving any atomics on
3962 				 * the cluster's metadata (if the cluster has
3963 				 * metadata).  This happens when all frames
3964 				 * received in the cluster were small enough to
3965 				 * fit within a single mbuf each.
3966 				 */
3967 				fl->cl_fast_recycled++;
3968 #ifdef INVARIANTS
3969 				clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
3970 				if (clm != NULL)
3971 					MPASS(clm->refcount == 1);
3972 #endif
3973 				goto recycled_fast;
3974 			}
3975 
3976 			/*
3977 			 * Cluster is guaranteed to have metadata.  Clusters
3978 			 * without metadata always take the fast recycle path
3979 			 * when they're recycled.
3980 			 */
3981 			clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
3982 			MPASS(clm != NULL);
3983 
3984 			if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
3985 				fl->cl_recycled++;
3986 				counter_u64_add(extfree_rels, 1);
3987 				goto recycled;
3988 			}
3989 			sd->cl = NULL;	/* gave up my reference */
3990 		}
3991 		MPASS(sd->cl == NULL);
3992 alloc:
3993 		cl = uma_zalloc(swz->zone, M_NOWAIT);
3994 		if (__predict_false(cl == NULL)) {
3995 			if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 ||
3996 			    fl->cll_def.zidx == fl->cll_alt.zidx)
3997 				break;
3998 
3999 			/* fall back to the safe zone */
4000 			cll = &fl->cll_alt;
4001 			swz = &sc->sge.sw_zone_info[cll->zidx];
4002 			goto alloc;
4003 		}
4004 		fl->cl_allocated++;
4005 		n--;
4006 
4007 		pa = pmap_kextract((vm_offset_t)cl);
4008 		pa += cll->region1;
4009 		sd->cl = cl;
4010 		sd->cll = *cll;
4011 		*d = htobe64(pa | cll->hwidx);
4012 		clm = cl_metadata(sc, fl, cll, cl);
4013 		if (clm != NULL) {
4014 recycled:
4015 #ifdef INVARIANTS
4016 			clm->sd = sd;
4017 #endif
4018 			clm->refcount = 1;
4019 		}
4020 		sd->nmbuf = 0;
4021 recycled_fast:
4022 		d++;
4023 		sd++;
4024 		if (__predict_false(++fl->pidx % 8 == 0)) {
4025 			uint16_t pidx = fl->pidx / 8;
4026 
4027 			if (__predict_false(pidx == fl->sidx)) {
4028 				fl->pidx = 0;
4029 				pidx = 0;
4030 				sd = fl->sdesc;
4031 				d = fl->desc;
4032 			}
4033 			if (pidx == max_pidx)
4034 				break;
4035 
4036 			if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4)
4037 				ring_fl_db(sc, fl);
4038 		}
4039 	}
4040 
4041 	if (fl->pidx / 8 != fl->dbidx)
4042 		ring_fl_db(sc, fl);
4043 
4044 	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
4045 }
4046 
4047 /*
4048  * Attempt to refill all starving freelists.
4049  */
4050 static void
refill_sfl(void * arg)4051 refill_sfl(void *arg)
4052 {
4053 	struct adapter *sc = arg;
4054 	struct sge_fl *fl, *fl_temp;
4055 
4056 	mtx_assert(&sc->sfl_lock, MA_OWNED);
4057 	TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
4058 		FL_LOCK(fl);
4059 		refill_fl(sc, fl, 64);
4060 		if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
4061 			TAILQ_REMOVE(&sc->sfl, fl, link);
4062 			fl->flags &= ~FL_STARVING;
4063 		}
4064 		FL_UNLOCK(fl);
4065 	}
4066 
4067 	if (!TAILQ_EMPTY(&sc->sfl))
4068 		callout_schedule(&sc->sfl_callout, hz / 5);
4069 }
4070 
4071 static int
alloc_fl_sdesc(struct sge_fl * fl)4072 alloc_fl_sdesc(struct sge_fl *fl)
4073 {
4074 
4075 	fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE,
4076 	    M_ZERO | M_WAITOK);
4077 
4078 	return (0);
4079 }
4080 
4081 static void
free_fl_sdesc(struct adapter * sc,struct sge_fl * fl)4082 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl)
4083 {
4084 	struct fl_sdesc *sd;
4085 	struct cluster_metadata *clm;
4086 	struct cluster_layout *cll;
4087 	int i;
4088 
4089 	sd = fl->sdesc;
4090 	for (i = 0; i < fl->sidx * 8; i++, sd++) {
4091 		if (sd->cl == NULL)
4092 			continue;
4093 
4094 		cll = &sd->cll;
4095 		clm = cl_metadata(sc, fl, cll, sd->cl);
4096 		if (sd->nmbuf == 0)
4097 			uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
4098 		else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) {
4099 			uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
4100 			counter_u64_add(extfree_rels, 1);
4101 		}
4102 		sd->cl = NULL;
4103 	}
4104 
4105 	free(fl->sdesc, M_CXGBE);
4106 	fl->sdesc = NULL;
4107 }
4108 
4109 static inline void
get_pkt_gl(struct mbuf * m,struct sglist * gl)4110 get_pkt_gl(struct mbuf *m, struct sglist *gl)
4111 {
4112 	int rc;
4113 
4114 	M_ASSERTPKTHDR(m);
4115 
4116 	sglist_reset(gl);
4117 	rc = sglist_append_mbuf(gl, m);
4118 	if (__predict_false(rc != 0)) {
4119 		panic("%s: mbuf %p (%d segs) was vetted earlier but now fails "
4120 		    "with %d.", __func__, m, mbuf_nsegs(m), rc);
4121 	}
4122 
4123 	KASSERT(gl->sg_nseg == mbuf_nsegs(m),
4124 	    ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
4125 	    mbuf_nsegs(m), gl->sg_nseg));
4126 	KASSERT(gl->sg_nseg > 0 &&
4127 	    gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS),
4128 	    ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
4129 		gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS));
4130 }
4131 
4132 /*
4133  * len16 for a txpkt WR with a GL.  Includes the firmware work request header.
4134  */
4135 static inline u_int
txpkt_len16(u_int nsegs,u_int tso)4136 txpkt_len16(u_int nsegs, u_int tso)
4137 {
4138 	u_int n;
4139 
4140 	MPASS(nsegs > 0);
4141 
4142 	nsegs--; /* first segment is part of ulptx_sgl */
4143 	n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) +
4144 	    sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
4145 	if (tso)
4146 		n += sizeof(struct cpl_tx_pkt_lso_core);
4147 
4148 	return (howmany(n, 16));
4149 }
4150 
4151 /*
4152  * len16 for a txpkt_vm WR with a GL.  Includes the firmware work
4153  * request header.
4154  */
4155 static inline u_int
txpkt_vm_len16(u_int nsegs,u_int tso)4156 txpkt_vm_len16(u_int nsegs, u_int tso)
4157 {
4158 	u_int n;
4159 
4160 	MPASS(nsegs > 0);
4161 
4162 	nsegs--; /* first segment is part of ulptx_sgl */
4163 	n = sizeof(struct fw_eth_tx_pkt_vm_wr) +
4164 	    sizeof(struct cpl_tx_pkt_core) +
4165 	    sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
4166 	if (tso)
4167 		n += sizeof(struct cpl_tx_pkt_lso_core);
4168 
4169 	return (howmany(n, 16));
4170 }
4171 
4172 /*
4173  * len16 for a txpkts type 0 WR with a GL.  Does not include the firmware work
4174  * request header.
4175  */
4176 static inline u_int
txpkts0_len16(u_int nsegs)4177 txpkts0_len16(u_int nsegs)
4178 {
4179 	u_int n;
4180 
4181 	MPASS(nsegs > 0);
4182 
4183 	nsegs--; /* first segment is part of ulptx_sgl */
4184 	n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) +
4185 	    sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) +
4186 	    8 * ((3 * nsegs) / 2 + (nsegs & 1));
4187 
4188 	return (howmany(n, 16));
4189 }
4190 
4191 /*
4192  * len16 for a txpkts type 1 WR with a GL.  Does not include the firmware work
4193  * request header.
4194  */
4195 static inline u_int
txpkts1_len16(void)4196 txpkts1_len16(void)
4197 {
4198 	u_int n;
4199 
4200 	n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl);
4201 
4202 	return (howmany(n, 16));
4203 }
4204 
4205 static inline u_int
imm_payload(u_int ndesc)4206 imm_payload(u_int ndesc)
4207 {
4208 	u_int n;
4209 
4210 	n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) -
4211 	    sizeof(struct cpl_tx_pkt_core);
4212 
4213 	return (n);
4214 }
4215 
4216 /*
4217  * Write a VM txpkt WR for this packet to the hardware descriptors, update the
4218  * software descriptor, and advance the pidx.  It is guaranteed that enough
4219  * descriptors are available.
4220  *
4221  * The return value is the # of hardware descriptors used.
4222  */
4223 static u_int
write_txpkt_vm_wr(struct adapter * sc,struct sge_txq * txq,struct fw_eth_tx_pkt_vm_wr * wr,struct mbuf * m0,u_int available)4224 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq,
4225     struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available)
4226 {
4227 	struct sge_eq *eq = &txq->eq;
4228 	struct tx_sdesc *txsd;
4229 	struct cpl_tx_pkt_core *cpl;
4230 	uint32_t ctrl;	/* used in many unrelated places */
4231 	uint64_t ctrl1;
4232 	int csum_type, len16, ndesc, pktlen, nsegs;
4233 	caddr_t dst;
4234 
4235 	TXQ_LOCK_ASSERT_OWNED(txq);
4236 	M_ASSERTPKTHDR(m0);
4237 	MPASS(available > 0 && available < eq->sidx);
4238 
4239 	len16 = mbuf_len16(m0);
4240 	nsegs = mbuf_nsegs(m0);
4241 	pktlen = m0->m_pkthdr.len;
4242 	ctrl = sizeof(struct cpl_tx_pkt_core);
4243 	if (needs_tso(m0))
4244 		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
4245 	ndesc = howmany(len16, EQ_ESIZE / 16);
4246 	MPASS(ndesc <= available);
4247 
4248 	/* Firmware work request header */
4249 	MPASS(wr == (void *)&eq->desc[eq->pidx]);
4250 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
4251 	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
4252 
4253 	ctrl = V_FW_WR_LEN16(len16);
4254 	wr->equiq_to_len16 = htobe32(ctrl);
4255 	wr->r3[0] = 0;
4256 	wr->r3[1] = 0;
4257 
4258 	/*
4259 	 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci.
4260 	 * vlantci is ignored unless the ethtype is 0x8100, so it's
4261 	 * simpler to always copy it rather than making it
4262 	 * conditional.  Also, it seems that we do not have to set
4263 	 * vlantci or fake the ethtype when doing VLAN tag insertion.
4264 	 */
4265 	m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst);
4266 
4267 	csum_type = -1;
4268 	if (needs_tso(m0)) {
4269 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
4270 
4271 		KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
4272 		    m0->m_pkthdr.l4hlen > 0,
4273 		    ("%s: mbuf %p needs TSO but missing header lengths",
4274 			__func__, m0));
4275 
4276 		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
4277 		    F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2)
4278 		    | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
4279 		if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
4280 			ctrl |= V_LSO_ETHHDR_LEN(1);
4281 		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
4282 			ctrl |= F_LSO_IPV6;
4283 
4284 		lso->lso_ctrl = htobe32(ctrl);
4285 		lso->ipid_ofst = htobe16(0);
4286 		lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
4287 		lso->seqno_offset = htobe32(0);
4288 		lso->len = htobe32(pktlen);
4289 
4290 		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
4291 			csum_type = TX_CSUM_TCPIP6;
4292 		else
4293 			csum_type = TX_CSUM_TCPIP;
4294 
4295 		cpl = (void *)(lso + 1);
4296 
4297 		txq->tso_wrs++;
4298 	} else {
4299 		if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP)
4300 			csum_type = TX_CSUM_TCPIP;
4301 		else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP)
4302 			csum_type = TX_CSUM_UDPIP;
4303 		else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP)
4304 			csum_type = TX_CSUM_TCPIP6;
4305 		else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP)
4306 			csum_type = TX_CSUM_UDPIP6;
4307 #if defined(INET)
4308 		else if (m0->m_pkthdr.csum_flags & CSUM_IP) {
4309 			/*
4310 			 * XXX: The firmware appears to stomp on the
4311 			 * fragment/flags field of the IP header when
4312 			 * using TX_CSUM_IP.  Fall back to doing
4313 			 * software checksums.
4314 			 */
4315 			u_short *sump;
4316 			struct mbuf *m;
4317 			int offset;
4318 
4319 			m = m0;
4320 			offset = 0;
4321 			sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen +
4322 			    offsetof(struct ip, ip_sum));
4323 			*sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen +
4324 			    m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen);
4325 			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
4326 		}
4327 #endif
4328 
4329 		cpl = (void *)(wr + 1);
4330 	}
4331 
4332 	/* Checksum offload */
4333 	ctrl1 = 0;
4334 	if (needs_l3_csum(m0) == 0)
4335 		ctrl1 |= F_TXPKT_IPCSUM_DIS;
4336 	if (csum_type >= 0) {
4337 		KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0,
4338 	    ("%s: mbuf %p needs checksum offload but missing header lengths",
4339 			__func__, m0));
4340 
4341 		if (chip_id(sc) <= CHELSIO_T5) {
4342 			ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen -
4343 			    ETHER_HDR_LEN);
4344 		} else {
4345 			ctrl1 |= V_T6_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen -
4346 			    ETHER_HDR_LEN);
4347 		}
4348 		ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen);
4349 		ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type);
4350 	} else
4351 		ctrl1 |= F_TXPKT_L4CSUM_DIS;
4352 	if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
4353 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
4354 		txq->txcsum++;	/* some hardware assistance provided */
4355 
4356 	/* VLAN tag insertion */
4357 	if (needs_vlan_insertion(m0)) {
4358 		ctrl1 |= F_TXPKT_VLAN_VLD |
4359 		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
4360 		txq->vlan_insertion++;
4361 	}
4362 
4363 	/* CPL header */
4364 	cpl->ctrl0 = txq->cpl_ctrl0;
4365 	cpl->pack = 0;
4366 	cpl->len = htobe16(pktlen);
4367 	cpl->ctrl1 = htobe64(ctrl1);
4368 
4369 	/* SGL */
4370 	dst = (void *)(cpl + 1);
4371 
4372 	/*
4373 	 * A packet using TSO will use up an entire descriptor for the
4374 	 * firmware work request header, LSO CPL, and TX_PKT_XT CPL.
4375 	 * If this descriptor is the last descriptor in the ring, wrap
4376 	 * around to the front of the ring explicitly for the start of
4377 	 * the sgl.
4378 	 */
4379 	if (dst == (void *)&eq->desc[eq->sidx]) {
4380 		dst = (void *)&eq->desc[0];
4381 		write_gl_to_txd(txq, m0, &dst, 0);
4382 	} else
4383 		write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
4384 	txq->sgl_wrs++;
4385 
4386 	txq->txpkt_wrs++;
4387 
4388 	txsd = &txq->sdesc[eq->pidx];
4389 	txsd->m = m0;
4390 	txsd->desc_used = ndesc;
4391 
4392 	return (ndesc);
4393 }
4394 
4395 /*
4396  * Write a txpkt WR for this packet to the hardware descriptors, update the
4397  * software descriptor, and advance the pidx.  It is guaranteed that enough
4398  * descriptors are available.
4399  *
4400  * The return value is the # of hardware descriptors used.
4401  */
4402 static u_int
write_txpkt_wr(struct sge_txq * txq,struct fw_eth_tx_pkt_wr * wr,struct mbuf * m0,u_int available)4403 write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr,
4404     struct mbuf *m0, u_int available)
4405 {
4406 	struct sge_eq *eq = &txq->eq;
4407 	struct tx_sdesc *txsd;
4408 	struct cpl_tx_pkt_core *cpl;
4409 	uint32_t ctrl;	/* used in many unrelated places */
4410 	uint64_t ctrl1;
4411 	int len16, ndesc, pktlen, nsegs;
4412 	caddr_t dst;
4413 
4414 	TXQ_LOCK_ASSERT_OWNED(txq);
4415 	M_ASSERTPKTHDR(m0);
4416 	MPASS(available > 0 && available < eq->sidx);
4417 
4418 	len16 = mbuf_len16(m0);
4419 	nsegs = mbuf_nsegs(m0);
4420 	pktlen = m0->m_pkthdr.len;
4421 	ctrl = sizeof(struct cpl_tx_pkt_core);
4422 	if (needs_tso(m0))
4423 		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
4424 	else if (pktlen <= imm_payload(2) && available >= 2) {
4425 		/* Immediate data.  Recalculate len16 and set nsegs to 0. */
4426 		ctrl += pktlen;
4427 		len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) +
4428 		    sizeof(struct cpl_tx_pkt_core) + pktlen, 16);
4429 		nsegs = 0;
4430 	}
4431 	ndesc = howmany(len16, EQ_ESIZE / 16);
4432 	MPASS(ndesc <= available);
4433 
4434 	/* Firmware work request header */
4435 	MPASS(wr == (void *)&eq->desc[eq->pidx]);
4436 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
4437 	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
4438 
4439 	ctrl = V_FW_WR_LEN16(len16);
4440 	wr->equiq_to_len16 = htobe32(ctrl);
4441 	wr->r3 = 0;
4442 
4443 	if (needs_tso(m0)) {
4444 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
4445 
4446 		KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
4447 		    m0->m_pkthdr.l4hlen > 0,
4448 		    ("%s: mbuf %p needs TSO but missing header lengths",
4449 			__func__, m0));
4450 
4451 		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
4452 		    F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2)
4453 		    | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
4454 		if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
4455 			ctrl |= V_LSO_ETHHDR_LEN(1);
4456 		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
4457 			ctrl |= F_LSO_IPV6;
4458 
4459 		lso->lso_ctrl = htobe32(ctrl);
4460 		lso->ipid_ofst = htobe16(0);
4461 		lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
4462 		lso->seqno_offset = htobe32(0);
4463 		lso->len = htobe32(pktlen);
4464 
4465 		cpl = (void *)(lso + 1);
4466 
4467 		txq->tso_wrs++;
4468 	} else
4469 		cpl = (void *)(wr + 1);
4470 
4471 	/* Checksum offload */
4472 	ctrl1 = 0;
4473 	if (needs_l3_csum(m0) == 0)
4474 		ctrl1 |= F_TXPKT_IPCSUM_DIS;
4475 	if (needs_l4_csum(m0) == 0)
4476 		ctrl1 |= F_TXPKT_L4CSUM_DIS;
4477 	if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
4478 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
4479 		txq->txcsum++;	/* some hardware assistance provided */
4480 
4481 	/* VLAN tag insertion */
4482 	if (needs_vlan_insertion(m0)) {
4483 		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
4484 		txq->vlan_insertion++;
4485 	}
4486 
4487 	/* CPL header */
4488 	cpl->ctrl0 = txq->cpl_ctrl0;
4489 	cpl->pack = 0;
4490 	cpl->len = htobe16(pktlen);
4491 	cpl->ctrl1 = htobe64(ctrl1);
4492 
4493 	/* SGL */
4494 	dst = (void *)(cpl + 1);
4495 	if (nsegs > 0) {
4496 
4497 		write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
4498 		txq->sgl_wrs++;
4499 	} else {
4500 		struct mbuf *m;
4501 
4502 		for (m = m0; m != NULL; m = m->m_next) {
4503 			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
4504 #ifdef INVARIANTS
4505 			pktlen -= m->m_len;
4506 #endif
4507 		}
4508 #ifdef INVARIANTS
4509 		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
4510 #endif
4511 		txq->imm_wrs++;
4512 	}
4513 
4514 	txq->txpkt_wrs++;
4515 
4516 	txsd = &txq->sdesc[eq->pidx];
4517 	txsd->m = m0;
4518 	txsd->desc_used = ndesc;
4519 
4520 	return (ndesc);
4521 }
4522 
4523 static int
try_txpkts(struct mbuf * m,struct mbuf * n,struct txpkts * txp,u_int available)4524 try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available)
4525 {
4526 	u_int needed, nsegs1, nsegs2, l1, l2;
4527 
4528 	if (cannot_use_txpkts(m) || cannot_use_txpkts(n))
4529 		return (1);
4530 
4531 	nsegs1 = mbuf_nsegs(m);
4532 	nsegs2 = mbuf_nsegs(n);
4533 	if (nsegs1 + nsegs2 == 2) {
4534 		txp->wr_type = 1;
4535 		l1 = l2 = txpkts1_len16();
4536 	} else {
4537 		txp->wr_type = 0;
4538 		l1 = txpkts0_len16(nsegs1);
4539 		l2 = txpkts0_len16(nsegs2);
4540 	}
4541 	txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2;
4542 	needed = howmany(txp->len16, EQ_ESIZE / 16);
4543 	if (needed > SGE_MAX_WR_NDESC || needed > available)
4544 		return (1);
4545 
4546 	txp->plen = m->m_pkthdr.len + n->m_pkthdr.len;
4547 	if (txp->plen > 65535)
4548 		return (1);
4549 
4550 	txp->npkt = 2;
4551 	set_mbuf_len16(m, l1);
4552 	set_mbuf_len16(n, l2);
4553 
4554 	return (0);
4555 }
4556 
4557 static int
add_to_txpkts(struct mbuf * m,struct txpkts * txp,u_int available)4558 add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available)
4559 {
4560 	u_int plen, len16, needed, nsegs;
4561 
4562 	MPASS(txp->wr_type == 0 || txp->wr_type == 1);
4563 
4564 	nsegs = mbuf_nsegs(m);
4565 	if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1))
4566 		return (1);
4567 
4568 	plen = txp->plen + m->m_pkthdr.len;
4569 	if (plen > 65535)
4570 		return (1);
4571 
4572 	if (txp->wr_type == 0)
4573 		len16 = txpkts0_len16(nsegs);
4574 	else
4575 		len16 = txpkts1_len16();
4576 	needed = howmany(txp->len16 + len16, EQ_ESIZE / 16);
4577 	if (needed > SGE_MAX_WR_NDESC || needed > available)
4578 		return (1);
4579 
4580 	txp->npkt++;
4581 	txp->plen = plen;
4582 	txp->len16 += len16;
4583 	set_mbuf_len16(m, len16);
4584 
4585 	return (0);
4586 }
4587 
4588 /*
4589  * Write a txpkts WR for the packets in txp to the hardware descriptors, update
4590  * the software descriptor, and advance the pidx.  It is guaranteed that enough
4591  * descriptors are available.
4592  *
4593  * The return value is the # of hardware descriptors used.
4594  */
4595 static u_int
write_txpkts_wr(struct sge_txq * txq,struct fw_eth_tx_pkts_wr * wr,struct mbuf * m0,const struct txpkts * txp,u_int available)4596 write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr,
4597     struct mbuf *m0, const struct txpkts *txp, u_int available)
4598 {
4599 	struct sge_eq *eq = &txq->eq;
4600 	struct tx_sdesc *txsd;
4601 	struct cpl_tx_pkt_core *cpl;
4602 	uint32_t ctrl;
4603 	uint64_t ctrl1;
4604 	int ndesc, checkwrap;
4605 	struct mbuf *m;
4606 	void *flitp;
4607 
4608 	TXQ_LOCK_ASSERT_OWNED(txq);
4609 	MPASS(txp->npkt > 0);
4610 	MPASS(txp->plen < 65536);
4611 	MPASS(m0 != NULL);
4612 	MPASS(m0->m_nextpkt != NULL);
4613 	MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
4614 	MPASS(available > 0 && available < eq->sidx);
4615 
4616 	ndesc = howmany(txp->len16, EQ_ESIZE / 16);
4617 	MPASS(ndesc <= available);
4618 
4619 	MPASS(wr == (void *)&eq->desc[eq->pidx]);
4620 	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
4621 	ctrl = V_FW_WR_LEN16(txp->len16);
4622 	wr->equiq_to_len16 = htobe32(ctrl);
4623 	wr->plen = htobe16(txp->plen);
4624 	wr->npkt = txp->npkt;
4625 	wr->r3 = 0;
4626 	wr->type = txp->wr_type;
4627 	flitp = wr + 1;
4628 
4629 	/*
4630 	 * At this point we are 16B into a hardware descriptor.  If checkwrap is
4631 	 * set then we know the WR is going to wrap around somewhere.  We'll
4632 	 * check for that at appropriate points.
4633 	 */
4634 	checkwrap = eq->sidx - ndesc < eq->pidx;
4635 	for (m = m0; m != NULL; m = m->m_nextpkt) {
4636 		if (txp->wr_type == 0) {
4637 			struct ulp_txpkt *ulpmc;
4638 			struct ulptx_idata *ulpsc;
4639 
4640 			/* ULP master command */
4641 			ulpmc = flitp;
4642 			ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
4643 			    V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid));
4644 			ulpmc->len = htobe32(mbuf_len16(m));
4645 
4646 			/* ULP subcommand */
4647 			ulpsc = (void *)(ulpmc + 1);
4648 			ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
4649 			    F_ULP_TX_SC_MORE);
4650 			ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
4651 
4652 			cpl = (void *)(ulpsc + 1);
4653 			if (checkwrap &&
4654 			    (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx])
4655 				cpl = (void *)&eq->desc[0];
4656 		} else {
4657 			cpl = flitp;
4658 		}
4659 
4660 		/* Checksum offload */
4661 		ctrl1 = 0;
4662 		if (needs_l3_csum(m) == 0)
4663 			ctrl1 |= F_TXPKT_IPCSUM_DIS;
4664 		if (needs_l4_csum(m) == 0)
4665 			ctrl1 |= F_TXPKT_L4CSUM_DIS;
4666 		if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
4667 		    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
4668 			txq->txcsum++;	/* some hardware assistance provided */
4669 
4670 		/* VLAN tag insertion */
4671 		if (needs_vlan_insertion(m)) {
4672 			ctrl1 |= F_TXPKT_VLAN_VLD |
4673 			    V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
4674 			txq->vlan_insertion++;
4675 		}
4676 
4677 		/* CPL header */
4678 		cpl->ctrl0 = txq->cpl_ctrl0;
4679 		cpl->pack = 0;
4680 		cpl->len = htobe16(m->m_pkthdr.len);
4681 		cpl->ctrl1 = htobe64(ctrl1);
4682 
4683 		flitp = cpl + 1;
4684 		if (checkwrap &&
4685 		    (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx])
4686 			flitp = (void *)&eq->desc[0];
4687 
4688 		write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap);
4689 
4690 	}
4691 
4692 	if (txp->wr_type == 0) {
4693 		txq->txpkts0_pkts += txp->npkt;
4694 		txq->txpkts0_wrs++;
4695 	} else {
4696 		txq->txpkts1_pkts += txp->npkt;
4697 		txq->txpkts1_wrs++;
4698 	}
4699 
4700 	txsd = &txq->sdesc[eq->pidx];
4701 	txsd->m = m0;
4702 	txsd->desc_used = ndesc;
4703 
4704 	return (ndesc);
4705 }
4706 
4707 /*
4708  * If the SGL ends on an address that is not 16 byte aligned, this function will
4709  * add a 0 filled flit at the end.
4710  */
4711 static void
write_gl_to_txd(struct sge_txq * txq,struct mbuf * m,caddr_t * to,int checkwrap)4712 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap)
4713 {
4714 	struct sge_eq *eq = &txq->eq;
4715 	struct sglist *gl = txq->gl;
4716 	struct sglist_seg *seg;
4717 	__be64 *flitp, *wrap;
4718 	struct ulptx_sgl *usgl;
4719 	int i, nflits, nsegs;
4720 
4721 	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
4722 	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
4723 	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
4724 	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
4725 
4726 	get_pkt_gl(m, gl);
4727 	nsegs = gl->sg_nseg;
4728 	MPASS(nsegs > 0);
4729 
4730 	nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
4731 	flitp = (__be64 *)(*to);
4732 	wrap = (__be64 *)(&eq->desc[eq->sidx]);
4733 	seg = &gl->sg_segs[0];
4734 	usgl = (void *)flitp;
4735 
4736 	/*
4737 	 * We start at a 16 byte boundary somewhere inside the tx descriptor
4738 	 * ring, so we're at least 16 bytes away from the status page.  There is
4739 	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
4740 	 */
4741 
4742 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
4743 	    V_ULPTX_NSGE(nsegs));
4744 	usgl->len0 = htobe32(seg->ss_len);
4745 	usgl->addr0 = htobe64(seg->ss_paddr);
4746 	seg++;
4747 
4748 	if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) {
4749 
4750 		/* Won't wrap around at all */
4751 
4752 		for (i = 0; i < nsegs - 1; i++, seg++) {
4753 			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
4754 			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
4755 		}
4756 		if (i & 1)
4757 			usgl->sge[i / 2].len[1] = htobe32(0);
4758 		flitp += nflits;
4759 	} else {
4760 
4761 		/* Will wrap somewhere in the rest of the SGL */
4762 
4763 		/* 2 flits already written, write the rest flit by flit */
4764 		flitp = (void *)(usgl + 1);
4765 		for (i = 0; i < nflits - 2; i++) {
4766 			if (flitp == wrap)
4767 				flitp = (void *)eq->desc;
4768 			*flitp++ = get_flit(seg, nsegs - 1, i);
4769 		}
4770 	}
4771 
4772 	if (nflits & 1) {
4773 		MPASS(((uintptr_t)flitp) & 0xf);
4774 		*flitp++ = 0;
4775 	}
4776 
4777 	MPASS((((uintptr_t)flitp) & 0xf) == 0);
4778 	if (__predict_false(flitp == wrap))
4779 		*to = (void *)eq->desc;
4780 	else
4781 		*to = (void *)flitp;
4782 }
4783 
4784 static inline void
copy_to_txd(struct sge_eq * eq,caddr_t from,caddr_t * to,int len)4785 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
4786 {
4787 
4788 	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
4789 	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
4790 
4791 	if (__predict_true((uintptr_t)(*to) + len <=
4792 	    (uintptr_t)&eq->desc[eq->sidx])) {
4793 		bcopy(from, *to, len);
4794 		(*to) += len;
4795 	} else {
4796 		int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);
4797 
4798 		bcopy(from, *to, portion);
4799 		from += portion;
4800 		portion = len - portion;	/* remaining */
4801 		bcopy(from, (void *)eq->desc, portion);
4802 		(*to) = (caddr_t)eq->desc + portion;
4803 	}
4804 }
4805 
4806 static inline void
ring_eq_db(struct adapter * sc,struct sge_eq * eq,u_int n)4807 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n)
4808 {
4809 	u_int db;
4810 
4811 	MPASS(n > 0);
4812 
4813 	db = eq->doorbells;
4814 	if (n > 1)
4815 		clrbit(&db, DOORBELL_WCWR);
4816 	wmb();
4817 
4818 	switch (ffs(db) - 1) {
4819 	case DOORBELL_UDB:
4820 		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
4821 		break;
4822 
4823 	case DOORBELL_WCWR: {
4824 		volatile uint64_t *dst, *src;
4825 		int i;
4826 
4827 		/*
4828 		 * Queues whose 128B doorbell segment fits in the page do not
4829 		 * use relative qid (udb_qid is always 0).  Only queues with
4830 		 * doorbell segments can do WCWR.
4831 		 */
4832 		KASSERT(eq->udb_qid == 0 && n == 1,
4833 		    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
4834 		    __func__, eq->doorbells, n, eq->dbidx, eq));
4835 
4836 		dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
4837 		    UDBS_DB_OFFSET);
4838 		i = eq->dbidx;
4839 		src = (void *)&eq->desc[i];
4840 		while (src != (void *)&eq->desc[i + 1])
4841 			*dst++ = *src++;
4842 		wmb();
4843 		break;
4844 	}
4845 
4846 	case DOORBELL_UDBWC:
4847 		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
4848 		wmb();
4849 		break;
4850 
4851 	case DOORBELL_KDB:
4852 		t4_write_reg(sc, sc->sge_kdoorbell_reg,
4853 		    V_QID(eq->cntxt_id) | V_PIDX(n));
4854 		break;
4855 	}
4856 
4857 	IDXINCR(eq->dbidx, n, eq->sidx);
4858 }
4859 
4860 static inline u_int
reclaimable_tx_desc(struct sge_eq * eq)4861 reclaimable_tx_desc(struct sge_eq *eq)
4862 {
4863 	uint16_t hw_cidx;
4864 
4865 	hw_cidx = read_hw_cidx(eq);
4866 	return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx));
4867 }
4868 
4869 static inline u_int
total_available_tx_desc(struct sge_eq * eq)4870 total_available_tx_desc(struct sge_eq *eq)
4871 {
4872 	uint16_t hw_cidx, pidx;
4873 
4874 	hw_cidx = read_hw_cidx(eq);
4875 	pidx = eq->pidx;
4876 
4877 	if (pidx == hw_cidx)
4878 		return (eq->sidx - 1);
4879 	else
4880 		return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1);
4881 }
4882 
4883 static inline uint16_t
read_hw_cidx(struct sge_eq * eq)4884 read_hw_cidx(struct sge_eq *eq)
4885 {
4886 	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
4887 	uint16_t cidx = spg->cidx;	/* stable snapshot */
4888 
4889 	return (be16toh(cidx));
4890 }
4891 
4892 /*
4893  * Reclaim 'n' descriptors approximately.
4894  */
4895 static u_int
reclaim_tx_descs(struct sge_txq * txq,u_int n)4896 reclaim_tx_descs(struct sge_txq *txq, u_int n)
4897 {
4898 	struct tx_sdesc *txsd;
4899 	struct sge_eq *eq = &txq->eq;
4900 	u_int can_reclaim, reclaimed;
4901 
4902 	TXQ_LOCK_ASSERT_OWNED(txq);
4903 	MPASS(n > 0);
4904 
4905 	reclaimed = 0;
4906 	can_reclaim = reclaimable_tx_desc(eq);
4907 	while (can_reclaim && reclaimed < n) {
4908 		int ndesc;
4909 		struct mbuf *m, *nextpkt;
4910 
4911 		txsd = &txq->sdesc[eq->cidx];
4912 		ndesc = txsd->desc_used;
4913 
4914 		/* Firmware doesn't return "partial" credits. */
4915 		KASSERT(can_reclaim >= ndesc,
4916 		    ("%s: unexpected number of credits: %d, %d",
4917 		    __func__, can_reclaim, ndesc));
4918 		KASSERT(ndesc != 0,
4919 		    ("%s: descriptor with no credits: cidx %d",
4920 		    __func__, eq->cidx));
4921 
4922 		for (m = txsd->m; m != NULL; m = nextpkt) {
4923 			nextpkt = m->m_nextpkt;
4924 			m->m_nextpkt = NULL;
4925 			m_freem(m);
4926 		}
4927 		reclaimed += ndesc;
4928 		can_reclaim -= ndesc;
4929 		IDXINCR(eq->cidx, ndesc, eq->sidx);
4930 	}
4931 
4932 	return (reclaimed);
4933 }
4934 
4935 static void
tx_reclaim(void * arg,int n)4936 tx_reclaim(void *arg, int n)
4937 {
4938 	struct sge_txq *txq = arg;
4939 	struct sge_eq *eq = &txq->eq;
4940 
4941 	do {
4942 		if (TXQ_TRYLOCK(txq) == 0)
4943 			break;
4944 		n = reclaim_tx_descs(txq, 32);
4945 		if (eq->cidx == eq->pidx)
4946 			eq->equeqidx = eq->pidx;
4947 		TXQ_UNLOCK(txq);
4948 	} while (n > 0);
4949 }
4950 
4951 static __be64
get_flit(struct sglist_seg * segs,int nsegs,int idx)4952 get_flit(struct sglist_seg *segs, int nsegs, int idx)
4953 {
4954 	int i = (idx / 3) * 2;
4955 
4956 	switch (idx % 3) {
4957 	case 0: {
4958 		uint64_t rc;
4959 
4960 		rc = (uint64_t)segs[i].ss_len << 32;
4961 		if (i + 1 < nsegs)
4962 			rc |= (uint64_t)(segs[i + 1].ss_len);
4963 
4964 		return (htobe64(rc));
4965 	}
4966 	case 1:
4967 		return (htobe64(segs[i].ss_paddr));
4968 	case 2:
4969 		return (htobe64(segs[i + 1].ss_paddr));
4970 	}
4971 
4972 	return (0);
4973 }
4974 
4975 static void
find_best_refill_source(struct adapter * sc,struct sge_fl * fl,int maxp)4976 find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp)
4977 {
4978 	int8_t zidx, hwidx, idx;
4979 	uint16_t region1, region3;
4980 	int spare, spare_needed, n;
4981 	struct sw_zone_info *swz;
4982 	struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0];
4983 
4984 	/*
4985 	 * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize
4986 	 * large enough for the max payload and cluster metadata.  Otherwise
4987 	 * settle for the largest bufsize that leaves enough room in the cluster
4988 	 * for metadata.
4989 	 *
4990 	 * Without buffer packing: Look for the smallest zone which has a
4991 	 * bufsize large enough for the max payload.  Settle for the largest
4992 	 * bufsize available if there's nothing big enough for max payload.
4993 	 */
4994 	spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0;
4995 	swz = &sc->sge.sw_zone_info[0];
4996 	hwidx = -1;
4997 	for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) {
4998 		if (swz->size > largest_rx_cluster) {
4999 			if (__predict_true(hwidx != -1))
5000 				break;
5001 
5002 			/*
5003 			 * This is a misconfiguration.  largest_rx_cluster is
5004 			 * preventing us from finding a refill source.  See
5005 			 * dev.t5nex.<n>.buffer_sizes to figure out why.
5006 			 */
5007 			device_printf(sc->dev, "largest_rx_cluster=%u leaves no"
5008 			    " refill source for fl %p (dma %u).  Ignored.\n",
5009 			    largest_rx_cluster, fl, maxp);
5010 		}
5011 		for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) {
5012 			hwb = &hwb_list[idx];
5013 			spare = swz->size - hwb->size;
5014 			if (spare < spare_needed)
5015 				continue;
5016 
5017 			hwidx = idx;		/* best option so far */
5018 			if (hwb->size >= maxp) {
5019 
5020 				if ((fl->flags & FL_BUF_PACKING) == 0)
5021 					goto done; /* stop looking (not packing) */
5022 
5023 				if (swz->size >= safest_rx_cluster)
5024 					goto done; /* stop looking (packing) */
5025 			}
5026 			break;		/* keep looking, next zone */
5027 		}
5028 	}
5029 done:
5030 	/* A usable hwidx has been located. */
5031 	MPASS(hwidx != -1);
5032 	hwb = &hwb_list[hwidx];
5033 	zidx = hwb->zidx;
5034 	swz = &sc->sge.sw_zone_info[zidx];
5035 	region1 = 0;
5036 	region3 = swz->size - hwb->size;
5037 
5038 	/*
5039 	 * Stay within this zone and see if there is a better match when mbuf
5040 	 * inlining is allowed.  Remember that the hwidx's are sorted in
5041 	 * decreasing order of size (so in increasing order of spare area).
5042 	 */
5043 	for (idx = hwidx; idx != -1; idx = hwb->next) {
5044 		hwb = &hwb_list[idx];
5045 		spare = swz->size - hwb->size;
5046 
5047 		if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
5048 			break;
5049 
5050 		/*
5051 		 * Do not inline mbufs if doing so would violate the pad/pack
5052 		 * boundary alignment requirement.
5053 		 */
5054 		if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0)
5055 			continue;
5056 		if (fl->flags & FL_BUF_PACKING &&
5057 		    (MSIZE % sc->params.sge.pack_boundary) != 0)
5058 			continue;
5059 
5060 		if (spare < CL_METADATA_SIZE + MSIZE)
5061 			continue;
5062 		n = (spare - CL_METADATA_SIZE) / MSIZE;
5063 		if (n > howmany(hwb->size, maxp))
5064 			break;
5065 
5066 		hwidx = idx;
5067 		if (fl->flags & FL_BUF_PACKING) {
5068 			region1 = n * MSIZE;
5069 			region3 = spare - region1;
5070 		} else {
5071 			region1 = MSIZE;
5072 			region3 = spare - region1;
5073 			break;
5074 		}
5075 	}
5076 
5077 	KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES,
5078 	    ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp));
5079 	KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES,
5080 	    ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp));
5081 	KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 ==
5082 	    sc->sge.sw_zone_info[zidx].size,
5083 	    ("%s: bad buffer layout for fl %p, maxp %d. "
5084 		"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
5085 		sc->sge.sw_zone_info[zidx].size, region1,
5086 		sc->sge.hw_buf_info[hwidx].size, region3));
5087 	if (fl->flags & FL_BUF_PACKING || region1 > 0) {
5088 		KASSERT(region3 >= CL_METADATA_SIZE,
5089 		    ("%s: no room for metadata.  fl %p, maxp %d; "
5090 		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
5091 		    sc->sge.sw_zone_info[zidx].size, region1,
5092 		    sc->sge.hw_buf_info[hwidx].size, region3));
5093 		KASSERT(region1 % MSIZE == 0,
5094 		    ("%s: bad mbuf region for fl %p, maxp %d. "
5095 		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
5096 		    sc->sge.sw_zone_info[zidx].size, region1,
5097 		    sc->sge.hw_buf_info[hwidx].size, region3));
5098 	}
5099 
5100 	fl->cll_def.zidx = zidx;
5101 	fl->cll_def.hwidx = hwidx;
5102 	fl->cll_def.region1 = region1;
5103 	fl->cll_def.region3 = region3;
5104 }
5105 
5106 static void
find_safe_refill_source(struct adapter * sc,struct sge_fl * fl)5107 find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
5108 {
5109 	struct sge *s = &sc->sge;
5110 	struct hw_buf_info *hwb;
5111 	struct sw_zone_info *swz;
5112 	int spare;
5113 	int8_t hwidx;
5114 
5115 	if (fl->flags & FL_BUF_PACKING)
5116 		hwidx = s->safe_hwidx2;	/* with room for metadata */
5117 	else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) {
5118 		hwidx = s->safe_hwidx2;
5119 		hwb = &s->hw_buf_info[hwidx];
5120 		swz = &s->sw_zone_info[hwb->zidx];
5121 		spare = swz->size - hwb->size;
5122 
5123 		/* no good if there isn't room for an mbuf as well */
5124 		if (spare < CL_METADATA_SIZE + MSIZE)
5125 			hwidx = s->safe_hwidx1;
5126 	} else
5127 		hwidx = s->safe_hwidx1;
5128 
5129 	if (hwidx == -1) {
5130 		/* No fallback source */
5131 		fl->cll_alt.hwidx = -1;
5132 		fl->cll_alt.zidx = -1;
5133 
5134 		return;
5135 	}
5136 
5137 	hwb = &s->hw_buf_info[hwidx];
5138 	swz = &s->sw_zone_info[hwb->zidx];
5139 	spare = swz->size - hwb->size;
5140 	fl->cll_alt.hwidx = hwidx;
5141 	fl->cll_alt.zidx = hwb->zidx;
5142 	if (allow_mbufs_in_cluster &&
5143 	    (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0))
5144 		fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
5145 	else
5146 		fl->cll_alt.region1 = 0;
5147 	fl->cll_alt.region3 = spare - fl->cll_alt.region1;
5148 }
5149 
5150 static void
add_fl_to_sfl(struct adapter * sc,struct sge_fl * fl)5151 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
5152 {
5153 	mtx_lock(&sc->sfl_lock);
5154 	FL_LOCK(fl);
5155 	if ((fl->flags & FL_DOOMED) == 0) {
5156 		fl->flags |= FL_STARVING;
5157 		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
5158 		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
5159 	}
5160 	FL_UNLOCK(fl);
5161 	mtx_unlock(&sc->sfl_lock);
5162 }
5163 
5164 static void
handle_wrq_egr_update(struct adapter * sc,struct sge_eq * eq)5165 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq)
5166 {
5167 	struct sge_wrq *wrq = (void *)eq;
5168 
5169 	atomic_readandclear_int(&eq->equiq);
5170 	taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task);
5171 }
5172 
5173 static void
handle_eth_egr_update(struct adapter * sc,struct sge_eq * eq)5174 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq)
5175 {
5176 	struct sge_txq *txq = (void *)eq;
5177 
5178 	MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH);
5179 
5180 	atomic_readandclear_int(&eq->equiq);
5181 	mp_ring_check_drainage(txq->r, 0);
5182 	taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task);
5183 }
5184 
5185 static int
handle_sge_egr_update(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)5186 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
5187     struct mbuf *m)
5188 {
5189 	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
5190 	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
5191 	struct adapter *sc = iq->adapter;
5192 	struct sge *s = &sc->sge;
5193 	struct sge_eq *eq;
5194 	static void (*h[])(struct adapter *, struct sge_eq *) = {NULL,
5195 		&handle_wrq_egr_update, &handle_eth_egr_update,
5196 		&handle_wrq_egr_update};
5197 
5198 	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
5199 	    rss->opcode));
5200 
5201 	eq = s->eqmap[qid - s->eq_start - s->eq_base];
5202 	(*h[eq->flags & EQ_TYPEMASK])(sc, eq);
5203 
5204 	return (0);
5205 }
5206 
5207 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */
5208 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
5209     offsetof(struct cpl_fw6_msg, data));
5210 
5211 static int
handle_fw_msg(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)5212 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
5213 {
5214 	struct adapter *sc = iq->adapter;
5215 	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
5216 
5217 	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
5218 	    rss->opcode));
5219 
5220 	if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
5221 		const struct rss_header *rss2;
5222 
5223 		rss2 = (const struct rss_header *)&cpl->data[0];
5224 		return (t4_cpl_handler[rss2->opcode](iq, rss2, m));
5225 	}
5226 
5227 	return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0]));
5228 }
5229 
5230 /**
5231  *	t4_handle_wrerr_rpl - process a FW work request error message
5232  *	@adap: the adapter
5233  *	@rpl: start of the FW message
5234  */
5235 static int
t4_handle_wrerr_rpl(struct adapter * adap,const __be64 * rpl)5236 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl)
5237 {
5238 	u8 opcode = *(const u8 *)rpl;
5239 	const struct fw_error_cmd *e = (const void *)rpl;
5240 	unsigned int i;
5241 
5242 	if (opcode != FW_ERROR_CMD) {
5243 		log(LOG_ERR,
5244 		    "%s: Received WRERR_RPL message with opcode %#x\n",
5245 		    device_get_nameunit(adap->dev), opcode);
5246 		return (EINVAL);
5247 	}
5248 	log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev),
5249 	    G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" :
5250 	    "non-fatal");
5251 	switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) {
5252 	case FW_ERROR_TYPE_EXCEPTION:
5253 		log(LOG_ERR, "exception info:\n");
5254 		for (i = 0; i < nitems(e->u.exception.info); i++)
5255 			log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ",
5256 			    be32toh(e->u.exception.info[i]));
5257 		log(LOG_ERR, "\n");
5258 		break;
5259 	case FW_ERROR_TYPE_HWMODULE:
5260 		log(LOG_ERR, "HW module regaddr %08x regval %08x\n",
5261 		    be32toh(e->u.hwmodule.regaddr),
5262 		    be32toh(e->u.hwmodule.regval));
5263 		break;
5264 	case FW_ERROR_TYPE_WR:
5265 		log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n",
5266 		    be16toh(e->u.wr.cidx),
5267 		    G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)),
5268 		    G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)),
5269 		    be32toh(e->u.wr.eqid));
5270 		for (i = 0; i < nitems(e->u.wr.wrhdr); i++)
5271 			log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ",
5272 			    e->u.wr.wrhdr[i]);
5273 		log(LOG_ERR, "\n");
5274 		break;
5275 	case FW_ERROR_TYPE_ACL:
5276 		log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s",
5277 		    be16toh(e->u.acl.cidx),
5278 		    G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)),
5279 		    G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)),
5280 		    be32toh(e->u.acl.eqid),
5281 		    G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? "vlanid" :
5282 		    "MAC");
5283 		for (i = 0; i < nitems(e->u.acl.val); i++)
5284 			log(LOG_ERR, " %02x", e->u.acl.val[i]);
5285 		log(LOG_ERR, "\n");
5286 		break;
5287 	default:
5288 		log(LOG_ERR, "type %#x\n",
5289 		    G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type)));
5290 		return (EINVAL);
5291 	}
5292 	return (0);
5293 }
5294 
5295 static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)5296 sysctl_uint16(SYSCTL_HANDLER_ARGS)
5297 {
5298 	uint16_t *id = arg1;
5299 	int i = *id;
5300 
5301 	return sysctl_handle_int(oidp, &i, 0, req);
5302 }
5303 
5304 static int
sysctl_bufsizes(SYSCTL_HANDLER_ARGS)5305 sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
5306 {
5307 	struct sge *s = arg1;
5308 	struct hw_buf_info *hwb = &s->hw_buf_info[0];
5309 	struct sw_zone_info *swz = &s->sw_zone_info[0];
5310 	int i, rc;
5311 	struct sbuf sb;
5312 	char c;
5313 
5314 	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
5315 	for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
5316 		if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster)
5317 			c = '*';
5318 		else
5319 			c = '\0';
5320 
5321 		sbuf_printf(&sb, "%u%c ", hwb->size, c);
5322 	}
5323 	sbuf_trim(&sb);
5324 	sbuf_finish(&sb);
5325 	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
5326 	sbuf_delete(&sb);
5327 	return (rc);
5328 }
5329