1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 Chelsio Communications, Inc.
5 * All rights reserved.
6 * Written by: Navdeep Parhar <np@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include "opt_kern_tls.h"
34 #include "opt_ratelimit.h"
35
36 #include <sys/types.h>
37 #include <sys/eventhandler.h>
38 #include <sys/mbuf.h>
39 #include <sys/socket.h>
40 #include <sys/kernel.h>
41 #include <sys/ktls.h>
42 #include <sys/malloc.h>
43 #include <sys/queue.h>
44 #include <sys/sbuf.h>
45 #include <sys/taskqueue.h>
46 #include <sys/time.h>
47 #include <sys/sglist.h>
48 #include <sys/sysctl.h>
49 #include <sys/smp.h>
50 #include <sys/socketvar.h>
51 #include <sys/counter.h>
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_vlan_var.h>
56 #include <net/if_vxlan.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/ip6.h>
60 #include <netinet/tcp.h>
61 #include <netinet/udp.h>
62 #include <machine/in_cksum.h>
63 #include <machine/md_var.h>
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #ifdef DEV_NETMAP
67 #include <machine/bus.h>
68 #include <sys/selinfo.h>
69 #include <net/if_var.h>
70 #include <net/netmap.h>
71 #include <dev/netmap/netmap_kern.h>
72 #endif
73
74 #include "common/common.h"
75 #include "common/t4_regs.h"
76 #include "common/t4_regs_values.h"
77 #include "common/t4_msg.h"
78 #include "t4_l2t.h"
79 #include "t4_mp_ring.h"
80
81 #define RX_COPY_THRESHOLD MINCLSIZE
82
83 /* Internal mbuf flags stored in PH_loc.eight[1]. */
84 #define MC_NOMAP 0x01
85 #define MC_RAW_WR 0x02
86 #define MC_TLS 0x04
87
88 /*
89 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
90 * 0-7 are valid values.
91 */
92 static int fl_pktshift = 0;
93 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0,
94 "payload DMA offset in rx buffer (bytes)");
95
96 /*
97 * Pad ethernet payload up to this boundary.
98 * -1: driver should figure out a good value.
99 * 0: disable padding.
100 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
101 */
102 int fl_pad = -1;
103 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0,
104 "payload pad boundary (bytes)");
105
106 /*
107 * Status page length.
108 * -1: driver should figure out a good value.
109 * 64 or 128 are the only other valid values.
110 */
111 static int spg_len = -1;
112 SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
113 "status page size (bytes)");
114
115 /*
116 * Congestion drops.
117 * -1: no congestion feedback (not recommended).
118 * 0: backpressure the channel instead of dropping packets right away.
119 * 1: no backpressure, drop packets for the congested queue immediately.
120 * 2: both backpressure and drop.
121 */
122 static int cong_drop = 0;
123 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
124 "Congestion control for NIC RX queues (0 = backpressure, 1 = drop, 2 = both");
125 #ifdef TCP_OFFLOAD
126 static int ofld_cong_drop = 0;
127 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ofld_cong_drop, CTLFLAG_RDTUN, &ofld_cong_drop, 0,
128 "Congestion control for TOE RX queues (0 = backpressure, 1 = drop, 2 = both");
129 #endif
130
131 /*
132 * Deliver multiple frames in the same free list buffer if they fit.
133 * -1: let the driver decide whether to enable buffer packing or not.
134 * 0: disable buffer packing.
135 * 1: enable buffer packing.
136 */
137 static int buffer_packing = -1;
138 SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing,
139 0, "Enable buffer packing");
140
141 /*
142 * Start next frame in a packed buffer at this boundary.
143 * -1: driver should figure out a good value.
144 * T4: driver will ignore this and use the same value as fl_pad above.
145 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
146 */
147 static int fl_pack = -1;
148 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0,
149 "payload pack boundary (bytes)");
150
151 /*
152 * Largest rx cluster size that the driver is allowed to allocate.
153 */
154 static int largest_rx_cluster = MJUM16BYTES;
155 SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN,
156 &largest_rx_cluster, 0, "Largest rx cluster (bytes)");
157
158 /*
159 * Size of cluster allocation that's most likely to succeed. The driver will
160 * fall back to this size if it fails to allocate clusters larger than this.
161 */
162 static int safest_rx_cluster = PAGE_SIZE;
163 SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN,
164 &safest_rx_cluster, 0, "Safe rx cluster (bytes)");
165
166 #ifdef RATELIMIT
167 /*
168 * Knob to control TCP timestamp rewriting, and the granularity of the tick used
169 * for rewriting. -1 and 0-3 are all valid values.
170 * -1: hardware should leave the TCP timestamps alone.
171 * 0: 1ms
172 * 1: 100us
173 * 2: 10us
174 * 3: 1us
175 */
176 static int tsclk = -1;
177 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0,
178 "Control TCP timestamp rewriting when using pacing");
179
180 static int eo_max_backlog = 1024 * 1024;
181 SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog,
182 0, "Maximum backlog of ratelimited data per flow");
183 #endif
184
185 /*
186 * The interrupt holdoff timers are multiplied by this value on T6+.
187 * 1 and 3-17 (both inclusive) are legal values.
188 */
189 static int tscale = 1;
190 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0,
191 "Interrupt holdoff timer scale on T6+");
192
193 /*
194 * Number of LRO entries in the lro_ctrl structure per rx queue.
195 */
196 static int lro_entries = TCP_LRO_ENTRIES;
197 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0,
198 "Number of LRO entries per RX queue");
199
200 /*
201 * This enables presorting of frames before they're fed into tcp_lro_rx.
202 */
203 static int lro_mbufs = 0;
204 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0,
205 "Enable presorting of LRO frames");
206
207 static counter_u64_t pullups;
208 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, pullups, CTLFLAG_RD, &pullups,
209 "Number of mbuf pullups performed");
210
211 static counter_u64_t defrags;
212 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, defrags, CTLFLAG_RD, &defrags,
213 "Number of mbuf defrags performed");
214
215 static int t4_tx_coalesce = 1;
216 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce, CTLFLAG_RWTUN, &t4_tx_coalesce, 0,
217 "tx coalescing allowed");
218
219 /*
220 * The driver will make aggressive attempts at tx coalescing if it sees these
221 * many packets eligible for coalescing in quick succession, with no more than
222 * the specified gap in between the eth_tx calls that delivered the packets.
223 */
224 static int t4_tx_coalesce_pkts = 32;
225 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_pkts, CTLFLAG_RWTUN,
226 &t4_tx_coalesce_pkts, 0,
227 "# of consecutive packets (1 - 255) that will trigger tx coalescing");
228 static int t4_tx_coalesce_gap = 5;
229 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_gap, CTLFLAG_RWTUN,
230 &t4_tx_coalesce_gap, 0, "tx gap (in microseconds)");
231
232 static int service_iq(struct sge_iq *, int);
233 static int service_iq_fl(struct sge_iq *, int);
234 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
235 static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *,
236 u_int);
237 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
238 int, int, int);
239 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
240 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t,
241 struct sge_iq *, char *);
242 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *,
243 struct sysctl_ctx_list *, struct sysctl_oid *);
244 static void free_iq_fl(struct adapter *, struct sge_iq *, struct sge_fl *);
245 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
246 struct sge_iq *);
247 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *,
248 struct sysctl_oid *, struct sge_fl *);
249 static int alloc_iq_fl_hwq(struct vi_info *, struct sge_iq *, struct sge_fl *);
250 static int free_iq_fl_hwq(struct adapter *, struct sge_iq *, struct sge_fl *);
251 static int alloc_fwq(struct adapter *);
252 static void free_fwq(struct adapter *);
253 static int alloc_ctrlq(struct adapter *, int);
254 static void free_ctrlq(struct adapter *, int);
255 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, int);
256 static void free_rxq(struct vi_info *, struct sge_rxq *);
257 static void add_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
258 struct sge_rxq *);
259 #ifdef TCP_OFFLOAD
260 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int,
261 int);
262 static void free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *);
263 static void add_ofld_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
264 struct sge_ofld_rxq *);
265 #endif
266 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
267 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
268 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
269 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
270 #endif
271 static int alloc_eq(struct adapter *, struct sge_eq *, struct sysctl_ctx_list *,
272 struct sysctl_oid *);
273 static void free_eq(struct adapter *, struct sge_eq *);
274 static void add_eq_sysctls(struct adapter *, struct sysctl_ctx_list *,
275 struct sysctl_oid *, struct sge_eq *);
276 static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *);
277 static int free_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *);
278 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *,
279 struct sysctl_ctx_list *, struct sysctl_oid *);
280 static void free_wrq(struct adapter *, struct sge_wrq *);
281 static void add_wrq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
282 struct sge_wrq *);
283 static int alloc_txq(struct vi_info *, struct sge_txq *, int);
284 static void free_txq(struct vi_info *, struct sge_txq *);
285 static void add_txq_sysctls(struct vi_info *, struct sysctl_ctx_list *,
286 struct sysctl_oid *, struct sge_txq *);
287 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
288 static int alloc_ofld_txq(struct vi_info *, struct sge_ofld_txq *, int);
289 static void free_ofld_txq(struct vi_info *, struct sge_ofld_txq *);
290 static void add_ofld_txq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
291 struct sge_ofld_txq *);
292 #endif
293 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
294 static inline void ring_fl_db(struct adapter *, struct sge_fl *);
295 static int refill_fl(struct adapter *, struct sge_fl *, int);
296 static void refill_sfl(void *);
297 static int find_refill_source(struct adapter *, int, bool);
298 static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
299
300 static inline void get_pkt_gl(struct mbuf *, struct sglist *);
301 static inline u_int txpkt_len16(u_int, const u_int);
302 static inline u_int txpkt_vm_len16(u_int, const u_int);
303 static inline void calculate_mbuf_len16(struct mbuf *, bool);
304 static inline u_int txpkts0_len16(u_int);
305 static inline u_int txpkts1_len16(void);
306 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
307 static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *,
308 u_int);
309 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *,
310 struct mbuf *);
311 static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *,
312 int, bool *);
313 static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *,
314 int, bool *);
315 static u_int write_txpkts_wr(struct adapter *, struct sge_txq *);
316 static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *);
317 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int);
318 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
319 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int);
320 static inline uint16_t read_hw_cidx(struct sge_eq *);
321 static inline u_int reclaimable_tx_desc(struct sge_eq *);
322 static inline u_int total_available_tx_desc(struct sge_eq *);
323 static u_int reclaim_tx_descs(struct sge_txq *, u_int);
324 static void tx_reclaim(void *, int);
325 static __be64 get_flit(struct sglist_seg *, int, int);
326 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
327 struct mbuf *);
328 static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
329 struct mbuf *);
330 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *);
331 static void wrq_tx_drain(void *, int);
332 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *);
333
334 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
335 #ifdef RATELIMIT
336 #if defined(INET) || defined(INET6)
337 static inline u_int txpkt_eo_len16(u_int, u_int, u_int);
338 #endif
339 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *,
340 struct mbuf *);
341 #endif
342
343 static counter_u64_t extfree_refs;
344 static counter_u64_t extfree_rels;
345
346 an_handler_t t4_an_handler;
347 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES];
348 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS];
349 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES];
350 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES];
351 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES];
352 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES];
353 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES];
354
355 void
t4_register_an_handler(an_handler_t h)356 t4_register_an_handler(an_handler_t h)
357 {
358 uintptr_t *loc;
359
360 MPASS(h == NULL || t4_an_handler == NULL);
361
362 loc = (uintptr_t *)&t4_an_handler;
363 atomic_store_rel_ptr(loc, (uintptr_t)h);
364 }
365
366 void
t4_register_fw_msg_handler(int type,fw_msg_handler_t h)367 t4_register_fw_msg_handler(int type, fw_msg_handler_t h)
368 {
369 uintptr_t *loc;
370
371 MPASS(type < nitems(t4_fw_msg_handler));
372 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL);
373 /*
374 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
375 * handler dispatch table. Reject any attempt to install a handler for
376 * this subtype.
377 */
378 MPASS(type != FW_TYPE_RSSCPL);
379 MPASS(type != FW6_TYPE_RSSCPL);
380
381 loc = (uintptr_t *)&t4_fw_msg_handler[type];
382 atomic_store_rel_ptr(loc, (uintptr_t)h);
383 }
384
385 void
t4_register_cpl_handler(int opcode,cpl_handler_t h)386 t4_register_cpl_handler(int opcode, cpl_handler_t h)
387 {
388 uintptr_t *loc;
389
390 MPASS(opcode < nitems(t4_cpl_handler));
391 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL);
392
393 loc = (uintptr_t *)&t4_cpl_handler[opcode];
394 atomic_store_rel_ptr(loc, (uintptr_t)h);
395 }
396
397 static int
set_tcb_rpl_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)398 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
399 struct mbuf *m)
400 {
401 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
402 u_int tid;
403 int cookie;
404
405 MPASS(m == NULL);
406
407 tid = GET_TID(cpl);
408 if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) {
409 /*
410 * The return code for filter-write is put in the CPL cookie so
411 * we have to rely on the hardware tid (is_ftid) to determine
412 * that this is a response to a filter.
413 */
414 cookie = CPL_COOKIE_FILTER;
415 } else {
416 cookie = G_COOKIE(cpl->cookie);
417 }
418 MPASS(cookie > CPL_COOKIE_RESERVED);
419 MPASS(cookie < nitems(set_tcb_rpl_handlers));
420
421 return (set_tcb_rpl_handlers[cookie](iq, rss, m));
422 }
423
424 static int
l2t_write_rpl_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)425 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
426 struct mbuf *m)
427 {
428 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
429 unsigned int cookie;
430
431 MPASS(m == NULL);
432
433 cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER;
434 return (l2t_write_rpl_handlers[cookie](iq, rss, m));
435 }
436
437 static int
act_open_rpl_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)438 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
439 struct mbuf *m)
440 {
441 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1);
442 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status)));
443
444 MPASS(m == NULL);
445 MPASS(cookie != CPL_COOKIE_RESERVED);
446
447 return (act_open_rpl_handlers[cookie](iq, rss, m));
448 }
449
450 static int
abort_rpl_rss_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)451 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss,
452 struct mbuf *m)
453 {
454 struct adapter *sc = iq->adapter;
455 u_int cookie;
456
457 MPASS(m == NULL);
458 if (is_hashfilter(sc))
459 cookie = CPL_COOKIE_HASHFILTER;
460 else
461 cookie = CPL_COOKIE_TOM;
462
463 return (abort_rpl_rss_handlers[cookie](iq, rss, m));
464 }
465
466 static int
fw4_ack_handler(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)467 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
468 {
469 struct adapter *sc = iq->adapter;
470 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
471 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
472 u_int cookie;
473
474 MPASS(m == NULL);
475 if (is_etid(sc, tid))
476 cookie = CPL_COOKIE_ETHOFLD;
477 else
478 cookie = CPL_COOKIE_TOM;
479
480 return (fw4_ack_handlers[cookie](iq, rss, m));
481 }
482
483 static void
t4_init_shared_cpl_handlers(void)484 t4_init_shared_cpl_handlers(void)
485 {
486
487 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler);
488 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler);
489 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler);
490 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler);
491 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler);
492 }
493
494 void
t4_register_shared_cpl_handler(int opcode,cpl_handler_t h,int cookie)495 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie)
496 {
497 uintptr_t *loc;
498
499 MPASS(opcode < nitems(t4_cpl_handler));
500 MPASS(cookie > CPL_COOKIE_RESERVED);
501 MPASS(cookie < NUM_CPL_COOKIES);
502 MPASS(t4_cpl_handler[opcode] != NULL);
503
504 switch (opcode) {
505 case CPL_SET_TCB_RPL:
506 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie];
507 break;
508 case CPL_L2T_WRITE_RPL:
509 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie];
510 break;
511 case CPL_ACT_OPEN_RPL:
512 loc = (uintptr_t *)&act_open_rpl_handlers[cookie];
513 break;
514 case CPL_ABORT_RPL_RSS:
515 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie];
516 break;
517 case CPL_FW4_ACK:
518 loc = (uintptr_t *)&fw4_ack_handlers[cookie];
519 break;
520 default:
521 MPASS(0);
522 return;
523 }
524 MPASS(h == NULL || *loc == (uintptr_t)NULL);
525 atomic_store_rel_ptr(loc, (uintptr_t)h);
526 }
527
528 /*
529 * Called on MOD_LOAD. Validates and calculates the SGE tunables.
530 */
531 void
t4_sge_modload(void)532 t4_sge_modload(void)
533 {
534
535 if (fl_pktshift < 0 || fl_pktshift > 7) {
536 printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
537 " using 0 instead.\n", fl_pktshift);
538 fl_pktshift = 0;
539 }
540
541 if (spg_len != 64 && spg_len != 128) {
542 int len;
543
544 #if defined(__i386__) || defined(__amd64__)
545 len = cpu_clflush_line_size > 64 ? 128 : 64;
546 #else
547 len = 64;
548 #endif
549 if (spg_len != -1) {
550 printf("Invalid hw.cxgbe.spg_len value (%d),"
551 " using %d instead.\n", spg_len, len);
552 }
553 spg_len = len;
554 }
555
556 if (cong_drop < -1 || cong_drop > 2) {
557 printf("Invalid hw.cxgbe.cong_drop value (%d),"
558 " using 0 instead.\n", cong_drop);
559 cong_drop = 0;
560 }
561 #ifdef TCP_OFFLOAD
562 if (ofld_cong_drop < -1 || ofld_cong_drop > 2) {
563 printf("Invalid hw.cxgbe.ofld_cong_drop value (%d),"
564 " using 0 instead.\n", ofld_cong_drop);
565 ofld_cong_drop = 0;
566 }
567 #endif
568
569 if (tscale != 1 && (tscale < 3 || tscale > 17)) {
570 printf("Invalid hw.cxgbe.tscale value (%d),"
571 " using 1 instead.\n", tscale);
572 tscale = 1;
573 }
574
575 if (largest_rx_cluster != MCLBYTES &&
576 #if MJUMPAGESIZE != MCLBYTES
577 largest_rx_cluster != MJUMPAGESIZE &&
578 #endif
579 largest_rx_cluster != MJUM9BYTES &&
580 largest_rx_cluster != MJUM16BYTES) {
581 printf("Invalid hw.cxgbe.largest_rx_cluster value (%d),"
582 " using %d instead.\n", largest_rx_cluster, MJUM16BYTES);
583 largest_rx_cluster = MJUM16BYTES;
584 }
585
586 if (safest_rx_cluster != MCLBYTES &&
587 #if MJUMPAGESIZE != MCLBYTES
588 safest_rx_cluster != MJUMPAGESIZE &&
589 #endif
590 safest_rx_cluster != MJUM9BYTES &&
591 safest_rx_cluster != MJUM16BYTES) {
592 printf("Invalid hw.cxgbe.safest_rx_cluster value (%d),"
593 " using %d instead.\n", safest_rx_cluster, MJUMPAGESIZE);
594 safest_rx_cluster = MJUMPAGESIZE;
595 }
596
597 extfree_refs = counter_u64_alloc(M_WAITOK);
598 extfree_rels = counter_u64_alloc(M_WAITOK);
599 pullups = counter_u64_alloc(M_WAITOK);
600 defrags = counter_u64_alloc(M_WAITOK);
601 counter_u64_zero(extfree_refs);
602 counter_u64_zero(extfree_rels);
603 counter_u64_zero(pullups);
604 counter_u64_zero(defrags);
605
606 t4_init_shared_cpl_handlers();
607 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg);
608 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg);
609 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
610 #ifdef RATELIMIT
611 t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack,
612 CPL_COOKIE_ETHOFLD);
613 #endif
614 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
615 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl);
616 }
617
618 void
t4_sge_modunload(void)619 t4_sge_modunload(void)
620 {
621
622 counter_u64_free(extfree_refs);
623 counter_u64_free(extfree_rels);
624 counter_u64_free(pullups);
625 counter_u64_free(defrags);
626 }
627
628 uint64_t
t4_sge_extfree_refs(void)629 t4_sge_extfree_refs(void)
630 {
631 uint64_t refs, rels;
632
633 rels = counter_u64_fetch(extfree_rels);
634 refs = counter_u64_fetch(extfree_refs);
635
636 return (refs - rels);
637 }
638
639 /* max 4096 */
640 #define MAX_PACK_BOUNDARY 512
641
642 static inline void
setup_pad_and_pack_boundaries(struct adapter * sc)643 setup_pad_and_pack_boundaries(struct adapter *sc)
644 {
645 uint32_t v, m;
646 int pad, pack, pad_shift;
647
648 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT :
649 X_INGPADBOUNDARY_SHIFT;
650 pad = fl_pad;
651 if (fl_pad < (1 << pad_shift) ||
652 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) ||
653 !powerof2(fl_pad)) {
654 /*
655 * If there is any chance that we might use buffer packing and
656 * the chip is a T4, then pick 64 as the pad/pack boundary. Set
657 * it to the minimum allowed in all other cases.
658 */
659 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift;
660
661 /*
662 * For fl_pad = 0 we'll still write a reasonable value to the
663 * register but all the freelists will opt out of padding.
664 * We'll complain here only if the user tried to set it to a
665 * value greater than 0 that was invalid.
666 */
667 if (fl_pad > 0) {
668 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
669 " (%d), using %d instead.\n", fl_pad, pad);
670 }
671 }
672 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
673 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift);
674 t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
675
676 if (is_t4(sc)) {
677 if (fl_pack != -1 && fl_pack != pad) {
678 /* Complain but carry on. */
679 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
680 " using %d instead.\n", fl_pack, pad);
681 }
682 return;
683 }
684
685 pack = fl_pack;
686 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
687 !powerof2(fl_pack)) {
688 if (sc->params.pci.mps > MAX_PACK_BOUNDARY)
689 pack = MAX_PACK_BOUNDARY;
690 else
691 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
692 MPASS(powerof2(pack));
693 if (pack < 16)
694 pack = 16;
695 if (pack == 32)
696 pack = 64;
697 if (pack > 4096)
698 pack = 4096;
699 if (fl_pack != -1) {
700 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
701 " (%d), using %d instead.\n", fl_pack, pack);
702 }
703 }
704 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
705 if (pack == 16)
706 v = V_INGPACKBOUNDARY(0);
707 else
708 v = V_INGPACKBOUNDARY(ilog2(pack) - 5);
709
710 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */
711 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
712 }
713
714 /*
715 * adap->params.vpd.cclk must be set up before this is called.
716 */
717 void
t4_tweak_chip_settings(struct adapter * sc)718 t4_tweak_chip_settings(struct adapter *sc)
719 {
720 int i, reg;
721 uint32_t v, m;
722 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
723 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
724 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
725 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
726 static int sw_buf_sizes[] = {
727 MCLBYTES,
728 #if MJUMPAGESIZE != MCLBYTES
729 MJUMPAGESIZE,
730 #endif
731 MJUM9BYTES,
732 MJUM16BYTES
733 };
734
735 KASSERT(sc->flags & MASTER_PF,
736 ("%s: trying to change chip settings when not master.", __func__));
737
738 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
739 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
740 V_EGRSTATUSPAGESIZE(spg_len == 128);
741 t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
742
743 setup_pad_and_pack_boundaries(sc);
744
745 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
746 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
747 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
748 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
749 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
750 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
751 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
752 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
753 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);
754
755 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096);
756 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536);
757 reg = A_SGE_FL_BUFFER_SIZE2;
758 for (i = 0; i < nitems(sw_buf_sizes); i++) {
759 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15);
760 t4_write_reg(sc, reg, sw_buf_sizes[i]);
761 reg += 4;
762 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15);
763 t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE);
764 reg += 4;
765 }
766
767 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
768 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
769 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);
770
771 KASSERT(intr_timer[0] <= timer_max,
772 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
773 timer_max));
774 for (i = 1; i < nitems(intr_timer); i++) {
775 KASSERT(intr_timer[i] >= intr_timer[i - 1],
776 ("%s: timers not listed in increasing order (%d)",
777 __func__, i));
778
779 while (intr_timer[i] > timer_max) {
780 if (i == nitems(intr_timer) - 1) {
781 intr_timer[i] = timer_max;
782 break;
783 }
784 intr_timer[i] += intr_timer[i - 1];
785 intr_timer[i] /= 2;
786 }
787 }
788
789 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
790 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
791 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
792 v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
793 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
794 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
795 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
796 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
797 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);
798
799 if (chip_id(sc) >= CHELSIO_T6) {
800 m = V_TSCALE(M_TSCALE);
801 if (tscale == 1)
802 v = 0;
803 else
804 v = V_TSCALE(tscale - 2);
805 t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v);
806
807 if (sc->debug_flags & DF_DISABLE_TCB_CACHE) {
808 m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN |
809 V_WRTHRTHRESH(M_WRTHRTHRESH);
810 t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1);
811 v &= ~m;
812 v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN |
813 V_WRTHRTHRESH(16);
814 t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1);
815 }
816 }
817
818 /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */
819 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
820 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);
821
822 /*
823 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been
824 * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we
825 * may have to deal with is MAXPHYS + 1 page.
826 */
827 v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4);
828 t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v);
829
830 /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */
831 m = v = F_TDDPTAGTCB | F_ISCSITAGTCB;
832 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);
833
834 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
835 F_RESETDDPOFFSET;
836 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
837 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
838 }
839
840 /*
841 * SGE wants the buffer to be at least 64B and then a multiple of 16. Its
842 * address mut be 16B aligned. If padding is in use the buffer's start and end
843 * need to be aligned to the pad boundary as well. We'll just make sure that
844 * the size is a multiple of the pad boundary here, it is up to the buffer
845 * allocation code to make sure the start of the buffer is aligned.
846 */
847 static inline int
hwsz_ok(struct adapter * sc,int hwsz)848 hwsz_ok(struct adapter *sc, int hwsz)
849 {
850 int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1;
851
852 return (hwsz >= 64 && (hwsz & mask) == 0);
853 }
854
855 /*
856 * Initialize the rx buffer sizes and figure out which zones the buffers will
857 * be allocated from.
858 */
859 void
t4_init_rx_buf_info(struct adapter * sc)860 t4_init_rx_buf_info(struct adapter *sc)
861 {
862 struct sge *s = &sc->sge;
863 struct sge_params *sp = &sc->params.sge;
864 int i, j, n;
865 static int sw_buf_sizes[] = { /* Sorted by size */
866 MCLBYTES,
867 #if MJUMPAGESIZE != MCLBYTES
868 MJUMPAGESIZE,
869 #endif
870 MJUM9BYTES,
871 MJUM16BYTES
872 };
873 struct rx_buf_info *rxb;
874
875 s->safe_zidx = -1;
876 rxb = &s->rx_buf_info[0];
877 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
878 rxb->size1 = sw_buf_sizes[i];
879 rxb->zone = m_getzone(rxb->size1);
880 rxb->type = m_gettype(rxb->size1);
881 rxb->size2 = 0;
882 rxb->hwidx1 = -1;
883 rxb->hwidx2 = -1;
884 for (j = 0; j < SGE_FLBUF_SIZES; j++) {
885 int hwsize = sp->sge_fl_buffer_size[j];
886
887 if (!hwsz_ok(sc, hwsize))
888 continue;
889
890 /* hwidx for size1 */
891 if (rxb->hwidx1 == -1 && rxb->size1 == hwsize)
892 rxb->hwidx1 = j;
893
894 /* hwidx for size2 (buffer packing) */
895 if (rxb->size1 - CL_METADATA_SIZE < hwsize)
896 continue;
897 n = rxb->size1 - hwsize - CL_METADATA_SIZE;
898 if (n == 0) {
899 rxb->hwidx2 = j;
900 rxb->size2 = hwsize;
901 break; /* stop looking */
902 }
903 if (rxb->hwidx2 != -1) {
904 if (n < sp->sge_fl_buffer_size[rxb->hwidx2] -
905 hwsize - CL_METADATA_SIZE) {
906 rxb->hwidx2 = j;
907 rxb->size2 = hwsize;
908 }
909 } else if (n <= 2 * CL_METADATA_SIZE) {
910 rxb->hwidx2 = j;
911 rxb->size2 = hwsize;
912 }
913 }
914 if (rxb->hwidx2 != -1)
915 sc->flags |= BUF_PACKING_OK;
916 if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster)
917 s->safe_zidx = i;
918 }
919 }
920
921 /*
922 * Verify some basic SGE settings for the PF and VF driver, and other
923 * miscellaneous settings for the PF driver.
924 */
925 int
t4_verify_chip_settings(struct adapter * sc)926 t4_verify_chip_settings(struct adapter *sc)
927 {
928 struct sge_params *sp = &sc->params.sge;
929 uint32_t m, v, r;
930 int rc = 0;
931 const uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
932
933 m = F_RXPKTCPLMODE;
934 v = F_RXPKTCPLMODE;
935 r = sp->sge_control;
936 if ((r & m) != v) {
937 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
938 rc = EINVAL;
939 }
940
941 /*
942 * If this changes then every single use of PAGE_SHIFT in the driver
943 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift.
944 */
945 if (sp->page_shift != PAGE_SHIFT) {
946 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
947 rc = EINVAL;
948 }
949
950 if (sc->flags & IS_VF)
951 return (0);
952
953 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
954 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
955 if (r != v) {
956 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
957 if (sc->vres.ddp.size != 0)
958 rc = EINVAL;
959 }
960
961 m = v = F_TDDPTAGTCB;
962 r = t4_read_reg(sc, A_ULP_RX_CTL);
963 if ((r & m) != v) {
964 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
965 if (sc->vres.ddp.size != 0)
966 rc = EINVAL;
967 }
968
969 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
970 F_RESETDDPOFFSET;
971 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
972 r = t4_read_reg(sc, A_TP_PARA_REG5);
973 if ((r & m) != v) {
974 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
975 if (sc->vres.ddp.size != 0)
976 rc = EINVAL;
977 }
978
979 return (rc);
980 }
981
982 int
t4_create_dma_tag(struct adapter * sc)983 t4_create_dma_tag(struct adapter *sc)
984 {
985 int rc;
986
987 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
988 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
989 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
990 NULL, &sc->dmat);
991 if (rc != 0) {
992 device_printf(sc->dev,
993 "failed to create main DMA tag: %d\n", rc);
994 }
995
996 return (rc);
997 }
998
999 void
t4_sge_sysctls(struct adapter * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid_list * children)1000 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
1001 struct sysctl_oid_list *children)
1002 {
1003 struct sge_params *sp = &sc->params.sge;
1004
1005 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
1006 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1007 sysctl_bufsizes, "A", "freelist buffer sizes");
1008
1009 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
1010 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)");
1011
1012 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
1013 NULL, sp->pad_boundary, "payload pad boundary (bytes)");
1014
1015 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
1016 NULL, sp->spg_len, "status page size (bytes)");
1017
1018 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
1019 NULL, cong_drop, "congestion drop setting");
1020 #ifdef TCP_OFFLOAD
1021 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ofld_cong_drop", CTLFLAG_RD,
1022 NULL, ofld_cong_drop, "congestion drop setting");
1023 #endif
1024
1025 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
1026 NULL, sp->pack_boundary, "payload pack boundary (bytes)");
1027 }
1028
1029 int
t4_destroy_dma_tag(struct adapter * sc)1030 t4_destroy_dma_tag(struct adapter *sc)
1031 {
1032 if (sc->dmat)
1033 bus_dma_tag_destroy(sc->dmat);
1034
1035 return (0);
1036 }
1037
1038 /*
1039 * Allocate and initialize the firmware event queue, control queues, and special
1040 * purpose rx queues owned by the adapter.
1041 *
1042 * Returns errno on failure. Resources allocated up to that point may still be
1043 * allocated. Caller is responsible for cleanup in case this function fails.
1044 */
1045 int
t4_setup_adapter_queues(struct adapter * sc)1046 t4_setup_adapter_queues(struct adapter *sc)
1047 {
1048 int rc, i;
1049
1050 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1051
1052 /*
1053 * Firmware event queue
1054 */
1055 rc = alloc_fwq(sc);
1056 if (rc != 0)
1057 return (rc);
1058
1059 /*
1060 * That's all for the VF driver.
1061 */
1062 if (sc->flags & IS_VF)
1063 return (rc);
1064
1065 /*
1066 * XXX: General purpose rx queues, one per port.
1067 */
1068
1069 /*
1070 * Control queues, one per port.
1071 */
1072 for_each_port(sc, i) {
1073 rc = alloc_ctrlq(sc, i);
1074 if (rc != 0)
1075 return (rc);
1076 }
1077
1078 return (rc);
1079 }
1080
1081 /*
1082 * Idempotent
1083 */
1084 int
t4_teardown_adapter_queues(struct adapter * sc)1085 t4_teardown_adapter_queues(struct adapter *sc)
1086 {
1087 int i;
1088
1089 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1090
1091 if (sc->sge.ctrlq != NULL) {
1092 MPASS(!(sc->flags & IS_VF)); /* VFs don't allocate ctrlq. */
1093 for_each_port(sc, i)
1094 free_ctrlq(sc, i);
1095 }
1096 free_fwq(sc);
1097
1098 return (0);
1099 }
1100
1101 /* Maximum payload that could arrive with a single iq descriptor. */
1102 static inline int
max_rx_payload(struct adapter * sc,struct ifnet * ifp,const bool ofld)1103 max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld)
1104 {
1105 int maxp;
1106
1107 /* large enough even when hw VLAN extraction is disabled */
1108 maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
1109 ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
1110 if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
1111 maxp < sc->params.tp.max_rx_pdu)
1112 maxp = sc->params.tp.max_rx_pdu;
1113 return (maxp);
1114 }
1115
1116 int
t4_setup_vi_queues(struct vi_info * vi)1117 t4_setup_vi_queues(struct vi_info *vi)
1118 {
1119 int rc = 0, i, intr_idx;
1120 struct sge_rxq *rxq;
1121 struct sge_txq *txq;
1122 #ifdef TCP_OFFLOAD
1123 struct sge_ofld_rxq *ofld_rxq;
1124 #endif
1125 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1126 struct sge_ofld_txq *ofld_txq;
1127 #endif
1128 #ifdef DEV_NETMAP
1129 int saved_idx, iqidx;
1130 struct sge_nm_rxq *nm_rxq;
1131 struct sge_nm_txq *nm_txq;
1132 #endif
1133 struct adapter *sc = vi->adapter;
1134 struct ifnet *ifp = vi->ifp;
1135 int maxp;
1136
1137 /* Interrupt vector to start from (when using multiple vectors) */
1138 intr_idx = vi->first_intr;
1139
1140 #ifdef DEV_NETMAP
1141 saved_idx = intr_idx;
1142 if (ifp->if_capabilities & IFCAP_NETMAP) {
1143
1144 /* netmap is supported with direct interrupts only. */
1145 MPASS(!forwarding_intr_to_fwq(sc));
1146 MPASS(vi->first_intr >= 0);
1147
1148 /*
1149 * We don't have buffers to back the netmap rx queues
1150 * right now so we create the queues in a way that
1151 * doesn't set off any congestion signal in the chip.
1152 */
1153 for_each_nm_rxq(vi, i, nm_rxq) {
1154 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i);
1155 if (rc != 0)
1156 goto done;
1157 intr_idx++;
1158 }
1159
1160 for_each_nm_txq(vi, i, nm_txq) {
1161 iqidx = vi->first_nm_rxq + (i % vi->nnmrxq);
1162 rc = alloc_nm_txq(vi, nm_txq, iqidx, i);
1163 if (rc != 0)
1164 goto done;
1165 }
1166 }
1167
1168 /* Normal rx queues and netmap rx queues share the same interrupts. */
1169 intr_idx = saved_idx;
1170 #endif
1171
1172 /*
1173 * Allocate rx queues first because a default iqid is required when
1174 * creating a tx queue.
1175 */
1176 maxp = max_rx_payload(sc, ifp, false);
1177 for_each_rxq(vi, i, rxq) {
1178 rc = alloc_rxq(vi, rxq, i, intr_idx, maxp);
1179 if (rc != 0)
1180 goto done;
1181 if (!forwarding_intr_to_fwq(sc))
1182 intr_idx++;
1183 }
1184 #ifdef DEV_NETMAP
1185 if (ifp->if_capabilities & IFCAP_NETMAP)
1186 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
1187 #endif
1188 #ifdef TCP_OFFLOAD
1189 maxp = max_rx_payload(sc, ifp, true);
1190 for_each_ofld_rxq(vi, i, ofld_rxq) {
1191 rc = alloc_ofld_rxq(vi, ofld_rxq, i, intr_idx, maxp);
1192 if (rc != 0)
1193 goto done;
1194 if (!forwarding_intr_to_fwq(sc))
1195 intr_idx++;
1196 }
1197 #endif
1198
1199 /*
1200 * Now the tx queues.
1201 */
1202 for_each_txq(vi, i, txq) {
1203 rc = alloc_txq(vi, txq, i);
1204 if (rc != 0)
1205 goto done;
1206 }
1207 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1208 for_each_ofld_txq(vi, i, ofld_txq) {
1209 rc = alloc_ofld_txq(vi, ofld_txq, i);
1210 if (rc != 0)
1211 goto done;
1212 }
1213 #endif
1214 done:
1215 if (rc)
1216 t4_teardown_vi_queues(vi);
1217
1218 return (rc);
1219 }
1220
1221 /*
1222 * Idempotent
1223 */
1224 int
t4_teardown_vi_queues(struct vi_info * vi)1225 t4_teardown_vi_queues(struct vi_info *vi)
1226 {
1227 int i;
1228 struct sge_rxq *rxq;
1229 struct sge_txq *txq;
1230 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1231 struct sge_ofld_txq *ofld_txq;
1232 #endif
1233 #ifdef TCP_OFFLOAD
1234 struct sge_ofld_rxq *ofld_rxq;
1235 #endif
1236 #ifdef DEV_NETMAP
1237 struct sge_nm_rxq *nm_rxq;
1238 struct sge_nm_txq *nm_txq;
1239 #endif
1240
1241 #ifdef DEV_NETMAP
1242 if (vi->ifp->if_capabilities & IFCAP_NETMAP) {
1243 for_each_nm_txq(vi, i, nm_txq) {
1244 free_nm_txq(vi, nm_txq);
1245 }
1246
1247 for_each_nm_rxq(vi, i, nm_rxq) {
1248 free_nm_rxq(vi, nm_rxq);
1249 }
1250 }
1251 #endif
1252
1253 /*
1254 * Take down all the tx queues first, as they reference the rx queues
1255 * (for egress updates, etc.).
1256 */
1257
1258 for_each_txq(vi, i, txq) {
1259 free_txq(vi, txq);
1260 }
1261 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1262 for_each_ofld_txq(vi, i, ofld_txq) {
1263 free_ofld_txq(vi, ofld_txq);
1264 }
1265 #endif
1266
1267 /*
1268 * Then take down the rx queues.
1269 */
1270
1271 for_each_rxq(vi, i, rxq) {
1272 free_rxq(vi, rxq);
1273 }
1274 #ifdef TCP_OFFLOAD
1275 for_each_ofld_rxq(vi, i, ofld_rxq) {
1276 free_ofld_rxq(vi, ofld_rxq);
1277 }
1278 #endif
1279
1280 return (0);
1281 }
1282
1283 /*
1284 * Interrupt handler when the driver is using only 1 interrupt. This is a very
1285 * unusual scenario.
1286 *
1287 * a) Deals with errors, if any.
1288 * b) Services firmware event queue, which is taking interrupts for all other
1289 * queues.
1290 */
1291 void
t4_intr_all(void * arg)1292 t4_intr_all(void *arg)
1293 {
1294 struct adapter *sc = arg;
1295 struct sge_iq *fwq = &sc->sge.fwq;
1296
1297 MPASS(sc->intr_count == 1);
1298
1299 if (sc->intr_type == INTR_INTX)
1300 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
1301
1302 t4_intr_err(arg);
1303 t4_intr_evt(fwq);
1304 }
1305
1306 /*
1307 * Interrupt handler for errors (installed directly when multiple interrupts are
1308 * being used, or called by t4_intr_all).
1309 */
1310 void
t4_intr_err(void * arg)1311 t4_intr_err(void *arg)
1312 {
1313 struct adapter *sc = arg;
1314 uint32_t v;
1315 const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;
1316
1317 if (atomic_load_int(&sc->error_flags) & ADAP_FATAL_ERR)
1318 return;
1319
1320 v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE));
1321 if (v & F_PFSW) {
1322 sc->swintr++;
1323 t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v);
1324 }
1325
1326 if (t4_slow_intr_handler(sc, verbose))
1327 t4_fatal_err(sc, false);
1328 }
1329
1330 /*
1331 * Interrupt handler for iq-only queues. The firmware event queue is the only
1332 * such queue right now.
1333 */
1334 void
t4_intr_evt(void * arg)1335 t4_intr_evt(void *arg)
1336 {
1337 struct sge_iq *iq = arg;
1338
1339 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
1340 service_iq(iq, 0);
1341 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
1342 }
1343 }
1344
1345 /*
1346 * Interrupt handler for iq+fl queues.
1347 */
1348 void
t4_intr(void * arg)1349 t4_intr(void *arg)
1350 {
1351 struct sge_iq *iq = arg;
1352
1353 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
1354 service_iq_fl(iq, 0);
1355 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
1356 }
1357 }
1358
1359 #ifdef DEV_NETMAP
1360 /*
1361 * Interrupt handler for netmap rx queues.
1362 */
1363 void
t4_nm_intr(void * arg)1364 t4_nm_intr(void *arg)
1365 {
1366 struct sge_nm_rxq *nm_rxq = arg;
1367
1368 if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) {
1369 service_nm_rxq(nm_rxq);
1370 (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON);
1371 }
1372 }
1373
1374 /*
1375 * Interrupt handler for vectors shared between NIC and netmap rx queues.
1376 */
1377 void
t4_vi_intr(void * arg)1378 t4_vi_intr(void *arg)
1379 {
1380 struct irq *irq = arg;
1381
1382 MPASS(irq->nm_rxq != NULL);
1383 t4_nm_intr(irq->nm_rxq);
1384
1385 MPASS(irq->rxq != NULL);
1386 t4_intr(irq->rxq);
1387 }
1388 #endif
1389
1390 /*
1391 * Deals with interrupts on an iq-only (no freelist) queue.
1392 */
1393 static int
service_iq(struct sge_iq * iq,int budget)1394 service_iq(struct sge_iq *iq, int budget)
1395 {
1396 struct sge_iq *q;
1397 struct adapter *sc = iq->adapter;
1398 struct iq_desc *d = &iq->desc[iq->cidx];
1399 int ndescs = 0, limit;
1400 int rsp_type;
1401 uint32_t lq;
1402 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
1403
1404 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
1405 KASSERT((iq->flags & IQ_HAS_FL) == 0,
1406 ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq,
1407 iq->flags));
1408 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
1409 MPASS((iq->flags & IQ_LRO_ENABLED) == 0);
1410
1411 limit = budget ? budget : iq->qsize / 16;
1412
1413 /*
1414 * We always come back and check the descriptor ring for new indirect
1415 * interrupts and other responses after running a single handler.
1416 */
1417 for (;;) {
1418 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {
1419
1420 rmb();
1421
1422 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
1423 lq = be32toh(d->rsp.pldbuflen_qid);
1424
1425 switch (rsp_type) {
1426 case X_RSPD_TYPE_FLBUF:
1427 panic("%s: data for an iq (%p) with no freelist",
1428 __func__, iq);
1429
1430 /* NOTREACHED */
1431
1432 case X_RSPD_TYPE_CPL:
1433 KASSERT(d->rss.opcode < NUM_CPL_CMDS,
1434 ("%s: bad opcode %02x.", __func__,
1435 d->rss.opcode));
1436 t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL);
1437 break;
1438
1439 case X_RSPD_TYPE_INTR:
1440 /*
1441 * There are 1K interrupt-capable queues (qids 0
1442 * through 1023). A response type indicating a
1443 * forwarded interrupt with a qid >= 1K is an
1444 * iWARP async notification.
1445 */
1446 if (__predict_true(lq >= 1024)) {
1447 t4_an_handler(iq, &d->rsp);
1448 break;
1449 }
1450
1451 q = sc->sge.iqmap[lq - sc->sge.iq_start -
1452 sc->sge.iq_base];
1453 if (atomic_cmpset_int(&q->state, IQS_IDLE,
1454 IQS_BUSY)) {
1455 if (service_iq_fl(q, q->qsize / 16) == 0) {
1456 (void) atomic_cmpset_int(&q->state,
1457 IQS_BUSY, IQS_IDLE);
1458 } else {
1459 STAILQ_INSERT_TAIL(&iql, q,
1460 link);
1461 }
1462 }
1463 break;
1464
1465 default:
1466 KASSERT(0,
1467 ("%s: illegal response type %d on iq %p",
1468 __func__, rsp_type, iq));
1469 log(LOG_ERR,
1470 "%s: illegal response type %d on iq %p",
1471 device_get_nameunit(sc->dev), rsp_type, iq);
1472 break;
1473 }
1474
1475 d++;
1476 if (__predict_false(++iq->cidx == iq->sidx)) {
1477 iq->cidx = 0;
1478 iq->gen ^= F_RSPD_GEN;
1479 d = &iq->desc[0];
1480 }
1481 if (__predict_false(++ndescs == limit)) {
1482 t4_write_reg(sc, sc->sge_gts_reg,
1483 V_CIDXINC(ndescs) |
1484 V_INGRESSQID(iq->cntxt_id) |
1485 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1486 ndescs = 0;
1487
1488 if (budget) {
1489 return (EINPROGRESS);
1490 }
1491 }
1492 }
1493
1494 if (STAILQ_EMPTY(&iql))
1495 break;
1496
1497 /*
1498 * Process the head only, and send it to the back of the list if
1499 * it's still not done.
1500 */
1501 q = STAILQ_FIRST(&iql);
1502 STAILQ_REMOVE_HEAD(&iql, link);
1503 if (service_iq_fl(q, q->qsize / 8) == 0)
1504 (void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
1505 else
1506 STAILQ_INSERT_TAIL(&iql, q, link);
1507 }
1508
1509 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
1510 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
1511
1512 return (0);
1513 }
1514
1515 #if defined(INET) || defined(INET6)
1516 static inline int
sort_before_lro(struct lro_ctrl * lro)1517 sort_before_lro(struct lro_ctrl *lro)
1518 {
1519
1520 return (lro->lro_mbuf_max != 0);
1521 }
1522 #endif
1523
1524 #define CGBE_SHIFT_SCALE 10
1525
1526 static inline uint64_t
t4_tstmp_to_ns(struct adapter * sc,uint64_t lf)1527 t4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
1528 {
1529 struct clock_sync *cur, dcur;
1530 uint64_t hw_clocks;
1531 uint64_t hw_clk_div;
1532 sbintime_t sbt_cur_to_prev, sbt;
1533 uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b. */
1534 seqc_t gen;
1535
1536 for (;;) {
1537 cur = &sc->cal_info[sc->cal_current];
1538 gen = seqc_read(&cur->gen);
1539 if (gen == 0)
1540 return (0);
1541 dcur = *cur;
1542 if (seqc_consistent(&cur->gen, gen))
1543 break;
1544 }
1545
1546 /*
1547 * Our goal here is to have a result that is:
1548 *
1549 * ( (cur_time - prev_time) )
1550 * ((hw_tstmp - hw_prev) * ----------------------------- ) + prev_time
1551 * ( (hw_cur - hw_prev) )
1552 *
1553 * With the constraints that we cannot use float and we
1554 * don't want to overflow the uint64_t numbers we are using.
1555 */
1556 hw_clocks = hw_tstmp - dcur.hw_prev;
1557 sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev);
1558 hw_clk_div = dcur.hw_cur - dcur.hw_prev;
1559 sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev;
1560 return (sbttons(sbt));
1561 }
1562
1563 static inline void
move_to_next_rxbuf(struct sge_fl * fl)1564 move_to_next_rxbuf(struct sge_fl *fl)
1565 {
1566
1567 fl->rx_offset = 0;
1568 if (__predict_false((++fl->cidx & 7) == 0)) {
1569 uint16_t cidx = fl->cidx >> 3;
1570
1571 if (__predict_false(cidx == fl->sidx))
1572 fl->cidx = cidx = 0;
1573 fl->hw_cidx = cidx;
1574 }
1575 }
1576
1577 /*
1578 * Deals with interrupts on an iq+fl queue.
1579 */
1580 static int
service_iq_fl(struct sge_iq * iq,int budget)1581 service_iq_fl(struct sge_iq *iq, int budget)
1582 {
1583 struct sge_rxq *rxq = iq_to_rxq(iq);
1584 struct sge_fl *fl;
1585 struct adapter *sc = iq->adapter;
1586 struct iq_desc *d = &iq->desc[iq->cidx];
1587 int ndescs, limit;
1588 int rsp_type, starved;
1589 uint32_t lq;
1590 uint16_t fl_hw_cidx;
1591 struct mbuf *m0;
1592 #if defined(INET) || defined(INET6)
1593 const struct timeval lro_timeout = {0, sc->lro_timeout};
1594 struct lro_ctrl *lro = &rxq->lro;
1595 #endif
1596
1597 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
1598 MPASS(iq->flags & IQ_HAS_FL);
1599
1600 ndescs = 0;
1601 #if defined(INET) || defined(INET6)
1602 if (iq->flags & IQ_ADJ_CREDIT) {
1603 MPASS(sort_before_lro(lro));
1604 iq->flags &= ~IQ_ADJ_CREDIT;
1605 if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) {
1606 tcp_lro_flush_all(lro);
1607 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) |
1608 V_INGRESSQID((u32)iq->cntxt_id) |
1609 V_SEINTARM(iq->intr_params));
1610 return (0);
1611 }
1612 ndescs = 1;
1613 }
1614 #else
1615 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
1616 #endif
1617
1618 limit = budget ? budget : iq->qsize / 16;
1619 fl = &rxq->fl;
1620 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */
1621 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {
1622
1623 rmb();
1624
1625 m0 = NULL;
1626 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
1627 lq = be32toh(d->rsp.pldbuflen_qid);
1628
1629 switch (rsp_type) {
1630 case X_RSPD_TYPE_FLBUF:
1631 if (lq & F_RSPD_NEWBUF) {
1632 if (fl->rx_offset > 0)
1633 move_to_next_rxbuf(fl);
1634 lq = G_RSPD_LEN(lq);
1635 }
1636 if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) {
1637 FL_LOCK(fl);
1638 refill_fl(sc, fl, 64);
1639 FL_UNLOCK(fl);
1640 fl_hw_cidx = fl->hw_cidx;
1641 }
1642
1643 if (d->rss.opcode == CPL_RX_PKT) {
1644 if (__predict_true(eth_rx(sc, rxq, d, lq) == 0))
1645 break;
1646 goto out;
1647 }
1648 m0 = get_fl_payload(sc, fl, lq);
1649 if (__predict_false(m0 == NULL))
1650 goto out;
1651
1652 /* fall through */
1653
1654 case X_RSPD_TYPE_CPL:
1655 KASSERT(d->rss.opcode < NUM_CPL_CMDS,
1656 ("%s: bad opcode %02x.", __func__, d->rss.opcode));
1657 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0);
1658 break;
1659
1660 case X_RSPD_TYPE_INTR:
1661
1662 /*
1663 * There are 1K interrupt-capable queues (qids 0
1664 * through 1023). A response type indicating a
1665 * forwarded interrupt with a qid >= 1K is an
1666 * iWARP async notification. That is the only
1667 * acceptable indirect interrupt on this queue.
1668 */
1669 if (__predict_false(lq < 1024)) {
1670 panic("%s: indirect interrupt on iq_fl %p "
1671 "with qid %u", __func__, iq, lq);
1672 }
1673
1674 t4_an_handler(iq, &d->rsp);
1675 break;
1676
1677 default:
1678 KASSERT(0, ("%s: illegal response type %d on iq %p",
1679 __func__, rsp_type, iq));
1680 log(LOG_ERR, "%s: illegal response type %d on iq %p",
1681 device_get_nameunit(sc->dev), rsp_type, iq);
1682 break;
1683 }
1684
1685 d++;
1686 if (__predict_false(++iq->cidx == iq->sidx)) {
1687 iq->cidx = 0;
1688 iq->gen ^= F_RSPD_GEN;
1689 d = &iq->desc[0];
1690 }
1691 if (__predict_false(++ndescs == limit)) {
1692 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
1693 V_INGRESSQID(iq->cntxt_id) |
1694 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1695
1696 #if defined(INET) || defined(INET6)
1697 if (iq->flags & IQ_LRO_ENABLED &&
1698 !sort_before_lro(lro) &&
1699 sc->lro_timeout != 0) {
1700 tcp_lro_flush_inactive(lro, &lro_timeout);
1701 }
1702 #endif
1703 if (budget)
1704 return (EINPROGRESS);
1705 ndescs = 0;
1706 }
1707 }
1708 out:
1709 #if defined(INET) || defined(INET6)
1710 if (iq->flags & IQ_LRO_ENABLED) {
1711 if (ndescs > 0 && lro->lro_mbuf_count > 8) {
1712 MPASS(sort_before_lro(lro));
1713 /* hold back one credit and don't flush LRO state */
1714 iq->flags |= IQ_ADJ_CREDIT;
1715 ndescs--;
1716 } else {
1717 tcp_lro_flush_all(lro);
1718 }
1719 }
1720 #endif
1721
1722 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
1723 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
1724
1725 FL_LOCK(fl);
1726 starved = refill_fl(sc, fl, 64);
1727 FL_UNLOCK(fl);
1728 if (__predict_false(starved != 0))
1729 add_fl_to_sfl(sc, fl);
1730
1731 return (0);
1732 }
1733
1734 static inline struct cluster_metadata *
cl_metadata(struct fl_sdesc * sd)1735 cl_metadata(struct fl_sdesc *sd)
1736 {
1737
1738 return ((void *)(sd->cl + sd->moff));
1739 }
1740
1741 static void
rxb_free(struct mbuf * m)1742 rxb_free(struct mbuf *m)
1743 {
1744 struct cluster_metadata *clm = m->m_ext.ext_arg1;
1745
1746 uma_zfree(clm->zone, clm->cl);
1747 counter_u64_add(extfree_rels, 1);
1748 }
1749
1750 /*
1751 * The mbuf returned comes from zone_muf and carries the payload in one of these
1752 * ways
1753 * a) complete frame inside the mbuf
1754 * b) m_cljset (for clusters without metadata)
1755 * d) m_extaddref (cluster with metadata)
1756 */
1757 static struct mbuf *
get_scatter_segment(struct adapter * sc,struct sge_fl * fl,int fr_offset,int remaining)1758 get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
1759 int remaining)
1760 {
1761 struct mbuf *m;
1762 struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1763 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
1764 struct cluster_metadata *clm;
1765 int len, blen;
1766 caddr_t payload;
1767
1768 if (fl->flags & FL_BUF_PACKING) {
1769 u_int l, pad;
1770
1771 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */
1772 len = min(remaining, blen);
1773 payload = sd->cl + fl->rx_offset;
1774
1775 l = fr_offset + len;
1776 pad = roundup2(l, fl->buf_boundary) - l;
1777 if (fl->rx_offset + len + pad < rxb->size2)
1778 blen = len + pad;
1779 MPASS(fl->rx_offset + blen <= rxb->size2);
1780 } else {
1781 MPASS(fl->rx_offset == 0); /* not packing */
1782 blen = rxb->size1;
1783 len = min(remaining, blen);
1784 payload = sd->cl;
1785 }
1786
1787 if (fr_offset == 0) {
1788 m = m_gethdr(M_NOWAIT, MT_DATA);
1789 if (__predict_false(m == NULL))
1790 return (NULL);
1791 m->m_pkthdr.len = remaining;
1792 } else {
1793 m = m_get(M_NOWAIT, MT_DATA);
1794 if (__predict_false(m == NULL))
1795 return (NULL);
1796 }
1797 m->m_len = len;
1798
1799 if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
1800 /* copy data to mbuf */
1801 bcopy(payload, mtod(m, caddr_t), len);
1802 if (fl->flags & FL_BUF_PACKING) {
1803 fl->rx_offset += blen;
1804 MPASS(fl->rx_offset <= rxb->size2);
1805 if (fl->rx_offset < rxb->size2)
1806 return (m); /* without advancing the cidx */
1807 }
1808 } else if (fl->flags & FL_BUF_PACKING) {
1809 clm = cl_metadata(sd);
1810 if (sd->nmbuf++ == 0) {
1811 clm->refcount = 1;
1812 clm->zone = rxb->zone;
1813 clm->cl = sd->cl;
1814 counter_u64_add(extfree_refs, 1);
1815 }
1816 m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm,
1817 NULL);
1818
1819 fl->rx_offset += blen;
1820 MPASS(fl->rx_offset <= rxb->size2);
1821 if (fl->rx_offset < rxb->size2)
1822 return (m); /* without advancing the cidx */
1823 } else {
1824 m_cljset(m, sd->cl, rxb->type);
1825 sd->cl = NULL; /* consumed, not a recycle candidate */
1826 }
1827
1828 move_to_next_rxbuf(fl);
1829
1830 return (m);
1831 }
1832
1833 static struct mbuf *
get_fl_payload(struct adapter * sc,struct sge_fl * fl,const u_int plen)1834 get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen)
1835 {
1836 struct mbuf *m0, *m, **pnext;
1837 u_int remaining;
1838
1839 if (__predict_false(fl->flags & FL_BUF_RESUME)) {
1840 M_ASSERTPKTHDR(fl->m0);
1841 MPASS(fl->m0->m_pkthdr.len == plen);
1842 MPASS(fl->remaining < plen);
1843
1844 m0 = fl->m0;
1845 pnext = fl->pnext;
1846 remaining = fl->remaining;
1847 fl->flags &= ~FL_BUF_RESUME;
1848 goto get_segment;
1849 }
1850
1851 /*
1852 * Payload starts at rx_offset in the current hw buffer. Its length is
1853 * 'len' and it may span multiple hw buffers.
1854 */
1855
1856 m0 = get_scatter_segment(sc, fl, 0, plen);
1857 if (m0 == NULL)
1858 return (NULL);
1859 remaining = plen - m0->m_len;
1860 pnext = &m0->m_next;
1861 while (remaining > 0) {
1862 get_segment:
1863 MPASS(fl->rx_offset == 0);
1864 m = get_scatter_segment(sc, fl, plen - remaining, remaining);
1865 if (__predict_false(m == NULL)) {
1866 fl->m0 = m0;
1867 fl->pnext = pnext;
1868 fl->remaining = remaining;
1869 fl->flags |= FL_BUF_RESUME;
1870 return (NULL);
1871 }
1872 *pnext = m;
1873 pnext = &m->m_next;
1874 remaining -= m->m_len;
1875 }
1876 *pnext = NULL;
1877
1878 M_ASSERTPKTHDR(m0);
1879 return (m0);
1880 }
1881
1882 static int
skip_scatter_segment(struct adapter * sc,struct sge_fl * fl,int fr_offset,int remaining)1883 skip_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
1884 int remaining)
1885 {
1886 struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1887 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
1888 int len, blen;
1889
1890 if (fl->flags & FL_BUF_PACKING) {
1891 u_int l, pad;
1892
1893 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */
1894 len = min(remaining, blen);
1895
1896 l = fr_offset + len;
1897 pad = roundup2(l, fl->buf_boundary) - l;
1898 if (fl->rx_offset + len + pad < rxb->size2)
1899 blen = len + pad;
1900 fl->rx_offset += blen;
1901 MPASS(fl->rx_offset <= rxb->size2);
1902 if (fl->rx_offset < rxb->size2)
1903 return (len); /* without advancing the cidx */
1904 } else {
1905 MPASS(fl->rx_offset == 0); /* not packing */
1906 blen = rxb->size1;
1907 len = min(remaining, blen);
1908 }
1909 move_to_next_rxbuf(fl);
1910 return (len);
1911 }
1912
/*
 * Moves the freelist past plen bytes of payload, one segment at a time,
 * without building any mbufs.
 */
static inline void
skip_fl_payload(struct adapter *sc, struct sge_fl *fl, int plen)
{
	int skipped, len;

	for (skipped = 0; skipped < plen; skipped += len) {
		len = skip_scatter_segment(sc, fl, skipped, plen - skipped);
	}
}
1926
1927 static inline int
get_segment_len(struct adapter * sc,struct sge_fl * fl,int plen)1928 get_segment_len(struct adapter *sc, struct sge_fl *fl, int plen)
1929 {
1930 int len;
1931 struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1932 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
1933
1934 if (fl->flags & FL_BUF_PACKING)
1935 len = rxb->size2 - fl->rx_offset;
1936 else
1937 len = rxb->size1;
1938
1939 return (min(plen, len));
1940 }
1941
1942 static int
eth_rx(struct adapter * sc,struct sge_rxq * rxq,const struct iq_desc * d,u_int plen)1943 eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d,
1944 u_int plen)
1945 {
1946 struct mbuf *m0;
1947 struct ifnet *ifp = rxq->ifp;
1948 struct sge_fl *fl = &rxq->fl;
1949 struct vi_info *vi = ifp->if_softc;
1950 const struct cpl_rx_pkt *cpl;
1951 #if defined(INET) || defined(INET6)
1952 struct lro_ctrl *lro = &rxq->lro;
1953 #endif
1954 uint16_t err_vec, tnl_type, tnlhdr_len;
1955 static const int sw_hashtype[4][2] = {
1956 {M_HASHTYPE_NONE, M_HASHTYPE_NONE},
1957 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
1958 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
1959 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
1960 };
1961 static const int sw_csum_flags[2][2] = {
1962 {
1963 /* IP, inner IP */
1964 CSUM_ENCAP_VXLAN |
1965 CSUM_L3_CALC | CSUM_L3_VALID |
1966 CSUM_L4_CALC | CSUM_L4_VALID |
1967 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
1968 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
1969
1970 /* IP, inner IP6 */
1971 CSUM_ENCAP_VXLAN |
1972 CSUM_L3_CALC | CSUM_L3_VALID |
1973 CSUM_L4_CALC | CSUM_L4_VALID |
1974 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
1975 },
1976 {
1977 /* IP6, inner IP */
1978 CSUM_ENCAP_VXLAN |
1979 CSUM_L4_CALC | CSUM_L4_VALID |
1980 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
1981 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
1982
1983 /* IP6, inner IP6 */
1984 CSUM_ENCAP_VXLAN |
1985 CSUM_L4_CALC | CSUM_L4_VALID |
1986 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
1987 },
1988 };
1989
1990 MPASS(plen > sc->params.sge.fl_pktshift);
1991 if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) &&
1992 __predict_true((fl->flags & FL_BUF_RESUME) == 0)) {
1993 struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1994 caddr_t frame;
1995 int rc, slen;
1996
1997 slen = get_segment_len(sc, fl, plen) -
1998 sc->params.sge.fl_pktshift;
1999 frame = sd->cl + fl->rx_offset + sc->params.sge.fl_pktshift;
2000 CURVNET_SET_QUIET(ifp->if_vnet);
2001 rc = pfil_run_hooks(vi->pfil, frame, ifp,
2002 slen | PFIL_MEMPTR | PFIL_IN, NULL);
2003 CURVNET_RESTORE();
2004 if (rc == PFIL_DROPPED || rc == PFIL_CONSUMED) {
2005 skip_fl_payload(sc, fl, plen);
2006 return (0);
2007 }
2008 if (rc == PFIL_REALLOCED) {
2009 skip_fl_payload(sc, fl, plen);
2010 m0 = pfil_mem2mbuf(frame);
2011 goto have_mbuf;
2012 }
2013 }
2014
2015 m0 = get_fl_payload(sc, fl, plen);
2016 if (__predict_false(m0 == NULL))
2017 return (ENOMEM);
2018
2019 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift;
2020 m0->m_len -= sc->params.sge.fl_pktshift;
2021 m0->m_data += sc->params.sge.fl_pktshift;
2022
2023 have_mbuf:
2024 m0->m_pkthdr.rcvif = ifp;
2025 M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]);
2026 m0->m_pkthdr.flowid = be32toh(d->rss.hash_val);
2027
2028 cpl = (const void *)(&d->rss + 1);
2029 if (sc->params.tp.rx_pkt_encap) {
2030 const uint16_t ev = be16toh(cpl->err_vec);
2031
2032 err_vec = G_T6_COMPR_RXERR_VEC(ev);
2033 tnl_type = G_T6_RX_TNL_TYPE(ev);
2034 tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev);
2035 } else {
2036 err_vec = be16toh(cpl->err_vec);
2037 tnl_type = 0;
2038 tnlhdr_len = 0;
2039 }
2040 if (cpl->csum_calc && err_vec == 0) {
2041 int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6));
2042
2043 /* checksum(s) calculated and found to be correct. */
2044
2045 MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^
2046 (cpl->l2info & htobe32(F_RXF_IP6)));
2047 m0->m_pkthdr.csum_data = be16toh(cpl->csum);
2048 if (tnl_type == 0) {
2049 if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) {
2050 m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
2051 CSUM_L3_VALID | CSUM_L4_CALC |
2052 CSUM_L4_VALID;
2053 } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
2054 m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
2055 CSUM_L4_VALID;
2056 }
2057 rxq->rxcsum++;
2058 } else {
2059 MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN);
2060
2061 M_HASHTYPE_SETINNER(m0);
2062 if (__predict_false(cpl->ip_frag)) {
2063 /*
2064 * csum_data is for the inner frame (which is an
2065 * IP fragment) and is not 0xffff. There is no
2066 * way to pass the inner csum_data to the stack.
2067 * We don't want the stack to use the inner
2068 * csum_data to validate the outer frame or it
2069 * will get rejected. So we fix csum_data here
2070 * and let sw do the checksum of inner IP
2071 * fragments.
2072 *
2073 * XXX: Need 32b for csum_data2 in an rx mbuf.
2074 * Maybe stuff it into rcv_tstmp?
2075 */
2076 m0->m_pkthdr.csum_data = 0xffff;
2077 if (ipv6) {
2078 m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
2079 CSUM_L4_VALID;
2080 } else {
2081 m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
2082 CSUM_L3_VALID | CSUM_L4_CALC |
2083 CSUM_L4_VALID;
2084 }
2085 } else {
2086 int outer_ipv6;
2087
2088 MPASS(m0->m_pkthdr.csum_data == 0xffff);
2089
2090 outer_ipv6 = tnlhdr_len >=
2091 sizeof(struct ether_header) +
2092 sizeof(struct ip6_hdr);
2093 m0->m_pkthdr.csum_flags =
2094 sw_csum_flags[outer_ipv6][ipv6];
2095 }
2096 rxq->vxlan_rxcsum++;
2097 }
2098 }
2099
2100 if (cpl->vlan_ex) {
2101 if (sc->flags & IS_VF && sc->vlan_id) {
2102 /*
2103 * HW is not setup correctly if extracted vlan_id does
2104 * not match the VF's setting.
2105 */
2106 MPASS(be16toh(cpl->vlan) == sc->vlan_id);
2107 } else {
2108 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
2109 m0->m_flags |= M_VLANTAG;
2110 rxq->vlan_extraction++;
2111 }
2112 }
2113
2114 if (rxq->iq.flags & IQ_RX_TIMESTAMP) {
		/*
		 * Fill up rcv_tstmp and set M_TSTMP only if we get a
		 * non-zero value back from t4_tstmp_to_ns().
		 */
2119 m0->m_pkthdr.rcv_tstmp = t4_tstmp_to_ns(sc,
2120 be64toh(d->rsp.u.last_flit));
2121 if (m0->m_pkthdr.rcv_tstmp != 0)
2122 m0->m_flags |= M_TSTMP;
2123 }
2124
2125 #ifdef NUMA
2126 m0->m_pkthdr.numa_domain = ifp->if_numa_domain;
2127 #endif
2128 #if defined(INET) || defined(INET6)
2129 if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 &&
2130 (M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 ||
2131 M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) {
2132 if (sort_before_lro(lro)) {
2133 tcp_lro_queue_mbuf(lro, m0);
2134 return (0); /* queued for sort, then LRO */
2135 }
2136 if (tcp_lro_rx(lro, m0, 0) == 0)
2137 return (0); /* queued for LRO */
2138 }
2139 #endif
2140 ifp->if_input(ifp, m0);
2141
2142 return (0);
2143 }
2144
2145 /*
2146 * Must drain the wrq or make sure that someone else will.
2147 */
2148 static void
wrq_tx_drain(void * arg,int n)2149 wrq_tx_drain(void *arg, int n)
2150 {
2151 struct sge_wrq *wrq = arg;
2152 struct sge_eq *eq = &wrq->eq;
2153
2154 EQ_LOCK(eq);
2155 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
2156 drain_wrq_wr_list(wrq->adapter, wrq);
2157 EQ_UNLOCK(eq);
2158 }
2159
2160 static void
drain_wrq_wr_list(struct adapter * sc,struct sge_wrq * wrq)2161 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq)
2162 {
2163 struct sge_eq *eq = &wrq->eq;
2164 u_int available, dbdiff; /* # of hardware descriptors */
2165 u_int n;
2166 struct wrqe *wr;
2167 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */
2168
2169 EQ_LOCK_ASSERT_OWNED(eq);
2170 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
2171 wr = STAILQ_FIRST(&wrq->wr_list);
2172 MPASS(wr != NULL); /* Must be called with something useful to do */
2173 MPASS(eq->pidx == eq->dbidx);
2174 dbdiff = 0;
2175
2176 do {
2177 eq->cidx = read_hw_cidx(eq);
2178 if (eq->pidx == eq->cidx)
2179 available = eq->sidx - 1;
2180 else
2181 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
2182
2183 MPASS(wr->wrq == wrq);
2184 n = howmany(wr->wr_len, EQ_ESIZE);
2185 if (available < n)
2186 break;
2187
2188 dst = (void *)&eq->desc[eq->pidx];
2189 if (__predict_true(eq->sidx - eq->pidx > n)) {
2190 /* Won't wrap, won't end exactly at the status page. */
2191 bcopy(&wr->wr[0], dst, wr->wr_len);
2192 eq->pidx += n;
2193 } else {
2194 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE;
2195
2196 bcopy(&wr->wr[0], dst, first_portion);
2197 if (wr->wr_len > first_portion) {
2198 bcopy(&wr->wr[first_portion], &eq->desc[0],
2199 wr->wr_len - first_portion);
2200 }
2201 eq->pidx = n - (eq->sidx - eq->pidx);
2202 }
2203 wrq->tx_wrs_copied++;
2204
2205 if (available < eq->sidx / 4 &&
2206 atomic_cmpset_int(&eq->equiq, 0, 1)) {
2207 /*
2208 * XXX: This is not 100% reliable with some
2209 * types of WRs. But this is a very unusual
2210 * situation for an ofld/ctrl queue anyway.
2211 */
2212 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
2213 F_FW_WR_EQUEQ);
2214 }
2215
2216 dbdiff += n;
2217 if (dbdiff >= 16) {
2218 ring_eq_db(sc, eq, dbdiff);
2219 dbdiff = 0;
2220 }
2221
2222 STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
2223 free_wrqe(wr);
2224 MPASS(wrq->nwr_pending > 0);
2225 wrq->nwr_pending--;
2226 MPASS(wrq->ndesc_needed >= n);
2227 wrq->ndesc_needed -= n;
2228 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL);
2229
2230 if (dbdiff)
2231 ring_eq_db(sc, eq, dbdiff);
2232 }
2233
2234 /*
2235 * Doesn't fail. Holds on to work requests it can't send right away.
2236 */
2237 void
t4_wrq_tx_locked(struct adapter * sc,struct sge_wrq * wrq,struct wrqe * wr)2238 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
2239 {
2240 #ifdef INVARIANTS
2241 struct sge_eq *eq = &wrq->eq;
2242 #endif
2243
2244 EQ_LOCK_ASSERT_OWNED(eq);
2245 MPASS(wr != NULL);
2246 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN);
2247 MPASS((wr->wr_len & 0x7) == 0);
2248
2249 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
2250 wrq->nwr_pending++;
2251 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE);
2252
2253 if (!TAILQ_EMPTY(&wrq->incomplete_wrs))
2254 return; /* commit_wrq_wr will drain wr_list as well. */
2255
2256 drain_wrq_wr_list(sc, wrq);
2257
2258 /* Doorbell must have caught up to the pidx. */
2259 MPASS(eq->pidx == eq->dbidx);
2260 }
2261
2262 void
t4_update_fl_bufsize(struct ifnet * ifp)2263 t4_update_fl_bufsize(struct ifnet *ifp)
2264 {
2265 struct vi_info *vi = ifp->if_softc;
2266 struct adapter *sc = vi->adapter;
2267 struct sge_rxq *rxq;
2268 #ifdef TCP_OFFLOAD
2269 struct sge_ofld_rxq *ofld_rxq;
2270 #endif
2271 struct sge_fl *fl;
2272 int i, maxp;
2273
2274 maxp = max_rx_payload(sc, ifp, false);
2275 for_each_rxq(vi, i, rxq) {
2276 fl = &rxq->fl;
2277
2278 FL_LOCK(fl);
2279 fl->zidx = find_refill_source(sc, maxp,
2280 fl->flags & FL_BUF_PACKING);
2281 FL_UNLOCK(fl);
2282 }
2283 #ifdef TCP_OFFLOAD
2284 maxp = max_rx_payload(sc, ifp, true);
2285 for_each_ofld_rxq(vi, i, ofld_rxq) {
2286 fl = &ofld_rxq->fl;
2287
2288 FL_LOCK(fl);
2289 fl->zidx = find_refill_source(sc, maxp,
2290 fl->flags & FL_BUF_PACKING);
2291 FL_UNLOCK(fl);
2292 }
2293 #endif
2294 }
2295
2296 static inline int
mbuf_nsegs(struct mbuf * m)2297 mbuf_nsegs(struct mbuf *m)
2298 {
2299
2300 M_ASSERTPKTHDR(m);
2301 KASSERT(m->m_pkthdr.inner_l5hlen > 0,
2302 ("%s: mbuf %p missing information on # of segments.", __func__, m));
2303
2304 return (m->m_pkthdr.inner_l5hlen);
2305 }
2306
2307 static inline void
set_mbuf_nsegs(struct mbuf * m,uint8_t nsegs)2308 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
2309 {
2310
2311 M_ASSERTPKTHDR(m);
2312 m->m_pkthdr.inner_l5hlen = nsegs;
2313 }
2314
2315 static inline int
mbuf_cflags(struct mbuf * m)2316 mbuf_cflags(struct mbuf *m)
2317 {
2318
2319 M_ASSERTPKTHDR(m);
2320 return (m->m_pkthdr.PH_loc.eight[4]);
2321 }
2322
2323 static inline void
set_mbuf_cflags(struct mbuf * m,uint8_t flags)2324 set_mbuf_cflags(struct mbuf *m, uint8_t flags)
2325 {
2326
2327 M_ASSERTPKTHDR(m);
2328 m->m_pkthdr.PH_loc.eight[4] = flags;
2329 }
2330
2331 static inline int
mbuf_len16(struct mbuf * m)2332 mbuf_len16(struct mbuf *m)
2333 {
2334 int n;
2335
2336 M_ASSERTPKTHDR(m);
2337 n = m->m_pkthdr.PH_loc.eight[0];
2338 if (!(mbuf_cflags(m) & MC_TLS))
2339 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16);
2340
2341 return (n);
2342 }
2343
2344 static inline void
set_mbuf_len16(struct mbuf * m,uint8_t len16)2345 set_mbuf_len16(struct mbuf *m, uint8_t len16)
2346 {
2347
2348 M_ASSERTPKTHDR(m);
2349 if (!(mbuf_cflags(m) & MC_TLS))
2350 MPASS(len16 > 0 && len16 <= SGE_MAX_WR_LEN / 16);
2351 m->m_pkthdr.PH_loc.eight[0] = len16;
2352 }
2353
2354 #ifdef RATELIMIT
2355 static inline int
mbuf_eo_nsegs(struct mbuf * m)2356 mbuf_eo_nsegs(struct mbuf *m)
2357 {
2358
2359 M_ASSERTPKTHDR(m);
2360 return (m->m_pkthdr.PH_loc.eight[1]);
2361 }
2362
2363 #if defined(INET) || defined(INET6)
2364 static inline void
set_mbuf_eo_nsegs(struct mbuf * m,uint8_t nsegs)2365 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs)
2366 {
2367
2368 M_ASSERTPKTHDR(m);
2369 m->m_pkthdr.PH_loc.eight[1] = nsegs;
2370 }
2371 #endif
2372
2373 static inline int
mbuf_eo_len16(struct mbuf * m)2374 mbuf_eo_len16(struct mbuf *m)
2375 {
2376 int n;
2377
2378 M_ASSERTPKTHDR(m);
2379 n = m->m_pkthdr.PH_loc.eight[2];
2380 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16);
2381
2382 return (n);
2383 }
2384
2385 #if defined(INET) || defined(INET6)
2386 static inline void
set_mbuf_eo_len16(struct mbuf * m,uint8_t len16)2387 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16)
2388 {
2389
2390 M_ASSERTPKTHDR(m);
2391 m->m_pkthdr.PH_loc.eight[2] = len16;
2392 }
2393 #endif
2394
2395 static inline int
mbuf_eo_tsclk_tsoff(struct mbuf * m)2396 mbuf_eo_tsclk_tsoff(struct mbuf *m)
2397 {
2398
2399 M_ASSERTPKTHDR(m);
2400 return (m->m_pkthdr.PH_loc.eight[3]);
2401 }
2402
2403 #if defined(INET) || defined(INET6)
2404 static inline void
set_mbuf_eo_tsclk_tsoff(struct mbuf * m,uint8_t tsclk_tsoff)2405 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff)
2406 {
2407
2408 M_ASSERTPKTHDR(m);
2409 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff;
2410 }
2411 #endif
2412
2413 static inline int
needs_eo(struct m_snd_tag * mst)2414 needs_eo(struct m_snd_tag *mst)
2415 {
2416
2417 return (mst != NULL && mst->type == IF_SND_TAG_TYPE_RATE_LIMIT);
2418 }
2419 #endif
2420
2421 /*
2422 * Try to allocate an mbuf to contain a raw work request. To make it
2423 * easy to construct the work request, don't allocate a chain but a
2424 * single mbuf.
2425 */
2426 struct mbuf *
alloc_wr_mbuf(int len,int how)2427 alloc_wr_mbuf(int len, int how)
2428 {
2429 struct mbuf *m;
2430
2431 if (len <= MHLEN)
2432 m = m_gethdr(how, MT_DATA);
2433 else if (len <= MCLBYTES)
2434 m = m_getcl(how, MT_DATA, M_PKTHDR);
2435 else
2436 m = NULL;
2437 if (m == NULL)
2438 return (NULL);
2439 m->m_pkthdr.len = len;
2440 m->m_len = len;
2441 set_mbuf_cflags(m, MC_RAW_WR);
2442 set_mbuf_len16(m, howmany(len, 16));
2443 return (m);
2444 }
2445
2446 static inline bool
needs_hwcsum(struct mbuf * m)2447 needs_hwcsum(struct mbuf *m)
2448 {
2449 const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP |
2450 CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
2451 CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP |
2452 CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP |
2453 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO;
2454
2455 M_ASSERTPKTHDR(m);
2456
2457 return (m->m_pkthdr.csum_flags & csum_flags);
2458 }
2459
2460 static inline bool
needs_tso(struct mbuf * m)2461 needs_tso(struct mbuf *m)
2462 {
2463 const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO |
2464 CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
2465
2466 M_ASSERTPKTHDR(m);
2467
2468 return (m->m_pkthdr.csum_flags & csum_flags);
2469 }
2470
2471 static inline bool
needs_vxlan_csum(struct mbuf * m)2472 needs_vxlan_csum(struct mbuf *m)
2473 {
2474
2475 M_ASSERTPKTHDR(m);
2476
2477 return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN);
2478 }
2479
2480 static inline bool
needs_vxlan_tso(struct mbuf * m)2481 needs_vxlan_tso(struct mbuf *m)
2482 {
2483 const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO |
2484 CSUM_INNER_IP6_TSO;
2485
2486 M_ASSERTPKTHDR(m);
2487
2488 return ((m->m_pkthdr.csum_flags & csum_flags) != 0 &&
2489 (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN);
2490 }
2491
2492 #if defined(INET) || defined(INET6)
2493 static inline bool
needs_inner_tcp_csum(struct mbuf * m)2494 needs_inner_tcp_csum(struct mbuf *m)
2495 {
2496 const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
2497
2498 M_ASSERTPKTHDR(m);
2499
2500 return (m->m_pkthdr.csum_flags & csum_flags);
2501 }
2502 #endif
2503
2504 static inline bool
needs_l3_csum(struct mbuf * m)2505 needs_l3_csum(struct mbuf *m)
2506 {
2507 const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP |
2508 CSUM_INNER_IP_TSO;
2509
2510 M_ASSERTPKTHDR(m);
2511
2512 return (m->m_pkthdr.csum_flags & csum_flags);
2513 }
2514
2515 static inline bool
needs_outer_tcp_csum(struct mbuf * m)2516 needs_outer_tcp_csum(struct mbuf *m)
2517 {
2518 const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP |
2519 CSUM_IP6_TSO;
2520
2521 M_ASSERTPKTHDR(m);
2522
2523 return (m->m_pkthdr.csum_flags & csum_flags);
2524 }
2525
2526 #ifdef RATELIMIT
2527 static inline bool
needs_outer_l4_csum(struct mbuf * m)2528 needs_outer_l4_csum(struct mbuf *m)
2529 {
2530 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO |
2531 CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO;
2532
2533 M_ASSERTPKTHDR(m);
2534
2535 return (m->m_pkthdr.csum_flags & csum_flags);
2536 }
2537
2538 static inline bool
needs_outer_udp_csum(struct mbuf * m)2539 needs_outer_udp_csum(struct mbuf *m)
2540 {
2541 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP;
2542
2543 M_ASSERTPKTHDR(m);
2544
2545 return (m->m_pkthdr.csum_flags & csum_flags);
2546 }
2547 #endif
2548
2549 static inline bool
needs_vlan_insertion(struct mbuf * m)2550 needs_vlan_insertion(struct mbuf *m)
2551 {
2552
2553 M_ASSERTPKTHDR(m);
2554
2555 return (m->m_flags & M_VLANTAG);
2556 }
2557
2558 #if defined(INET) || defined(INET6)
2559 static void *
m_advance(struct mbuf ** pm,int * poffset,int len)2560 m_advance(struct mbuf **pm, int *poffset, int len)
2561 {
2562 struct mbuf *m = *pm;
2563 int offset = *poffset;
2564 uintptr_t p = 0;
2565
2566 MPASS(len > 0);
2567
2568 for (;;) {
2569 if (offset + len < m->m_len) {
2570 offset += len;
2571 p = mtod(m, uintptr_t) + offset;
2572 break;
2573 }
2574 len -= m->m_len - offset;
2575 m = m->m_next;
2576 offset = 0;
2577 MPASS(m != NULL);
2578 }
2579 *poffset = offset;
2580 *pm = m;
2581 return ((void *)p);
2582 }
2583 #endif
2584
2585 static inline int
count_mbuf_ext_pgs(struct mbuf * m,int skip,vm_paddr_t * nextaddr)2586 count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr)
2587 {
2588 vm_paddr_t paddr;
2589 int i, len, off, pglen, pgoff, seglen, segoff;
2590 int nsegs = 0;
2591
2592 M_ASSERTEXTPG(m);
2593 off = mtod(m, vm_offset_t);
2594 len = m->m_len;
2595 off += skip;
2596 len -= skip;
2597
2598 if (m->m_epg_hdrlen != 0) {
2599 if (off >= m->m_epg_hdrlen) {
2600 off -= m->m_epg_hdrlen;
2601 } else {
2602 seglen = m->m_epg_hdrlen - off;
2603 segoff = off;
2604 seglen = min(seglen, len);
2605 off = 0;
2606 len -= seglen;
2607 paddr = pmap_kextract(
2608 (vm_offset_t)&m->m_epg_hdr[segoff]);
2609 if (*nextaddr != paddr)
2610 nsegs++;
2611 *nextaddr = paddr + seglen;
2612 }
2613 }
2614 pgoff = m->m_epg_1st_off;
2615 for (i = 0; i < m->m_epg_npgs && len > 0; i++) {
2616 pglen = m_epg_pagelen(m, i, pgoff);
2617 if (off >= pglen) {
2618 off -= pglen;
2619 pgoff = 0;
2620 continue;
2621 }
2622 seglen = pglen - off;
2623 segoff = pgoff + off;
2624 off = 0;
2625 seglen = min(seglen, len);
2626 len -= seglen;
2627 paddr = m->m_epg_pa[i] + segoff;
2628 if (*nextaddr != paddr)
2629 nsegs++;
2630 *nextaddr = paddr + seglen;
2631 pgoff = 0;
2632 };
2633 if (len != 0) {
2634 seglen = min(len, m->m_epg_trllen - off);
2635 len -= seglen;
2636 paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]);
2637 if (*nextaddr != paddr)
2638 nsegs++;
2639 *nextaddr = paddr + seglen;
2640 }
2641
2642 return (nsegs);
2643 }
2644
2645
2646 /*
2647 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain
2648 * must have at least one mbuf that's not empty. It is possible for this
2649 * routine to return 0 if skip accounts for all the contents of the mbuf chain.
2650 */
2651 static inline int
count_mbuf_nsegs(struct mbuf * m,int skip,uint8_t * cflags)2652 count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags)
2653 {
2654 vm_paddr_t nextaddr, paddr;
2655 vm_offset_t va;
2656 int len, nsegs;
2657
2658 M_ASSERTPKTHDR(m);
2659 MPASS(m->m_pkthdr.len > 0);
2660 MPASS(m->m_pkthdr.len >= skip);
2661
2662 nsegs = 0;
2663 nextaddr = 0;
2664 for (; m; m = m->m_next) {
2665 len = m->m_len;
2666 if (__predict_false(len == 0))
2667 continue;
2668 if (skip >= len) {
2669 skip -= len;
2670 continue;
2671 }
2672 if ((m->m_flags & M_EXTPG) != 0) {
2673 *cflags |= MC_NOMAP;
2674 nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr);
2675 skip = 0;
2676 continue;
2677 }
2678 va = mtod(m, vm_offset_t) + skip;
2679 len -= skip;
2680 skip = 0;
2681 paddr = pmap_kextract(va);
2682 nsegs += sglist_count((void *)(uintptr_t)va, len);
2683 if (paddr == nextaddr)
2684 nsegs--;
2685 nextaddr = pmap_kextract(va + len - 1) + 1;
2686 }
2687
2688 return (nsegs);
2689 }
2690
2691 /*
2692 * The maximum number of segments that can fit in a WR.
2693 */
2694 static int
max_nsegs_allowed(struct mbuf * m,bool vm_wr)2695 max_nsegs_allowed(struct mbuf *m, bool vm_wr)
2696 {
2697
2698 if (vm_wr) {
2699 if (needs_tso(m))
2700 return (TX_SGL_SEGS_VM_TSO);
2701 return (TX_SGL_SEGS_VM);
2702 }
2703
2704 if (needs_tso(m)) {
2705 if (needs_vxlan_tso(m))
2706 return (TX_SGL_SEGS_VXLAN_TSO);
2707 else
2708 return (TX_SGL_SEGS_TSO);
2709 }
2710
2711 return (TX_SGL_SEGS);
2712 }
2713
/*
 * State and interval (3 seconds) handed to ratecheck() before parse_pkt logs
 * an unknown ethertype.
 */
static struct timeval txerr_ratecheck = { .tv_sec = 0, .tv_usec = 0 };
static const struct timeval txerr_interval = { .tv_sec = 3, .tv_usec = 0 };
2716
2717 /*
2718 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change:
2719 * a) caller can assume it's been freed if this function returns with an error.
2720 * b) it may get defragged up if the gather list is too long for the hardware.
2721 */
2722 int
parse_pkt(struct mbuf ** mp,bool vm_wr)2723 parse_pkt(struct mbuf **mp, bool vm_wr)
2724 {
2725 struct mbuf *m0 = *mp, *m;
2726 int rc, nsegs, defragged = 0;
2727 struct ether_header *eh;
2728 #ifdef INET
2729 void *l3hdr;
2730 #endif
2731 #if defined(INET) || defined(INET6)
2732 int offset;
2733 struct tcphdr *tcp;
2734 #endif
2735 #if defined(KERN_TLS) || defined(RATELIMIT)
2736 struct m_snd_tag *mst;
2737 #endif
2738 uint16_t eh_type;
2739 uint8_t cflags;
2740
2741 cflags = 0;
2742 M_ASSERTPKTHDR(m0);
2743 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) {
2744 rc = EINVAL;
2745 fail:
2746 m_freem(m0);
2747 *mp = NULL;
2748 return (rc);
2749 }
2750 restart:
2751 /*
2752 * First count the number of gather list segments in the payload.
2753 * Defrag the mbuf if nsegs exceeds the hardware limit.
2754 */
2755 M_ASSERTPKTHDR(m0);
2756 MPASS(m0->m_pkthdr.len > 0);
2757 nsegs = count_mbuf_nsegs(m0, 0, &cflags);
2758 #if defined(KERN_TLS) || defined(RATELIMIT)
2759 if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG)
2760 mst = m0->m_pkthdr.snd_tag;
2761 else
2762 mst = NULL;
2763 #endif
2764 #ifdef KERN_TLS
2765 if (mst != NULL && mst->type == IF_SND_TAG_TYPE_TLS) {
2766 int len16;
2767
2768 cflags |= MC_TLS;
2769 set_mbuf_cflags(m0, cflags);
2770 rc = t6_ktls_parse_pkt(m0, &nsegs, &len16);
2771 if (rc != 0)
2772 goto fail;
2773 set_mbuf_nsegs(m0, nsegs);
2774 set_mbuf_len16(m0, len16);
2775 return (0);
2776 }
2777 #endif
2778 if (nsegs > max_nsegs_allowed(m0, vm_wr)) {
2779 if (defragged++ > 0) {
2780 rc = EFBIG;
2781 goto fail;
2782 }
2783 counter_u64_add(defrags, 1);
2784 if ((m = m_defrag(m0, M_NOWAIT)) == NULL) {
2785 rc = ENOMEM;
2786 goto fail;
2787 }
2788 *mp = m0 = m; /* update caller's copy after defrag */
2789 goto restart;
2790 }
2791
2792 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN &&
2793 !(cflags & MC_NOMAP))) {
2794 counter_u64_add(pullups, 1);
2795 m0 = m_pullup(m0, m0->m_pkthdr.len);
2796 if (m0 == NULL) {
2797 /* Should have left well enough alone. */
2798 rc = EFBIG;
2799 goto fail;
2800 }
2801 *mp = m0; /* update caller's copy after pullup */
2802 goto restart;
2803 }
2804 set_mbuf_nsegs(m0, nsegs);
2805 set_mbuf_cflags(m0, cflags);
2806 calculate_mbuf_len16(m0, vm_wr);
2807
2808 #ifdef RATELIMIT
2809 /*
2810 * Ethofld is limited to TCP and UDP for now, and only when L4 hw
2811 * checksumming is enabled. needs_outer_l4_csum happens to check for
2812 * all the right things.
2813 */
2814 if (__predict_false(needs_eo(mst) && !needs_outer_l4_csum(m0))) {
2815 m_snd_tag_rele(m0->m_pkthdr.snd_tag);
2816 m0->m_pkthdr.snd_tag = NULL;
2817 m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
2818 mst = NULL;
2819 }
2820 #endif
2821
2822 if (!needs_hwcsum(m0)
2823 #ifdef RATELIMIT
2824 && !needs_eo(mst)
2825 #endif
2826 )
2827 return (0);
2828
2829 m = m0;
2830 eh = mtod(m, struct ether_header *);
2831 eh_type = ntohs(eh->ether_type);
2832 if (eh_type == ETHERTYPE_VLAN) {
2833 struct ether_vlan_header *evh = (void *)eh;
2834
2835 eh_type = ntohs(evh->evl_proto);
2836 m0->m_pkthdr.l2hlen = sizeof(*evh);
2837 } else
2838 m0->m_pkthdr.l2hlen = sizeof(*eh);
2839
2840 #if defined(INET) || defined(INET6)
2841 offset = 0;
2842 #ifdef INET
2843 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen);
2844 #else
2845 m_advance(&m, &offset, m0->m_pkthdr.l2hlen);
2846 #endif
2847 #endif
2848
2849 switch (eh_type) {
2850 #ifdef INET6
2851 case ETHERTYPE_IPV6:
2852 m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
2853 break;
2854 #endif
2855 #ifdef INET
2856 case ETHERTYPE_IP:
2857 {
2858 struct ip *ip = l3hdr;
2859
2860 if (needs_vxlan_csum(m0)) {
2861 /* Driver will do the outer IP hdr checksum. */
2862 ip->ip_sum = 0;
2863 if (needs_vxlan_tso(m0)) {
2864 const uint16_t ipl = ip->ip_len;
2865
2866 ip->ip_len = 0;
2867 ip->ip_sum = ~in_cksum_hdr(ip);
2868 ip->ip_len = ipl;
2869 } else
2870 ip->ip_sum = in_cksum_hdr(ip);
2871 }
2872 m0->m_pkthdr.l3hlen = ip->ip_hl << 2;
2873 break;
2874 }
2875 #endif
2876 default:
2877 if (ratecheck(&txerr_ratecheck, &txerr_interval)) {
2878 log(LOG_ERR, "%s: ethertype 0x%04x unknown. "
2879 "if_cxgbe must be compiled with the same "
2880 "INET/INET6 options as the kernel.\n", __func__,
2881 eh_type);
2882 }
2883 rc = EINVAL;
2884 goto fail;
2885 }
2886
2887 #if defined(INET) || defined(INET6)
2888 if (needs_vxlan_csum(m0)) {
2889 m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
2890 m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header);
2891
2892 /* Inner headers. */
2893 eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen +
2894 sizeof(struct udphdr) + sizeof(struct vxlan_header));
2895 eh_type = ntohs(eh->ether_type);
2896 if (eh_type == ETHERTYPE_VLAN) {
2897 struct ether_vlan_header *evh = (void *)eh;
2898
2899 eh_type = ntohs(evh->evl_proto);
2900 m0->m_pkthdr.inner_l2hlen = sizeof(*evh);
2901 } else
2902 m0->m_pkthdr.inner_l2hlen = sizeof(*eh);
2903 #ifdef INET
2904 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen);
2905 #else
2906 m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen);
2907 #endif
2908
2909 switch (eh_type) {
2910 #ifdef INET6
2911 case ETHERTYPE_IPV6:
2912 m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr);
2913 break;
2914 #endif
2915 #ifdef INET
2916 case ETHERTYPE_IP:
2917 {
2918 struct ip *ip = l3hdr;
2919
2920 m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2;
2921 break;
2922 }
2923 #endif
2924 default:
2925 if (ratecheck(&txerr_ratecheck, &txerr_interval)) {
2926 log(LOG_ERR, "%s: VXLAN hw offload requested"
2927 "with unknown ethertype 0x%04x. if_cxgbe "
2928 "must be compiled with the same INET/INET6 "
2929 "options as the kernel.\n", __func__,
2930 eh_type);
2931 }
2932 rc = EINVAL;
2933 goto fail;
2934 }
2935 if (needs_inner_tcp_csum(m0)) {
2936 tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen);
2937 m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4;
2938 }
2939 MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
2940 m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP |
2941 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP |
2942 CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO |
2943 CSUM_ENCAP_VXLAN;
2944 }
2945
2946 if (needs_outer_tcp_csum(m0)) {
2947 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
2948 m0->m_pkthdr.l4hlen = tcp->th_off * 4;
2949 #ifdef RATELIMIT
2950 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) {
2951 set_mbuf_eo_tsclk_tsoff(m0,
2952 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) |
2953 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1));
2954 } else
2955 set_mbuf_eo_tsclk_tsoff(m0, 0);
2956 } else if (needs_outer_udp_csum(m0)) {
2957 m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
2958 #endif
2959 }
2960 #ifdef RATELIMIT
2961 if (needs_eo(mst)) {
2962 u_int immhdrs;
2963
2964 /* EO WRs have the headers in the WR and not the GL. */
2965 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen +
2966 m0->m_pkthdr.l4hlen;
2967 cflags = 0;
2968 nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags);
2969 MPASS(cflags == mbuf_cflags(m0));
2970 set_mbuf_eo_nsegs(m0, nsegs);
2971 set_mbuf_eo_len16(m0,
2972 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0)));
2973 }
2974 #endif
2975 #endif
2976 MPASS(m0 == *mp);
2977 return (0);
2978 }
2979
2980 void *
start_wrq_wr(struct sge_wrq * wrq,int len16,struct wrq_cookie * cookie)2981 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie)
2982 {
2983 struct sge_eq *eq = &wrq->eq;
2984 struct adapter *sc = wrq->adapter;
2985 int ndesc, available;
2986 struct wrqe *wr;
2987 void *w;
2988
2989 MPASS(len16 > 0);
2990 ndesc = tx_len16_to_desc(len16);
2991 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC);
2992
2993 EQ_LOCK(eq);
2994
2995 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
2996 drain_wrq_wr_list(sc, wrq);
2997
2998 if (!STAILQ_EMPTY(&wrq->wr_list)) {
2999 slowpath:
3000 EQ_UNLOCK(eq);
3001 wr = alloc_wrqe(len16 * 16, wrq);
3002 if (__predict_false(wr == NULL))
3003 return (NULL);
3004 cookie->pidx = -1;
3005 cookie->ndesc = ndesc;
3006 return (&wr->wr);
3007 }
3008
3009 eq->cidx = read_hw_cidx(eq);
3010 if (eq->pidx == eq->cidx)
3011 available = eq->sidx - 1;
3012 else
3013 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
3014 if (available < ndesc)
3015 goto slowpath;
3016
3017 cookie->pidx = eq->pidx;
3018 cookie->ndesc = ndesc;
3019 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link);
3020
3021 w = &eq->desc[eq->pidx];
3022 IDXINCR(eq->pidx, ndesc, eq->sidx);
3023 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) {
3024 w = &wrq->ss[0];
3025 wrq->ss_pidx = cookie->pidx;
3026 wrq->ss_len = len16 * 16;
3027 }
3028
3029 EQ_UNLOCK(eq);
3030
3031 return (w);
3032 }
3033
3034 void
commit_wrq_wr(struct sge_wrq * wrq,void * w,struct wrq_cookie * cookie)3035 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie)
3036 {
3037 struct sge_eq *eq = &wrq->eq;
3038 struct adapter *sc = wrq->adapter;
3039 int ndesc, pidx;
3040 struct wrq_cookie *prev, *next;
3041
3042 if (cookie->pidx == -1) {
3043 struct wrqe *wr = __containerof(w, struct wrqe, wr);
3044
3045 t4_wrq_tx(sc, wr);
3046 return;
3047 }
3048
3049 if (__predict_false(w == &wrq->ss[0])) {
3050 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE;
3051
3052 MPASS(wrq->ss_len > n); /* WR had better wrap around. */
3053 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n);
3054 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n);
3055 wrq->tx_wrs_ss++;
3056 } else
3057 wrq->tx_wrs_direct++;
3058
3059 EQ_LOCK(eq);
3060 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */
3061 pidx = cookie->pidx;
3062 MPASS(pidx >= 0 && pidx < eq->sidx);
3063 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link);
3064 next = TAILQ_NEXT(cookie, link);
3065 if (prev == NULL) {
3066 MPASS(pidx == eq->dbidx);
3067 if (next == NULL || ndesc >= 16) {
3068 int available;
3069 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */
3070
3071 /*
3072 * Note that the WR via which we'll request tx updates
3073 * is at pidx and not eq->pidx, which has moved on
3074 * already.
3075 */
3076 dst = (void *)&eq->desc[pidx];
3077 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
3078 if (available < eq->sidx / 4 &&
3079 atomic_cmpset_int(&eq->equiq, 0, 1)) {
3080 /*
3081 * XXX: This is not 100% reliable with some
3082 * types of WRs. But this is a very unusual
3083 * situation for an ofld/ctrl queue anyway.
3084 */
3085 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
3086 F_FW_WR_EQUEQ);
3087 }
3088
3089 ring_eq_db(wrq->adapter, eq, ndesc);
3090 } else {
3091 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc);
3092 next->pidx = pidx;
3093 next->ndesc += ndesc;
3094 }
3095 } else {
3096 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc);
3097 prev->ndesc += ndesc;
3098 }
3099 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link);
3100
3101 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
3102 drain_wrq_wr_list(sc, wrq);
3103
3104 #ifdef INVARIANTS
3105 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) {
3106 /* Doorbell must have caught up to the pidx. */
3107 MPASS(wrq->eq.pidx == wrq->eq.dbidx);
3108 }
3109 #endif
3110 EQ_UNLOCK(eq);
3111 }
3112
3113 static u_int
can_resume_eth_tx(struct mp_ring * r)3114 can_resume_eth_tx(struct mp_ring *r)
3115 {
3116 struct sge_eq *eq = r->cookie;
3117
3118 return (total_available_tx_desc(eq) > eq->sidx / 8);
3119 }
3120
3121 static inline bool
cannot_use_txpkts(struct mbuf * m)3122 cannot_use_txpkts(struct mbuf *m)
3123 {
3124 /* maybe put a GL limit too, to avoid silliness? */
3125
3126 return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0);
3127 }
3128
3129 static inline int
discard_tx(struct sge_eq * eq)3130 discard_tx(struct sge_eq *eq)
3131 {
3132
3133 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED);
3134 }
3135
3136 static inline int
wr_can_update_eq(void * p)3137 wr_can_update_eq(void *p)
3138 {
3139 struct fw_eth_tx_pkts_wr *wr = p;
3140
3141 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) {
3142 case FW_ULPTX_WR:
3143 case FW_ETH_TX_PKT_WR:
3144 case FW_ETH_TX_PKTS_WR:
3145 case FW_ETH_TX_PKTS2_WR:
3146 case FW_ETH_TX_PKT_VM_WR:
3147 case FW_ETH_TX_PKTS_VM_WR:
3148 return (1);
3149 default:
3150 return (0);
3151 }
3152 }
3153
3154 static inline void
set_txupdate_flags(struct sge_txq * txq,u_int avail,struct fw_eth_tx_pkt_wr * wr)3155 set_txupdate_flags(struct sge_txq *txq, u_int avail,
3156 struct fw_eth_tx_pkt_wr *wr)
3157 {
3158 struct sge_eq *eq = &txq->eq;
3159 struct txpkts *txp = &txq->txp;
3160
3161 if ((txp->npkt > 0 || avail < eq->sidx / 2) &&
3162 atomic_cmpset_int(&eq->equiq, 0, 1)) {
3163 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
3164 eq->equeqidx = eq->pidx;
3165 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) {
3166 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
3167 eq->equeqidx = eq->pidx;
3168 }
3169 }
3170
3171 #if defined(__i386__) || defined(__amd64__)
3172 extern uint64_t tsc_freq;
3173 #endif
3174
3175 static inline bool
record_eth_tx_time(struct sge_txq * txq)3176 record_eth_tx_time(struct sge_txq *txq)
3177 {
3178 const uint64_t cycles = get_cyclecount();
3179 const uint64_t last_tx = txq->last_tx;
3180 #if defined(__i386__) || defined(__amd64__)
3181 const uint64_t itg = tsc_freq * t4_tx_coalesce_gap / 1000000;
3182 #else
3183 const uint64_t itg = 0;
3184 #endif
3185
3186 MPASS(cycles >= last_tx);
3187 txq->last_tx = cycles;
3188 return (cycles - last_tx < itg);
3189 }
3190
/*
 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to
 * be consumed.  Return the actual number consumed.  0 indicates a stall.
 *
 * r->cookie is the tx queue and its lock must be held by the caller.
 *
 * If discard_tx() says the queue is unusable, everything pending (the mbufs
 * held in txq->txp as well as the ring items) is freed and reported as
 * consumed.  If there is nothing in the ring (cidx == pidx) the call is a
 * request to flush whatever is held in txq->txp.
 *
 * On return *coalescing tells the caller whether txq->txp still holds packets
 * that have not been written to the hardware ring.
 */
static u_int
eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
{
	struct sge_txq *txq = r->cookie;
	struct ifnet *ifp = txq->ifp;
	struct sge_eq *eq = &txq->eq;
	struct txpkts *txp = &txq->txp;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;
	u_int total, remaining;		/* # of packets */
	u_int n, avail, dbdiff;		/* # of hardware descriptors */
	int i, rc;
	struct mbuf *m0;
	bool snd, recent_tx;
	void *wr;	/* start of the last WR written to the ring */

	TXQ_LOCK_ASSERT_OWNED(txq);
	/* True if the previous call was within the coalescing gap. */
	recent_tx = record_eth_tx_time(txq);

	remaining = IDXDIFF(pidx, cidx, r->size);
	if (__predict_false(discard_tx(eq))) {
		/* Drop the packets held for coalescing ... */
		for (i = 0; i < txp->npkt; i++)
			m_freem(txp->mb[i]);
		txp->npkt = 0;
		/* ... and everything that is waiting in the ring. */
		while (cidx != pidx) {
			m0 = r->items[cidx];
			m_freem(m0);
			if (++cidx == r->size)
				cidx = 0;
		}
		reclaim_tx_descs(txq, eq->sidx);
		*coalescing = false;
		return (remaining);	/* emptied */
	}

	/* How many hardware descriptors do we have readily available. */
	if (eq->pidx == eq->cidx)
		avail = eq->sidx - 1;
	else
		avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;

	total = 0;
	if (remaining == 0) {
		/*
		 * Nothing new to transmit: jump straight to the code that
		 * writes out txq->txp.  dbdiff is not used on that path.
		 */
		txp->score = 0;
		txq->txpkts_flush++;
		goto send_txpkts;
	}

	/* Descriptors written since the doorbell was last rung. */
	dbdiff = 0;
	MPASS(remaining > 0);
	while (remaining > 0) {
		m0 = r->items[cidx];
		M_ASSERTPKTHDR(m0);
		MPASS(m0->m_nextpkt == NULL);

		/* Top up the descriptor count when it is running low. */
		if (avail < 2 * SGE_MAX_WR_NDESC)
			avail += reclaim_tx_descs(txq, 64);

		/* Coalescing disabled and nothing held: send m0 by itself. */
		if (t4_tx_coalesce == 0 && txp->npkt == 0)
			goto skip_coalescing;
		/*
		 * Update the score: 0 for an mbuf that cannot use txpkts,
		 * incremented (saturating at UINT8_MAX) when the last tx was
		 * recent, reset to 1 otherwise.
		 */
		if (cannot_use_txpkts(m0))
			txp->score = 0;
		else if (recent_tx) {
			if (++txp->score == 0)
				txp->score = UINT8_MAX;
		} else
			txp->score = 1;
		/*
		 * Try to add m0 to txp if packets are already held there, more
		 * than one mbuf is waiting, the score has reached the
		 * t4_tx_coalesce_pkts threshold, or equiq is set.
		 */
		if (txp->npkt > 0 || remaining > 1 ||
		    txp->score >= t4_tx_coalesce_pkts ||
		    atomic_load_int(&txq->eq.equiq) != 0) {
			if (vi->flags & TX_USES_VM_WR)
				rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd);
			else
				rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd);
		} else {
			snd = false;
			rc = EINVAL;
		}
		if (snd) {
			/*
			 * The contents of txp have to go out now (snd was set
			 * by add_to_txpkts_*): one txpkts WR if there are
			 * multiple packets, a plain txpkt WR for a single one.
			 */
			MPASS(txp->npkt > 0);
			for (i = 0; i < txp->npkt; i++)
				ETHER_BPF_MTAP(ifp, txp->mb[i]);
			if (txp->npkt > 1) {
				MPASS(avail >= tx_len16_to_desc(txp->len16));
				if (vi->flags & TX_USES_VM_WR)
					n = write_txpkts_vm_wr(sc, txq);
				else
					n = write_txpkts_wr(sc, txq);
			} else {
				MPASS(avail >=
				    tx_len16_to_desc(mbuf_len16(txp->mb[0])));
				if (vi->flags & TX_USES_VM_WR)
					n = write_txpkt_vm_wr(sc, txq,
					    txp->mb[0]);
				else
					n = write_txpkt_wr(sc, txq, txp->mb[0],
					    avail);
			}
			MPASS(n <= SGE_MAX_WR_NDESC);
			avail -= n;
			dbdiff += n;
			/* Remember where this WR starts, then advance pidx. */
			wr = &eq->desc[eq->pidx];
			IDXINCR(eq->pidx, n, eq->sidx);
			txp->npkt = 0;	/* emptied */
		}
		if (rc == 0) {
			/* m0 was coalesced into txq->txpkts. */
			goto next_mbuf;
		}
		if (rc == EAGAIN) {
			/*
			 * m0 is suitable for tx coalescing but could not be
			 * combined with the existing txq->txpkts, which has now
			 * been transmitted.  Start a new txpkts with m0.
			 */
			MPASS(snd);
			MPASS(txp->npkt == 0);
			continue;
		}

		MPASS(rc != 0 && rc != EAGAIN);
		MPASS(txp->npkt == 0);
skip_coalescing:
		/* m0 gets a work request of its own. */
		n = tx_len16_to_desc(mbuf_len16(m0));
		if (__predict_false(avail < n)) {
			avail += reclaim_tx_descs(txq, min(n, 32));
			if (avail < n)
				break;	/* out of descriptors */
		}

		wr = &eq->desc[eq->pidx];
		if (mbuf_cflags(m0) & MC_RAW_WR) {
			n = write_raw_wr(txq, wr, m0, avail);
#ifdef KERN_TLS
		} else if (mbuf_cflags(m0) & MC_TLS) {
			ETHER_BPF_MTAP(ifp, m0);
			n = t6_ktls_write_wr(txq, wr, m0, mbuf_nsegs(m0),
			    avail);
#endif
		} else {
			ETHER_BPF_MTAP(ifp, m0);
			if (vi->flags & TX_USES_VM_WR)
				n = write_txpkt_vm_wr(sc, txq, m0);
			else
				n = write_txpkt_wr(sc, txq, m0, avail);
		}
		MPASS(n >= 1 && n <= avail);
		/* Only TLS work requests may exceed SGE_MAX_WR_NDESC. */
		if (!(mbuf_cflags(m0) & MC_TLS))
			MPASS(n <= SGE_MAX_WR_NDESC);

		avail -= n;
		dbdiff += n;
		IDXINCR(eq->pidx, n, eq->sidx);

		/* Ring the doorbell once enough descriptors have piled up. */
		if (dbdiff >= 512 / EQ_ESIZE) {	/* X_FETCHBURSTMAX_512B */
			if (wr_can_update_eq(wr))
				set_txupdate_flags(txq, avail, wr);
			ring_eq_db(sc, eq, dbdiff);
			avail += reclaim_tx_descs(txq, 32);
			dbdiff = 0;
		}
next_mbuf:
		/* m0 has been consumed, move on to the next ring item. */
		total++;
		remaining--;
		if (__predict_false(++cidx == r->size))
			cidx = 0;
	}
	if (dbdiff != 0) {
		/* Tell the hardware about descriptors not yet announced. */
		if (wr_can_update_eq(wr))
			set_txupdate_flags(txq, avail, wr);
		ring_eq_db(sc, eq, dbdiff);
		reclaim_tx_descs(txq, 32);
	} else if (eq->pidx == eq->cidx && txp->npkt > 0 &&
	    atomic_load_int(&txq->eq.equiq) == 0) {
		/*
		 * If nothing was submitted to the chip for tx (it was coalesced
		 * into txpkts instead) and there is no tx update outstanding
		 * then we need to send txpkts now.
		 */
send_txpkts:
		/* Also the target of the flush request (remaining == 0). */
		MPASS(txp->npkt > 0);
		for (i = 0; i < txp->npkt; i++)
			ETHER_BPF_MTAP(ifp, txp->mb[i]);
		if (txp->npkt > 1) {
			MPASS(avail >= tx_len16_to_desc(txp->len16));
			if (vi->flags & TX_USES_VM_WR)
				n = write_txpkts_vm_wr(sc, txq);
			else
				n = write_txpkts_wr(sc, txq);
		} else {
			MPASS(avail >=
			    tx_len16_to_desc(mbuf_len16(txp->mb[0])));
			if (vi->flags & TX_USES_VM_WR)
				n = write_txpkt_vm_wr(sc, txq, txp->mb[0]);
			else
				n = write_txpkt_wr(sc, txq, txp->mb[0], avail);
		}
		MPASS(n <= SGE_MAX_WR_NDESC);
		wr = &eq->desc[eq->pidx];
		IDXINCR(eq->pidx, n, eq->sidx);
		txp->npkt = 0;	/* emptied */

		MPASS(wr_can_update_eq(wr));
		/* avail has not been reduced by n yet, hence avail - n. */
		set_txupdate_flags(txq, avail - n, wr);
		ring_eq_db(sc, eq, n);
		reclaim_tx_descs(txq, 32);
	}
	/* Packets still held in txp mean coalescing is in progress. */
	*coalescing = txp->npkt > 0;

	return (total);
}
3406
3407 static inline void
init_iq(struct sge_iq * iq,struct adapter * sc,int tmr_idx,int pktc_idx,int qsize,int intr_idx,int cong,int qtype)3408 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
3409 int qsize, int intr_idx, int cong, int qtype)
3410 {
3411
3412 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
3413 ("%s: bad tmr_idx %d", __func__, tmr_idx));
3414 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */
3415 ("%s: bad pktc_idx %d", __func__, pktc_idx));
3416 KASSERT(intr_idx >= -1 && intr_idx < sc->intr_count,
3417 ("%s: bad intr_idx %d", __func__, intr_idx));
3418 KASSERT(qtype == FW_IQ_IQTYPE_OTHER || qtype == FW_IQ_IQTYPE_NIC ||
3419 qtype == FW_IQ_IQTYPE_OFLD, ("%s: bad qtype %d", __func__, qtype));
3420
3421 iq->flags = 0;
3422 iq->state = IQS_DISABLED;
3423 iq->adapter = sc;
3424 iq->qtype = qtype;
3425 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
3426 iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
3427 if (pktc_idx >= 0) {
3428 iq->intr_params |= F_QINTR_CNT_EN;
3429 iq->intr_pktc_idx = pktc_idx;
3430 }
3431 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */
3432 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE;
3433 iq->intr_idx = intr_idx;
3434 iq->cong_drop = cong;
3435 }
3436
3437 static inline void
init_fl(struct adapter * sc,struct sge_fl * fl,int qsize,int maxp,char * name)3438 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name)
3439 {
3440 struct sge_params *sp = &sc->params.sge;
3441
3442 fl->qsize = qsize;
3443 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE;
3444 strlcpy(fl->lockname, name, sizeof(fl->lockname));
3445 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
3446 if (sc->flags & BUF_PACKING_OK &&
3447 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */
3448 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */
3449 fl->flags |= FL_BUF_PACKING;
3450 fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING);
3451 fl->safe_zidx = sc->sge.safe_zidx;
3452 if (fl->flags & FL_BUF_PACKING) {
3453 fl->lowat = roundup2(sp->fl_starve_threshold2, 8);
3454 fl->buf_boundary = sp->pack_boundary;
3455 } else {
3456 fl->lowat = roundup2(sp->fl_starve_threshold, 8);
3457 fl->buf_boundary = 16;
3458 }
3459 if (fl_pad && fl->buf_boundary < sp->pad_boundary)
3460 fl->buf_boundary = sp->pad_boundary;
3461 }
3462
3463 static inline void
init_eq(struct adapter * sc,struct sge_eq * eq,int eqtype,int qsize,uint8_t port_id,struct sge_iq * iq,char * name)3464 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize,
3465 uint8_t port_id, struct sge_iq *iq, char *name)
3466 {
3467 KASSERT(eqtype >= EQ_CTRL && eqtype <= EQ_OFLD,
3468 ("%s: bad qtype %d", __func__, eqtype));
3469
3470 eq->type = eqtype;
3471 eq->port_id = port_id;
3472 eq->tx_chan = sc->port[port_id]->tx_chan;
3473 eq->iq = iq;
3474 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE;
3475 strlcpy(eq->lockname, name, sizeof(eq->lockname));
3476 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
3477 }
3478
3479 int
alloc_ring(struct adapter * sc,size_t len,bus_dma_tag_t * tag,bus_dmamap_t * map,bus_addr_t * pa,void ** va)3480 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
3481 bus_dmamap_t *map, bus_addr_t *pa, void **va)
3482 {
3483 int rc;
3484
3485 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
3486 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
3487 if (rc != 0) {
3488 CH_ERR(sc, "cannot allocate DMA tag: %d\n", rc);
3489 goto done;
3490 }
3491
3492 rc = bus_dmamem_alloc(*tag, va,
3493 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
3494 if (rc != 0) {
3495 CH_ERR(sc, "cannot allocate DMA memory: %d\n", rc);
3496 goto done;
3497 }
3498
3499 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
3500 if (rc != 0) {
3501 CH_ERR(sc, "cannot load DMA map: %d\n", rc);
3502 goto done;
3503 }
3504 done:
3505 if (rc)
3506 free_ring(sc, *tag, *map, *pa, *va);
3507
3508 return (rc);
3509 }
3510
3511 int
free_ring(struct adapter * sc,bus_dma_tag_t tag,bus_dmamap_t map,bus_addr_t pa,void * va)3512 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
3513 bus_addr_t pa, void *va)
3514 {
3515 if (pa)
3516 bus_dmamap_unload(tag, map);
3517 if (va)
3518 bus_dmamem_free(tag, va, map);
3519 if (tag)
3520 bus_dma_tag_destroy(tag);
3521
3522 return (0);
3523 }
3524
3525 /*
3526 * Allocates the software resources (mainly memory and sysctl nodes) for an
3527 * ingress queue and an optional freelist.
3528 *
3529 * Sets IQ_SW_ALLOCATED and returns 0 on success.
3530 */
3531 static int
alloc_iq_fl(struct vi_info * vi,struct sge_iq * iq,struct sge_fl * fl,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid)3532 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl,
3533 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid)
3534 {
3535 int rc;
3536 size_t len;
3537 struct adapter *sc = vi->adapter;
3538
3539 MPASS(!(iq->flags & IQ_SW_ALLOCATED));
3540
3541 len = iq->qsize * IQ_ESIZE;
3542 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
3543 (void **)&iq->desc);
3544 if (rc != 0)
3545 return (rc);
3546
3547 if (fl) {
3548 len = fl->qsize * EQ_ESIZE;
3549 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
3550 &fl->ba, (void **)&fl->desc);
3551 if (rc) {
3552 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba,
3553 iq->desc);
3554 return (rc);
3555 }
3556
3557 /* Allocate space for one software descriptor per buffer. */
3558 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc),
3559 M_CXGBE, M_ZERO | M_WAITOK);
3560
3561 add_fl_sysctls(sc, ctx, oid, fl);
3562 iq->flags |= IQ_HAS_FL;
3563 }
3564 add_iq_sysctls(ctx, oid, iq);
3565 iq->flags |= IQ_SW_ALLOCATED;
3566
3567 return (0);
3568 }
3569
3570 /*
3571 * Frees all software resources (memory and locks) associated with an ingress
3572 * queue and an optional freelist.
3573 */
3574 static void
free_iq_fl(struct adapter * sc,struct sge_iq * iq,struct sge_fl * fl)3575 free_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl)
3576 {
3577 MPASS(iq->flags & IQ_SW_ALLOCATED);
3578
3579 if (fl) {
3580 MPASS(iq->flags & IQ_HAS_FL);
3581 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc);
3582 free_fl_buffers(sc, fl);
3583 free(fl->sdesc, M_CXGBE);
3584 mtx_destroy(&fl->fl_lock);
3585 bzero(fl, sizeof(*fl));
3586 }
3587 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
3588 bzero(iq, sizeof(*iq));
3589 }
3590
/*
 * Allocates a hardware ingress queue and an optional freelist that will be
 * associated with it.
 *
 * A FW_IQ_CMD is assembled from the software state in iq (and fl, if given)
 * and sent to the firmware over the mailbox.  On success the ids returned by
 * the firmware are recorded, the queues are entered in the adapter's
 * iqmap/eqmap, the freelist's doorbell is set up and the freelist is filled to
 * its low watermark, the queue is moved to IQS_IDLE with its interrupt armed,
 * and IQ_HW_ALLOCATED is set.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
static int
alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
{
	int rc, cntxt_id, cong_map;
	struct fw_iq_cmd c;
	struct adapter *sc = vi->adapter;
	struct port_info *pi = vi->pi;
	__be32 v = 0;

	MPASS (!(iq->flags & IQ_HW_ALLOCATED));

	/* Command header: allocate and start an ingress queue for this PF. */
	bzero(&c, sizeof(c));
	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
	    V_FW_IQ_CMD_VFN(0));

	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
	    FW_LEN16(c));

	/* Special handling for firmware event queue */
	if (iq == &sc->sge.fwq)
		v |= F_FW_IQ_CMD_IQASYNCH;

	if (iq->intr_idx < 0) {
		/* Forwarded interrupts, all headed to fwq */
		v |= F_FW_IQ_CMD_IQANDST;
		v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id);
	} else {
		/* Direct interrupt: intr_idx is used as the index. */
		KASSERT(iq->intr_idx < sc->intr_count,
		    ("%s: invalid direct intr_idx %d", __func__, iq->intr_idx));
		v |= V_FW_IQ_CMD_IQANDSTINDEX(iq->intr_idx);
	}

	/* Start with a clean descriptor ring, then describe it to the fw. */
	bzero(iq->desc, iq->qsize * IQ_ESIZE);
	c.type_to_iqandstindex = htobe32(v |
	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
	    V_FW_IQ_CMD_VIID(vi->viid) |
	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
	    F_FW_IQ_CMD_IQGTSMODE |
	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
	c.iqsize = htobe16(iq->qsize);
	c.iqaddr = htobe64(iq->ba);
	c.iqns_to_fl0congen = htobe32(V_FW_IQ_CMD_IQTYPE(iq->qtype));
	if (iq->cong_drop != -1) {
		/*
		 * cong_map is assigned only here; every later use is guarded
		 * by the same cong_drop != -1 test.
		 */
		cong_map = iq->qtype == IQ_ETH ? pi->rx_e_chan_map : 0;
		c.iqns_to_fl0congen |= htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
	}

	if (fl) {
		/* Zero the freelist ring, status page included. */
		bzero(fl->desc, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len);
		c.iqns_to_fl0congen |=
		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
			(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
			(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
			    0));
		if (iq->cong_drop != -1) {
			c.iqns_to_fl0congen |=
				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) |
				    F_FW_IQ_CMD_FL0CONGCIF |
				    F_FW_IQ_CMD_FL0CONGEN);
		}
		/* Fetch burst limits differ between T4/T5 and later chips. */
		c.fl0dcaen_to_fl0cidxfthresh =
		    htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ?
			X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) |
			V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ?
			X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
		c.fl0size = htobe16(fl->qsize);
		c.fl0addr = htobe64(fl->ba);
	}

	/* The reply is written back into c. */
	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		CH_ERR(sc, "failed to create hw ingress queue: %d\n", rc);
		return (rc);
	}

	iq->cidx = 0;
	iq->gen = F_RSPD_GEN;
	iq->cntxt_id = be16toh(c.iqid);
	iq->abs_id = be16toh(c.physiqid);

	/* Make the queue findable by its context id (relative to iq_start). */
	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
	if (cntxt_id >= sc->sge.iqmap_sz) {
		panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.iqmap_sz - 1);
	}
	sc->sge.iqmap[cntxt_id] = iq;

	if (fl) {
		u_int qid;
#ifdef INVARIANTS
		int i;

		/* The freelist must not be holding any clusters yet. */
		MPASS(!(fl->flags & FL_BUF_RESUME));
		for (i = 0; i < fl->sidx * 8; i++)
			MPASS(fl->sdesc[i].cl == NULL);
#endif
		fl->cntxt_id = be16toh(c.fl0id);
		fl->pidx = fl->cidx = fl->hw_cidx = fl->dbidx = 0;
		fl->rx_offset = 0;
		fl->flags &= ~(FL_STARVING | FL_DOOMED);

		/* The freelist is entered in the egress queue map. */
		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
		if (cntxt_id >= sc->sge.eqmap_sz) {
			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
			    __func__, cntxt_id, sc->sge.eqmap_sz - 1);
		}
		sc->sge.eqmap[cntxt_id] = (void *)fl;

		qid = fl->cntxt_id;
		if (isset(&sc->doorbells, DOORBELL_UDB)) {
			/*
			 * Locate this queue's user doorbell: the page is
			 * selected by the upper bits of the qid.  If the
			 * remaining bits are small enough to pick a segment
			 * within the page, the segment is folded into the
			 * address and the qid used in dbval becomes 0.
			 */
			uint32_t s_qpp = sc->params.sge.eq_s_qpp;
			uint32_t mask = (1 << s_qpp) - 1;
			volatile uint8_t *udb;

			udb = sc->udbs_base + UDBS_DB_OFFSET;
			udb += (qid >> s_qpp) << PAGE_SHIFT;
			qid &= mask;
			if (qid < PAGE_SIZE / UDBS_SEG_SIZE) {
				udb += qid << UDBS_SEG_SHIFT;
				qid = 0;
			}
			fl->udb = (volatile void *)udb;
		}
		fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db;

		FL_LOCK(fl);
		/* Enough to make sure the SGE doesn't think it's starved */
		refill_fl(sc, fl, fl->lowat);
		FL_UNLOCK(fl);
	}

	/* Congestion manager context: T5 and later, PF only. */
	if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) &&
	    iq->cong_drop != -1) {
		t4_sge_set_conm_context(sc, iq->cntxt_id, iq->cong_drop,
		    cong_map);
	}

	/* Enable IQ interrupts */
	atomic_store_rel_int(&iq->state, IQS_IDLE);
	t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) |
	    V_INGRESSQID(iq->cntxt_id));

	iq->flags |= IQ_HW_ALLOCATED;

	return (0);
}
3748
3749 static int
free_iq_fl_hwq(struct adapter * sc,struct sge_iq * iq,struct sge_fl * fl)3750 free_iq_fl_hwq(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl)
3751 {
3752 int rc;
3753
3754 MPASS(iq->flags & IQ_HW_ALLOCATED);
3755 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
3756 iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
3757 if (rc != 0) {
3758 CH_ERR(sc, "failed to free iq %p: %d\n", iq, rc);
3759 return (rc);
3760 }
3761 iq->flags &= ~IQ_HW_ALLOCATED;
3762
3763 return (0);
3764 }
3765
3766 static void
add_iq_sysctls(struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_iq * iq)3767 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
3768 struct sge_iq *iq)
3769 {
3770 struct sysctl_oid_list *children;
3771
3772 if (ctx == NULL || oid == NULL)
3773 return;
3774
3775 children = SYSCTL_CHILDREN(oid);
3776 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba,
3777 "bus address of descriptor ring");
3778 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
3779 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes");
3780 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD,
3781 &iq->abs_id, 0, "absolute id of the queue");
3782 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3783 &iq->cntxt_id, 0, "SGE context id of the queue");
3784 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &iq->cidx,
3785 0, "consumer index");
3786 }
3787
3788 static void
add_fl_sysctls(struct adapter * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_fl * fl)3789 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
3790 struct sysctl_oid *oid, struct sge_fl *fl)
3791 {
3792 struct sysctl_oid_list *children;
3793
3794 if (ctx == NULL || oid == NULL)
3795 return;
3796
3797 children = SYSCTL_CHILDREN(oid);
3798 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl",
3799 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist");
3800 children = SYSCTL_CHILDREN(oid);
3801
3802 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD,
3803 &fl->ba, "bus address of descriptor ring");
3804 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
3805 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len,
3806 "desc ring size in bytes");
3807 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3808 &fl->cntxt_id, 0, "SGE context id of the freelist");
3809 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL,
3810 fl_pad ? 1 : 0, "padding enabled");
3811 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL,
3812 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled");
3813 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
3814 0, "consumer index");
3815 if (fl->flags & FL_BUF_PACKING) {
3816 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset",
3817 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset");
3818 }
3819 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx,
3820 0, "producer index");
3821 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated",
3822 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated");
3823 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled",
3824 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled");
3825 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled",
3826 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)");
3827 }
3828
3829 /*
3830 * Idempotent.
3831 */
3832 static int
alloc_fwq(struct adapter * sc)3833 alloc_fwq(struct adapter *sc)
3834 {
3835 int rc, intr_idx;
3836 struct sge_iq *fwq = &sc->sge.fwq;
3837 struct vi_info *vi = &sc->port[0]->vi[0];
3838
3839 if (!(fwq->flags & IQ_SW_ALLOCATED)) {
3840 MPASS(!(fwq->flags & IQ_HW_ALLOCATED));
3841
3842 if (sc->flags & IS_VF)
3843 intr_idx = 0;
3844 else
3845 intr_idx = sc->intr_count > 1 ? 1 : 0;
3846 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, intr_idx, -1, IQ_OTHER);
3847 rc = alloc_iq_fl(vi, fwq, NULL, &sc->ctx, sc->fwq_oid);
3848 if (rc != 0) {
3849 CH_ERR(sc, "failed to allocate fwq: %d\n", rc);
3850 return (rc);
3851 }
3852 MPASS(fwq->flags & IQ_SW_ALLOCATED);
3853 }
3854
3855 if (!(fwq->flags & IQ_HW_ALLOCATED)) {
3856 MPASS(fwq->flags & IQ_SW_ALLOCATED);
3857
3858 rc = alloc_iq_fl_hwq(vi, fwq, NULL);
3859 if (rc != 0) {
3860 CH_ERR(sc, "failed to create hw fwq: %d\n", rc);
3861 return (rc);
3862 }
3863 MPASS(fwq->flags & IQ_HW_ALLOCATED);
3864 }
3865
3866 return (0);
3867 }
3868
3869 /*
3870 * Idempotent.
3871 */
3872 static void
free_fwq(struct adapter * sc)3873 free_fwq(struct adapter *sc)
3874 {
3875 struct sge_iq *fwq = &sc->sge.fwq;
3876
3877 if (fwq->flags & IQ_HW_ALLOCATED) {
3878 MPASS(fwq->flags & IQ_SW_ALLOCATED);
3879 free_iq_fl_hwq(sc, fwq, NULL);
3880 MPASS(!(fwq->flags & IQ_HW_ALLOCATED));
3881 }
3882
3883 if (fwq->flags & IQ_SW_ALLOCATED) {
3884 MPASS(!(fwq->flags & IQ_HW_ALLOCATED));
3885 free_iq_fl(sc, fwq, NULL);
3886 MPASS(!(fwq->flags & IQ_SW_ALLOCATED));
3887 }
3888 }
3889
3890 /*
3891 * Idempotent.
3892 */
3893 static int
alloc_ctrlq(struct adapter * sc,int idx)3894 alloc_ctrlq(struct adapter *sc, int idx)
3895 {
3896 int rc;
3897 char name[16];
3898 struct sysctl_oid *oid;
3899 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx];
3900
3901 MPASS(idx < sc->params.nports);
3902
3903 if (!(ctrlq->eq.flags & EQ_SW_ALLOCATED)) {
3904 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED));
3905
3906 snprintf(name, sizeof(name), "%d", idx);
3907 oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->ctrlq_oid),
3908 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3909 "ctrl queue");
3910
3911 snprintf(name, sizeof(name), "%s ctrlq%d",
3912 device_get_nameunit(sc->dev), idx);
3913 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, idx,
3914 &sc->sge.fwq, name);
3915 rc = alloc_wrq(sc, NULL, ctrlq, &sc->ctx, oid);
3916 if (rc != 0) {
3917 CH_ERR(sc, "failed to allocate ctrlq%d: %d\n", idx, rc);
3918 sysctl_remove_oid(oid, 1, 1);
3919 return (rc);
3920 }
3921 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED);
3922 }
3923
3924 if (!(ctrlq->eq.flags & EQ_HW_ALLOCATED)) {
3925 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED);
3926
3927 rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq);
3928 if (rc != 0) {
3929 CH_ERR(sc, "failed to create hw ctrlq%d: %d\n", idx, rc);
3930 return (rc);
3931 }
3932 MPASS(ctrlq->eq.flags & EQ_HW_ALLOCATED);
3933 }
3934
3935 return (0);
3936 }
3937
3938 /*
3939 * Idempotent.
3940 */
3941 static void
free_ctrlq(struct adapter * sc,int idx)3942 free_ctrlq(struct adapter *sc, int idx)
3943 {
3944 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx];
3945
3946 if (ctrlq->eq.flags & EQ_HW_ALLOCATED) {
3947 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED);
3948 free_eq_hwq(sc, NULL, &ctrlq->eq);
3949 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED));
3950 }
3951
3952 if (ctrlq->eq.flags & EQ_SW_ALLOCATED) {
3953 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED));
3954 free_wrq(sc, ctrlq);
3955 MPASS(!(ctrlq->eq.flags & EQ_SW_ALLOCATED));
3956 }
3957 }
3958
3959 int
t4_sge_set_conm_context(struct adapter * sc,int cntxt_id,int cong_drop,int cong_map)3960 t4_sge_set_conm_context(struct adapter *sc, int cntxt_id, int cong_drop,
3961 int cong_map)
3962 {
3963 const int cng_ch_bits_log = sc->chip_params->cng_ch_bits_log;
3964 uint32_t param, val;
3965 uint16_t ch_map;
3966 int cong_mode, rc, i;
3967
3968 if (chip_id(sc) < CHELSIO_T5)
3969 return (ENOTSUP);
3970
3971 /* Convert the driver knob to the mode understood by the firmware. */
3972 switch (cong_drop) {
3973 case -1:
3974 cong_mode = X_CONMCTXT_CNGTPMODE_DISABLE;
3975 break;
3976 case 0:
3977 cong_mode = X_CONMCTXT_CNGTPMODE_CHANNEL;
3978 break;
3979 case 1:
3980 cong_mode = X_CONMCTXT_CNGTPMODE_QUEUE;
3981 break;
3982 case 2:
3983 cong_mode = X_CONMCTXT_CNGTPMODE_BOTH;
3984 break;
3985 default:
3986 MPASS(0);
3987 CH_ERR(sc, "cong_drop = %d is invalid (ingress queue %d).\n",
3988 cong_drop, cntxt_id);
3989 return (EINVAL);
3990 }
3991
3992 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
3993 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
3994 V_FW_PARAMS_PARAM_YZ(cntxt_id);
3995 val = V_CONMCTXT_CNGTPMODE(cong_mode);
3996 if (cong_mode == X_CONMCTXT_CNGTPMODE_CHANNEL ||
3997 cong_mode == X_CONMCTXT_CNGTPMODE_BOTH) {
3998 for (i = 0, ch_map = 0; i < 4; i++) {
3999 if (cong_map & (1 << i))
4000 ch_map |= 1 << (i << cng_ch_bits_log);
4001 }
4002 val |= V_CONMCTXT_CNGCHMAP(ch_map);
4003 }
4004 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val);
4005 if (rc != 0) {
4006 CH_ERR(sc, "failed to set congestion manager context "
4007 "for ingress queue %d: %d\n", cntxt_id, rc);
4008 }
4009
4010 return (rc);
4011 }
4012
4013 /*
4014 * Idempotent.
4015 */
4016 static int
alloc_rxq(struct vi_info * vi,struct sge_rxq * rxq,int idx,int intr_idx,int maxp)4017 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int idx, int intr_idx,
4018 int maxp)
4019 {
4020 int rc;
4021 struct adapter *sc = vi->adapter;
4022 struct ifnet *ifp = vi->ifp;
4023 struct sysctl_oid *oid;
4024 char name[16];
4025
4026 if (!(rxq->iq.flags & IQ_SW_ALLOCATED)) {
4027 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED));
4028 #if defined(INET) || defined(INET6)
4029 rc = tcp_lro_init_args(&rxq->lro, ifp, lro_entries, lro_mbufs);
4030 if (rc != 0)
4031 return (rc);
4032 MPASS(rxq->lro.ifp == ifp); /* also indicates LRO init'ed */
4033 #endif
4034 rxq->ifp = ifp;
4035
4036 snprintf(name, sizeof(name), "%d", idx);
4037 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->rxq_oid),
4038 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
4039 "rx queue");
4040
4041 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq,
4042 intr_idx, cong_drop, IQ_ETH);
4043 #if defined(INET) || defined(INET6)
4044 if (ifp->if_capenable & IFCAP_LRO)
4045 rxq->iq.flags |= IQ_LRO_ENABLED;
4046 #endif
4047 if (ifp->if_capenable & IFCAP_HWRXTSTMP)
4048 rxq->iq.flags |= IQ_RX_TIMESTAMP;
4049 snprintf(name, sizeof(name), "%s rxq%d-fl",
4050 device_get_nameunit(vi->dev), idx);
4051 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name);
4052 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, &vi->ctx, oid);
4053 if (rc != 0) {
4054 CH_ERR(vi, "failed to allocate rxq%d: %d\n", idx, rc);
4055 sysctl_remove_oid(oid, 1, 1);
4056 #if defined(INET) || defined(INET6)
4057 tcp_lro_free(&rxq->lro);
4058 rxq->lro.ifp = NULL;
4059 #endif
4060 return (rc);
4061 }
4062 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED);
4063 add_rxq_sysctls(&vi->ctx, oid, rxq);
4064 }
4065
4066 if (!(rxq->iq.flags & IQ_HW_ALLOCATED)) {
4067 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED);
4068 rc = alloc_iq_fl_hwq(vi, &rxq->iq, &rxq->fl);
4069 if (rc != 0) {
4070 CH_ERR(vi, "failed to create hw rxq%d: %d\n", idx, rc);
4071 return (rc);
4072 }
4073 MPASS(rxq->iq.flags & IQ_HW_ALLOCATED);
4074
4075 if (idx == 0)
4076 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id;
4077 else
4078 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id,
4079 ("iq_base mismatch"));
4080 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF,
4081 ("PF with non-zero iq_base"));
4082
4083 /*
4084 * The freelist is just barely above the starvation threshold
4085 * right now, fill it up a bit more.
4086 */
4087 FL_LOCK(&rxq->fl);
4088 refill_fl(sc, &rxq->fl, 128);
4089 FL_UNLOCK(&rxq->fl);
4090 }
4091
4092 return (0);
4093 }
4094
4095 /*
4096 * Idempotent.
4097 */
4098 static void
free_rxq(struct vi_info * vi,struct sge_rxq * rxq)4099 free_rxq(struct vi_info *vi, struct sge_rxq *rxq)
4100 {
4101 if (rxq->iq.flags & IQ_HW_ALLOCATED) {
4102 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED);
4103 free_iq_fl_hwq(vi->adapter, &rxq->iq, &rxq->fl);
4104 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED));
4105 }
4106
4107 if (rxq->iq.flags & IQ_SW_ALLOCATED) {
4108 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED));
4109 #if defined(INET) || defined(INET6)
4110 tcp_lro_free(&rxq->lro);
4111 #endif
4112 free_iq_fl(vi->adapter, &rxq->iq, &rxq->fl);
4113 MPASS(!(rxq->iq.flags & IQ_SW_ALLOCATED));
4114 bzero(rxq, sizeof(*rxq));
4115 }
4116 }
4117
4118 static void
add_rxq_sysctls(struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_rxq * rxq)4119 add_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
4120 struct sge_rxq *rxq)
4121 {
4122 struct sysctl_oid_list *children;
4123
4124 if (ctx == NULL || oid == NULL)
4125 return;
4126
4127 children = SYSCTL_CHILDREN(oid);
4128 #if defined(INET) || defined(INET6)
4129 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
4130 &rxq->lro.lro_queued, 0, NULL);
4131 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
4132 &rxq->lro.lro_flushed, 0, NULL);
4133 #endif
4134 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
4135 &rxq->rxcsum, "# of times hardware assisted with checksum");
4136 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD,
4137 &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag");
4138 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_rxcsum", CTLFLAG_RD,
4139 &rxq->vxlan_rxcsum,
4140 "# of times hardware assisted with inner checksum (VXLAN)");
4141 }
4142
4143 #ifdef TCP_OFFLOAD
4144 /*
4145 * Idempotent.
4146 */
4147 static int
alloc_ofld_rxq(struct vi_info * vi,struct sge_ofld_rxq * ofld_rxq,int idx,int intr_idx,int maxp)4148 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int idx,
4149 int intr_idx, int maxp)
4150 {
4151 int rc;
4152 struct adapter *sc = vi->adapter;
4153 struct sysctl_oid *oid;
4154 char name[16];
4155
4156 if (!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)) {
4157 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED));
4158
4159 snprintf(name, sizeof(name), "%d", idx);
4160 oid = SYSCTL_ADD_NODE(&vi->ctx,
4161 SYSCTL_CHILDREN(vi->ofld_rxq_oid), OID_AUTO, name,
4162 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload rx queue");
4163
4164 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx,
4165 vi->qsize_rxq, intr_idx, ofld_cong_drop, IQ_OFLD);
4166 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
4167 device_get_nameunit(vi->dev), idx);
4168 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name);
4169 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, &vi->ctx,
4170 oid);
4171 if (rc != 0) {
4172 CH_ERR(vi, "failed to allocate ofld_rxq%d: %d\n", idx,
4173 rc);
4174 sysctl_remove_oid(oid, 1, 1);
4175 return (rc);
4176 }
4177 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED);
4178 ofld_rxq->rx_iscsi_ddp_setup_ok = counter_u64_alloc(M_WAITOK);
4179 ofld_rxq->rx_iscsi_ddp_setup_error =
4180 counter_u64_alloc(M_WAITOK);
4181 add_ofld_rxq_sysctls(&vi->ctx, oid, ofld_rxq);
4182 }
4183
4184 if (!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)) {
4185 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED);
4186 rc = alloc_iq_fl_hwq(vi, &ofld_rxq->iq, &ofld_rxq->fl);
4187 if (rc != 0) {
4188 CH_ERR(vi, "failed to create hw ofld_rxq%d: %d\n", idx,
4189 rc);
4190 return (rc);
4191 }
4192 MPASS(ofld_rxq->iq.flags & IQ_HW_ALLOCATED);
4193 }
4194 return (rc);
4195 }
4196
4197 /*
4198 * Idempotent.
4199 */
4200 static void
free_ofld_rxq(struct vi_info * vi,struct sge_ofld_rxq * ofld_rxq)4201 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq)
4202 {
4203 if (ofld_rxq->iq.flags & IQ_HW_ALLOCATED) {
4204 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED);
4205 free_iq_fl_hwq(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl);
4206 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED));
4207 }
4208
4209 if (ofld_rxq->iq.flags & IQ_SW_ALLOCATED) {
4210 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED));
4211 free_iq_fl(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl);
4212 MPASS(!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED));
4213 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_ok);
4214 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_error);
4215 bzero(ofld_rxq, sizeof(*ofld_rxq));
4216 }
4217 }
4218
4219 static void
add_ofld_rxq_sysctls(struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_ofld_rxq * ofld_rxq)4220 add_ofld_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
4221 struct sge_ofld_rxq *ofld_rxq)
4222 {
4223 struct sysctl_oid_list *children;
4224
4225 if (ctx == NULL || oid == NULL)
4226 return;
4227
4228 children = SYSCTL_CHILDREN(oid);
4229 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "rx_aio_ddp_jobs",
4230 CTLFLAG_RD, &ofld_rxq->rx_aio_ddp_jobs, 0,
4231 "# of aio_read(2) jobs completed via DDP");
4232 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "rx_aio_ddp_octets",
4233 CTLFLAG_RD, &ofld_rxq->rx_aio_ddp_octets, 0,
4234 "# of octets placed directly for aio_read(2) jobs");
4235 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO,
4236 "rx_toe_tls_records", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_records,
4237 "# of TOE TLS records received");
4238 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO,
4239 "rx_toe_tls_octets", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_octets,
4240 "# of payload octets in received TOE TLS records");
4241
4242 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "iscsi",
4243 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE iSCSI statistics");
4244 children = SYSCTL_CHILDREN(oid);
4245
4246 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_ok",
4247 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_ok,
4248 "# of times DDP buffer was setup successfully.");
4249 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_error",
4250 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_error,
4251 "# of times DDP buffer setup failed.");
4252 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_octets",
4253 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_octets, 0,
4254 "# of octets placed directly");
4255 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_pdus",
4256 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_pdus, 0,
4257 "# of PDUs with data placed directly.");
4258 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_octets",
4259 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_octets, 0,
4260 "# of data octets delivered in freelist");
4261 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_pdus",
4262 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_pdus, 0,
4263 "# of PDUs with data delivered in freelist");
4264 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "padding_errors",
4265 CTLFLAG_RD, &ofld_rxq->rx_iscsi_padding_errors, 0,
4266 "# of PDUs with invalid padding");
4267 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "header_digest_errors",
4268 CTLFLAG_RD, &ofld_rxq->rx_iscsi_header_digest_errors, 0,
4269 "# of PDUs with invalid header digests");
4270 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "data_digest_errors",
4271 CTLFLAG_RD, &ofld_rxq->rx_iscsi_data_digest_errors, 0,
4272 "# of PDUs with invalid data digests");
4273 }
4274 #endif
4275
4276 /*
4277 * Returns a reasonable automatic cidx flush threshold for a given queue size.
4278 */
4279 static u_int
qsize_to_fthresh(int qsize)4280 qsize_to_fthresh(int qsize)
4281 {
4282 u_int fthresh;
4283
4284 while (!powerof2(qsize))
4285 qsize++;
4286 fthresh = ilog2(qsize);
4287 if (fthresh > X_CIDXFLUSHTHRESH_128)
4288 fthresh = X_CIDXFLUSHTHRESH_128;
4289
4290 return (fthresh);
4291 }
4292
4293 static int
ctrl_eq_alloc(struct adapter * sc,struct sge_eq * eq)4294 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
4295 {
4296 int rc, cntxt_id;
4297 struct fw_eq_ctrl_cmd c;
4298 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
4299
4300 bzero(&c, sizeof(c));
4301
4302 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
4303 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
4304 V_FW_EQ_CTRL_CMD_VFN(0));
4305 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
4306 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
4307 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid));
4308 c.physeqid_pkd = htobe32(0);
4309 c.fetchszm_to_iqid =
4310 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
4311 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
4312 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
4313 c.dcaen_to_eqsize =
4314 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
4315 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
4316 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
4317 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) |
4318 V_FW_EQ_CTRL_CMD_EQSIZE(qsize));
4319 c.eqaddr = htobe64(eq->ba);
4320
4321 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
4322 if (rc != 0) {
4323 CH_ERR(sc, "failed to create hw ctrlq for tx_chan %d: %d\n",
4324 eq->tx_chan, rc);
4325 return (rc);
4326 }
4327
4328 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
4329 eq->abs_id = G_FW_EQ_CTRL_CMD_PHYSEQID(be32toh(c.physeqid_pkd));
4330 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
4331 if (cntxt_id >= sc->sge.eqmap_sz)
4332 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
4333 cntxt_id, sc->sge.eqmap_sz - 1);
4334 sc->sge.eqmap[cntxt_id] = eq;
4335
4336 return (rc);
4337 }
4338
4339 static int
eth_eq_alloc(struct adapter * sc,struct vi_info * vi,struct sge_eq * eq)4340 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
4341 {
4342 int rc, cntxt_id;
4343 struct fw_eq_eth_cmd c;
4344 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
4345
4346 bzero(&c, sizeof(c));
4347
4348 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
4349 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
4350 V_FW_EQ_ETH_CMD_VFN(0));
4351 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
4352 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
4353 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
4354 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid));
4355 c.fetchszm_to_iqid =
4356 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
4357 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
4358 V_FW_EQ_ETH_CMD_IQID(eq->iqid));
4359 c.dcaen_to_eqsize =
4360 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
4361 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
4362 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
4363 V_FW_EQ_ETH_CMD_EQSIZE(qsize));
4364 c.eqaddr = htobe64(eq->ba);
4365
4366 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
4367 if (rc != 0) {
4368 device_printf(vi->dev,
4369 "failed to create Ethernet egress queue: %d\n", rc);
4370 return (rc);
4371 }
4372
4373 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
4374 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd));
4375 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
4376 if (cntxt_id >= sc->sge.eqmap_sz)
4377 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
4378 cntxt_id, sc->sge.eqmap_sz - 1);
4379 sc->sge.eqmap[cntxt_id] = eq;
4380
4381 return (rc);
4382 }
4383
4384 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
4385 static int
ofld_eq_alloc(struct adapter * sc,struct vi_info * vi,struct sge_eq * eq)4386 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
4387 {
4388 int rc, cntxt_id;
4389 struct fw_eq_ofld_cmd c;
4390 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
4391
4392 bzero(&c, sizeof(c));
4393
4394 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
4395 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
4396 V_FW_EQ_OFLD_CMD_VFN(0));
4397 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
4398 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
4399 c.fetchszm_to_iqid =
4400 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
4401 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
4402 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
4403 c.dcaen_to_eqsize =
4404 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
4405 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
4406 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
4407 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) |
4408 V_FW_EQ_OFLD_CMD_EQSIZE(qsize));
4409 c.eqaddr = htobe64(eq->ba);
4410
4411 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
4412 if (rc != 0) {
4413 device_printf(vi->dev,
4414 "failed to create egress queue for TCP offload: %d\n", rc);
4415 return (rc);
4416 }
4417
4418 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
4419 eq->abs_id = G_FW_EQ_OFLD_CMD_PHYSEQID(be32toh(c.physeqid_pkd));
4420 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
4421 if (cntxt_id >= sc->sge.eqmap_sz)
4422 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
4423 cntxt_id, sc->sge.eqmap_sz - 1);
4424 sc->sge.eqmap[cntxt_id] = eq;
4425
4426 return (rc);
4427 }
4428 #endif
4429
4430 /* SW only */
4431 static int
alloc_eq(struct adapter * sc,struct sge_eq * eq,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid)4432 alloc_eq(struct adapter *sc, struct sge_eq *eq, struct sysctl_ctx_list *ctx,
4433 struct sysctl_oid *oid)
4434 {
4435 int rc, qsize;
4436 size_t len;
4437
4438 MPASS(!(eq->flags & EQ_SW_ALLOCATED));
4439
4440 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;
4441 len = qsize * EQ_ESIZE;
4442 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba,
4443 (void **)&eq->desc);
4444 if (rc)
4445 return (rc);
4446 if (ctx != NULL && oid != NULL)
4447 add_eq_sysctls(sc, ctx, oid, eq);
4448 eq->flags |= EQ_SW_ALLOCATED;
4449
4450 return (0);
4451 }
4452
4453 /* SW only */
4454 static void
free_eq(struct adapter * sc,struct sge_eq * eq)4455 free_eq(struct adapter *sc, struct sge_eq *eq)
4456 {
4457 MPASS(eq->flags & EQ_SW_ALLOCATED);
4458 if (eq->type == EQ_ETH)
4459 MPASS(eq->pidx == eq->cidx);
4460
4461 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
4462 mtx_destroy(&eq->eq_lock);
4463 bzero(eq, sizeof(*eq));
4464 }
4465
4466 static void
add_eq_sysctls(struct adapter * sc,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_eq * eq)4467 add_eq_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
4468 struct sysctl_oid *oid, struct sge_eq *eq)
4469 {
4470 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
4471
4472 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba,
4473 "bus address of descriptor ring");
4474 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL,
4475 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len,
4476 "desc ring size in bytes");
4477 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD,
4478 &eq->abs_id, 0, "absolute id of the queue");
4479 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
4480 &eq->cntxt_id, 0, "SGE context id of the queue");
4481 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &eq->cidx,
4482 0, "consumer index");
4483 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &eq->pidx,
4484 0, "producer index");
4485 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL,
4486 eq->sidx, "status page index");
4487 }
4488
4489 static int
alloc_eq_hwq(struct adapter * sc,struct vi_info * vi,struct sge_eq * eq)4490 alloc_eq_hwq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
4491 {
4492 int rc;
4493
4494 MPASS(!(eq->flags & EQ_HW_ALLOCATED));
4495
4496 eq->iqid = eq->iq->cntxt_id;
4497 eq->pidx = eq->cidx = eq->dbidx = 0;
4498 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. */
4499 eq->equeqidx = 0;
4500 eq->doorbells = sc->doorbells;
4501 bzero(eq->desc, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len);
4502
4503 switch (eq->type) {
4504 case EQ_CTRL:
4505 rc = ctrl_eq_alloc(sc, eq);
4506 break;
4507
4508 case EQ_ETH:
4509 rc = eth_eq_alloc(sc, vi, eq);
4510 break;
4511
4512 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
4513 case EQ_OFLD:
4514 rc = ofld_eq_alloc(sc, vi, eq);
4515 break;
4516 #endif
4517
4518 default:
4519 panic("%s: invalid eq type %d.", __func__, eq->type);
4520 }
4521 if (rc != 0) {
4522 CH_ERR(sc, "failed to allocate egress queue(%d): %d\n",
4523 eq->type, rc);
4524 return (rc);
4525 }
4526
4527 if (isset(&eq->doorbells, DOORBELL_UDB) ||
4528 isset(&eq->doorbells, DOORBELL_UDBWC) ||
4529 isset(&eq->doorbells, DOORBELL_WCWR)) {
4530 uint32_t s_qpp = sc->params.sge.eq_s_qpp;
4531 uint32_t mask = (1 << s_qpp) - 1;
4532 volatile uint8_t *udb;
4533
4534 udb = sc->udbs_base + UDBS_DB_OFFSET;
4535 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */
4536 eq->udb_qid = eq->cntxt_id & mask; /* id in page */
4537 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
4538 clrbit(&eq->doorbells, DOORBELL_WCWR);
4539 else {
4540 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */
4541 eq->udb_qid = 0;
4542 }
4543 eq->udb = (volatile void *)udb;
4544 }
4545
4546 eq->flags |= EQ_HW_ALLOCATED;
4547 return (0);
4548 }
4549
4550 static int
free_eq_hwq(struct adapter * sc,struct vi_info * vi __unused,struct sge_eq * eq)4551 free_eq_hwq(struct adapter *sc, struct vi_info *vi __unused, struct sge_eq *eq)
4552 {
4553 int rc;
4554
4555 MPASS(eq->flags & EQ_HW_ALLOCATED);
4556
4557 switch (eq->type) {
4558 case EQ_CTRL:
4559 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
4560 break;
4561 case EQ_ETH:
4562 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
4563 break;
4564 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
4565 case EQ_OFLD:
4566 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
4567 break;
4568 #endif
4569 default:
4570 panic("%s: invalid eq type %d.", __func__, eq->type);
4571 }
4572 if (rc != 0) {
4573 CH_ERR(sc, "failed to free eq (type %d): %d\n", eq->type, rc);
4574 return (rc);
4575 }
4576 eq->flags &= ~EQ_HW_ALLOCATED;
4577
4578 return (0);
4579 }
4580
4581 static int
alloc_wrq(struct adapter * sc,struct vi_info * vi,struct sge_wrq * wrq,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid)4582 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq,
4583 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid)
4584 {
4585 struct sge_eq *eq = &wrq->eq;
4586 int rc;
4587
4588 MPASS(!(eq->flags & EQ_SW_ALLOCATED));
4589
4590 rc = alloc_eq(sc, eq, ctx, oid);
4591 if (rc)
4592 return (rc);
4593 MPASS(eq->flags & EQ_SW_ALLOCATED);
4594 /* Can't fail after this. */
4595
4596 wrq->adapter = sc;
4597 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq);
4598 TAILQ_INIT(&wrq->incomplete_wrs);
4599 STAILQ_INIT(&wrq->wr_list);
4600 wrq->nwr_pending = 0;
4601 wrq->ndesc_needed = 0;
4602 add_wrq_sysctls(ctx, oid, wrq);
4603
4604 return (0);
4605 }
4606
4607 static void
free_wrq(struct adapter * sc,struct sge_wrq * wrq)4608 free_wrq(struct adapter *sc, struct sge_wrq *wrq)
4609 {
4610 free_eq(sc, &wrq->eq);
4611 MPASS(wrq->nwr_pending == 0);
4612 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
4613 MPASS(STAILQ_EMPTY(&wrq->wr_list));
4614 bzero(wrq, sizeof(*wrq));
4615 }
4616
4617 static void
add_wrq_sysctls(struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_wrq * wrq)4618 add_wrq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
4619 struct sge_wrq *wrq)
4620 {
4621 struct sysctl_oid_list *children;
4622
4623 if (ctx == NULL || oid == NULL)
4624 return;
4625
4626 children = SYSCTL_CHILDREN(oid);
4627 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD,
4628 &wrq->tx_wrs_direct, "# of work requests (direct)");
4629 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD,
4630 &wrq->tx_wrs_copied, "# of work requests (copied)");
4631 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD,
4632 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)");
4633 }
4634
4635 /*
4636 * Idempotent.
4637 */
4638 static int
alloc_txq(struct vi_info * vi,struct sge_txq * txq,int idx)4639 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx)
4640 {
4641 int rc, iqidx;
4642 struct port_info *pi = vi->pi;
4643 struct adapter *sc = vi->adapter;
4644 struct sge_eq *eq = &txq->eq;
4645 struct txpkts *txp;
4646 char name[16];
4647 struct sysctl_oid *oid;
4648
4649 if (!(eq->flags & EQ_SW_ALLOCATED)) {
4650 MPASS(!(eq->flags & EQ_HW_ALLOCATED));
4651
4652 snprintf(name, sizeof(name), "%d", idx);
4653 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->txq_oid),
4654 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
4655 "tx queue");
4656
4657 iqidx = vi->first_rxq + (idx % vi->nrxq);
4658 snprintf(name, sizeof(name), "%s txq%d",
4659 device_get_nameunit(vi->dev), idx);
4660 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->port_id,
4661 &sc->sge.rxq[iqidx].iq, name);
4662
4663 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx,
4664 can_resume_eth_tx, M_CXGBE, &eq->eq_lock, M_WAITOK);
4665 if (rc != 0) {
4666 CH_ERR(vi, "failed to allocate mp_ring for txq%d: %d\n",
4667 idx, rc);
4668 failed:
4669 sysctl_remove_oid(oid, 1, 1);
4670 return (rc);
4671 }
4672
4673 rc = alloc_eq(sc, eq, &vi->ctx, oid);
4674 if (rc) {
4675 CH_ERR(vi, "failed to allocate txq%d: %d\n", idx, rc);
4676 mp_ring_free(txq->r);
4677 goto failed;
4678 }
4679 MPASS(eq->flags & EQ_SW_ALLOCATED);
4680 /* Can't fail after this point. */
4681
4682 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq);
4683 txq->ifp = vi->ifp;
4684 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK);
4685 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE,
4686 M_ZERO | M_WAITOK);
4687
4688 add_txq_sysctls(vi, &vi->ctx, oid, txq);
4689 }
4690
4691 if (!(eq->flags & EQ_HW_ALLOCATED)) {
4692 MPASS(eq->flags & EQ_SW_ALLOCATED);
4693 rc = alloc_eq_hwq(sc, vi, eq);
4694 if (rc != 0) {
4695 CH_ERR(vi, "failed to create hw txq%d: %d\n", idx, rc);
4696 return (rc);
4697 }
4698 MPASS(eq->flags & EQ_HW_ALLOCATED);
4699 /* Can't fail after this point. */
4700
4701 if (idx == 0)
4702 sc->sge.eq_base = eq->abs_id - eq->cntxt_id;
4703 else
4704 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id,
4705 ("eq_base mismatch"));
4706 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF,
4707 ("PF with non-zero eq_base"));
4708
4709 txp = &txq->txp;
4710 MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr);
4711 txq->txp.max_npkt = min(nitems(txp->mb),
4712 sc->params.max_pkts_per_eth_tx_pkts_wr);
4713 if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF))
4714 txq->txp.max_npkt--;
4715
4716 if (vi->flags & TX_USES_VM_WR)
4717 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
4718 V_TXPKT_INTF(pi->tx_chan));
4719 else
4720 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
4721 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
4722 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
4723
4724 txq->tc_idx = -1;
4725 }
4726
4727 return (0);
4728 }
4729
4730 /*
4731 * Idempotent.
4732 */
4733 static void
free_txq(struct vi_info * vi,struct sge_txq * txq)4734 free_txq(struct vi_info *vi, struct sge_txq *txq)
4735 {
4736 struct adapter *sc = vi->adapter;
4737 struct sge_eq *eq = &txq->eq;
4738
4739 if (eq->flags & EQ_HW_ALLOCATED) {
4740 MPASS(eq->flags & EQ_SW_ALLOCATED);
4741 free_eq_hwq(sc, NULL, eq);
4742 MPASS(!(eq->flags & EQ_HW_ALLOCATED));
4743 }
4744
4745 if (eq->flags & EQ_SW_ALLOCATED) {
4746 MPASS(!(eq->flags & EQ_HW_ALLOCATED));
4747 sglist_free(txq->gl);
4748 free(txq->sdesc, M_CXGBE);
4749 mp_ring_free(txq->r);
4750 free_eq(sc, eq);
4751 MPASS(!(eq->flags & EQ_SW_ALLOCATED));
4752 bzero(txq, sizeof(*txq));
4753 }
4754 }
4755
4756 static void
add_txq_sysctls(struct vi_info * vi,struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_txq * txq)4757 add_txq_sysctls(struct vi_info *vi, struct sysctl_ctx_list *ctx,
4758 struct sysctl_oid *oid, struct sge_txq *txq)
4759 {
4760 struct adapter *sc;
4761 struct sysctl_oid_list *children;
4762
4763 if (ctx == NULL || oid == NULL)
4764 return;
4765
4766 sc = vi->adapter;
4767 children = SYSCTL_CHILDREN(oid);
4768
4769 mp_ring_sysctls(txq->r, ctx, children);
4770
4771 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tc",
4772 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, txq - sc->sge.txq,
4773 sysctl_tc, "I", "traffic class (-1 means none)");
4774
4775 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
4776 &txq->txcsum, "# of times hardware assisted with checksum");
4777 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD,
4778 &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag");
4779 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
4780 &txq->tso_wrs, "# of TSO work requests");
4781 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
4782 &txq->imm_wrs, "# of work requests with immediate data");
4783 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
4784 &txq->sgl_wrs, "# of work requests with direct SGL");
4785 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
4786 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
4787 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD,
4788 &txq->txpkts0_wrs, "# of txpkts (type 0) work requests");
4789 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD,
4790 &txq->txpkts1_wrs, "# of txpkts (type 1) work requests");
4791 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD,
4792 &txq->txpkts0_pkts,
4793 "# of frames tx'd using type0 txpkts work requests");
4794 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD,
4795 &txq->txpkts1_pkts,
4796 "# of frames tx'd using type1 txpkts work requests");
4797 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts_flush", CTLFLAG_RD,
4798 &txq->txpkts_flush,
4799 "# of times txpkts had to be flushed out by an egress-update");
4800 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD,
4801 &txq->raw_wrs, "# of raw work requests (non-packets)");
4802 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_tso_wrs", CTLFLAG_RD,
4803 &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests");
4804 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_txcsum", CTLFLAG_RD,
4805 &txq->vxlan_txcsum,
4806 "# of times hardware assisted with inner checksums (VXLAN)");
4807
4808 #ifdef KERN_TLS
4809 if (is_ktls(sc)) {
4810 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_records",
4811 CTLFLAG_RD, &txq->kern_tls_records,
4812 "# of NIC TLS records transmitted");
4813 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_short",
4814 CTLFLAG_RD, &txq->kern_tls_short,
4815 "# of short NIC TLS records transmitted");
4816 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_partial",
4817 CTLFLAG_RD, &txq->kern_tls_partial,
4818 "# of partial NIC TLS records transmitted");
4819 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_full",
4820 CTLFLAG_RD, &txq->kern_tls_full,
4821 "# of full NIC TLS records transmitted");
4822 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_octets",
4823 CTLFLAG_RD, &txq->kern_tls_octets,
4824 "# of payload octets in transmitted NIC TLS records");
4825 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_waste",
4826 CTLFLAG_RD, &txq->kern_tls_waste,
4827 "# of octets DMAd but not transmitted in NIC TLS records");
4828 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_options",
4829 CTLFLAG_RD, &txq->kern_tls_options,
4830 "# of NIC TLS options-only packets transmitted");
4831 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_header",
4832 CTLFLAG_RD, &txq->kern_tls_header,
4833 "# of NIC TLS header-only packets transmitted");
4834 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin",
4835 CTLFLAG_RD, &txq->kern_tls_fin,
4836 "# of NIC TLS FIN-only packets transmitted");
4837 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin_short",
4838 CTLFLAG_RD, &txq->kern_tls_fin_short,
4839 "# of NIC TLS padded FIN packets on short TLS records");
4840 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_cbc",
4841 CTLFLAG_RD, &txq->kern_tls_cbc,
4842 "# of NIC TLS sessions using AES-CBC");
4843 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_gcm",
4844 CTLFLAG_RD, &txq->kern_tls_gcm,
4845 "# of NIC TLS sessions using AES-GCM");
4846 }
4847 #endif
4848 }
4849
4850 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
4851 /*
4852 * Idempotent.
4853 */
4854 static int
alloc_ofld_txq(struct vi_info * vi,struct sge_ofld_txq * ofld_txq,int idx)4855 alloc_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq, int idx)
4856 {
4857 struct sysctl_oid *oid;
4858 struct port_info *pi = vi->pi;
4859 struct adapter *sc = vi->adapter;
4860 struct sge_eq *eq = &ofld_txq->wrq.eq;
4861 int rc, iqidx;
4862 char name[16];
4863
4864 MPASS(idx >= 0);
4865 MPASS(idx < vi->nofldtxq);
4866
4867 if (!(eq->flags & EQ_SW_ALLOCATED)) {
4868 snprintf(name, sizeof(name), "%d", idx);
4869 oid = SYSCTL_ADD_NODE(&vi->ctx,
4870 SYSCTL_CHILDREN(vi->ofld_txq_oid), OID_AUTO, name,
4871 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload tx queue");
4872
4873 snprintf(name, sizeof(name), "%s ofld_txq%d",
4874 device_get_nameunit(vi->dev), idx);
4875 if (vi->nofldrxq > 0) {
4876 iqidx = vi->first_ofld_rxq + (idx % vi->nofldrxq);
4877 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->port_id,
4878 &sc->sge.ofld_rxq[iqidx].iq, name);
4879 } else {
4880 iqidx = vi->first_rxq + (idx % vi->nrxq);
4881 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->port_id,
4882 &sc->sge.rxq[iqidx].iq, name);
4883 }
4884
4885 rc = alloc_wrq(sc, vi, &ofld_txq->wrq, &vi->ctx, oid);
4886 if (rc != 0) {
4887 CH_ERR(vi, "failed to allocate ofld_txq%d: %d\n", idx,
4888 rc);
4889 sysctl_remove_oid(oid, 1, 1);
4890 return (rc);
4891 }
4892 MPASS(eq->flags & EQ_SW_ALLOCATED);
4893 /* Can't fail after this point. */
4894
4895 ofld_txq->tx_iscsi_pdus = counter_u64_alloc(M_WAITOK);
4896 ofld_txq->tx_iscsi_octets = counter_u64_alloc(M_WAITOK);
4897 ofld_txq->tx_iscsi_iso_wrs = counter_u64_alloc(M_WAITOK);
4898 ofld_txq->tx_aio_jobs = counter_u64_alloc(M_WAITOK);
4899 ofld_txq->tx_aio_octets = counter_u64_alloc(M_WAITOK);
4900 ofld_txq->tx_toe_tls_records = counter_u64_alloc(M_WAITOK);
4901 ofld_txq->tx_toe_tls_octets = counter_u64_alloc(M_WAITOK);
4902 add_ofld_txq_sysctls(&vi->ctx, oid, ofld_txq);
4903 }
4904
4905 if (!(eq->flags & EQ_HW_ALLOCATED)) {
4906 rc = alloc_eq_hwq(sc, vi, eq);
4907 if (rc != 0) {
4908 CH_ERR(vi, "failed to create hw ofld_txq%d: %d\n", idx,
4909 rc);
4910 return (rc);
4911 }
4912 MPASS(eq->flags & EQ_HW_ALLOCATED);
4913 }
4914
4915 return (0);
4916 }
4917
4918 /*
4919 * Idempotent.
4920 */
4921 static void
free_ofld_txq(struct vi_info * vi,struct sge_ofld_txq * ofld_txq)4922 free_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq)
4923 {
4924 struct adapter *sc = vi->adapter;
4925 struct sge_eq *eq = &ofld_txq->wrq.eq;
4926
4927 if (eq->flags & EQ_HW_ALLOCATED) {
4928 MPASS(eq->flags & EQ_SW_ALLOCATED);
4929 free_eq_hwq(sc, NULL, eq);
4930 MPASS(!(eq->flags & EQ_HW_ALLOCATED));
4931 }
4932
4933 if (eq->flags & EQ_SW_ALLOCATED) {
4934 MPASS(!(eq->flags & EQ_HW_ALLOCATED));
4935 counter_u64_free(ofld_txq->tx_iscsi_pdus);
4936 counter_u64_free(ofld_txq->tx_iscsi_octets);
4937 counter_u64_free(ofld_txq->tx_iscsi_iso_wrs);
4938 counter_u64_free(ofld_txq->tx_aio_jobs);
4939 counter_u64_free(ofld_txq->tx_aio_octets);
4940 counter_u64_free(ofld_txq->tx_toe_tls_records);
4941 counter_u64_free(ofld_txq->tx_toe_tls_octets);
4942 free_wrq(sc, &ofld_txq->wrq);
4943 MPASS(!(eq->flags & EQ_SW_ALLOCATED));
4944 bzero(ofld_txq, sizeof(*ofld_txq));
4945 }
4946 }
4947
4948 static void
add_ofld_txq_sysctls(struct sysctl_ctx_list * ctx,struct sysctl_oid * oid,struct sge_ofld_txq * ofld_txq)4949 add_ofld_txq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
4950 struct sge_ofld_txq *ofld_txq)
4951 {
4952 struct sysctl_oid_list *children;
4953
4954 if (ctx == NULL || oid == NULL)
4955 return;
4956
4957 children = SYSCTL_CHILDREN(oid);
4958 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_pdus",
4959 CTLFLAG_RD, &ofld_txq->tx_iscsi_pdus,
4960 "# of iSCSI PDUs transmitted");
4961 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_octets",
4962 CTLFLAG_RD, &ofld_txq->tx_iscsi_octets,
4963 "# of payload octets in transmitted iSCSI PDUs");
4964 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_iso_wrs",
4965 CTLFLAG_RD, &ofld_txq->tx_iscsi_iso_wrs,
4966 "# of iSCSI segmentation offload work requests");
4967 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_aio_jobs",
4968 CTLFLAG_RD, &ofld_txq->tx_aio_jobs,
4969 "# of zero-copy aio_write(2) jobs transmitted");
4970 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_aio_octets",
4971 CTLFLAG_RD, &ofld_txq->tx_aio_octets,
4972 "# of payload octets in transmitted zero-copy aio_write(2) jobs");
4973 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_records",
4974 CTLFLAG_RD, &ofld_txq->tx_toe_tls_records,
4975 "# of TOE TLS records transmitted");
4976 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_octets",
4977 CTLFLAG_RD, &ofld_txq->tx_toe_tls_octets,
4978 "# of payload octets in transmitted TOE TLS records");
4979 }
4980 #endif
4981
4982 static void
oneseg_dma_callback(void * arg,bus_dma_segment_t * segs,int nseg,int error)4983 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
4984 {
4985 bus_addr_t *ba = arg;
4986
4987 KASSERT(nseg == 1,
4988 ("%s meant for single segment mappings only.", __func__));
4989
4990 *ba = error ? 0 : segs->ds_addr;
4991 }
4992
4993 static inline void
ring_fl_db(struct adapter * sc,struct sge_fl * fl)4994 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
4995 {
4996 uint32_t n, v;
4997
4998 n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx);
4999 MPASS(n > 0);
5000
5001 wmb();
5002 v = fl->dbval | V_PIDX(n);
5003 if (fl->udb)
5004 *fl->udb = htole32(v);
5005 else
5006 t4_write_reg(sc, sc->sge_kdoorbell_reg, v);
5007 IDXINCR(fl->dbidx, n, fl->sidx);
5008 }
5009
5010 /*
5011 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are
5012 * recycled do not count towards this allocation budget.
5013 *
5014 * Returns non-zero to indicate that this freelist should be added to the list
5015 * of starving freelists.
5016 */
5017 static int
refill_fl(struct adapter * sc,struct sge_fl * fl,int n)5018 refill_fl(struct adapter *sc, struct sge_fl *fl, int n)
5019 {
5020 __be64 *d;
5021 struct fl_sdesc *sd;
5022 uintptr_t pa;
5023 caddr_t cl;
5024 struct rx_buf_info *rxb;
5025 struct cluster_metadata *clm;
5026 uint16_t max_pidx, zidx = fl->zidx;
5027 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */
5028
5029 FL_LOCK_ASSERT_OWNED(fl);
5030
5031 /*
5032 * We always stop at the beginning of the hardware descriptor that's just
5033 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx,
5034 * which would mean an empty freelist to the chip.
5035 */
5036 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1;
5037 if (fl->pidx == max_pidx * 8)
5038 return (0);
5039
5040 d = &fl->desc[fl->pidx];
5041 sd = &fl->sdesc[fl->pidx];
5042 rxb = &sc->sge.rx_buf_info[zidx];
5043
5044 while (n > 0) {
5045
5046 if (sd->cl != NULL) {
5047
5048 if (sd->nmbuf == 0) {
5049 /*
5050 * Fast recycle without involving any atomics on
5051 * the cluster's metadata (if the cluster has
5052 * metadata). This happens when all frames
5053 * received in the cluster were small enough to
5054 * fit within a single mbuf each.
5055 */
5056 fl->cl_fast_recycled++;
5057 goto recycled;
5058 }
5059
5060 /*
5061 * Cluster is guaranteed to have metadata. Clusters
5062 * without metadata always take the fast recycle path
5063 * when they're recycled.
5064 */
5065 clm = cl_metadata(sd);
5066 MPASS(clm != NULL);
5067
5068 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
5069 fl->cl_recycled++;
5070 counter_u64_add(extfree_rels, 1);
5071 goto recycled;
5072 }
5073 sd->cl = NULL; /* gave up my reference */
5074 }
5075 MPASS(sd->cl == NULL);
5076 cl = uma_zalloc(rxb->zone, M_NOWAIT);
5077 if (__predict_false(cl == NULL)) {
5078 if (zidx != fl->safe_zidx) {
5079 zidx = fl->safe_zidx;
5080 rxb = &sc->sge.rx_buf_info[zidx];
5081 cl = uma_zalloc(rxb->zone, M_NOWAIT);
5082 }
5083 if (cl == NULL)
5084 break;
5085 }
5086 fl->cl_allocated++;
5087 n--;
5088
5089 pa = pmap_kextract((vm_offset_t)cl);
5090 sd->cl = cl;
5091 sd->zidx = zidx;
5092
5093 if (fl->flags & FL_BUF_PACKING) {
5094 *d = htobe64(pa | rxb->hwidx2);
5095 sd->moff = rxb->size2;
5096 } else {
5097 *d = htobe64(pa | rxb->hwidx1);
5098 sd->moff = 0;
5099 }
5100 recycled:
5101 sd->nmbuf = 0;
5102 d++;
5103 sd++;
5104 if (__predict_false((++fl->pidx & 7) == 0)) {
5105 uint16_t pidx = fl->pidx >> 3;
5106
5107 if (__predict_false(pidx == fl->sidx)) {
5108 fl->pidx = 0;
5109 pidx = 0;
5110 sd = fl->sdesc;
5111 d = fl->desc;
5112 }
5113 if (n < 8 || pidx == max_pidx)
5114 break;
5115
5116 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4)
5117 ring_fl_db(sc, fl);
5118 }
5119 }
5120
5121 if ((fl->pidx >> 3) != fl->dbidx)
5122 ring_fl_db(sc, fl);
5123
5124 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
5125 }
5126
5127 /*
5128 * Attempt to refill all starving freelists.
5129 */
5130 static void
refill_sfl(void * arg)5131 refill_sfl(void *arg)
5132 {
5133 struct adapter *sc = arg;
5134 struct sge_fl *fl, *fl_temp;
5135
5136 mtx_assert(&sc->sfl_lock, MA_OWNED);
5137 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
5138 FL_LOCK(fl);
5139 refill_fl(sc, fl, 64);
5140 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
5141 TAILQ_REMOVE(&sc->sfl, fl, link);
5142 fl->flags &= ~FL_STARVING;
5143 }
5144 FL_UNLOCK(fl);
5145 }
5146
5147 if (!TAILQ_EMPTY(&sc->sfl))
5148 callout_schedule(&sc->sfl_callout, hz / 5);
5149 }
5150
5151 /*
5152 * Release the driver's reference on all buffers in the given freelist. Buffers
5153 * with kernel references cannot be freed and will prevent the driver from being
5154 * unloaded safely.
5155 */
5156 void
free_fl_buffers(struct adapter * sc,struct sge_fl * fl)5157 free_fl_buffers(struct adapter *sc, struct sge_fl *fl)
5158 {
5159 struct fl_sdesc *sd;
5160 struct cluster_metadata *clm;
5161 int i;
5162
5163 sd = fl->sdesc;
5164 for (i = 0; i < fl->sidx * 8; i++, sd++) {
5165 if (sd->cl == NULL)
5166 continue;
5167
5168 if (sd->nmbuf == 0)
5169 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl);
5170 else if (fl->flags & FL_BUF_PACKING) {
5171 clm = cl_metadata(sd);
5172 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
5173 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone,
5174 sd->cl);
5175 counter_u64_add(extfree_rels, 1);
5176 }
5177 }
5178 sd->cl = NULL;
5179 }
5180
5181 if (fl->flags & FL_BUF_RESUME) {
5182 m_freem(fl->m0);
5183 fl->flags &= ~FL_BUF_RESUME;
5184 }
5185 }
5186
5187 static inline void
get_pkt_gl(struct mbuf * m,struct sglist * gl)5188 get_pkt_gl(struct mbuf *m, struct sglist *gl)
5189 {
5190 int rc;
5191
5192 M_ASSERTPKTHDR(m);
5193
5194 sglist_reset(gl);
5195 rc = sglist_append_mbuf(gl, m);
5196 if (__predict_false(rc != 0)) {
5197 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails "
5198 "with %d.", __func__, m, mbuf_nsegs(m), rc);
5199 }
5200
5201 KASSERT(gl->sg_nseg == mbuf_nsegs(m),
5202 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
5203 mbuf_nsegs(m), gl->sg_nseg));
5204 #if 0 /* vm_wr not readily available here. */
5205 KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr),
5206 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
5207 gl->sg_nseg, max_nsegs_allowed(m, vm_wr)));
5208 #endif
5209 }
5210
5211 /*
5212 * len16 for a txpkt WR with a GL. Includes the firmware work request header.
5213 */
5214 static inline u_int
txpkt_len16(u_int nsegs,const u_int extra)5215 txpkt_len16(u_int nsegs, const u_int extra)
5216 {
5217 u_int n;
5218
5219 MPASS(nsegs > 0);
5220
5221 nsegs--; /* first segment is part of ulptx_sgl */
5222 n = extra + sizeof(struct fw_eth_tx_pkt_wr) +
5223 sizeof(struct cpl_tx_pkt_core) +
5224 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
5225
5226 return (howmany(n, 16));
5227 }
5228
5229 /*
5230 * len16 for a txpkt_vm WR with a GL. Includes the firmware work
5231 * request header.
5232 */
5233 static inline u_int
txpkt_vm_len16(u_int nsegs,const u_int extra)5234 txpkt_vm_len16(u_int nsegs, const u_int extra)
5235 {
5236 u_int n;
5237
5238 MPASS(nsegs > 0);
5239
5240 nsegs--; /* first segment is part of ulptx_sgl */
5241 n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) +
5242 sizeof(struct cpl_tx_pkt_core) +
5243 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
5244
5245 return (howmany(n, 16));
5246 }
5247
5248 static inline void
calculate_mbuf_len16(struct mbuf * m,bool vm_wr)5249 calculate_mbuf_len16(struct mbuf *m, bool vm_wr)
5250 {
5251 const int lso = sizeof(struct cpl_tx_pkt_lso_core);
5252 const int tnl_lso = sizeof(struct cpl_tx_tnl_lso);
5253
5254 if (vm_wr) {
5255 if (needs_tso(m))
5256 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso));
5257 else
5258 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0));
5259 return;
5260 }
5261
5262 if (needs_tso(m)) {
5263 if (needs_vxlan_tso(m))
5264 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso));
5265 else
5266 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso));
5267 } else
5268 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0));
5269 }
5270
5271 /*
5272 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work
5273 * request header.
5274 */
5275 static inline u_int
txpkts0_len16(u_int nsegs)5276 txpkts0_len16(u_int nsegs)
5277 {
5278 u_int n;
5279
5280 MPASS(nsegs > 0);
5281
5282 nsegs--; /* first segment is part of ulptx_sgl */
5283 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) +
5284 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) +
5285 8 * ((3 * nsegs) / 2 + (nsegs & 1));
5286
5287 return (howmany(n, 16));
5288 }
5289
5290 /*
5291 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work
5292 * request header.
5293 */
5294 static inline u_int
txpkts1_len16(void)5295 txpkts1_len16(void)
5296 {
5297 u_int n;
5298
5299 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl);
5300
5301 return (howmany(n, 16));
5302 }
5303
5304 static inline u_int
imm_payload(u_int ndesc)5305 imm_payload(u_int ndesc)
5306 {
5307 u_int n;
5308
5309 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) -
5310 sizeof(struct cpl_tx_pkt_core);
5311
5312 return (n);
5313 }
5314
5315 static inline uint64_t
csum_to_ctrl(struct adapter * sc,struct mbuf * m)5316 csum_to_ctrl(struct adapter *sc, struct mbuf *m)
5317 {
5318 uint64_t ctrl;
5319 int csum_type, l2hlen, l3hlen;
5320 int x, y;
5321 static const int csum_types[3][2] = {
5322 {TX_CSUM_TCPIP, TX_CSUM_TCPIP6},
5323 {TX_CSUM_UDPIP, TX_CSUM_UDPIP6},
5324 {TX_CSUM_IP, 0}
5325 };
5326
5327 M_ASSERTPKTHDR(m);
5328
5329 if (!needs_hwcsum(m))
5330 return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);
5331
5332 MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN);
5333 MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip));
5334
5335 if (needs_vxlan_csum(m)) {
5336 MPASS(m->m_pkthdr.l4hlen > 0);
5337 MPASS(m->m_pkthdr.l5hlen > 0);
5338 MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN);
5339 MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip));
5340
5341 l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
5342 m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen +
5343 m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN;
5344 l3hlen = m->m_pkthdr.inner_l3hlen;
5345 } else {
5346 l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN;
5347 l3hlen = m->m_pkthdr.l3hlen;
5348 }
5349
5350 ctrl = 0;
5351 if (!needs_l3_csum(m))
5352 ctrl |= F_TXPKT_IPCSUM_DIS;
5353
5354 if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP |
5355 CSUM_IP6_TCP | CSUM_INNER_IP6_TCP))
5356 x = 0; /* TCP */
5357 else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP |
5358 CSUM_IP6_UDP | CSUM_INNER_IP6_UDP))
5359 x = 1; /* UDP */
5360 else
5361 x = 2;
5362
5363 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP |
5364 CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP))
5365 y = 0; /* IPv4 */
5366 else {
5367 MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP |
5368 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP));
5369 y = 1; /* IPv6 */
5370 }
5371 /*
5372 * needs_hwcsum returned true earlier so there must be some kind of
5373 * checksum to calculate.
5374 */
5375 csum_type = csum_types[x][y];
5376 MPASS(csum_type != 0);
5377 if (csum_type == TX_CSUM_IP)
5378 ctrl |= F_TXPKT_L4CSUM_DIS;
5379 ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen);
5380 if (chip_id(sc) <= CHELSIO_T5)
5381 ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen);
5382 else
5383 ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen);
5384
5385 return (ctrl);
5386 }
5387
5388 static inline void *
write_lso_cpl(void * cpl,struct mbuf * m0)5389 write_lso_cpl(void *cpl, struct mbuf *m0)
5390 {
5391 struct cpl_tx_pkt_lso_core *lso;
5392 uint32_t ctrl;
5393
5394 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
5395 m0->m_pkthdr.l4hlen > 0,
5396 ("%s: mbuf %p needs TSO but missing header lengths",
5397 __func__, m0));
5398
5399 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
5400 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
5401 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
5402 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
5403 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
5404 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
5405 ctrl |= F_LSO_IPV6;
5406
5407 lso = cpl;
5408 lso->lso_ctrl = htobe32(ctrl);
5409 lso->ipid_ofst = htobe16(0);
5410 lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
5411 lso->seqno_offset = htobe32(0);
5412 lso->len = htobe32(m0->m_pkthdr.len);
5413
5414 return (lso + 1);
5415 }
5416
5417 static void *
write_tnl_lso_cpl(void * cpl,struct mbuf * m0)5418 write_tnl_lso_cpl(void *cpl, struct mbuf *m0)
5419 {
5420 struct cpl_tx_tnl_lso *tnl_lso = cpl;
5421 uint32_t ctrl;
5422
5423 KASSERT(m0->m_pkthdr.inner_l2hlen > 0 &&
5424 m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 &&
5425 m0->m_pkthdr.inner_l5hlen > 0,
5426 ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths",
5427 __func__, m0));
5428 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
5429 m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0,
5430 ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths",
5431 __func__, m0));
5432
5433 /* Outer headers. */
5434 ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) |
5435 F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST |
5436 V_CPL_TX_TNL_LSO_ETHHDRLENOUT(
5437 (m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
5438 V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) |
5439 F_CPL_TX_TNL_LSO_IPLENSETOUT;
5440 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
5441 ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT;
5442 else {
5443 ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT |
5444 F_CPL_TX_TNL_LSO_IPIDINCOUT;
5445 }
5446 tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl);
5447 tnl_lso->IpIdOffsetOut = 0;
5448 tnl_lso->UdpLenSetOut_to_TnlHdrLen =
5449 htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT |
5450 F_CPL_TX_TNL_LSO_UDPLENSETOUT |
5451 V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen +
5452 m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen +
5453 m0->m_pkthdr.l5hlen) |
5454 V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN));
5455 tnl_lso->r1 = 0;
5456
5457 /* Inner headers. */
5458 ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN(
5459 (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) |
5460 V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) |
5461 V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2);
5462 if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr))
5463 ctrl |= F_CPL_TX_TNL_LSO_IPV6;
5464 tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl);
5465 tnl_lso->IpIdOffset = 0;
5466 tnl_lso->IpIdSplit_to_Mss =
5467 htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz));
5468 tnl_lso->TCPSeqOffset = 0;
5469 tnl_lso->EthLenOffset_Size =
5470 htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len));
5471
5472 return (tnl_lso + 1);
5473 }
5474
5475 #define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */
5476
5477 /*
5478 * Write a VM txpkt WR for this packet to the hardware descriptors, update the
5479 * software descriptor, and advance the pidx. It is guaranteed that enough
5480 * descriptors are available.
5481 *
5482 * The return value is the # of hardware descriptors used.
5483 */
5484 static u_int
write_txpkt_vm_wr(struct adapter * sc,struct sge_txq * txq,struct mbuf * m0)5485 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0)
5486 {
5487 struct sge_eq *eq;
5488 struct fw_eth_tx_pkt_vm_wr *wr;
5489 struct tx_sdesc *txsd;
5490 struct cpl_tx_pkt_core *cpl;
5491 uint32_t ctrl; /* used in many unrelated places */
5492 uint64_t ctrl1;
5493 int len16, ndesc, pktlen;
5494 caddr_t dst;
5495
5496 TXQ_LOCK_ASSERT_OWNED(txq);
5497 M_ASSERTPKTHDR(m0);
5498
5499 len16 = mbuf_len16(m0);
5500 pktlen = m0->m_pkthdr.len;
5501 ctrl = sizeof(struct cpl_tx_pkt_core);
5502 if (needs_tso(m0))
5503 ctrl += sizeof(struct cpl_tx_pkt_lso_core);
5504 ndesc = tx_len16_to_desc(len16);
5505
5506 /* Firmware work request header */
5507 eq = &txq->eq;
5508 wr = (void *)&eq->desc[eq->pidx];
5509 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
5510 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
5511
5512 ctrl = V_FW_WR_LEN16(len16);
5513 wr->equiq_to_len16 = htobe32(ctrl);
5514 wr->r3[0] = 0;
5515 wr->r3[1] = 0;
5516
5517 /*
5518 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci.
5519 * vlantci is ignored unless the ethtype is 0x8100, so it's
5520 * simpler to always copy it rather than making it
5521 * conditional. Also, it seems that we do not have to set
5522 * vlantci or fake the ethtype when doing VLAN tag insertion.
5523 */
5524 m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst);
5525
5526 if (needs_tso(m0)) {
5527 cpl = write_lso_cpl(wr + 1, m0);
5528 txq->tso_wrs++;
5529 } else
5530 cpl = (void *)(wr + 1);
5531
5532 /* Checksum offload */
5533 ctrl1 = csum_to_ctrl(sc, m0);
5534 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
5535 txq->txcsum++; /* some hardware assistance provided */
5536
5537 /* VLAN tag insertion */
5538 if (needs_vlan_insertion(m0)) {
5539 ctrl1 |= F_TXPKT_VLAN_VLD |
5540 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
5541 txq->vlan_insertion++;
5542 } else if (sc->vlan_id)
5543 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(sc->vlan_id);
5544
5545 /* CPL header */
5546 cpl->ctrl0 = txq->cpl_ctrl0;
5547 cpl->pack = 0;
5548 cpl->len = htobe16(pktlen);
5549 cpl->ctrl1 = htobe64(ctrl1);
5550
5551 /* SGL */
5552 dst = (void *)(cpl + 1);
5553
5554 /*
5555 * A packet using TSO will use up an entire descriptor for the
5556 * firmware work request header, LSO CPL, and TX_PKT_XT CPL.
5557 * If this descriptor is the last descriptor in the ring, wrap
5558 * around to the front of the ring explicitly for the start of
5559 * the sgl.
5560 */
5561 if (dst == (void *)&eq->desc[eq->sidx]) {
5562 dst = (void *)&eq->desc[0];
5563 write_gl_to_txd(txq, m0, &dst, 0);
5564 } else
5565 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
5566 txq->sgl_wrs++;
5567 txq->txpkt_wrs++;
5568
5569 txsd = &txq->sdesc[eq->pidx];
5570 txsd->m = m0;
5571 txsd->desc_used = ndesc;
5572
5573 return (ndesc);
5574 }
5575
5576 /*
5577 * Write a raw WR to the hardware descriptors, update the software
5578 * descriptor, and advance the pidx. It is guaranteed that enough
5579 * descriptors are available.
5580 *
5581 * The return value is the # of hardware descriptors used.
5582 */
5583 static u_int
write_raw_wr(struct sge_txq * txq,void * wr,struct mbuf * m0,u_int available)5584 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available)
5585 {
5586 struct sge_eq *eq = &txq->eq;
5587 struct tx_sdesc *txsd;
5588 struct mbuf *m;
5589 caddr_t dst;
5590 int len16, ndesc;
5591
5592 len16 = mbuf_len16(m0);
5593 ndesc = tx_len16_to_desc(len16);
5594 MPASS(ndesc <= available);
5595
5596 dst = wr;
5597 for (m = m0; m != NULL; m = m->m_next)
5598 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
5599
5600 txq->raw_wrs++;
5601
5602 txsd = &txq->sdesc[eq->pidx];
5603 txsd->m = m0;
5604 txsd->desc_used = ndesc;
5605
5606 return (ndesc);
5607 }
5608
5609 /*
5610 * Write a txpkt WR for this packet to the hardware descriptors, update the
5611 * software descriptor, and advance the pidx. It is guaranteed that enough
5612 * descriptors are available.
5613 *
5614 * The return value is the # of hardware descriptors used.
5615 */
5616 static u_int
write_txpkt_wr(struct adapter * sc,struct sge_txq * txq,struct mbuf * m0,u_int available)5617 write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
5618 u_int available)
5619 {
5620 struct sge_eq *eq;
5621 struct fw_eth_tx_pkt_wr *wr;
5622 struct tx_sdesc *txsd;
5623 struct cpl_tx_pkt_core *cpl;
5624 uint32_t ctrl; /* used in many unrelated places */
5625 uint64_t ctrl1;
5626 int len16, ndesc, pktlen, nsegs;
5627 caddr_t dst;
5628
5629 TXQ_LOCK_ASSERT_OWNED(txq);
5630 M_ASSERTPKTHDR(m0);
5631
5632 len16 = mbuf_len16(m0);
5633 nsegs = mbuf_nsegs(m0);
5634 pktlen = m0->m_pkthdr.len;
5635 ctrl = sizeof(struct cpl_tx_pkt_core);
5636 if (needs_tso(m0)) {
5637 if (needs_vxlan_tso(m0))
5638 ctrl += sizeof(struct cpl_tx_tnl_lso);
5639 else
5640 ctrl += sizeof(struct cpl_tx_pkt_lso_core);
5641 } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) &&
5642 available >= 2) {
5643 /* Immediate data. Recalculate len16 and set nsegs to 0. */
5644 ctrl += pktlen;
5645 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) +
5646 sizeof(struct cpl_tx_pkt_core) + pktlen, 16);
5647 nsegs = 0;
5648 }
5649 ndesc = tx_len16_to_desc(len16);
5650 MPASS(ndesc <= available);
5651
5652 /* Firmware work request header */
5653 eq = &txq->eq;
5654 wr = (void *)&eq->desc[eq->pidx];
5655 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
5656 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
5657
5658 ctrl = V_FW_WR_LEN16(len16);
5659 wr->equiq_to_len16 = htobe32(ctrl);
5660 wr->r3 = 0;
5661
5662 if (needs_tso(m0)) {
5663 if (needs_vxlan_tso(m0)) {
5664 cpl = write_tnl_lso_cpl(wr + 1, m0);
5665 txq->vxlan_tso_wrs++;
5666 } else {
5667 cpl = write_lso_cpl(wr + 1, m0);
5668 txq->tso_wrs++;
5669 }
5670 } else
5671 cpl = (void *)(wr + 1);
5672
5673 /* Checksum offload */
5674 ctrl1 = csum_to_ctrl(sc, m0);
5675 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) {
5676 /* some hardware assistance provided */
5677 if (needs_vxlan_csum(m0))
5678 txq->vxlan_txcsum++;
5679 else
5680 txq->txcsum++;
5681 }
5682
5683 /* VLAN tag insertion */
5684 if (needs_vlan_insertion(m0)) {
5685 ctrl1 |= F_TXPKT_VLAN_VLD |
5686 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
5687 txq->vlan_insertion++;
5688 }
5689
5690 /* CPL header */
5691 cpl->ctrl0 = txq->cpl_ctrl0;
5692 cpl->pack = 0;
5693 cpl->len = htobe16(pktlen);
5694 cpl->ctrl1 = htobe64(ctrl1);
5695
5696 /* SGL */
5697 dst = (void *)(cpl + 1);
5698 if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx]))
5699 dst = (caddr_t)&eq->desc[0];
5700 if (nsegs > 0) {
5701
5702 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
5703 txq->sgl_wrs++;
5704 } else {
5705 struct mbuf *m;
5706
5707 for (m = m0; m != NULL; m = m->m_next) {
5708 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
5709 #ifdef INVARIANTS
5710 pktlen -= m->m_len;
5711 #endif
5712 }
5713 #ifdef INVARIANTS
5714 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
5715 #endif
5716 txq->imm_wrs++;
5717 }
5718
5719 txq->txpkt_wrs++;
5720
5721 txsd = &txq->sdesc[eq->pidx];
5722 txsd->m = m0;
5723 txsd->desc_used = ndesc;
5724
5725 return (ndesc);
5726 }
5727
5728 static inline bool
cmp_l2hdr(struct txpkts * txp,struct mbuf * m)5729 cmp_l2hdr(struct txpkts *txp, struct mbuf *m)
5730 {
5731 int len;
5732
5733 MPASS(txp->npkt > 0);
5734 MPASS(m->m_len >= VM_TX_L2HDR_LEN);
5735
5736 if (txp->ethtype == be16toh(ETHERTYPE_VLAN))
5737 len = VM_TX_L2HDR_LEN;
5738 else
5739 len = sizeof(struct ether_header);
5740
5741 return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0);
5742 }
5743
5744 static inline void
save_l2hdr(struct txpkts * txp,struct mbuf * m)5745 save_l2hdr(struct txpkts *txp, struct mbuf *m)
5746 {
5747 MPASS(m->m_len >= VM_TX_L2HDR_LEN);
5748
5749 memcpy(&txp->ethmacdst[0], mtod(m, const void *), VM_TX_L2HDR_LEN);
5750 }
5751
5752 static int
add_to_txpkts_vf(struct adapter * sc,struct sge_txq * txq,struct mbuf * m,int avail,bool * send)5753 add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m,
5754 int avail, bool *send)
5755 {
5756 struct txpkts *txp = &txq->txp;
5757
5758 /* Cannot have TSO and coalesce at the same time. */
5759 if (cannot_use_txpkts(m)) {
5760 cannot_coalesce:
5761 *send = txp->npkt > 0;
5762 return (EINVAL);
5763 }
5764
5765 /* VF allows coalescing of type 1 (1 GL) only */
5766 if (mbuf_nsegs(m) > 1)
5767 goto cannot_coalesce;
5768
5769 *send = false;
5770 if (txp->npkt > 0) {
5771 MPASS(tx_len16_to_desc(txp->len16) <= avail);
5772 MPASS(txp->npkt < txp->max_npkt);
5773 MPASS(txp->wr_type == 1); /* VF supports type 1 only */
5774
5775 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) {
5776 retry_after_send:
5777 *send = true;
5778 return (EAGAIN);
5779 }
5780 if (m->m_pkthdr.len + txp->plen > 65535)
5781 goto retry_after_send;
5782 if (cmp_l2hdr(txp, m))
5783 goto retry_after_send;
5784
5785 txp->len16 += txpkts1_len16();
5786 txp->plen += m->m_pkthdr.len;
5787 txp->mb[txp->npkt++] = m;
5788 if (txp->npkt == txp->max_npkt)
5789 *send = true;
5790 } else {
5791 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) +
5792 txpkts1_len16();
5793 if (tx_len16_to_desc(txp->len16) > avail)
5794 goto cannot_coalesce;
5795 txp->npkt = 1;
5796 txp->wr_type = 1;
5797 txp->plen = m->m_pkthdr.len;
5798 txp->mb[0] = m;
5799 save_l2hdr(txp, m);
5800 }
5801 return (0);
5802 }
5803
5804 static int
add_to_txpkts_pf(struct adapter * sc,struct sge_txq * txq,struct mbuf * m,int avail,bool * send)5805 add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m,
5806 int avail, bool *send)
5807 {
5808 struct txpkts *txp = &txq->txp;
5809 int nsegs;
5810
5811 MPASS(!(sc->flags & IS_VF));
5812
5813 /* Cannot have TSO and coalesce at the same time. */
5814 if (cannot_use_txpkts(m)) {
5815 cannot_coalesce:
5816 *send = txp->npkt > 0;
5817 return (EINVAL);
5818 }
5819
5820 *send = false;
5821 nsegs = mbuf_nsegs(m);
5822 if (txp->npkt == 0) {
5823 if (m->m_pkthdr.len > 65535)
5824 goto cannot_coalesce;
5825 if (nsegs > 1) {
5826 txp->wr_type = 0;
5827 txp->len16 =
5828 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) +
5829 txpkts0_len16(nsegs);
5830 } else {
5831 txp->wr_type = 1;
5832 txp->len16 =
5833 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) +
5834 txpkts1_len16();
5835 }
5836 if (tx_len16_to_desc(txp->len16) > avail)
5837 goto cannot_coalesce;
5838 txp->npkt = 1;
5839 txp->plen = m->m_pkthdr.len;
5840 txp->mb[0] = m;
5841 } else {
5842 MPASS(tx_len16_to_desc(txp->len16) <= avail);
5843 MPASS(txp->npkt < txp->max_npkt);
5844
5845 if (m->m_pkthdr.len + txp->plen > 65535) {
5846 retry_after_send:
5847 *send = true;
5848 return (EAGAIN);
5849 }
5850
5851 MPASS(txp->wr_type == 0 || txp->wr_type == 1);
5852 if (txp->wr_type == 0) {
5853 if (tx_len16_to_desc(txp->len16 +
5854 txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC))
5855 goto retry_after_send;
5856 txp->len16 += txpkts0_len16(nsegs);
5857 } else {
5858 if (nsegs != 1)
5859 goto retry_after_send;
5860 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) >
5861 avail)
5862 goto retry_after_send;
5863 txp->len16 += txpkts1_len16();
5864 }
5865
5866 txp->plen += m->m_pkthdr.len;
5867 txp->mb[txp->npkt++] = m;
5868 if (txp->npkt == txp->max_npkt)
5869 *send = true;
5870 }
5871 return (0);
5872 }
5873
5874 /*
5875 * Write a txpkts WR for the packets in txp to the hardware descriptors, update
5876 * the software descriptor, and advance the pidx. It is guaranteed that enough
5877 * descriptors are available.
5878 *
5879 * The return value is the # of hardware descriptors used.
5880 */
5881 static u_int
write_txpkts_wr(struct adapter * sc,struct sge_txq * txq)5882 write_txpkts_wr(struct adapter *sc, struct sge_txq *txq)
5883 {
5884 const struct txpkts *txp = &txq->txp;
5885 struct sge_eq *eq = &txq->eq;
5886 struct fw_eth_tx_pkts_wr *wr;
5887 struct tx_sdesc *txsd;
5888 struct cpl_tx_pkt_core *cpl;
5889 uint64_t ctrl1;
5890 int ndesc, i, checkwrap;
5891 struct mbuf *m, *last;
5892 void *flitp;
5893
5894 TXQ_LOCK_ASSERT_OWNED(txq);
5895 MPASS(txp->npkt > 0);
5896 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
5897
5898 wr = (void *)&eq->desc[eq->pidx];
5899 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
5900 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16));
5901 wr->plen = htobe16(txp->plen);
5902 wr->npkt = txp->npkt;
5903 wr->r3 = 0;
5904 wr->type = txp->wr_type;
5905 flitp = wr + 1;
5906
5907 /*
5908 * At this point we are 16B into a hardware descriptor. If checkwrap is
5909 * set then we know the WR is going to wrap around somewhere. We'll
5910 * check for that at appropriate points.
5911 */
5912 ndesc = tx_len16_to_desc(txp->len16);
5913 last = NULL;
5914 checkwrap = eq->sidx - ndesc < eq->pidx;
5915 for (i = 0; i < txp->npkt; i++) {
5916 m = txp->mb[i];
5917 if (txp->wr_type == 0) {
5918 struct ulp_txpkt *ulpmc;
5919 struct ulptx_idata *ulpsc;
5920
5921 /* ULP master command */
5922 ulpmc = flitp;
5923 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
5924 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid));
5925 ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m)));
5926
5927 /* ULP subcommand */
5928 ulpsc = (void *)(ulpmc + 1);
5929 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
5930 F_ULP_TX_SC_MORE);
5931 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
5932
5933 cpl = (void *)(ulpsc + 1);
5934 if (checkwrap &&
5935 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx])
5936 cpl = (void *)&eq->desc[0];
5937 } else {
5938 cpl = flitp;
5939 }
5940
5941 /* Checksum offload */
5942 ctrl1 = csum_to_ctrl(sc, m);
5943 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) {
5944 /* some hardware assistance provided */
5945 if (needs_vxlan_csum(m))
5946 txq->vxlan_txcsum++;
5947 else
5948 txq->txcsum++;
5949 }
5950
5951 /* VLAN tag insertion */
5952 if (needs_vlan_insertion(m)) {
5953 ctrl1 |= F_TXPKT_VLAN_VLD |
5954 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
5955 txq->vlan_insertion++;
5956 }
5957
5958 /* CPL header */
5959 cpl->ctrl0 = txq->cpl_ctrl0;
5960 cpl->pack = 0;
5961 cpl->len = htobe16(m->m_pkthdr.len);
5962 cpl->ctrl1 = htobe64(ctrl1);
5963
5964 flitp = cpl + 1;
5965 if (checkwrap &&
5966 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx])
5967 flitp = (void *)&eq->desc[0];
5968
5969 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap);
5970
5971 if (last != NULL)
5972 last->m_nextpkt = m;
5973 last = m;
5974 }
5975
5976 txq->sgl_wrs++;
5977 if (txp->wr_type == 0) {
5978 txq->txpkts0_pkts += txp->npkt;
5979 txq->txpkts0_wrs++;
5980 } else {
5981 txq->txpkts1_pkts += txp->npkt;
5982 txq->txpkts1_wrs++;
5983 }
5984
5985 txsd = &txq->sdesc[eq->pidx];
5986 txsd->m = txp->mb[0];
5987 txsd->desc_used = ndesc;
5988
5989 return (ndesc);
5990 }
5991
5992 static u_int
write_txpkts_vm_wr(struct adapter * sc,struct sge_txq * txq)5993 write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq)
5994 {
5995 const struct txpkts *txp = &txq->txp;
5996 struct sge_eq *eq = &txq->eq;
5997 struct fw_eth_tx_pkts_vm_wr *wr;
5998 struct tx_sdesc *txsd;
5999 struct cpl_tx_pkt_core *cpl;
6000 uint64_t ctrl1;
6001 int ndesc, i;
6002 struct mbuf *m, *last;
6003 void *flitp;
6004
6005 TXQ_LOCK_ASSERT_OWNED(txq);
6006 MPASS(txp->npkt > 0);
6007 MPASS(txp->wr_type == 1); /* VF supports type 1 only */
6008 MPASS(txp->mb[0] != NULL);
6009 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
6010
6011 wr = (void *)&eq->desc[eq->pidx];
6012 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR));
6013 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16));
6014 wr->r3 = 0;
6015 wr->plen = htobe16(txp->plen);
6016 wr->npkt = txp->npkt;
6017 wr->r4 = 0;
6018 memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16);
6019 flitp = wr + 1;
6020
6021 /*
6022 * At this point we are 32B into a hardware descriptor. Each mbuf in
6023 * the WR will take 32B so we check for the end of the descriptor ring
6024 * before writing odd mbufs (mb[1], 3, 5, ..)
6025 */
6026 ndesc = tx_len16_to_desc(txp->len16);
6027 last = NULL;
6028 for (i = 0; i < txp->npkt; i++) {
6029 m = txp->mb[i];
6030 if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx])
6031 flitp = &eq->desc[0];
6032 cpl = flitp;
6033
6034 /* Checksum offload */
6035 ctrl1 = csum_to_ctrl(sc, m);
6036 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
6037 txq->txcsum++; /* some hardware assistance provided */
6038
6039 /* VLAN tag insertion */
6040 if (needs_vlan_insertion(m)) {
6041 ctrl1 |= F_TXPKT_VLAN_VLD |
6042 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
6043 txq->vlan_insertion++;
6044 } else if (sc->vlan_id)
6045 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(sc->vlan_id);
6046
6047 /* CPL header */
6048 cpl->ctrl0 = txq->cpl_ctrl0;
6049 cpl->pack = 0;
6050 cpl->len = htobe16(m->m_pkthdr.len);
6051 cpl->ctrl1 = htobe64(ctrl1);
6052
6053 flitp = cpl + 1;
6054 MPASS(mbuf_nsegs(m) == 1);
6055 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0);
6056
6057 if (last != NULL)
6058 last->m_nextpkt = m;
6059 last = m;
6060 }
6061
6062 txq->sgl_wrs++;
6063 txq->txpkts1_pkts += txp->npkt;
6064 txq->txpkts1_wrs++;
6065
6066 txsd = &txq->sdesc[eq->pidx];
6067 txsd->m = txp->mb[0];
6068 txsd->desc_used = ndesc;
6069
6070 return (ndesc);
6071 }
6072
/*
 * Write the scatter/gather list for mbuf 'm' into the tx descriptor ring as a
 * ULP_TX_SC_DSGL, starting at *to, and advance *to past it.
 *
 * txq:       tx queue.  Its eq supplies the ring bounds and its gl is handed
 *            to get_pkt_gl() and then read back as the list of segments.
 * m:         the packet to describe.
 * to:        in/out cursor into the ring.  It must be 16 byte aligned and
 *            inside the ring on entry.  On return it is 16 byte aligned again
 *            and is reset to the start of the ring if the SGL ended exactly at
 *            the end of the ring.
 * checkwrap: 0 skips the wrap check and always writes the SGL contiguously;
 *            non-zero makes the function test whether the SGL would run past
 *            the end of the ring and, if so, write it flit by flit so that it
 *            can wrap.
 *
 * If the SGL ends on an address that is not 16 byte aligned, this function will
 * add a 0 filled flit at the end.
 */
static void
write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap)
{
	struct sge_eq *eq = &txq->eq;
	struct sglist *gl = txq->gl;
	struct sglist_seg *seg;
	__be64 *flitp, *wrap;
	struct ulptx_sgl *usgl;
	int i, nflits, nsegs;

	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);

	get_pkt_gl(m, gl);
	nsegs = gl->sg_nseg;
	MPASS(nsegs > 0);

	/*
	 * 2 flits for the command word, len0 and addr0.  Every additional pair
	 * of segments needs 3 flits (one flit with both lengths, two address
	 * flits); an unpaired last segment needs 2.
	 */
	nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
	flitp = (__be64 *)(*to);
	wrap = (__be64 *)(&eq->desc[eq->sidx]);	/* first byte past the ring */
	seg = &gl->sg_segs[0];
	usgl = (void *)flitp;

	/*
	 * We start at a 16 byte boundary somewhere inside the tx descriptor
	 * ring, so we're at least 16 bytes away from the status page.  There is
	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
	 */

	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(nsegs));
	usgl->len0 = htobe32(seg->ss_len);
	usgl->addr0 = htobe64(seg->ss_paddr);
	seg++;

	if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) {

		/* Won't wrap around at all */

		for (i = 0; i < nsegs - 1; i++, seg++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
		}
		/* Odd number of trailing segments: zero the unused length. */
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
		flitp += nflits;
	} else {

		/* Will wrap somewhere in the rest of the SGL */

		/* 2 flits already written, write the rest flit by flit */
		flitp = (void *)(usgl + 1);
		for (i = 0; i < nflits - 2; i++) {
			if (flitp == wrap)
				flitp = (void *)eq->desc;
			*flitp++ = get_flit(seg, nsegs - 1, i);
		}
	}

	/* Pad with a zero flit so that the SGL ends on a 16 byte boundary. */
	if (nflits & 1) {
		MPASS(((uintptr_t)flitp) & 0xf);
		*flitp++ = 0;
	}

	MPASS((((uintptr_t)flitp) & 0xf) == 0);
	/* Hand back a cursor that is always inside the ring. */
	if (__predict_false(flitp == wrap))
		*to = (void *)eq->desc;
	else
		*to = (void *)flitp;
}
6149
6150 static inline void
copy_to_txd(struct sge_eq * eq,caddr_t from,caddr_t * to,int len)6151 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
6152 {
6153
6154 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
6155 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
6156
6157 if (__predict_true((uintptr_t)(*to) + len <=
6158 (uintptr_t)&eq->desc[eq->sidx])) {
6159 bcopy(from, *to, len);
6160 (*to) += len;
6161 } else {
6162 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);
6163
6164 bcopy(from, *to, portion);
6165 from += portion;
6166 portion = len - portion; /* remaining */
6167 bcopy(from, (void *)eq->desc, portion);
6168 (*to) = (caddr_t)eq->desc + portion;
6169 }
6170 }
6171
/*
 * Ring the doorbell of egress queue 'eq' to tell the hardware about 'n' new
 * descriptors (n must be > 0), then advance eq->dbidx by n.
 *
 * The mechanism is picked from the bits set in eq->doorbells: the lowest set
 * bit (ffs) wins.  DOORBELL_WCWR is removed from consideration when more than
 * one descriptor is being reported.
 */
static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n)
{
	u_int db;

	MPASS(n > 0);

	db = eq->doorbells;
	if (n > 1)
		clrbit(&db, DOORBELL_WCWR);
	/* Write barrier ahead of every doorbell write below. */
	wmb();

	switch (ffs(db) - 1) {
	case DOORBELL_UDB:
		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
		break;

	case DOORBELL_WCWR: {
		volatile uint64_t *dst, *src;
		int i;

		/*
		 * Queues whose 128B doorbell segment fits in the page do not
		 * use relative qid (udb_qid is always 0).  Only queues with
		 * doorbell segments can do WCWR.
		 */
		KASSERT(eq->udb_qid == 0 && n == 1,
		    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
		    __func__, eq->doorbells, n, eq->dbidx, eq));

		/*
		 * Copy the whole descriptor at dbidx, 64 bits at a time, to the
		 * location derived from the doorbell address.
		 */
		dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
		    UDBS_DB_OFFSET);
		i = eq->dbidx;
		src = (void *)&eq->desc[i];
		while (src != (void *)&eq->desc[i + 1])
			*dst++ = *src++;
		wmb();
		break;
	}

	case DOORBELL_UDBWC:
		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
		wmb();
		break;

	case DOORBELL_KDB:
		t4_write_reg(sc, sc->sge_kdoorbell_reg,
		    V_QID(eq->cntxt_id) | V_PIDX(n));
		break;
	}

	/* NOTE(review): IDXINCR presumably wraps dbidx at eq->sidx -- confirm. */
	IDXINCR(eq->dbidx, n, eq->sidx);
}
6225
6226 static inline u_int
reclaimable_tx_desc(struct sge_eq * eq)6227 reclaimable_tx_desc(struct sge_eq *eq)
6228 {
6229 uint16_t hw_cidx;
6230
6231 hw_cidx = read_hw_cidx(eq);
6232 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx));
6233 }
6234
6235 static inline u_int
total_available_tx_desc(struct sge_eq * eq)6236 total_available_tx_desc(struct sge_eq *eq)
6237 {
6238 uint16_t hw_cidx, pidx;
6239
6240 hw_cidx = read_hw_cidx(eq);
6241 pidx = eq->pidx;
6242
6243 if (pidx == hw_cidx)
6244 return (eq->sidx - 1);
6245 else
6246 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1);
6247 }
6248
6249 static inline uint16_t
read_hw_cidx(struct sge_eq * eq)6250 read_hw_cidx(struct sge_eq *eq)
6251 {
6252 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
6253 uint16_t cidx = spg->cidx; /* stable snapshot */
6254
6255 return (be16toh(cidx));
6256 }
6257
6258 /*
6259 * Reclaim 'n' descriptors approximately.
6260 */
6261 static u_int
reclaim_tx_descs(struct sge_txq * txq,u_int n)6262 reclaim_tx_descs(struct sge_txq *txq, u_int n)
6263 {
6264 struct tx_sdesc *txsd;
6265 struct sge_eq *eq = &txq->eq;
6266 u_int can_reclaim, reclaimed;
6267
6268 TXQ_LOCK_ASSERT_OWNED(txq);
6269 MPASS(n > 0);
6270
6271 reclaimed = 0;
6272 can_reclaim = reclaimable_tx_desc(eq);
6273 while (can_reclaim && reclaimed < n) {
6274 int ndesc;
6275 struct mbuf *m, *nextpkt;
6276
6277 txsd = &txq->sdesc[eq->cidx];
6278 ndesc = txsd->desc_used;
6279
6280 /* Firmware doesn't return "partial" credits. */
6281 KASSERT(can_reclaim >= ndesc,
6282 ("%s: unexpected number of credits: %d, %d",
6283 __func__, can_reclaim, ndesc));
6284 KASSERT(ndesc != 0,
6285 ("%s: descriptor with no credits: cidx %d",
6286 __func__, eq->cidx));
6287
6288 for (m = txsd->m; m != NULL; m = nextpkt) {
6289 nextpkt = m->m_nextpkt;
6290 m->m_nextpkt = NULL;
6291 m_freem(m);
6292 }
6293 reclaimed += ndesc;
6294 can_reclaim -= ndesc;
6295 IDXINCR(eq->cidx, ndesc, eq->sidx);
6296 }
6297
6298 return (reclaimed);
6299 }
6300
6301 static void
tx_reclaim(void * arg,int n)6302 tx_reclaim(void *arg, int n)
6303 {
6304 struct sge_txq *txq = arg;
6305 struct sge_eq *eq = &txq->eq;
6306
6307 do {
6308 if (TXQ_TRYLOCK(txq) == 0)
6309 break;
6310 n = reclaim_tx_descs(txq, 32);
6311 if (eq->cidx == eq->pidx)
6312 eq->equeqidx = eq->pidx;
6313 TXQ_UNLOCK(txq);
6314 } while (n > 0);
6315 }
6316
6317 static __be64
get_flit(struct sglist_seg * segs,int nsegs,int idx)6318 get_flit(struct sglist_seg *segs, int nsegs, int idx)
6319 {
6320 int i = (idx / 3) * 2;
6321
6322 switch (idx % 3) {
6323 case 0: {
6324 uint64_t rc;
6325
6326 rc = (uint64_t)segs[i].ss_len << 32;
6327 if (i + 1 < nsegs)
6328 rc |= (uint64_t)(segs[i + 1].ss_len);
6329
6330 return (htobe64(rc));
6331 }
6332 case 1:
6333 return (htobe64(segs[i].ss_paddr));
6334 case 2:
6335 return (htobe64(segs[i + 1].ss_paddr));
6336 }
6337
6338 return (0);
6339 }
6340
6341 static int
find_refill_source(struct adapter * sc,int maxp,bool packing)6342 find_refill_source(struct adapter *sc, int maxp, bool packing)
6343 {
6344 int i, zidx = -1;
6345 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0];
6346
6347 if (packing) {
6348 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
6349 if (rxb->hwidx2 == -1)
6350 continue;
6351 if (rxb->size1 < PAGE_SIZE &&
6352 rxb->size1 < largest_rx_cluster)
6353 continue;
6354 if (rxb->size1 > largest_rx_cluster)
6355 break;
6356 MPASS(rxb->size1 - rxb->size2 >= CL_METADATA_SIZE);
6357 if (rxb->size2 >= maxp)
6358 return (i);
6359 zidx = i;
6360 }
6361 } else {
6362 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
6363 if (rxb->hwidx1 == -1)
6364 continue;
6365 if (rxb->size1 > largest_rx_cluster)
6366 break;
6367 if (rxb->size1 >= maxp)
6368 return (i);
6369 zidx = i;
6370 }
6371 }
6372
6373 return (zidx);
6374 }
6375
6376 static void
add_fl_to_sfl(struct adapter * sc,struct sge_fl * fl)6377 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
6378 {
6379 mtx_lock(&sc->sfl_lock);
6380 FL_LOCK(fl);
6381 if ((fl->flags & FL_DOOMED) == 0) {
6382 fl->flags |= FL_STARVING;
6383 TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
6384 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
6385 }
6386 FL_UNLOCK(fl);
6387 mtx_unlock(&sc->sfl_lock);
6388 }
6389
6390 static void
handle_wrq_egr_update(struct adapter * sc,struct sge_eq * eq)6391 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq)
6392 {
6393 struct sge_wrq *wrq = (void *)eq;
6394
6395 atomic_readandclear_int(&eq->equiq);
6396 taskqueue_enqueue(sc->tq[eq->port_id], &wrq->wrq_tx_task);
6397 }
6398
6399 static void
handle_eth_egr_update(struct adapter * sc,struct sge_eq * eq)6400 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq)
6401 {
6402 struct sge_txq *txq = (void *)eq;
6403
6404 MPASS(eq->type == EQ_ETH);
6405
6406 atomic_readandclear_int(&eq->equiq);
6407 if (mp_ring_is_idle(txq->r))
6408 taskqueue_enqueue(sc->tq[eq->port_id], &txq->tx_reclaim_task);
6409 else
6410 mp_ring_check_drainage(txq->r, 64);
6411 }
6412
6413 static int
handle_sge_egr_update(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)6414 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
6415 struct mbuf *m)
6416 {
6417 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
6418 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
6419 struct adapter *sc = iq->adapter;
6420 struct sge *s = &sc->sge;
6421 struct sge_eq *eq;
6422 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL,
6423 &handle_wrq_egr_update, &handle_eth_egr_update,
6424 &handle_wrq_egr_update};
6425
6426 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
6427 rss->opcode));
6428
6429 eq = s->eqmap[qid - s->eq_start - s->eq_base];
6430 (*h[eq->type])(sc, eq);
6431
6432 return (0);
6433 }
6434
6435 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */
6436 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
6437 offsetof(struct cpl_fw6_msg, data));
6438
6439 static int
handle_fw_msg(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)6440 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
6441 {
6442 struct adapter *sc = iq->adapter;
6443 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
6444
6445 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
6446 rss->opcode));
6447
6448 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
6449 const struct rss_header *rss2;
6450
6451 rss2 = (const struct rss_header *)&cpl->data[0];
6452 return (t4_cpl_handler[rss2->opcode](iq, rss2, m));
6453 }
6454
6455 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0]));
6456 }
6457
6458 /**
6459 * t4_handle_wrerr_rpl - process a FW work request error message
6460 * @adap: the adapter
6461 * @rpl: start of the FW message
6462 */
6463 static int
t4_handle_wrerr_rpl(struct adapter * adap,const __be64 * rpl)6464 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl)
6465 {
6466 u8 opcode = *(const u8 *)rpl;
6467 const struct fw_error_cmd *e = (const void *)rpl;
6468 unsigned int i;
6469
6470 if (opcode != FW_ERROR_CMD) {
6471 log(LOG_ERR,
6472 "%s: Received WRERR_RPL message with opcode %#x\n",
6473 device_get_nameunit(adap->dev), opcode);
6474 return (EINVAL);
6475 }
6476 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev),
6477 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" :
6478 "non-fatal");
6479 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) {
6480 case FW_ERROR_TYPE_EXCEPTION:
6481 log(LOG_ERR, "exception info:\n");
6482 for (i = 0; i < nitems(e->u.exception.info); i++)
6483 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ",
6484 be32toh(e->u.exception.info[i]));
6485 log(LOG_ERR, "\n");
6486 break;
6487 case FW_ERROR_TYPE_HWMODULE:
6488 log(LOG_ERR, "HW module regaddr %08x regval %08x\n",
6489 be32toh(e->u.hwmodule.regaddr),
6490 be32toh(e->u.hwmodule.regval));
6491 break;
6492 case FW_ERROR_TYPE_WR:
6493 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n",
6494 be16toh(e->u.wr.cidx),
6495 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)),
6496 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)),
6497 be32toh(e->u.wr.eqid));
6498 for (i = 0; i < nitems(e->u.wr.wrhdr); i++)
6499 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ",
6500 e->u.wr.wrhdr[i]);
6501 log(LOG_ERR, "\n");
6502 break;
6503 case FW_ERROR_TYPE_ACL:
6504 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s",
6505 be16toh(e->u.acl.cidx),
6506 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)),
6507 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)),
6508 be32toh(e->u.acl.eqid),
6509 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? "vlanid" :
6510 "MAC");
6511 for (i = 0; i < nitems(e->u.acl.val); i++)
6512 log(LOG_ERR, " %02x", e->u.acl.val[i]);
6513 log(LOG_ERR, "\n");
6514 break;
6515 default:
6516 log(LOG_ERR, "type %#x\n",
6517 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type)));
6518 return (EINVAL);
6519 }
6520 return (0);
6521 }
6522
6523 static inline bool
bufidx_used(struct adapter * sc,int idx)6524 bufidx_used(struct adapter *sc, int idx)
6525 {
6526 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0];
6527 int i;
6528
6529 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
6530 if (rxb->size1 > largest_rx_cluster)
6531 continue;
6532 if (rxb->hwidx1 == idx || rxb->hwidx2 == idx)
6533 return (true);
6534 }
6535
6536 return (false);
6537 }
6538
6539 static int
sysctl_bufsizes(SYSCTL_HANDLER_ARGS)6540 sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
6541 {
6542 struct adapter *sc = arg1;
6543 struct sge_params *sp = &sc->params.sge;
6544 int i, rc;
6545 struct sbuf sb;
6546 char c;
6547
6548 sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
6549 for (i = 0; i < SGE_FLBUF_SIZES; i++) {
6550 if (bufidx_used(sc, i))
6551 c = '*';
6552 else
6553 c = '\0';
6554
6555 sbuf_printf(&sb, "%u%c ", sp->sge_fl_buffer_size[i], c);
6556 }
6557 sbuf_trim(&sb);
6558 sbuf_finish(&sb);
6559 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
6560 sbuf_delete(&sb);
6561 return (rc);
6562 }
6563
6564 #ifdef RATELIMIT
6565 #if defined(INET) || defined(INET6)
6566 /*
6567 * len16 for a txpkt WR with a GL. Includes the firmware work request header.
6568 */
6569 static inline u_int
txpkt_eo_len16(u_int nsegs,u_int immhdrs,u_int tso)6570 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso)
6571 {
6572 u_int n;
6573
6574 MPASS(immhdrs > 0);
6575
6576 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) +
6577 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16);
6578 if (__predict_false(nsegs == 0))
6579 goto done;
6580
6581 nsegs--; /* first segment is part of ulptx_sgl */
6582 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
6583 if (tso)
6584 n += sizeof(struct cpl_tx_pkt_lso_core);
6585
6586 done:
6587 return (howmany(n, 16));
6588 }
6589 #endif
6590
6591 #define ETID_FLOWC_NPARAMS 6
6592 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \
6593 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16))
6594 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16))
6595
6596 static int
send_etid_flowc_wr(struct cxgbe_rate_tag * cst,struct port_info * pi,struct vi_info * vi)6597 send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi,
6598 struct vi_info *vi)
6599 {
6600 struct wrq_cookie cookie;
6601 u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN;
6602 struct fw_flowc_wr *flowc;
6603
6604 mtx_assert(&cst->lock, MA_OWNED);
6605 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) ==
6606 EO_FLOWC_PENDING);
6607
6608 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLOWC_LEN16, &cookie);
6609 if (__predict_false(flowc == NULL))
6610 return (ENOMEM);
6611
6612 bzero(flowc, ETID_FLOWC_LEN);
6613 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
6614 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0));
6615 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) |
6616 V_FW_WR_FLOWID(cst->etid));
6617 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
6618 flowc->mnemval[0].val = htobe32(pfvf);
6619 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
6620 flowc->mnemval[1].val = htobe32(pi->tx_chan);
6621 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
6622 flowc->mnemval[2].val = htobe32(pi->tx_chan);
6623 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
6624 flowc->mnemval[3].val = htobe32(cst->iqid);
6625 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE;
6626 flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED);
6627 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
6628 flowc->mnemval[5].val = htobe32(cst->schedcl);
6629
6630 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie);
6631
6632 cst->flags &= ~EO_FLOWC_PENDING;
6633 cst->flags |= EO_FLOWC_RPL_PENDING;
6634 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */
6635 cst->tx_credits -= ETID_FLOWC_LEN16;
6636
6637 return (0);
6638 }
6639
6640 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16))
6641
6642 void
send_etid_flush_wr(struct cxgbe_rate_tag * cst)6643 send_etid_flush_wr(struct cxgbe_rate_tag *cst)
6644 {
6645 struct fw_flowc_wr *flowc;
6646 struct wrq_cookie cookie;
6647
6648 mtx_assert(&cst->lock, MA_OWNED);
6649
6650 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLUSH_LEN16, &cookie);
6651 if (__predict_false(flowc == NULL))
6652 CXGBE_UNIMPLEMENTED(__func__);
6653
6654 bzero(flowc, ETID_FLUSH_LEN16 * 16);
6655 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
6656 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL);
6657 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) |
6658 V_FW_WR_FLOWID(cst->etid));
6659
6660 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie);
6661
6662 cst->flags |= EO_FLUSH_RPL_PENDING;
6663 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16);
6664 cst->tx_credits -= ETID_FLUSH_LEN16;
6665 cst->ncompl++;
6666 }
6667
/*
 * Fill in a FW_ETH_TX_EO_WR work request at 'wr' for packet 'm0' sent on the
 * rate limit tag 'cst'.
 *
 * The WR is laid out as: the fw_eth_tx_eo_wr header (UDP or TCP segmentation
 * flavor, depending on which outer L4 checksum the mbuf asks for), a
 * cpl_tx_pkt_lso_core if the mbuf needs TSO (TCP flavor only), a
 * cpl_tx_pkt_core, the L2 + L3 + L4 headers copied as immediate data, and, if
 * mbuf_eo_nsegs(m0) is non-zero, a ULP_TX_SC_DSGL describing the rest of the
 * packet.
 *
 * 'compl' non-zero requests a completion for this WR.  The caller must hold
 * cst->lock.  The mbuf must have its l2hlen, l3hlen and l4hlen set and the
 * function writes up to wr + mbuf_eo_len16(m0) * 16, so the caller must have
 * reserved that much room.
 */
static void
write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
    struct mbuf *m0, int compl)
{
	struct cpl_tx_pkt_core *cpl;
	uint64_t ctrl1;
	uint32_t ctrl;	/* used in many unrelated places */
	int len16, pktlen, nsegs, immhdrs;
	uintptr_t p;
	struct ulptx_sgl *usgl;
	struct sglist sg;
	struct sglist_seg segs[38];	/* XXX: find real limit.  XXX: get off the stack */

	mtx_assert(&cst->lock, MA_OWNED);
	M_ASSERTPKTHDR(m0);
	KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
	    m0->m_pkthdr.l4hlen > 0,
	    ("%s: ethofld mbuf %p is missing header lengths", __func__, m0));

	len16 = mbuf_eo_len16(m0);
	nsegs = mbuf_eo_nsegs(m0);
	pktlen = m0->m_pkthdr.len;
	/* Here ctrl is the immediate data length: CPL(s) plus the headers. */
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (needs_tso(m0))
		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
	immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen;
	ctrl += immhdrs;

	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) |
	    V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl));
	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) |
	    V_FW_WR_FLOWID(cst->etid));
	wr->r3 = 0;
	if (needs_outer_udp_csum(m0)) {
		wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
		wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen;
		wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
		wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen;
		wr->u.udpseg.rtplen = 0;
		wr->u.udpseg.r4 = 0;
		wr->u.udpseg.mss = htobe16(pktlen - immhdrs);
		wr->u.udpseg.schedpktsize = wr->u.udpseg.mss;
		wr->u.udpseg.plen = htobe32(pktlen - immhdrs);
		cpl = (void *)(wr + 1);
	} else {
		MPASS(needs_outer_tcp_csum(m0));
		wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
		wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen;
		wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
		wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen;
		wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0);
		wr->u.tcpseg.r4 = 0;
		wr->u.tcpseg.r5 = 0;
		wr->u.tcpseg.plen = htobe32(pktlen - immhdrs);

		if (needs_tso(m0)) {
			/* The LSO CPL sits between the WR header and the tx pkt CPL. */
			struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);

			wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz);

			/* Here ctrl is reused for the LSO control word. */
			ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
			    F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
			    V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen -
				ETHER_HDR_LEN) >> 2) |
			    V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
			    V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
			if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
				ctrl |= F_LSO_IPV6;
			lso->lso_ctrl = htobe32(ctrl);
			lso->ipid_ofst = htobe16(0);
			lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
			lso->seqno_offset = htobe32(0);
			lso->len = htobe32(pktlen);

			cpl = (void *)(lso + 1);
		} else {
			wr->u.tcpseg.mss = htobe16(0xffff);
			cpl = (void *)(wr + 1);
		}
	}

	/* Checksum offload must be requested for ethofld. */
	MPASS(needs_outer_l4_csum(m0));
	ctrl1 = csum_to_ctrl(cst->adapter, m0);

	/* VLAN tag insertion */
	if (needs_vlan_insertion(m0)) {
		ctrl1 |= F_TXPKT_VLAN_VLD |
		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	}

	/* CPL header */
	cpl->ctrl0 = cst->ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Copy Ethernet, IP & TCP/UDP hdrs as immediate data */
	p = (uintptr_t)(cpl + 1);
	m_copydata(m0, 0, immhdrs, (void *)p);

	/* SGL */
	if (nsegs > 0) {
		int i, pad;

		/*
		 * Zero-pad up to the next 16 byte boundary, if not 16 byte
		 * aligned.
		 *
		 * NOTE(review): this evaluates to 16, not 0, when immhdrs is a
		 * multiple of 16 -- confirm that this cannot happen or that it
		 * agrees with the len16 computed for the WR.
		 */
		p += immhdrs;
		pad = 16 - (immhdrs & 0xf);
		bzero((void *)p, pad);

		usgl = (void *)(p + pad);
		usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
		    V_ULPTX_NSGE(nsegs));

		/*
		 * Build the gather list from the mbuf chain, skipping empty
		 * mbufs and the first immhdrs bytes, which were already copied
		 * into the WR as immediate data.
		 */
		sglist_init(&sg, nitems(segs), segs);
		for (; m0 != NULL; m0 = m0->m_next) {
			if (__predict_false(m0->m_len == 0))
				continue;
			if (immhdrs >= m0->m_len) {
				immhdrs -= m0->m_len;
				continue;
			}
			if (m0->m_flags & M_EXTPG)
				sglist_append_mbuf_epg(&sg, m0,
				    mtod(m0, vm_offset_t), m0->m_len);
			else
				sglist_append(&sg, mtod(m0, char *) + immhdrs,
				    m0->m_len - immhdrs);
			immhdrs = 0;
		}
		MPASS(sg.sg_nseg == nsegs);

		/*
		 * Zero pad last 8B in case the WR doesn't end on a 16B
		 * boundary.
		 */
		*(uint64_t *)((char *)wr + len16 * 16 - 8) = 0;

		usgl->len0 = htobe32(segs[0].ss_len);
		usgl->addr0 = htobe64(segs[0].ss_paddr);
		for (i = 0; i < nsegs - 1; i++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr);
		}
		/* Odd number of trailing segments: zero the unused length. */
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	}

}
6817
/*
 * Turn as many mbufs from cst->pending_tx into work requests as the tag's tx
 * credits and the work request queue allow.  Each mbuf that is sent moves to
 * cst->pending_fwack and gives up its reference on the tag.  A completion is
 * requested when none is outstanding or when the credits used since the last
 * request reach half of tx_total.  The caller must hold cst->lock.
 */
static void
ethofld_tx(struct cxgbe_rate_tag *cst)
{
	struct mbuf *m;
	struct wrq_cookie cookie;
	int next_credits, compl;
	struct fw_eth_tx_eo_wr *wr;

	mtx_assert(&cst->lock, MA_OWNED);

	while ((m = mbufq_first(&cst->pending_tx)) != NULL) {
		M_ASSERTPKTHDR(m);

		/* How many len16 credits do we need to send this mbuf. */
		next_credits = mbuf_eo_len16(m);
		MPASS(next_credits > 0);
		if (next_credits > cst->tx_credits) {
			/*
			 * Tx will make progress eventually because there is at
			 * least one outstanding fw4_ack that will return
			 * credits and kick the tx.
			 */
			MPASS(cst->ncompl > 0);
			return;
		}
		wr = start_wrq_wr(&cst->eo_txq->wrq, next_credits, &cookie);
		if (__predict_false(wr == NULL)) {
			/* XXX: wishful thinking, not a real assertion. */
			MPASS(cst->ncompl > 0);
			return;
		}
		cst->tx_credits -= next_credits;
		cst->tx_nocompl += next_credits;
		/* Decide whether this WR should generate a completion. */
		compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2;
		ETHER_BPF_MTAP(cst->com.ifp, m);
		write_ethofld_wr(cst, wr, m, compl);
		commit_wrq_wr(&cst->eo_txq->wrq, wr, &cookie);
		if (compl) {
			cst->ncompl++;
			cst->tx_nocompl = 0;
		}
		/* m is still the head of pending_tx; take it off now. */
		(void) mbufq_dequeue(&cst->pending_tx);

		/*
		 * Drop the mbuf's reference on the tag now rather
		 * than waiting until m_freem().  This ensures that
		 * cxgbe_rate_tag_free gets called when the inp drops
		 * its reference on the tag and there are no more
		 * mbufs in the pending_tx queue and can flush any
		 * pending requests.  Otherwise if the last mbuf
		 * doesn't request a completion the etid will never be
		 * released.
		 */
		m->m_pkthdr.snd_tag = NULL;
		m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
		m_snd_tag_rele(&cst->com);

		/* Keep the mbuf until the firmware acknowledges the WR. */
		mbufq_enqueue(&cst->pending_fwack, m);
	}
}
6878
6879 int
ethofld_transmit(struct ifnet * ifp,struct mbuf * m0)6880 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0)
6881 {
6882 struct cxgbe_rate_tag *cst;
6883 int rc;
6884
6885 MPASS(m0->m_nextpkt == NULL);
6886 MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG);
6887 MPASS(m0->m_pkthdr.snd_tag != NULL);
6888 cst = mst_to_crt(m0->m_pkthdr.snd_tag);
6889
6890 mtx_lock(&cst->lock);
6891 MPASS(cst->flags & EO_SND_TAG_REF);
6892
6893 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) {
6894 struct vi_info *vi = ifp->if_softc;
6895 struct port_info *pi = vi->pi;
6896 struct adapter *sc = pi->adapter;
6897 const uint32_t rss_mask = vi->rss_size - 1;
6898 uint32_t rss_hash;
6899
6900 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq];
6901 if (M_HASHTYPE_ISHASH(m0))
6902 rss_hash = m0->m_pkthdr.flowid;
6903 else
6904 rss_hash = arc4random();
6905 /* We assume RSS hashing */
6906 cst->iqid = vi->rss[rss_hash & rss_mask];
6907 cst->eo_txq += rss_hash % vi->nofldtxq;
6908 rc = send_etid_flowc_wr(cst, pi, vi);
6909 if (rc != 0)
6910 goto done;
6911 }
6912
6913 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) {
6914 rc = ENOBUFS;
6915 goto done;
6916 }
6917
6918 mbufq_enqueue(&cst->pending_tx, m0);
6919 cst->plen += m0->m_pkthdr.len;
6920
6921 /*
6922 * Hold an extra reference on the tag while generating work
6923 * requests to ensure that we don't try to free the tag during
6924 * ethofld_tx() in case we are sending the final mbuf after
6925 * the inp was freed.
6926 */
6927 m_snd_tag_ref(&cst->com);
6928 ethofld_tx(cst);
6929 mtx_unlock(&cst->lock);
6930 m_snd_tag_rele(&cst->com);
6931 return (0);
6932
6933 done:
6934 mtx_unlock(&cst->lock);
6935 if (__predict_false(rc != 0))
6936 m_freem(m0);
6937 return (rc);
6938 }
6939
/*
 * Handler for a CPL_FW4_ACK that returns tx credits for an ethofld tag.
 *
 * The tag is looked up from the flow id in the CPL.  Credits are first
 * applied to the initial FLOWC work request if its reply is still pending,
 * then to the mbufs on pending_fwack in order; each mbuf whose credits came
 * back is freed.  If credits remain once pending_fwack is empty they are
 * expected to belong to the final flush and the tag is freed.  Otherwise the
 * credits are returned to the tag and, if the tag is still referenced by the
 * kernel, transmission is restarted.  Always returns 0.  m0 is not used.
 */
static int
ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	struct mbuf *m;
	u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct cxgbe_rate_tag *cst;
	uint8_t credits = cpl->credits;	/* credits not yet accounted for */

	cst = lookup_etid(sc, etid);
	mtx_lock(&cst->lock);
	if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) {
		/* The first credits returned pay for the initial flowc WR. */
		MPASS(credits >= ETID_FLOWC_LEN16);
		credits -= ETID_FLOWC_LEN16;
		cst->flags &= ~EO_FLOWC_RPL_PENDING;
	}

	KASSERT(cst->ncompl > 0,
	    ("%s: etid %u (%p) wasn't expecting completion.",
	    __func__, etid, cst));
	cst->ncompl--;

	while (credits > 0) {
		m = mbufq_dequeue(&cst->pending_fwack);
		if (__predict_false(m == NULL)) {
			/*
			 * The remaining credits are for the final flush that
			 * was issued when the tag was freed by the kernel.
			 */
			MPASS((cst->flags &
			    (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) ==
			    EO_FLUSH_RPL_PENDING);
			MPASS(credits == ETID_FLUSH_LEN16);
			MPASS(cst->tx_credits + cpl->credits == cst->tx_total);
			MPASS(cst->ncompl == 0);

			cst->flags &= ~EO_FLUSH_RPL_PENDING;
			cst->tx_credits += cpl->credits;
			/*
			 * NOTE(review): cst->lock is not released here;
			 * presumably cxgbe_rate_tag_free_locked() takes care
			 * of it -- confirm.
			 */
			cxgbe_rate_tag_free_locked(cst);
			return (0);	/* cst is gone. */
		}
		/* m cannot be NULL here; the case above has returned. */
		KASSERT(m != NULL,
		    ("%s: too many credits (%u, %u)", __func__, cpl->credits,
		    credits));
		KASSERT(credits >= mbuf_eo_len16(m),
		    ("%s: too few credits (%u, %u, %u)", __func__,
		    cpl->credits, credits, mbuf_eo_len16(m)));
		credits -= mbuf_eo_len16(m);
		cst->plen -= m->m_pkthdr.len;
		m_freem(m);
	}

	/* All of the credits in this message go back to the tag. */
	cst->tx_credits += cpl->credits;
	MPASS(cst->tx_credits <= cst->tx_total);

	if (cst->flags & EO_SND_TAG_REF) {
		/*
		 * As with ethofld_transmit(), hold an extra reference
		 * so that the tag is stable across ethofld_tx().
		 */
		m_snd_tag_ref(&cst->com);
		m = mbufq_first(&cst->pending_tx);
		if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
			ethofld_tx(cst);
		mtx_unlock(&cst->lock);
		m_snd_tag_rele(&cst->com);
	} else {
		/*
		 * There shouldn't be any pending packets if the tag
		 * was freed by the kernel since any pending packet
		 * should hold a reference to the tag.
		 */
		MPASS(mbufq_first(&cst->pending_tx) == NULL);
		mtx_unlock(&cst->lock);
	}

	return (0);
}
7019 #endif
7020