1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2024 Google LLC
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * 3. Neither the name of the copyright holder nor the names of its contributors
17 * may be used to endorse or promote products derived from this software without
18 * specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "opt_inet6.h"
33
34 #include "gve.h"
35 #include "gve_dqo.h"
36
37 static void
gve_unmap_packet(struct gve_tx_ring * tx,struct gve_tx_pending_pkt_dqo * pending_pkt)38 gve_unmap_packet(struct gve_tx_ring *tx,
39 struct gve_tx_pending_pkt_dqo *pending_pkt)
40 {
41 bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
42 BUS_DMASYNC_POSTWRITE);
43 bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
44 }
45
46 static void
gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo * pending_pkt)47 gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt)
48 {
49 pending_pkt->qpl_buf_head = -1;
50 pending_pkt->num_qpl_bufs = 0;
51 }
52
53 static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring * tx)54 gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
55 {
56 struct gve_tx_pending_pkt_dqo *pending_pkt;
57 int i;
58
59 for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
60 pending_pkt = &tx->dqo.pending_pkts[i];
61 if (!pending_pkt->mbuf)
62 continue;
63
64 if (gve_is_qpl(tx->com.priv))
65 gve_clear_qpl_pending_pkt(pending_pkt);
66 else
67 gve_unmap_packet(tx, pending_pkt);
68
69 m_freem(pending_pkt->mbuf);
70 pending_pkt->mbuf = NULL;
71 }
72 }
73
74 void
gve_tx_free_ring_dqo(struct gve_priv * priv,int i)75 gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
76 {
77 struct gve_tx_ring *tx = &priv->tx[i];
78 struct gve_ring_com *com = &tx->com;
79 int j;
80
81 if (tx->dqo.desc_ring != NULL) {
82 gve_dma_free_coherent(&tx->desc_ring_mem);
83 tx->dqo.desc_ring = NULL;
84 }
85
86 if (tx->dqo.compl_ring != NULL) {
87 gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
88 tx->dqo.compl_ring = NULL;
89 }
90
91 if (tx->dqo.pending_pkts != NULL) {
92 gve_free_tx_mbufs_dqo(tx);
93
94 if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
95 for (j = 0; j < tx->dqo.num_pending_pkts; j++)
96 if (tx->dqo.pending_pkts[j].state !=
97 GVE_PACKET_STATE_UNALLOCATED)
98 bus_dmamap_destroy(tx->dqo.buf_dmatag,
99 tx->dqo.pending_pkts[j].dmamap);
100 }
101
102 free(tx->dqo.pending_pkts, M_GVE);
103 tx->dqo.pending_pkts = NULL;
104 }
105
106 if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
107 bus_dma_tag_destroy(tx->dqo.buf_dmatag);
108
109 if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
110 free(tx->dqo.qpl_bufs, M_GVE);
111 tx->dqo.qpl_bufs = NULL;
112 }
113
114 if (com->qpl != NULL) {
115 gve_free_qpl(priv, com->qpl);
116 com->qpl = NULL;
117 }
118 }
119
120 static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring * tx)121 gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
122 {
123 struct gve_priv *priv = tx->com.priv;
124 int err;
125 int j;
126
127 /*
128 * DMA tag for mapping Tx mbufs
129 * The maxsize, nsegments, and maxsegsize params should match
130 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
131 */
132 err = bus_dma_tag_create(
133 bus_get_dma_tag(priv->dev), /* parent */
134 1, 0, /* alignment, bounds */
135 BUS_SPACE_MAXADDR, /* lowaddr */
136 BUS_SPACE_MAXADDR, /* highaddr */
137 NULL, NULL, /* filter, filterarg */
138 GVE_TSO_MAXSIZE_DQO, /* maxsize */
139 GVE_TX_MAX_DATA_DESCS_DQO, /* nsegments */
140 GVE_TX_MAX_BUF_SIZE_DQO, /* maxsegsize */
141 BUS_DMA_ALLOCNOW, /* flags */
142 NULL, /* lockfunc */
143 NULL, /* lockarg */
144 &tx->dqo.buf_dmatag);
145 if (err != 0) {
146 device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
147 __func__, err);
148 return (err);
149 }
150
151 for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
152 err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
153 &tx->dqo.pending_pkts[j].dmamap);
154 if (err != 0) {
155 device_printf(priv->dev,
156 "err in creating pending pkt dmamap %d: %d",
157 j, err);
158 return (err);
159 }
160 tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
161 }
162
163 return (0);
164 }
165
166 int
gve_tx_alloc_ring_dqo(struct gve_priv * priv,int i)167 gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
168 {
169 struct gve_tx_ring *tx = &priv->tx[i];
170 uint16_t num_pending_pkts;
171 int err;
172
173 /* Descriptor ring */
174 err = gve_dma_alloc_coherent(priv,
175 sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
176 CACHE_LINE_SIZE, &tx->desc_ring_mem);
177 if (err != 0) {
178 device_printf(priv->dev,
179 "Failed to alloc desc ring for tx ring %d", i);
180 goto abort;
181 }
182 tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;
183
184 /* Completion ring */
185 err = gve_dma_alloc_coherent(priv,
186 sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
187 CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
188 if (err != 0) {
189 device_printf(priv->dev,
190 "Failed to alloc compl ring for tx ring %d", i);
191 goto abort;
192 }
193 tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;
194
195 /*
196 * pending_pkts array
197 *
198 * The max number of pending packets determines the maximum number of
199 * descriptors which maybe written to the completion queue.
200 *
201 * We must set the number small enough to make sure we never overrun the
202 * completion queue.
203 */
204 num_pending_pkts = priv->tx_desc_cnt;
205 /*
206 * Reserve space for descriptor completions, which will be reported at
207 * most every GVE_TX_MIN_RE_INTERVAL packets.
208 */
209 num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;
210
211 tx->dqo.num_pending_pkts = num_pending_pkts;
212 tx->dqo.pending_pkts = malloc(
213 sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
214 M_GVE, M_WAITOK | M_ZERO);
215
216 if (gve_is_qpl(priv)) {
217 int qpl_buf_cnt;
218
219 tx->com.qpl = gve_alloc_qpl(priv, i, GVE_TX_NUM_QPL_PAGES_DQO,
220 /*single_kva*/false);
221 if (tx->com.qpl == NULL) {
222 device_printf(priv->dev,
223 "Failed to alloc QPL for tx ring %d", i);
224 err = ENOMEM;
225 goto abort;
226 }
227
228 qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
229 tx->com.qpl->num_pages;
230
231 tx->dqo.qpl_bufs = malloc(
232 sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
233 M_GVE, M_WAITOK | M_ZERO);
234 } else
235 gve_tx_alloc_rda_fields_dqo(tx);
236 return (0);
237
238 abort:
239 gve_tx_free_ring_dqo(priv, i);
240 return (err);
241 }
242
243 static void
gve_extract_tx_metadata_dqo(const struct mbuf * mbuf,struct gve_tx_metadata_dqo * metadata)244 gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
245 struct gve_tx_metadata_dqo *metadata)
246 {
247 uint32_t hash = mbuf->m_pkthdr.flowid;
248 uint16_t path_hash;
249
250 metadata->version = GVE_TX_METADATA_VERSION_DQO;
251 if (hash) {
252 path_hash = hash ^ (hash >> 16);
253
254 path_hash &= (1 << 15) - 1;
255 if (__predict_false(path_hash == 0))
256 path_hash = ~path_hash;
257
258 metadata->path_hash = path_hash;
259 }
260 }
261
262 static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring * tx,uint32_t * desc_idx,uint32_t len,uint64_t addr,int16_t compl_tag,bool eop,bool csum_enabled)263 gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
264 uint32_t *desc_idx, uint32_t len, uint64_t addr,
265 int16_t compl_tag, bool eop, bool csum_enabled)
266 {
267 while (len > 0) {
268 struct gve_tx_pkt_desc_dqo *desc =
269 &tx->dqo.desc_ring[*desc_idx].pkt;
270 uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
271 bool cur_eop = eop && cur_len == len;
272
273 *desc = (struct gve_tx_pkt_desc_dqo){
274 .buf_addr = htole64(addr),
275 .dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
276 .end_of_packet = cur_eop,
277 .checksum_offload_enable = csum_enabled,
278 .compl_tag = htole16(compl_tag),
279 .buf_size = cur_len,
280 };
281
282 addr += cur_len;
283 len -= cur_len;
284 *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
285 }
286 }
287
288 static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo * desc,const struct mbuf * mbuf,const struct gve_tx_metadata_dqo * metadata,int header_len)289 gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
290 const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
291 int header_len)
292 {
293 *desc = (struct gve_tx_tso_context_desc_dqo){
294 .header_len = header_len,
295 .cmd_dtype = {
296 .dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
297 .tso = 1,
298 },
299 .flex0 = metadata->bytes[0],
300 .flex5 = metadata->bytes[5],
301 .flex6 = metadata->bytes[6],
302 .flex7 = metadata->bytes[7],
303 .flex8 = metadata->bytes[8],
304 .flex9 = metadata->bytes[9],
305 .flex10 = metadata->bytes[10],
306 .flex11 = metadata->bytes[11],
307 };
308 desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
309 desc->mss = mbuf->m_pkthdr.tso_segsz;
310 }
311
312 static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo * desc,const struct gve_tx_metadata_dqo * metadata)313 gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
314 const struct gve_tx_metadata_dqo *metadata)
315 {
316 *desc = (struct gve_tx_general_context_desc_dqo){
317 .flex0 = metadata->bytes[0],
318 .flex1 = metadata->bytes[1],
319 .flex2 = metadata->bytes[2],
320 .flex3 = metadata->bytes[3],
321 .flex4 = metadata->bytes[4],
322 .flex5 = metadata->bytes[5],
323 .flex6 = metadata->bytes[6],
324 .flex7 = metadata->bytes[7],
325 .flex8 = metadata->bytes[8],
326 .flex9 = metadata->bytes[9],
327 .flex10 = metadata->bytes[10],
328 .flex11 = metadata->bytes[11],
329 .cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
330 };
331 }
332
/*
 * Make sure the first `len` bytes of mbuf `m` are contiguous in its first
 * buffer, calling m_pullup() only when they are not already.  `m` is
 * reassigned to m_pullup()'s result.  If that result is NULL the macro
 * makes the *enclosing function* return EINVAL.
 */
#define PULLUP_HDR(m, len)						\
do {									\
	if (__predict_true((m)->m_len >= (len)))			\
		break;							\
	(m) = m_pullup((m), (len));					\
	if ((m) == NULL)						\
		return (EINVAL);					\
} while (0)
341
342 static int
gve_prep_tso(struct mbuf * mbuf,int * header_len)343 gve_prep_tso(struct mbuf *mbuf, int *header_len)
344 {
345 uint8_t l3_off, l4_off = 0;
346 struct ether_header *eh;
347 struct tcphdr *th;
348 u_short csum;
349
350 PULLUP_HDR(mbuf, sizeof(*eh));
351 eh = mtod(mbuf, struct ether_header *);
352 KASSERT(eh->ether_type != ETHERTYPE_VLAN,
353 ("VLAN-tagged packets not supported"));
354 l3_off = ETHER_HDR_LEN;
355
356 #ifdef INET6
357 if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
358 struct ip6_hdr *ip6;
359
360 PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
361 ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
362 l4_off = l3_off + sizeof(struct ip6_hdr);
363 csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
364 /*csum=*/0);
365 } else
366 #endif
367 if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
368 struct ip *ip;
369
370 PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
371 ip = (struct ip *)(mtodo(mbuf, l3_off));
372 l4_off = l3_off + (ip->ip_hl << 2);
373 csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
374 htons(IPPROTO_TCP));
375 }
376
377 PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr *));
378 th = (struct tcphdr *)(mtodo(mbuf, l4_off));
379 *header_len = l4_off + (th->th_off << 2);
380
381 /*
382 * Hardware requires the th->th_sum to not include the TCP payload,
383 * hence we recompute the csum with it excluded.
384 */
385 th->th_sum = csum;
386
387 return (0);
388 }
389
390 static int
gve_tx_fill_ctx_descs(struct gve_tx_ring * tx,struct mbuf * mbuf,bool is_tso,uint32_t * desc_idx)391 gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
392 bool is_tso, uint32_t *desc_idx)
393 {
394 struct gve_tx_general_context_desc_dqo *gen_desc;
395 struct gve_tx_tso_context_desc_dqo *tso_desc;
396 struct gve_tx_metadata_dqo metadata;
397 int header_len;
398 int err;
399
400 metadata = (struct gve_tx_metadata_dqo){0};
401 gve_extract_tx_metadata_dqo(mbuf, &metadata);
402
403 if (is_tso) {
404 err = gve_prep_tso(mbuf, &header_len);
405 if (__predict_false(err)) {
406 counter_enter();
407 counter_u64_add_protected(
408 tx->stats.tx_delayed_pkt_tsoerr, 1);
409 counter_exit();
410 return (err);
411 }
412
413 tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
414 gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);
415
416 *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
417 counter_enter();
418 counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
419 counter_exit();
420 }
421
422 gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
423 gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
424 *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
425 return (0);
426 }
427
428 static int
gve_map_mbuf_dqo(struct gve_tx_ring * tx,struct mbuf ** mbuf,bus_dmamap_t dmamap,bus_dma_segment_t * segs,int * nsegs,int attempt)429 gve_map_mbuf_dqo(struct gve_tx_ring *tx,
430 struct mbuf **mbuf, bus_dmamap_t dmamap,
431 bus_dma_segment_t *segs, int *nsegs, int attempt)
432 {
433 struct mbuf *m_new = NULL;
434 int err;
435
436 err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
437 *mbuf, segs, nsegs, BUS_DMA_NOWAIT);
438
439 switch (err) {
440 case __predict_true(0):
441 break;
442 case EFBIG:
443 if (__predict_false(attempt > 0))
444 goto abort;
445
446 counter_enter();
447 counter_u64_add_protected(
448 tx->stats.tx_mbuf_collapse, 1);
449 counter_exit();
450
451 /* Try m_collapse before m_defrag */
452 m_new = m_collapse(*mbuf, M_NOWAIT,
453 GVE_TX_MAX_DATA_DESCS_DQO);
454 if (m_new == NULL) {
455 counter_enter();
456 counter_u64_add_protected(
457 tx->stats.tx_mbuf_defrag, 1);
458 counter_exit();
459 m_new = m_defrag(*mbuf, M_NOWAIT);
460 }
461
462 if (__predict_false(m_new == NULL)) {
463 counter_enter();
464 counter_u64_add_protected(
465 tx->stats.tx_mbuf_defrag_err, 1);
466 counter_exit();
467
468 m_freem(*mbuf);
469 *mbuf = NULL;
470 err = ENOMEM;
471 goto abort;
472 } else {
473 *mbuf = m_new;
474 return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
475 segs, nsegs, ++attempt));
476 }
477 case ENOMEM:
478 counter_enter();
479 counter_u64_add_protected(
480 tx->stats.tx_mbuf_dmamap_enomem_err, 1);
481 counter_exit();
482 goto abort;
483 default:
484 goto abort;
485 }
486
487 return (0);
488
489 abort:
490 counter_enter();
491 counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
492 counter_exit();
493 return (err);
494 }
495
496 static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring * tx)497 num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
498 {
499 uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
500 tx->dqo.desc_mask;
501
502 return (tx->dqo.desc_mask - num_used);
503 }
504
505 static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring * tx)506 gve_alloc_pending_packet(struct gve_tx_ring *tx)
507 {
508 int32_t index = tx->dqo.free_pending_pkts_csm;
509 struct gve_tx_pending_pkt_dqo *pending_pkt;
510
511 /*
512 * No pending packets available in the consumer list,
513 * try to steal the producer list.
514 */
515 if (__predict_false(index == -1)) {
516 tx->dqo.free_pending_pkts_csm = atomic_swap_32(
517 &tx->dqo.free_pending_pkts_prd, -1);
518
519 index = tx->dqo.free_pending_pkts_csm;
520 if (__predict_false(index == -1))
521 return (NULL);
522 }
523
524 pending_pkt = &tx->dqo.pending_pkts[index];
525
526 /* Remove pending_pkt from the consumer list */
527 tx->dqo.free_pending_pkts_csm = pending_pkt->next;
528 pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;
529
530 return (pending_pkt);
531 }
532
533 static void
gve_free_pending_packet(struct gve_tx_ring * tx,struct gve_tx_pending_pkt_dqo * pending_pkt)534 gve_free_pending_packet(struct gve_tx_ring *tx,
535 struct gve_tx_pending_pkt_dqo *pending_pkt)
536 {
537 int index = pending_pkt - tx->dqo.pending_pkts;
538 int32_t old_head;
539
540 pending_pkt->state = GVE_PACKET_STATE_FREE;
541
542 /* Add pending_pkt to the producer list */
543 while (true) {
544 old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);
545
546 pending_pkt->next = old_head;
547 if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
548 old_head, index))
549 break;
550 }
551 }
552
553 /*
554 * Has the side-effect of retrieving the value of the last desc index
555 * processed by the NIC. hw_tx_head is written to by the completions-processing
556 * taskqueue upon receiving descriptor-completions.
557 */
558 static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring * tx,int needed_descs)559 gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
560 {
561 if (needed_descs <= num_avail_desc_ring_slots(tx))
562 return (true);
563
564 tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
565 if (needed_descs > num_avail_desc_ring_slots(tx)) {
566 counter_enter();
567 counter_u64_add_protected(
568 tx->stats.tx_delayed_pkt_nospace_descring, 1);
569 counter_exit();
570 return (false);
571 }
572
573 return (0);
574 }
575
576 static void
gve_tx_request_desc_compl(struct gve_tx_ring * tx,uint32_t desc_idx)577 gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
578 {
579 uint32_t last_report_event_interval;
580 uint32_t last_desc_idx;
581
582 last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
583 last_report_event_interval =
584 (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;
585
586 if (__predict_false(last_report_event_interval >=
587 GVE_TX_MIN_RE_INTERVAL)) {
588 tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
589 tx->dqo.last_re_idx = last_desc_idx;
590 }
591 }
592
593 static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring * tx,int num_bufs)594 gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
595 {
596 uint32_t available = tx->dqo.qpl_bufs_produced_cached -
597 tx->dqo.qpl_bufs_consumed;
598
599 if (__predict_true(available >= num_bufs))
600 return (true);
601
602 tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
603 &tx->dqo.qpl_bufs_produced);
604 available = tx->dqo.qpl_bufs_produced_cached -
605 tx->dqo.qpl_bufs_consumed;
606
607 if (__predict_true(available >= num_bufs))
608 return (true);
609 return (false);
610 }
611
612 static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring * tx)613 gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
614 {
615 int32_t buf = tx->dqo.free_qpl_bufs_csm;
616
617 if (__predict_false(buf == -1)) {
618 tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
619 &tx->dqo.free_qpl_bufs_prd, -1);
620 buf = tx->dqo.free_qpl_bufs_csm;
621 if (__predict_false(buf == -1))
622 return (-1);
623 }
624
625 tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
626 tx->dqo.qpl_bufs_consumed++;
627 return (buf);
628 }
629
630 /*
631 * Tx buffer i corresponds to
632 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
633 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
634 */
635 static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring * tx,int32_t index,void ** va,bus_addr_t * dma_addr)636 gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
637 int32_t index, void **va, bus_addr_t *dma_addr)
638 {
639 int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
640 int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
641 GVE_TX_BUF_SHIFT_DQO;
642
643 *va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
644 *dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
645 }
646
647 static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring * tx,int32_t index)648 gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
649 {
650 int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
651
652 return (&tx->com.qpl->dmas[page_id]);
653 }
654
655 static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring * tx,struct mbuf * mbuf,struct gve_tx_pending_pkt_dqo * pkt,bool csum_enabled,int16_t completion_tag,uint32_t * desc_idx)656 gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
657 struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
658 bool csum_enabled, int16_t completion_tag,
659 uint32_t *desc_idx)
660 {
661 int32_t pkt_len = mbuf->m_pkthdr.len;
662 struct gve_dma_handle *dma;
663 uint32_t copy_offset = 0;
664 int32_t prev_buf = -1;
665 uint32_t copy_len;
666 bus_addr_t addr;
667 int32_t buf;
668 void *va;
669
670 MPASS(pkt->num_qpl_bufs == 0);
671 MPASS(pkt->qpl_buf_head == -1);
672
673 while (copy_offset < pkt_len) {
674 buf = gve_tx_alloc_qpl_buf(tx);
675 /* We already checked for availability */
676 MPASS(buf != -1);
677
678 gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
679 copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
680 m_copydata(mbuf, copy_offset, copy_len, va);
681 copy_offset += copy_len;
682
683 dma = gve_get_page_dma_handle(tx, buf);
684 bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);
685
686 gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
687 copy_len, addr, completion_tag,
688 /*eop=*/copy_offset == pkt_len,
689 csum_enabled);
690
691 /* Link all the qpl bufs for a packet */
692 if (prev_buf == -1)
693 pkt->qpl_buf_head = buf;
694 else
695 tx->dqo.qpl_bufs[prev_buf] = buf;
696
697 prev_buf = buf;
698 pkt->num_qpl_bufs++;
699 }
700
701 tx->dqo.qpl_bufs[buf] = -1;
702 }
703
704 int
gve_xmit_dqo_qpl(struct gve_tx_ring * tx,struct mbuf * mbuf)705 gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
706 {
707 uint32_t desc_idx = tx->dqo.desc_tail;
708 struct gve_tx_pending_pkt_dqo *pkt;
709 int total_descs_needed;
710 int16_t completion_tag;
711 bool has_csum_flag;
712 int csum_flags;
713 bool is_tso;
714 int nsegs;
715 int err;
716
717 csum_flags = mbuf->m_pkthdr.csum_flags;
718 has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
719 CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
720 is_tso = csum_flags & CSUM_TSO;
721
722 nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
723 /* Check if we have enough room in the desc ring */
724 total_descs_needed = 1 + /* general_ctx_desc */
725 nsegs + /* pkt_desc */
726 (is_tso ? 1 : 0); /* tso_ctx_desc */
727 if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
728 return (ENOBUFS);
729
730 if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
731 counter_enter();
732 counter_u64_add_protected(
733 tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
734 counter_exit();
735 return (ENOBUFS);
736 }
737
738 pkt = gve_alloc_pending_packet(tx);
739 if (pkt == NULL) {
740 counter_enter();
741 counter_u64_add_protected(
742 tx->stats.tx_delayed_pkt_nospace_compring, 1);
743 counter_exit();
744 return (ENOBUFS);
745 }
746 completion_tag = pkt - tx->dqo.pending_pkts;
747 pkt->mbuf = mbuf;
748
749 err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
750 if (err)
751 goto abort;
752
753 gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
754 has_csum_flag, completion_tag, &desc_idx);
755
756 /* Remember the index of the last desc written */
757 tx->dqo.desc_tail = desc_idx;
758
759 /*
760 * Request a descriptor completion on the last descriptor of the
761 * packet if we are allowed to by the HW enforced interval.
762 */
763 gve_tx_request_desc_compl(tx, desc_idx);
764
765 tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
766 return (0);
767
768 abort:
769 pkt->mbuf = NULL;
770 gve_free_pending_packet(tx, pkt);
771 return (err);
772 }
773
774 int
gve_xmit_dqo(struct gve_tx_ring * tx,struct mbuf ** mbuf_ptr)775 gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
776 {
777 bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
778 uint32_t desc_idx = tx->dqo.desc_tail;
779 struct gve_tx_pending_pkt_dqo *pkt;
780 struct mbuf *mbuf = *mbuf_ptr;
781 int total_descs_needed;
782 int16_t completion_tag;
783 bool has_csum_flag;
784 int csum_flags;
785 bool is_tso;
786 int nsegs;
787 int err;
788 int i;
789
790 csum_flags = mbuf->m_pkthdr.csum_flags;
791 has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
792 CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
793 is_tso = csum_flags & CSUM_TSO;
794
795 /*
796 * This mbuf might end up needing more than 1 pkt desc.
797 * The actual number, `nsegs` is known only after the
798 * expensive gve_map_mbuf_dqo call. This check beneath
799 * exists to fail early when the desc ring is really full.
800 */
801 total_descs_needed = 1 + /* general_ctx_desc */
802 1 + /* pkt_desc */
803 (is_tso ? 1 : 0); /* tso_ctx_desc */
804 if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
805 return (ENOBUFS);
806
807 pkt = gve_alloc_pending_packet(tx);
808 if (pkt == NULL) {
809 counter_enter();
810 counter_u64_add_protected(
811 tx->stats.tx_delayed_pkt_nospace_compring, 1);
812 counter_exit();
813 return (ENOBUFS);
814 }
815 completion_tag = pkt - tx->dqo.pending_pkts;
816
817 err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
818 segs, &nsegs, /*attempt=*/0);
819 if (err)
820 goto abort;
821 mbuf = *mbuf_ptr; /* gve_map_mbuf_dqo might replace the mbuf chain */
822 pkt->mbuf = mbuf;
823
824 total_descs_needed = 1 + /* general_ctx_desc */
825 nsegs + /* pkt_desc */
826 (is_tso ? 1 : 0); /* tso_ctx_desc */
827 if (__predict_false(
828 !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
829 err = ENOBUFS;
830 goto abort_with_dma;
831 }
832
833 err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
834 if (err)
835 goto abort_with_dma;
836
837 bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
838 for (i = 0; i < nsegs; i++) {
839 gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
840 segs[i].ds_len, segs[i].ds_addr,
841 completion_tag, /*eop=*/i == (nsegs - 1),
842 has_csum_flag);
843 }
844
845 /* Remember the index of the last desc written */
846 tx->dqo.desc_tail = desc_idx;
847
848 /*
849 * Request a descriptor completion on the last descriptor of the
850 * packet if we are allowed to by the HW enforced interval.
851 */
852 gve_tx_request_desc_compl(tx, desc_idx);
853
854 tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
855 return (0);
856
857 abort_with_dma:
858 gve_unmap_packet(tx, pkt);
859 abort:
860 pkt->mbuf = NULL;
861 gve_free_pending_packet(tx, pkt);
862 return (err);
863 }
864
865 static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring * tx,struct gve_tx_pending_pkt_dqo * pkt)866 gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
867 struct gve_tx_pending_pkt_dqo *pkt)
868 {
869 int32_t buf = pkt->qpl_buf_head;
870 struct gve_dma_handle *dma;
871 int32_t qpl_buf_tail;
872 int32_t old_head;
873 int i;
874
875 for (i = 0; i < pkt->num_qpl_bufs; i++) {
876 dma = gve_get_page_dma_handle(tx, buf);
877 bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
878 qpl_buf_tail = buf;
879 buf = tx->dqo.qpl_bufs[buf];
880 }
881 MPASS(buf == -1);
882 buf = qpl_buf_tail;
883
884 while (true) {
885 old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
886 tx->dqo.qpl_bufs[buf] = old_head;
887
888 /*
889 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
890 * is visible only after the linked list from this pkt is
891 * attached above to old_head.
892 */
893 if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
894 old_head, pkt->qpl_buf_head))
895 break;
896 }
897 /*
898 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
899 * visible only adter the update to dqo.free_qpl_bufs_prd above.
900 */
901 atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);
902
903 gve_clear_qpl_pending_pkt(pkt);
904 }
905
906 static uint64_t
gve_handle_packet_completion(struct gve_priv * priv,struct gve_tx_ring * tx,uint16_t compl_tag)907 gve_handle_packet_completion(struct gve_priv *priv,
908 struct gve_tx_ring *tx, uint16_t compl_tag)
909 {
910 struct gve_tx_pending_pkt_dqo *pending_pkt;
911 int32_t pkt_len;
912
913 if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
914 device_printf(priv->dev, "Invalid TX completion tag: %d\n",
915 compl_tag);
916 return (0);
917 }
918
919 pending_pkt = &tx->dqo.pending_pkts[compl_tag];
920
921 /* Packet is allocated but not pending data completion. */
922 if (__predict_false(pending_pkt->state !=
923 GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
924 device_printf(priv->dev,
925 "No pending data completion: %d\n", compl_tag);
926 return (0);
927 }
928
929 pkt_len = pending_pkt->mbuf->m_pkthdr.len;
930
931 if (gve_is_qpl(priv))
932 gve_reap_qpl_bufs_dqo(tx, pending_pkt);
933 else
934 gve_unmap_packet(tx, pending_pkt);
935
936 m_freem(pending_pkt->mbuf);
937 pending_pkt->mbuf = NULL;
938 gve_free_pending_packet(tx, pending_pkt);
939 return (pkt_len);
940 }
941
942 int
gve_tx_intr_dqo(void * arg)943 gve_tx_intr_dqo(void *arg)
944 {
945 struct gve_tx_ring *tx = arg;
946 struct gve_priv *priv = tx->com.priv;
947 struct gve_ring_com *com = &tx->com;
948
949 if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
950 return (FILTER_STRAY);
951
952 /* Interrupts are automatically masked */
953 taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
954 return (FILTER_HANDLED);
955 }
956
957 static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring * tx)958 gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
959 {
960 struct gve_ring_com *com = &tx->com;
961 int i;
962
963 for (i = 0; i < com->priv->tx_desc_cnt; i++)
964 tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};
965
966 bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
967 BUS_DMASYNC_PREWRITE);
968 }
969
970 static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring * tx)971 gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
972 {
973 struct gve_ring_com *com = &tx->com;
974 int entries;
975 int i;
976
977 entries = com->priv->tx_desc_cnt;
978 for (i = 0; i < entries; i++)
979 tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};
980
981 bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
982 BUS_DMASYNC_PREWRITE);
983 }
984
985 void
gve_clear_tx_ring_dqo(struct gve_priv * priv,int i)986 gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
987 {
988 struct gve_tx_ring *tx = &priv->tx[i];
989 int j;
990
991 tx->dqo.desc_head = 0;
992 tx->dqo.desc_tail = 0;
993 tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
994 tx->dqo.last_re_idx = 0;
995
996 tx->dqo.compl_head = 0;
997 tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
998 atomic_store_32(&tx->dqo.hw_tx_head, 0);
999 tx->dqo.cur_gen_bit = 0;
1000
1001 gve_free_tx_mbufs_dqo(tx);
1002
1003 for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
1004 if (gve_is_qpl(tx->com.priv))
1005 gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]);
1006 tx->dqo.pending_pkts[j].next =
1007 (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1;
1008 tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
1009 }
1010 tx->dqo.free_pending_pkts_csm = 0;
1011 atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);
1012
1013 if (gve_is_qpl(priv)) {
1014 int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
1015 tx->com.qpl->num_pages;
1016
1017 for (j = 0; j < qpl_buf_cnt - 1; j++)
1018 tx->dqo.qpl_bufs[j] = j + 1;
1019 tx->dqo.qpl_bufs[j] = -1;
1020
1021 tx->dqo.free_qpl_bufs_csm = 0;
1022 atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
1023 atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
1024 tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
1025 tx->dqo.qpl_bufs_consumed = 0;
1026 }
1027
1028 gve_tx_clear_desc_ring_dqo(tx);
1029 gve_tx_clear_compl_ring_dqo(tx);
1030 }
1031
1032 static bool
gve_tx_cleanup_dqo(struct gve_priv * priv,struct gve_tx_ring * tx,int budget)1033 gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
1034 {
1035 struct gve_tx_compl_desc_dqo *compl_desc;
1036 uint64_t bytes_done = 0;
1037 uint64_t pkts_done = 0;
1038 uint16_t compl_tag;
1039 int work_done = 0;
1040 uint16_t tx_head;
1041 uint16_t type;
1042
1043 while (work_done < budget) {
1044 bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
1045 BUS_DMASYNC_POSTREAD);
1046
1047 compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
1048 if (compl_desc->generation == tx->dqo.cur_gen_bit)
1049 break;
1050
1051 /*
1052 * Prevent generation bit from being read after the rest of the
1053 * descriptor.
1054 */
1055 atomic_thread_fence_acq();
1056 type = compl_desc->type;
1057
1058 if (type == GVE_COMPL_TYPE_DQO_DESC) {
1059 /* This is the last descriptor fetched by HW plus one */
1060 tx_head = le16toh(compl_desc->tx_head);
1061 atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
1062 } else if (type == GVE_COMPL_TYPE_DQO_PKT) {
1063 compl_tag = le16toh(compl_desc->completion_tag);
1064 bytes_done += gve_handle_packet_completion(priv,
1065 tx, compl_tag);
1066 pkts_done++;
1067 }
1068
1069 tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
1070 tx->dqo.compl_mask;
1071 /* Flip the generation bit when we wrap around */
1072 tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
1073 work_done++;
1074 }
1075
1076 /*
1077 * Waking the xmit taskqueue has to occur after room has been made in
1078 * the queue.
1079 */
1080 atomic_thread_fence_seq_cst();
1081 if (atomic_load_bool(&tx->stopped) && work_done) {
1082 atomic_store_bool(&tx->stopped, false);
1083 taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
1084 }
1085
1086 tx->done += work_done; /* tx->done is just a sysctl counter */
1087 counter_enter();
1088 counter_u64_add_protected(tx->stats.tbytes, bytes_done);
1089 counter_u64_add_protected(tx->stats.tpackets, pkts_done);
1090 counter_exit();
1091
1092 return (work_done == budget);
1093 }
1094
1095 void
gve_tx_cleanup_tq_dqo(void * arg,int pending)1096 gve_tx_cleanup_tq_dqo(void *arg, int pending)
1097 {
1098 struct gve_tx_ring *tx = arg;
1099 struct gve_priv *priv = tx->com.priv;
1100
1101 if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
1102 return;
1103
1104 if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
1105 taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
1106 return;
1107 }
1108
1109 gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
1110 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
1111 }
1112