1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: stable/12/sys/dev/ena/ena_datapath.c 372299 2022-07-29 17:10:48Z mw $");
32 
33 #include "opt_rss.h"
34 
35 #include "ena.h"
36 #include "ena_datapath.h"
37 #ifdef DEV_NETMAP
38 #include "ena_netmap.h"
39 #endif /* DEV_NETMAP */
40 #ifdef RSS
41 #include <net/rss_config.h>
42 #endif /* RSS */
43 
44 #include <netinet6/ip6_var.h>
45 
46 /*********************************************************************
47  *  Static functions prototypes
48  *********************************************************************/
49 
/* Per-ring completion handlers, invoked from ena_cleanup(). */
static int ena_tx_cleanup(struct ena_ring *);
static int ena_rx_cleanup(struct ena_ring *);
/* Tx completion helper: fetches and validates the next completed req_id. */
static inline int ena_get_tx_req_id(struct ena_ring *tx_ring,
    struct ena_com_io_cq *io_cq, uint16_t *req_id);
/* Rx helpers: flow hash tagging, mbuf chain assembly, checksum status. */
static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static struct mbuf *ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
    struct ena_com_rx_ctx *, uint16_t *);
static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
/* Tx helpers: offload metadata, mbuf chain collapsing, packet submission. */
static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
    struct mbuf **mbuf);
static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
static void ena_start_xmit(struct ena_ring *);
65 
66 /*********************************************************************
67  *  Global functions
68  *********************************************************************/
69 
70 void
ena_cleanup(void * arg,int pending)71 ena_cleanup(void *arg, int pending)
72 {
73 	struct ena_que *que = arg;
74 	struct ena_adapter *adapter = que->adapter;
75 	if_t ifp = adapter->ifp;
76 	struct ena_ring *tx_ring;
77 	struct ena_ring *rx_ring;
78 	struct ena_com_io_cq *io_cq;
79 	struct ena_eth_io_intr_reg intr_reg;
80 	int qid, ena_qid;
81 	int txc, rxc, i;
82 
83 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
84 		return;
85 
86 	ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
87 
88 	tx_ring = que->tx_ring;
89 	rx_ring = que->rx_ring;
90 	qid = que->id;
91 	ena_qid = ENA_IO_TXQ_IDX(qid);
92 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
93 
94 	atomic_store_8(&tx_ring->first_interrupt, 1);
95 	atomic_store_8(&rx_ring->first_interrupt, 1);
96 
97 	for (i = 0; i < ENA_CLEAN_BUDGET; ++i) {
98 		rxc = ena_rx_cleanup(rx_ring);
99 		txc = ena_tx_cleanup(tx_ring);
100 
101 		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
102 			return;
103 
104 		if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
105 			break;
106 	}
107 
108 	/* Signal that work is done and unmask interrupt */
109 	ena_com_update_intr_reg(&intr_reg, ENA_RX_IRQ_INTERVAL,
110 	    ENA_TX_IRQ_INTERVAL, true);
111 	counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
112 	ena_com_unmask_intr(io_cq, &intr_reg);
113 }
114 
115 void
ena_deferred_mq_start(void * arg,int pending)116 ena_deferred_mq_start(void *arg, int pending)
117 {
118 	struct ena_ring *tx_ring = (struct ena_ring *)arg;
119 	struct ifnet *ifp = tx_ring->adapter->ifp;
120 
121 	while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running &&
122 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
123 		ENA_RING_MTX_LOCK(tx_ring);
124 		ena_start_xmit(tx_ring);
125 		ENA_RING_MTX_UNLOCK(tx_ring);
126 	}
127 }
128 
129 int
ena_mq_start(if_t ifp,struct mbuf * m)130 ena_mq_start(if_t ifp, struct mbuf *m)
131 {
132 	struct ena_adapter *adapter = ifp->if_softc;
133 	struct ena_ring *tx_ring;
134 	int ret, is_drbr_empty;
135 	uint32_t i;
136 #ifdef RSS
137 	uint32_t bucket_id;
138 #endif
139 
140 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
141 		return (ENODEV);
142 
143 	/* Which queue to use */
144 	/*
145 	 * If everything is setup correctly, it should be the
146 	 * same bucket that the current CPU we're on is.
147 	 * It should improve performance.
148 	 */
149 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
150 #ifdef RSS
151 		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
152 		    &bucket_id) == 0)
153 			i = bucket_id % adapter->num_io_queues;
154 		else
155 #endif
156 			i = m->m_pkthdr.flowid % adapter->num_io_queues;
157 	} else {
158 		i = curcpu % adapter->num_io_queues;
159 	}
160 	tx_ring = &adapter->tx_ring[i];
161 
162 	/* Check if drbr is empty before putting packet */
163 	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
164 	ret = drbr_enqueue(ifp, tx_ring->br, m);
165 	if (unlikely(ret != 0)) {
166 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
167 		return (ret);
168 	}
169 
170 	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
171 		ena_start_xmit(tx_ring);
172 		ENA_RING_MTX_UNLOCK(tx_ring);
173 	} else {
174 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
175 	}
176 
177 	return (0);
178 }
179 
180 void
ena_qflush(if_t ifp)181 ena_qflush(if_t ifp)
182 {
183 	struct ena_adapter *adapter = ifp->if_softc;
184 	struct ena_ring *tx_ring = adapter->tx_ring;
185 	int i;
186 
187 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
188 		if (!drbr_empty(ifp, tx_ring->br)) {
189 			ENA_RING_MTX_LOCK(tx_ring);
190 			drbr_flush(ifp, tx_ring->br);
191 			ENA_RING_MTX_UNLOCK(tx_ring);
192 		}
193 
194 	if_qflush(ifp);
195 }
196 
197 /*********************************************************************
198  *  Static functions
199  *********************************************************************/
200 
201 static inline int
ena_get_tx_req_id(struct ena_ring * tx_ring,struct ena_com_io_cq * io_cq,uint16_t * req_id)202 ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
203     uint16_t *req_id)
204 {
205 	struct ena_adapter *adapter = tx_ring->adapter;
206 	int rc;
207 
208 	rc = ena_com_tx_comp_req_id_get(io_cq, req_id);
209 	if (rc == ENA_COM_TRY_AGAIN)
210 		return (EAGAIN);
211 
212 	if (unlikely(rc != 0)) {
213 		ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu\n",
214 		    *req_id, tx_ring->qid);
215 		counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
216 		goto err;
217 	}
218 
219 	if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL)
220 		return (0);
221 
222 	ena_log(adapter->pdev, ERR,
223 	    "tx_info doesn't have valid mbuf. req_id %hu qid %hu\n",
224 	    *req_id, tx_ring->qid);
225 err:
226 	ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
227 
228 	return (EFAULT);
229 }
230 
/**
 * ena_tx_cleanup - clear sent packets and corresponding descriptors
 * @tx_ring: ring for which we want to clean packets
 *
 * Asks the device, in a loop, for completed request ids. For each one the
 * related mbuf chain is looked up in tx_buffer_info (indexed by req_id), its
 * DMA map is unloaded and the chain is freed; the req_id is then returned to
 * free_tx_ids. Ring state is published to the device every ENA_TX_COMMIT
 * completions and once more after the loop if anything is left uncommitted.
 * The loop ends when no further completion is available, when a bad req_id is
 * reported, or after ENA_TX_BUDGET packets.
 *
 * Finally, a stopped ring is restarted if the submission queue has at least
 * ENA_TX_RESUME_THRESH free entries again.
 *
 * Returns the number of packets cleaned (0 when netmap handled the
 * interrupt).
 **/
static int
ena_tx_cleanup(struct ena_ring *tx_ring)
{
	struct ena_adapter *adapter;
	struct ena_com_io_cq *io_cq;
	uint16_t next_to_clean;
	uint16_t req_id;
	uint16_t ena_qid;
	/* Descriptors completed since the last ack to the submission queue. */
	unsigned int total_done = 0;
	int rc;
	/* Countdown to the next periodic ring-state update. */
	int commit = ENA_TX_COMMIT;
	/* Remaining number of packets this call may clean. */
	int budget = ENA_TX_BUDGET;
	int work_done;
	bool above_thresh;

	adapter = tx_ring->que->adapter;
	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	next_to_clean = tx_ring->next_to_clean;

#ifdef DEV_NETMAP
	/* Anything other than NM_IRQ_PASS means there is nothing to do here. */
	if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	do {
		struct ena_tx_buffer *tx_info;
		struct mbuf *mbuf;

		/* Non-zero means "no more completions" or an invalid req_id. */
		rc = ena_get_tx_req_id(tx_ring, io_cq, &req_id);
		if (unlikely(rc != 0))
			break;

		tx_info = &tx_ring->tx_buffer_info[req_id];

		mbuf = tx_info->mbuf;

		/* Mark the slot as free before releasing its resources. */
		tx_info->mbuf = NULL;
		bintime_clear(&tx_info->timestamp);

		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);

		ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n",
		    tx_ring->qid, mbuf);

		m_freem(mbuf);

		total_done += tx_info->tx_descs;

		/* Recycle the request id at the clean position of the ring. */
		tx_ring->free_tx_ids[next_to_clean] = req_id;
		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
		    tx_ring->ring_size);

		if (unlikely(--commit == 0)) {
			commit = ENA_TX_COMMIT;
			/* update ring state every ENA_TX_COMMIT descriptor */
			tx_ring->next_to_clean = next_to_clean;
			ena_com_comp_ack(
			    &adapter->ena_dev->io_sq_queues[ena_qid],
			    total_done);
			ena_com_update_dev_comp_head(io_cq);
			total_done = 0;
		}
	} while (likely(--budget));

	/* budget is decremented once per cleaned packet only. */
	work_done = ENA_TX_BUDGET - budget;

	ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n",
	    tx_ring->qid, work_done);

	/* If there is still something to commit update ring state */
	if (likely(commit != ENA_TX_COMMIT)) {
		tx_ring->next_to_clean = next_to_clean;
		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
		    total_done);
		ena_com_update_dev_comp_head(io_cq);
	}

	/*
	 * Need to make the rings circular update visible to
	 * ena_xmit_mbuf() before checking for tx_ring->running.
	 */
	mb();

	/*
	 * Restart a stopped ring once enough submission queue space is free.
	 * The condition is tested first without the ring lock and then again
	 * with the lock held before the ring is actually marked running.
	 */
	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
	    ENA_TX_RESUME_THRESH);
	if (unlikely(!tx_ring->running && above_thresh)) {
		ENA_RING_MTX_LOCK(tx_ring);
		above_thresh = ena_com_sq_have_enough_space(
		    tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH);
		if (!tx_ring->running && above_thresh) {
			tx_ring->running = true;
			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
			taskqueue_enqueue(tx_ring->enqueue_tq,
			    &tx_ring->enqueue_task);
		}
		ENA_RING_MTX_UNLOCK(tx_ring);
	}

	/* Remember when this ring was last cleaned. */
	tx_ring->tx_last_cleanup_ticks = ticks;

	return (work_done);
}
347 
348 static void
ena_rx_hash_mbuf(struct ena_ring * rx_ring,struct ena_com_rx_ctx * ena_rx_ctx,struct mbuf * mbuf)349 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
350     struct mbuf *mbuf)
351 {
352 	struct ena_adapter *adapter = rx_ring->adapter;
353 
354 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
355 		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
356 
357 #ifdef RSS
358 		/*
359 		 * Hardware and software RSS are in agreement only when both are
360 		 * configured to Toeplitz algorithm.  This driver configures
361 		 * that algorithm only when software RSS is enabled and uses it.
362 		 */
363 		if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
364 		    ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
365 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
366 			return;
367 		}
368 #endif
369 
370 		if (ena_rx_ctx->frag &&
371 		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
372 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
373 			return;
374 		}
375 
376 		switch (ena_rx_ctx->l3_proto) {
377 		case ENA_ETH_IO_L3_PROTO_IPV4:
378 			switch (ena_rx_ctx->l4_proto) {
379 			case ENA_ETH_IO_L4_PROTO_TCP:
380 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
381 				break;
382 			case ENA_ETH_IO_L4_PROTO_UDP:
383 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
384 				break;
385 			default:
386 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
387 			}
388 			break;
389 		case ENA_ETH_IO_L3_PROTO_IPV6:
390 			switch (ena_rx_ctx->l4_proto) {
391 			case ENA_ETH_IO_L4_PROTO_TCP:
392 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
393 				break;
394 			case ENA_ETH_IO_L4_PROTO_UDP:
395 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
396 				break;
397 			default:
398 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
399 			}
400 			break;
401 		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
402 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
403 			break;
404 		default:
405 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
406 		}
407 	} else {
408 		mbuf->m_pkthdr.flowid = rx_ring->qid;
409 		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
410 	}
411 }
412 
413 /**
414  * ena_rx_mbuf - assemble mbuf from descriptors
415  * @rx_ring: ring for which we want to clean packets
416  * @ena_bufs: buffer info
417  * @ena_rx_ctx: metadata for this packet(s)
418  * @next_to_clean: ring pointer, will be updated only upon success
419  *
420  **/
421 static struct mbuf *
ena_rx_mbuf(struct ena_ring * rx_ring,struct ena_com_rx_buf_info * ena_bufs,struct ena_com_rx_ctx * ena_rx_ctx,uint16_t * next_to_clean)422 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
423     struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
424 {
425 	struct mbuf *mbuf;
426 	struct ena_rx_buffer *rx_info;
427 	struct ena_adapter *adapter;
428 	device_t pdev;
429 	unsigned int descs = ena_rx_ctx->descs;
430 	uint16_t ntc, len, req_id, buf = 0;
431 
432 	ntc = *next_to_clean;
433 	adapter = rx_ring->adapter;
434 	pdev = adapter->pdev;
435 
436 	len = ena_bufs[buf].len;
437 	req_id = ena_bufs[buf].req_id;
438 	rx_info = &rx_ring->rx_buffer_info[req_id];
439 	if (unlikely(rx_info->mbuf == NULL)) {
440 		ena_log(pdev, ERR, "NULL mbuf in rx_info");
441 		return (NULL);
442 	}
443 
444 	ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info,
445 	    rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
446 
447 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
448 	    BUS_DMASYNC_POSTREAD);
449 	mbuf = rx_info->mbuf;
450 	mbuf->m_flags |= M_PKTHDR;
451 	mbuf->m_pkthdr.len = len;
452 	mbuf->m_len = len;
453 	/* Only for the first segment the data starts at specific offset */
454 	mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
455 	ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
456 	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
457 
458 	/* Fill mbuf with hash key and it's interpretation for optimization */
459 	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
460 
461 	ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf,
462 	    mbuf->m_flags, mbuf->m_pkthdr.len);
463 
464 	/* DMA address is not needed anymore, unmap it */
465 	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
466 
467 	rx_info->mbuf = NULL;
468 	rx_ring->free_rx_ids[ntc] = req_id;
469 	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
470 
471 	/*
472 	 * While we have more than 1 descriptors for one rcvd packet, append
473 	 * other mbufs to the main one
474 	 */
475 	while (--descs) {
476 		++buf;
477 		len = ena_bufs[buf].len;
478 		req_id = ena_bufs[buf].req_id;
479 		rx_info = &rx_ring->rx_buffer_info[req_id];
480 
481 		if (unlikely(rx_info->mbuf == NULL)) {
482 			ena_log(pdev, ERR, "NULL mbuf in rx_info");
483 			/*
484 			 * If one of the required mbufs was not allocated yet,
485 			 * we can break there.
486 			 * All earlier used descriptors will be reallocated
487 			 * later and not used mbufs can be reused.
488 			 * The next_to_clean pointer will not be updated in case
489 			 * of an error, so caller should advance it manually
490 			 * in error handling routine to keep it up to date
491 			 * with hw ring.
492 			 */
493 			m_freem(mbuf);
494 			return (NULL);
495 		}
496 
497 		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
498 		    BUS_DMASYNC_POSTREAD);
499 		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
500 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
501 			ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n",
502 			    mbuf);
503 		}
504 
505 		ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n",
506 		    mbuf->m_pkthdr.len);
507 
508 		/* Free already appended mbuf, it won't be useful anymore */
509 		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
510 		m_freem(rx_info->mbuf);
511 		rx_info->mbuf = NULL;
512 
513 		rx_ring->free_rx_ids[ntc] = req_id;
514 		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
515 	}
516 
517 	*next_to_clean = ntc;
518 
519 	return (mbuf);
520 }
521 
522 /**
523  * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
524  **/
525 static inline void
ena_rx_checksum(struct ena_ring * rx_ring,struct ena_com_rx_ctx * ena_rx_ctx,struct mbuf * mbuf)526 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
527     struct mbuf *mbuf)
528 {
529 	device_t pdev = rx_ring->adapter->pdev;
530 
531 	/* if IP and error */
532 	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
533 	    ena_rx_ctx->l3_csum_err)) {
534 		/* ipv4 checksum error */
535 		mbuf->m_pkthdr.csum_flags = 0;
536 		counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
537 		ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n");
538 		return;
539 	}
540 
541 	/* if TCP/UDP */
542 	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
543 	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
544 		if (ena_rx_ctx->l4_csum_err) {
545 			/* TCP/UDP checksum error */
546 			mbuf->m_pkthdr.csum_flags = 0;
547 			counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
548 			ena_log_io(pdev, DBG, "RX L4 checksum error\n");
549 		} else {
550 			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
551 			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
552 			counter_u64_add(rx_ring->rx_stats.csum_good, 1);
553 		}
554 	}
555 }
556 
/**
 * ena_rx_cleanup - handle rx irq
 * @rx_ring: ring for which irq is being handled
 *
 * Pulls up to ENA_RX_BUDGET received packets from the completion queue,
 * assembles each into an mbuf chain and hands it to LRO or to the
 * interface's if_input routine. Afterwards the ring position is stored, the
 * ring is refilled if enough entries are free, and LRO is flushed.
 *
 * Returns the number of packets processed. Returns 0 when netmap handled the
 * interrupt or when ena_com_rx_pkt() failed, in which case a device reset is
 * triggered.
 **/
static int
ena_rx_cleanup(struct ena_ring *rx_ring)
{
	struct ena_adapter *adapter;
	device_t pdev;
	struct mbuf *mbuf;
	struct ena_com_rx_ctx ena_rx_ctx;
	struct ena_com_io_cq *io_cq;
	struct ena_com_io_sq *io_sq;
	enum ena_regs_reset_reason_types reset_reason;
	if_t ifp;
	uint16_t ena_qid;
	uint16_t next_to_clean;
	uint32_t refill_required;
	uint32_t refill_threshold;
	uint32_t do_if_input = 0;
	unsigned int qid;
	int rc, i;
	/* Remaining number of packets this call may process. */
	int budget = ENA_RX_BUDGET;
#ifdef DEV_NETMAP
	int done;
#endif /* DEV_NETMAP */

	adapter = rx_ring->que->adapter;
	pdev = adapter->pdev;
	ifp = adapter->ifp;
	qid = rx_ring->que->id;
	ena_qid = ENA_IO_RXQ_IDX(qid);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
	next_to_clean = rx_ring->next_to_clean;

#ifdef DEV_NETMAP
	/* Anything other than NM_IRQ_PASS means there is nothing to do here. */
	if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	ena_log_io(pdev, DBG, "rx: qid %d\n", qid);

	do {
		/* Reset the per-packet context before asking for a packet. */
		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
		ena_rx_ctx.descs = 0;
		ena_rx_ctx.pkt_offset = 0;

		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
		if (unlikely(rc != 0)) {
			/* Any failure here ends in a device reset request. */
			if (rc == ENA_COM_NO_SPACE) {
				counter_u64_add(rx_ring->rx_stats.bad_desc_num,
				    1);
				reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
			} else {
				counter_u64_add(rx_ring->rx_stats.bad_req_id,
				    1);
				reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
			}
			ena_trigger_reset(adapter, reset_reason);
			return (0);
		}

		/* No descriptors means no further packet is available. */
		if (unlikely(ena_rx_ctx.descs == 0))
			break;

		ena_log_io(pdev, DBG,
		    "rx: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

		/* Receive mbuf from the ring */
		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx,
		    &next_to_clean);
		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
		/* Exit if we failed to retrieve a buffer */
		if (unlikely(mbuf == NULL)) {
			/*
			 * ena_rx_mbuf() did not advance next_to_clean, so
			 * return every request id of this packet to
			 * free_rx_ids and step over its descriptors here.
			 */
			for (i = 0; i < ena_rx_ctx.descs; ++i) {
				rx_ring->free_rx_ids[next_to_clean] =
				    rx_ring->ena_bufs[i].req_id;
				next_to_clean = ENA_RX_RING_IDX_NEXT(
				    next_to_clean, rx_ring->ring_size);
			}
			break;
		}

		/* Consult the checksum verdict only if Rx csum is enabled. */
		if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
		    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.bytes,
		    mbuf->m_pkthdr.len);
		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
		    mbuf->m_pkthdr.len);
		counter_exit();
		/*
		 * LRO is only for IP/TCP packets and TCP checksum of the packet
		 * should be computed by hardware.
		 */
		do_if_input = 1;
		if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
			/*
			 * Send to the stack if:
			 *  - LRO not enabled, or
			 *  - no LRO resources, or
			 *  - lro enqueue fails
			 */
			if ((rx_ring->lro.lro_cnt != 0) &&
			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
				do_if_input = 0;
		}
		if (do_if_input != 0) {
			ena_log_io(pdev, DBG,
			    "calling if_input() with mbuf %p\n", mbuf);
			(*ifp->if_input)(ifp, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
		counter_exit();
	} while (--budget);

	rx_ring->next_to_clean = next_to_clean;

	/*
	 * Refill only when the number of free submission queue entries
	 * exceeds the threshold, which is the smaller of a fraction of the
	 * ring size and ENA_RX_REFILL_THRESH_PACKET.
	 */
	refill_required = ena_com_free_q_entries(io_sq);
	refill_threshold = min_t(int,
	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
	    ENA_RX_REFILL_THRESH_PACKET);

	if (refill_required > refill_threshold) {
		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
		ena_refill_rx_bufs(rx_ring, refill_required);
	}

	/* Flush whatever LRO still holds for this ring. */
	tcp_lro_flush_all(&rx_ring->lro);

	/* budget is decremented once per delivered packet only. */
	return (ENA_RX_BUDGET - budget);
}
704 
705 static void
ena_tx_csum(struct ena_com_tx_ctx * ena_tx_ctx,struct mbuf * mbuf,bool disable_meta_caching)706 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
707     bool disable_meta_caching)
708 {
709 	struct ena_com_tx_meta *ena_meta;
710 	struct ether_vlan_header *eh;
711 	struct mbuf *mbuf_next;
712 	u32 mss;
713 	bool offload;
714 	uint16_t etype;
715 	int ehdrlen;
716 	struct ip *ip;
717 	int ipproto;
718 	int iphlen;
719 	struct tcphdr *th;
720 	int offset;
721 
722 	offload = false;
723 	ena_meta = &ena_tx_ctx->ena_meta;
724 	mss = mbuf->m_pkthdr.tso_segsz;
725 
726 	if (mss != 0)
727 		offload = true;
728 
729 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
730 		offload = true;
731 
732 	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
733 		offload = true;
734 
735 	if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0)
736 		offload = true;
737 
738 	if (!offload) {
739 		if (disable_meta_caching) {
740 			memset(ena_meta, 0, sizeof(*ena_meta));
741 			ena_tx_ctx->meta_valid = 1;
742 		} else {
743 			ena_tx_ctx->meta_valid = 0;
744 		}
745 		return;
746 	}
747 
748 	/* Determine where frame payload starts. */
749 	eh = mtod(mbuf, struct ether_vlan_header *);
750 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
751 		etype = ntohs(eh->evl_proto);
752 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
753 	} else {
754 		etype = ntohs(eh->evl_encap_proto);
755 		ehdrlen = ETHER_HDR_LEN;
756 	}
757 
758 	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
759 
760 	switch (etype) {
761 	case ETHERTYPE_IP:
762 		ip = (struct ip *)(mtodo(mbuf_next, offset));
763 		iphlen = ip->ip_hl << 2;
764 		ipproto = ip->ip_p;
765 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
766 		if ((ip->ip_off & htons(IP_DF)) != 0)
767 			ena_tx_ctx->df = 1;
768 		break;
769 	case ETHERTYPE_IPV6:
770 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
771 		iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto);
772 		iphlen -= ehdrlen;
773 		ena_tx_ctx->df = 1;
774 		break;
775 	default:
776 		iphlen = 0;
777 		ipproto = 0;
778 		break;
779 	}
780 
781 	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
782 	th = (struct tcphdr *)(mtodo(mbuf_next, offset));
783 
784 	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
785 		ena_tx_ctx->l3_csum_enable = 1;
786 	}
787 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
788 		ena_tx_ctx->tso_enable = 1;
789 		ena_meta->l4_hdr_len = (th->th_off);
790 	}
791 
792 	if (ipproto == IPPROTO_TCP) {
793 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
794 		if ((mbuf->m_pkthdr.csum_flags &
795 		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
796 			ena_tx_ctx->l4_csum_enable = 1;
797 		else
798 			ena_tx_ctx->l4_csum_enable = 0;
799 	} else if (ipproto == IPPROTO_UDP) {
800 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
801 		if ((mbuf->m_pkthdr.csum_flags &
802 		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
803 			ena_tx_ctx->l4_csum_enable = 1;
804 		else
805 			ena_tx_ctx->l4_csum_enable = 0;
806 	} else {
807 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
808 		ena_tx_ctx->l4_csum_enable = 0;
809 	}
810 
811 	ena_meta->mss = mss;
812 	ena_meta->l3_hdr_len = iphlen;
813 	ena_meta->l3_hdr_offset = ehdrlen;
814 	ena_tx_ctx->meta_valid = 1;
815 }
816 
817 static int
ena_check_and_collapse_mbuf(struct ena_ring * tx_ring,struct mbuf ** mbuf)818 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
819 {
820 	struct ena_adapter *adapter;
821 	struct mbuf *collapsed_mbuf;
822 	int num_frags;
823 
824 	adapter = tx_ring->adapter;
825 	num_frags = ena_mbuf_count(*mbuf);
826 
827 	/* One segment must be reserved for configuration descriptor. */
828 	if (num_frags < adapter->max_tx_sgl_size)
829 		return (0);
830 
831 	if ((num_frags == adapter->max_tx_sgl_size) &&
832 	    ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size))
833 		return (0);
834 
835 	counter_u64_add(tx_ring->tx_stats.collapse, 1);
836 
837 	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
838 	    adapter->max_tx_sgl_size - 1);
839 	if (unlikely(collapsed_mbuf == NULL)) {
840 		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
841 		return (ENOMEM);
842 	}
843 
844 	/* If mbuf was collapsed succesfully, original mbuf is released. */
845 	*mbuf = collapsed_mbuf;
846 
847 	return (0);
848 }
849 
850 static int
ena_tx_map_mbuf(struct ena_ring * tx_ring,struct ena_tx_buffer * tx_info,struct mbuf * mbuf,void ** push_hdr,u16 * header_len)851 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
852     struct mbuf *mbuf, void **push_hdr, u16 *header_len)
853 {
854 	struct ena_adapter *adapter = tx_ring->adapter;
855 	struct ena_com_buf *ena_buf;
856 	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
857 	size_t iseg = 0;
858 	uint32_t mbuf_head_len;
859 	uint16_t offset;
860 	int rc, nsegs;
861 
862 	mbuf_head_len = mbuf->m_len;
863 	tx_info->mbuf = mbuf;
864 	ena_buf = tx_info->bufs;
865 
866 	/*
867 	 * For easier maintaining of the DMA map, map the whole mbuf even if
868 	 * the LLQ is used. The descriptors will be filled using the segments.
869 	 */
870 	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag,
871 	    tx_info->dmamap, mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
872 	if (unlikely((rc != 0) || (nsegs == 0))) {
873 		ena_log_io(adapter->pdev, WARN,
874 		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
875 		goto dma_error;
876 	}
877 
878 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
879 		/*
880 		 * When the device is LLQ mode, the driver will copy
881 		 * the header into the device memory space.
882 		 * the ena_com layer assumes the header is in a linear
883 		 * memory space.
884 		 * This assumption might be wrong since part of the header
885 		 * can be in the fragmented buffers.
886 		 * First check if header fits in the mbuf. If not, copy it to
887 		 * separate buffer that will be holding linearized data.
888 		 */
889 		*header_len = min_t(uint32_t, mbuf->m_pkthdr.len,
890 		    tx_ring->tx_max_header_size);
891 
892 		/* If header is in linear space, just point into mbuf's data. */
893 		if (likely(*header_len <= mbuf_head_len)) {
894 			*push_hdr = mbuf->m_data;
895 		/*
896 		 * Otherwise, copy whole portion of header from multiple
897 		 * mbufs to intermediate buffer.
898 		 */
899 		} else {
900 			m_copydata(mbuf, 0, *header_len,
901 			    tx_ring->push_buf_intermediate_buf);
902 			*push_hdr = tx_ring->push_buf_intermediate_buf;
903 
904 			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
905 		}
906 
907 		ena_log_io(adapter->pdev, DBG,
908 		    "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
909 		    mbuf, *push_hdr, *header_len);
910 
911 		/* If packet is fitted in LLQ header, no need for DMA segments. */
912 		if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
913 			return (0);
914 		} else {
915 			offset = tx_ring->tx_max_header_size;
916 			/*
917 			 * As Header part is mapped to LLQ header, we can skip
918 			 * it and just map the residuum of the mbuf to DMA
919 			 * Segments.
920 			 */
921 			while (offset > 0) {
922 				if (offset >= segs[iseg].ds_len) {
923 					offset -= segs[iseg].ds_len;
924 				} else {
925 					ena_buf->paddr = segs[iseg].ds_addr +
926 					    offset;
927 					ena_buf->len = segs[iseg].ds_len -
928 					    offset;
929 					ena_buf++;
930 					tx_info->num_of_bufs++;
931 					offset = 0;
932 				}
933 				iseg++;
934 			}
935 		}
936 	} else {
937 		*push_hdr = NULL;
938 		/*
939 		 * header_len is just a hint for the device. Because FreeBSD is
940 		 * not giving us information about packet header length and it
941 		 * is not guaranteed that all packet headers will be in the 1st
942 		 * mbuf, setting header_len to 0 is making the device ignore
943 		 * this value and resolve header on it's own.
944 		 */
945 		*header_len = 0;
946 	}
947 
948 	/* Map rest of the mbuf */
949 	while (iseg < nsegs) {
950 		ena_buf->paddr = segs[iseg].ds_addr;
951 		ena_buf->len = segs[iseg].ds_len;
952 		ena_buf++;
953 		iseg++;
954 		tx_info->num_of_bufs++;
955 	}
956 
957 	return (0);
958 
959 dma_error:
960 	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
961 	tx_info->mbuf = NULL;
962 	return (rc);
963 }
964 
965 static int
ena_xmit_mbuf(struct ena_ring * tx_ring,struct mbuf ** mbuf)966 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
967 {
968 	struct ena_adapter *adapter;
969 	device_t pdev;
970 	struct ena_tx_buffer *tx_info;
971 	struct ena_com_tx_ctx ena_tx_ctx;
972 	struct ena_com_dev *ena_dev;
973 	struct ena_com_io_sq *io_sq;
974 	void *push_hdr;
975 	uint16_t next_to_use;
976 	uint16_t req_id;
977 	uint16_t ena_qid;
978 	uint16_t header_len;
979 	int rc;
980 	int nb_hw_desc;
981 
982 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
983 	adapter = tx_ring->que->adapter;
984 	pdev = adapter->pdev;
985 	ena_dev = adapter->ena_dev;
986 	io_sq = &ena_dev->io_sq_queues[ena_qid];
987 
988 	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
989 	if (unlikely(rc != 0)) {
990 		ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n",
991 		    rc);
992 		return (rc);
993 	}
994 
995 	ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
996 
997 	next_to_use = tx_ring->next_to_use;
998 	req_id = tx_ring->free_tx_ids[next_to_use];
999 	tx_info = &tx_ring->tx_buffer_info[req_id];
1000 	tx_info->num_of_bufs = 0;
1001 
1002 	ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev,
1003 	    "mbuf isn't NULL for req_id %d\n", req_id);
1004 
1005 	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
1006 	if (unlikely(rc != 0)) {
1007 		ena_log_io(pdev, WARN, "Failed to map TX mbuf\n");
1008 		return (rc);
1009 	}
1010 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
1011 	ena_tx_ctx.ena_bufs = tx_info->bufs;
1012 	ena_tx_ctx.push_header = push_hdr;
1013 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
1014 	ena_tx_ctx.req_id = req_id;
1015 	ena_tx_ctx.header_len = header_len;
1016 
1017 	/* Set flags and meta data */
1018 	ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);
1019 
1020 	if (tx_ring->acum_pkts == ENA_DB_THRESHOLD ||
1021 	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
1022 		ena_log_io(pdev, DBG,
1023 		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
1024 		    tx_ring->que->id);
1025 		ena_ring_tx_doorbell(tx_ring);
1026 	}
1027 
1028 	/* Prepare the packet's descriptors and send them to device */
1029 	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
1030 	if (unlikely(rc != 0)) {
1031 		if (likely(rc == ENA_COM_NO_MEM)) {
1032 			ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n",
1033 			    tx_ring->que->id);
1034 		} else {
1035 			ena_log(pdev, ERR, "failed to prepare tx bufs\n");
1036 			ena_trigger_reset(adapter,
1037 			    ENA_REGS_RESET_DRIVER_INVALID_STATE);
1038 		}
1039 		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
1040 		goto dma_error;
1041 	}
1042 
1043 	counter_enter();
1044 	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
1045 	counter_u64_add_protected(tx_ring->tx_stats.bytes,
1046 	    (*mbuf)->m_pkthdr.len);
1047 
1048 	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
1049 	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
1050 	    (*mbuf)->m_pkthdr.len);
1051 	counter_exit();
1052 
1053 	tx_info->tx_descs = nb_hw_desc;
1054 	getbinuptime(&tx_info->timestamp);
1055 	tx_info->print_once = true;
1056 
1057 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
1058 	    tx_ring->ring_size);
1059 
1060 	/* stop the queue when no more space available, the packet can have up
1061 	 * to sgl_size + 2. one for the meta descriptor and one for header
1062 	 * (if the header is larger than tx_max_header_size).
1063 	 */
1064 	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1065 	    adapter->max_tx_sgl_size + 2))) {
1066 		ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id);
1067 
1068 		tx_ring->running = false;
1069 		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
1070 
1071 		/* There is a rare condition where this function decides to
1072 		 * stop the queue but meanwhile tx_cleanup() updates
1073 		 * next_to_completion and terminates.
1074 		 * The queue will remain stopped forever.
1075 		 * To solve this issue this function performs mb(), checks
1076 		 * the wakeup condition and wakes up the queue if needed.
1077 		 */
1078 		mb();
1079 
1080 		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1081 		    ENA_TX_RESUME_THRESH)) {
1082 			tx_ring->running = true;
1083 			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1084 		}
1085 	}
1086 
1087 	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1088 	    BUS_DMASYNC_PREWRITE);
1089 
1090 	return (0);
1091 
1092 dma_error:
1093 	tx_info->mbuf = NULL;
1094 	bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1095 
1096 	return (rc);
1097 }
1098 
1099 static void
ena_start_xmit(struct ena_ring * tx_ring)1100 ena_start_xmit(struct ena_ring *tx_ring)
1101 {
1102 	struct mbuf *mbuf;
1103 	struct ena_adapter *adapter = tx_ring->adapter;
1104 	int ret = 0;
1105 
1106 	ENA_RING_MTX_ASSERT(tx_ring);
1107 
1108 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1109 		return;
1110 
1111 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
1112 		return;
1113 
1114 	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
1115 		ena_log_io(adapter->pdev, DBG,
1116 		    "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n",
1117 		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
1118 
1119 		if (unlikely(!tx_ring->running)) {
1120 			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1121 			break;
1122 		}
1123 
1124 		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
1125 			if (ret == ENA_COM_NO_MEM) {
1126 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1127 			} else if (ret == ENA_COM_NO_SPACE) {
1128 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1129 			} else {
1130 				m_freem(mbuf);
1131 				drbr_advance(adapter->ifp, tx_ring->br);
1132 			}
1133 
1134 			break;
1135 		}
1136 
1137 		drbr_advance(adapter->ifp, tx_ring->br);
1138 
1139 		if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1140 			return;
1141 
1142 		tx_ring->acum_pkts++;
1143 
1144 		BPF_MTAP(adapter->ifp, mbuf);
1145 	}
1146 
1147 	if (likely(tx_ring->acum_pkts != 0)) {
1148 		/* Trigger the dma engine */
1149 		ena_ring_tx_doorbell(tx_ring);
1150 	}
1151 
1152 	if (unlikely(!tx_ring->running))
1153 		taskqueue_enqueue(tx_ring->que->cleanup_tq,
1154 		    &tx_ring->que->cleanup_task);
1155 }
1156