1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 * $FreeBSD: stable/12/usr.sbin/bhyve/net_backends.c 358438 2020-02-28 17:41:46Z vmaffione $
28 */
29
30 /*
31 * This file implements multiple network backends (tap, netmap, ...),
32 * to be used by network frontends such as virtio-net and e1000.
33 * The API to access the backend (e.g. send/receive packets, negotiate
34 * features) is exported by net_backends.h.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: stable/12/usr.sbin/bhyve/net_backends.c 358438 2020-02-28 17:41:46Z vmaffione $");
39
40 #include <sys/types.h> /* u_short etc */
41 #ifndef WITHOUT_CAPSICUM
42 #include <sys/capsicum.h>
43 #endif
44 #include <sys/ioctl.h>
45 #include <sys/mman.h>
46 #include <sys/uio.h>
47
48 #include <net/if.h>
49 #include <net/netmap.h>
50 #include <net/netmap_virt.h>
51 #define NETMAP_WITH_LIBS
52 #include <net/netmap_user.h>
53
54 #ifndef WITHOUT_CAPSICUM
55 #include <capsicum_helpers.h>
56 #endif
57 #include <err.h>
58 #include <errno.h>
59 #include <fcntl.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <stdint.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <sysexits.h>
66 #include <assert.h>
67 #include <pthread.h>
68 #include <pthread_np.h>
69 #include <poll.h>
70 #include <assert.h>
71
72
73 #include "debug.h"
74 #include "iov.h"
75 #include "mevent.h"
76 #include "net_backends.h"
77
78 #include <sys/linker_set.h>
79
80 /*
81 * Each network backend registers a set of function pointers that are
82 * used to implement the net backends API.
83 * This might need to be exposed if we implement backends in separate files.
84 */
85 struct net_backend {
86 const char *prefix; /* prefix matching this backend */
87
88 /*
89 * Routines used to initialize and cleanup the resources needed
90 * by a backend. The cleanup function is used internally,
91 * and should not be called by the frontend.
92 */
93 int (*init)(struct net_backend *be, const char *devname,
94 net_be_rxeof_t cb, void *param);
95 void (*cleanup)(struct net_backend *be);
96
97 /*
98 * Called to serve a guest transmit request. The scatter-gather
99 * vector provided by the caller has 'iovcnt' elements and contains
100 * the packet to send.
101 */
102 ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
103 int iovcnt);
104
105 /*
106 * Get the length of the next packet that can be received from
107 * the backend. If no packets are currently available, this
108 * function returns 0.
109 */
110 ssize_t (*peek_recvlen)(struct net_backend *be);
111
112 /*
113 * Called to receive a packet from the backend. When the function
114 * returns a positive value 'len', the scatter-gather vector
115 * provided by the caller contains a packet with such length.
116 * The function returns 0 if the backend doesn't have a new packet to
117 * receive.
118 */
119 ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
120 int iovcnt);
121
122 /*
123 * Ask the backend to enable or disable receive operation in the
124 * backend. On return from a disable operation, it is guaranteed
125 * that the receive callback won't be called until receive is
126 * enabled again. Note however that it is up to the caller to make
127 * sure that netbe_recv() is not currently being executed by another
128 * thread.
129 */
130 void (*recv_enable)(struct net_backend *be);
131 void (*recv_disable)(struct net_backend *be);
132
133 /*
134 * Ask the backend for the virtio-net features it is able to
135 * support. Possible features are TSO, UFO and checksum offloading
136 * in both rx and tx direction and for both IPv4 and IPv6.
137 */
138 uint64_t (*get_cap)(struct net_backend *be);
139
140 /*
141 * Tell the backend to enable/disable the specified virtio-net
142 * features (capabilities).
143 */
144 int (*set_cap)(struct net_backend *be, uint64_t features,
145 unsigned int vnet_hdr_len);
146
147 struct pci_vtnet_softc *sc;
148 int fd;
149
150 /*
151 * Length of the virtio-net header used by the backend and the
152 * frontend, respectively. A zero value means that the header
153 * is not used.
154 */
155 unsigned int be_vnet_hdr_len;
156 unsigned int fe_vnet_hdr_len;
157
158 /* Size of backend-specific private data. */
159 size_t priv_size;
160
161 /* Room for backend-specific data. */
162 char opaque[0];
163 };
164
165 SET_DECLARE(net_backend_set, struct net_backend);
166
167 #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
168
169 #define WPRINTF(params) PRINTLN params
170
171 /*
172 * The tap backend
173 */
174
/* Private state of a tap backend instance, stored in the 'opaque' area. */
struct tap_priv {
	/* Read event registered for the tap file descriptor. */
	struct mevent *mevp;

	/*
	 * Bounce buffer used to implement the peek_recvlen callback:
	 * a packet read ahead of time is parked here until it is
	 * handed over by the receive routine. In the future the same
	 * information may be obtained from the kevent data.
	 */
	char bbuf[1 << 16];

	/* Length of the parked packet; not positive when bbuf is empty. */
	ssize_t bbuflen;
};
185
186 static void
tap_cleanup(struct net_backend * be)187 tap_cleanup(struct net_backend *be)
188 {
189 struct tap_priv *priv = (struct tap_priv *)be->opaque;
190
191 if (priv->mevp) {
192 mevent_delete(priv->mevp);
193 }
194 if (be->fd != -1) {
195 close(be->fd);
196 be->fd = -1;
197 }
198 }
199
200 static int
tap_init(struct net_backend * be,const char * devname,net_be_rxeof_t cb,void * param)201 tap_init(struct net_backend *be, const char *devname,
202 net_be_rxeof_t cb, void *param)
203 {
204 struct tap_priv *priv = (struct tap_priv *)be->opaque;
205 char tbuf[80];
206 int opt = 1;
207 #ifndef WITHOUT_CAPSICUM
208 cap_rights_t rights;
209 #endif
210
211 if (cb == NULL) {
212 WPRINTF(("TAP backend requires non-NULL callback"));
213 return (-1);
214 }
215
216 strcpy(tbuf, "/dev/");
217 strlcat(tbuf, devname, sizeof(tbuf));
218
219 be->fd = open(tbuf, O_RDWR);
220 if (be->fd == -1) {
221 WPRINTF(("open of tap device %s failed", tbuf));
222 goto error;
223 }
224
225 /*
226 * Set non-blocking and register for read
227 * notifications with the event loop
228 */
229 if (ioctl(be->fd, FIONBIO, &opt) < 0) {
230 WPRINTF(("tap device O_NONBLOCK failed"));
231 goto error;
232 }
233
234 #ifndef WITHOUT_CAPSICUM
235 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
236 if (caph_rights_limit(be->fd, &rights) == -1)
237 errx(EX_OSERR, "Unable to apply rights for sandbox");
238 #endif
239
240 memset(priv->bbuf, 0, sizeof(priv->bbuf));
241 priv->bbuflen = 0;
242
243 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
244 if (priv->mevp == NULL) {
245 WPRINTF(("Could not register event"));
246 goto error;
247 }
248
249 return (0);
250
251 error:
252 tap_cleanup(be);
253 return (-1);
254 }
255
256 /*
257 * Called to send a buffer chain out to the tap device
258 */
259 static ssize_t
tap_send(struct net_backend * be,const struct iovec * iov,int iovcnt)260 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
261 {
262 return (writev(be->fd, iov, iovcnt));
263 }
264
265 static ssize_t
tap_peek_recvlen(struct net_backend * be)266 tap_peek_recvlen(struct net_backend *be)
267 {
268 struct tap_priv *priv = (struct tap_priv *)be->opaque;
269 ssize_t ret;
270
271 if (priv->bbuflen > 0) {
272 /*
273 * We already have a packet in the bounce buffer.
274 * Just return its length.
275 */
276 return priv->bbuflen;
277 }
278
279 /*
280 * Read the next packet (if any) into the bounce buffer, so
281 * that we get to know its length and we can return that
282 * to the caller.
283 */
284 ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
285 if (ret < 0 && errno == EWOULDBLOCK) {
286 return (0);
287 }
288
289 if (ret > 0)
290 priv->bbuflen = ret;
291
292 return (ret);
293 }
294
295 static ssize_t
tap_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)296 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
297 {
298 struct tap_priv *priv = (struct tap_priv *)be->opaque;
299 ssize_t ret;
300
301 if (priv->bbuflen > 0) {
302 /*
303 * A packet is available in the bounce buffer, so
304 * we read it from there.
305 */
306 ret = buf_to_iov(priv->bbuf, priv->bbuflen,
307 iov, iovcnt, 0);
308
309 /* Mark the bounce buffer as empty. */
310 priv->bbuflen = 0;
311
312 return (ret);
313 }
314
315 ret = readv(be->fd, iov, iovcnt);
316 if (ret < 0 && errno == EWOULDBLOCK) {
317 return (0);
318 }
319
320 return (ret);
321 }
322
323 static void
tap_recv_enable(struct net_backend * be)324 tap_recv_enable(struct net_backend *be)
325 {
326 struct tap_priv *priv = (struct tap_priv *)be->opaque;
327
328 mevent_enable(priv->mevp);
329 }
330
331 static void
tap_recv_disable(struct net_backend * be)332 tap_recv_disable(struct net_backend *be)
333 {
334 struct tap_priv *priv = (struct tap_priv *)be->opaque;
335
336 mevent_disable(priv->mevp);
337 }
338
/* Report the features supported by the tap backend: none for now. */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	const uint64_t no_caps = 0;

	return (no_caps);
}
345
/*
 * The tap backend supports no features and no virtio-net header, so
 * any non-zero request is rejected with -1; an all-zero request
 * succeeds with 0.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
    unsigned vnet_hdr_len)
{

	if (features != 0 || vnet_hdr_len != 0)
		return (-1);

	return (0);
}
353
354 static struct net_backend tap_backend = {
355 .prefix = "tap",
356 .priv_size = sizeof(struct tap_priv),
357 .init = tap_init,
358 .cleanup = tap_cleanup,
359 .send = tap_send,
360 .peek_recvlen = tap_peek_recvlen,
361 .recv = tap_recv,
362 .recv_enable = tap_recv_enable,
363 .recv_disable = tap_recv_disable,
364 .get_cap = tap_get_cap,
365 .set_cap = tap_set_cap,
366 };
367
368 /* A clone of the tap backend, with a different prefix. */
369 static struct net_backend vmnet_backend = {
370 .prefix = "vmnet",
371 .priv_size = sizeof(struct tap_priv),
372 .init = tap_init,
373 .cleanup = tap_cleanup,
374 .send = tap_send,
375 .peek_recvlen = tap_peek_recvlen,
376 .recv = tap_recv,
377 .recv_enable = tap_recv_enable,
378 .recv_disable = tap_recv_disable,
379 .get_cap = tap_get_cap,
380 .set_cap = tap_set_cap,
381 };
382
/* Add the tap and vmnet templates to the backend linker set. */
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
385
386 /*
387 * The netmap backend
388 */
389
/*
 * The virtio-net features supported by netmap: checksum, TSO (v4 and
 * v6) and UFO, for both the host and the guest side.
 */
#define NETMAP_FEATURES							\
	(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM |			\
	VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_GUEST_TSO4 |		\
	VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_GUEST_TSO6 |		\
	VIRTIO_NET_F_HOST_UFO | VIRTIO_NET_F_GUEST_UFO)
395
396 struct netmap_priv {
397 char ifname[IFNAMSIZ];
398 struct nm_desc *nmd;
399 uint16_t memid;
400 struct netmap_ring *rx;
401 struct netmap_ring *tx;
402 struct mevent *mevp;
403 net_be_rxeof_t cb;
404 void *cb_param;
405 };
406
407 static void
nmreq_init(struct nmreq * req,char * ifname)408 nmreq_init(struct nmreq *req, char *ifname)
409 {
410
411 memset(req, 0, sizeof(*req));
412 strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
413 req->nr_version = NETMAP_API;
414 }
415
416 static int
netmap_set_vnet_hdr_len(struct net_backend * be,int vnet_hdr_len)417 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
418 {
419 int err;
420 struct nmreq req;
421 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
422
423 nmreq_init(&req, priv->ifname);
424 req.nr_cmd = NETMAP_BDG_VNET_HDR;
425 req.nr_arg1 = vnet_hdr_len;
426 err = ioctl(be->fd, NIOCREGIF, &req);
427 if (err) {
428 WPRINTF(("Unable to set vnet header length %d",
429 vnet_hdr_len));
430 return (err);
431 }
432
433 be->be_vnet_hdr_len = vnet_hdr_len;
434
435 return (0);
436 }
437
438 static int
netmap_has_vnet_hdr_len(struct net_backend * be,unsigned vnet_hdr_len)439 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
440 {
441 int prev_hdr_len = be->be_vnet_hdr_len;
442 int ret;
443
444 if (vnet_hdr_len == prev_hdr_len) {
445 return (1);
446 }
447
448 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
449 if (ret) {
450 return (0);
451 }
452
453 netmap_set_vnet_hdr_len(be, prev_hdr_len);
454
455 return (1);
456 }
457
458 static uint64_t
netmap_get_cap(struct net_backend * be)459 netmap_get_cap(struct net_backend *be)
460 {
461
462 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
463 NETMAP_FEATURES : 0);
464 }
465
/*
 * Apply the negotiated configuration. Only the virtio-net header
 * length is passed on to netmap; 'features' is not used here.
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
    unsigned vnet_hdr_len)
{
	int rc;

	rc = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (rc);
}
473
474 static int
netmap_init(struct net_backend * be,const char * devname,net_be_rxeof_t cb,void * param)475 netmap_init(struct net_backend *be, const char *devname,
476 net_be_rxeof_t cb, void *param)
477 {
478 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
479
480 strlcpy(priv->ifname, devname, sizeof(priv->ifname));
481 priv->ifname[sizeof(priv->ifname) - 1] = '\0';
482
483 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
484 if (priv->nmd == NULL) {
485 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
486 devname, strerror(errno)));
487 free(priv);
488 return (-1);
489 }
490
491 priv->memid = priv->nmd->req.nr_arg2;
492 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
493 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
494 priv->cb = cb;
495 priv->cb_param = param;
496 be->fd = priv->nmd->fd;
497
498 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
499 if (priv->mevp == NULL) {
500 WPRINTF(("Could not register event"));
501 return (-1);
502 }
503
504 return (0);
505 }
506
507 static void
netmap_cleanup(struct net_backend * be)508 netmap_cleanup(struct net_backend *be)
509 {
510 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
511
512 if (priv->mevp) {
513 mevent_delete(priv->mevp);
514 }
515 if (priv->nmd) {
516 nm_close(priv->nmd);
517 }
518 be->fd = -1;
519 }
520
521 static ssize_t
netmap_send(struct net_backend * be,const struct iovec * iov,int iovcnt)522 netmap_send(struct net_backend *be, const struct iovec *iov,
523 int iovcnt)
524 {
525 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
526 struct netmap_ring *ring;
527 ssize_t totlen = 0;
528 int nm_buf_size;
529 int nm_buf_len;
530 uint32_t head;
531 void *nm_buf;
532 int j;
533
534 ring = priv->tx;
535 head = ring->head;
536 if (head == ring->tail) {
537 WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
538 goto txsync;
539 }
540 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
541 nm_buf_size = ring->nr_buf_size;
542 nm_buf_len = 0;
543
544 for (j = 0; j < iovcnt; j++) {
545 int iov_frag_size = iov[j].iov_len;
546 void *iov_frag_buf = iov[j].iov_base;
547
548 totlen += iov_frag_size;
549
550 /*
551 * Split each iovec fragment over more netmap slots, if
552 * necessary.
553 */
554 for (;;) {
555 int copylen;
556
557 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
558 memcpy(nm_buf, iov_frag_buf, copylen);
559
560 iov_frag_buf += copylen;
561 iov_frag_size -= copylen;
562 nm_buf += copylen;
563 nm_buf_size -= copylen;
564 nm_buf_len += copylen;
565
566 if (iov_frag_size == 0) {
567 break;
568 }
569
570 ring->slot[head].len = nm_buf_len;
571 ring->slot[head].flags = NS_MOREFRAG;
572 head = nm_ring_next(ring, head);
573 if (head == ring->tail) {
574 /*
575 * We ran out of netmap slots while
576 * splitting the iovec fragments.
577 */
578 WPRINTF(("No space, drop %zu bytes",
579 count_iov(iov, iovcnt)));
580 goto txsync;
581 }
582 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
583 nm_buf_size = ring->nr_buf_size;
584 nm_buf_len = 0;
585 }
586 }
587
588 /* Complete the last slot, which must not have NS_MOREFRAG set. */
589 ring->slot[head].len = nm_buf_len;
590 ring->slot[head].flags = 0;
591 head = nm_ring_next(ring, head);
592
593 /* Now update ring->head and ring->cur. */
594 ring->head = ring->cur = head;
595 txsync:
596 ioctl(be->fd, NIOCTXSYNC, NULL);
597
598 return (totlen);
599 }
600
601 static ssize_t
netmap_peek_recvlen(struct net_backend * be)602 netmap_peek_recvlen(struct net_backend *be)
603 {
604 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
605 struct netmap_ring *ring = priv->rx;
606 uint32_t head = ring->head;
607 ssize_t totlen = 0;
608
609 while (head != ring->tail) {
610 struct netmap_slot *slot = ring->slot + head;
611
612 totlen += slot->len;
613 if ((slot->flags & NS_MOREFRAG) == 0)
614 break;
615 head = nm_ring_next(ring, head);
616 }
617
618 return (totlen);
619 }
620
621 static ssize_t
netmap_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)622 netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
623 {
624 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
625 struct netmap_slot *slot = NULL;
626 struct netmap_ring *ring;
627 void *iov_frag_buf;
628 int iov_frag_size;
629 ssize_t totlen = 0;
630 uint32_t head;
631
632 assert(iovcnt);
633
634 ring = priv->rx;
635 head = ring->head;
636 iov_frag_buf = iov->iov_base;
637 iov_frag_size = iov->iov_len;
638
639 do {
640 int nm_buf_len;
641 void *nm_buf;
642
643 if (head == ring->tail) {
644 return (0);
645 }
646
647 slot = ring->slot + head;
648 nm_buf = NETMAP_BUF(ring, slot->buf_idx);
649 nm_buf_len = slot->len;
650
651 for (;;) {
652 int copylen = nm_buf_len < iov_frag_size ?
653 nm_buf_len : iov_frag_size;
654
655 memcpy(iov_frag_buf, nm_buf, copylen);
656 nm_buf += copylen;
657 nm_buf_len -= copylen;
658 iov_frag_buf += copylen;
659 iov_frag_size -= copylen;
660 totlen += copylen;
661
662 if (nm_buf_len == 0) {
663 break;
664 }
665
666 iov++;
667 iovcnt--;
668 if (iovcnt == 0) {
669 /* No space to receive. */
670 WPRINTF(("Short iov, drop %zd bytes",
671 totlen));
672 return (-ENOSPC);
673 }
674 iov_frag_buf = iov->iov_base;
675 iov_frag_size = iov->iov_len;
676 }
677
678 head = nm_ring_next(ring, head);
679
680 } while (slot->flags & NS_MOREFRAG);
681
682 /* Release slots to netmap. */
683 ring->head = ring->cur = head;
684
685 return (totlen);
686 }
687
688 static void
netmap_recv_enable(struct net_backend * be)689 netmap_recv_enable(struct net_backend *be)
690 {
691 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
692
693 mevent_enable(priv->mevp);
694 }
695
696 static void
netmap_recv_disable(struct net_backend * be)697 netmap_recv_disable(struct net_backend *be)
698 {
699 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
700
701 mevent_disable(priv->mevp);
702 }
703
704 static struct net_backend netmap_backend = {
705 .prefix = "netmap",
706 .priv_size = sizeof(struct netmap_priv),
707 .init = netmap_init,
708 .cleanup = netmap_cleanup,
709 .send = netmap_send,
710 .peek_recvlen = netmap_peek_recvlen,
711 .recv = netmap_recv,
712 .recv_enable = netmap_recv_enable,
713 .recv_disable = netmap_recv_disable,
714 .get_cap = netmap_get_cap,
715 .set_cap = netmap_set_cap,
716 };
717
718 /* A clone of the netmap backend, with a different prefix. */
719 static struct net_backend vale_backend = {
720 .prefix = "vale",
721 .priv_size = sizeof(struct netmap_priv),
722 .init = netmap_init,
723 .cleanup = netmap_cleanup,
724 .send = netmap_send,
725 .peek_recvlen = netmap_peek_recvlen,
726 .recv = netmap_recv,
727 .recv_enable = netmap_recv_enable,
728 .recv_disable = netmap_recv_disable,
729 .get_cap = netmap_get_cap,
730 .set_cap = netmap_set_cap,
731 };
732
/* Add the netmap and vale templates to the backend linker set. */
DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
735
736 /*
737 * Initialize a backend and attach to the frontend.
738 * This is called during frontend initialization.
739 * @pbe is a pointer to the backend to be initialized
740 * @devname is the backend-name as supplied on the command line,
741 * e.g. -s 2:0,frontend-name,backend-name[,other-args]
742 * @cb is the receive callback supplied by the frontend,
743 * and it is invoked in the event loop when a receive
744 * event is generated in the hypervisor,
745 * @param is a pointer to the frontend, and normally used as
746 * the argument for the callback.
747 */
748 int
netbe_init(struct net_backend ** ret,const char * devname,net_be_rxeof_t cb,void * param)749 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
750 void *param)
751 {
752 struct net_backend **pbe, *nbe, *tbe = NULL;
753 int err;
754
755 /*
756 * Find the network backend that matches the user-provided
757 * device name. net_backend_set is built using a linker set.
758 */
759 SET_FOREACH(pbe, net_backend_set) {
760 if (strncmp(devname, (*pbe)->prefix,
761 strlen((*pbe)->prefix)) == 0) {
762 tbe = *pbe;
763 assert(tbe->init != NULL);
764 assert(tbe->cleanup != NULL);
765 assert(tbe->send != NULL);
766 assert(tbe->recv != NULL);
767 assert(tbe->get_cap != NULL);
768 assert(tbe->set_cap != NULL);
769 break;
770 }
771 }
772
773 *ret = NULL;
774 if (tbe == NULL)
775 return (EINVAL);
776 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
777 *nbe = *tbe; /* copy the template */
778 nbe->fd = -1;
779 nbe->sc = param;
780 nbe->be_vnet_hdr_len = 0;
781 nbe->fe_vnet_hdr_len = 0;
782
783 /* Initialize the backend. */
784 err = nbe->init(nbe, devname, cb, param);
785 if (err) {
786 free(nbe);
787 return (err);
788 }
789
790 *ret = nbe;
791
792 return (0);
793 }
794
795 void
netbe_cleanup(struct net_backend * be)796 netbe_cleanup(struct net_backend *be)
797 {
798
799 if (be != NULL) {
800 be->cleanup(be);
801 free(be);
802 }
803 }
804
805 uint64_t
netbe_get_cap(struct net_backend * be)806 netbe_get_cap(struct net_backend *be)
807 {
808
809 assert(be != NULL);
810 return (be->get_cap(be));
811 }
812
813 int
netbe_set_cap(struct net_backend * be,uint64_t features,unsigned vnet_hdr_len)814 netbe_set_cap(struct net_backend *be, uint64_t features,
815 unsigned vnet_hdr_len)
816 {
817 int ret;
818
819 assert(be != NULL);
820
821 /* There are only three valid lengths, i.e., 0, 10 and 12. */
822 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
823 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
824 return (-1);
825
826 be->fe_vnet_hdr_len = vnet_hdr_len;
827
828 ret = be->set_cap(be, features, vnet_hdr_len);
829 assert(be->be_vnet_hdr_len == 0 ||
830 be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
831
832 return (ret);
833 }
834
835 ssize_t
netbe_send(struct net_backend * be,const struct iovec * iov,int iovcnt)836 netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
837 {
838
839 return (be->send(be, iov, iovcnt));
840 }
841
842 ssize_t
netbe_peek_recvlen(struct net_backend * be)843 netbe_peek_recvlen(struct net_backend *be)
844 {
845
846 return (be->peek_recvlen(be));
847 }
848
849 /*
850 * Try to read a packet from the backend, without blocking.
851 * If no packets are available, return 0. In case of success, return
852 * the length of the packet just read. Return -1 in case of errors.
853 */
854 ssize_t
netbe_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)855 netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
856 {
857
858 return (be->recv(be, iov, iovcnt));
859 }
860
/*
 * Read a packet from the backend and discard it.
 * Returns the size of the discarded packet or zero if no packet was
 * available. A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec sink = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return (netbe_recv(be, &sink, 1));
}
882
883 void
netbe_rx_disable(struct net_backend * be)884 netbe_rx_disable(struct net_backend *be)
885 {
886
887 return be->recv_disable(be);
888 }
889
890 void
netbe_rx_enable(struct net_backend * be)891 netbe_rx_enable(struct net_backend *be)
892 {
893
894 return be->recv_enable(be);
895 }
896
897 size_t
netbe_get_vnet_hdr_len(struct net_backend * be)898 netbe_get_vnet_hdr_len(struct net_backend *be)
899 {
900
901 return (be->be_vnet_hdr_len);
902 }
903