1 /*
2 * Copyright (C) 2013-2016 Universita` di Pisa
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27
/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a new port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */
52
53 /*
54 * OS-specific code that is used only within this file.
55 * Other OS-specific code that must be accessed by drivers
56 * is present in netmap_kern.h
57 */
58
59 #if defined(__FreeBSD__)
60 #include <sys/cdefs.h> /* prerequisite */
61 #include <sys/types.h>
62 #include <sys/errno.h>
63 #include <sys/param.h> /* defines used in kernel.h */
64 #include <sys/kernel.h> /* types used in module initialization */
65 #include <sys/conf.h> /* cdevsw struct, UID, GID */
66 #include <sys/sockio.h>
67 #include <sys/socketvar.h> /* struct socket */
68 #include <sys/malloc.h>
69 #include <sys/poll.h>
70 #include <sys/rwlock.h>
71 #include <sys/socket.h> /* sockaddrs */
72 #include <sys/selinfo.h>
73 #include <sys/sysctl.h>
74 #include <net/if.h>
75 #include <net/if_var.h>
76 #include <net/bpf.h> /* BIOCIMMEDIATE */
77 #include <machine/bus.h> /* bus_dmamap_* */
78 #include <sys/endian.h>
79 #include <sys/refcount.h>
80 #include <sys/smp.h>
81
82
83 #elif defined(linux)
84
85 #include "bsd_glue.h"
86
87 #elif defined(__APPLE__)
88
89 #warning OSX support is only partial
90 #include "osx_glue.h"
91
92 #elif defined(_WIN32)
93 #include "win_glue.h"
94
95 #else
96
97 #error Unsupported platform
98
99 #endif /* unsupported */
100
101 /*
102 * common headers
103 */
104
105 #include <net/netmap.h>
106 #include <dev/netmap/netmap_kern.h>
107 #include <dev/netmap/netmap_mem2.h>
108
109 #include <dev/netmap/netmap_bdg.h>
110
111 const char*
netmap_bdg_name(struct netmap_vp_adapter * vp)112 netmap_bdg_name(struct netmap_vp_adapter *vp)
113 {
114 struct nm_bridge *b = vp->na_bdg;
115 if (b == NULL)
116 return NULL;
117 return b->bdg_basename;
118 }
119
120
#ifndef CONFIG_NET_NS
/*
 * Global table of bridges, used when CONFIG_NET_NS is not defined.
 *
 * XXX in principle nm_bridges could be created dynamically.
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
129
130
/*
 * Return 1 if 'c' may appear in a bridge name, i.e. it is an ASCII
 * letter, an ASCII digit or an underscore; return 0 otherwise.
 */
static int
nm_is_id_char(const char c)
{
	if (c == '_')
		return 1;
	if ('0' <= c && c <= '9')
		return 1;
	if ('A' <= c && c <= 'Z')
		return 1;
	return ('a' <= c && c <= 'z');
}
139
140 /* Validate the name of a bdg port and return the
141 * position of the ":" character. */
142 static int
nm_bdg_name_validate(const char * name,size_t prefixlen)143 nm_bdg_name_validate(const char *name, size_t prefixlen)
144 {
145 int colon_pos = -1;
146 int i;
147
148 if (!name || strlen(name) < prefixlen) {
149 return -1;
150 }
151
152 for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
153 if (name[i] == ':') {
154 colon_pos = i;
155 break;
156 } else if (!nm_is_id_char(name[i])) {
157 return -1;
158 }
159 }
160
161 if (strlen(name) - colon_pos > IFNAMSIZ) {
162 /* interface name too long */
163 return -1;
164 }
165
166 return colon_pos;
167 }
168
169 /*
170 * locate a bridge among the existing ones.
171 * MUST BE CALLED WITH NMG_LOCK()
172 *
173 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
174 * We assume that this is called with a name of at least NM_NAME chars.
175 */
176 struct nm_bridge *
nm_find_bridge(const char * name,int create,struct netmap_bdg_ops * ops)177 nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
178 {
179 int i, namelen;
180 struct nm_bridge *b = NULL, *bridges;
181 u_int num_bridges;
182
183 NMG_LOCK_ASSERT();
184
185 netmap_bns_getbridges(&bridges, &num_bridges);
186
187 namelen = nm_bdg_name_validate(name,
188 (ops != NULL ? strlen(ops->name) : 0));
189 if (namelen < 0) {
190 nm_prerr("invalid bridge name %s", name ? name : NULL);
191 return NULL;
192 }
193
194 /* lookup the name, remember empty slot if there is one */
195 for (i = 0; i < num_bridges; i++) {
196 struct nm_bridge *x = bridges + i;
197
198 if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
199 if (create && b == NULL)
200 b = x; /* record empty slot */
201 } else if (x->bdg_namelen != namelen) {
202 continue;
203 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
204 nm_prdis("found '%.*s' at %d", namelen, name, i);
205 b = x;
206 break;
207 }
208 }
209 if (i == num_bridges && b) { /* name not found, can create entry */
210 /* initialize the bridge */
211 nm_prdis("create new bridge %s with ports %d", b->bdg_basename,
212 b->bdg_active_ports);
213 b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
214 if (b->ht == NULL) {
215 nm_prerr("failed to allocate hash table");
216 return NULL;
217 }
218 strncpy(b->bdg_basename, name, namelen);
219 b->bdg_namelen = namelen;
220 b->bdg_active_ports = 0;
221 for (i = 0; i < NM_BDG_MAXPORTS; i++)
222 b->bdg_port_index[i] = i;
223 /* set the default function */
224 b->bdg_ops = b->bdg_saved_ops = *ops;
225 b->private_data = b->ht;
226 b->bdg_flags = 0;
227 NM_BNS_GET(b);
228 }
229 return b;
230 }
231
232
233 int
netmap_bdg_free(struct nm_bridge * b)234 netmap_bdg_free(struct nm_bridge *b)
235 {
236 if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
237 return EBUSY;
238 }
239
240 nm_prdis("marking bridge %s as free", b->bdg_basename);
241 nm_os_free(b->ht);
242 memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
243 memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
244 b->bdg_flags = 0;
245 NM_BNS_PUT(b);
246 return 0;
247 }
248
249 /* Called by external kernel modules (e.g., Openvswitch).
250 * to modify the private data previously given to regops().
251 * 'name' may be just bridge's name (including ':' if it
252 * is not just NM_BDG_NAME).
253 * Called without NMG_LOCK.
254 */
255 int
netmap_bdg_update_private_data(const char * name,bdg_update_private_data_fn_t callback,void * callback_data,void * auth_token)256 netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
257 void *callback_data, void *auth_token)
258 {
259 void *private_data = NULL;
260 struct nm_bridge *b;
261 int error = 0;
262
263 NMG_LOCK();
264 b = nm_find_bridge(name, 0 /* don't create */, NULL);
265 if (!b) {
266 error = EINVAL;
267 goto unlock_update_priv;
268 }
269 if (!nm_bdg_valid_auth_token(b, auth_token)) {
270 error = EACCES;
271 goto unlock_update_priv;
272 }
273 BDG_WLOCK(b);
274 private_data = callback(b->private_data, callback_data, &error);
275 b->private_data = private_data;
276 BDG_WUNLOCK(b);
277
278 unlock_update_priv:
279 NMG_UNLOCK();
280 return error;
281 }
282
283
284
285 /* remove from bridge b the ports in slots hw and sw
286 * (sw can be -1 if not needed)
287 */
288 void
netmap_bdg_detach_common(struct nm_bridge * b,int hw,int sw)289 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
290 {
291 int s_hw = hw, s_sw = sw;
292 int i, lim =b->bdg_active_ports;
293 uint32_t *tmp = b->tmp_bdg_port_index;
294
295 /*
296 New algorithm:
297 make a copy of bdg_port_index;
298 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
299 in the array of bdg_port_index, replacing them with
300 entries from the bottom of the array;
301 decrement bdg_active_ports;
302 acquire BDG_WLOCK() and copy back the array.
303 */
304
305 if (netmap_debug & NM_DEBUG_BDG)
306 nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
307 /* make a copy of the list of active ports, update it,
308 * and then copy back within BDG_WLOCK().
309 */
310 memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
311 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
312 if (hw >= 0 && tmp[i] == hw) {
313 nm_prdis("detach hw %d at %d", hw, i);
314 lim--; /* point to last active port */
315 tmp[i] = tmp[lim]; /* swap with i */
316 tmp[lim] = hw; /* now this is inactive */
317 hw = -1;
318 } else if (sw >= 0 && tmp[i] == sw) {
319 nm_prdis("detach sw %d at %d", sw, i);
320 lim--;
321 tmp[i] = tmp[lim];
322 tmp[lim] = sw;
323 sw = -1;
324 } else {
325 i++;
326 }
327 }
328 if (hw >= 0 || sw >= 0) {
329 nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
330 }
331
332 BDG_WLOCK(b);
333 if (b->bdg_ops.dtor)
334 b->bdg_ops.dtor(b->bdg_ports[s_hw]);
335 b->bdg_ports[s_hw] = NULL;
336 if (s_sw >= 0) {
337 b->bdg_ports[s_sw] = NULL;
338 }
339 memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
340 b->bdg_active_ports = lim;
341 BDG_WUNLOCK(b);
342
343 nm_prdis("now %d active ports", lim);
344 netmap_bdg_free(b);
345 }
346
347
348 /* nm_bdg_ctl callback for VALE ports */
349 int
netmap_vp_bdg_ctl(struct nmreq_header * hdr,struct netmap_adapter * na)350 netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
351 {
352 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
353 struct nm_bridge *b = vpna->na_bdg;
354
355 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
356 return 0; /* nothing to do */
357 }
358 if (b) {
359 netmap_set_all_rings(na, 0 /* disable */);
360 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
361 vpna->na_bdg = NULL;
362 netmap_set_all_rings(na, 1 /* enable */);
363 }
364 /* I have took reference just for attach */
365 netmap_adapter_put(na);
366 return 0;
367 }
368
369 int
netmap_default_bdg_attach(const char * name,struct netmap_adapter * na,struct nm_bridge * b)370 netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
371 struct nm_bridge *b)
372 {
373 return NM_NEED_BWRAP;
374 }
375
376 /* Try to get a reference to a netmap adapter attached to a VALE switch.
377 * If the adapter is found (or is created), this function returns 0, a
378 * non NULL pointer is returned into *na, and the caller holds a
379 * reference to the adapter.
380 * If an adapter is not found, then no reference is grabbed and the
381 * function returns an error code, or 0 if there is just a VALE prefix
382 * mismatch. Therefore the caller holds a reference when
383 * (*na != NULL && return == 0).
384 */
385 int
netmap_get_bdg_na(struct nmreq_header * hdr,struct netmap_adapter ** na,struct netmap_mem_d * nmd,int create,struct netmap_bdg_ops * ops)386 netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
387 struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
388 {
389 char *nr_name = hdr->nr_name;
390 const char *ifname;
391 struct ifnet *ifp = NULL;
392 int error = 0;
393 struct netmap_vp_adapter *vpna, *hostna = NULL;
394 struct nm_bridge *b;
395 uint32_t i, j;
396 uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
397 int needed;
398
399 *na = NULL; /* default return value */
400
401 /* first try to see if this is a bridge port. */
402 NMG_LOCK_ASSERT();
403 if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
404 return 0; /* no error, but no VALE prefix */
405 }
406
407 b = nm_find_bridge(nr_name, create, ops);
408 if (b == NULL) {
409 nm_prdis("no bridges available for '%s'", nr_name);
410 return (create ? ENOMEM : ENXIO);
411 }
412 if (strlen(nr_name) < b->bdg_namelen) /* impossible */
413 panic("x");
414
415 /* Now we are sure that name starts with the bridge's name,
416 * lookup the port in the bridge. We need to scan the entire
417 * list. It is not important to hold a WLOCK on the bridge
418 * during the search because NMG_LOCK already guarantees
419 * that there are no other possible writers.
420 */
421
422 /* lookup in the local list of ports */
423 for (j = 0; j < b->bdg_active_ports; j++) {
424 i = b->bdg_port_index[j];
425 vpna = b->bdg_ports[i];
426 nm_prdis("checking %s", vpna->up.name);
427 if (!strcmp(vpna->up.name, nr_name)) {
428 netmap_adapter_get(&vpna->up);
429 nm_prdis("found existing if %s refs %d", nr_name)
430 *na = &vpna->up;
431 return 0;
432 }
433 }
434 /* not found, should we create it? */
435 if (!create)
436 return ENXIO;
437 /* yes we should, see if we have space to attach entries */
438 needed = 2; /* in some cases we only need 1 */
439 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
440 nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
441 return ENOMEM;
442 }
443 /* record the next two ports available, but do not allocate yet */
444 cand = b->bdg_port_index[b->bdg_active_ports];
445 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
446 nm_prdis("+++ bridge %s port %s used %d avail %d %d",
447 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
448
449 /*
450 * try see if there is a matching NIC with this name
451 * (after the bridge's name)
452 */
453 ifname = nr_name + b->bdg_namelen + 1;
454 ifp = ifunit_ref(ifname);
455 if (!ifp) {
456 /* Create an ephemeral virtual port.
457 * This block contains all the ephemeral-specific logic.
458 */
459
460 if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
461 error = EINVAL;
462 goto out;
463 }
464
465 /* bdg_netmap_attach creates a struct netmap_adapter */
466 error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
467 if (error) {
468 if (netmap_debug & NM_DEBUG_BDG)
469 nm_prerr("error %d", error);
470 goto out;
471 }
472 /* shortcut - we can skip get_hw_na(),
473 * ownership check and nm_bdg_attach()
474 */
475
476 } else {
477 struct netmap_adapter *hw;
478
479 /* the vale:nic syntax is only valid for some commands */
480 switch (hdr->nr_reqtype) {
481 case NETMAP_REQ_VALE_ATTACH:
482 case NETMAP_REQ_VALE_DETACH:
483 case NETMAP_REQ_VALE_POLLING_ENABLE:
484 case NETMAP_REQ_VALE_POLLING_DISABLE:
485 break; /* ok */
486 default:
487 error = EINVAL;
488 goto out;
489 }
490
491 error = netmap_get_hw_na(ifp, nmd, &hw);
492 if (error || hw == NULL)
493 goto out;
494
495 /* host adapter might not be created */
496 error = hw->nm_bdg_attach(nr_name, hw, b);
497 if (error == NM_NEED_BWRAP) {
498 error = b->bdg_ops.bwrap_attach(nr_name, hw);
499 }
500 if (error)
501 goto out;
502 vpna = hw->na_vp;
503 hostna = hw->na_hostvp;
504 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
505 /* Check if we need to skip the host rings. */
506 struct nmreq_vale_attach *areq =
507 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
508 if (areq->reg.nr_mode != NR_REG_NIC_SW) {
509 hostna = NULL;
510 }
511 }
512 }
513
514 BDG_WLOCK(b);
515 vpna->bdg_port = cand;
516 nm_prdis("NIC %p to bridge port %d", vpna, cand);
517 /* bind the port to the bridge (virtual ports are not active) */
518 b->bdg_ports[cand] = vpna;
519 vpna->na_bdg = b;
520 b->bdg_active_ports++;
521 if (hostna != NULL) {
522 /* also bind the host stack to the bridge */
523 b->bdg_ports[cand2] = hostna;
524 hostna->bdg_port = cand2;
525 hostna->na_bdg = b;
526 b->bdg_active_ports++;
527 nm_prdis("host %p to bridge port %d", hostna, cand2);
528 }
529 nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount);
530 BDG_WUNLOCK(b);
531 *na = &vpna->up;
532 netmap_adapter_get(*na);
533
534 out:
535 if (ifp)
536 if_rele(ifp);
537
538 return error;
539 }
540
541
542 int
nm_is_bwrap(struct netmap_adapter * na)543 nm_is_bwrap(struct netmap_adapter *na)
544 {
545 return na->nm_register == netmap_bwrap_reg;
546 }
547
548
549 struct nm_bdg_polling_state;
550 struct
551 nm_bdg_kthread {
552 struct nm_kctx *nmk;
553 u_int qfirst;
554 u_int qlast;
555 struct nm_bdg_polling_state *bps;
556 };
557
558 struct nm_bdg_polling_state {
559 bool configured;
560 bool stopped;
561 struct netmap_bwrap_adapter *bna;
562 uint32_t mode;
563 u_int qfirst;
564 u_int qlast;
565 u_int cpu_from;
566 u_int ncpus;
567 struct nm_bdg_kthread *kthreads;
568 };
569
570 static void
netmap_bwrap_polling(void * data)571 netmap_bwrap_polling(void *data)
572 {
573 struct nm_bdg_kthread *nbk = data;
574 struct netmap_bwrap_adapter *bna;
575 u_int qfirst, qlast, i;
576 struct netmap_kring **kring0, *kring;
577
578 if (!nbk)
579 return;
580 qfirst = nbk->qfirst;
581 qlast = nbk->qlast;
582 bna = nbk->bps->bna;
583 kring0 = NMR(bna->hwna, NR_RX);
584
585 for (i = qfirst; i < qlast; i++) {
586 kring = kring0[i];
587 kring->nm_notify(kring, 0);
588 }
589 }
590
591 static int
nm_bdg_create_kthreads(struct nm_bdg_polling_state * bps)592 nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
593 {
594 struct nm_kctx_cfg kcfg;
595 int i, j;
596
597 bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
598 if (bps->kthreads == NULL)
599 return ENOMEM;
600
601 bzero(&kcfg, sizeof(kcfg));
602 kcfg.worker_fn = netmap_bwrap_polling;
603 for (i = 0; i < bps->ncpus; i++) {
604 struct nm_bdg_kthread *t = bps->kthreads + i;
605 int all = (bps->ncpus == 1 &&
606 bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
607 int affinity = bps->cpu_from + i;
608
609 t->bps = bps;
610 t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
611 t->qlast = all ? bps->qlast : t->qfirst + 1;
612 if (netmap_verbose)
613 nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
614 t->qlast);
615
616 kcfg.type = i;
617 kcfg.worker_private = t;
618 t->nmk = nm_os_kctx_create(&kcfg, NULL);
619 if (t->nmk == NULL) {
620 goto cleanup;
621 }
622 nm_os_kctx_worker_setaff(t->nmk, affinity);
623 }
624 return 0;
625
626 cleanup:
627 for (j = 0; j < i; j++) {
628 struct nm_bdg_kthread *t = bps->kthreads + i;
629 nm_os_kctx_destroy(t->nmk);
630 }
631 nm_os_free(bps->kthreads);
632 return EFAULT;
633 }
634
635 /* A variant of ptnetmap_start_kthreads() */
636 static int
nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state * bps)637 nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
638 {
639 int error, i, j;
640
641 if (!bps) {
642 nm_prerr("polling is not configured");
643 return EFAULT;
644 }
645 bps->stopped = false;
646
647 for (i = 0; i < bps->ncpus; i++) {
648 struct nm_bdg_kthread *t = bps->kthreads + i;
649 error = nm_os_kctx_worker_start(t->nmk);
650 if (error) {
651 nm_prerr("error in nm_kthread_start(): %d", error);
652 goto cleanup;
653 }
654 }
655 return 0;
656
657 cleanup:
658 for (j = 0; j < i; j++) {
659 struct nm_bdg_kthread *t = bps->kthreads + i;
660 nm_os_kctx_worker_stop(t->nmk);
661 }
662 bps->stopped = true;
663 return error;
664 }
665
666 static void
nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state * bps)667 nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
668 {
669 int i;
670
671 if (!bps)
672 return;
673
674 for (i = 0; i < bps->ncpus; i++) {
675 struct nm_bdg_kthread *t = bps->kthreads + i;
676 nm_os_kctx_worker_stop(t->nmk);
677 nm_os_kctx_destroy(t->nmk);
678 }
679 bps->stopped = true;
680 }
681
682 static int
get_polling_cfg(struct nmreq_vale_polling * req,struct netmap_adapter * na,struct nm_bdg_polling_state * bps)683 get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
684 struct nm_bdg_polling_state *bps)
685 {
686 unsigned int avail_cpus, core_from;
687 unsigned int qfirst, qlast;
688 uint32_t i = req->nr_first_cpu_id;
689 uint32_t req_cpus = req->nr_num_polling_cpus;
690
691 avail_cpus = nm_os_ncpus();
692
693 if (req_cpus == 0) {
694 nm_prerr("req_cpus must be > 0");
695 return EINVAL;
696 } else if (req_cpus >= avail_cpus) {
697 nm_prerr("Cannot use all the CPUs in the system");
698 return EINVAL;
699 }
700
701 if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
702 /* Use a separate core for each ring. If nr_num_polling_cpus>1
703 * more consecutive rings are polled.
704 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
705 * ring 2 and 3 are polled by core 2 and 3, respectively. */
706 if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
707 nm_prerr("Rings %u-%u not in range (have %d rings)",
708 i, i + req_cpus, nma_get_nrings(na, NR_RX));
709 return EINVAL;
710 }
711 qfirst = i;
712 qlast = qfirst + req_cpus;
713 core_from = qfirst;
714
715 } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
716 /* Poll all the rings using a core specified by nr_first_cpu_id.
717 * the number of cores must be 1. */
718 if (req_cpus != 1) {
719 nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
720 "(was %d)", req_cpus);
721 return EINVAL;
722 }
723 qfirst = 0;
724 qlast = nma_get_nrings(na, NR_RX);
725 core_from = i;
726 } else {
727 nm_prerr("Invalid polling mode");
728 return EINVAL;
729 }
730
731 bps->mode = req->nr_mode;
732 bps->qfirst = qfirst;
733 bps->qlast = qlast;
734 bps->cpu_from = core_from;
735 bps->ncpus = req_cpus;
736 nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
737 req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
738 "MULTI" : "SINGLE",
739 qfirst, qlast, core_from, req_cpus);
740 return 0;
741 }
742
743 static int
nm_bdg_ctl_polling_start(struct nmreq_vale_polling * req,struct netmap_adapter * na)744 nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
745 {
746 struct nm_bdg_polling_state *bps;
747 struct netmap_bwrap_adapter *bna;
748 int error;
749
750 bna = (struct netmap_bwrap_adapter *)na;
751 if (bna->na_polling_state) {
752 nm_prerr("ERROR adapter already in polling mode");
753 return EFAULT;
754 }
755
756 bps = nm_os_malloc(sizeof(*bps));
757 if (!bps)
758 return ENOMEM;
759 bps->configured = false;
760 bps->stopped = true;
761
762 if (get_polling_cfg(req, na, bps)) {
763 nm_os_free(bps);
764 return EINVAL;
765 }
766
767 if (nm_bdg_create_kthreads(bps)) {
768 nm_os_free(bps);
769 return EFAULT;
770 }
771
772 bps->configured = true;
773 bna->na_polling_state = bps;
774 bps->bna = bna;
775
776 /* disable interrupts if possible */
777 nma_intr_enable(bna->hwna, 0);
778 /* start kthread now */
779 error = nm_bdg_polling_start_kthreads(bps);
780 if (error) {
781 nm_prerr("ERROR nm_bdg_polling_start_kthread()");
782 nm_os_free(bps->kthreads);
783 nm_os_free(bps);
784 bna->na_polling_state = NULL;
785 nma_intr_enable(bna->hwna, 1);
786 }
787 return error;
788 }
789
790 static int
nm_bdg_ctl_polling_stop(struct netmap_adapter * na)791 nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
792 {
793 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
794 struct nm_bdg_polling_state *bps;
795
796 if (!bna->na_polling_state) {
797 nm_prerr("ERROR adapter is not in polling mode");
798 return EFAULT;
799 }
800 bps = bna->na_polling_state;
801 nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
802 bps->configured = false;
803 nm_os_free(bps);
804 bna->na_polling_state = NULL;
805 /* re-enable interrupts */
806 nma_intr_enable(bna->hwna, 1);
807 return 0;
808 }
809
810 int
nm_bdg_polling(struct nmreq_header * hdr)811 nm_bdg_polling(struct nmreq_header *hdr)
812 {
813 struct nmreq_vale_polling *req =
814 (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
815 struct netmap_adapter *na = NULL;
816 int error = 0;
817
818 NMG_LOCK();
819 error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
820 if (na && !error) {
821 if (!nm_is_bwrap(na)) {
822 error = EOPNOTSUPP;
823 } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
824 error = nm_bdg_ctl_polling_start(req, na);
825 if (!error)
826 netmap_adapter_get(na);
827 } else {
828 error = nm_bdg_ctl_polling_stop(na);
829 if (!error)
830 netmap_adapter_put(na);
831 }
832 netmap_adapter_put(na);
833 } else if (!na && !error) {
834 /* Not VALE port. */
835 error = EINVAL;
836 }
837 NMG_UNLOCK();
838
839 return error;
840 }
841
842 /* Called by external kernel modules (e.g., Openvswitch).
843 * to set configure/lookup/dtor functions of a VALE instance.
844 * Register callbacks to the given bridge. 'name' may be just
845 * bridge's name (including ':' if it is not just NM_BDG_NAME).
846 *
847 * Called without NMG_LOCK.
848 */
849
850 int
netmap_bdg_regops(const char * name,struct netmap_bdg_ops * bdg_ops,void * private_data,void * auth_token)851 netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
852 {
853 struct nm_bridge *b;
854 int error = 0;
855
856 NMG_LOCK();
857 b = nm_find_bridge(name, 0 /* don't create */, NULL);
858 if (!b) {
859 error = ENXIO;
860 goto unlock_regops;
861 }
862 if (!nm_bdg_valid_auth_token(b, auth_token)) {
863 error = EACCES;
864 goto unlock_regops;
865 }
866
867 BDG_WLOCK(b);
868 if (!bdg_ops) {
869 /* resetting the bridge */
870 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
871 b->bdg_ops = b->bdg_saved_ops;
872 b->private_data = b->ht;
873 } else {
874 /* modifying the bridge */
875 b->private_data = private_data;
876 #define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
877 nm_bdg_override(lookup);
878 nm_bdg_override(config);
879 nm_bdg_override(dtor);
880 nm_bdg_override(vp_create);
881 nm_bdg_override(bwrap_attach);
882 #undef nm_bdg_override
883
884 }
885 BDG_WUNLOCK(b);
886
887 unlock_regops:
888 NMG_UNLOCK();
889 return error;
890 }
891
892
893 int
netmap_bdg_config(struct nm_ifreq * nr)894 netmap_bdg_config(struct nm_ifreq *nr)
895 {
896 struct nm_bridge *b;
897 int error = EINVAL;
898
899 NMG_LOCK();
900 b = nm_find_bridge(nr->nifr_name, 0, NULL);
901 if (!b) {
902 NMG_UNLOCK();
903 return error;
904 }
905 NMG_UNLOCK();
906 /* Don't call config() with NMG_LOCK() held */
907 BDG_RLOCK(b);
908 if (b->bdg_ops.config != NULL)
909 error = b->bdg_ops.config(nr);
910 BDG_RUNLOCK(b);
911 return error;
912 }
913
914
915 /* nm_register callback for VALE ports */
916 int
netmap_vp_reg(struct netmap_adapter * na,int onoff)917 netmap_vp_reg(struct netmap_adapter *na, int onoff)
918 {
919 struct netmap_vp_adapter *vpna =
920 (struct netmap_vp_adapter*)na;
921
922 /* persistent ports may be put in netmap mode
923 * before being attached to a bridge
924 */
925 if (vpna->na_bdg)
926 BDG_WLOCK(vpna->na_bdg);
927 if (onoff) {
928 netmap_krings_mode_commit(na, onoff);
929 if (na->active_fds == 0)
930 na->na_flags |= NAF_NETMAP_ON;
931 /* XXX on FreeBSD, persistent VALE ports should also
932 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
933 */
934 } else {
935 if (na->active_fds == 0)
936 na->na_flags &= ~NAF_NETMAP_ON;
937 netmap_krings_mode_commit(na, onoff);
938 }
939 if (vpna->na_bdg)
940 BDG_WUNLOCK(vpna->na_bdg);
941 return 0;
942 }
943
944
945 /* rxsync code used by VALE ports nm_rxsync callback and also
946 * internally by the brwap
947 */
948 static int
netmap_vp_rxsync_locked(struct netmap_kring * kring,int flags)949 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
950 {
951 struct netmap_adapter *na = kring->na;
952 struct netmap_ring *ring = kring->ring;
953 u_int nm_i, lim = kring->nkr_num_slots - 1;
954 u_int head = kring->rhead;
955 int n;
956
957 if (head > lim) {
958 nm_prerr("ouch dangerous reset!!!");
959 n = netmap_ring_reinit(kring);
960 goto done;
961 }
962
963 /* First part, import newly received packets. */
964 /* actually nothing to do here, they are already in the kring */
965
966 /* Second part, skip past packets that userspace has released. */
967 nm_i = kring->nr_hwcur;
968 if (nm_i != head) {
969 /* consistency check, but nothing really important here */
970 for (n = 0; likely(nm_i != head); n++) {
971 struct netmap_slot *slot = &ring->slot[nm_i];
972 void *addr = NMB(na, slot);
973
974 if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
975 nm_prerr("bad buffer index %d, ignore ?",
976 slot->buf_idx);
977 }
978 slot->flags &= ~NS_BUF_CHANGED;
979 nm_i = nm_next(nm_i, lim);
980 }
981 kring->nr_hwcur = head;
982 }
983
984 n = 0;
985 done:
986 return n;
987 }
988
989 /*
990 * nm_rxsync callback for VALE ports
991 * user process reading from a VALE switch.
992 * Already protected against concurrent calls from userspace,
993 * but we must acquire the queue's lock to protect against
994 * writers on the same queue.
995 */
996 int
netmap_vp_rxsync(struct netmap_kring * kring,int flags)997 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
998 {
999 int n;
1000
1001 mtx_lock(&kring->q_lock);
1002 n = netmap_vp_rxsync_locked(kring, flags);
1003 mtx_unlock(&kring->q_lock);
1004 return n;
1005 }
1006
1007 int
netmap_bwrap_attach(const char * nr_name,struct netmap_adapter * hwna,struct netmap_bdg_ops * ops)1008 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
1009 struct netmap_bdg_ops *ops)
1010 {
1011 return ops->bwrap_attach(nr_name, hwna);
1012 }
1013
1014
/* Bridge wrapper code (bwrap).
 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
 * VALE switch.
 * The main task is to swap the meaning of tx and rx rings to match the
 * expectations of the VALE switch code (see nm_bdg_flush).
 *
 * The bwrap works by interposing a netmap_bwrap_adapter between the
 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
 * translates all callbacks to what the hwna expects.
 *
 * Note that we have to intercept callbacks coming from two sides:
 *
 *  - callbacks coming from the netmap module are intercepted by
 *    passing around the netmap_bwrap_adapter instead of the hwna
 *
 *  - callbacks coming from outside of the netmap module only know
 *    about the hwna. This, however, only happens in interrupt
 *    handlers, where only the hwna->nm_notify callback is called.
 *    What the bwrap does is to overwrite the hwna->nm_notify callback
 *    with its own netmap_bwrap_intr_notify.
 *    XXX This assumes that the hwna->nm_notify callback was the
 *    standard netmap_notify(), as it is the case for nic adapters.
 *    Any additional action performed by hwna->nm_notify will not be
 *    performed by netmap_bwrap_intr_notify.
 *
 * Additionally, the bwrap can optionally attach the host rings pair
 * of the wrapped adapter to a different port of the switch.
 */
1044
1045
1046 static void
netmap_bwrap_dtor(struct netmap_adapter * na)1047 netmap_bwrap_dtor(struct netmap_adapter *na)
1048 {
1049 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1050 struct netmap_adapter *hwna = bna->hwna;
1051 struct nm_bridge *b = bna->up.na_bdg,
1052 *bh = bna->host.na_bdg;
1053
1054 if (bna->host.up.nm_mem)
1055 netmap_mem_put(bna->host.up.nm_mem);
1056
1057 if (b) {
1058 netmap_bdg_detach_common(b, bna->up.bdg_port,
1059 (bh ? bna->host.bdg_port : -1));
1060 }
1061
1062 nm_prdis("na %p", na);
1063 na->ifp = NULL;
1064 bna->host.up.ifp = NULL;
1065 hwna->na_vp = bna->saved_na_vp;
1066 hwna->na_hostvp = NULL;
1067 hwna->na_private = NULL;
1068 hwna->na_flags &= ~NAF_BUSY;
1069 netmap_adapter_put(hwna);
1070
1071 }
1072
1073
1074 /*
1075 * Intr callback for NICs connected to a bridge.
1076 * Simply ignore tx interrupts (maybe we could try to recover space ?)
1077 * and pass received packets from nic to the bridge.
1078 *
1079 * XXX TODO check locking: this is called from the interrupt
1080 * handler so we should make sure that the interface is not
1081 * disconnected while passing down an interrupt.
1082 *
1083 * Note, no user process can access this NIC or the host stack.
1084 * The only part of the ring that is significant are the slots,
1085 * and head/cur/tail are set from the kring as needed
1086 * (part as a receive ring, part as a transmit ring).
1087 *
1088 * callback that overwrites the hwna notify callback.
1089 * Packets come from the outside or from the host stack and are put on an
1090 * hwna rx ring.
1091 * The bridge wrapper then sends the packets through the bridge.
1092 */
1093 static int
netmap_bwrap_intr_notify(struct netmap_kring * kring,int flags)1094 netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
1095 {
1096 struct netmap_adapter *na = kring->na;
1097 struct netmap_bwrap_adapter *bna = na->na_private;
1098 struct netmap_kring *bkring;
1099 struct netmap_vp_adapter *vpna = &bna->up;
1100 u_int ring_nr = kring->ring_id;
1101 int ret = NM_IRQ_COMPLETED;
1102 int error;
1103
1104 if (netmap_debug & NM_DEBUG_RXINTR)
1105 nm_prinf("%s %s 0x%x", na->name, kring->name, flags);
1106
1107 bkring = vpna->up.tx_rings[ring_nr];
1108
1109 /* make sure the ring is not disabled */
1110 if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
1111 return EIO;
1112 }
1113
1114 if (netmap_debug & NM_DEBUG_RXINTR)
1115 nm_prinf("%s head %d cur %d tail %d", na->name,
1116 kring->rhead, kring->rcur, kring->rtail);
1117
1118 /* simulate a user wakeup on the rx ring
1119 * fetch packets that have arrived.
1120 */
1121 error = kring->nm_sync(kring, 0);
1122 if (error)
1123 goto put_out;
1124 if (kring->nr_hwcur == kring->nr_hwtail) {
1125 if (netmap_verbose)
1126 nm_prlim(1, "interrupt with no packets on %s",
1127 kring->name);
1128 goto put_out;
1129 }
1130
1131 /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
1132 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
1133 * to push all packets out.
1134 */
1135 bkring->rhead = bkring->rcur = kring->nr_hwtail;
1136
1137 bkring->nm_sync(bkring, flags);
1138
1139 /* mark all buffers as released on this ring */
1140 kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
1141 /* another call to actually release the buffers */
1142 error = kring->nm_sync(kring, 0);
1143
1144 /* The second rxsync may have further advanced hwtail. If this happens,
1145 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
1146 if (kring->rcur != kring->nr_hwtail) {
1147 ret = NM_IRQ_RESCHED;
1148 }
1149 put_out:
1150 nm_kr_put(kring);
1151
1152 return error ? error : ret;
1153 }
1154
1155
1156 /* nm_register callback for bwrap */
1157 int
netmap_bwrap_reg(struct netmap_adapter * na,int onoff)1158 netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
1159 {
1160 struct netmap_bwrap_adapter *bna =
1161 (struct netmap_bwrap_adapter *)na;
1162 struct netmap_adapter *hwna = bna->hwna;
1163 struct netmap_vp_adapter *hostna = &bna->host;
1164 int error, i;
1165 enum txrx t;
1166
1167 nm_prdis("%s %s", na->name, onoff ? "on" : "off");
1168
1169 if (onoff) {
1170 /* netmap_do_regif has been called on the bwrap na.
1171 * We need to pass the information about the
1172 * memory allocator down to the hwna before
1173 * putting it in netmap mode
1174 */
1175 hwna->na_lut = na->na_lut;
1176
1177 if (hostna->na_bdg) {
1178 /* if the host rings have been attached to switch,
1179 * we need to copy the memory allocator information
1180 * in the hostna also
1181 */
1182 hostna->up.na_lut = na->na_lut;
1183 }
1184
1185 }
1186
1187 /* pass down the pending ring state information */
1188 for_rx_tx(t) {
1189 for (i = 0; i < netmap_all_rings(na, t); i++) {
1190 NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
1191 NMR(na, t)[i]->nr_pending_mode;
1192 }
1193 }
1194
1195 /* forward the request to the hwna */
1196 error = hwna->nm_register(hwna, onoff);
1197 if (error)
1198 return error;
1199
1200 /* copy up the current ring state information */
1201 for_rx_tx(t) {
1202 for (i = 0; i < netmap_all_rings(na, t); i++) {
1203 struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
1204 NMR(na, t)[i]->nr_mode = kring->nr_mode;
1205 }
1206 }
1207
1208 /* impersonate a netmap_vp_adapter */
1209 netmap_vp_reg(na, onoff);
1210 if (hostna->na_bdg)
1211 netmap_vp_reg(&hostna->up, onoff);
1212
1213 if (onoff) {
1214 u_int i;
1215 /* intercept the hwna nm_nofify callback on the hw rings */
1216 for (i = 0; i < hwna->num_rx_rings; i++) {
1217 hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
1218 hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
1219 }
1220 i = hwna->num_rx_rings; /* for safety */
1221 /* save the host ring notify unconditionally */
1222 for (; i < netmap_real_rings(hwna, NR_RX); i++) {
1223 hwna->rx_rings[i]->save_notify =
1224 hwna->rx_rings[i]->nm_notify;
1225 if (hostna->na_bdg) {
1226 /* also intercept the host ring notify */
1227 hwna->rx_rings[i]->nm_notify =
1228 netmap_bwrap_intr_notify;
1229 na->tx_rings[i]->nm_sync = na->nm_txsync;
1230 }
1231 }
1232 if (na->active_fds == 0)
1233 na->na_flags |= NAF_NETMAP_ON;
1234 } else {
1235 u_int i;
1236
1237 if (na->active_fds == 0)
1238 na->na_flags &= ~NAF_NETMAP_ON;
1239
1240 /* reset all notify callbacks (including host ring) */
1241 for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
1242 hwna->rx_rings[i]->nm_notify =
1243 hwna->rx_rings[i]->save_notify;
1244 hwna->rx_rings[i]->save_notify = NULL;
1245 }
1246 hwna->na_lut.lut = NULL;
1247 hwna->na_lut.plut = NULL;
1248 hwna->na_lut.objtotal = 0;
1249 hwna->na_lut.objsize = 0;
1250
1251 /* pass ownership of the netmap rings to the hwna */
1252 for_rx_tx(t) {
1253 for (i = 0; i < netmap_all_rings(na, t); i++) {
1254 NMR(na, t)[i]->ring = NULL;
1255 }
1256 }
1257 /* reset the number of host rings to default */
1258 for_rx_tx(t) {
1259 nma_set_host_nrings(hwna, t, 1);
1260 }
1261
1262 }
1263
1264 return 0;
1265 }
1266
1267 /* nm_config callback for bwrap */
1268 static int
netmap_bwrap_config(struct netmap_adapter * na,struct nm_config_info * info)1269 netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
1270 {
1271 struct netmap_bwrap_adapter *bna =
1272 (struct netmap_bwrap_adapter *)na;
1273 struct netmap_adapter *hwna = bna->hwna;
1274 int error;
1275
1276 /* Forward the request to the hwna. It may happen that nobody
1277 * registered hwna yet, so netmap_mem_get_lut() may have not
1278 * been called yet. */
1279 error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
1280 if (error)
1281 return error;
1282 netmap_update_config(hwna);
1283 /* swap the results and propagate */
1284 info->num_tx_rings = hwna->num_rx_rings;
1285 info->num_tx_descs = hwna->num_rx_desc;
1286 info->num_rx_rings = hwna->num_tx_rings;
1287 info->num_rx_descs = hwna->num_tx_desc;
1288 info->rx_buf_maxsize = hwna->rx_buf_maxsize;
1289
1290 return 0;
1291 }
1292
1293
1294 /* nm_krings_create callback for bwrap */
1295 int
netmap_bwrap_krings_create_common(struct netmap_adapter * na)1296 netmap_bwrap_krings_create_common(struct netmap_adapter *na)
1297 {
1298 struct netmap_bwrap_adapter *bna =
1299 (struct netmap_bwrap_adapter *)na;
1300 struct netmap_adapter *hwna = bna->hwna;
1301 struct netmap_adapter *hostna = &bna->host.up;
1302 int i, error = 0;
1303 enum txrx t;
1304
1305 /* also create the hwna krings */
1306 error = hwna->nm_krings_create(hwna);
1307 if (error) {
1308 return error;
1309 }
1310
1311 /* increment the usage counter for all the hwna krings */
1312 for_rx_tx(t) {
1313 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1314 NMR(hwna, t)[i]->users++;
1315 }
1316 }
1317
1318 /* now create the actual rings */
1319 error = netmap_mem_rings_create(hwna);
1320 if (error) {
1321 goto err_dec_users;
1322 }
1323
1324 /* cross-link the netmap rings
1325 * The original number of rings comes from hwna,
1326 * rx rings on one side equals tx rings on the other.
1327 */
1328 for_rx_tx(t) {
1329 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1330 for (i = 0; i < netmap_all_rings(hwna, r); i++) {
1331 NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
1332 NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
1333 }
1334 }
1335
1336 if (na->na_flags & NAF_HOST_RINGS) {
1337 /* the hostna rings are the host rings of the bwrap.
1338 * The corresponding krings must point back to the
1339 * hostna
1340 */
1341 hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
1342 hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
1343 for_rx_tx(t) {
1344 for (i = 0; i < nma_get_nrings(hostna, t); i++) {
1345 NMR(hostna, t)[i]->na = hostna;
1346 }
1347 }
1348 }
1349
1350 return 0;
1351
1352 err_dec_users:
1353 for_rx_tx(t) {
1354 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1355 NMR(hwna, t)[i]->users--;
1356 }
1357 }
1358 hwna->nm_krings_delete(hwna);
1359 return error;
1360 }
1361
1362
1363 void
netmap_bwrap_krings_delete_common(struct netmap_adapter * na)1364 netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
1365 {
1366 struct netmap_bwrap_adapter *bna =
1367 (struct netmap_bwrap_adapter *)na;
1368 struct netmap_adapter *hwna = bna->hwna;
1369 enum txrx t;
1370 int i;
1371
1372 nm_prdis("%s", na->name);
1373
1374 /* decrement the usage counter for all the hwna krings */
1375 for_rx_tx(t) {
1376 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1377 NMR(hwna, t)[i]->users--;
1378 }
1379 }
1380
1381 /* delete any netmap rings that are no longer needed */
1382 netmap_mem_rings_delete(hwna);
1383 hwna->nm_krings_delete(hwna);
1384 }
1385
1386
1387 /* notify method for the bridge-->hwna direction */
1388 int
netmap_bwrap_notify(struct netmap_kring * kring,int flags)1389 netmap_bwrap_notify(struct netmap_kring *kring, int flags)
1390 {
1391 struct netmap_adapter *na = kring->na;
1392 struct netmap_bwrap_adapter *bna = na->na_private;
1393 struct netmap_adapter *hwna = bna->hwna;
1394 u_int ring_n = kring->ring_id;
1395 u_int lim = kring->nkr_num_slots - 1;
1396 struct netmap_kring *hw_kring;
1397 int error;
1398
1399 nm_prdis("%s: na %s hwna %s",
1400 (kring ? kring->name : "NULL!"),
1401 (na ? na->name : "NULL!"),
1402 (hwna ? hwna->name : "NULL!"));
1403 hw_kring = hwna->tx_rings[ring_n];
1404
1405 if (nm_kr_tryget(hw_kring, 0, NULL)) {
1406 return ENXIO;
1407 }
1408
1409 /* first step: simulate a user wakeup on the rx ring */
1410 netmap_vp_rxsync(kring, flags);
1411 nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1412 na->name, ring_n,
1413 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1414 kring->rhead, kring->rcur, kring->rtail,
1415 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
1416 /* second step: the new packets are sent on the tx ring
1417 * (which is actually the same ring)
1418 */
1419 hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
1420 error = hw_kring->nm_sync(hw_kring, flags);
1421 if (error)
1422 goto put_out;
1423
1424 /* third step: now we are back the rx ring */
1425 /* claim ownership on all hw owned bufs */
1426 kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
1427
1428 /* fourth step: the user goes to sleep again, causing another rxsync */
1429 netmap_vp_rxsync(kring, flags);
1430 nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1431 na->name, ring_n,
1432 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1433 kring->rhead, kring->rcur, kring->rtail,
1434 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
1435 put_out:
1436 nm_kr_put(hw_kring);
1437
1438 return error ? error : NM_IRQ_COMPLETED;
1439 }
1440
1441
1442 /* nm_bdg_ctl callback for the bwrap.
1443 * Called on bridge-attach and detach, as an effect of valectl -[ahd].
1444 * On attach, it needs to provide a fake netmap_priv_d structure and
1445 * perform a netmap_do_regif() on the bwrap. This will put both the
1446 * bwrap and the hwna in netmap mode, with the netmap rings shared
1447 * and cross linked. Moroever, it will start intercepting interrupts
1448 * directed to hwna.
1449 */
1450 static int
netmap_bwrap_bdg_ctl(struct nmreq_header * hdr,struct netmap_adapter * na)1451 netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
1452 {
1453 struct netmap_priv_d *npriv;
1454 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1455 int error = 0;
1456
1457 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
1458 struct nmreq_vale_attach *req =
1459 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
1460 if (req->reg.nr_ringid != 0 ||
1461 (req->reg.nr_mode != NR_REG_ALL_NIC &&
1462 req->reg.nr_mode != NR_REG_NIC_SW)) {
1463 /* We only support attaching all the NIC rings
1464 * and/or the host stack. */
1465 return EINVAL;
1466 }
1467 if (NETMAP_OWNED_BY_ANY(na)) {
1468 return EBUSY;
1469 }
1470 if (bna->na_kpriv) {
1471 /* nothing to do */
1472 return 0;
1473 }
1474 npriv = netmap_priv_new();
1475 if (npriv == NULL)
1476 return ENOMEM;
1477 npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
1478 error = netmap_do_regif(npriv, na, hdr);
1479 if (error) {
1480 netmap_priv_delete(npriv);
1481 return error;
1482 }
1483 bna->na_kpriv = npriv;
1484 na->na_flags |= NAF_BUSY;
1485 } else {
1486 if (na->active_fds == 0) /* not registered */
1487 return EINVAL;
1488 netmap_priv_delete(bna->na_kpriv);
1489 bna->na_kpriv = NULL;
1490 na->na_flags &= ~NAF_BUSY;
1491 }
1492
1493 return error;
1494 }
1495
1496 /* attach a bridge wrapper to the 'real' device */
1497 int
netmap_bwrap_attach_common(struct netmap_adapter * na,struct netmap_adapter * hwna)1498 netmap_bwrap_attach_common(struct netmap_adapter *na,
1499 struct netmap_adapter *hwna)
1500 {
1501 struct netmap_bwrap_adapter *bna;
1502 struct netmap_adapter *hostna = NULL;
1503 int error = 0;
1504 enum txrx t;
1505
1506 /* make sure the NIC is not already in use */
1507 if (NETMAP_OWNED_BY_ANY(hwna)) {
1508 nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
1509 return EBUSY;
1510 }
1511
1512 bna = (struct netmap_bwrap_adapter *)na;
1513 /* make bwrap ifp point to the real ifp */
1514 na->ifp = hwna->ifp;
1515 if_ref(na->ifp);
1516 na->na_private = bna;
1517 /* fill the ring data for the bwrap adapter with rx/tx meanings
1518 * swapped. The real cross-linking will be done during register,
1519 * when all the krings will have been created.
1520 */
1521 for_rx_tx(t) {
1522 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1523 nma_set_nrings(na, t, nma_get_nrings(hwna, r));
1524 nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
1525 }
1526 na->nm_dtor = netmap_bwrap_dtor;
1527 na->nm_config = netmap_bwrap_config;
1528 na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
1529 na->pdev = hwna->pdev;
1530 na->nm_mem = netmap_mem_get(hwna->nm_mem);
1531 na->virt_hdr_len = hwna->virt_hdr_len;
1532 na->rx_buf_maxsize = hwna->rx_buf_maxsize;
1533
1534 bna->hwna = hwna;
1535 netmap_adapter_get(hwna);
1536 hwna->na_private = bna; /* weak reference */
1537 bna->saved_na_vp = hwna->na_vp;
1538 hwna->na_vp = &bna->up;
1539 bna->up.up.na_vp = &(bna->up);
1540
1541 if (hwna->na_flags & NAF_HOST_RINGS) {
1542 if (hwna->na_flags & NAF_SW_ONLY)
1543 na->na_flags |= NAF_SW_ONLY;
1544 na->na_flags |= NAF_HOST_RINGS;
1545 hostna = &bna->host.up;
1546
1547 /* limit the number of host rings to that of hw */
1548 nm_bound_var(&hostna->num_tx_rings, 1, 1,
1549 nma_get_nrings(hwna, NR_TX), NULL);
1550 nm_bound_var(&hostna->num_rx_rings, 1, 1,
1551 nma_get_nrings(hwna, NR_RX), NULL);
1552
1553 snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
1554 hostna->ifp = hwna->ifp;
1555 for_rx_tx(t) {
1556 enum txrx r = nm_txrx_swap(t);
1557 u_int nr = nma_get_nrings(hostna, t);
1558
1559 nma_set_nrings(hostna, t, nr);
1560 nma_set_host_nrings(na, t, nr);
1561 if (nma_get_host_nrings(hwna, t) < nr) {
1562 nma_set_host_nrings(hwna, t, nr);
1563 }
1564 nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
1565 }
1566 // hostna->nm_txsync = netmap_bwrap_host_txsync;
1567 // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1568 hostna->nm_mem = netmap_mem_get(na->nm_mem);
1569 hostna->na_private = bna;
1570 hostna->na_vp = &bna->up;
1571 na->na_hostvp = hwna->na_hostvp =
1572 hostna->na_hostvp = &bna->host;
1573 hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
1574 hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
1575 }
1576 if (hwna->na_flags & NAF_MOREFRAG)
1577 na->na_flags |= NAF_MOREFRAG;
1578
1579 nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d",
1580 na->name, ifp->if_xname,
1581 na->num_tx_rings, na->num_tx_desc,
1582 na->num_rx_rings, na->num_rx_desc);
1583
1584 error = netmap_attach_common(na);
1585 if (error) {
1586 goto err_put;
1587 }
1588 hwna->na_flags |= NAF_BUSY;
1589 return 0;
1590
1591 err_put:
1592 hwna->na_vp = hwna->na_hostvp = NULL;
1593 netmap_adapter_put(hwna);
1594 return error;
1595
1596 }
1597
1598 struct nm_bridge *
netmap_init_bridges2(u_int n)1599 netmap_init_bridges2(u_int n)
1600 {
1601 int i;
1602 struct nm_bridge *b;
1603
1604 b = nm_os_malloc(sizeof(struct nm_bridge) * n);
1605 if (b == NULL)
1606 return NULL;
1607 for (i = 0; i < n; i++)
1608 BDG_RWINIT(&b[i]);
1609 return b;
1610 }
1611
1612 void
netmap_uninit_bridges2(struct nm_bridge * b,u_int n)1613 netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
1614 {
1615 int i;
1616
1617 if (b == NULL)
1618 return;
1619
1620 for (i = 0; i < n; i++)
1621 BDG_RWDESTROY(&b[i]);
1622 nm_os_free(b);
1623 }
1624
1625 int
netmap_init_bridges(void)1626 netmap_init_bridges(void)
1627 {
1628 #ifdef CONFIG_NET_NS
1629 return netmap_bns_register();
1630 #else
1631 nm_bridges = netmap_init_bridges2(vale_max_bridges);
1632 if (nm_bridges == NULL)
1633 return ENOMEM;
1634 return 0;
1635 #endif
1636 }
1637
1638 void
netmap_uninit_bridges(void)1639 netmap_uninit_bridges(void)
1640 {
1641 #ifdef CONFIG_NET_NS
1642 netmap_bns_unregister();
1643 #else
1644 netmap_uninit_bridges2(nm_bridges, vale_max_bridges);
1645 #endif
1646 }
1647