xref: /dragonfly/sys/net/netmap/netmap_vale.c (revision f933b737dabc806a2f1680f0afea2fb42a345b92)
1 /*
2  * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (RWlock on linux) protects
36 deletion of ports. When configuring or deleting a new port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 during which the thread may incur a page fault.
41 Hence it is important that sleepable shared locks are used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slots in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 #include <sys/cdefs.h> /* prerequisite */
59 __FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $");
60 
61 #include <sys/types.h>
62 #include <sys/errno.h>
63 #include <sys/param.h>        /* defines used in kernel.h */
64 #include <sys/kernel.h>       /* types used in module initialization */
65 #include <sys/conf.h>         /* cdevsw struct, UID, GID */
66 #include <sys/sockio.h>
67 #include <sys/socketvar.h>    /* struct socket */
68 #include <sys/malloc.h>
69 #include <sys/poll.h>
70 #include <sys/lock.h>
71 #include <sys/socket.h> /* sockaddrs */
72 #include <sys/sysctl.h>
73 #include <net/if.h>
74 #include <net/if_var.h>
75 #include <net/bpf.h>                    /* BIOCIMMEDIATE */
76 #include <sys/bus.h>          /* bus_dmamap_* */
77 #include <sys/endian.h>
78 #include <sys/refcount.h>
79 
80 
/*
 * Per-bridge lock wrappers.
 *
 * The bridge lock is a `struct lock` stored in the bdg_lock field of the
 * object passed as `b`.  The write (exclusive) flavour is used when ports
 * are configured or deleted, the read (shared) flavour while forwarding.
 * Both unlock macros expand to the same LK_RELEASE request.
 */
#define BDG_RWLOCK_T		struct lock

#define BDG_RWINIT(b)		\
	lockinit(&(b)->bdg_lock, "bdg lock", 0, LK_CANRECURSE)
#define BDG_RWDESTROY(b)	lockuninit(&(b)->bdg_lock)

/* exclusive side */
#define BDG_WLOCK(b)		lockmgr(&(b)->bdg_lock, LK_EXCLUSIVE)
#define BDG_WUNLOCK(b)		lockmgr(&(b)->bdg_lock, LK_RELEASE)

/* shared side; the TRY variant asks for the lock with LK_NOWAIT */
#define BDG_RLOCK(b)		lockmgr(&(b)->bdg_lock, LK_SHARED)
#define BDG_RTRYLOCK(b)		lockmgr(&(b)->bdg_lock, LK_SHARED|LK_NOWAIT)
#define BDG_RUNLOCK(b)		lockmgr(&(b)->bdg_lock, LK_RELEASE)
91 
92 /*
93  * common headers
94  */
95 
96 #include <net/netmap/netmap.h>
97 #include <net/netmap/netmap_kern.h>
98 #include <net/netmap/netmap_mem2.h>
99 
100 #ifdef WITH_VALE
101 
102 /*
103  * system parameters (most of them in netmap_kern.h)
104  * NM_NAME          prefix for switch port names, default "vale"
105  * NM_BDG_MAXPORTS  number of ports
106  * NM_BRIDGES       max number of switches in the system.
107  *        XXX should become a sysctl or tunable
108  *
109  * Switch ports are named valeX:Y where X is the switch name and Y
110  * is the port. If Y matches a physical interface name, the port is
111  * connected to a physical device.
112  *
113  * Unlike physical interfaces, switch ports use their own memory region
114  * for rings and buffers.
115  * The virtual interfaces use per-queue lock instead of core lock.
116  * In the tx loop, we aggregate traffic in batches to make all operations
117  * faster. The batch size is bridge_batch.
118  */
/* limits applied to the rings of a virtual port */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */

/* sizing of the per-bridge and per-ring tables */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */

/*
 * Actual size of the tables: a full batch plus room for one more
 * maximum-length chain of buffers.
 */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)

/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX

#define NM_BRIDGES		8	/* number of bridges */
130 
131 
132 /*
133  * bridge_batch is set via sysctl to the max batch size to be
134  * used in the bridge. The actual value may be larger as the
135  * last packet in the block may overflow the size.
136  */
137 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
138 SYSCTL_DECL(_dev_netmap);
139 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
140 
141 
/*
 * Forward declarations.  The two *_reg/*_register hooks are needed early
 * because the nma_is_*() helpers compare na->nm_register against them.
 */
static int	bdg_netmap_attach(struct netmap_adapter *);
static int	bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int	netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int	netmap_bwrap_register(struct netmap_adapter *, int onoff);

/* not static: declared here for code outside this file */
int		kern_netmap_regif(struct nmreq *nmr);
147 
/*
 * Forwarding entry for a bridge.
 *
 * Every transmit queue collects a batch of packets in an array of these
 * entries before forwarding them.  Packets headed to the same destination
 * are chained through ft_next.  ft_frags and ft_next are meaningful only
 * in the entry describing the first fragment of a packet.
 */
struct nm_bdg_fwd {
	void		*ft_buf;	/* netmap or indirect buffer */
	uint8_t		ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t		_ft_port;	/* dst port (unused) */
	uint16_t	ft_flags;	/* flags, e.g. indirect */
	uint16_t	ft_len;		/* src fragment len */
	uint16_t	ft_next;	/* next packet to same destination */
};
162 
/*
 * Per-output-interface list head used while building the destination
 * queues.  bq_head and bq_tail index the first and last entry of the
 * list; bq_len counts output buffers, which may differ from the number
 * of source entries because of coalescing during the copy.
 */
struct nm_bdg_q {
	uint16_t	bq_head;
	uint16_t	bq_tail;
	uint32_t	bq_len;		/* number of buffers */
};
173 
/*
 * One slot of the bridge forwarding table (see ht[] in struct nm_bridge).
 * XXX revise this
 */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};
179 
180 /*
181  * nm_bridge is a descriptor for a VALE switch.
182  * Interfaces for a bridge are all in bdg_ports[].
183  * The array has fixed size, an empty entry does not terminate
184  * the search, but lookups only occur on attach/detach so we
185  * don't mind if they are slow.
186  *
187  * The bridge is non blocking on the transmit ports: excess
188  * packets are dropped if there is no room on the output port.
189  *
190  * bdg_lock protects accesses to the bdg_ports array.
191  * This is a rw lock (or equivalent).
192  */
193 struct nm_bridge {
194           /* XXX what is the proper alignment/layout ? */
195           BDG_RWLOCK_T        bdg_lock; /* protects bdg_ports */
196           int                 bdg_namelen;
197           uint32_t  bdg_active_ports; /* 0 means free */
198           char                bdg_basename[IFNAMSIZ];
199 
200           /* Indexes of active ports (up to active_ports)
201            * and all other remaining ports.
202            */
203           uint8_t             bdg_port_index[NM_BDG_MAXPORTS];
204 
205           struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
206 
207 
208           /*
209            * The function to decide the destination port.
210            * It returns either of an index of the destination port,
211            * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
212            * forward this packet.  ring_nr is the source ring index, and the
213            * function may overwrite this value to forward this packet to a
214            * different ring index.
215            * This function must be set by netmap_bdgctl().
216            */
217           bdg_lookup_fn_t nm_bdg_lookup;
218 
219           /* the forwarding table, MAC+ports.
220            * XXX should be changed to an argument to be passed to
221            * the lookup function, and allocated on attach
222            */
223           struct nm_hash_ent ht[NM_BDG_HASH];
224 };
225 
226 
227 /*
228  * XXX in principle nm_bridges could be created dynamically
229  * Right now we have a static array and deletions are protected
230  * by an exclusive lock.
231  */
232 struct nm_bridge nm_bridges[NM_BRIDGES];
233 
234 
235 /*
236  * A few functions to tell which kind of port we are using.
237  * XXX should we hold a lock ?
238  *
239  * nma_is_vp()                virtual port
240  * nma_is_host()    port connected to the host stack
241  * nma_is_hw()                port connected to a NIC
242  * nma_is_generic() generic netmap adapter XXX stop this madness
243  */
244 static __inline int
nma_is_vp(struct netmap_adapter * na)245 nma_is_vp(struct netmap_adapter *na)
246 {
247           return na->nm_register == bdg_netmap_reg;
248 }
249 
250 
251 static __inline int
nma_is_host(struct netmap_adapter * na)252 nma_is_host(struct netmap_adapter *na)
253 {
254           return na->nm_register == NULL;
255 }
256 
257 
/*
 * Non-zero when the adapter is none of: virtual port, host port
 * (sw adapter, whose nm_register is NULL), generic adapter.
 */
static __inline int
nma_is_hw(struct netmap_adapter *na)
{
	return !(nma_is_vp(na) || nma_is_host(na) || nma_is_generic(na));
}
264 
265 static __inline int
nma_is_bwrap(struct netmap_adapter * na)266 nma_is_bwrap(struct netmap_adapter *na)
267 {
268           return na->nm_register == netmap_bwrap_register;
269 }
270 
271 
272 
/*
 * Slightly optimized packet copy.
 *
 * Lengths below 1024 are copied in 64-byte units (eight 64-bit words per
 * iteration), so the amount copied is l rounded up to a multiple of 64;
 * both buffers must have room for that.  Lengths of 1024 and above are
 * handed to memcpy() and copied exactly.  A length <= 0 copies nothing.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 * The branch hints are spelled with __builtin_expect() so the helper
 * does not depend on the likely()/unlikely() macros.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	const uint64_t *from = _src;
	uint64_t *to = _dst;
	int left, w;

	if (__builtin_expect(l >= 1024, 0)) {
		memcpy(to, from, l);
		return;
	}
	for (left = l; __builtin_expect(left > 0, 1); left -= 64) {
		/* one 64-byte unit, word by word in ascending order */
		for (w = 0; w < 8; w++)
			to[w] = from[w];
		from += 8;
		to += 8;
	}
}
301 
302 
303 
304 /*
305  * locate a bridge among the existing ones.
306  * MUST BE CALLED WITH NMG_LOCK()
307  *
308  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
309  * We assume that this is called with a name of at least NM_NAME chars.
310  */
311 static struct nm_bridge *
nm_find_bridge(const char * name,int create)312 nm_find_bridge(const char *name, int create)
313 {
314           int i, l, namelen;
315           struct nm_bridge *b = NULL;
316 
317           NMG_LOCK_ASSERT();
318 
319           namelen = strlen(NM_NAME);    /* base length */
320           l = name ? strlen(name) : 0;            /* actual length */
321           if (l < namelen) {
322                     D("invalid bridge name %s", name ? name : NULL);
323                     return NULL;
324           }
325           for (i = namelen + 1; i < l; i++) {
326                     if (name[i] == ':') {
327                               namelen = i;
328                               break;
329                     }
330           }
331           if (namelen >= IFNAMSIZ)
332                     namelen = IFNAMSIZ;
333           ND("--- prefix is '%.*s' ---", namelen, name);
334 
335           /* lookup the name, remember empty slot if there is one */
336           for (i = 0; i < NM_BRIDGES; i++) {
337                     struct nm_bridge *x = nm_bridges + i;
338 
339                     if (x->bdg_active_ports == 0) {
340                               if (create && b == NULL)
341                                         b = x;    /* record empty slot */
342                     } else if (x->bdg_namelen != namelen) {
343                               continue;
344                     } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
345                               ND("found '%.*s' at %d", namelen, name, i);
346                               b = x;
347                               break;
348                     }
349           }
350           if (i == NM_BRIDGES && b) { /* name not found, can create entry */
351                     /* initialize the bridge */
352                     strncpy(b->bdg_basename, name, namelen);
353                     ND("create new bridge %s with ports %d", b->bdg_basename,
354                               b->bdg_active_ports);
355                     b->bdg_namelen = namelen;
356                     b->bdg_active_ports = 0;
357                     for (i = 0; i < NM_BDG_MAXPORTS; i++)
358                               b->bdg_port_index[i] = i;
359                     /* set the default function */
360                     b->nm_bdg_lookup = netmap_bdg_learning;
361                     /* reset the MAC address table */
362                     bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
363           }
364           return b;
365 }
366 
367 
368 /*
369  * Free the forwarding tables for rings attached to switch ports.
370  */
371 static void
nm_free_bdgfwd(struct netmap_adapter * na)372 nm_free_bdgfwd(struct netmap_adapter *na)
373 {
374           int nrings, i;
375           struct netmap_kring *kring;
376 
377           NMG_LOCK_ASSERT();
378           nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
379           kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
380           for (i = 0; i < nrings; i++) {
381                     if (kring[i].nkr_ft) {
382                               kfree(kring[i].nkr_ft, M_DEVBUF);
383                               kring[i].nkr_ft = NULL; /* protect from freeing twice */
384                     }
385           }
386 }
387 
388 
389 /*
390  * Allocate the forwarding tables for the rings attached to the bridge ports.
391  */
392 static int
nm_alloc_bdgfwd(struct netmap_adapter * na)393 nm_alloc_bdgfwd(struct netmap_adapter *na)
394 {
395           int nrings, l, i, num_dstq;
396           struct netmap_kring *kring;
397 
398           NMG_LOCK_ASSERT();
399           /* all port:rings + broadcast */
400           num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
401           l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
402           l += sizeof(struct nm_bdg_q) * num_dstq;
403           l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
404 
405           nrings = na->num_tx_rings + 1;
406           kring = na->tx_rings;
407           for (i = 0; i < nrings; i++) {
408                     struct nm_bdg_fwd *ft;
409                     struct nm_bdg_q *dstq;
410                     int j;
411 
412                     ft = kmalloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
413                     if (!ft) {
414                               nm_free_bdgfwd(na);
415                               return ENOMEM;
416                     }
417                     dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
418                     for (j = 0; j < num_dstq; j++) {
419                               dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
420                               dstq[j].bq_len = 0;
421                     }
422                     kring[i].nkr_ft = ft;
423           }
424           return 0;
425 }
426 
427 
428 static void
netmap_bdg_detach_common(struct nm_bridge * b,int hw,int sw)429 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
430 {
431           int s_hw = hw, s_sw = sw;
432           int i, lim =b->bdg_active_ports;
433           uint8_t tmp[NM_BDG_MAXPORTS];
434 
435           /*
436           New algorithm:
437           make a copy of bdg_port_index;
438           lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
439           in the array of bdg_port_index, replacing them with
440           entries from the bottom of the array;
441           decrement bdg_active_ports;
442           acquire BDG_WLOCK() and copy back the array.
443            */
444 
445           D("detach %d and %d (lim %d)", hw, sw, lim);
446           /* make a copy of the list of active ports, update it,
447            * and then copy back within BDG_WLOCK().
448            */
449           memcpy(tmp, b->bdg_port_index, sizeof(tmp));
450           for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
451                     if (hw >= 0 && tmp[i] == hw) {
452                               ND("detach hw %d at %d", hw, i);
453                               lim--; /* point to last active port */
454                               tmp[i] = tmp[lim]; /* swap with i */
455                               tmp[lim] = hw;      /* now this is inactive */
456                               hw = -1;
457                     } else if (sw >= 0 && tmp[i] == sw) {
458                               ND("detach sw %d at %d", sw, i);
459                               lim--;
460                               tmp[i] = tmp[lim];
461                               tmp[lim] = sw;
462                               sw = -1;
463                     } else {
464                               i++;
465                     }
466           }
467           if (hw >= 0 || sw >= 0) {
468                     D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
469           }
470 
471           BDG_WLOCK(b);
472           b->bdg_ports[s_hw] = NULL;
473           if (s_sw >= 0) {
474                     b->bdg_ports[s_sw] = NULL;
475           }
476           memcpy(b->bdg_port_index, tmp, sizeof(tmp));
477           b->bdg_active_ports = lim;
478           BDG_WUNLOCK(b);
479 
480           ND("now %d active ports", lim);
481           if (lim == 0) {
482                     ND("marking bridge %s as free", b->bdg_basename);
483                     b->nm_bdg_lookup = NULL;
484           }
485 }
486 
487 static void
netmap_adapter_vp_dtor(struct netmap_adapter * na)488 netmap_adapter_vp_dtor(struct netmap_adapter *na)
489 {
490           struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
491           struct nm_bridge *b = vpna->na_bdg;
492           struct ifnet *ifp = na->ifp;
493 
494           ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
495 
496           if (b) {
497                     netmap_bdg_detach_common(b, vpna->bdg_port, -1);
498           }
499 
500           bzero(ifp, sizeof(*ifp));
501           kfree(ifp, M_DEVBUF);
502           na->ifp = NULL;
503 }
504 
505 int
netmap_get_bdg_na(struct nmreq * nmr,struct netmap_adapter ** na,int create)506 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
507 {
508           const char *name = nmr->nr_name;
509           struct ifnet *ifp;
510           int error = 0;
511           struct netmap_adapter *ret;
512           struct netmap_vp_adapter *vpna;
513           struct nm_bridge *b;
514           int i, j, cand = -1, cand2 = -1;
515           int needed;
516 
517           *na = NULL;     /* default return value */
518 
519           /* first try to see if this is a bridge port. */
520           NMG_LOCK_ASSERT();
521           if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
522                     return 0;  /* no error, but no VALE prefix */
523           }
524 
525           b = nm_find_bridge(name, create);
526           if (b == NULL) {
527                     D("no bridges available for '%s'", name);
528                     return (ENXIO);
529           }
530 
531           /* Now we are sure that name starts with the bridge's name,
532            * lookup the port in the bridge. We need to scan the entire
533            * list. It is not important to hold a WLOCK on the bridge
534            * during the search because NMG_LOCK already guarantees
535            * that there are no other possible writers.
536            */
537 
538           /* lookup in the local list of ports */
539           for (j = 0; j < b->bdg_active_ports; j++) {
540                     i = b->bdg_port_index[j];
541                     vpna = b->bdg_ports[i];
542                     // KASSERT(na != NULL);
543                     ifp = vpna->up.ifp;
544                     /* XXX make sure the name only contains one : */
545                     if (!strcmp(NM_IFPNAME(ifp), name)) {
546                               netmap_adapter_get(&vpna->up);
547                               ND("found existing if %s refs %d", name,
548                                         vpna->na_bdg_refcount);
549                               *na = (struct netmap_adapter *)vpna;
550                               return 0;
551                     }
552           }
553           /* not found, should we create it? */
554           if (!create)
555                     return ENXIO;
556           /* yes we should, see if we have space to attach entries */
557           needed = 2; /* in some cases we only need 1 */
558           if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
559                     D("bridge full %d, cannot create new port", b->bdg_active_ports);
560                     return EINVAL;
561           }
562           /* record the next two ports available, but do not allocate yet */
563           cand = b->bdg_port_index[b->bdg_active_ports];
564           cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
565           ND("+++ bridge %s port %s used %d avail %d %d",
566                     b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
567 
568           /*
569            * try see if there is a matching NIC with this name
570            * (after the bridge's name)
571            */
572           ifnet_lock();
573           ifp = ifunit(name + b->bdg_namelen + 1);
574           if (!ifp) { /* this is a virtual port */
575                     /* Create a temporary NA with arguments, then
576                      * bdg_netmap_attach() will allocate the real one
577                      * and attach it to the ifp
578                      */
579                     struct netmap_adapter tmp_na;
580 
581                     ifnet_unlock();
582 
583                     if (nmr->nr_cmd) {
584                               /* nr_cmd must be 0 for a virtual port */
585                               return EINVAL;
586                     }
587                     bzero(&tmp_na, sizeof(tmp_na));
588                     /* bound checking */
589                     tmp_na.num_tx_rings = nmr->nr_tx_rings;
590                     nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
591                     nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
592                     tmp_na.num_rx_rings = nmr->nr_rx_rings;
593                     nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
594                     nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
595                     nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
596                                         1, NM_BDG_MAXSLOTS, NULL);
597                     tmp_na.num_tx_desc = nmr->nr_tx_slots;
598                     nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
599                                         1, NM_BDG_MAXSLOTS, NULL);
600                     tmp_na.num_rx_desc = nmr->nr_rx_slots;
601 
602                     /* create a struct ifnet for the new port.
603                      * need M_NOWAIT as we are under nma_lock
604                      */
605                     ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
606                     if (!ifp)
607                               return ENOMEM;
608 
609                     strcpy(ifp->if_xname, name);
610                     tmp_na.ifp = ifp;
611                     /* bdg_netmap_attach creates a struct netmap_adapter */
612                     error = bdg_netmap_attach(&tmp_na);
613                     if (error) {
614                               D("error %d", error);
615                               kfree(ifp, M_DEVBUF);
616                               return error;
617                     }
618                     ret = NA(ifp);
619                     cand2 = -1;         /* only need one port */
620           } else {  /* this is a NIC */
621                     struct ifnet *fake_ifp;
622 
623                     error = netmap_get_hw_na(ifp, &ret);
624                     if (error || ret == NULL)
625                               goto out;
626 
627                     /* make sure the NIC is not already in use */
628                     if (NETMAP_OWNED_BY_ANY(ret)) {
629                               D("NIC %s busy, cannot attach to bridge",
630                                         NM_IFPNAME(ifp));
631                               error = EINVAL;
632                               goto out;
633                     }
634                     /* create a fake interface */
635                     fake_ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
636                     if (!fake_ifp) {
637                               error = ENOMEM;
638                               goto out;
639                     }
640                     strcpy(fake_ifp->if_xname, name);
641                     error = netmap_bwrap_attach(fake_ifp, ifp);
642                     if (error) {
643                               kfree(fake_ifp, M_DEVBUF);
644                               goto out;
645                     }
646                     ret = NA(fake_ifp);
647                     if (nmr->nr_arg1 != NETMAP_BDG_HOST)
648                               cand2 = -1; /* only need one port */
649 
650                     ifnet_unlock();
651           }
652           vpna = (struct netmap_vp_adapter *)ret;
653 
654           BDG_WLOCK(b);
655           vpna->bdg_port = cand;
656           ND("NIC  %p to bridge port %d", vpna, cand);
657           /* bind the port to the bridge (virtual ports are not active) */
658           b->bdg_ports[cand] = vpna;
659           vpna->na_bdg = b;
660           b->bdg_active_ports++;
661           if (cand2 >= 0) {
662                     struct netmap_vp_adapter *hostna = vpna + 1;
663                     /* also bind the host stack to the bridge */
664                     b->bdg_ports[cand2] = hostna;
665                     hostna->bdg_port = cand2;
666                     hostna->na_bdg = b;
667                     b->bdg_active_ports++;
668                     ND("host %p to bridge port %d", hostna, cand2);
669           }
670           ND("if %s refs %d", name, vpna->up.na_refcount);
671           BDG_WUNLOCK(b);
672           *na = ret;
673           netmap_adapter_get(ret);
674           return 0;
675 
676 out:
677           ifnet_unlock();
678           return error;
679 }
680 
681 
682 /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
683 static int
nm_bdg_attach(struct nmreq * nmr)684 nm_bdg_attach(struct nmreq *nmr)
685 {
686           struct netmap_adapter *na;
687           struct netmap_if *nifp;
688           struct netmap_priv_d *npriv;
689           struct netmap_bwrap_adapter *bna;
690           int error;
691 
692           npriv = kmalloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
693           if (npriv == NULL)
694                     return ENOMEM;
695           NMG_LOCK();
696           /* XXX probably netmap_get_bdg_na() */
697           error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
698           if (error) /* no device, or another bridge or user owns the device */
699                     goto unlock_exit;
700           /* netmap_get_na() sets na_bdg if this is a physical interface
701            * that we can attach to a switch.
702            */
703           if (!nma_is_bwrap(na)) {
704                     /* got reference to a virtual port or direct access to a NIC.
705                      * perhaps specified no bridge prefix or wrong NIC name
706                      */
707                     error = EINVAL;
708                     goto unref_exit;
709           }
710 
711           if (na->active_fds > 0) { /* already registered */
712                     error = EBUSY;
713                     goto unref_exit;
714           }
715 
716           nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
717           if (!nifp) {
718                     goto unref_exit;
719           }
720 
721           bna = (struct netmap_bwrap_adapter*)na;
722           bna->na_kpriv = npriv;
723           NMG_UNLOCK();
724           ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
725           return 0;
726 
727 unref_exit:
728           netmap_adapter_put(na);
729 unlock_exit:
730           NMG_UNLOCK();
731           bzero(npriv, sizeof(*npriv));
732           kfree(npriv, M_DEVBUF);
733           return error;
734 }
735 
736 static int
nm_bdg_detach(struct nmreq * nmr)737 nm_bdg_detach(struct nmreq *nmr)
738 {
739           struct netmap_adapter *na;
740           int error;
741           struct netmap_bwrap_adapter *bna;
742           int last_instance;
743 
744           NMG_LOCK();
745           error = netmap_get_na(nmr, &na, 0 /* don't create */);
746           if (error) { /* no device, or another bridge or user owns the device */
747                     goto unlock_exit;
748           }
749           if (!nma_is_bwrap(na)) {
750                     /* got reference to a virtual port or direct access to a NIC.
751                      * perhaps specified no bridge's prefix or wrong NIC's name
752                      */
753                     error = EINVAL;
754                     goto unref_exit;
755           }
756           bna = (struct netmap_bwrap_adapter *)na;
757 
758           if (na->active_fds == 0) { /* not registered */
759                     error = EINVAL;
760                     goto unref_exit;
761           }
762 
763           last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
764           if (!last_instance) {
765                     D("--- error, trying to detach an entry with active mmaps");
766                     error = EINVAL;
767           } else {
768                     struct netmap_priv_d *npriv = bna->na_kpriv;
769 
770                     bna->na_kpriv = NULL;
771                     D("deleting priv");
772 
773                     bzero(npriv, sizeof(*npriv));
774                     kfree(npriv, M_DEVBUF);
775           }
776 
777 unref_exit:
778           netmap_adapter_put(na);
779 unlock_exit:
780           NMG_UNLOCK();
781           return error;
782 
783 }
784 
785 
786 /* exported to kernel callers, e.g. OVS ?
787  * Entry point.
788  * Called without NMG_LOCK.
789  */
790 int
netmap_bdg_ctl(struct nmreq * nmr,bdg_lookup_fn_t func)791 netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
792 {
793           struct nm_bridge *b;
794           struct netmap_vp_adapter *na;
795           struct ifnet *iter;
796           char *name = nmr->nr_name;
797           int cmd = nmr->nr_cmd, namelen = strlen(name);
798           int error = 0, i, j;
799 
800           switch (cmd) {
801           case NETMAP_BDG_ATTACH:
802                     error = nm_bdg_attach(nmr);
803                     break;
804 
805           case NETMAP_BDG_DETACH:
806                     error = nm_bdg_detach(nmr);
807                     break;
808 
809           case NETMAP_BDG_LIST:
810                     /* this is used to enumerate bridges and ports */
811                     if (namelen) { /* look up indexes of bridge and port */
812                               if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
813                                         error = EINVAL;
814                                         break;
815                               }
816                               NMG_LOCK();
817                               b = nm_find_bridge(name, 0 /* don't create */);
818                               if (!b) {
819                                         error = ENOENT;
820                                         NMG_UNLOCK();
821                                         break;
822                               }
823 
824                               error = ENOENT;
825                               for (j = 0; j < b->bdg_active_ports; j++) {
826                                         i = b->bdg_port_index[j];
827                                         na = b->bdg_ports[i];
828                                         if (na == NULL) {
829                                                   D("---AAAAAAAAARGH-------");
830                                                   continue;
831                                         }
832                                         iter = na->up.ifp;
833                                         /* the former and the latter identify a
834                                          * virtual port and a NIC, respectively
835                                          */
836                                         if (!strcmp(iter->if_xname, name)) {
837                                                   /* bridge index */
838                                                   nmr->nr_arg1 = b - nm_bridges;
839                                                   nmr->nr_arg2 = i; /* port index */
840                                                   error = 0;
841                                                   break;
842                                         }
843                               }
844                               NMG_UNLOCK();
845                     } else {
846                               /* return the first non-empty entry starting from
847                                * bridge nr_arg1 and port nr_arg2.
848                                *
849                                * Users can detect the end of the same bridge by
850                                * seeing the new and old value of nr_arg1, and can
851                                * detect the end of all the bridge by error != 0
852                                */
853                               i = nmr->nr_arg1;
854                               j = nmr->nr_arg2;
855 
856                               NMG_LOCK();
857                               for (error = ENOENT; i < NM_BRIDGES; i++) {
858                                         b = nm_bridges + i;
859                                         if (j >= b->bdg_active_ports) {
860                                                   j = 0; /* following bridges scan from 0 */
861                                                   continue;
862                                         }
863                                         nmr->nr_arg1 = i;
864                                         nmr->nr_arg2 = j;
865                                         j = b->bdg_port_index[j];
866                                         na = b->bdg_ports[j];
867                                         iter = na->up.ifp;
868                                         strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
869                                         error = 0;
870                                         break;
871                               }
872                               NMG_UNLOCK();
873                     }
874                     break;
875 
876           case NETMAP_BDG_LOOKUP_REG:
877                     /* register a lookup function to the given bridge.
878                      * nmr->nr_name may be just bridge's name (including ':'
879                      * if it is not just NM_NAME).
880                      */
881                     if (!func) {
882                               error = EINVAL;
883                               break;
884                     }
885                     NMG_LOCK();
886                     b = nm_find_bridge(name, 0 /* don't create */);
887                     if (!b) {
888                               error = EINVAL;
889                     } else {
890                               b->nm_bdg_lookup = func;
891                     }
892                     NMG_UNLOCK();
893                     break;
894 
895           default:
896                     D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
897                     error = EINVAL;
898                     break;
899           }
900           return error;
901 }
902 
903 
904 static int
netmap_vp_krings_create(struct netmap_adapter * na)905 netmap_vp_krings_create(struct netmap_adapter *na)
906 {
907           u_int ntx, nrx, tailroom;
908           int error, i;
909           uint32_t *leases;
910 
911           /* XXX vps do not need host rings,
912            * but we crash if we don't have one
913            */
914           ntx = na->num_tx_rings + 1;
915           nrx = na->num_rx_rings + 1;
916 
917           /*
918            * Leases are attached to RX rings on vale ports
919            */
920           tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
921 
922           error = netmap_krings_create(na, ntx, nrx, tailroom);
923           if (error)
924                     return error;
925 
926           leases = na->tailroom;
927 
928           for (i = 0; i < nrx; i++) { /* Receive rings */
929                     na->rx_rings[i].nkr_leases = leases;
930                     leases += na->num_rx_desc;
931           }
932 
933           error = nm_alloc_bdgfwd(na);
934           if (error) {
935                     netmap_krings_delete(na);
936                     return error;
937           }
938 
939           return 0;
940 }
941 
/*
 * Tear down the krings of a VALE port: release the forwarding work area
 * first, then the krings themselves (the reverse of the order used by
 * netmap_vp_krings_create()).
 */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}
948 
949 
950 static int
951 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
952           struct netmap_vp_adapter *na, u_int ring_nr);
953 
954 
955 /*
956  * Grab packets from a kring, move them into the ft structure
957  * associated to the tx (input) port. Max one instance per port,
958  * filtered on input (ioctl, poll or XXX).
959  * Returns the next position in the ring.
960  */
961 static int
nm_bdg_preflush(struct netmap_vp_adapter * na,u_int ring_nr,struct netmap_kring * kring,u_int end)962 nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
963           struct netmap_kring *kring, u_int end)
964 {
965           struct netmap_ring *ring = kring->ring;
966           struct nm_bdg_fwd *ft;
967           u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
968           u_int ft_i = 0;     /* start from 0 */
969           u_int frags = 1; /* how many frags ? */
970           struct nm_bridge *b = na->na_bdg;
971 
972           /* To protect against modifications to the bridge we acquire a
973            * shared lock, waiting if we can sleep (if the source port is
974            * attached to a user process) or with a trylock otherwise (NICs).
975            */
976           ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
977           if (na->up.na_flags & NAF_BDG_MAYSLEEP)
978                     BDG_RLOCK(b);
979           else if (!BDG_RTRYLOCK(b))
980                     return 0;
981           ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
982           ft = kring->nkr_ft;
983 
984           for (; likely(j != end); j = nm_next(j, lim)) {
985                     struct netmap_slot *slot = &ring->slot[j];
986                     char *buf;
987 
988                     ft[ft_i].ft_len = slot->len;
989                     ft[ft_i].ft_flags = slot->flags;
990 
991                     ND("flags is 0x%x", slot->flags);
992                     /* this slot goes into a list so initialize the link field */
993                     ft[ft_i].ft_next = NM_FT_NULL;
994                     buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
995                               (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
996                     prefetch(buf);
997                     ++ft_i;
998                     if (slot->flags & NS_MOREFRAG) {
999                               frags++;
1000                               continue;
1001                     }
1002                     if (unlikely(netmap_verbose && frags > 1))
1003                               RD(5, "%d frags at %d", frags, ft_i - frags);
1004                     ft[ft_i - frags].ft_frags = frags;
1005                     frags = 1;
1006                     if (unlikely((int)ft_i >= bridge_batch))
1007                               ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1008           }
1009           if (frags > 1) {
1010                     D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1011                     // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1012                     ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1013                     ft[ft_i - frags].ft_frags = frags - 1;
1014           }
1015           if (ft_i)
1016                     ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1017           BDG_RUNLOCK(b);
1018           return j;
1019 }
1020 
1021 
1022 /*
1023  *---- support for virtual bridge -----
1024  */
1025 
1026 /* ----- FreeBSD if_bridge hash function ------- */
1027 
/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)

/*
 * Hash a 6-byte MAC address into a forwarding-table index.
 *
 * addr must point to at least 6 readable bytes; only addr[0..5] are used.
 * The result is masked with NM_BDG_HASH-1, so it is a valid table index
 * provided NM_BDG_HASH is a power of two.
 */
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; /* hash key */

        /*
         * Widen each byte to uint32_t before shifting: a uint8_t is
         * promoted to (signed) int, and shifting a value >= 0x80 left by
         * 24 would overflow into the sign bit, which is undefined.
         */
        b += (uint32_t)addr[5] << 8;
        b += (uint32_t)addr[4];
        a += (uint32_t)addr[3] << 24;
        a += (uint32_t)addr[2] << 16;
        a += (uint32_t)addr[1] << 8;
        a += (uint32_t)addr[0];

        mix(a, b, c);
#define BRIDGE_RTHASH_MASK    (NM_BDG_HASH-1)
        return (c & BRIDGE_RTHASH_MASK);
}

#undef mix
1065 
1066 
1067 static int
bdg_netmap_reg(struct netmap_adapter * na,int onoff)1068 bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1069 {
1070           struct netmap_vp_adapter *vpna =
1071                     (struct netmap_vp_adapter*)na;
1072           struct ifnet *ifp = na->ifp;
1073 
1074           /* the interface is already attached to the bridge,
1075            * so we only need to toggle IFCAP_NETMAP.
1076            */
1077           BDG_WLOCK(vpna->na_bdg);
1078           if (onoff) {
1079                     ifp->if_capenable |= IFCAP_NETMAP;
1080           } else {
1081                     ifp->if_capenable &= ~IFCAP_NETMAP;
1082           }
1083           BDG_WUNLOCK(vpna->na_bdg);
1084           return 0;
1085 }
1086 
1087 
1088 /*
1089  * Lookup function for a learning bridge.
1090  * Update the hash table with the source address,
1091  * and then returns the destination port index, and the
1092  * ring in *dst_ring (at the moment, always use ring 0)
1093  */
1094 u_int
netmap_bdg_learning(char * buf,u_int buf_len,uint8_t * dst_ring,struct netmap_vp_adapter * na)1095 netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1096                     struct netmap_vp_adapter *na)
1097 {
1098           struct nm_hash_ent *ht = na->na_bdg->ht;
1099           uint32_t sh, dh;
1100           u_int dst, mysrc = na->bdg_port;
1101           uint64_t smac, dmac;
1102 
1103           if (buf_len < 14) {
1104                     D("invalid buf length %d", buf_len);
1105                     return NM_BDG_NOPORT;
1106           }
1107           dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1108           smac = le64toh(*(uint64_t *)(buf + 4));
1109           smac >>= 16;
1110 
1111           /*
1112            * The hash is somewhat expensive, there might be some
1113            * worthwhile optimizations here.
1114            */
1115           if ((buf[6] & 1) == 0) { /* valid src */
1116                     uint8_t *s = buf+6;
1117                     sh = nm_bridge_rthash(s); // XXX hash of source
1118                     /* update source port forwarding entry */
1119                     ht[sh].mac = smac;  /* XXX expire ? */
1120                     ht[sh].ports = mysrc;
1121                     if (netmap_verbose)
1122                         D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1123                               s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1124           }
1125           dst = NM_BDG_BROADCAST;
1126           if ((buf[0] & 1) == 0) { /* unicast */
1127                     dh = nm_bridge_rthash(buf); // XXX hash of dst
1128                     if (ht[dh].mac == dmac) {     /* found dst */
1129                               dst = ht[dh].ports;
1130                     }
1131                     /* XXX otherwise return NM_BDG_UNKNOWN ? */
1132           }
1133           *dst_ring = 0;
1134           return dst;
1135 }
1136 
1137 
1138 /*
1139  * This flush routine supports only unicast and broadcast but a large
1140  * number of ports, and lets us replace the learn and dispatch functions.
 *
 * ft holds a batch of n forwarding entries collected from source port na;
 * ring_nr is the default destination ring handed to the lookup function.
 *
 * First pass: b->nm_bdg_lookup is asked for a destination for each packet,
 * and the packet is chained on the per-(port, ring) queue kept in the
 * scratch pad that follows ft.
 * Second pass: for every destination with pending traffic, slots are
 * leased on its rx kring, the unicast and broadcast packets are copied in
 * batch order, and completion is reported through the lease array.
 *
 * The only return statement in this function returns 0.
1141  */
1142 int
nm_bdg_flush(struct nm_bdg_fwd * ft,u_int n,struct netmap_vp_adapter * na,u_int ring_nr)1143 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1144                     u_int ring_nr)
1145 {
1146           struct nm_bdg_q *dst_ents, *brddst;
1147           uint16_t num_dsts = 0, *dsts;
1148           struct nm_bridge *b = na->na_bdg;
1149           u_int i, j, me = na->bdg_port;
1150 
1151           /*
1152            * The work area (pointed by ft) is followed by an array of
1153            * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1154            * queues per port plus one for the broadcast traffic.
1155            * Then we have an array of destination indexes.
1156            */
1157           dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1158           dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1159 
1160           /* first pass: find a destination for each packet in the batch */
          /* i advances by ft_frags, so only the head entry of each packet is visited */
1161           for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1162                     uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1163                     uint16_t dst_port, d_i;
1164                     struct nm_bdg_q *d;
1165 
1166                     ND("slot %d frags %d", i, ft[i].ft_frags);
1167                     dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
1168                               &dst_ring, na);
1169                     if (netmap_verbose > 255)
1170                               RD(5, "slot %d port %d -> %d", i, me, dst_port);
1171                     if (dst_port == NM_BDG_NOPORT)
1172                               continue; /* this packet is identified to be dropped */
1173                     else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1174                               continue;
1175                     else if (dst_port == NM_BDG_BROADCAST)
1176                               dst_ring = 0; /* broadcasts always go to ring 0 */
                    /* drop unicast traffic back to the source port or to an empty port */
1177                     else if (unlikely(dst_port == me ||
1178                         !b->bdg_ports[dst_port]))
1179                               continue;
1180 
1181                     /* get a position in the scratch pad */
1182                     d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1183                     d = dst_ents + d_i;
1184 
1185                     /* append the first fragment to the list */
1186                     if (d->bq_head == NM_FT_NULL) { /* new destination */
1187                               d->bq_head = d->bq_tail = i;
1188                               /* remember this position to be scanned later */
1189                               if (dst_port != NM_BDG_BROADCAST)
1190                                         dsts[num_dsts++] = d_i;
1191                     } else {
1192                               ft[d->bq_tail].ft_next = i;
1193                               d->bq_tail = i;
1194                     }
1195                     d->bq_len += ft[i].ft_frags;
1196           }
1197 
1198           /*
1199            * Broadcast traffic goes to ring 0 on all destinations.
1200            * So we need to add these rings to the list of ports to scan.
1201            * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1202            * expensive. We should keep a compact list of active destinations
1203            * so we could shorten this loop.
1204            */
1205           brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1206           if (brddst->bq_head != NM_FT_NULL) {
1207                     for (j = 0; likely(j < b->bdg_active_ports); j++) {
1208                               uint16_t d_i;
1209                               i = b->bdg_port_index[j];
1210                               if (unlikely(i == me))
1211                                         continue;
1212                               d_i = i * NM_BDG_MAXRINGS;
                              /* ring 0 queues that already have unicast traffic are in dsts[] already */
1213                               if (dst_ents[d_i].bq_head == NM_FT_NULL)
1214                                         dsts[num_dsts++] = d_i;
1215                     }
1216           }
1217 
1218           ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1219           /* second pass: scan destinations (XXX will be modular somehow) */
1220           for (i = 0; i < num_dsts; i++) {
1221                     struct ifnet *dst_ifp;
1222                     struct netmap_vp_adapter *dst_na;
1223                     struct netmap_kring *kring;
1224                     struct netmap_ring *ring;
                    /* NOTE(review): this j shadows the function-level j; sent is only
                     * incremented below and never read in this function.
                     */
1225                     u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1226                     u_int needed, howmany;
1227                     int retry = netmap_txsync_retry;
1228                     struct nm_bdg_q *d;
1229                     uint32_t my_start = 0, lease_idx = 0;
1230                     int nrings;
1231 
1232                     d_i = dsts[i];
1233                     ND("second pass %d port %d", i, d_i);
1234                     d = dst_ents + d_i;
1235                     // XXX fix the division
1236                     dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1237                     /* protect from the lookup function returning an inactive
1238                      * destination port
1239                      */
1240                     if (unlikely(dst_na == NULL))
1241                               goto cleanup;
1242                     if (dst_na->up.na_flags & NAF_SW_ONLY)
1243                               goto cleanup;
1244                     dst_ifp = dst_na->up.ifp;
1245                     /*
1246                      * The interface may be in !netmap mode in two cases:
1247                      * - when na is attached but not activated yet;
1248                      * - when na is being deactivated but is still attached.
1249                      */
1250                     if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1251                               ND("not in netmap mode!");
1252                               goto cleanup;
1253                     }
1254 
1255                     /* there is at least one either unicast or broadcast packet */
1256                     brd_next = brddst->bq_head;
1257                     next = d->bq_head;
1258                     /* we need to reserve this many slots. If fewer are
1259                      * available, some packets will be dropped.
1260                      * Packets may have multiple fragments, so
1261                      * there is a chance that we may not use all of the slots
1262                      * we have claimed, so we will need to handle the leftover
1263                      * ones when we regain the lock.
1264                      */
1265                     needed = d->bq_len + brddst->bq_len;
1266 
                    /* NOTE(review): is_vp is not declared in this function; this line can
                     * only build if ND() discards its arguments — confirm.
                     */
1267                     ND(5, "pass 2 dst %d is %x %s",
1268                               i, d_i, is_vp ? "virtual" : "nic/host");
                    /* recover the ring number from the scratch pad index, then fold it
                     * into the number of rx rings the destination actually has
                     */
1269                     dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1270                     nrings = dst_na->up.num_rx_rings;
1271                     if (dst_nr >= nrings)
1272                               dst_nr = dst_nr % nrings;
1273                     kring = &dst_na->up.rx_rings[dst_nr];
1274                     ring = kring->ring;
1275                     lim = kring->nkr_num_slots - 1;
1276 
                    /* re-entered via goto below; next and brd_next keep the positions
                     * reached by the previous attempt
                     */
1277 retry:
1278 
1279                     /* reserve the buffers in the queue and an entry
1280                      * to report completion, and drop lock.
1281                      * XXX this might become a helper function.
1282                      */
1283                     lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1284                     if (kring->nkr_stopped) {
1285                               lockmgr(&kring->q_lock, LK_RELEASE);
1286                               goto cleanup;
1287                     }
                    /* note: nm_notify is invoked here with q_lock held */
1288                     if (dst_na->retry) {
1289                               dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1290                     }
1291                     my_start = j = kring->nkr_hwlease;
1292                     howmany = nm_kr_space(kring, 1);
1293                     if (needed < howmany)
1294                               howmany = needed;
1295                     lease_idx = nm_kr_lease(kring, howmany, 1);
1296                     lockmgr(&kring->q_lock, LK_RELEASE);
1297 
1298                     /* only retry if we need more than available slots */
1299                     if (retry && needed <= howmany)
1300                               retry = 0;
1301 
1302                     /* copy to the destination queue */
1303                     while (howmany > 0) {
1304                               struct netmap_slot *slot;
1305                               struct nm_bdg_fwd *ft_p, *ft_end;
1306                               u_int cnt;
1307 
1308                               /* find the queue from which we pick next packet.
1309                                * NM_FT_NULL is always higher than valid indexes
1310                                * so we never dereference it if the other list
1311                                * has packets (and if both are empty we never
1312                                * get here).
1313                                */
1314                               if (next < brd_next) {
1315                                         ft_p = ft + next;
1316                                         next = ft_p->ft_next;
1317                               } else { /* insert broadcast */
1318                                         ft_p = ft + brd_next;
1319                                         brd_next = ft_p->ft_next;
1320                               }
1321                               cnt = ft_p->ft_frags; // cnt > 0
1322                               if (unlikely(cnt > howmany))
1323                                   break; /* no more space */
1324                               howmany -= cnt;
1325                               if (netmap_verbose && cnt > 1)
1326                                         RD(5, "rx %d frags to %d", cnt, j);
1327                               ft_end = ft_p + cnt;
1328                               do {
1329                                   void *dst, *src = ft_p->ft_buf;
                                  /* copy length: ft_len rounded up to a multiple of 64 */
1330                                   size_t len = (ft_p->ft_len + 63) & ~63;
1331 
1332                                   slot = &ring->slot[j];
1333                                   dst = BDG_NMB(&dst_na->up, slot);
1334                                   /* (len above is already rounded to a multiple of 64) */
1335 
1336                                   ND("send %d %d bytes at %s:%d",
1337                                         i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1338                                   if (ft_p->ft_flags & NS_INDIRECT) {
1339                                         if (copyin(src, dst, len)) {
1340                                                   // invalid user pointer, pretend len is 0
1341                                                   ft_p->ft_len = 0;
1342                                         }
1343                                   } else {
1344                                         //memcpy(dst, src, len);
1345                                         pkt_copy(src, dst, (int)len);
1346                                   }
1347                                   slot->len = ft_p->ft_len;
                                  /* fragment count goes in the bits above bit 7; NS_MOREFRAG is
                                   * set on every fragment and cleared on the last one after the loop
                                   */
1348                                   slot->flags = (cnt << 8)| NS_MOREFRAG;
1349                                   j = nm_next(j, lim);
1350                                   ft_p++;
1351                                   sent++;
1352                               } while (ft_p != ft_end);
1353                               slot->flags = (cnt << 8); /* clear flag on last entry */
1354                               /* are we done ? */
1355                               if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1356                                         break;
1357                     }
1358                     {
1359                         /* current position */
1360                         uint32_t *p = kring->nkr_leases; /* shorthand */
1361                         uint32_t update_pos;
1362                         int still_locked = 1;
1363 
1364                         lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1365                         if (unlikely(howmany > 0)) {
1366                               /* not used all bufs. If i am the last one
1367                                * i can recover the slots, otherwise must
1368                                * fill them with 0 to mark empty packets.
1369                                */
1370                               ND("leftover %d bufs", howmany);
1371                               if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1372                                   /* yes i am the last one */
1373                                   ND("roll back nkr_hwlease to %d", j);
1374                                   kring->nkr_hwlease = j;
1375                               } else {
1376                                   while (howmany-- > 0) {
1377                                         ring->slot[j].len = 0;
1378                                         ring->slot[j].flags = 0;
1379                                         j = nm_next(j, lim);
1380                                   }
1381                               }
1382                         }
1383                         p[lease_idx] = j; /* report I am done */
1384 
1385                         update_pos = nm_kr_rxpos(kring);
1386 
1387                         if (my_start == update_pos) {
1388                               /* all slots before my_start have been reported,
1389                                * so scan subsequent leases to see if other ranges
1390                                * have been completed, and do a selwakeup or txsync.
1391                              */
1392                               while (lease_idx != kring->nkr_lease_idx &&
1393                                         p[lease_idx] != NR_NOSLOT) {
1394                                   j = p[lease_idx];
1395                                   p[lease_idx] = NR_NOSLOT;
1396                                   lease_idx = nm_next(lease_idx, lim);
1397                               }
1398                               /* j is the new 'write' position. j != my_start
1399                                * means there are new buffers to report
1400                                */
1401                               if (likely(j != my_start)) {
1402                                         uint32_t old_avail = kring->nr_hwavail;
1403 
                                        /* slots between nr_hwcur and j, modulo the ring size */
1404                                         kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1405                                                   j - kring->nr_hwcur :
1406                                                   j + lim + 1 - kring->nr_hwcur;
1407                                         if (kring->nr_hwavail < old_avail) {
1408                                                   D("avail shrink %d -> %d",
1409                                                             old_avail, kring->nr_hwavail);
1410                                         }
                                        /* the notify is issued before q_lock is released */
1411                                         dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1412                                         still_locked = 0;
1413                                         lockmgr(&kring->q_lock, LK_RELEASE);
1414                                         if (dst_na->retry && retry--)
1415                                                   goto retry;
1416                               }
1417                         }
1418                         if (still_locked)
1419                               lockmgr(&kring->q_lock, LK_RELEASE);
1420                     }
1421 cleanup:
                    /* reset this destination's queue so the scratch pad is clean for the next batch */
1422                     d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1423                     d->bq_len = 0;
1424           }
1425           brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1426           brddst->bq_len = 0;
1427           return 0;
1428 }
1429 
1430 static int
netmap_vp_txsync(struct netmap_vp_adapter * na,u_int ring_nr,int flags)1431 netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1432 {
1433           struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1434           struct netmap_ring *ring = kring->ring;
1435           u_int j, k, lim = kring->nkr_num_slots - 1;
1436 
1437           k = ring->cur;
1438           if (k > lim)
1439                     return netmap_ring_reinit(kring);
1440 
1441           if (bridge_batch <= 0) { /* testing only */
1442                     j = k; // used all
1443                     goto done;
1444           }
1445           if (bridge_batch > NM_BDG_BATCH)
1446                     bridge_batch = NM_BDG_BATCH;
1447 
1448           j = nm_bdg_preflush(na, ring_nr, kring, k);
1449           if (j != k)
1450                     D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1451           /* k-j modulo ring size is the number of slots processed */
1452           if (k < j)
1453                     k += kring->nkr_num_slots;
1454           kring->nr_hwavail = lim - (k - j);
1455 
1456 done:
1457           kring->nr_hwcur = j;
1458           ring->avail = kring->nr_hwavail;
1459           if (netmap_verbose)
1460                     D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1461           return 0;
1462 }
1463 
1464 
1465 /*
1466  * main dispatch routine for the bridge.
1467  * We already know that only one thread is running this.
1468  * we must run nm_bdg_preflush without lock.
1469  */
1470 static int
bdg_netmap_txsync(struct netmap_adapter * na,u_int ring_nr,int flags)1471 bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1472 {
1473           struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1474           return netmap_vp_txsync(vpna, ring_nr, flags);
1475 }
1476 
1477 
1478 /*
1479  * user process reading from a VALE switch.
1480  * Already protected against concurrent calls from userspace,
1481  * but we must acquire the queue's lock to protect against
1482  * writers on the same queue.
1483  */
1484 static int
bdg_netmap_rxsync(struct netmap_adapter * na,u_int ring_nr,int flags)1485 bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1486 {
1487           struct netmap_kring *kring = &na->rx_rings[ring_nr];
1488           struct netmap_ring *ring = kring->ring;
1489           u_int j, lim = kring->nkr_num_slots - 1;
1490           u_int k = ring->cur, resvd = ring->reserved;
1491           int n;
1492 
1493           lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1494           if (k > lim) {
1495                     D("ouch dangerous reset!!!");
1496                     n = netmap_ring_reinit(kring);
1497                     goto done;
1498           }
1499 
1500           /* skip past packets that userspace has released */
1501           j = kring->nr_hwcur;    /* netmap ring index */
1502           if (resvd > 0) {
1503                     if (resvd + ring->avail >= lim + 1) {
1504                               D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1505                               ring->reserved = resvd = 0; // XXX panic...
1506                     }
1507                     k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1508           }
1509 
1510           if (j != k) { /* userspace has released some packets. */
1511                     n = k - j;
1512                     if (n < 0)
1513                               n += kring->nkr_num_slots;
1514                     ND("userspace releases %d packets", n);
1515                     for (n = 0; likely(j != k); n++) {
1516                               struct netmap_slot *slot = &ring->slot[j];
1517                               void *addr = BDG_NMB(na, slot);
1518 
1519                               if (addr == netmap_buffer_base) { /* bad buf */
1520                                         D("bad buffer index %d, ignore ?",
1521                                                   slot->buf_idx);
1522                               }
1523                               slot->flags &= ~NS_BUF_CHANGED;
1524                               j = nm_next(j, lim);
1525                     }
1526                     kring->nr_hwavail -= n;
1527                     kring->nr_hwcur = k;
1528           }
1529           /* tell userspace that there are new packets */
1530           ring->avail = kring->nr_hwavail - resvd;
1531           n = 0;
1532 done:
1533           lockmgr(&kring->q_lock, LK_RELEASE);
1534           return n;
1535 }
1536 
1537 static int
bdg_netmap_attach(struct netmap_adapter * arg)1538 bdg_netmap_attach(struct netmap_adapter *arg)
1539 {
1540           struct netmap_vp_adapter *vpna;
1541           struct netmap_adapter *na;
1542           int error;
1543 
1544           vpna = kmalloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1545           if (vpna == NULL)
1546                     return ENOMEM;
1547           na = &vpna->up;
1548           *na = *arg;
1549           na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1550           na->nm_txsync = bdg_netmap_txsync;
1551           na->nm_rxsync = bdg_netmap_rxsync;
1552           na->nm_register = bdg_netmap_reg;
1553           na->nm_dtor = netmap_adapter_vp_dtor;
1554           na->nm_krings_create = netmap_vp_krings_create;
1555           na->nm_krings_delete = netmap_vp_krings_delete;
1556           na->nm_mem = netmap_mem_private_new(NM_IFPNAME(arg->ifp),
1557                               na->num_tx_rings, na->num_tx_desc,
1558                               na->num_rx_rings, na->num_rx_desc);
1559           /* other nmd fields are set in the common routine */
1560           error = netmap_attach_common(na);
1561           if (error) {
1562                     kfree(vpna, M_DEVBUF);
1563                     return error;
1564           }
1565           return 0;
1566 }
1567 
1568 static void
netmap_bwrap_dtor(struct netmap_adapter * na)1569 netmap_bwrap_dtor(struct netmap_adapter *na)
1570 {
1571           struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1572           struct netmap_adapter *hwna = bna->hwna;
1573           struct nm_bridge *b = bna->up.na_bdg,
1574                     *bh = bna->host.na_bdg;
1575           struct ifnet *ifp = na->ifp;
1576 
1577           ND("na %p", na);
1578 
1579           if (b) {
1580                     netmap_bdg_detach_common(b, bna->up.bdg_port,
1581                               (bh ? bna->host.bdg_port : -1));
1582           }
1583 
1584           hwna->na_private = NULL;
1585           netmap_adapter_put(hwna);
1586 
1587           bzero(ifp, sizeof(*ifp));
1588           kfree(ifp, M_DEVBUF);
1589           na->ifp = NULL;
1590 
1591 }
1592 
1593 /*
1594  * Pass packets from nic to the bridge.
1595  * XXX TODO check locking: this is called from the interrupt
1596  * handler so we should make sure that the interface is not
1597  * disconnected while passing down an interrupt.
1598  *
1599  * Note, no user process can access this NIC so we can ignore
1600  * the info in the 'ring'.
1601  */
1602 /* callback that overwrites the hwna notify callback.
1603  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1604  * The bridge wrapper then sends the packets through the bridge.
1605  */
1606 static int
netmap_bwrap_intr_notify(struct netmap_adapter * na,u_int ring_nr,enum txrx tx,int flags)1607 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1608 {
1609           struct ifnet *ifp = na->ifp;
1610           struct netmap_bwrap_adapter *bna = na->na_private;
1611           struct netmap_vp_adapter *hostna = &bna->host;
1612           struct netmap_kring *kring, *bkring;
1613           struct netmap_ring *ring;
1614           int is_host_ring = ring_nr == na->num_rx_rings;
1615           struct netmap_vp_adapter *vpna = &bna->up;
1616           int error = 0;
1617 
1618           ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1619 
1620           if (flags & NAF_DISABLE_NOTIFY) {
1621                     kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1622                     bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1623                     if (kring->nkr_stopped)
1624                               netmap_disable_ring(bkring);
1625                     else
1626                               bkring->nkr_stopped = 0;
1627                     return 0;
1628           }
1629 
1630           if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1631                     return 0;
1632 
1633           if (tx == NR_TX)
1634                     return 0;
1635 
1636           kring = &na->rx_rings[ring_nr];
1637           ring = kring->ring;
1638 
1639           /* make sure the ring is not disabled */
1640           if (nm_kr_tryget(kring))
1641                     return 0;
1642 
1643           if (is_host_ring && hostna->na_bdg == NULL) {
1644                     error = bna->save_notify(na, ring_nr, tx, flags);
1645                     goto put_out;
1646           }
1647 
1648           if (is_host_ring) {
1649                     vpna = hostna;
1650                     ring_nr = 0;
1651           } else {
1652                     /* fetch packets that have arrived.
1653                      * XXX maybe do this in a loop ?
1654                      */
1655                     error = na->nm_rxsync(na, ring_nr, 0);
1656                     if (error)
1657                               goto put_out;
1658           }
1659           if (kring->nr_hwavail == 0 && netmap_verbose) {
1660                     D("how strange, interrupt with no packets on %s",
1661                               NM_IFPNAME(ifp));
1662                     goto put_out;
1663           }
1664           /* XXX avail ? */
1665           ring->cur = nm_kr_rxpos(kring);
1666           netmap_vp_txsync(vpna, ring_nr, flags);
1667 
1668           if (!is_host_ring)
1669                     error = na->nm_rxsync(na, ring_nr, 0);
1670 
1671 put_out:
1672           nm_kr_put(kring);
1673           return error;
1674 }
1675 
1676 static int
netmap_bwrap_register(struct netmap_adapter * na,int onoff)1677 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1678 {
1679           struct netmap_bwrap_adapter *bna =
1680                     (struct netmap_bwrap_adapter *)na;
1681           struct netmap_adapter *hwna = bna->hwna;
1682           struct netmap_vp_adapter *hostna = &bna->host;
1683           int error;
1684 
1685           ND("%s %d", NM_IFPNAME(ifp), onoff);
1686 
1687           if (onoff) {
1688                     int i;
1689 
1690                     hwna->na_lut = na->na_lut;
1691                     hwna->na_lut_objtotal = na->na_lut_objtotal;
1692 
1693                     if (hostna->na_bdg) {
1694                               hostna->up.na_lut = na->na_lut;
1695                               hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1696                     }
1697 
1698                     /* cross-link the netmap rings */
1699                     for (i = 0; i <= na->num_tx_rings; i++) {
1700                               hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1701                               hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1702                     }
1703                     for (i = 0; i <= na->num_rx_rings; i++) {
1704                               hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1705                               hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1706                     }
1707           }
1708 
1709           if (hwna->ifp) {
1710                     error = hwna->nm_register(hwna, onoff);
1711                     if (error)
1712                               return error;
1713           }
1714 
1715           bdg_netmap_reg(na, onoff);
1716 
1717           if (onoff) {
1718                     bna->save_notify = hwna->nm_notify;
1719                     hwna->nm_notify = netmap_bwrap_intr_notify;
1720           } else {
1721                     hwna->nm_notify = bna->save_notify;
1722                     hwna->na_lut = NULL;
1723                     hwna->na_lut_objtotal = 0;
1724           }
1725 
1726           return 0;
1727 }
1728 
1729 static int
netmap_bwrap_config(struct netmap_adapter * na,u_int * txr,u_int * txd,u_int * rxr,u_int * rxd)1730 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1731                                             u_int *rxr, u_int *rxd)
1732 {
1733           struct netmap_bwrap_adapter *bna =
1734                     (struct netmap_bwrap_adapter *)na;
1735           struct netmap_adapter *hwna = bna->hwna;
1736 
1737           /* forward the request */
1738           netmap_update_config(hwna);
1739           /* swap the results */
1740           *txr = hwna->num_rx_rings;
1741           *txd = hwna->num_rx_desc;
1742           *rxr = hwna->num_tx_rings;
1743           *rxd = hwna->num_rx_desc;
1744 
1745           return 0;
1746 }
1747 
1748 static int
netmap_bwrap_krings_create(struct netmap_adapter * na)1749 netmap_bwrap_krings_create(struct netmap_adapter *na)
1750 {
1751           struct netmap_bwrap_adapter *bna =
1752                     (struct netmap_bwrap_adapter *)na;
1753           struct netmap_adapter *hwna = bna->hwna;
1754           struct netmap_adapter *hostna = &bna->host.up;
1755           int error;
1756 
1757           ND("%s", NM_IFPNAME(na->ifp));
1758 
1759           error = netmap_vp_krings_create(na);
1760           if (error)
1761                     return error;
1762 
1763           error = hwna->nm_krings_create(hwna);
1764           if (error) {
1765                     netmap_vp_krings_delete(na);
1766                     return error;
1767           }
1768 
1769           hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1770           hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1771 
1772           return 0;
1773 }
1774 
1775 static void
netmap_bwrap_krings_delete(struct netmap_adapter * na)1776 netmap_bwrap_krings_delete(struct netmap_adapter *na)
1777 {
1778           struct netmap_bwrap_adapter *bna =
1779                     (struct netmap_bwrap_adapter *)na;
1780           struct netmap_adapter *hwna = bna->hwna;
1781 
1782           ND("%s", NM_IFPNAME(na->ifp));
1783 
1784           hwna->nm_krings_delete(hwna);
1785           netmap_vp_krings_delete(na);
1786 }
1787 
1788 /* notify method for the bridge-->hwna direction */
1789 static int
netmap_bwrap_notify(struct netmap_adapter * na,u_int ring_n,enum txrx tx,int flags)1790 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1791 {
1792           struct netmap_bwrap_adapter *bna =
1793                     (struct netmap_bwrap_adapter *)na;
1794           struct netmap_adapter *hwna = bna->hwna;
1795           struct netmap_kring *kring, *hw_kring;
1796           struct netmap_ring *ring;
1797           u_int lim, k;
1798           int error = 0;
1799 
1800           if (tx == NR_TX)
1801                   return ENXIO;
1802 
1803           kring = &na->rx_rings[ring_n];
1804           hw_kring = &hwna->tx_rings[ring_n];
1805           ring = kring->ring;
1806 
1807           lim = kring->nkr_num_slots - 1;
1808           k = nm_kr_rxpos(kring);
1809 
1810           if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1811                     return 0;
1812           ring->cur = k;
1813           ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1814                     NM_IFPNAME(na->ifp), ring_n,
1815                     kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1816                     ring->cur, ring->avail, ring->reserved,
1817                     hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1818           if (ring_n == na->num_rx_rings) {
1819                     netmap_txsync_to_host(hwna);
1820           } else {
1821                     error = hwna->nm_txsync(hwna, ring_n, flags);
1822           }
1823           kring->nr_hwcur = ring->cur;
1824           kring->nr_hwavail = 0;
1825           kring->nr_hwreserved = lim - ring->avail;
1826           ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1827                     NM_IFPNAME(na->ifp), ring_n,
1828                     kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1829                     ring->cur, ring->avail, ring->reserved,
1830                     hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1831 
1832           return error;
1833 }
1834 
1835 static int
netmap_bwrap_host_notify(struct netmap_adapter * na,u_int ring_n,enum txrx tx,int flags)1836 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1837 {
1838           struct netmap_bwrap_adapter *bna = na->na_private;
1839           struct netmap_adapter *port_na = &bna->up.up;
1840           if (tx == NR_TX || ring_n != 0)
1841                     return ENXIO;
1842           return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1843 }
1844 
1845 /* attach a bridge wrapper to the 'real' device */
1846 static int
netmap_bwrap_attach(struct ifnet * fake,struct ifnet * real)1847 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1848 {
1849           struct netmap_bwrap_adapter *bna;
1850           struct netmap_adapter *na;
1851           struct netmap_adapter *hwna = NA(real);
1852           struct netmap_adapter *hostna;
1853           int error;
1854 
1855 
1856           bna = kmalloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1857           if (bna == NULL)
1858                     return ENOMEM;
1859 
1860           na = &bna->up.up;
1861           na->ifp = fake;
1862           /* fill the ring data for the bwrap adapter with rx/tx meanings
1863            * swapped. The real cross-linking will be done during register,
1864            * when all the krings will have been created.
1865            */
1866           na->num_rx_rings = hwna->num_tx_rings;
1867           na->num_tx_rings = hwna->num_rx_rings;
1868           na->num_tx_desc = hwna->num_rx_desc;
1869           na->num_rx_desc = hwna->num_tx_desc;
1870           na->nm_dtor = netmap_bwrap_dtor;
1871           na->nm_register = netmap_bwrap_register;
1872           // na->nm_txsync = netmap_bwrap_txsync;
1873           // na->nm_rxsync = netmap_bwrap_rxsync;
1874           na->nm_config = netmap_bwrap_config;
1875           na->nm_krings_create = netmap_bwrap_krings_create;
1876           na->nm_krings_delete = netmap_bwrap_krings_delete;
1877           na->nm_notify = netmap_bwrap_notify;
1878           na->nm_mem = hwna->nm_mem;
1879           na->na_private = na; /* prevent NIOCREGIF */
1880           bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1881 
1882           bna->hwna = hwna;
1883           netmap_adapter_get(hwna);
1884           hwna->na_private = bna; /* weak reference */
1885 
1886           hostna = &bna->host.up;
1887           hostna->ifp = hwna->ifp;
1888           hostna->num_tx_rings = 1;
1889           hostna->num_tx_desc = hwna->num_rx_desc;
1890           hostna->num_rx_rings = 1;
1891           hostna->num_rx_desc = hwna->num_tx_desc;
1892           // hostna->nm_txsync = netmap_bwrap_host_txsync;
1893           // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1894           hostna->nm_notify = netmap_bwrap_host_notify;
1895           hostna->nm_mem = na->nm_mem;
1896           hostna->na_private = bna;
1897 
1898           D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1899                     na->num_tx_rings, na->num_tx_desc,
1900                     na->num_rx_rings, na->num_rx_desc);
1901 
1902           error = netmap_attach_common(na);
1903           if (error) {
1904                     netmap_adapter_put(hwna);
1905                     kfree(bna, M_DEVBUF);
1906                     return error;
1907           }
1908           return 0;
1909 }
1910 
1911 void
netmap_init_bridges(void)1912 netmap_init_bridges(void)
1913 {
1914           int i;
1915           bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1916           for (i = 0; i < NM_BRIDGES; i++)
1917                     BDG_RWINIT(&nm_bridges[i]);
1918 }
1919 #endif /* WITH_VALE */
1920