1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo
5 * Copyright (C) 2013-2016 Universita` di Pisa
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 /*
31 *
32 * The header contains the definitions of constants and function
33 * prototypes used only in kernelspace.
34 */
35
36 #ifndef _NET_NETMAP_KERN_H_
37 #define _NET_NETMAP_KERN_H_
38
/*
 * Compile-time selection of the optional netmap subsystems.
 * Each WITH_* macro defined here enables the code guarded by the
 * matching #ifdef (e.g. WITH_PIPES, WITH_MONITOR, WITH_GENERIC).
 */
#if defined(linux)

/* On linux every feature follows the corresponding CONFIG_NETMAP_* option. */
#if defined(CONFIG_NETMAP_EXTMEM)
#define WITH_EXTMEM
#endif
#if defined(CONFIG_NETMAP_VALE)
#define WITH_VALE
#endif
#if defined(CONFIG_NETMAP_PIPE)
#define WITH_PIPES
#endif
#if defined(CONFIG_NETMAP_MONITOR)
#define WITH_MONITOR
#endif
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
#if defined(CONFIG_NETMAP_PTNETMAP)
#define WITH_PTNETMAP
#endif
#if defined(CONFIG_NETMAP_SINK)
#define WITH_SINK
#endif
#if defined(CONFIG_NETMAP_NULL)
#define WITH_NMNULL
#endif

#elif defined (_WIN32)
/* Fixed feature set on Windows (WITH_EXTMEM is not enabled here). */
#define WITH_VALE	// comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
#define WITH_NMNULL

#else	/* neither linux nor windows */
/* Fixed feature set for the remaining platforms. */
#define WITH_VALE	// comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
#define WITH_EXTMEM
#define WITH_NMNULL
#endif
81
82 #if defined(__FreeBSD__)
83 #include <sys/selinfo.h>
84
85 #define likely(x) __builtin_expect((long)!!(x), 1L)
86 #define unlikely(x) __builtin_expect((long)!!(x), 0L)
87 #define __user
88
#define	NM_LOCK_T	struct mtx	/* low level spinlock, used to protect queues */

/*
 * NM_MTX_*: sleepable lock, mapped on the sx_* primitives.
 * All the macros take the lock object itself, not a pointer to it.
 */
#define NM_MTX_T	struct sx	/* OS-specific mutex (sleepable) */
/* The spelling of the macro argument (#m) is passed as second argument. */
#define NM_MTX_INIT(m)		sx_init(&(m), #m)
#define NM_MTX_DESTROY(m)	sx_destroy(&(m))
#define NM_MTX_LOCK(m)		sx_xlock(&(m))
/*
 * Busy-wait variant: keeps calling sx_try_xlock() until it succeeds.
 * NOTE(review): the expansion already ends with ';' (the empty loop body),
 * so "NM_MTX_SPINLOCK(x);" cannot be the unbraced body of an if/else.
 */
#define NM_MTX_SPINLOCK(m)	while (!sx_try_xlock(&(m))) ;
#define NM_MTX_UNLOCK(m)	sx_xunlock(&(m))
/* Asserts, through sx_assert(), that the lock is held exclusively. */
#define NM_MTX_ASSERT(m)	sx_assert(&(m), SA_XLOCKED)
98
99 #define NM_SELINFO_T struct nm_selinfo
100 #define NM_SELRECORD_T struct thread
101 #define MBUF_LEN(m) ((m)->m_pkthdr.len)
102 #define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
103 #define MBUF_TRANSMIT(na, ifp, m) ((na)->if_transmit(ifp, m))
104 #define GEN_TX_MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
105
106 #define NM_ATOMIC_T volatile int /* required by atomic/bitops.h */
107 /* atomic operations */
108 #include <machine/atomic.h>
109 #define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
110 #define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0)
111
112 #define WNA(_ifp) (_ifp)->if_netmap
113
114 struct netmap_adapter *netmap_getna(if_t ifp);
115
/*
 * Accessors for the m_ext.ext_count field of an mbuf.
 * MBUF_REFCNT(m) reads the counter, SET_MBUF_REFCNT(m, x) stores x in it
 * and evaluates to the stored value.
 * Both expansions are fully parenthesized, so they can be used inside
 * larger expressions (e.g. in a comparison) without precedence surprises.
 */
#define MBUF_REFCNT(m)		((m)->m_ext.ext_count)
#define SET_MBUF_REFCNT(m, x)	((m)->m_ext.ext_count = (x))
118
119 #define MBUF_QUEUED(m) 1
120
/*
 * FreeBSD definition of NM_SELINFO_T: the state needed to notify
 * select(2)/poll(2) and kqueue(9) waiters.
 */
struct nm_selinfo {
	/* Support for select(2) and poll(2). */
	struct selinfo si;
	/* Support for kqueue(9). See comments in netmap_freebsd.c */
	struct taskqueue *ntfytq;
	struct task ntfytask;
	struct mtx m;
	char mtxname[32];	/* presumably the name of 'm' -- confirm in netmap_freebsd.c */
	int kqueue_users;
};
131
132
133 struct hrtimer {
134 /* Not used in FreeBSD. */
135 };
136
137 #define NM_BNS_GET(b)
138 #define NM_BNS_PUT(b)
139
140 #elif defined (linux)
141
142 #define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
143 #define NM_SELINFO_T wait_queue_head_t
144 #define MBUF_LEN(m) ((m)->len)
/*
 * Transmit 'm' on 'ifp' through the ndo_start_xmit method of the
 * net_device_ops stored in (na)->if_transmit.
 * The value returned by ndo_start_xmit is discarded: the macro always
 * evaluates to 0.
 * 'm' is evaluated twice, so it must not have side effects; every
 * argument is parenthesized so that arbitrary expressions can be passed.
 */
#define MBUF_TRANSMIT(na, ifp, m)						\
	({									\
		/* Avoid infinite recursion with generic. */			\
		(m)->priority = NM_MAGIC_PRIORITY_TX;				\
		(((struct net_device_ops *)(na)->if_transmit)->ndo_start_xmit((m), (ifp))); \
		0;								\
	})
152
153 /* See explanation in nm_os_generic_xmit_frame. */
154 #define GEN_TX_MBUF_IFP(m) ((struct ifnet *)skb_shinfo(m)->destructor_arg)
155
156 #define NM_ATOMIC_T volatile long unsigned int
157
158 #define NM_MTX_T struct mutex /* OS-specific sleepable lock */
159 #define NM_MTX_INIT(m) mutex_init(&(m))
160 #define NM_MTX_DESTROY(m) do { (void)(m); } while (0)
161 #define NM_MTX_LOCK(m) mutex_lock(&(m))
162 #define NM_MTX_UNLOCK(m) mutex_unlock(&(m))
163 #define NM_MTX_ASSERT(m) mutex_is_locked(&(m))
164
165 #ifndef DEV_NETMAP
166 #define DEV_NETMAP
167 #endif /* DEV_NETMAP */
168
169 #elif defined (__APPLE__)
170
171 #warning apple support is incomplete.
172 #define likely(x) __builtin_expect(!!(x), 1)
173 #define unlikely(x) __builtin_expect(!!(x), 0)
174 #define NM_LOCK_T IOLock *
175 #define NM_SELINFO_T struct selinfo
176 #define MBUF_LEN(m) ((m)->m_pkthdr.len)
177
178 #elif defined (_WIN32)
179 #include "../../../WINDOWS/win_glue.h"
180
181 #define NM_SELRECORD_T IO_STACK_LOCATION
182 #define NM_SELINFO_T win_SELINFO // see win_glue.h
183 #define NM_LOCK_T win_spinlock_t // see win_glue.h
#define NM_MTX_T		KGUARDED_MUTEX	/* OS-specific mutex (sleepable) */

/*
 * NM_MTX_*: sleepable lock, mapped on the guarded mutex routines.
 * All the macros take the mutex object itself, not a pointer to it.
 * None of the expansions ends with a ';' (the caller supplies it), so
 * that "NM_MTX_INIT(x);" is a single statement and can be used as the
 * unbraced body of an if/else.
 */
#define NM_MTX_INIT(m)		KeInitializeGuardedMutex(&(m))
#define NM_MTX_DESTROY(m)	do { (void)(m); } while (0)
#define NM_MTX_LOCK(m)		KeAcquireGuardedMutex(&(m))
#define NM_MTX_UNLOCK(m)	KeReleaseGuardedMutex(&(m))
/*
 * NOTE(review): this compares the address of m.Count with 0, which is
 * always true, so the assertion cannot fail. The expansion is left as it
 * was because the intended test on the mutex state is not evident from
 * this header -- confirm and fix separately.
 */
#define NM_MTX_ASSERT(m)	assert(&m.Count>0)
191
192 //These linknames are for the NDIS driver
193 #define NETMAP_NDIS_LINKNAME_STRING L"\\DosDevices\\NMAPNDIS"
194 #define NETMAP_NDIS_NTDEVICE_STRING L"\\Device\\NMAPNDIS"
195
196 //Definition of internal driver-to-driver ioctl codes
197 #define NETMAP_KERNEL_XCHANGE_POINTERS _IO('i', 180)
198 #define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL _IO_direct('i', 195)
199
200 typedef struct hrtimer{
201 KTIMER timer;
202 BOOLEAN active;
203 KDPC deferred_proc;
204 };
205
206 /* MSVC does not have likely/unlikely support */
207 #ifdef _MSC_VER
208 #define likely(x) (x)
209 #define unlikely(x) (x)
210 #else
211 #define likely(x) __builtin_expect((long)!!(x), 1L)
212 #define unlikely(x) __builtin_expect((long)!!(x), 0L)
213 #endif //_MSC_VER
214
215 #else
216
217 #error unsupported platform
218
219 #endif /* end - platform-specific code */
220
221 #ifndef _WIN32 /* support for emulated sysctl */
222 #define SYSBEGIN(x)
223 #define SYSEND
224 #endif /* _WIN32 */
225
226 #define NM_ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))
227
228 #define NMG_LOCK_T NM_MTX_T
229 #define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock)
230 #define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock)
231 #define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock)
232 #define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
233 #define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
234
235 #if defined(__FreeBSD__)
236 #define nm_prerr_int printf
237 #define nm_prinf_int printf
238 #elif defined (_WIN32)
239 #define nm_prerr_int DbgPrint
240 #define nm_prinf_int DbgPrint
241 #elif defined(linux)
242 #define nm_prerr_int(fmt, arg...) printk(KERN_ERR fmt, ##arg)
243 #define nm_prinf_int(fmt, arg...) printk(KERN_INFO fmt, ##arg)
244 #endif
245
/*
 * nm_prinf() / nm_prerr(): printf-like logging through nm_prinf_int() /
 * nm_prerr_int().
 *
 * Every message is prefixed with the current time obtained from
 * microtime() (seconds modulo 1000, then microseconds), the source line
 * and the name of the calling function, and is terminated by a newline.
 * 'format' must be a string literal, because it is concatenated with the
 * prefix and with the trailing "\n".
 *
 * The modulo is computed on the full-width tv_sec and only the result is
 * converted to int: converting tv_sec first would truncate it and print a
 * wrong (possibly negative) value once tv_sec no longer fits in an int.
 */
#define nm_prinf(format, ...)					\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		nm_prinf_int("%03d.%06d [%4d] %-25s " format "\n",\
		    (int)(__xxts.tv_sec % 1000), (int)__xxts.tv_usec, \
		    __LINE__, __FUNCTION__, ##__VA_ARGS__);	\
	} while (0)

#define nm_prerr(format, ...)					\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		nm_prerr_int("%03d.%06d [%4d] %-25s " format "\n",\
		    (int)(__xxts.tv_sec % 1000), (int)__xxts.tv_usec, \
		    __LINE__, __FUNCTION__, ##__VA_ARGS__);	\
	} while (0)
263
264 /* Disabled printf (used to be nm_prdis). */
265 #define nm_prdis(format, ...)
266
/*
 * Rate limited nm_prinf(): 'lps' indicates how many messages per second
 * may be printed from this call site.
 *
 * Each expansion owns a pair of static variables: t0 remembers the value
 * of time_second seen by the last reset, __cnt counts the invocations
 * since then. When time_second changes the counter restarts from 0, and
 * the message is printed only while the counter is below 'lps'.
 * 'lps' is parenthesized so that any expression (e.g. a conditional) can
 * be passed as the limit.
 */
#define nm_prlim(lps, format, ...)				\
	do {							\
		static int t0, __cnt;				\
		if (t0 != time_second) {			\
			t0 = time_second;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < (lps))				\
			nm_prinf(format, ##__VA_ARGS__);	\
	} while (0)
278
279 struct netmap_adapter;
280 struct nm_bdg_fwd;
281 struct nm_bridge;
282 struct netmap_priv_d;
283 struct nm_bdg_args;
284
285 /* os-specific NM_SELINFO_T initialization/destruction functions */
286 int nm_os_selinfo_init(NM_SELINFO_T *, const char *name);
287 void nm_os_selinfo_uninit(NM_SELINFO_T *);
288
289 const char *nm_dump_buf(char *p, int len, int lim, char *dst);
290
291 void nm_os_selwakeup(NM_SELINFO_T *si);
292 void nm_os_selrecord(NM_SELRECORD_T *sr, NM_SELINFO_T *si);
293
294 int nm_os_ifnet_init(void);
295 void nm_os_ifnet_fini(void);
296 void nm_os_ifnet_lock(void);
297 void nm_os_ifnet_unlock(void);
298
299 unsigned nm_os_ifnet_mtu(struct ifnet *ifp);
300
301 void nm_os_get_module(void);
302 void nm_os_put_module(void);
303
304 void netmap_make_zombie(struct ifnet *);
305 void netmap_undo_zombie(struct ifnet *);
306
307 /* os independent alloc/realloc/free */
308 void *nm_os_malloc(size_t);
309 void *nm_os_vmalloc(size_t);
310 void *nm_os_realloc(void *, size_t new_size, size_t old_size);
311 void nm_os_free(void *);
312 void nm_os_vfree(void *);
313
314 /* os specific attach/detach enter/exit-netmap-mode routines */
315 void nm_os_onattach(struct ifnet *);
316 void nm_os_ondetach(struct ifnet *);
317 void nm_os_onenter(struct ifnet *);
318 void nm_os_onexit(struct ifnet *);
319
320 /* passes a packet up to the host stack.
321 * If the packet is sent (or dropped) immediately it returns NULL,
322 * otherwise it links the packet to prev and returns m.
323 * In this case, a final call with m=NULL and prev != NULL will send up
324 * the entire chain to the host stack.
325 */
326 void *nm_os_send_up(struct ifnet *, struct mbuf *m, struct mbuf *prev);
327
328 int nm_os_mbuf_has_seg_offld(struct mbuf *m);
329 int nm_os_mbuf_has_csum_offld(struct mbuf *m);
330
331 #include "netmap_mbq.h"
332
333 extern NMG_LOCK_T netmap_global_lock;
334
/*
 * Direction of a ring. NR_TXRX is the number of directions and is used
 * to size per-direction arrays.
 */
enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX };

/* Printable name of a direction: "RX" for NR_RX, "TX" for any other value. */
static __inline const char*
nm_txrx2str(enum txrx t)
{
	if (t == NR_RX)
		return "RX";
	return "TX";
}

/* Opposite direction: NR_RX gives NR_TX, any other value gives NR_RX. */
static __inline enum txrx
nm_txrx_swap(enum txrx t)
{
	if (t == NR_RX)
		return NR_TX;
	return NR_RX;
}

/* Loop header that makes 't' go through all the directions, NR_RX first. */
#define for_rx_tx(t)	for ((t) = 0; (t) < NR_TXRX; (t)++)
350
#ifdef WITH_MONITOR
/*
 * Pair of links (next/prev kring) used by struct netmap_kring, once per
 * direction, to build the circular list of zero-copy monitors.
 */
struct netmap_zmon_list {
	struct netmap_kring *next;
	struct netmap_kring *prev;
};
#endif /* WITH_MONITOR */
357
358 /*
359 * private, kernel view of a ring. Keeps track of the status of
360 * a ring across system calls.
361 *
362 * nr_hwcur index of the next buffer to refill.
363 * It corresponds to ring->head
364 * at the time the system call returns.
365 *
366 * nr_hwtail index of the first buffer owned by the kernel.
367 * On RX, hwcur->hwtail are receive buffers
368 * not yet released. hwcur is advanced following
369 * ring->head, hwtail is advanced on incoming packets,
370 * and a wakeup is generated when hwtail passes ring->cur
371 * On TX, hwcur->rcur have been filled by the sender
372 * but not sent yet to the NIC; rcur->hwtail are available
373 * for new transmissions, and hwtail->hwcur-1 are pending
374 * transmissions not yet acknowledged.
375 *
376 * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
377 * This is so that, on a reset, buffers owned by userspace are not
378 * modified by the kernel. In particular:
379 * RX rings: the next empty buffer (hwtail + hwofs) coincides with
380 * the next empty buffer as known by the hardware (next_to_check or so).
381 * TX rings: hwcur + hwofs coincides with next_to_send
382 *
 * The following fields are used to implement lock-free copy of packets
 * from input to output ports in VALE switch:
 *	nkr_hwlease	buffer after the last one being copied.
 *			A writer in nm_bdg_flush reserves N buffers
 *			from nkr_hwlease, advances it, then does the
 *			copy outside the lock.
 *			In RX rings (used for VALE ports),
 *			nr_hwtail <= nkr_hwlease < nr_hwcur+N-1
 *			In TX rings (used for NIC or host stack ports)
 *			nr_hwcur <= nkr_hwlease < nr_hwtail
 *	nkr_leases	array of nkr_num_slots where writers can report
 *			completion of their block. NR_NOSLOT (~0) indicates
 *			that the writer has not finished yet
 *	nkr_lease_idx	index of next free slot in nkr_leases, to be assigned
397 *
398 * The kring is manipulated by txsync/rxsync and generic netmap function.
399 *
 * Concurrent rxsync or txsync on the same ring are prevented
 * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
402 * for NIC rings, and for TX rings attached to the host stack.
403 *
404 * RX rings attached to the host stack use an mbq (rx_queue) on both
405 * rxsync_from_host() and netmap_transmit(). The mbq is protected
406 * by its internal lock.
407 *
408 * RX rings attached to the VALE switch are accessed by both senders
409 * and receiver. They are protected through the q_lock on the RX ring.
410 */
struct netmap_kring {
	struct netmap_ring *ring;

	uint32_t nr_hwcur;	/* should be nr_hwhead */
	uint32_t nr_hwtail;

	/*
	 * Copies of values in user rings, so we do not need to look
	 * at the ring (which could be modified). These are set in the
	 * *sync_prologue()/finalize() routines.
	 */
	uint32_t rhead;
	uint32_t rcur;
	uint32_t rtail;

	uint32_t nr_kflags;	/* private driver flags */
#define NKR_PENDINTR	0x1		// Pending interrupt.
#define NKR_EXCLUSIVE	0x2		/* exclusive binding */
#define NKR_FORWARD	0x4		/* (host ring only) there are
					   packets to forward
					 */
#define NKR_NEEDRING	0x8		/* ring needed even if users==0
					 * (used internally by pipes and
					 *  by ptnetmap host ports)
					 */
#define NKR_NOINTR	0x10		/* don't use interrupts on this ring */
#define NKR_FAKERING	0x20		/* don't allocate/free buffers */

	/*
	 * Current and requested mode of the kring, both taking one of the
	 * NKR_NETMAP_* values below. A mode change is pending while the
	 * two fields differ (see nm_kring_pending_on()/_off()).
	 */
	uint32_t nr_mode;
	uint32_t nr_pending_mode;
#define NKR_NETMAP_OFF	0x0
#define NKR_NETMAP_ON	0x1

	uint32_t nkr_num_slots;

	/*
	 * On a NIC reset, the NIC ring indexes may be reset but the
	 * indexes in the netmap rings remain the same. nkr_hwofs
	 * keeps track of the offset between the two.
	 */
	int32_t nkr_hwofs;

	/* last_reclaim is an opaque marker to help reduce the frequency
	 * of operations such as reclaiming tx buffers. A possible use
	 * is to set it to ticks and do the reclaim only once per tick.
	 */
	uint64_t last_reclaim;


	NM_SELINFO_T si;	/* poll/select wait queue */
	NM_LOCK_T q_lock;	/* protects kring and ring. */
	NM_ATOMIC_T nr_busy;	/* prevent concurrent syscalls */

	/* the adapter that owns this kring */
	struct netmap_adapter *na;

	/* the adapter that wants to be notified when this kring has
	 * new slots available. This is usually the same as the above,
	 * but wrappers may let it point to themselves
	 */
	struct netmap_adapter *notify_na;

	/* The following fields are for VALE switch support */
	struct nm_bdg_fwd *nkr_ft;
	uint32_t *nkr_leases;
#define NR_NOSLOT	((uint32_t)~0)	/* used in nkr_*lease* */
	uint32_t nkr_hwlease;
	uint32_t nkr_lease_idx;

	/* while nkr_stopped is set, no new [tr]xsync operations can
	 * be started on this kring.
	 * This is used by netmap_disable_all_rings()
	 * to find a synchronization point where critical data
	 * structures pointed to by the kring can be added or removed
	 */
	volatile int nkr_stopped;

	/* Support for adapters without native netmap support.
	 * On tx rings we preallocate an array of tx buffers
	 * (same size as the netmap ring), on rx rings we
	 * store incoming mbufs in a queue that is drained by
	 * a rxsync.
	 */
	struct mbuf **tx_pool;
	struct mbuf *tx_event;	/* TX event used as a notification */
	NM_LOCK_T tx_event_lock; /* protects the tx_event mbuf */
	struct mbq rx_queue;	/* intercepted rx mbufs. */

	uint32_t users;		/* existing bindings for this ring */

	uint32_t ring_id;	/* kring identifier */
	enum txrx tx;		/* kind of ring (tx or rx) */
	char name[64];		/* diagnostic */

	/* [tr]xsync callback for this kring.
	 * The default nm_kring_create callback (netmap_krings_create)
	 * sets the nm_sync callback of each hardware tx(rx) kring to
	 * the corresponding nm_txsync(nm_rxsync) taken from the
	 * netmap_adapter; moreover, it sets the sync callback
	 * of the host tx(rx) ring to netmap_txsync_to_host
	 * (netmap_rxsync_from_host).
	 *
	 * Overrides: the above configuration is not changed by
	 * any of the nm_krings_create callbacks.
	 */
	int (*nm_sync)(struct netmap_kring *kring, int flags);
	int (*nm_notify)(struct netmap_kring *kring, int flags);

#ifdef WITH_PIPES
	struct netmap_kring *pipe;	/* if this is a pipe ring,
					 * pointer to the other end
					 */
	uint32_t pipe_tail;		/* hwtail updated by the other end */
#endif /* WITH_PIPES */

	/* NOTE(review): presumably a saved copy of nm_notify, kept while
	 * the callback is overridden -- confirm with the users of this field.
	 */
	int (*save_notify)(struct netmap_kring *kring, int flags);

#ifdef WITH_MONITOR
	/* array of krings that are monitoring this kring */
	struct netmap_kring **monitors;
	uint32_t max_monitors; /* current size of the monitors array */
	uint32_t n_monitors;	/* next unused entry in the monitor array */
	uint32_t mon_pos[NR_TXRX]; /* index of this ring in the monitored ring array */
	uint32_t mon_tail;  /* last seen slot on rx */

	/* circular list of zero-copy monitors */
	struct netmap_zmon_list zmon_list[NR_TXRX];

	/*
	 * Monitors work by intercepting the sync and notify callbacks of the
	 * monitored krings. This is implemented by replacing the pointers
	 * above and saving the previous ones in mon_* pointers below
	 */
	int (*mon_sync)(struct netmap_kring *kring, int flags);
	int (*mon_notify)(struct netmap_kring *kring, int flags);

#endif
}
/* The structure is aligned to 64 bytes, with the syntax of each compiler. */
#ifdef _WIN32
__declspec(align(64));
#else
__attribute__((__aligned__(64)));
#endif
554
555 /* return 1 iff the kring needs to be turned on */
556 static inline int
nm_kring_pending_on(struct netmap_kring * kring)557 nm_kring_pending_on(struct netmap_kring *kring)
558 {
559 return kring->nr_pending_mode == NKR_NETMAP_ON &&
560 kring->nr_mode == NKR_NETMAP_OFF;
561 }
562
563 /* return 1 iff the kring needs to be turned off */
564 static inline int
nm_kring_pending_off(struct netmap_kring * kring)565 nm_kring_pending_off(struct netmap_kring *kring)
566 {
567 return kring->nr_pending_mode == NKR_NETMAP_OFF &&
568 kring->nr_mode == NKR_NETMAP_ON;
569 }
570
/*
 * Index that follows i in a ring whose highest index is lim:
 * lim wraps around to 0, any other value gives i + 1.
 */
static inline uint32_t
nm_next(uint32_t i, uint32_t lim)
{
	if (unlikely(i == lim))
		return 0;
	return i + 1;
}
577
578
/*
 * Index that precedes i in a ring whose highest index is lim:
 * 0 wraps around to lim, any other value gives i - 1.
 */
static inline uint32_t
nm_prev(uint32_t i, uint32_t lim)
{
	if (unlikely(i == 0))
		return lim;
	return i - 1;
}
585
586
587 /*
588 *
589 * Here is the layout for the Rx and Tx rings.
590
591 RxRING TxRING
592
593 +-----------------+ +-----------------+
594 | | | |
595 | free | | free |
596 +-----------------+ +-----------------+
597 head->| owned by user |<-hwcur | not sent to nic |<-hwcur
598 | | | yet |
599 +-----------------+ | |
600 cur->| available to | | |
601 | user, not read | +-----------------+
602 | yet | cur->| (being |
603 | | | prepared) |
604 | | | |
605 +-----------------+ + ------ +
606 tail->| |<-hwtail | |<-hwlease
607 | (being | ... | | ...
608 | prepared) | ... | | ...
609 +-----------------+ ... | | ...
610 | |<-hwlease +-----------------+
611 | | tail->| |<-hwtail
612 | | | |
613 | | | |
614 | | | |
615 +-----------------+ +-----------------+
616
617 * The cur/tail (user view) and hwcur/hwtail (kernel view)
618 * are used in the normal operation of the card.
619 *
620 * When a ring is the output of a switch port (Rx ring for
621 * a VALE port, Tx ring for the host stack or NIC), slots
622 * are reserved in blocks through 'hwlease' which points
623 * to the next unused slot.
624 * On an Rx ring, hwlease is always after hwtail,
625 * and completions cause hwtail to advance.
626 * On a Tx ring, hwlease is always between cur and hwtail,
627 * and completions cause cur to advance.
628 *
629 * nm_kr_space() returns the maximum number of slots that
630 * can be assigned.
631 * nm_kr_lease() reserves the required number of buffers,
632 * advances nkr_hwlease and also returns an entry in
633 * a circular array where completions should be reported.
634 */
635
636 struct lut_entry;
637 #ifdef __FreeBSD__
638 #define plut_entry lut_entry
639 #endif
640
/*
 * Description of a buffer lookup table: the table(s) plus the number and
 * the size of the buffers they describe.
 * NOTE(review): the difference between 'lut' and 'plut' is not visible in
 * this header (on FreeBSD plut_entry is #defined to lut_entry) -- see the
 * definition of the entry types.
 */
struct netmap_lut {
	struct lut_entry *lut;
	struct plut_entry *plut;
	uint32_t objtotal;	/* max buffer index */
	uint32_t objsize;	/* buffer size */
};
647
648 struct netmap_vp_adapter; // forward
649 struct nm_bridge;
650
/*
 * Configuration reported by the nm_config callbacks: the callback fills
 * in the number of rings and descriptors for each direction and the
 * maximum size of an rx buffer.
 */
struct nm_config_info {
	unsigned int num_tx_rings;
	unsigned int num_rx_rings;
	unsigned int num_tx_descs;
	unsigned int num_rx_descs;
	unsigned int rx_buf_maxsize;
};
659
660 /*
661 * default type for the magic field.
662 * May be overridden in glue code.
663 */
664 #ifndef NM_OS_MAGIC
665 #define NM_OS_MAGIC uint32_t
666 #endif /* !NM_OS_MAGIC */
667
668 /*
669 * The "struct netmap_adapter" extends the "struct adapter"
670 * (or equivalent) device descriptor.
671 * It contains all base fields needed to support netmap operation.
672 * There are in fact different types of netmap adapters
673 * (native, generic, VALE switch...) so a netmap_adapter is
674 * just the first field in the derived type.
675 */
struct netmap_adapter {
	/*
	 * On linux we do not have a good way to tell if an interface
	 * is netmap-capable. So we always use the following trick:
	 * NA(ifp) points here, and the first entry (which hopefully
	 * always exists and is at least 32 bits) contains a magic
	 * value which we can use to detect that the interface is good.
	 */
	NM_OS_MAGIC magic;
	uint32_t na_flags;	/* enabled, and other flags */
#define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
				 * useful during initialization
				 */
#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
				 * forwarding packets coming from this
				 * interface
				 */
#define NAF_MEM_OWNER	8	/* the adapter uses its own memory area
				 * that cannot be changed
				 */
#define NAF_NATIVE      16      /* the adapter is native.
				 * Virtual ports (non persistent vale ports,
				 * pipes, monitors...) should never use
				 * this flag.
				 */
#define	NAF_NETMAP_ON	32	/* netmap is active (either native or
				 * emulated). Where possible (e.g. FreeBSD)
				 * IFCAP_NETMAP also mirrors this flag.
				 */
#define NAF_HOST_RINGS  64	/* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
/* free */
#define NAF_MOREFRAG	512	/* the adapter supports NS_MOREFRAG */
#define NAF_ZOMBIE	(1U<<30) /* the nic driver has been unloaded */
#define	NAF_BUSY	(1U<<31) /* the adapter is used internally and
				  * cannot be registered from userspace
				  */
	int active_fds; /* number of user-space descriptors using this
			 interface, which is equal to the number of
			 struct netmap_if objs in the mapped region. */

	u_int num_rx_rings; /* number of adapter receive rings */
	u_int num_tx_rings; /* number of adapter transmit rings */
	u_int num_host_rx_rings; /* number of host receive rings */
	u_int num_host_tx_rings; /* number of host transmit rings */

	u_int num_tx_desc;  /* number of descriptors in each tx queue */
	u_int num_rx_desc;  /* number of descriptors in each rx queue */

	/* tx_rings and rx_rings are private but allocated as a
	 * contiguous chunk of memory. Each array has N+K entries,
	 * N for the hardware rings and K for the host rings.
	 */
	struct netmap_kring **tx_rings; /* array of TX rings. */
	struct netmap_kring **rx_rings; /* array of RX rings. */

	void *tailroom;		       /* space below the rings array */
				       /* (used for leases) */


	NM_SELINFO_T si[NR_TXRX];	/* global wait queues */

	/* count users of the global wait queues */
	int si_users[NR_TXRX];

	void *pdev; /* used to store pci device */

	/* copy of if_qflush and if_transmit pointers, to intercept
	 * packets from the network stack when netmap is active.
	 */
	int     (*if_transmit)(struct ifnet *, struct mbuf *);

	/* copy of if_input for netmap_send_up() */
	void     (*if_input)(struct ifnet *, struct mbuf *);

	/* Back reference to the parent ifnet struct. Used for
	 * hardware ports (emulated netmap included). */
	struct ifnet *ifp; /* adapter is ifp->if_softc */

	/*---- callbacks for this netmap adapter -----*/
	/*
	 * nm_dtor() is the cleanup routine called when destroying
	 *	the adapter.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_register() is called on NIOCREGIF and close() to enter
	 *	or exit netmap mode on the NIC
	 *	Called with NMG_LOCK held.
	 *
	 * nm_txsync() pushes packets to the underlying hw/switch
	 *
	 * nm_rxsync() collects packets from the underlying hw/switch
	 *
	 * nm_config() returns configuration information from the OS
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_create() create and init the tx_rings and
	 *	rx_rings arrays of kring structures. In particular,
	 *	set the nm_sync callbacks for each ring.
	 *	There is no need to also allocate the corresponding
	 *	netmap_rings, since netmap_mem_rings_create() will always
	 *	be called to provide the missing ones.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_delete() cleanup and delete the tx_rings and rx_rings
	 *	arrays
	 *	Called with NMG_LOCK held.
	 *
	 * nm_notify() is used to act after data have become available
	 *	(or the stopped state of the ring has changed)
	 *	For hw devices this is typically a selwakeup(),
	 *	but for NIC/host ports attached to a switch (or vice-versa)
	 *	we also need to invoke the 'txsync' code downstream.
	 *	This callback pointer is actually used only to initialize
	 *	kring->nm_notify.
	 *	Return values are the same as for netmap_rx_irq().
	 */
	void (*nm_dtor)(struct netmap_adapter *);

	int (*nm_register)(struct netmap_adapter *, int onoff);
	void (*nm_intr)(struct netmap_adapter *, int onoff);

	int (*nm_txsync)(struct netmap_kring *kring, int flags);
	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
	int (*nm_notify)(struct netmap_kring *kring, int flags);
	/* NOTE(review): despite the NAF_ prefix, the three values below
	 * overlap the na_flags bits above; they look like values for the
	 * 'flags' argument of the callbacks -- confirm with the callers.
	 */
#define NAF_FORCE_READ      1
#define NAF_FORCE_RECLAIM   2
#define NAF_CAN_FORWARD_DOWN 4
	/* return configuration information */
	int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info);
	int (*nm_krings_create)(struct netmap_adapter *);
	void (*nm_krings_delete)(struct netmap_adapter *);
	/*
	 * nm_bdg_attach() initializes the na_vp field to point
	 *      to an adapter that can be attached to a VALE switch. If the
	 *      current adapter is already a VALE port, na_vp is simply a cast;
	 *      otherwise, na_vp points to a netmap_bwrap_adapter.
	 *      If applicable, this callback also initializes na_hostvp,
	 *      that can be used to connect the adapter host rings to the
	 *      switch.
	 *      Called with NMG_LOCK held.
	 *
	 * nm_bdg_ctl() is called on the actual attach/detach to/from
	 *      the switch, to perform adapter-specific
	 *      initializations
	 *      Called with NMG_LOCK held.
	 */
	int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *,
			struct nm_bridge *);
	int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *);

	/* adapter used to attach this adapter to a VALE switch (if any) */
	struct netmap_vp_adapter *na_vp;
	/* adapter used to attach the host rings of this adapter
	 * to a VALE switch (if any) */
	struct netmap_vp_adapter *na_hostvp;

	/* standard refcount to control the lifetime of the adapter
	 * (it should be equal to the lifetime of the corresponding ifp)
	 */
	int na_refcount;

	/* memory allocator (opaque)
	 * We also cache a pointer to the lut_entry for translating
	 * buffer addresses, the total number of buffers and the buffer size.
	 */
	struct netmap_mem_d *nm_mem;
	struct netmap_mem_d *nm_mem_prev;
	struct netmap_lut na_lut;

	/* additional information attached to this adapter
	 * by other netmap subsystems. Currently used by
	 * bwrap, LINUX/v1000 and ptnetmap
	 */
	void *na_private;

	/* array of pipes that have this adapter as a parent */
	struct netmap_pipe_adapter **na_pipes;
	int na_next_pipe;	/* next free slot in the array */
	int na_max_pipes;	/* size of the array */

	/* Offset of ethernet header for each packet. */
	u_int virt_hdr_len;

	/* Max number of bytes that the NIC can store in the buffer
	 * referenced by each RX descriptor. This translates to the maximum
	 * bytes that a single netmap slot can reference. Larger packets
	 * require NS_MOREFRAG support. */
	unsigned rx_buf_maxsize;

	char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */

#ifdef WITH_MONITOR
	unsigned long	monitor_id;	/* debugging */
#endif
};
873
874 static __inline u_int
nma_get_ndesc(struct netmap_adapter * na,enum txrx t)875 nma_get_ndesc(struct netmap_adapter *na, enum txrx t)
876 {
877 return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc);
878 }
879
880 static __inline void
nma_set_ndesc(struct netmap_adapter * na,enum txrx t,u_int v)881 nma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v)
882 {
883 if (t == NR_TX)
884 na->num_tx_desc = v;
885 else
886 na->num_rx_desc = v;
887 }
888
889 static __inline u_int
nma_get_nrings(struct netmap_adapter * na,enum txrx t)890 nma_get_nrings(struct netmap_adapter *na, enum txrx t)
891 {
892 return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
893 }
894
895 static __inline u_int
nma_get_host_nrings(struct netmap_adapter * na,enum txrx t)896 nma_get_host_nrings(struct netmap_adapter *na, enum txrx t)
897 {
898 return (t == NR_TX ? na->num_host_tx_rings : na->num_host_rx_rings);
899 }
900
901 static __inline void
nma_set_nrings(struct netmap_adapter * na,enum txrx t,u_int v)902 nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
903 {
904 if (t == NR_TX)
905 na->num_tx_rings = v;
906 else
907 na->num_rx_rings = v;
908 }
909
910 static __inline void
nma_set_host_nrings(struct netmap_adapter * na,enum txrx t,u_int v)911 nma_set_host_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
912 {
913 if (t == NR_TX)
914 na->num_host_tx_rings = v;
915 else
916 na->num_host_rx_rings = v;
917 }
918
919 static __inline struct netmap_kring**
NMR(struct netmap_adapter * na,enum txrx t)920 NMR(struct netmap_adapter *na, enum txrx t)
921 {
922 return (t == NR_TX ? na->tx_rings : na->rx_rings);
923 }
924
/* Implemented elsewhere. NOTE(review): presumably turns interrupts on the
 * adapter on (onoff != 0) or off (onoff == 0) -- confirm against the
 * definition. */
int nma_intr_enable(struct netmap_adapter *na, int onoff);

/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
/* Non-zero when NAF_BUSY is set in the adapter flags. */
#define NETMAP_OWNED_BY_KERN(na)	((na)->na_flags & NAF_BUSY)
/* True when the adapter is owned by the kernel or its active_fds counter
 * is positive. */
#define NETMAP_OWNED_BY_ANY(na) \
	(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
936
/*
 * derived netmap adapters for various types of ports
 */
/*
 * VALE software port. The generic adapter is embedded as the first member
 * ('up'); the remaining fields hold the bridge-related state of the port.
 */
struct netmap_vp_adapter {	/* VALE software port */
	struct netmap_adapter up;

	/*
	 * Bridge support:
	 *
	 * bdg_port is the port number used in the bridge;
	 * na_bdg points to the bridge this NA is attached to.
	 */
	int bdg_port;
	struct nm_bridge *na_bdg;
	int retry;	/* NOTE(review): presumably a retry count/flag used when
			 * forwarding -- not documented here, confirm with users */
	int autodelete; /* remove the ifp on last reference */

	/* Maximum Frame Size, used in bdg_mismatch_datapath() */
	u_int mfs;
	/* Last source MAC on this port */
	uint64_t last_smac;
};
959
960
/*
 * Adapter for a physical device. The generic adapter is embedded as the
 * first member ('up').
 */
struct netmap_hw_adapter {	/* physical device */
	struct netmap_adapter up;

#ifdef linux
	/* NOTE(review): presumably netmap-owned copies of the device
	 * operation tables -- confirm in the linux glue code */
	struct net_device_ops nm_ndo;
	struct ethtool_ops    nm_eto;
#endif
	/* NOTE(review): by its name, the ethtool_ops pointer saved so that it
	 * can be restored later; declared outside the linux guard */
	const struct ethtool_ops*   save_ethtool;

	/* hardware-specific register callback, taking the adapter and an
	 * on/off flag */
	int (*nm_hw_register)(struct netmap_adapter *, int onoff);
};
972
#ifdef WITH_GENERIC
/* Mitigation support. */
/* One instance describes the mitigation state of a single ring
 * (see mit_ring_idx). */
struct nm_generic_mit {
	struct hrtimer mit_timer;	/* timer driving the mitigation */
	int mit_pending;		/* NOTE(review): presumably non-zero when
					 * a notification was deferred -- confirm */
	int mit_ring_idx;  /* index of the ring being mitigated */
	struct netmap_adapter *mit_na;  /* backpointer */
};

/* Adapter used for emulated (generic) netmap mode; it extends the hardware
 * adapter, which is embedded as the first member ('up'). */
struct netmap_generic_adapter {	/* emulated device */
	struct netmap_hw_adapter up;

	/* Pointer to a previously used netmap adapter. */
	struct netmap_adapter *prev;

	/* Emulated netmap adapters support:
	 *  - mit implements rx interrupt mitigation;
	 */
	struct nm_generic_mit *mit;
#ifdef linux
	/* NOTE(review): by its name, the saved transmit routine of the
	 * device -- confirm in the linux glue code */
        netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
#endif
	/* Is the adapter able to use multiple RX slots to scatter
	 * each packet pushed up by the driver? */
	int rxsg;

	/* Is the transmission path controlled by a netmap-aware
	 * device queue (i.e. qdisc on linux)? */
	int txqdisc;
};
#endif  /* WITH_GENERIC */
1004
1005 static __inline u_int
netmap_real_rings(struct netmap_adapter * na,enum txrx t)1006 netmap_real_rings(struct netmap_adapter *na, enum txrx t)
1007 {
1008 return nma_get_nrings(na, t) +
1009 !!(na->na_flags & NAF_HOST_RINGS) * nma_get_host_nrings(na, t);
1010 }
1011
1012 /* account for fake rings */
1013 static __inline u_int
netmap_all_rings(struct netmap_adapter * na,enum txrx t)1014 netmap_all_rings(struct netmap_adapter *na, enum txrx t)
1015 {
1016 return max(nma_get_nrings(na, t) + 1, netmap_real_rings(na, t));
1017 }
1018
/* Implemented elsewhere. NOTE(review): by its name, the default way of
 * attaching adapter 'na' (identified by 'name') to a bridge -- confirm
 * against the definition. */
int netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
		struct nm_bridge *);
/* opaque here; only used through pointers (see netmap_bwrap_adapter) */
struct nm_bdg_polling_state;
1022 /*
1023 * Bridge wrapper for non VALE ports attached to a VALE switch.
1024 *
1025 * The real device must already have its own netmap adapter (hwna).
1026 * The bridge wrapper and the hwna adapter share the same set of
1027 * netmap rings and buffers, but they have two separate sets of
1028 * krings descriptors, with tx/rx meanings swapped:
1029 *
1030 * netmap
1031 * bwrap krings rings krings hwna
1032 * +------+ +------+ +-----+ +------+ +------+
1033 * |tx_rings->| |\ /| |----| |<-tx_rings|
1034 * | | +------+ \ / +-----+ +------+ | |
1035 * | | X | |
1036 * | | / \ | |
1037 * | | +------+/ \+-----+ +------+ | |
1038 * |rx_rings->| | | |----| |<-rx_rings|
1039 * | | +------+ +-----+ +------+ | |
1040 * +------+ +------+
1041 *
 *  - packets coming from the bridge go to the bwrap rx rings,
1043 * which are also the hwna tx rings. The bwrap notify callback
1044 * will then complete the hwna tx (see netmap_bwrap_notify).
1045 *
1046 * - packets coming from the outside go to the hwna rx rings,
1047 * which are also the bwrap tx rings. The (overwritten) hwna
1048 * notify method will then complete the bridge tx
1049 * (see netmap_bwrap_intr_notify).
1050 *
1051 * The bridge wrapper may optionally connect the hwna 'host' rings
1052 * to the bridge. This is done by using a second port in the
1053 * bridge and connecting it to the 'host' netmap_vp_adapter
 * contained in the netmap_bwrap_adapter. The bwrap host adapter
1055 * cross-links the hwna host rings in the same way as shown above.
1056 *
1057 * - packets coming from the bridge and directed to the host stack
1058 * are handled by the bwrap host notify callback
1059 * (see netmap_bwrap_host_notify)
1060 *
1061 * - packets coming from the host stack are still handled by the
1062 * overwritten hwna notify callback (netmap_bwrap_intr_notify),
1063 * but are diverted to the host adapter depending on the ring number.
1064 *
1065 */
/* Bridge wrapper adapter: two VALE ports ('up' for the hardware rings,
 * 'host' for the host rings) layered on top of an existing adapter. */
struct netmap_bwrap_adapter {
	struct netmap_vp_adapter up;
	struct netmap_vp_adapter host;  /* for host rings */
	struct netmap_adapter *hwna;	/* the underlying device */

	/*
	 * When we attach a physical interface to the bridge, we
	 * allow the controlling process to terminate, so we need
	 * a place to store the netmap_priv_d data structure.
	 * This is only done when physical interfaces
	 * are attached to a bridge.
	 */
	struct netmap_priv_d *na_kpriv;
	/* polling state; the type is opaque in this header */
	struct nm_bdg_polling_state *na_polling_state;
	/* we overwrite the hwna->na_vp pointer, so we save
	 * here its original value, to be restored at detach
	 */
	struct netmap_vp_adapter *saved_na_vp;
};
/* Implemented elsewhere; by its name, tells whether 'na' is a bridge
 * wrapper. NOTE(review): confirm the exact return convention. */
int nm_is_bwrap(struct netmap_adapter *na);
/* Implemented elsewhere; request handler taking a request header. */
int nm_bdg_polling(struct nmreq_header *hdr);
1087
/* VALE control entry points. Without WITH_VALE only netmap_vi_create() is
 * replaced by a stub (evaluating to EOPNOTSUPP); the other names are left
 * undeclared. */
#ifdef WITH_VALE
int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token);
int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token);
int netmap_vale_list(struct nmreq_header *hdr);
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
int nm_vi_destroy(const char *name);
#else /* !WITH_VALE */
#define netmap_vi_create(hdr, a) (EOPNOTSUPP)
#endif /* WITH_VALE */
1098
#ifdef WITH_PIPES

#define NM_MAXPIPES 	64	/* max number of pipes per adapter */

/* One end of a netmap pipe. The generic adapter is embedded as the first
 * member ('up'); 'peer' links to the adapter at the other end. */
struct netmap_pipe_adapter {
	/* pipe identifier is up.name */
	struct netmap_adapter up;

#define NM_PIPE_ROLE_MASTER	0x1
#define NM_PIPE_ROLE_SLAVE	0x2
	int role;	/* either NM_PIPE_ROLE_MASTER or NM_PIPE_ROLE_SLAVE */

	struct netmap_adapter *parent; /* adapter that owns the memory */
	struct netmap_pipe_adapter *peer; /* the other end of the pipe */
	int peer_ref;		/* 1 iff we are holding a ref to the peer */
	struct ifnet *parent_ifp;	/* maybe null */

	u_int parent_slot; /* index in the parent pipe array */
};

#endif /* WITH_PIPES */
1120
#ifdef WITH_NMNULL
/* Adapter type for null ports: it carries no state beyond the embedded
 * generic adapter. */
struct netmap_null_adapter {
	struct netmap_adapter up;
};
#endif /* WITH_NMNULL */
1126
1127
1128 /* return slots reserved to rx clients; used in drivers */
1129 static inline uint32_t
nm_kr_rxspace(struct netmap_kring * k)1130 nm_kr_rxspace(struct netmap_kring *k)
1131 {
1132 int space = k->nr_hwtail - k->nr_hwcur;
1133 if (space < 0)
1134 space += k->nkr_num_slots;
1135 nm_prdis("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
1136
1137 return space;
1138 }
1139
1140 /* return slots reserved to tx clients */
1141 #define nm_kr_txspace(_k) nm_kr_rxspace(_k)
1142
1143
1144 /* True if no space in the tx ring, only valid after txsync_prologue */
1145 static inline int
nm_kr_txempty(struct netmap_kring * kring)1146 nm_kr_txempty(struct netmap_kring *kring)
1147 {
1148 return kring->rhead == kring->nr_hwtail;
1149 }
1150
1151 /* True if no more completed slots in the rx ring, only valid after
1152 * rxsync_prologue */
1153 #define nm_kr_rxempty(_k) nm_kr_txempty(_k)
1154
1155 /* True if the application needs to wait for more space on the ring
1156 * (more received packets or more free tx slots).
1157 * Only valid after *xsync_prologue. */
1158 static inline int
nm_kr_wouldblock(struct netmap_kring * kring)1159 nm_kr_wouldblock(struct netmap_kring *kring)
1160 {
1161 return kring->rcur == kring->nr_hwtail;
1162 }
1163
/*
 * protect against multiple threads using the same ring.
 * also check that the ring has not been stopped or locked
 *
 * These are the non-zero values returned by nm_kr_tryget();
 * NM_KR_STOPPED and NM_KR_LOCKED are also the values accepted
 * by nm_kr_stop().
 */
#define NM_KR_BUSY	1	/* some other thread is syncing the ring */
#define NM_KR_STOPPED	2	/* unbounded stop (ifconfig down or driver unload) */
#define NM_KR_LOCKED	3	/* bounded, brief stop for mutual exclusion */
1171
1172
1173 /* release the previously acquired right to use the *sync() methods of the ring */
nm_kr_put(struct netmap_kring * kr)1174 static __inline void nm_kr_put(struct netmap_kring *kr)
1175 {
1176 NM_ATOMIC_CLEAR(&kr->nr_busy);
1177 }
1178
1179
/* true if the ifp that backed the adapter has disappeared (e.g., the
 * driver has been unloaded)
 *
 * Forward declaration only: nm_kr_tryget() below needs it before the
 * definition appears later in this header.
 */
static inline int nm_iszombie(struct netmap_adapter *na);
1184
1185 /* try to obtain exclusive right to issue the *sync() operations on the ring.
1186 * The right is obtained and must be later relinquished via nm_kr_put() if and
1187 * only if nm_kr_tryget() returns 0.
1188 * If can_sleep is 1 there are only two other possible outcomes:
1189 * - the function returns NM_KR_BUSY
1190 * - the function returns NM_KR_STOPPED and sets the POLLERR bit in *perr
1191 * (if non-null)
1192 * In both cases the caller will typically skip the ring, possibly collecting
1193 * errors along the way.
1194 * If the calling context does not allow sleeping, the caller must pass 0 in can_sleep.
1195 * In the latter case, the function may also return NM_KR_LOCKED and leave *perr
1196 * untouched: ideally, the caller should try again at a later time.
1197 */
nm_kr_tryget(struct netmap_kring * kr,int can_sleep,int * perr)1198 static __inline int nm_kr_tryget(struct netmap_kring *kr, int can_sleep, int *perr)
1199 {
1200 int busy = 1, stopped;
1201 /* check a first time without taking the lock
1202 * to avoid starvation for nm_kr_get()
1203 */
1204 retry:
1205 stopped = kr->nkr_stopped;
1206 if (unlikely(stopped)) {
1207 goto stop;
1208 }
1209 busy = NM_ATOMIC_TEST_AND_SET(&kr->nr_busy);
1210 /* we should not return NM_KR_BUSY if the ring was
1211 * actually stopped, so check another time after
1212 * the barrier provided by the atomic operation
1213 */
1214 stopped = kr->nkr_stopped;
1215 if (unlikely(stopped)) {
1216 goto stop;
1217 }
1218
1219 if (unlikely(nm_iszombie(kr->na))) {
1220 stopped = NM_KR_STOPPED;
1221 goto stop;
1222 }
1223
1224 return unlikely(busy) ? NM_KR_BUSY : 0;
1225
1226 stop:
1227 if (!busy)
1228 nm_kr_put(kr);
1229 if (stopped == NM_KR_STOPPED) {
1230 /* if POLLERR is defined we want to use it to simplify netmap_poll().
1231 * Otherwise, any non-zero value will do.
1232 */
1233 #ifdef POLLERR
1234 #define NM_POLLERR POLLERR
1235 #else
1236 #define NM_POLLERR 1
1237 #endif /* POLLERR */
1238 if (perr)
1239 *perr |= NM_POLLERR;
1240 #undef NM_POLLERR
1241 } else if (can_sleep) {
1242 tsleep(kr, 0, "NM_KR_TRYGET", 4);
1243 goto retry;
1244 }
1245 return stopped;
1246 }
1247
1248 /* put the ring in the 'stopped' state and wait for the current user (if any) to
1249 * notice. stopped must be either NM_KR_STOPPED or NM_KR_LOCKED
1250 */
nm_kr_stop(struct netmap_kring * kr,int stopped)1251 static __inline void nm_kr_stop(struct netmap_kring *kr, int stopped)
1252 {
1253 kr->nkr_stopped = stopped;
1254 while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
1255 tsleep(kr, 0, "NM_KR_GET", 4);
1256 }
1257
1258 /* restart a ring after a stop */
nm_kr_start(struct netmap_kring * kr)1259 static __inline void nm_kr_start(struct netmap_kring *kr)
1260 {
1261 kr->nkr_stopped = 0;
1262 nm_kr_put(kr);
1263 }
1264
1265
/*
 * The following functions are used by individual drivers to
 * support netmap operation.
 *
 * netmap_attach() initializes a struct netmap_adapter, allocating the
 * 	struct netmap_ring's and the struct selinfo.
 *
 * netmap_detach() frees the memory allocated by netmap_attach().
 *
 * netmap_transmit() replaces the if_transmit routine of the interface,
 *	and is used to intercept packets coming from the stack.
 *
 * netmap_load_map/netmap_reload_map are helper routines to set/reset
 *	the dmamap for a packet buffer
 *
 * netmap_reset() is a helper routine to be called in the hw driver
 *	when reinitializing a ring. It should not be called by
 *	virtual ports (vale, pipes, monitor)
 *
 * The remaining prototypes (netmap_attach_ext, netmap_ring_reinit,
 * netmap_rings_config_get) are not described in this header; see their
 * definitions.
 */
int netmap_attach(struct netmap_adapter *);
int netmap_attach_ext(struct netmap_adapter *, size_t size, int override_reg);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
	enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
int netmap_rings_config_get(struct netmap_adapter *, struct nm_config_info *);
1293
/* Return codes for netmap_*x_irq. */
enum {
	/* Driver should do normal interrupt processing, e.g. because
	 * the interface is not in netmap mode. */
	NM_IRQ_PASS = 0,
	/* Port is in netmap mode, and the interrupt work has been
	 * completed. The driver does not have to notify netmap
	 * again before the next interrupt. */
	NM_IRQ_COMPLETED = -1,
	/* Port is in netmap mode, but the interrupt work has not been
	 * completed. The driver has to make sure netmap will be
	 * notified again soon, even if no more interrupts come (e.g.
	 * on Linux the driver should not call napi_complete()). */
	NM_IRQ_RESCHED = -2,
};

/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
/* the tx variant is netmap_rx_irq() called with a NULL third argument */
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
int netmap_common_irq(struct netmap_adapter *, u_int, u_int *work_done);
1314
1315
#ifdef WITH_VALE
/* functions used by external modules to interface with VALE */
#define netmap_vp_to_ifp(_vp)	((_vp)->up.ifp)
#define netmap_ifp_to_vp(_ifp)	(NA(_ifp)->na_vp)
#define netmap_ifp_to_host_vp(_ifp) (NA(_ifp)->na_hostvp)
#define netmap_bdg_idx(_vp)	((_vp)->bdg_port)
const char *netmap_bdg_name(struct netmap_vp_adapter *);
#else /* !WITH_VALE */
/* without VALE the accessors collapse to constants (NULL, or -1 for the
 * port index); netmap_bdg_name() has no replacement */
#define netmap_vp_to_ifp(_vp)	NULL
#define netmap_ifp_to_vp(_ifp)	NULL
#define netmap_ifp_to_host_vp(_ifp) NULL
#define netmap_bdg_idx(_vp)	-1
#endif /* WITH_VALE */
1329
1330 static inline int
nm_netmap_on(struct netmap_adapter * na)1331 nm_netmap_on(struct netmap_adapter *na)
1332 {
1333 return na && na->na_flags & NAF_NETMAP_ON;
1334 }
1335
1336 static inline int
nm_native_on(struct netmap_adapter * na)1337 nm_native_on(struct netmap_adapter *na)
1338 {
1339 return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
1340 }
1341
1342 static inline struct netmap_kring *
netmap_kring_on(struct netmap_adapter * na,u_int q,enum txrx t)1343 netmap_kring_on(struct netmap_adapter *na, u_int q, enum txrx t)
1344 {
1345 struct netmap_kring *kring = NULL;
1346
1347 if (!nm_native_on(na))
1348 return NULL;
1349
1350 if (t == NR_RX && q < na->num_rx_rings)
1351 kring = na->rx_rings[q];
1352 else if (t == NR_TX && q < na->num_tx_rings)
1353 kring = na->tx_rings[q];
1354 else
1355 return NULL;
1356
1357 return (kring->nr_mode == NKR_NETMAP_ON) ? kring : NULL;
1358 }
1359
1360 static inline int
nm_iszombie(struct netmap_adapter * na)1361 nm_iszombie(struct netmap_adapter *na)
1362 {
1363 return na == NULL || (na->na_flags & NAF_ZOMBIE);
1364 }
1365
1366 static inline void
nm_update_hostrings_mode(struct netmap_adapter * na)1367 nm_update_hostrings_mode(struct netmap_adapter *na)
1368 {
1369 /* Process nr_mode and nr_pending_mode for host rings. */
1370 na->tx_rings[na->num_tx_rings]->nr_mode =
1371 na->tx_rings[na->num_tx_rings]->nr_pending_mode;
1372 na->rx_rings[na->num_rx_rings]->nr_mode =
1373 na->rx_rings[na->num_rx_rings]->nr_pending_mode;
1374 }
1375
/* Implemented elsewhere. NOTE(review): by their names these set/clear the
 * adapter state associated with native netmap mode -- confirm against the
 * definitions. */
void nm_set_native_flags(struct netmap_adapter *);
void nm_clear_native_flags(struct netmap_adapter *);

/* Implemented elsewhere. NOTE(review): presumably applies the pending mode
 * of the krings of 'na' for the given on/off transition -- confirm. */
void netmap_krings_mode_commit(struct netmap_adapter *na, int onoff);
1380
/*
 * nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap
 * kthreads.
 * We need netmap_ring* parameter, because in ptnetmap it is decoupled
 * from host kring.
 * The user-space ring pointers (head/cur/tail) are shared through
 * CSB between host and guest.
 */

/*
 * validates parameters in the ring/kring, returns a value for head
 * If any error, returns ring_size to force a reinit.
 */
uint32_t nm_txsync_prologue(struct netmap_kring *, struct netmap_ring *);


/*
 * validates parameters in the ring/kring, returns a value for head
 * If any error, returns ring_size lim to force a reinit.
 * NOTE(review): "ring_size lim" is unclear (ring_size vs. lim);
 * check the definition for the exact error value.
 */
uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
1402
1403
/* check/fix address and len in tx rings
 *
 * _l is clamped to NETMAP_BUF_SIZE(_na) in place, so it must be an lvalue.
 * The debug version additionally logs when _a equals NETMAP_BUF_BASE(_na)
 * or _l is too large; its message uses the names 'kring', 'nm_i', 'slot'
 * and 'len', which must exist in the scope where the macro is expanded.
 */
#if 1 /* debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
	if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) {	\
		nm_prlim(5, "bad addr/len ring %d slot %d idx %d len %d",	\
			kring->ring_id, nm_i, slot->buf_idx, len);	\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} } while (0)
#else /* no debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} while (0)
#endif
1419
1420
/*---------------------------------------------------------------*/
/*
 * Support routines used by netmap subsystems
 * (native drivers, VALE, generic, pipes, monitors, ...)
 */


/* common routine for all functions that create a netmap adapter. It performs
 * two main tasks:
 * - if the na points to an ifp, mark the ifp as netmap capable
 *   using na as its native adapter;
 * - provide defaults for the setup callbacks and the memory allocator
 */
int netmap_attach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
 * coming from a struct nmreq_register
 */
int netmap_interp_ringid(struct netmap_priv_d *priv, struct nmreq_header *hdr);
/* update the ring parameters (number and size of tx and rx rings).
 * It calls the nm_config callback, if available.
 */
int netmap_update_config(struct netmap_adapter *na);
/* create and initialize the common fields of the krings array,
 * using the information that must be already available in the na.
 * tailroom can be used to request the allocation of additional
 * tailroom bytes after the krings array. This is used by
 * netmap_vp_adapter's (i.e., VALE ports) to make room for
 * leasing-related data structures
 */
int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
/* deletes the kring array of the adapter. The array must have
 * been created using netmap_krings_create
 */
void netmap_krings_delete(struct netmap_adapter *na);

/* NOTE(review): by their names, the krings create/delete pair used for
 * hardware adapters -- confirm against the definitions */
int netmap_hw_krings_create(struct netmap_adapter *na);
void netmap_hw_krings_delete(struct netmap_adapter *na);
1458
/* set the stopped/enabled status of a single ring, selected by ring_id
 * and direction.
 * When stopping, they also wait for all current activity on the ring to
 * terminate. The status change is then notified using the na nm_notify
 * callback.
 */
void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings; they take the ifp
 * instead of the adapter */
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
1470
/* Registration and adapter lookup helpers, implemented elsewhere.
 * NOTE(review): the contracts below are not described in this header;
 * the one-line hints are inferred from names only -- confirm against the
 * definitions before relying on them.
 */
/* checks the buffer size of 'na' against an MTU */
int netmap_buf_size_validate(const struct netmap_adapter *na, unsigned mtu);
/* bind / unbind a netmap_priv_d to an adapter */
int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	struct nmreq_header *);
void netmap_do_unregif(struct netmap_priv_d *priv);

/* bounds *v to [lo, hi], with dflt as the default value */
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
/* look up (and possibly create) the adapter named in the request;
 * netmap_unget_na() is the matching release */
int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		struct ifnet **ifp, struct netmap_mem_d *nmd, int create);
void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
int netmap_get_hw_na(struct ifnet *ifp,
		struct netmap_mem_d *nmd, struct netmap_adapter **na);
1482
#ifdef WITH_VALE
uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
		struct netmap_vp_adapter *, void *private_data);

/* these are redefined in case of no VALE support */
int netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		struct netmap_mem_d *nmd, int create);
void *netmap_vale_create(const char *bdg_name, int *return_status);
int netmap_vale_destroy(const char *bdg_name, void *auth_token);

extern unsigned int vale_max_bridges;

#else /* !WITH_VALE */
/* Stubs matching the names declared in the WITH_VALE branch: they ignore
 * their arguments and evaluate to 0 (or NULL for the create function).
 */
#define netmap_vale_learning(_1, _2, _3, _4)	0
#define netmap_get_vale_na(_1, _2, _3, _4)	0
#define netmap_vale_create(_1, _2)	NULL
#define netmap_vale_destroy(_1, _2)	0
/* legacy stub names, kept for any code still spelling them this way */
#define netmap_bdg_learning(_1, _2, _3, _4)	0
#define netmap_bdg_create(_1, _2)	NULL
#define netmap_bdg_destroy(_1, _2)	0
#define vale_max_bridges	1
#endif /* !WITH_VALE */
1502
#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES	64	/* XXX this should probably be a sysctl */
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
			struct netmap_mem_d *nmd, int create);
#else /* !WITH_PIPES */
#define NM_MAXPIPES	0
#define netmap_pipe_alloc(_1, _2) 	0
#define netmap_pipe_dealloc(_1)
/* Without pipe support a request is rejected with EOPNOTSUPP when the port
 * name contains '{' or '}', and yields 0 otherwise. 'hdr' is parenthesized
 * so that any pointer expression (e.g. &req) can be passed.
 */
#define netmap_get_pipe_na(hdr, _2, _3, _4)	\
	((strchr((hdr)->nr_name, '{') != NULL || strchr((hdr)->nr_name, '}') != NULL) ? EOPNOTSUPP : 0)
#endif
1516
#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		struct netmap_mem_d *nmd, int create);
void netmap_monitor_stop(struct netmap_adapter *na);
#else
/* Without monitor support a request is rejected with EOPNOTSUPP when the
 * register body pointed to by hdr->nr_body has NR_MONITOR_TX or
 * NR_MONITOR_RX set in nr_flags, and yields 0 otherwise. 'hdr' is
 * parenthesized so that any pointer expression can be passed.
 */
#define netmap_get_monitor_na(hdr, _2, _3, _4) \
	(((struct nmreq_register *)(uintptr_t)(hdr)->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif
1525
#ifdef WITH_NMNULL
int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		struct netmap_mem_d *nmd, int create);
#else /* !WITH_NMNULL */
/* Stub used without null-port support; 'hdr' is parenthesized so that any
 * pointer expression can be passed.
 * NOTE(review): the condition tests the NR_MONITOR_TX/NR_MONITOR_RX flags,
 * exactly like the monitor stub. That looks copy-pasted: it rejects monitor
 * requests rather than null-port requests. Confirm which request field
 * identifies a null port and adjust the test accordingly.
 */
#define netmap_get_null_na(hdr, _2, _3, _4) \
	(((struct nmreq_register *)(uintptr_t)(hdr)->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif /* WITH_NMNULL */
1533
#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
/* Without CONFIG_NET_NS there is a single global bridge array: get/put
 * expand to nothing and getbridges stores nm_bridges and vale_max_bridges
 * through its two pointer arguments. The arguments are parenthesized so
 * that expressions such as 'arr + 1' dereference correctly.
 */
extern struct nm_bridge *nm_bridges;
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
	do { *(b) = nm_bridges; *(n) = vale_max_bridges; } while (0)
#endif
1545
/* Various prototypes: entry points called by the OS-specific glue code.
 * NOTE(review): contracts are defined with the implementations; the names
 * suggest poll/ioctl handlers, module init/fini and the per-open state
 * destructor -- confirm there.
 */
int netmap_poll(struct netmap_priv_d *, int events, NM_SELRECORD_T *td);
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);

int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
		struct thread *, int nr_body_is_user);
int netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
			struct thread *td);
/* size of a request body, selected by request type */
size_t nmreq_size_by_type(uint16_t nr_reqtype);
1558
/* netmap_adapter creation/destruction */

/* Define NM_DEBUG_PUTGET to wrap netmap_adapter_get()/netmap_adapter_put()
 * in macros that log the adapter pointer, its name and its reference count
 * around each call. NM_DBG(f) yields the name of the real function in
 * either configuration (__f when debugging, f otherwise).
 */
// #define NM_DEBUG_PUTGET 1

#ifdef NM_DEBUG_PUTGET

#define NM_DBG(f) __##f

void __netmap_adapter_get(struct netmap_adapter *na);

/* logs the reference count after taking the reference */
#define netmap_adapter_get(na) 				\
	do {						\
		struct netmap_adapter *__na = na;	\
		__netmap_adapter_get(__na);		\
		nm_prinf("getting %p:%s -> %d", __na, (__na)->name, (__na)->na_refcount);	\
	} while (0)

int __netmap_adapter_put(struct netmap_adapter *na);

/* logs the expected reference count before dropping the reference; the
 * statement expression evaluates to the result of __netmap_adapter_put() */
#define netmap_adapter_put(na)				\
	({						\
		struct netmap_adapter *__na = na;	\
		if (__na == NULL)			\
			nm_prinf("putting NULL");		\
		else					\
			nm_prinf("putting %p:%s -> %d", __na, (__na)->name, (__na)->na_refcount - 1);	\
		__netmap_adapter_put(__na);		\
	})

#else /* !NM_DEBUG_PUTGET */

#define NM_DBG(f) f
void netmap_adapter_get(struct netmap_adapter *na);
int netmap_adapter_put(struct netmap_adapter *na);

#endif /* !NM_DEBUG_PUTGET */
1595
1596
/*
 * module variables
 */
/* virtual address of the first entry of the adapter lookup table */
#define NETMAP_BUF_BASE(_na)	((_na)->na_lut.lut[0].vaddr)
/* object size recorded in the adapter lookup table */
#define NETMAP_BUF_SIZE(_na)	((_na)->na_lut.objsize)
extern int netmap_no_pendintr;
extern int netmap_verbose;
#ifdef CONFIG_NETMAP_DEBUG
extern int netmap_debug;		/* for debugging */
#else /* !CONFIG_NETMAP_DEBUG */
/* constant 0 when debugging support is compiled out */
#define netmap_debug (0)
#endif /* !CONFIG_NETMAP_DEBUG */
enum {                                  /* debug flags */
	NM_DEBUG_ON = 1,		/* generic debug messages */
	NM_DEBUG_HOST = 0x2,            /* debug host stack */
	NM_DEBUG_RXSYNC = 0x10,         /* debug on rxsync/txsync */
	NM_DEBUG_TXSYNC = 0x20,
	NM_DEBUG_RXINTR = 0x100,        /* debug on rx/tx intr (driver) */
	NM_DEBUG_TXINTR = 0x200,
	/* NOTE(review): the original comment here repeated "rx/tx intr";
	 * by their names these two cover the NIC rxsync/txsync paths */
	NM_DEBUG_NIC_RXSYNC = 0x1000,   /* debug on NIC rxsync/txsync (driver) */
	NM_DEBUG_NIC_TXSYNC = 0x2000,
	NM_DEBUG_MEM = 0x4000,		/* verbose memory allocations/deallocations */
	/* NOTE(review): the original comment said "memory allocators", which
	 * looks copy-pasted from NM_DEBUG_MEM; by its name this is for VALE */
	NM_DEBUG_VALE = 0x8000,		/* debug messages from VALE */
	NM_DEBUG_BDG = NM_DEBUG_VALE,	/* alias of NM_DEBUG_VALE */
};
1622
/* Tunables defined elsewhere. NOTE(review): by their names, most of them
 * configure the emulated (generic) adapters -- confirm where they are
 * defined. */
extern int netmap_txsync_retry;
extern int netmap_generic_hwcsum;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
#ifdef linux
extern int netmap_generic_txqdisc;
#endif
1631
/*
 * NA returns a pointer to the struct netmap adapter from the ifp.
 * WNA is os-specific and must be defined in glue code.
 */
#define	NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))

/*
 * we provide a default implementation of NM_ATTACH_NA/NM_DETACH_NA
 * based on the WNA field.
 * Glue code may override this by defining its own NM_ATTACH_NA
 */
#ifndef NM_ATTACH_NA
/*
 * On old versions of FreeBSD, NA(ifp) is a pspare. On linux we
 * overload another pointer in the netdev.
 *
 * We check if NA(ifp) is set and its first element has a related
 * magic value. The capenable is within the struct netmap_adapter.
 */
#define	NETMAP_MAGIC	0x52697a7a

/* true if NA(ifp) is set and its magic field equals the low 32 bits of its
 * own address xor-ed with NETMAP_MAGIC (the value NM_ATTACH_NA stores) */
#define NM_NA_VALID(ifp)	(NA(ifp) &&		\
	((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )

/* store na in the ifp and, if it is not NULL, stamp its magic field */
#define	NM_ATTACH_NA(ifp, na) do {					\
	WNA(ifp) = na;							\
	if (NA(ifp))							\
		NA(ifp)->magic = 					\
			((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC;	\
} while(0)
/* store na in the ifp without touching the magic field.
 * NOTE(review): the expansion ends with its own ';', so
 * "if (x) NM_RESTORE_NA(a, b); else ..." does not compile; wrap in
 * braces at such call sites. */
#define NM_RESTORE_NA(ifp, na) 	WNA(ifp) = na;

#define NM_DETACH_NA(ifp)	do { WNA(ifp) = NULL; } while (0)
/* true if the WNA pointer is in use but does not carry a valid magic */
#define NM_NA_CLASH(ifp)	(NA(ifp) && !NM_NA_VALID(ifp))
#endif /* !NM_ATTACH_NA */


/* true if the ifp has a valid adapter whose destructor is netmap_hw_dtor */
#define NM_IS_NATIVE(ifp)	(NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
1670
1671 #if defined(__FreeBSD__)
1672
/* Assigns the device IOMMU domain to an allocator.
 * Returns -ENOMEM in case the domain is different
 *
 * NOTE(review): the description above does not match this definition: on
 * FreeBSD the macro ignores 'dev' and is simply the constant 0.
 */
#define nm_iommu_group_id(dev) (0)
1676
1677 /* Callback invoked by the dma machinery after a successful dmamap_load */
netmap_dmamap_cb(__unused void * arg,__unused bus_dma_segment_t * segs,__unused int nseg,__unused int error)1678 static void netmap_dmamap_cb(__unused void *arg,
1679 __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
1680 {
1681 }
1682
1683 /* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
1684 * XXX can we do it without a callback ?
1685 */
1686 static inline int
netmap_load_map(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,void * buf)1687 netmap_load_map(struct netmap_adapter *na,
1688 bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1689 {
1690 if (map)
1691 bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
1692 netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
1693 return 0;
1694 }
1695
1696 static inline void
netmap_unload_map(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map)1697 netmap_unload_map(struct netmap_adapter *na,
1698 bus_dma_tag_t tag, bus_dmamap_t map)
1699 {
1700 if (map)
1701 bus_dmamap_unload(tag, map);
1702 }
1703
1704 #define netmap_sync_map(na, tag, map, sz, t)
1705
1706 /* update the map when a buffer changes. */
1707 static inline void
netmap_reload_map(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,void * buf)1708 netmap_reload_map(struct netmap_adapter *na,
1709 bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1710 {
1711 if (map) {
1712 bus_dmamap_unload(tag, map);
1713 bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
1714 netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
1715 }
1716 }
1717
1718 #elif defined(_WIN32)
1719
1720 #else /* linux */
1721
/* Implemented in the OS-specific code. NOTE(review): by its name, returns
 * an identifier of the IOMMU group of 'dev' -- confirm the return
 * convention against the definition. */
int nm_iommu_group_id(bus_dma_tag_t dev);
1723 #include <linux/dma-mapping.h>
1724
1725 /*
1726 * on linux we need
1727 * dma_map_single(&pdev->dev, virt_addr, len, direction)
1728 * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
1729 */
1730 #if 0
1731 struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l];
1732 /* set time_stamp *before* dma to help avoid a possible race */
1733 buffer_info->time_stamp = jiffies;
1734 buffer_info->mapped_as_page = false;
1735 buffer_info->length = len;
1736 //buffer_info->next_to_watch = l;
1737 /* reload dma map */
1738 dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
1739 NETMAP_BUF_SIZE, DMA_TO_DEVICE);
1740 buffer_info->dma = dma_map_single(&adapter->pdev->dev,
1741 addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);
1742
1743 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
1744 nm_prerr("dma mapping error");
1745 /* goto dma_error; See e1000_put_txbuf() */
1746 /* XXX reset */
1747 }
1748 tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX
1749
1750 #endif
1751
1752 static inline int
netmap_load_map(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,void * buf,u_int size)1753 netmap_load_map(struct netmap_adapter *na,
1754 bus_dma_tag_t tag, bus_dmamap_t map, void *buf, u_int size)
1755 {
1756 if (map) {
1757 *map = dma_map_single(na->pdev, buf, size,
1758 DMA_BIDIRECTIONAL);
1759 if (dma_mapping_error(na->pdev, *map)) {
1760 *map = 0;
1761 return ENOMEM;
1762 }
1763 }
1764 return 0;
1765 }
1766
1767 static inline void
netmap_unload_map(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,u_int sz)1768 netmap_unload_map(struct netmap_adapter *na,
1769 bus_dma_tag_t tag, bus_dmamap_t map, u_int sz)
1770 {
1771 if (*map) {
1772 dma_unmap_single(na->pdev, *map, sz,
1773 DMA_BIDIRECTIONAL);
1774 }
1775 }
1776
1777 #ifdef NETMAP_LINUX_HAVE_DMASYNC
1778 static inline void
netmap_sync_map_cpu(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,u_int sz,enum txrx t)1779 netmap_sync_map_cpu(struct netmap_adapter *na,
1780 bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
1781 {
1782 if (*map) {
1783 dma_sync_single_for_cpu(na->pdev, *map, sz,
1784 (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
1785 }
1786 }
1787
1788 static inline void
netmap_sync_map_dev(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,u_int sz,enum txrx t)1789 netmap_sync_map_dev(struct netmap_adapter *na,
1790 bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
1791 {
1792 if (*map) {
1793 dma_sync_single_for_device(na->pdev, *map, sz,
1794 (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
1795 }
1796 }
1797
1798 static inline void
netmap_reload_map(struct netmap_adapter * na,bus_dma_tag_t tag,bus_dmamap_t map,void * buf)1799 netmap_reload_map(struct netmap_adapter *na,
1800 bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
1801 {
1802 u_int sz = NETMAP_BUF_SIZE(na);
1803
1804 if (*map) {
1805 dma_unmap_single(na->pdev, *map, sz,
1806 DMA_BIDIRECTIONAL);
1807 }
1808
1809 *map = dma_map_single(na->pdev, buf, sz,
1810 DMA_BIDIRECTIONAL);
1811 }
1812 #else /* !NETMAP_LINUX_HAVE_DMASYNC */
1813 #define netmap_sync_map_cpu(na, tag, map, sz, t)
1814 #define netmap_sync_map_dev(na, tag, map, sz, t)
1815 #endif /* NETMAP_LINUX_HAVE_DMASYNC */
1816
1817 #endif /* linux */
1818
1819
1820 /*
1821 * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
1822 */
1823 static inline int
netmap_idx_n2k(struct netmap_kring * kr,int idx)1824 netmap_idx_n2k(struct netmap_kring *kr, int idx)
1825 {
1826 int n = kr->nkr_num_slots;
1827
1828 if (likely(kr->nkr_hwofs == 0)) {
1829 return idx;
1830 }
1831
1832 idx += kr->nkr_hwofs;
1833 if (idx < 0)
1834 return idx + n;
1835 else if (idx < n)
1836 return idx;
1837 else
1838 return idx - n;
1839 }
1840
1841
1842 static inline int
netmap_idx_k2n(struct netmap_kring * kr,int idx)1843 netmap_idx_k2n(struct netmap_kring *kr, int idx)
1844 {
1845 int n = kr->nkr_num_slots;
1846
1847 if (likely(kr->nkr_hwofs == 0)) {
1848 return idx;
1849 }
1850
1851 idx -= kr->nkr_hwofs;
1852 if (idx < 0)
1853 return idx + n;
1854 else if (idx < n)
1855 return idx;
1856 else
1857 return idx - n;
1858 }
1859
1860
/*
 * Entries of the buffer look-up table.
 *
 * On FreeBSD one entry carries both the virtual and the physical address
 * of a buffer. On the other platforms the two addresses live in separate
 * entry types (lut_entry and plut_entry), see below for the reason.
 */
#ifdef __FreeBSD__
struct lut_entry {
	void *vaddr;		/* virtual address. */
	vm_paddr_t paddr;	/* physical address. */
};
#else /* linux & _WIN32 */
/* dma-mapping in linux can assign a buffer a different address
 * depending on the device, so we need to have a separate
 * physical-address look-up table for each na.
 * We can still share the vaddrs, though, therefore we split
 * the lut_entry structure.
 */
struct lut_entry {
	void *vaddr;		/* virtual address (shareable between adapters). */
};

struct plut_entry {
	vm_paddr_t paddr;	/* physical address (per adapter, see above). */
};
#endif /* linux & _WIN32 */
1882
1883 struct netmap_obj_pool;
1884
/*
 * NMB returns the virtual address of a buffer (buffer 0 on a bad index).
 * PNMB does the same and also stores the physical address of the buffer.
 */
1889 static inline void *
NMB(struct netmap_adapter * na,struct netmap_slot * slot)1890 NMB(struct netmap_adapter *na, struct netmap_slot *slot)
1891 {
1892 struct lut_entry *lut = na->na_lut.lut;
1893 uint32_t i = slot->buf_idx;
1894 return (unlikely(i >= na->na_lut.objtotal)) ?
1895 lut[0].vaddr : lut[i].vaddr;
1896 }
1897
1898 static inline void *
PNMB(struct netmap_adapter * na,struct netmap_slot * slot,uint64_t * pp)1899 PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
1900 {
1901 uint32_t i = slot->buf_idx;
1902 struct lut_entry *lut = na->na_lut.lut;
1903 struct plut_entry *plut = na->na_lut.plut;
1904 void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
1905
1906 #ifdef _WIN32
1907 *pp = (i >= na->na_lut.objtotal) ? (uint64_t)plut[0].paddr.QuadPart : (uint64_t)plut[i].paddr.QuadPart;
1908 #else
1909 *pp = (i >= na->na_lut.objtotal) ? plut[0].paddr : plut[i].paddr;
1910 #endif
1911 return ret;
1912 }
1913
1914
1915 /*
1916 * Structure associated to each netmap file descriptor.
1917 * It is created on open and left unbound (np_nifp == NULL).
1918 * A successful NIOCREGIF will set np_nifp and the first few fields;
1919 * this is protected by a global lock (NMG_LOCK) due to low contention.
1920 *
1921 * np_refs counts the number of references to the structure: one for the fd,
1922 * plus (on FreeBSD) one for each active mmap which we track ourselves
1923 * (linux automatically tracks them, but FreeBSD does not).
1924 * np_refs is protected by NMG_LOCK.
1925 *
 * Read access to the structure is lock free, because np_nifp, once set,
 * can only go back to NULL when nobody is using the entry anymore.
 * Readers must check that np_nifp != NULL before using the other fields.
1929 */
struct netmap_priv_d {
	/* volatile pointer: readers test it against NULL without a lock */
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	/* adapter in use; nm_si_user() treats NULL as "no adapter" */
	struct netmap_adapter	*np_na;
	struct ifnet	*np_ifp;	/* NOTE(review): presumably the ifnet behind np_na - confirm */
	uint32_t	np_flags;	/* from the ioctl */
	/* per direction, rings np_qfirst[t] <= i < np_qlast[t] are scanned */
	u_int		np_qfirst[NR_TXRX],
			np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
	uint16_t	np_txpoll;	/* NOTE(review): meaning not visible here - confirm */
	uint16_t        np_kloop_state;	/* use with NMG_LOCK held */
	/* bit values for np_kloop_state */
#define NM_SYNC_KLOOP_RUNNING	(1 << 0)
#define NM_SYNC_KLOOP_STOPPING	(1 << 1)
	int             np_sync_flags; /* to be passed to nm_sync */

	int		np_refs;	/* use with NMG_LOCK held */

	/* pointers to the selinfo to be used for selrecord.
	 * Either the local or the global one depending on the
	 * number of rings.
	 */
	NM_SELINFO_T *np_si[NR_TXRX];

	/* In the optional CSB mode, the user must specify the start address
	 * of two arrays of Communication Status Block (CSB) entries, for the
	 * two directions (kernel read application write, and kernel write
	 * application read).
	 * The number of entries must agree with the number of rings bound to
	 * the netmap file descriptor. The entries corresponding to the TX
	 * rings are laid out before the ones corresponding to the RX rings.
	 *
	 * Array of CSB entries for application --> kernel communication
	 * (N entries). */
	struct nm_csb_atok	*np_csb_atok_base;
	/* Array of CSB entries for kernel --> application communication
	 * (N entries). */
	struct nm_csb_ktoa	*np_csb_ktoa_base;

#ifdef linux
	struct file	*np_filp;  /* used by sync kloop */
#endif /* linux */
};
1971
1972 struct netmap_priv_d *netmap_priv_new(void);
1973 void netmap_priv_delete(struct netmap_priv_d *);
1974
nm_kring_pending(struct netmap_priv_d * np)1975 static inline int nm_kring_pending(struct netmap_priv_d *np)
1976 {
1977 struct netmap_adapter *na = np->np_na;
1978 enum txrx t;
1979 int i;
1980
1981 for_rx_tx(t) {
1982 for (i = np->np_qfirst[t]; i < np->np_qlast[t]; i++) {
1983 struct netmap_kring *kring = NMR(na, t)[i];
1984 if (kring->nr_mode != kring->nr_pending_mode) {
1985 return 1;
1986 }
1987 }
1988 }
1989 return 0;
1990 }
1991
1992 /* call with NMG_LOCK held */
1993 static __inline int
nm_si_user(struct netmap_priv_d * priv,enum txrx t)1994 nm_si_user(struct netmap_priv_d *priv, enum txrx t)
1995 {
1996 return (priv->np_na != NULL &&
1997 (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
1998 }
1999
2000 #ifdef WITH_PIPES
2001 int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
2002 int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
2003 int netmap_pipe_krings_create_both(struct netmap_adapter *na,
2004 struct netmap_adapter *ona);
2005 void netmap_pipe_krings_delete_both(struct netmap_adapter *na,
2006 struct netmap_adapter *ona);
2007 int netmap_pipe_reg_both(struct netmap_adapter *na,
2008 struct netmap_adapter *ona);
2009 #endif /* WITH_PIPES */
2010
2011 #ifdef WITH_MONITOR
2012
/*
 * Adapter type used when WITH_MONITOR is enabled.
 * It embeds a plain netmap_adapter as its first member and adds a
 * private netmap descriptor plus a flags word.
 */
struct netmap_monitor_adapter {
	struct netmap_adapter up;	/* embedded base adapter (first member) */

	struct netmap_priv_d priv;	/* NOTE(review): presumably the binding to the monitored port - confirm */
	uint32_t flags;			/* NOTE(review): flag values are not defined in this chunk */
};
2019
2020 #endif /* WITH_MONITOR */
2021
2022
2023 #ifdef WITH_GENERIC
2024 /*
2025 * generic netmap emulation for devices that do not have
2026 * native netmap support.
2027 */
/* Entry points of the generic (emulated) adapter, defined elsewhere. */
int generic_netmap_attach(struct ifnet *ifp);
int generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
2030
2031 int nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept);
2032 int nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept);
2033
2034 int na_is_generic(struct netmap_adapter *na);
2035
2036 /*
2037 * the generic transmit routine is passed a structure to optionally
2038 * build a queue of descriptors, in an OS-specific way.
2039 * The payload is at addr, if non-null, and the routine should send or queue
2040 * the packet, returning 0 if successful, 1 on failure.
2041 *
2042 * At the end, if head is non-null, there will be an additional call
2043 * to the function with addr = NULL; this should tell the OS-specific
2044 * routine to send the queue and free any resources. Failure is ignored.
2045 */
/*
 * Argument block handed to nm_os_generic_xmit_frame().
 * addr/len describe the packet of the current call; head/tail let the
 * OS-specific code accumulate a queue across calls.
 */
struct nm_os_gen_arg {
	struct ifnet *ifp;	/* NOTE(review): presumably the interface to transmit on - confirm */
	void *m;	/* os-specific mbuf-like object */
	void *head, *tail; /* tailq, if the OS-specific routine needs to build one */
	void *addr;	/* payload of current packet */
	u_int len;	/* packet length */
	u_int ring_nr;	/* transmit ring index */
	u_int qevent;   /* in txqdisc mode, place an event on this mbuf */
};
2055
2056 int nm_os_generic_xmit_frame(struct nm_os_gen_arg *);
2057 int nm_os_generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
2058 void nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
2059 void nm_os_generic_set_features(struct netmap_generic_adapter *gna);
2060
2061 static inline struct ifnet*
netmap_generic_getifp(struct netmap_generic_adapter * gna)2062 netmap_generic_getifp(struct netmap_generic_adapter *gna)
2063 {
2064 if (gna->prev)
2065 return gna->prev->ifp;
2066
2067 return gna->up.up.ifp;
2068 }
2069
2070 void netmap_generic_irq(struct netmap_adapter *na, u_int q, u_int *work_done);
2071
2072 //#define RATE_GENERIC /* Enables communication statistics for generic. */
2073 #ifdef RATE_GENERIC
2074 void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
2075 #else
2076 #define generic_rate(txp, txs, txi, rxp, rxs, rxi)
2077 #endif
2078
2079 /*
2080 * netmap_mitigation API. This is used by the generic adapter
2081 * to reduce the number of interrupt requests/selwakeup
2082 * to clients on incoming packets.
2083 */
2084 void nm_os_mitigation_init(struct nm_generic_mit *mit, int idx,
2085 struct netmap_adapter *na);
2086 void nm_os_mitigation_start(struct nm_generic_mit *mit);
2087 void nm_os_mitigation_restart(struct nm_generic_mit *mit);
2088 int nm_os_mitigation_active(struct nm_generic_mit *mit);
2089 void nm_os_mitigation_cleanup(struct nm_generic_mit *mit);
2090 #else /* !WITH_GENERIC */
2091 #define generic_netmap_attach(ifp) (EOPNOTSUPP)
2092 #define na_is_generic(na) (0)
2093 #endif /* WITH_GENERIC */
2094
2095 /* Shared declarations for the VALE switch. */
2096
2097 /*
2098 * Each transmit queue accumulates a batch of packets into
2099 * a structure before forwarding. Packets to the same
2100 * destination are put in a list using ft_next as a link field.
2101 * ft_frags and ft_next are valid only on the first fragment.
2102 */
/*
 * One fragment of a packet queued for forwarding by a bridge.
 * ft_next holds a 16-bit value, not a pointer, to chain packets that
 * share a destination.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	/* NOTE(review): field name says "offset", comment says "dst port" - confirm which is current */
	uint16_t ft_offset;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};
2111
/*
 * struct 'virtio_net_hdr' from linux.
 * Private copy, together with the values accepted by its 'flags' and
 * 'gso_type' fields. The members add up to 10 bytes.
 */
struct nm_vnet_hdr {
	/* values for 'flags' */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM     1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID    2	/* Csum is valid */
	uint8_t flags;
	/* values for 'gso_type'; ECN is a bit that can be set on top of a type */
#define VIRTIO_NET_HDR_GSO_NONE         0       /* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4        1       /* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP          3       /* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6        4       /* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN          0x80    /* TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;	/* used with VIRTIO_NET_HDR_F_NEEDS_CSUM */
	uint16_t csum_offset;	/* used with VIRTIO_NET_HDR_F_NEEDS_CSUM */
};
2128
2129 #define WORST_CASE_GSO_HEADER (14+40+60) /* IPv6 + TCP */
2130
2131 /* Private definitions for IPv4, IPv6, UDP and TCP headers. */
2132
/*
 * Private IPv4 header layout (fixed part, 20 bytes).
 * NOTE(review): multi-byte fields are presumably kept in network byte
 * order, as on the wire - confirm against the users of this struct.
 */
struct nm_iphdr {
	uint8_t		version_ihl;	/* version and header length share one byte */
	uint8_t		tos;
	uint16_t	tot_len;
	uint16_t	id;
	uint16_t	frag_off;
	uint8_t		ttl;
	uint8_t		protocol;
	uint16_t	check;		/* header checksum */
	uint32_t	saddr;		/* source address */
	uint32_t	daddr;		/* destination address */
	/*The options start here. */
};
2146
/*
 * Private TCP header layout (fixed part, 20 bytes).
 * NOTE(review): multi-byte fields are presumably kept in network byte
 * order, as on the wire - confirm against the users of this struct.
 */
struct nm_tcphdr {
	uint16_t	source;		/* source port */
	uint16_t	dest;		/* destination port */
	uint32_t	seq;
	uint32_t	ack_seq;
	uint8_t		doff;  /* Data offset + Reserved */
	uint8_t		flags;
	uint16_t	window;
	uint16_t	check;		/* checksum */
	uint16_t	urg_ptr;
};
2158
/*
 * Private UDP header layout (8 bytes).
 * NOTE(review): fields are presumably kept in network byte order, as on
 * the wire - confirm against the users of this struct.
 */
struct nm_udphdr {
	uint16_t	source;		/* source port */
	uint16_t	dest;		/* destination port */
	uint16_t	len;
	uint16_t	check;		/* checksum */
};
2165
/*
 * Private IPv6 header layout (fixed header, 40 bytes).
 * NOTE(review): payload_len is presumably kept in network byte order,
 * as on the wire - confirm against the users of this struct.
 */
struct nm_ipv6hdr {
	uint8_t		priority_version;	/* two sub-fields packed in one byte */
	uint8_t		flow_lbl[3];

	uint16_t	payload_len;
	uint8_t		nexthdr;
	uint8_t		hop_limit;

	uint8_t		saddr[16];	/* source address, as raw bytes */
	uint8_t		daddr[16];	/* destination address, as raw bytes */
};
2177
2178 /* Type used to store a checksum (in host byte order) that hasn't been
2179 * folded yet.
2180 */
2181 #define rawsum_t uint32_t
2182
2183 rawsum_t nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
2184 uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph);
2185 void nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
2186 size_t datalen, uint16_t *check);
2187 void nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
2188 size_t datalen, uint16_t *check);
2189 uint16_t nm_os_csum_fold(rawsum_t cur_sum);
2190
2191 void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
2192 struct netmap_vp_adapter *dst_na,
2193 const struct nm_bdg_fwd *ft_p,
2194 struct netmap_ring *dst_ring,
2195 u_int *j, u_int lim, u_int *howmany);
2196
2197 /* persistent virtual port routines */
2198 int nm_os_vi_persist(const char *, struct ifnet **);
2199 void nm_os_vi_detach(struct ifnet *);
2200 void nm_os_vi_init_index(void);
2201
2202 /*
2203 * kernel thread routines
2204 */
2205 struct nm_kctx; /* OS-specific kernel context - opaque */
2206 typedef void (*nm_kctx_worker_fn_t)(void *data);
2207
/*
 * kthread configuration.
 * Describes the worker to run: worker_fn has the nm_kctx_worker_fn_t
 * signature (a single void * argument, no return value).
 */
struct nm_kctx_cfg {
	long			type;		/* kthread type/identifier */
	nm_kctx_worker_fn_t	worker_fn;	/* worker function */
	/* NOTE(review): presumably the value passed to worker_fn - confirm */
	void			*worker_private;/* worker parameter */
	int			attach_user;	/* attach kthread to user process */
};
/* kernel context lifecycle: create, start/stop the worker, destroy */
2216 struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
2217 void *opaque);
2218 int nm_os_kctx_worker_start(struct nm_kctx *);
2219 void nm_os_kctx_worker_stop(struct nm_kctx *);
2220 void nm_os_kctx_destroy(struct nm_kctx *);
2221 void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
2222 u_int nm_os_ncpus(void);
2223
2224 int netmap_sync_kloop(struct netmap_priv_d *priv,
2225 struct nmreq_header *hdr);
2226 int netmap_sync_kloop_stop(struct netmap_priv_d *priv);
2227
2228 #ifdef WITH_PTNETMAP
2229 /* ptnetmap guest routines */
2230
2231 /*
2232 * ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver
2233 */
2234 struct ptnetmap_memdev;
2235 int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
2236 uint64_t *);
2237 void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
2238 uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
2239
/*
 * netmap adapter for guest ptnetmap ports.
 * It bundles two hardware adapters (one seen by netmap applications, one
 * used by the driver) and a counter of the backend port users.
 */
struct netmap_pt_guest_adapter {
	/* The netmap adapter to be used by netmap applications.
	 * This field must be the first, to allow upcast. */
	struct netmap_hw_adapter hwup;

	/* The netmap adapter to be used by the driver. */
	struct netmap_hw_adapter dr;

	/* Reference counter to track users of backend netmap port: the
	 * network stack and netmap clients.
	 * Used to decide when we need (de)allocate krings/rings and
	 * start (stop) ptnetmap kthreads. */
	int backend_users;

};
2258
2259 int netmap_pt_guest_attach(struct netmap_adapter *na,
2260 unsigned int nifp_offset,
2261 unsigned int memid);
2262 bool netmap_pt_guest_txsync(struct nm_csb_atok *atok,
2263 struct nm_csb_ktoa *ktoa,
2264 struct netmap_kring *kring, int flags);
2265 bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok,
2266 struct nm_csb_ktoa *ktoa,
2267 struct netmap_kring *kring, int flags);
2268 int ptnet_nm_krings_create(struct netmap_adapter *na);
2269 void ptnet_nm_krings_delete(struct netmap_adapter *na);
2270 void ptnet_nm_dtor(struct netmap_adapter *na);
2271
2272 /* Helper function wrapping nm_sync_kloop_appl_read(). */
2273 static inline void
ptnet_sync_tail(struct nm_csb_ktoa * ktoa,struct netmap_kring * kring)2274 ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring)
2275 {
2276 struct netmap_ring *ring = kring->ring;
2277
2278 /* Update hwcur and hwtail as known by the host. */
2279 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
2280
2281 /* nm_sync_finalize */
2282 ring->tail = kring->rtail = kring->nr_hwtail;
2283 }
2284 #endif /* WITH_PTNETMAP */
2285
2286 #ifdef __FreeBSD__
2287 /*
2288 * FreeBSD mbuf allocator/deallocator in emulation mode:
2289 *
2290 * We allocate mbufs with m_gethdr(), since the mbuf header is needed
2291 * by the driver. We also attach a customly-provided external storage,
2292 * which in this case is a netmap buffer. When calling m_extadd(), however
2293 * we pass a NULL address, since the real address (and length) will be
2294 * filled in by nm_os_generic_xmit_frame() right before calling
2295 * if_transmit().
2296 *
2297 * The dtor function does nothing, however we need it since mb_free_ext()
2298 * has a KASSERT(), checking that the mbuf dtor function is not NULL.
2299 */
2300
/* Destructor that intentionally does nothing with the mbuf it is given. */
static void
void_mbuf_dtor(struct mbuf *m)
{
	(void)m;
}
2302
/*
 * Install fn as the external-storage free routine of mbuf m
 * (m->m_ext.ext_free). A NULL fn selects void_mbuf_dtor instead.
 *
 * Both arguments are parenthesized so that any expression can be passed.
 * Note that fn is evaluated twice when it is not NULL.
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
	(m)->m_ext.ext_free = ((fn) != NULL) ?		\
	    (void *)(fn) : (void *)void_mbuf_dtor;	\
} while (0)
2307
2308 static inline struct mbuf *
nm_os_get_mbuf(struct ifnet * ifp,int len)2309 nm_os_get_mbuf(struct ifnet *ifp, int len)
2310 {
2311 struct mbuf *m;
2312
2313 (void)ifp;
2314 (void)len;
2315
2316 m = m_gethdr(M_NOWAIT, MT_DATA);
2317 if (m == NULL) {
2318 return m;
2319 }
2320
2321 m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
2322 NULL, NULL, 0, EXT_NET_DRV);
2323
2324 return m;
2325 }
2326
2327 #endif /* __FreeBSD__ */
2328
2329 struct nmreq_option * nmreq_getoption(struct nmreq_header *, uint16_t);
2330
2331 int netmap_init_bridges(void);
2332 void netmap_uninit_bridges(void);
2333
/*
 * Functions to read and write CSB fields from the kernel.
 *
 * CSB_READ(csb, field, r) loads csb->field into r and CSB_WRITE(csb,
 * field, v) stores v into csb->field. Linux builds go through
 * get_user()/put_user(), the others through fuword32()/suword32().
 * The csb argument is parenthesized, so an expression such as
 * "base + i" can be passed safely.
 */
#if defined (linux)
#define CSB_READ(csb, field, r) (get_user(r, &(csb)->field))
#define CSB_WRITE(csb, field, v) (put_user(v, &(csb)->field))
#else  /* ! linux */
#define CSB_READ(csb, field, r) ((r) = fuword32(&(csb)->field))
#define CSB_WRITE(csb, field, v) (suword32(&(csb)->field, v))
#endif /* ! linux */
2342
2343 #endif /* _NET_NETMAP_KERN_H_ */
2344