1 /*        $NetBSD: if_bridge.c,v 1.199 2025/04/22 05:47:51 ozaki-r Exp $        */
2 
3 /*
4  * Copyright 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *        This product includes software developed for the NetBSD Project by
20  *        Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40  * All rights reserved.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. All advertising materials mentioning features or use of this software
51  *    must display the following acknowledgement:
52  *        This product includes software developed by Jason L. Wright
53  * 4. The name of the author may not be used to endorse or promote products
54  *    derived from this software without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  *
68  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69  */
70 
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *        - Currently only supports Ethernet-like interfaces (Ethernet,
77  *          802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78  *          to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *          consider heterogeneous bridges).
80  */
81 
82 #include <sys/cdefs.h>
83 __KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.199 2025/04/22 05:47:51 ozaki-r Exp $");
84 
85 #ifdef _KERNEL_OPT
86 #include "opt_inet.h"
87 #include "opt_net_mpsafe.h"
88 #endif /* _KERNEL_OPT */
89 
90 #include <sys/param.h>
91 #include <sys/kernel.h>
92 #include <sys/mbuf.h>
93 #include <sys/queue.h>
94 #include <sys/socket.h>
95 #include <sys/socketvar.h> /* for softnet_lock */
96 #include <sys/sockio.h>
97 #include <sys/systm.h>
98 #include <sys/proc.h>
99 #include <sys/pool.h>
100 #include <sys/kauth.h>
101 #include <sys/cpu.h>
102 #include <sys/cprng.h>
103 #include <sys/mutex.h>
104 #include <sys/kmem.h>
105 #include <sys/syslog.h>
106 
107 #include <net/bpf.h>
108 #include <net/if.h>
109 #include <net/if_dl.h>
110 #include <net/if_types.h>
111 #include <net/if_llc.h>
112 
113 #include <net/if_ether.h>
114 #include <net/if_bridgevar.h>
115 #include <net/ether_sw_offload.h>
116 
117 /* Used for bridge_ip[6]_checkbasic */
118 #include <netinet/in.h>
119 #include <netinet/in_systm.h>
120 #include <netinet/ip.h>
121 #include <netinet/ip_var.h>
122 #include <netinet/ip_private.h>                   /* XXX */
123 #include <netinet/ip6.h>
124 #include <netinet6/in6_var.h>
125 #include <netinet6/ip6_var.h>
126 #include <netinet6/ip6_private.h>       /* XXX */
127 
/*
 * Size of the route hash table.  Must be a power of two, because
 * BRIDGE_RTHASH_MASK is derived from it as (size - 1).  The size may be
 * overridden at build time, so reject values that break that invariant
 * instead of silently building a mask with holes in it.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define	BRIDGE_RTHASH_SIZE		1024
#endif

#if BRIDGE_RTHASH_SIZE <= 0 || \
    (BRIDGE_RTHASH_SIZE & (BRIDGE_RTHASH_SIZE - 1)) != 0
#error "BRIDGE_RTHASH_SIZE must be a positive power of two"
#endif

#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
136 
137 #include "carp.h"
138 #if NCARP > 0
139 #include <netinet/in.h>
140 #include <netinet/in_var.h>
141 #include <netinet/ip_carp.h>
142 #endif
143 
144 #include "ioconf.h"
145 
146 __CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
147 __CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
148 __CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
149 
/*
 * Default upper bound on the number of cached addresses
 * (copied into sc_brtmax when a bridge is created).
 */
#ifndef BRIDGE_RTABLE_MAX
#define	BRIDGE_RTABLE_MAX		100
#endif

/*
 * Spanning tree defaults.
 */
#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
#define	BSTP_DEFAULT_PATH_COST		55

/*
 * Lifetime, in seconds, of dynamically learned entries.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
#endif

/*
 * Interval, in seconds, between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
#endif
181 
/* Enter, exit and ownership-test wrappers around sc_rtlist_lock. */
#define	BRIDGE_RT_LOCK(_sc)	mutex_enter((_sc)->sc_rtlist_lock)
#define	BRIDGE_RT_UNLOCK(_sc)	mutex_exit((_sc)->sc_rtlist_lock)
#define	BRIDGE_RT_LOCKED(_sc)	mutex_owned((_sc)->sc_rtlist_lock)

/* Run pserialize_perform() on the route list's pserialize object. */
#define	BRIDGE_RT_PSZ_PERFORM(_sc)					\
	pserialize_perform((_sc)->sc_rtlist_psz)
188 
/*
 * Iteration and update helpers for the per-bridge route list
 * (sc_rtlist), whose nodes are linked through the brt_list member of
 * struct bridge_rtnode.
 */
#define	BRIDGE_RTLIST_READER_FOREACH(_brt, _sc)				\
	PSLIST_READER_FOREACH((_brt), &((_sc)->sc_rtlist),		\
	    struct bridge_rtnode, brt_list)
#define	BRIDGE_RTLIST_WRITER_FOREACH(_brt, _sc)				\
	PSLIST_WRITER_FOREACH((_brt), &((_sc)->sc_rtlist),		\
	    struct bridge_rtnode, brt_list)
/*
 * Insert the node passed as _brt.  The expansion must use the macro
 * parameter; naming a bare "brt" here would silently pick up whatever
 * variable of that name exists at the call site.
 */
#define	BRIDGE_RTLIST_WRITER_INSERT_HEAD(_sc, _brt)			\
	PSLIST_WRITER_INSERT_HEAD(&(_sc)->sc_rtlist, (_brt), brt_list)
#define	BRIDGE_RTLIST_WRITER_REMOVE(_brt)				\
	PSLIST_WRITER_REMOVE((_brt), brt_list)
199 
/*
 * Iteration and update helpers for one bucket of the per-bridge route
 * hash (sc_rthash[]), whose nodes are linked through the brt_hash member
 * of struct bridge_rtnode.
 */
#define	BRIDGE_RTHASH_READER_FOREACH(_brt, _sc, _hash)			\
	PSLIST_READER_FOREACH((_brt), &(_sc)->sc_rthash[(_hash)],	\
	    struct bridge_rtnode, brt_hash)
#define	BRIDGE_RTHASH_WRITER_FOREACH(_brt, _sc, _hash)			\
	PSLIST_WRITER_FOREACH((_brt), &(_sc)->sc_rthash[(_hash)],	\
	    struct bridge_rtnode, brt_hash)
/*
 * Insert the node passed as _brt at the head of bucket _hash.  The
 * expansion must use the macro parameter rather than a bare "brt", which
 * would capture an unrelated variable at the call site.
 */
#define	BRIDGE_RTHASH_WRITER_INSERT_HEAD(_sc, _hash, _brt)		\
	PSLIST_WRITER_INSERT_HEAD(&(_sc)->sc_rthash[(_hash)], (_brt),	\
	    brt_hash)
#define	BRIDGE_RTHASH_WRITER_INSERT_AFTER(_brt, _new)			\
	PSLIST_WRITER_INSERT_AFTER((_brt), (_new), brt_hash)
#define	BRIDGE_RTHASH_WRITER_REMOVE(_brt)				\
	PSLIST_WRITER_REMOVE((_brt), brt_hash)
212 
/*
 * Global lock helpers.  With NET_MPSAFE they expand to nothing.
 * Otherwise ACQUIRE_GLOBAL_LOCKS() takes the kernel lock and
 * softnet_lock and calls splsoftnet(), storing its result in the
 * variable introduced by DECLARE_LOCK_VARIABLE; RELEASE_GLOBAL_LOCKS()
 * undoes those steps in reverse order.
 */
#ifdef NET_MPSAFE
#define	DECLARE_LOCK_VARIABLE
#define	ACQUIRE_GLOBAL_LOCKS()	do { } while (0)
#define	RELEASE_GLOBAL_LOCKS()	do { } while (0)
#else
#define	DECLARE_LOCK_VARIABLE	int __s
#define	ACQUIRE_GLOBAL_LOCKS()						\
	do {								\
		KERNEL_LOCK(1, NULL);					\
		mutex_enter(softnet_lock);				\
		__s = splsoftnet();					\
	} while (0)
#define	RELEASE_GLOBAL_LOCKS()						\
	do {								\
		splx(__s);						\
		mutex_exit(softnet_lock);				\
		KERNEL_UNLOCK_ONE(NULL);				\
	} while (0)
#endif
230 
231 struct psref_class *bridge_psref_class __read_mostly;
232 
233 int       bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
234 
235 static struct pool bridge_rtnode_pool;
236 
237 static int          bridge_clone_create(struct if_clone *, int);
238 static int          bridge_clone_destroy(struct ifnet *);
239 
240 static int          bridge_ioctl(struct ifnet *, u_long, void *);
241 static int          bridge_init(struct ifnet *);
242 static void         bridge_stop(struct ifnet *, int);
243 static void         bridge_start(struct ifnet *);
244 static void         bridge_ifdetach(void *);
245 
246 static void         bridge_input(struct ifnet *, struct mbuf *);
247 static void         bridge_forward(struct bridge_softc *, struct mbuf *);
248 
249 static void         bridge_timer(void *);
250 
251 static void         bridge_broadcast(struct bridge_softc *, struct ifnet *, bool,
252                                          struct mbuf *);
253 
254 static int          bridge_rtupdate(struct bridge_softc *, const uint8_t *,
255                                         struct ifnet *, int, uint8_t);
256 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
257 static void         bridge_rttrim(struct bridge_softc *);
258 static void         bridge_rtage(struct bridge_softc *);
259 static void         bridge_rtage_work(struct work *, void *);
260 static void         bridge_rtflush(struct bridge_softc *, int);
261 static int          bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
262 static void         bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
263 
264 static void         bridge_rtable_init(struct bridge_softc *);
265 static void         bridge_rtable_fini(struct bridge_softc *);
266 
267 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
268                                                               const uint8_t *);
269 static int          bridge_rtnode_insert(struct bridge_softc *,
270                                              struct bridge_rtnode *);
271 static void         bridge_rtnode_remove(struct bridge_softc *,
272                                              struct bridge_rtnode *);
273 static void         bridge_rtnode_destroy(struct bridge_rtnode *);
274 
275 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
276                                                               const char *name,
277                                                               struct psref *);
278 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
279                                                                  struct ifnet *ifp,
280                                                                  struct psref *);
281 static void         bridge_release_member(struct bridge_softc *, struct bridge_iflist *,
282                                       struct psref *);
283 static void         bridge_delete_member(struct bridge_softc *,
284                                              struct bridge_iflist *);
285 static void         bridge_acquire_member(struct bridge_softc *sc,
286                                       struct bridge_iflist *,
287                                       struct psref *);
288 
289 static int          bridge_ioctl_add(struct bridge_softc *, void *);
290 static int          bridge_ioctl_del(struct bridge_softc *, void *);
291 static int          bridge_ioctl_gifflags(struct bridge_softc *, void *);
292 static int          bridge_ioctl_sifflags(struct bridge_softc *, void *);
293 static int          bridge_ioctl_scache(struct bridge_softc *, void *);
294 static int          bridge_ioctl_gcache(struct bridge_softc *, void *);
295 static int          bridge_ioctl_gifs(struct bridge_softc *, void *);
296 static int          bridge_ioctl_rts(struct bridge_softc *, void *);
297 static int          bridge_ioctl_saddr(struct bridge_softc *, void *);
298 static int          bridge_ioctl_sto(struct bridge_softc *, void *);
299 static int          bridge_ioctl_gto(struct bridge_softc *, void *);
300 static int          bridge_ioctl_daddr(struct bridge_softc *, void *);
301 static int          bridge_ioctl_flush(struct bridge_softc *, void *);
302 static int          bridge_ioctl_gpri(struct bridge_softc *, void *);
303 static int          bridge_ioctl_spri(struct bridge_softc *, void *);
304 static int          bridge_ioctl_ght(struct bridge_softc *, void *);
305 static int          bridge_ioctl_sht(struct bridge_softc *, void *);
306 static int          bridge_ioctl_gfd(struct bridge_softc *, void *);
307 static int          bridge_ioctl_sfd(struct bridge_softc *, void *);
308 static int          bridge_ioctl_gma(struct bridge_softc *, void *);
309 static int          bridge_ioctl_sma(struct bridge_softc *, void *);
310 static int          bridge_ioctl_sifprio(struct bridge_softc *, void *);
311 static int          bridge_ioctl_sifcost(struct bridge_softc *, void *);
312 static int          bridge_ioctl_gfilt(struct bridge_softc *, void *);
313 static int          bridge_ioctl_sfilt(struct bridge_softc *, void *);
314 static int          bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
315 static int          bridge_ip_checkbasic(struct mbuf **mp);
316 # ifdef INET6
317 static int          bridge_ip6_checkbasic(struct mbuf **mp);
318 # endif /* INET6 */
319 
/*
 * One entry of the SIOC[GS]DRVSPEC dispatch table.
 */
struct bridge_control {
	/* Handler, called with the bridge softc and the argument buffer. */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* Required ifd_len, unless a BC_F_XLATE* flag is set. */
	int	bc_argsize;
	/* Combination of BC_F_* flags. */
	int	bc_flags;
};
325 
/* Values for bridge_control.bc_flags. */
#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
#define	BC_F_XLATEIN		0x08	/* xlate arguments in */
#define	BC_F_XLATEOUT		0x10	/* xlate arguments out */
331 
332 static const struct bridge_control bridge_control_table[] = {
333 [BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
334 [BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
335 
336 [BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
337 [BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
338 
339 [BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
340 [BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},
341 
342 [OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
343 [OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},
344 
345 [BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
346 
347 [BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
348 [BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},
349 
350 [BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
351 
352 [BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
353 
354 [BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
355 [BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
356 
357 [BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
358 [BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
359 
360 [BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
361 [BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
362 
363 [BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
364 [BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
365 
366 [BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
367 
368 [BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
369 
370 [BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
371 [BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
372 
373 [BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
374 [BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
375 };
376 
377 static const int bridge_control_table_size = __arraycount(bridge_control_table);
378 
379 static struct if_clone bridge_cloner =
380     IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
381 
382 /*
383  * bridgeattach:
384  *
385  *        Pseudo-device attach routine.
386  */
387 void
bridgeattach(int n)388 bridgeattach(int n)
389 {
390 
391           pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
392               0, 0, 0, "brtpl", NULL, IPL_NET);
393 
394           bridge_psref_class = psref_class_create("bridge", IPL_SOFTNET);
395 
396           if_clone_attach(&bridge_cloner);
397 }
398 
399 /*
400  * bridge_clone_create:
401  *
402  *        Create a new bridge instance.
403  */
404 static int
bridge_clone_create(struct if_clone * ifc,int unit)405 bridge_clone_create(struct if_clone *ifc, int unit)
406 {
407           struct bridge_softc *sc;
408           struct ifnet *ifp;
409           int error;
410 
411           sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
412           ifp = &sc->sc_if;
413 
414           sc->sc_brtmax = BRIDGE_RTABLE_MAX;
415           sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
416           sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
417           sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
418           sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
419           sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
420           sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
421           sc->sc_filter_flags = 0;
422 
423           /* Initialize our routing table. */
424           bridge_rtable_init(sc);
425 
426           error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
427               bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
428           if (error)
429                     panic("%s: workqueue_create %d\n", __func__, error);
430 
431           callout_init(&sc->sc_brcallout, CALLOUT_MPSAFE);
432           callout_init(&sc->sc_bstpcallout, CALLOUT_MPSAFE);
433 
434           mutex_init(&sc->sc_iflist_psref.bip_lock, MUTEX_DEFAULT, IPL_NONE);
435           PSLIST_INIT(&sc->sc_iflist_psref.bip_iflist);
436           sc->sc_iflist_psref.bip_psz = pserialize_create();
437 
438           if_initname(ifp, ifc->ifc_name, unit);
439           ifp->if_softc = sc;
440 #ifdef NET_MPSAFE
441           ifp->if_extflags = IFEF_MPSAFE;
442 #endif
443           ifp->if_mtu = ETHERMTU;
444           ifp->if_ioctl = bridge_ioctl;
445           ifp->if_output = bridge_output;
446           ifp->if_start = bridge_start;
447           ifp->if_stop = bridge_stop;
448           ifp->if_init = bridge_init;
449           ifp->if_type = IFT_BRIDGE;
450           ifp->if_addrlen = 0;
451           ifp->if_dlt = DLT_EN10MB;
452           ifp->if_hdrlen = ETHER_HDR_LEN;
453           if_initialize(ifp);
454 
455           /*
456            * Set the link state to down.
457            * When interfaces are added the link state will reflect
458            * the best link state of the combined interfaces.
459            */
460           ifp->if_link_state = LINK_STATE_DOWN;
461 
462           if_alloc_sadl(ifp);
463           if_register(ifp);
464 
465           return 0;
466 }
467 
468 /*
469  * bridge_clone_destroy:
470  *
471  *        Destroy a bridge instance.
472  */
473 static int
bridge_clone_destroy(struct ifnet * ifp)474 bridge_clone_destroy(struct ifnet *ifp)
475 {
476           struct bridge_softc *sc = ifp->if_softc;
477           struct bridge_iflist *bif;
478 
479           if ((ifp->if_flags & IFF_RUNNING) != 0)
480                     bridge_stop(ifp, 1);
481 
482           BRIDGE_LOCK(sc);
483           for (;;) {
484                     bif = PSLIST_WRITER_FIRST(&sc->sc_iflist_psref.bip_iflist, struct bridge_iflist,
485                         bif_next);
486                     if (bif == NULL)
487                               break;
488                     bridge_delete_member(sc, bif);
489           }
490           PSLIST_DESTROY(&sc->sc_iflist_psref.bip_iflist);
491           BRIDGE_UNLOCK(sc);
492 
493           if_detach(ifp);
494 
495           /* Tear down the routing table. */
496           bridge_rtable_fini(sc);
497 
498           pserialize_destroy(sc->sc_iflist_psref.bip_psz);
499           mutex_destroy(&sc->sc_iflist_psref.bip_lock);
500           callout_destroy(&sc->sc_brcallout);
501           callout_destroy(&sc->sc_bstpcallout);
502           workqueue_destroy(sc->sc_rtage_wq);
503           kmem_free(sc, sizeof(*sc));
504 
505           return 0;
506 }
507 
508 /*
509  * bridge_ioctl:
510  *
511  *        Handle a control request from the operator.
512  */
513 static int
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)514 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
515 {
516           struct bridge_softc *sc = ifp->if_softc;
517           struct lwp *l = curlwp;       /* XXX */
518           union {
519                     struct ifbreq ifbreq;
520                     struct ifbifconf ifbifconf;
521                     struct ifbareq ifbareq;
522                     struct ifbaconf ifbaconf;
523                     struct ifbrparam ifbrparam;
524           } args;
525           struct ifdrv *ifd = (struct ifdrv *) data;
526           const struct bridge_control *bc = NULL; /* XXXGCC */
527           int error = 0;
528 
529           /* Authorize command before calling splsoftnet(). */
530           switch (cmd) {
531           case SIOCGDRVSPEC:
532           case SIOCSDRVSPEC:
533                     if (ifd->ifd_cmd >= bridge_control_table_size
534                         || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
535                               error = EINVAL;
536                               return error;
537                     }
538 
539                     /* We only care about BC_F_SUSER at this point. */
540                     if ((bc->bc_flags & BC_F_SUSER) == 0)
541                               break;
542 
543                     error = kauth_authorize_network(l->l_cred,
544                         KAUTH_NETWORK_INTERFACE_BRIDGE,
545                         cmd == SIOCGDRVSPEC ?
546                          KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
547                          KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
548                          ifd, NULL, NULL);
549                     if (error)
550                               return error;
551 
552                     break;
553           }
554 
555           const int s = splsoftnet();
556 
557           switch (cmd) {
558           case SIOCGDRVSPEC:
559           case SIOCSDRVSPEC:
560                     KASSERT(bc != NULL);
561                     if (cmd == SIOCGDRVSPEC &&
562                         (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
563                               error = EINVAL;
564                               break;
565                     }
566                     else if (cmd == SIOCSDRVSPEC &&
567                         (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
568                               error = EINVAL;
569                               break;
570                     }
571 
572                     /* BC_F_SUSER is checked above, before splsoftnet(). */
573 
574                     if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
575                         && (ifd->ifd_len != bc->bc_argsize
576                               || ifd->ifd_len > sizeof(args))) {
577                               error = EINVAL;
578                               break;
579                     }
580 
581                     memset(&args, 0, sizeof(args));
582                     if (bc->bc_flags & BC_F_COPYIN) {
583                               error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
584                               if (error)
585                                         break;
586                     } else if (bc->bc_flags & BC_F_XLATEIN) {
587                               args.ifbifconf.ifbic_len = ifd->ifd_len;
588                               args.ifbifconf.ifbic_buf = ifd->ifd_data;
589                     }
590 
591                     error = (*bc->bc_func)(sc, &args);
592                     if (error)
593                               break;
594 
595                     if (bc->bc_flags & BC_F_COPYOUT) {
596                               error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
597                     } else if (bc->bc_flags & BC_F_XLATEOUT) {
598                               ifd->ifd_len = args.ifbifconf.ifbic_len;
599                               ifd->ifd_data = args.ifbifconf.ifbic_buf;
600                     }
601                     break;
602 
603           case SIOCSIFFLAGS:
604                     if ((error = ifioctl_common(ifp, cmd, data)) != 0)
605                               break;
606                     switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
607                     case IFF_RUNNING:
608                               /*
609                                * If interface is marked down and it is running,
610                                * then stop and disable it.
611                                */
612                               if_stop(ifp, 1);
613                               break;
614                     case IFF_UP:
615                               /*
616                                * If interface is marked up and it is stopped, then
617                                * start it.
618                                */
619                               error = if_init(ifp);
620                               break;
621                     default:
622                               break;
623                     }
624                     break;
625 
626           case SIOCSIFMTU:
627                     if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
628                               error = 0;
629                     break;
630 
631         case SIOCGIFCAP:
632               {
633                     struct ifcapreq *ifcr = (struct ifcapreq *)data;
634                 ifcr->ifcr_capabilities = sc->sc_capenable;
635                 ifcr->ifcr_capenable = sc->sc_capenable;
636                     break;
637               }
638 
639           default:
640                     error = ifioctl_common(ifp, cmd, data);
641                     break;
642           }
643 
644           splx(s);
645 
646           return error;
647 }
648 
649 /*
650  * bridge_lookup_member:
651  *
652  *        Lookup a bridge member interface.
653  */
654 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name,struct psref * psref)655 bridge_lookup_member(struct bridge_softc *sc, const char *name, struct psref *psref)
656 {
657           struct bridge_iflist *bif;
658           struct ifnet *ifp;
659           int s;
660 
661           s = pserialize_read_enter();
662 
663           BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
664                     ifp = bif->bif_ifp;
665                     if (strcmp(ifp->if_xname, name) == 0)
666                               break;
667           }
668           if (bif != NULL)
669                     bridge_acquire_member(sc, bif, psref);
670 
671           pserialize_read_exit(s);
672 
673           return bif;
674 }
675 
676 /*
677  * bridge_lookup_member_if:
678  *
679  *        Lookup a bridge member interface by ifnet*.
680  */
681 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp,struct psref * psref)682 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp,
683     struct psref *psref)
684 {
685           struct bridge_iflist *bif;
686           int s;
687 
688           s = pserialize_read_enter();
689 
690           bif = member_ifp->if_bridgeif;
691           if (bif != NULL) {
692                     psref_acquire(psref, &bif->bif_psref,
693                         bridge_psref_class);
694           }
695 
696           pserialize_read_exit(s);
697 
698           return bif;
699 }
700 
701 static void
bridge_acquire_member(struct bridge_softc * sc,struct bridge_iflist * bif,struct psref * psref)702 bridge_acquire_member(struct bridge_softc *sc, struct bridge_iflist *bif,
703     struct psref *psref)
704 {
705 
706           psref_acquire(psref, &bif->bif_psref, bridge_psref_class);
707 }
708 
709 /*
710  * bridge_release_member:
711  *
712  *        Release the specified member interface.
713  */
714 static void
bridge_release_member(struct bridge_softc * sc,struct bridge_iflist * bif,struct psref * psref)715 bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif,
716     struct psref *psref)
717 {
718 
719           psref_release(psref, &bif->bif_psref, bridge_psref_class);
720 }
721 
722 /*
723  * bridge_delete_member:
724  *
725  *        Delete the specified member interface.
726  */
727 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)728 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
729 {
730           struct ifnet *ifs = bif->bif_ifp;
731 
732           KASSERT(BRIDGE_LOCKED(sc));
733 
734           ifs->_if_input = ether_input;
735           ifs->if_bridge = NULL;
736           ifs->if_bridgeif = NULL;
737 
738           PSLIST_WRITER_REMOVE(bif, bif_next);
739           BRIDGE_PSZ_PERFORM(sc);
740 
741           if_linkstate_change_disestablish(ifs,
742               bif->bif_linkstate_hook, BRIDGE_LOCK_OBJ(sc));
743           ether_ifdetachhook_disestablish(ifs,
744               bif->bif_ifdetach_hook, BRIDGE_LOCK_OBJ(sc));
745 
746           BRIDGE_UNLOCK(sc);
747 
748           switch (ifs->if_type) {
749           case IFT_ETHER:
750           case IFT_L2TP:
751                     /*
752                      * Take the interface out of promiscuous mode.
753                      * Don't call it with holding a spin lock.
754                      */
755                     (void) ifpromisc(ifs, 0);
756                     IFNET_LOCK(ifs);
757                     (void) ether_disable_vlan_mtu(ifs);
758                     IFNET_UNLOCK(ifs);
759                     break;
760           default:
761 #ifdef DIAGNOSTIC
762                     panic("%s: impossible", __func__);
763 #endif
764                     break;
765           }
766 
767           psref_target_destroy(&bif->bif_psref, bridge_psref_class);
768 
769           PSLIST_ENTRY_DESTROY(bif, bif_next);
770           kmem_free(bif, sizeof(*bif));
771 
772           BRIDGE_LOCK(sc);
773 }
774 
775 /*
776  * bridge_calc_csum_flags:
777  *
778  *        Calculate logical and b/w csum flags each member interface supports.
779  */
780 void
bridge_calc_csum_flags(struct bridge_softc * sc)781 bridge_calc_csum_flags(struct bridge_softc *sc)
782 {
783           struct bridge_iflist *bif;
784           struct ifnet *ifs = NULL;
785           int flags = ~0;
786           int capenable = ~0;
787 
788           BRIDGE_LOCK(sc);
789           BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
790                     ifs = bif->bif_ifp;
791                     flags &= ifs->if_csum_flags_tx;
792                     capenable &= ifs->if_capenable;
793           }
794           sc->sc_csum_flags_tx = flags;
795           sc->sc_capenable = (ifs != NULL) ? capenable : 0;
796           BRIDGE_UNLOCK(sc);
797 }
798 
799 /*
800  * bridge_calc_link_state:
801  *
802  *        Calculate the link state based on each member interface.
803  */
804 static void
bridge_calc_link_state(void * xsc)805 bridge_calc_link_state(void *xsc)
806 {
807           struct bridge_softc *sc = xsc;
808           struct bridge_iflist *bif;
809           struct ifnet *ifs;
810           int link_state = LINK_STATE_DOWN;
811 
812           BRIDGE_LOCK(sc);
813           BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
814                     ifs = bif->bif_ifp;
815                     if (ifs->if_link_state == LINK_STATE_UP) {
816                               link_state = LINK_STATE_UP;
817                               break;
818                     }
819                     if (ifs->if_link_state == LINK_STATE_UNKNOWN)
820                               link_state = LINK_STATE_UNKNOWN;
821           }
822           if_link_state_change(&sc->sc_if, link_state);
823           BRIDGE_UNLOCK(sc);
824 }
825 
826 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)827 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
828 {
829           struct ifbreq *req = arg;
830           struct bridge_iflist *bif = NULL;
831           struct ifnet *ifs;
832           int error = 0;
833           struct psref psref;
834 
835           ifs = if_get(req->ifbr_ifsname, &psref);
836           if (ifs == NULL)
837                     return ENOENT;
838 
839           if (ifs->if_bridge == sc) {
840                     error = EEXIST;
841                     goto out;
842           }
843 
844           if (ifs->if_bridge != NULL) {
845                     error = EBUSY;
846                     goto out;
847           }
848 
849           if (ifs->_if_input != ether_input) {
850                     error = EINVAL;
851                     goto out;
852           }
853 
854           /* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
855           if ((ifs->if_flags & IFF_SIMPLEX) == 0) {
856                     error = EINVAL;
857                     goto out;
858           }
859 
860           bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
861 
862           switch (ifs->if_type) {
863           case IFT_ETHER:
864                     if (sc->sc_if.if_mtu != ifs->if_mtu) {
865                               /* Change MTU of added interface to bridge MTU */
866                               struct ifreq ifr;
867                               memset(&ifr, 0, sizeof(ifr));
868                               ifr.ifr_mtu = sc->sc_if.if_mtu;
869                               IFNET_LOCK(ifs);
870                               error = if_ioctl(ifs, SIOCSIFMTU, &ifr);
871                               IFNET_UNLOCK(ifs);
872                               if (error != 0)
873                                         goto out;
874                     }
875                     /* FALLTHROUGH */
876           case IFT_L2TP:
877                     IFNET_LOCK(ifs);
878                     error = ether_enable_vlan_mtu(ifs);
879                     IFNET_UNLOCK(ifs);
880                     if (error > 0)
881                               goto out;
882                     /*
883                      * Place the interface into promiscuous mode.
884                      */
885                     error = ifpromisc(ifs, 1);
886                     if (error)
887                               goto out;
888                     break;
889           default:
890                     error = EINVAL;
891                     goto out;
892           }
893 
894           bif->bif_ifp = ifs;
895           bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
896           bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
897           bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
898           bif->bif_linkstate_hook = if_linkstate_change_establish(ifs,
899               bridge_calc_link_state, sc);
900           PSLIST_ENTRY_INIT(bif, bif_next);
901           psref_target_init(&bif->bif_psref, bridge_psref_class);
902 
903           BRIDGE_LOCK(sc);
904 
905           ifs->if_bridge = sc;
906           ifs->if_bridgeif = bif;
907           PSLIST_WRITER_INSERT_HEAD(&sc->sc_iflist_psref.bip_iflist, bif, bif_next);
908           ifs->_if_input = bridge_input;
909 
910           BRIDGE_UNLOCK(sc);
911 
912           bif->bif_ifdetach_hook = ether_ifdetachhook_establish(ifs,
913               bridge_ifdetach, (void *)ifs);
914 
915           bridge_calc_csum_flags(sc);
916           bridge_calc_link_state(sc);
917 
918           if (sc->sc_if.if_flags & IFF_RUNNING)
919                     bstp_initialization(sc);
920           else
921                     bstp_stop(sc);
922 
923 out:
924           if_put(ifs, &psref);
925           if (error) {
926                     if (bif != NULL)
927                               kmem_free(bif, sizeof(*bif));
928           }
929           return error;
930 }
931 
932 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)933 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
934 {
935           struct ifbreq *req = arg;
936           const char *name = req->ifbr_ifsname;
937           struct bridge_iflist *bif;
938           struct ifnet *ifs;
939 
940           BRIDGE_LOCK(sc);
941 
942           /*
943            * Don't use bridge_lookup_member. We want to get a member
944            * with bif_refs == 0.
945            */
946           BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
947                     ifs = bif->bif_ifp;
948                     if (strcmp(ifs->if_xname, name) == 0)
949                               break;
950           }
951 
952           if (bif == NULL) {
953                     BRIDGE_UNLOCK(sc);
954                     return ENOENT;
955           }
956 
957           bridge_delete_member(sc, bif);
958 
959           BRIDGE_UNLOCK(sc);
960 
961           bridge_rtdelete(sc, ifs);
962           bridge_calc_csum_flags(sc);
963           bridge_calc_link_state(sc);
964 
965           if (sc->sc_if.if_flags & IFF_RUNNING)
966                     bstp_initialization(sc);
967 
968           return 0;
969 }
970 
971 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)972 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
973 {
974           struct ifbreq *req = arg;
975           struct bridge_iflist *bif;
976           struct psref psref;
977 
978           bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
979           if (bif == NULL)
980                     return ENOENT;
981 
982           req->ifbr_ifsflags = bif->bif_flags;
983           req->ifbr_state = bif->bif_state;
984           req->ifbr_priority = bif->bif_priority;
985           req->ifbr_path_cost = bif->bif_path_cost;
986           req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
987 
988           bridge_release_member(sc, bif, &psref);
989 
990           return 0;
991 }
992 
993 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)994 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
995 {
996           struct ifbreq *req = arg;
997           struct bridge_iflist *bif;
998           struct psref psref;
999 
1000           bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1001           if (bif == NULL)
1002                     return ENOENT;
1003 
1004           if (req->ifbr_ifsflags & IFBIF_STP) {
1005                     switch (bif->bif_ifp->if_type) {
1006                     case IFT_ETHER:
1007                     case IFT_L2TP:
1008                               /* These can do spanning tree. */
1009                               break;
1010 
1011                     default:
1012                               /* Nothing else can. */
1013                               bridge_release_member(sc, bif, &psref);
1014                               return EINVAL;
1015                     }
1016           }
1017 
1018           if (bif->bif_flags & IFBIF_PROTECTED) {
1019                     if ((req->ifbr_ifsflags & IFBIF_PROTECTED) == 0) {
1020                               log(LOG_INFO, "%s: disabling protection on %s\n",
1021                                   sc->sc_if.if_xname, bif->bif_ifp->if_xname);
1022                     }
1023           } else {
1024                     if (req->ifbr_ifsflags & IFBIF_PROTECTED) {
1025                               log(LOG_INFO, "%s: enabling protection on %s\n",
1026                                   sc->sc_if.if_xname, bif->bif_ifp->if_xname);
1027                     }
1028           }
1029 
1030           bif->bif_flags = req->ifbr_ifsflags;
1031 
1032           bridge_release_member(sc, bif, &psref);
1033 
1034           if (sc->sc_if.if_flags & IFF_RUNNING)
1035                     bstp_initialization(sc);
1036 
1037           return 0;
1038 }
1039 
1040 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)1041 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1042 {
1043           struct ifbrparam *param = arg;
1044 
1045           sc->sc_brtmax = param->ifbrp_csize;
1046           bridge_rttrim(sc);
1047 
1048           return 0;
1049 }
1050 
1051 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)1052 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1053 {
1054           struct ifbrparam *param = arg;
1055 
1056           param->ifbrp_csize = sc->sc_brtmax;
1057 
1058           return 0;
1059 }
1060 
1061 static int
bridge_ioctl_gifs(struct bridge_softc * sc,void * arg)1062 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1063 {
1064           struct ifbifconf *bifc = arg;
1065           struct bridge_iflist *bif;
1066           struct ifbreq *breqs;
1067           int i, count, error = 0;
1068 
1069 retry:
1070           BRIDGE_LOCK(sc);
1071           count = 0;
1072           BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
1073                     count++;
1074           BRIDGE_UNLOCK(sc);
1075 
1076           if (count == 0) {
1077                     bifc->ifbic_len = 0;
1078                     return 0;
1079           }
1080 
1081           if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
1082                     /* Tell that a larger buffer is needed */
1083                     bifc->ifbic_len = sizeof(*breqs) * count;
1084                     return 0;
1085           }
1086 
1087           breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
1088 
1089           BRIDGE_LOCK(sc);
1090 
1091           i = 0;
1092           BRIDGE_IFLIST_WRITER_FOREACH(bif, sc)
1093                     i++;
1094           if (i > count) {
1095                     /*
1096                      * The number of members has been increased.
1097                      * We need more memory!
1098                      */
1099                     BRIDGE_UNLOCK(sc);
1100                     kmem_free(breqs, sizeof(*breqs) * count);
1101                     goto retry;
1102           }
1103 
1104           i = 0;
1105           BRIDGE_IFLIST_WRITER_FOREACH(bif, sc) {
1106                     struct ifbreq *breq = &breqs[i++];
1107                     memset(breq, 0, sizeof(*breq));
1108 
1109                     strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1110                         sizeof(breq->ifbr_ifsname));
1111                     breq->ifbr_ifsflags = bif->bif_flags;
1112                     breq->ifbr_state = bif->bif_state;
1113                     breq->ifbr_priority = bif->bif_priority;
1114                     breq->ifbr_path_cost = bif->bif_path_cost;
1115                     breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1116           }
1117 
1118           /* Don't call copyout with holding the mutex */
1119           BRIDGE_UNLOCK(sc);
1120 
1121           for (i = 0; i < count; i++) {
1122                     error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1123                     if (error)
1124                               break;
1125           }
1126           bifc->ifbic_len = sizeof(*breqs) * i;
1127 
1128           kmem_free(breqs, sizeof(*breqs) * count);
1129 
1130           return error;
1131 }
1132 
1133 static int
bridge_ioctl_rts(struct bridge_softc * sc,void * arg)1134 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1135 {
1136           struct ifbaconf *bac = arg;
1137           struct bridge_rtnode *brt;
1138           struct ifbareq bareq;
1139           int count = 0, error = 0, len;
1140 
1141           if (bac->ifbac_len == 0)
1142                     return 0;
1143 
1144           BRIDGE_RT_LOCK(sc);
1145 
1146           /* The passed buffer is not enough, tell a required size. */
1147           if (bac->ifbac_len < (sizeof(bareq) * sc->sc_brtcnt)) {
1148                     count = sc->sc_brtcnt;
1149                     goto out;
1150           }
1151 
1152           len = bac->ifbac_len;
1153           BRIDGE_RTLIST_WRITER_FOREACH(brt, sc) {
1154                     if (len < sizeof(bareq))
1155                               goto out;
1156                     memset(&bareq, 0, sizeof(bareq));
1157                     strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1158                         sizeof(bareq.ifba_ifsname));
1159                     memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1160                     if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1161                               bareq.ifba_expire = brt->brt_expire - time_uptime;
1162                     } else
1163                               bareq.ifba_expire = 0;
1164                     bareq.ifba_flags = brt->brt_flags;
1165 
1166                     error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1167                     if (error)
1168                               goto out;
1169                     count++;
1170                     len -= sizeof(bareq);
1171           }
1172 out:
1173           BRIDGE_RT_UNLOCK(sc);
1174 
1175           bac->ifbac_len = sizeof(bareq) * count;
1176           return error;
1177 }
1178 
1179 static int
bridge_ioctl_saddr(struct bridge_softc * sc,void * arg)1180 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1181 {
1182           struct ifbareq *req = arg;
1183           struct bridge_iflist *bif;
1184           int error;
1185           struct psref psref;
1186 
1187           bif = bridge_lookup_member(sc, req->ifba_ifsname, &psref);
1188           if (bif == NULL)
1189                     return ENOENT;
1190 
1191           error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1192               req->ifba_flags);
1193 
1194           bridge_release_member(sc, bif, &psref);
1195 
1196           return error;
1197 }
1198 
1199 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)1200 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1201 {
1202           struct ifbrparam *param = arg;
1203 
1204           sc->sc_brttimeout = param->ifbrp_ctime;
1205 
1206           return 0;
1207 }
1208 
1209 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)1210 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1211 {
1212           struct ifbrparam *param = arg;
1213 
1214           param->ifbrp_ctime = sc->sc_brttimeout;
1215 
1216           return 0;
1217 }
1218 
1219 static int
bridge_ioctl_daddr(struct bridge_softc * sc,void * arg)1220 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1221 {
1222           struct ifbareq *req = arg;
1223 
1224           return (bridge_rtdaddr(sc, req->ifba_dst));
1225 }
1226 
1227 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)1228 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1229 {
1230           struct ifbreq *req = arg;
1231 
1232           bridge_rtflush(sc, req->ifbr_ifsflags);
1233 
1234           return 0;
1235 }
1236 
1237 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)1238 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1239 {
1240           struct ifbrparam *param = arg;
1241 
1242           param->ifbrp_prio = sc->sc_bridge_priority;
1243 
1244           return 0;
1245 }
1246 
1247 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)1248 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1249 {
1250           struct ifbrparam *param = arg;
1251 
1252           sc->sc_bridge_priority = param->ifbrp_prio;
1253 
1254           if (sc->sc_if.if_flags & IFF_RUNNING)
1255                     bstp_initialization(sc);
1256 
1257           return 0;
1258 }
1259 
1260 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)1261 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1262 {
1263           struct ifbrparam *param = arg;
1264 
1265           param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1266 
1267           return 0;
1268 }
1269 
1270 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)1271 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1272 {
1273           struct ifbrparam *param = arg;
1274 
1275           if (param->ifbrp_hellotime == 0)
1276                     return EINVAL;
1277           sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1278 
1279           if (sc->sc_if.if_flags & IFF_RUNNING)
1280                     bstp_initialization(sc);
1281 
1282           return 0;
1283 }
1284 
1285 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)1286 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1287 {
1288           struct ifbrparam *param = arg;
1289 
1290           param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1291 
1292           return 0;
1293 }
1294 
1295 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)1296 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1297 {
1298           struct ifbrparam *param = arg;
1299 
1300           if (param->ifbrp_fwddelay == 0)
1301                     return EINVAL;
1302           sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1303 
1304           if (sc->sc_if.if_flags & IFF_RUNNING)
1305                     bstp_initialization(sc);
1306 
1307           return 0;
1308 }
1309 
1310 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)1311 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1312 {
1313           struct ifbrparam *param = arg;
1314 
1315           param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1316 
1317           return 0;
1318 }
1319 
1320 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)1321 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1322 {
1323           struct ifbrparam *param = arg;
1324 
1325           if (param->ifbrp_maxage == 0)
1326                     return EINVAL;
1327           sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1328 
1329           if (sc->sc_if.if_flags & IFF_RUNNING)
1330                     bstp_initialization(sc);
1331 
1332           return 0;
1333 }
1334 
1335 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)1336 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1337 {
1338           struct ifbreq *req = arg;
1339           struct bridge_iflist *bif;
1340           struct psref psref;
1341 
1342           bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1343           if (bif == NULL)
1344                     return ENOENT;
1345 
1346           bif->bif_priority = req->ifbr_priority;
1347 
1348           if (sc->sc_if.if_flags & IFF_RUNNING)
1349                     bstp_initialization(sc);
1350 
1351           bridge_release_member(sc, bif, &psref);
1352 
1353           return 0;
1354 }
1355 
1356 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)1357 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1358 {
1359           struct ifbrparam *param = arg;
1360 
1361           param->ifbrp_filter = sc->sc_filter_flags;
1362 
1363           return 0;
1364 }
1365 
1366 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)1367 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1368 {
1369           struct ifbrparam *param = arg;
1370           uint32_t nflags, oflags;
1371 
1372           if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1373                     return EINVAL;
1374 
1375           nflags = param->ifbrp_filter;
1376           oflags = sc->sc_filter_flags;
1377 
1378           if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1379                     pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1380                               sc->sc_if.if_pfil);
1381           }
1382           if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1383                     pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1384                               sc->sc_if.if_pfil);
1385           }
1386 
1387           sc->sc_filter_flags = nflags;
1388 
1389           return 0;
1390 }
1391 
1392 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)1393 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1394 {
1395           struct ifbreq *req = arg;
1396           struct bridge_iflist *bif;
1397           struct psref psref;
1398 
1399           bif = bridge_lookup_member(sc, req->ifbr_ifsname, &psref);
1400           if (bif == NULL)
1401                     return ENOENT;
1402 
1403           bif->bif_path_cost = req->ifbr_path_cost;
1404 
1405           if (sc->sc_if.if_flags & IFF_RUNNING)
1406                     bstp_initialization(sc);
1407 
1408           bridge_release_member(sc, bif, &psref);
1409 
1410           return 0;
1411 }
1412 
1413 /*
1414  * bridge_ifdetach:
1415  *
1416  *        Detach an interface from a bridge.  Called when a member
1417  *        interface is detaching.
1418  */
1419 static void
bridge_ifdetach(void * xifs)1420 bridge_ifdetach(void *xifs)
1421 {
1422           struct ifnet *ifs;
1423           struct bridge_softc *sc;
1424           struct ifbreq breq;
1425 
1426           ifs = (struct ifnet *)xifs;
1427           sc = ifs->if_bridge;
1428 
1429           /* ioctl_lock should prevent this from happening */
1430           KASSERT(sc != NULL);
1431 
1432           memset(&breq, 0, sizeof(breq));
1433           strlcpy(breq.ifbr_ifsname, ifs->if_xname, sizeof(breq.ifbr_ifsname));
1434 
1435           (void) bridge_ioctl_del(sc, &breq);
1436 }
1437 
1438 /*
1439  * bridge_init:
1440  *
1441  *        Initialize a bridge interface.
1442  */
1443 static int
bridge_init(struct ifnet * ifp)1444 bridge_init(struct ifnet *ifp)
1445 {
1446           struct bridge_softc *sc = ifp->if_softc;
1447 
1448           KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1449 
1450           BRIDGE_LOCK(sc);
1451           sc->sc_stopping = false;
1452           BRIDGE_UNLOCK(sc);
1453 
1454           callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1455               bridge_timer, sc);
1456           bstp_initialization(sc);
1457 
1458           ifp->if_flags |= IFF_RUNNING;
1459           return 0;
1460 }
1461 
1462 /*
1463  * bridge_stop:
1464  *
1465  *        Stop the bridge interface.
1466  */
1467 static void
bridge_stop(struct ifnet * ifp,int disable)1468 bridge_stop(struct ifnet *ifp, int disable)
1469 {
1470           struct bridge_softc *sc = ifp->if_softc;
1471 
1472           KASSERT((ifp->if_flags & IFF_RUNNING) != 0);
1473           ifp->if_flags &= ~IFF_RUNNING;
1474 
1475           BRIDGE_LOCK(sc);
1476           sc->sc_stopping = true;
1477           BRIDGE_UNLOCK(sc);
1478 
1479           callout_halt(&sc->sc_brcallout, NULL);
1480           workqueue_wait(sc->sc_rtage_wq, &sc->sc_rtage_wk);
1481           bstp_stop(sc);
1482           bridge_rtflush(sc, IFBF_FLUSHDYN);
1483 }
1484 
1485 /*
1486  * bridge_enqueue:
1487  *
1488  *        Enqueue a packet on a bridge member interface.
1489  */
1490 void
bridge_enqueue(struct bridge_softc * sc,struct ifnet * dst_ifp,struct mbuf * m,int runfilt)1491 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1492     int runfilt)
1493 {
1494           int len, error;
1495           short mflags;
1496 
1497           if (runfilt) {
1498                     if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1499                         dst_ifp, PFIL_OUT) != 0) {
1500                               m_freem(m);
1501                               return;
1502                     }
1503                     if (m == NULL)
1504                               return;
1505           }
1506 
1507 #ifdef ALTQ
1508           KERNEL_LOCK(1, NULL);
1509           /*
1510            * If ALTQ is enabled on the member interface, do
1511            * classification; the queueing discipline might
1512            * not require classification, but might require
1513            * the address family/header pointer in the pktattr.
1514            */
1515           if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1516                     /* XXX IFT_ETHER */
1517                     altq_etherclassify(&dst_ifp->if_snd, m);
1518           }
1519           KERNEL_UNLOCK_ONE(NULL);
1520 #endif /* ALTQ */
1521 
1522           if (vlan_has_tag(m) &&
1523               !vlan_is_hwtag_enabled(dst_ifp)) {
1524                     (void)ether_inject_vlantag(&m, ETHERTYPE_VLAN,
1525                         vlan_get_tag(m));
1526                     if (m == NULL) {
1527                               if_statinc(&sc->sc_if, if_oerrors);
1528                               return;
1529                     }
1530           }
1531 
1532           len = m->m_pkthdr.len;
1533           mflags = m->m_flags;
1534 
1535           error = if_transmit_lock(dst_ifp, m);
1536           if (error) {
1537                     /* mbuf is already freed */
1538                     if_statinc(&sc->sc_if, if_oerrors);
1539                     return;
1540           }
1541 
1542           net_stat_ref_t nsr = IF_STAT_GETREF(&sc->sc_if);
1543           if_statinc_ref(&sc->sc_if, nsr, if_opackets);
1544           if_statadd_ref(&sc->sc_if, nsr, if_obytes, len);
1545           if (mflags & M_MCAST)
1546                     if_statinc_ref(&sc->sc_if, nsr, if_omcasts);
1547           IF_STAT_PUTREF(&sc->sc_if);
1548 }
1549 
1550 /*
1551  * bridge_output:
1552  *
1553  *        Send output from a bridge member interface.  This
1554  *        performs the bridging function for locally originated
1555  *        packets.
1556  *
1557  *        The mbuf has the Ethernet header already attached.  We must
1558  *        enqueue or free the mbuf before returning.
1559  */
1560 int
bridge_output(struct ifnet * ifp,struct mbuf * m,const struct sockaddr * sa,const struct rtentry * rt)1561 bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1562     const struct rtentry *rt)
1563 {
1564           struct ether_header *eh;
1565           struct ifnet *dst_if;
1566           struct bridge_softc *sc;
1567           struct mbuf *n;
1568           int s, bound;
1569 
1570           /*
1571            * bridge_output() is called from ether_output(), furthermore
1572            * ifp argument doesn't point to bridge(4). So, don't assert
1573            * IFEF_MPSAFE here.
1574            */
1575 
1576           KASSERT(m->m_len >= ETHER_HDR_LEN);
1577 
1578           eh = mtod(m, struct ether_header *);
1579           sc = ifp->if_bridge;
1580 
1581           if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1582                     if (memcmp(etherbroadcastaddr,
1583                         eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1584                               m->m_flags |= M_BCAST;
1585                     else
1586                               m->m_flags |= M_MCAST;
1587           }
1588 
1589           /*
1590            * If bridge is down, but the original output interface is up,
1591            * go ahead and send out that interface.  Otherwise, the packet
1592            * is dropped below.
1593            */
1594           if (__predict_false(sc == NULL) ||
1595               (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1596                     dst_if = ifp;
1597                     goto unicast_asis;
1598           }
1599 
1600           /*
1601            * If the packet is a multicast, or we don't know a better way to
1602            * get there, send to all interfaces.
1603            */
1604           if ((m->m_flags & (M_MCAST | M_BCAST)) != 0)
1605                     dst_if = NULL;
1606           else
1607                     dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1608 
1609           /*
1610            * In general, we need to handle TX offload in software before
1611            * enqueueing a packet. However, we can send it as is in the
1612            * cases of unicast via (1) the source interface, or (2) an
1613            * interface which supports the specified offload options.
1614            * For multicast or broadcast, send it as is only if (3) all
1615            * the member interfaces support the specified options.
1616            */
1617 
1618           /*
1619            * Unicast via the source interface.
1620            */
1621           if (dst_if == ifp)
1622                     goto unicast_asis;
1623 
1624           /*
1625            * Unicast via other interface.
1626            */
1627           if (dst_if != NULL) {
1628                     KASSERT(m->m_flags & M_PKTHDR);
1629                     if (TX_OFFLOAD_SUPPORTED(dst_if->if_csum_flags_tx,
1630                         m->m_pkthdr.csum_flags)) {
1631                               /*
1632                                * Unicast via an interface which supports the
1633                                * specified offload options.
1634                                */
1635                               goto unicast_asis;
1636                     }
1637 
1638                     /*
1639                      * Handle TX offload in software. For TSO, a packet is
1640                      * split into multiple chunks. Thus, the return value of
1641                      * ether_sw_offload_tx() is mbuf queue consists of them.
1642                      */
1643                     m = ether_sw_offload_tx(ifp, m);
1644                     if (m == NULL)
1645                               return 0;
1646 
1647                     do {
1648                               n = m->m_nextpkt;
1649                               if ((dst_if->if_flags & IFF_RUNNING) == 0)
1650                                         m_freem(m);
1651                               else
1652                                         bridge_enqueue(sc, dst_if, m, 0);
1653                               m = n;
1654                     } while (m != NULL);
1655 
1656                     return 0;
1657           }
1658 
1659           /*
1660            * Multicast or broadcast.
1661            */
1662           if (TX_OFFLOAD_SUPPORTED(sc->sc_csum_flags_tx,
1663               m->m_pkthdr.csum_flags)) {
1664                     /*
1665                      * Specified TX offload options are supported by all
1666                      * the member interfaces of this bridge.
1667                      */
1668                     m->m_nextpkt = NULL;          /* XXX */
1669           } else {
1670                     /*
1671                      * Otherwise, handle TX offload in software.
1672                      */
1673                     m = ether_sw_offload_tx(ifp, m);
1674                     if (m == NULL)
1675                               return 0;
1676           }
1677 
1678           /*
1679            * When we use pppoe over bridge, bridge_output() can be called
1680            * in a lwp context by pppoe_timeout_wk().
1681            */
1682           bound = curlwp_bind();
1683           do {
1684                     /* XXX Should call bridge_broadcast, but there are locking
1685                      * issues which need resolving first. */
1686                     struct bridge_iflist *bif;
1687                     struct mbuf *mc;
1688                     bool used = false;
1689 
1690                     n = m->m_nextpkt;
1691 
1692                     s = pserialize_read_enter();
1693                     BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
1694                               struct psref psref;
1695 
1696                               bridge_acquire_member(sc, bif, &psref);
1697                               pserialize_read_exit(s);
1698 
1699                               dst_if = bif->bif_ifp;
1700                               if ((dst_if->if_flags & IFF_RUNNING) == 0)
1701                                         goto next;
1702 
1703                               /*
1704                                * If this is not the original output interface,
1705                                * and the interface is participating in spanning
1706                                * tree, make sure the port is in a state that
1707                                * allows forwarding.
1708                                */
1709                               if (dst_if != ifp &&
1710                                   (bif->bif_flags & IFBIF_STP) != 0) {
1711                                         switch (bif->bif_state) {
1712                                         case BSTP_IFSTATE_BLOCKING:
1713                                         case BSTP_IFSTATE_LISTENING:
1714                                         case BSTP_IFSTATE_DISABLED:
1715                                                   goto next;
1716                                         }
1717                               }
1718 
1719                               if (PSLIST_READER_NEXT(bif, struct bridge_iflist,
1720                                   bif_next) == NULL &&
1721                                   ((m->m_flags & (M_MCAST | M_BCAST)) == 0 ||
1722                                   dst_if == ifp))
1723                               {
1724                                         used = true;
1725                                         mc = m;
1726                               } else {
1727                                         mc = m_copypacket(m, M_DONTWAIT);
1728                                         if (mc == NULL) {
1729                                                   if_statinc(&sc->sc_if, if_oerrors);
1730                                                   goto next;
1731                                         }
1732                               }
1733 
1734                               bridge_enqueue(sc, dst_if, mc, 0);
1735 
1736                               if ((m->m_flags & (M_MCAST | M_BCAST)) != 0 &&
1737                                   dst_if != ifp)
1738                               {
1739                                         if (PSLIST_READER_NEXT(bif,
1740                                             struct bridge_iflist, bif_next) == NULL)
1741                                         {
1742                                                   used = true;
1743                                                   mc = m;
1744                                         } else {
1745                                                   mc = m_copypacket(m, M_DONTWAIT);
1746                                                   if (mc == NULL) {
1747                                                             if_statinc(&sc->sc_if,
1748                                                                 if_oerrors);
1749                                                             goto next;
1750                                                   }
1751                                         }
1752 
1753                                         m_set_rcvif(mc, dst_if);
1754                                         mc->m_flags &= ~M_PROMISC;
1755 
1756                                         const int _s = splsoftnet();
1757                                         KERNEL_LOCK_UNLESS_IFP_MPSAFE(dst_if);
1758                                         ether_input(dst_if, mc);
1759                                         KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(dst_if);
1760                                         splx(_s);
1761                               }
1762 
1763 next:
1764                               s = pserialize_read_enter();
1765                               bridge_release_member(sc, bif, &psref);
1766 
1767                               /* Guarantee we don't re-enter the loop as we already
1768                                * decided we're at the end. */
1769                               if (used)
1770                                         break;
1771                     }
1772                     pserialize_read_exit(s);
1773 
1774                     if (!used)
1775                               m_freem(m);
1776 
1777                     m = n;
1778           } while (m != NULL);
1779           curlwp_bindx(bound);
1780 
1781           return 0;
1782 
1783 unicast_asis:
1784           /*
1785            * XXX Spanning tree consideration here?
1786            */
1787           if ((dst_if->if_flags & IFF_RUNNING) == 0)
1788                     m_freem(m);
1789           else
1790                     bridge_enqueue(sc, dst_if, m, 0);
1791           return 0;
1792 }
1793 
1794 /*
1795  * bridge_start:
1796  *
1797  *        Start output on a bridge.
1798  *
1799  *        NOTE: This routine should never be called in this implementation.
1800  */
1801 static void
bridge_start(struct ifnet * ifp)1802 bridge_start(struct ifnet *ifp)
1803 {
1804 
1805           printf("%s: bridge_start() called\n", ifp->if_xname);
1806 }
1807 
1808 /*
1809  * bridge_forward:
1810  *
1811  *        The forwarding function of the bridge.
1812  */
1813 static void
bridge_forward(struct bridge_softc * sc,struct mbuf * m)1814 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
1815 {
1816           struct bridge_iflist *bif;
1817           struct ifnet *src_if, *dst_if;
1818           struct ether_header *eh;
1819           struct psref psref;
1820           struct psref psref_src;
1821           DECLARE_LOCK_VARIABLE;
1822           bool src_if_protected;
1823 
1824           src_if = m_get_rcvif_psref(m, &psref_src);
1825           if (src_if == NULL) {
1826                     /* Interface is being destroyed? */
1827                     goto discard;
1828           }
1829 
1830           if_statadd2(&sc->sc_if, if_ipackets, 1, if_ibytes, m->m_pkthdr.len);
1831 
1832           /*
1833            * Look up the bridge_iflist.
1834            */
1835           bif = bridge_lookup_member_if(sc, src_if, &psref);
1836           if (bif == NULL) {
1837                     /* Interface is not a bridge member (anymore?) */
1838                     goto discard;
1839           }
1840 
1841           if (bif->bif_flags & IFBIF_STP) {
1842                     switch (bif->bif_state) {
1843                     case BSTP_IFSTATE_BLOCKING:
1844                     case BSTP_IFSTATE_LISTENING:
1845                     case BSTP_IFSTATE_DISABLED:
1846                               bridge_release_member(sc, bif, &psref);
1847                               goto discard;
1848                     }
1849           }
1850 
1851           eh = mtod(m, struct ether_header *);
1852 
1853           /*
1854            * If the interface is learning, and the source
1855            * address is valid and not multicast, record
1856            * the address.
1857            */
1858           if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1859               ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1860               (eh->ether_shost[0] == 0 &&
1861                eh->ether_shost[1] == 0 &&
1862                eh->ether_shost[2] == 0 &&
1863                eh->ether_shost[3] == 0 &&
1864                eh->ether_shost[4] == 0 &&
1865                eh->ether_shost[5] == 0) == 0) {
1866                     (void) bridge_rtupdate(sc, eh->ether_shost,
1867                         src_if, 0, IFBAF_DYNAMIC);
1868           }
1869 
1870           if ((bif->bif_flags & IFBIF_STP) != 0 &&
1871               bif->bif_state == BSTP_IFSTATE_LEARNING) {
1872                     bridge_release_member(sc, bif, &psref);
1873                     goto discard;
1874           }
1875 
1876           src_if_protected = ((bif->bif_flags & IFBIF_PROTECTED) != 0);
1877 
1878           bridge_release_member(sc, bif, &psref);
1879 
1880           /*
1881            * At this point, the port either doesn't participate
1882            * in spanning tree or it is in the forwarding state.
1883            */
1884 
1885           /*
1886            * If the packet is unicast, destined for someone on
1887            * "this" side of the bridge, drop it.
1888            */
1889           if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1890                     dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1891                     if (src_if == dst_if)
1892                               goto discard;
1893           } else {
1894                     /* ...forward it to all interfaces. */
1895                     if_statinc(&sc->sc_if, if_imcasts);
1896                     dst_if = NULL;
1897           }
1898 
1899           if (pfil_run_hooks(sc->sc_if.if_pfil, &m, src_if, PFIL_IN) != 0 ||
1900               m == NULL) {
1901                     goto discard;
1902           }
1903 
1904           if (dst_if == NULL) {
1905                     bridge_broadcast(sc, src_if, src_if_protected, m);
1906                     goto out;
1907           }
1908 
1909           m_put_rcvif_psref(src_if, &psref_src);
1910           src_if = NULL;
1911 
1912           /*
1913            * At this point, we're dealing with a unicast frame
1914            * going to a different interface.
1915            */
1916           if ((dst_if->if_flags & IFF_RUNNING) == 0)
1917                     goto discard;
1918 
1919           bif = bridge_lookup_member_if(sc, dst_if, &psref);
1920           if (bif == NULL) {
1921                     /* Not a member of the bridge (anymore?) */
1922                     goto discard;
1923           }
1924 
1925           if (bif->bif_flags & IFBIF_STP) {
1926                     switch (bif->bif_state) {
1927                     case BSTP_IFSTATE_DISABLED:
1928                     case BSTP_IFSTATE_BLOCKING:
1929                               bridge_release_member(sc, bif, &psref);
1930                               goto discard;
1931                     }
1932           }
1933 
1934           if ((bif->bif_flags & IFBIF_PROTECTED) && src_if_protected) {
1935                     bridge_release_member(sc, bif, &psref);
1936                     goto discard;
1937           }
1938 
1939           bridge_release_member(sc, bif, &psref);
1940 
1941           /*
1942            * Before enqueueing this packet to the destination interface,
1943            * clear any in-bound checksum flags to prevent them from being
1944            * misused as out-bound flags.
1945            */
1946           m->m_pkthdr.csum_flags = 0;
1947 
1948           ACQUIRE_GLOBAL_LOCKS();
1949           bridge_enqueue(sc, dst_if, m, 1);
1950           RELEASE_GLOBAL_LOCKS();
1951 out:
1952           if (src_if != NULL)
1953                     m_put_rcvif_psref(src_if, &psref_src);
1954           return;
1955 
1956 discard:
1957           m_freem(m);
1958           goto out;
1959 }
1960 
1961 static bool
bstp_state_before_learning(struct bridge_iflist * bif)1962 bstp_state_before_learning(struct bridge_iflist *bif)
1963 {
1964           if (bif->bif_flags & IFBIF_STP) {
1965                     switch (bif->bif_state) {
1966                     case BSTP_IFSTATE_BLOCKING:
1967                     case BSTP_IFSTATE_LISTENING:
1968                     case BSTP_IFSTATE_DISABLED:
1969                               return true;
1970                     }
1971           }
1972           return false;
1973 }
1974 
1975 static bool
bridge_ourether(struct bridge_iflist * bif,struct ether_header * eh,int src)1976 bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1977 {
1978           uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1979 
1980           if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1981 #if NCARP > 0
1982               || (bif->bif_ifp->if_carp &&
1983                   carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1984 #endif /* NCARP > 0 */
1985               )
1986                     return true;
1987 
1988           return false;
1989 }
1990 
1991 /*
1992  * bridge_input:
1993  *
1994  *        Receive input from a member interface.  Queue the packet for
1995  *        bridging if it is not for us.
1996  */
1997 static void
bridge_input(struct ifnet * ifp,struct mbuf * m)1998 bridge_input(struct ifnet *ifp, struct mbuf *m)
1999 {
2000           struct bridge_softc *sc = ifp->if_bridge;
2001           struct bridge_iflist *bif;
2002           struct ether_header *eh;
2003           struct psref psref;
2004           int bound;
2005           DECLARE_LOCK_VARIABLE;
2006 
2007           KASSERT(!cpu_intr_p());
2008 
2009           if (__predict_false(sc == NULL) ||
2010               (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
2011                     ACQUIRE_GLOBAL_LOCKS();
2012                     ether_input(ifp, m);
2013                     RELEASE_GLOBAL_LOCKS();
2014                     return;
2015           }
2016 
2017           bound = curlwp_bind();
2018           bif = bridge_lookup_member_if(sc, ifp, &psref);
2019           if (bif == NULL) {
2020                     curlwp_bindx(bound);
2021                     ACQUIRE_GLOBAL_LOCKS();
2022                     ether_input(ifp, m);
2023                     RELEASE_GLOBAL_LOCKS();
2024                     return;
2025           }
2026 
2027           eh = mtod(m, struct ether_header *);
2028 
2029           if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
2030                     if (memcmp(etherbroadcastaddr,
2031                         eh->ether_dhost, ETHER_ADDR_LEN) == 0)
2032                               m->m_flags |= M_BCAST;
2033                     else
2034                               m->m_flags |= M_MCAST;
2035           }
2036 
2037           /*
2038            * A 'fast' path for packets addressed to interfaces that are
2039            * part of this bridge.
2040            */
2041           if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
2042               !bstp_state_before_learning(bif)) {
2043                     struct bridge_iflist *_bif;
2044                     struct ifnet *_ifp = NULL;
2045                     int s;
2046                     struct psref _psref;
2047 
2048                     s = pserialize_read_enter();
2049                     BRIDGE_IFLIST_READER_FOREACH(_bif, sc) {
2050                               /* It is destined for us. */
2051                               if (bridge_ourether(_bif, eh, 0)) {
2052                                         bridge_acquire_member(sc, _bif, &_psref);
2053                                         pserialize_read_exit(s);
2054                                         if (_bif->bif_flags & IFBIF_LEARNING)
2055                                                   (void) bridge_rtupdate(sc,
2056                                                       eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
2057                                         m_set_rcvif(m, _bif->bif_ifp);
2058                                         _ifp = _bif->bif_ifp;
2059                                         bridge_release_member(sc, _bif, &_psref);
2060                                         goto out;
2061                               }
2062 
2063                               /* We just received a packet that we sent out. */
2064                               if (bridge_ourether(_bif, eh, 1))
2065                                         break;
2066                     }
2067                     pserialize_read_exit(s);
2068 out:
2069 
2070                     if (_bif != NULL) {
2071                               bridge_release_member(sc, bif, &psref);
2072                               curlwp_bindx(bound);
2073                               if (_ifp != NULL) {
2074                                         m->m_flags &= ~M_PROMISC;
2075                                         ACQUIRE_GLOBAL_LOCKS();
2076                                         ether_input(_ifp, m);
2077                                         RELEASE_GLOBAL_LOCKS();
2078                               } else
2079                                         m_freem(m);
2080                               return;
2081                     }
2082           }
2083 
2084           /* Tap off 802.1D packets; they do not get forwarded. */
2085           if (bif->bif_flags & IFBIF_STP &&
2086               memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
2087                     bstp_input(sc, bif, m);
2088                     bridge_release_member(sc, bif, &psref);
2089                     curlwp_bindx(bound);
2090                     return;
2091           }
2092 
2093           /*
2094            * A normal switch would discard the packet here, but that's not what
2095            * we've done historically. This also prevents some obnoxious behaviour.
2096            */
2097           if (bstp_state_before_learning(bif)) {
2098                     bridge_release_member(sc, bif, &psref);
2099                     curlwp_bindx(bound);
2100                     ACQUIRE_GLOBAL_LOCKS();
2101                     ether_input(ifp, m);
2102                     RELEASE_GLOBAL_LOCKS();
2103                     return;
2104           }
2105 
2106           bridge_release_member(sc, bif, &psref);
2107 
2108           bridge_forward(sc, m);
2109 
2110           curlwp_bindx(bound);
2111 }
2112 
2113 /*
2114  * bridge_broadcast:
2115  *
2116  *        Send a frame to all interfaces that are members of
2117  *        the bridge, except for the one on which the packet
2118  *        arrived.
2119  */
2120 static void
bridge_broadcast(struct bridge_softc * sc,struct ifnet * src_if,bool src_if_protected,struct mbuf * m)2121 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2122     bool src_if_protected, struct mbuf *m)
2123 {
2124           struct bridge_iflist *bif;
2125           struct mbuf *mc;
2126           struct ifnet *dst_if;
2127           bool bmcast;
2128           int s;
2129           DECLARE_LOCK_VARIABLE;
2130 
2131           bmcast = m->m_flags & (M_BCAST|M_MCAST);
2132 
2133           s = pserialize_read_enter();
2134           BRIDGE_IFLIST_READER_FOREACH(bif, sc) {
2135                     struct psref psref;
2136 
2137                     bridge_acquire_member(sc, bif, &psref);
2138                     pserialize_read_exit(s);
2139 
2140                     dst_if = bif->bif_ifp;
2141 
2142                     if (bif->bif_flags & IFBIF_STP) {
2143                               switch (bif->bif_state) {
2144                               case BSTP_IFSTATE_BLOCKING:
2145                               case BSTP_IFSTATE_DISABLED:
2146                                         goto next;
2147                               }
2148                     }
2149 
2150                     if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
2151                               goto next;
2152 
2153                     if ((dst_if->if_flags & IFF_RUNNING) == 0)
2154                               goto next;
2155 
2156                     if (dst_if != src_if) {
2157                               if ((bif->bif_flags & IFBIF_PROTECTED) &&
2158                                   src_if_protected) {
2159                                         goto next;
2160                               }
2161 
2162                               mc = m_copypacket(m, M_DONTWAIT);
2163                               if (mc == NULL) {
2164                                         if_statinc(&sc->sc_if, if_oerrors);
2165                                         goto next;
2166                               }
2167                               /*
2168                                * Before enqueueing this packet to the destination
2169                                * interface, clear any in-bound checksum flags to
2170                                * prevent them from being misused as out-bound flags.
2171                                */
2172                               mc->m_pkthdr.csum_flags = 0;
2173 
2174                               ACQUIRE_GLOBAL_LOCKS();
2175                               bridge_enqueue(sc, dst_if, mc, 1);
2176                               RELEASE_GLOBAL_LOCKS();
2177                     }
2178 
2179                     if (bmcast) {
2180                               mc = m_copypacket(m, M_DONTWAIT);
2181                               if (mc == NULL) {
2182                                         if_statinc(&sc->sc_if, if_oerrors);
2183                                         goto next;
2184                               }
2185                               /*
2186                                * Before enqueueing this packet to the destination
2187                                * interface, clear any in-bound checksum flags to
2188                                * prevent them from being misused as out-bound flags.
2189                                */
2190                               mc->m_pkthdr.csum_flags = 0;
2191 
2192                               m_set_rcvif(mc, dst_if);
2193                               mc->m_flags &= ~M_PROMISC;
2194 
2195                               ACQUIRE_GLOBAL_LOCKS();
2196                               ether_input(dst_if, mc);
2197                               RELEASE_GLOBAL_LOCKS();
2198                     }
2199 next:
2200                     s = pserialize_read_enter();
2201                     bridge_release_member(sc, bif, &psref);
2202           }
2203           pserialize_read_exit(s);
2204 
2205           m_freem(m);
2206 }
2207 
2208 static int
bridge_rtalloc(struct bridge_softc * sc,const uint8_t * dst,struct bridge_rtnode ** brtp)2209 bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
2210     struct bridge_rtnode **brtp)
2211 {
2212           struct bridge_rtnode *brt;
2213           int error;
2214 
2215           if (sc->sc_brtcnt >= sc->sc_brtmax)
2216                     return ENOSPC;
2217 
2218           /*
2219            * Allocate a new bridge forwarding node, and
2220            * initialize the expiration time and Ethernet
2221            * address.
2222            */
2223           brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
2224           if (brt == NULL)
2225                     return ENOMEM;
2226 
2227           memset(brt, 0, sizeof(*brt));
2228           brt->brt_expire = time_uptime + sc->sc_brttimeout;
2229           brt->brt_flags = IFBAF_DYNAMIC;
2230           memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2231           PSLIST_ENTRY_INIT(brt, brt_list);
2232           PSLIST_ENTRY_INIT(brt, brt_hash);
2233 
2234           BRIDGE_RT_LOCK(sc);
2235           error = bridge_rtnode_insert(sc, brt);
2236           BRIDGE_RT_UNLOCK(sc);
2237 
2238           if (error != 0) {
2239                     pool_put(&bridge_rtnode_pool, brt);
2240                     return error;
2241           }
2242 
2243           *brtp = brt;
2244           return 0;
2245 }
2246 
2247 /*
2248  * bridge_rtupdate:
2249  *
2250  *        Add a bridge routing entry.
2251  */
2252 static int
bridge_rtupdate(struct bridge_softc * sc,const uint8_t * dst,struct ifnet * dst_if,int setflags,uint8_t flags)2253 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2254     struct ifnet *dst_if, int setflags, uint8_t flags)
2255 {
2256           struct bridge_rtnode *brt;
2257           int s;
2258 
2259 again:
2260           /*
2261            * A route for this destination might already exist.  If so,
2262            * update it, otherwise create a new one.
2263            */
2264           s = pserialize_read_enter();
2265           brt = bridge_rtnode_lookup(sc, dst);
2266 
2267           if (brt != NULL) {
2268                     brt->brt_ifp = dst_if;
2269                     if (setflags) {
2270                               brt->brt_flags = flags;
2271                               if (flags & IFBAF_STATIC)
2272                                         brt->brt_expire = 0;
2273                               else
2274                                         brt->brt_expire = time_uptime + sc->sc_brttimeout;
2275                     } else {
2276                               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2277                                         brt->brt_expire = time_uptime + sc->sc_brttimeout;
2278                     }
2279           }
2280           pserialize_read_exit(s);
2281 
2282           if (brt == NULL) {
2283                     int r;
2284 
2285                     r = bridge_rtalloc(sc, dst, &brt);
2286                     if (r != 0)
2287                               return r;
2288                     goto again;
2289           }
2290 
2291           return 0;
2292 }
2293 
2294 /*
2295  * bridge_rtlookup:
2296  *
2297  *        Lookup the destination interface for an address.
2298  */
2299 static struct ifnet *
bridge_rtlookup(struct bridge_softc * sc,const uint8_t * addr)2300 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2301 {
2302           struct bridge_rtnode *brt;
2303           struct ifnet *ifs = NULL;
2304           int s;
2305 
2306           s = pserialize_read_enter();
2307           brt = bridge_rtnode_lookup(sc, addr);
2308           if (brt != NULL)
2309                     ifs = brt->brt_ifp;
2310           pserialize_read_exit(s);
2311 
2312           return ifs;
2313 }
2314 
/*
 * Callback type of bridge_rtlist_iterate_remove().  It is called once
 * per visited node and returns true when that node is to be removed.
 * Setting *need_break to true ends the iteration after the current
 * node.  arg is the caller's opaque argument.
 */
typedef bool (*bridge_iterate_cb_t)(struct bridge_softc *sc,
    struct bridge_rtnode *brt, bool *need_break, void *arg);
2317 
2318 /*
2319  * bridge_rtlist_iterate_remove:
2320  *
2321  *        It iterates on sc->sc_rtlist and removes rtnodes of it which func
2322  *        callback judges to remove. Removals of rtnodes are done in a manner
2323  *        of pserialize. To this end, all kmem_* operations are placed out of
2324  *        mutexes.
2325  */
2326 static void
bridge_rtlist_iterate_remove(struct bridge_softc * sc,bridge_iterate_cb_t func,void * arg)2327 bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2328 {
2329           struct bridge_rtnode *brt;
2330           struct bridge_rtnode **brt_list;
2331           int i, count;
2332 
2333 retry:
2334           count = sc->sc_brtcnt;
2335           if (count == 0)
2336                     return;
2337           brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2338 
2339           BRIDGE_RT_LOCK(sc);
2340           if (__predict_false(sc->sc_brtcnt > count)) {
2341                     /* The rtnodes increased, we need more memory */
2342                     BRIDGE_RT_UNLOCK(sc);
2343                     kmem_free(brt_list, sizeof(*brt_list) * count);
2344                     goto retry;
2345           }
2346 
2347           i = 0;
2348           /*
2349            * We don't need to use a _SAFE variant here because we know
2350            * that a removed item keeps its next pointer as-is thanks to
2351            * pslist(9) and isn't freed in the loop.
2352            */
2353           BRIDGE_RTLIST_WRITER_FOREACH(brt, sc) {
2354                     bool need_break = false;
2355                     if (func(sc, brt, &need_break, arg)) {
2356                               bridge_rtnode_remove(sc, brt);
2357                               brt_list[i++] = brt;
2358                     }
2359                     if (need_break)
2360                               break;
2361           }
2362 
2363           if (i > 0)
2364                     BRIDGE_RT_PSZ_PERFORM(sc);
2365           BRIDGE_RT_UNLOCK(sc);
2366 
2367           while (--i >= 0)
2368                     bridge_rtnode_destroy(brt_list[i]);
2369 
2370           kmem_free(brt_list, sizeof(*brt_list) * count);
2371 }
2372 
2373 static bool
bridge_rttrim0_cb(struct bridge_softc * sc,struct bridge_rtnode * brt,bool * need_break,void * arg)2374 bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2375     bool *need_break, void *arg)
2376 {
2377           if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2378                     /* Take into account of the subsequent removal */
2379                     if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2380                               *need_break = true;
2381                     return true;
2382           } else
2383                     return false;
2384 }
2385 
2386 static void
bridge_rttrim0(struct bridge_softc * sc)2387 bridge_rttrim0(struct bridge_softc *sc)
2388 {
2389           bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2390 }
2391 
2392 /*
2393  * bridge_rttrim:
2394  *
2395  *        Trim the routine table so that we have a number
2396  *        of routing entries less than or equal to the
2397  *        maximum number.
2398  */
2399 static void
bridge_rttrim(struct bridge_softc * sc)2400 bridge_rttrim(struct bridge_softc *sc)
2401 {
2402 
2403           /* Make sure we actually need to do this. */
2404           if (sc->sc_brtcnt <= sc->sc_brtmax)
2405                     return;
2406 
2407           /* Force an aging cycle; this might trim enough addresses. */
2408           bridge_rtage(sc);
2409           if (sc->sc_brtcnt <= sc->sc_brtmax)
2410                     return;
2411 
2412           bridge_rttrim0(sc);
2413 
2414           return;
2415 }
2416 
2417 /*
2418  * bridge_timer:
2419  *
2420  *        Aging timer for the bridge.
2421  */
2422 static void
bridge_timer(void * arg)2423 bridge_timer(void *arg)
2424 {
2425           struct bridge_softc *sc = arg;
2426 
2427           workqueue_enqueue(sc->sc_rtage_wq, &sc->sc_rtage_wk, NULL);
2428 }
2429 
2430 static void
bridge_rtage_work(struct work * wk,void * arg)2431 bridge_rtage_work(struct work *wk, void *arg)
2432 {
2433           struct bridge_softc *sc = arg;
2434 
2435           KASSERT(wk == &sc->sc_rtage_wk);
2436 
2437           bridge_rtage(sc);
2438 
2439           BRIDGE_LOCK(sc);
2440           if (!sc->sc_stopping) {
2441                     callout_reset(&sc->sc_brcallout,
2442                         bridge_rtable_prune_period * hz, bridge_timer, sc);
2443           }
2444           BRIDGE_UNLOCK(sc);
2445 }
2446 
2447 static bool
bridge_rtage_cb(struct bridge_softc * sc,struct bridge_rtnode * brt,bool * need_break,void * arg)2448 bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2449     bool *need_break, void *arg)
2450 {
2451           if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2452               time_uptime >= brt->brt_expire)
2453                     return true;
2454           else
2455                     return false;
2456 }
2457 
2458 /*
2459  * bridge_rtage:
2460  *
2461  *        Perform an aging cycle.
2462  */
2463 static void
bridge_rtage(struct bridge_softc * sc)2464 bridge_rtage(struct bridge_softc *sc)
2465 {
2466           bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2467 }
2468 
2469 
2470 static bool
bridge_rtflush_cb(struct bridge_softc * sc,struct bridge_rtnode * brt,bool * need_break,void * arg)2471 bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2472     bool *need_break, void *arg)
2473 {
2474           int full = *(int*)arg;
2475 
2476           if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2477                     return true;
2478           else
2479                     return false;
2480 }
2481 
2482 /*
2483  * bridge_rtflush:
2484  *
2485  *        Remove all dynamic addresses from the bridge.
2486  */
2487 static void
bridge_rtflush(struct bridge_softc * sc,int full)2488 bridge_rtflush(struct bridge_softc *sc, int full)
2489 {
2490           bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2491 }
2492 
2493 /*
2494  * bridge_rtdaddr:
2495  *
2496  *        Remove an address from the table.
2497  */
2498 static int
bridge_rtdaddr(struct bridge_softc * sc,const uint8_t * addr)2499 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2500 {
2501           struct bridge_rtnode *brt;
2502 
2503           BRIDGE_RT_LOCK(sc);
2504           if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2505                     BRIDGE_RT_UNLOCK(sc);
2506                     return ENOENT;
2507           }
2508           bridge_rtnode_remove(sc, brt);
2509           BRIDGE_RT_PSZ_PERFORM(sc);
2510           BRIDGE_RT_UNLOCK(sc);
2511 
2512           bridge_rtnode_destroy(brt);
2513 
2514           return 0;
2515 }
2516 
2517 /*
2518  * bridge_rtdelete:
2519  *
2520  *        Delete routes to a speicifc member interface.
2521  */
2522 static void
bridge_rtdelete(struct bridge_softc * sc,struct ifnet * ifp)2523 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2524 {
2525           struct bridge_rtnode *brt;
2526 
2527           /* XXX pserialize_perform for each entry is slow */
2528 again:
2529           BRIDGE_RT_LOCK(sc);
2530           BRIDGE_RTLIST_WRITER_FOREACH(brt, sc) {
2531                     if (brt->brt_ifp == ifp)
2532                               break;
2533           }
2534           if (brt == NULL) {
2535                     BRIDGE_RT_UNLOCK(sc);
2536                     return;
2537           }
2538           bridge_rtnode_remove(sc, brt);
2539           BRIDGE_RT_PSZ_PERFORM(sc);
2540           BRIDGE_RT_UNLOCK(sc);
2541 
2542           bridge_rtnode_destroy(brt);
2543 
2544           goto again;
2545 }
2546 
2547 /*
2548  * bridge_rtable_init:
2549  *
2550  *        Initialize the route table for this bridge.
2551  */
2552 static void
bridge_rtable_init(struct bridge_softc * sc)2553 bridge_rtable_init(struct bridge_softc *sc)
2554 {
2555           int i;
2556 
2557           sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2558               KM_SLEEP);
2559 
2560           for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2561                     PSLIST_INIT(&sc->sc_rthash[i]);
2562 
2563           sc->sc_rthash_key = cprng_fast32();
2564 
2565           PSLIST_INIT(&sc->sc_rtlist);
2566 
2567           sc->sc_rtlist_psz = pserialize_create();
2568           sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2569 }
2570 
2571 /*
2572  * bridge_rtable_fini:
2573  *
2574  *        Deconstruct the route table for this bridge.
2575  */
2576 static void
bridge_rtable_fini(struct bridge_softc * sc)2577 bridge_rtable_fini(struct bridge_softc *sc)
2578 {
2579 
2580           kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2581           mutex_obj_free(sc->sc_rtlist_lock);
2582           pserialize_destroy(sc->sc_rtlist_psz);
2583 }
2584 
2585 /*
2586  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2587  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2588  */
2589 #define   mix(a, b, c)                                                                    \
2590 do {                                                                                      \
2591           a -= b; a -= c; a ^= (c >> 13);                                                 \
2592           b -= c; b -= a; b ^= (a << 8);                                                  \
2593           c -= a; c -= b; c ^= (b >> 13);                                                 \
2594           a -= b; a -= c; a ^= (c >> 12);                                                 \
2595           b -= c; b -= a; b ^= (a << 16);                                                 \
2596           c -= a; c -= b; c ^= (b >> 5);                                                  \
2597           a -= b; a -= c; a ^= (c >> 3);                                                  \
2598           b -= c; b -= a; b ^= (a << 10);                                                 \
2599           c -= a; c -= b; c ^= (b >> 15);                                                 \
2600 } while (/*CONSTCOND*/0)
2601 
2602 static inline uint32_t
bridge_rthash(struct bridge_softc * sc,const uint8_t * addr)2603 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2604 {
2605           uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2606 
2607           b += addr[5] << 8;
2608           b += addr[4];
2609           a += (uint32_t)addr[3] << 24;
2610           a += addr[2] << 16;
2611           a += addr[1] << 8;
2612           a += addr[0];
2613 
2614           mix(a, b, c);
2615 
2616           return (c & BRIDGE_RTHASH_MASK);
2617 }
2618 
2619 #undef mix
2620 
2621 /*
2622  * bridge_rtnode_lookup:
2623  *
2624  *        Look up a bridge route node for the specified destination.
2625  */
2626 static struct bridge_rtnode *
bridge_rtnode_lookup(struct bridge_softc * sc,const uint8_t * addr)2627 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2628 {
2629           struct bridge_rtnode *brt;
2630           uint32_t hash;
2631           int dir;
2632 
2633           hash = bridge_rthash(sc, addr);
2634           BRIDGE_RTHASH_READER_FOREACH(brt, sc, hash) {
2635                     dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2636                     if (dir == 0)
2637                               return brt;
2638                     if (dir > 0)
2639                               return NULL;
2640           }
2641 
2642           return NULL;
2643 }
2644 
2645 /*
2646  * bridge_rtnode_insert:
2647  *
2648  *        Insert the specified bridge node into the route table.  We
2649  *        assume the entry is not already in the table.
2650  */
2651 static int
bridge_rtnode_insert(struct bridge_softc * sc,struct bridge_rtnode * brt)2652 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2653 {
2654           struct bridge_rtnode *lbrt, *prev = NULL;
2655           uint32_t hash;
2656 
2657           KASSERT(BRIDGE_RT_LOCKED(sc));
2658 
2659           hash = bridge_rthash(sc, brt->brt_addr);
2660           BRIDGE_RTHASH_WRITER_FOREACH(lbrt, sc, hash) {
2661                     int dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2662                     if (dir == 0)
2663                               return EEXIST;
2664                     if (dir > 0)
2665                               break;
2666                     prev = lbrt;
2667           }
2668           if (prev == NULL)
2669                     BRIDGE_RTHASH_WRITER_INSERT_HEAD(sc, hash, brt);
2670           else
2671                     BRIDGE_RTHASH_WRITER_INSERT_AFTER(prev, brt);
2672 
2673           BRIDGE_RTLIST_WRITER_INSERT_HEAD(sc, brt);
2674           sc->sc_brtcnt++;
2675 
2676           return 0;
2677 }
2678 
2679 /*
2680  * bridge_rtnode_remove:
2681  *
2682  *        Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2683  */
2684 static void
bridge_rtnode_remove(struct bridge_softc * sc,struct bridge_rtnode * brt)2685 bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2686 {
2687 
2688           KASSERT(BRIDGE_RT_LOCKED(sc));
2689 
2690           BRIDGE_RTHASH_WRITER_REMOVE(brt);
2691           BRIDGE_RTLIST_WRITER_REMOVE(brt);
2692           sc->sc_brtcnt--;
2693 }
2694 
2695 /*
2696  * bridge_rtnode_destroy:
2697  *
2698  *        Destroy a bridge rtnode.
2699  */
2700 static void
bridge_rtnode_destroy(struct bridge_rtnode * brt)2701 bridge_rtnode_destroy(struct bridge_rtnode *brt)
2702 {
2703 
2704           PSLIST_ENTRY_DESTROY(brt, brt_list);
2705           PSLIST_ENTRY_DESTROY(brt, brt_hash);
2706           pool_put(&bridge_rtnode_pool, brt);
2707 }
2708 
2709 extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2710 extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2711 
2712 /*
2713  * Send bridge packets through IPF if they are one of the types IPF can deal
2714  * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2715  * question.)
2716  */
2717 static int
bridge_ipf(void * arg,struct mbuf ** mp,struct ifnet * ifp,int dir)2718 bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2719 {
2720           int snap, error;
2721           struct ether_header *eh1, eh2;
2722           struct llc llc1;
2723           uint16_t ether_type;
2724 
2725           snap = 0;
2726           error = -1;         /* Default error if not error == 0 */
2727           eh1 = mtod(*mp, struct ether_header *);
2728           ether_type = ntohs(eh1->ether_type);
2729 
2730           /*
2731            * Check for SNAP/LLC.
2732            */
2733           if (ether_type < ETHERMTU) {
2734                     struct llc *llc2 = (struct llc *)(eh1 + 1);
2735 
2736                     if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2737                         llc2->llc_dsap == LLC_SNAP_LSAP &&
2738                         llc2->llc_ssap == LLC_SNAP_LSAP &&
2739                         llc2->llc_control == LLC_UI) {
2740                               ether_type = htons(llc2->llc_un.type_snap.ether_type);
2741                               snap = 1;
2742                     }
2743           }
2744 
2745           /* drop VLAN traffic untagged by hardware offloading */
2746           if (vlan_has_tag(*mp))
2747                     goto bad;
2748 
2749           /*
2750            * If we're trying to filter bridge traffic, don't look at anything
2751            * other than IP and ARP traffic.  If the filter doesn't understand
2752            * IPv6, don't allow IPv6 through the bridge either.  This is lame
2753            * since if we really wanted, say, an AppleTalk filter, we are hosed,
2754            * but of course we don't have an AppleTalk filter to begin with.
2755            * (Note that since IPF doesn't understand ARP it will pass *ALL*
2756            * ARP traffic.)
2757            */
2758           switch (ether_type) {
2759                     case ETHERTYPE_ARP:
2760                     case ETHERTYPE_REVARP:
2761                               return 0; /* Automatically pass */
2762                     case ETHERTYPE_IP:
2763 # ifdef INET6
2764                     case ETHERTYPE_IPV6:
2765 # endif /* INET6 */
2766                               break;
2767                     default:
2768                               goto bad;
2769           }
2770 
2771           /* Strip off the Ethernet header and keep a copy. */
2772           m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2773           m_adj(*mp, ETHER_HDR_LEN);
2774 
2775           /* Strip off snap header, if present */
2776           if (snap) {
2777                     m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2778                     m_adj(*mp, sizeof(struct llc));
2779           }
2780 
2781           /*
2782            * Check basic packet sanity and run IPF through pfil.
2783            */
2784           KASSERT(!cpu_intr_p());
2785           switch (ether_type)
2786           {
2787           case ETHERTYPE_IP :
2788                     error = bridge_ip_checkbasic(mp);
2789                     if (error == 0)
2790                               error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2791                     break;
2792 # ifdef INET6
2793           case ETHERTYPE_IPV6 :
2794                     error = bridge_ip6_checkbasic(mp);
2795                     if (error == 0)
2796                               error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2797                     break;
2798 # endif
2799           default :
2800                     error = 0;
2801                     break;
2802           }
2803 
2804           if (*mp == NULL)
2805                     return error;
2806           if (error != 0)
2807                     goto bad;
2808 
2809           error = -1;
2810 
2811           /*
2812            * Finally, put everything back the way it was and return
2813            */
2814           if (snap) {
2815                     M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2816                     if (*mp == NULL)
2817                               return error;
2818                     bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2819           }
2820 
2821           M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2822           if (*mp == NULL)
2823                     return error;
2824           bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2825 
2826           return 0;
2827 
2828     bad:
2829           m_freem(*mp);
2830           *mp = NULL;
2831           return error;
2832 }
2833 
2834 /*
2835  * Perform basic checks on header size since
2836  * IPF assumes ip_input has already processed
2837  * it for it.  Cut-and-pasted from ip_input.c.
2838  * Given how simple the IPv6 version is,
2839  * does the IPv4 version really need to be
2840  * this complicated?
2841  *
2842  * XXX Should we update ipstat here, or not?
2843  * XXX Right now we update ipstat but not
2844  * XXX csum_counter.
2845  */
2846 static int
bridge_ip_checkbasic(struct mbuf ** mp)2847 bridge_ip_checkbasic(struct mbuf **mp)
2848 {
2849           struct mbuf *m = *mp;
2850           struct ip *ip;
2851           int len, hlen;
2852 
2853           if (*mp == NULL)
2854                     return -1;
2855 
2856           if (M_GET_ALIGNED_HDR(&m, struct ip, true) != 0) {
2857                     /* XXXJRT new stat, please */
2858                     ip_statinc(IP_STAT_TOOSMALL);
2859                     goto bad;
2860           }
2861           ip = mtod(m, struct ip *);
2862           if (ip == NULL) goto bad;
2863 
2864           if (ip->ip_v != IPVERSION) {
2865                     ip_statinc(IP_STAT_BADVERS);
2866                     goto bad;
2867           }
2868           hlen = ip->ip_hl << 2;
2869           if (hlen < sizeof(struct ip)) { /* minimum header length */
2870                     ip_statinc(IP_STAT_BADHLEN);
2871                     goto bad;
2872           }
2873           if (hlen > m->m_len) {
2874                     if ((m = m_pullup(m, hlen)) == 0) {
2875                               ip_statinc(IP_STAT_BADHLEN);
2876                               goto bad;
2877                     }
2878                     ip = mtod(m, struct ip *);
2879                     if (ip == NULL) goto bad;
2880           }
2881 
2882           switch (m->m_pkthdr.csum_flags &
2883                   ((m_get_rcvif_NOMPSAFE(m)->if_csum_flags_rx & M_CSUM_IPv4) |
2884                    M_CSUM_IPv4_BAD)) {
2885           case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2886                     /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2887                     goto bad;
2888 
2889           case M_CSUM_IPv4:
2890                     /* Checksum was okay. */
2891                     /* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2892                     break;
2893 
2894           default:
2895                     /* Must compute it ourselves. */
2896                     /* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2897                     if (in_cksum(m, hlen) != 0)
2898                               goto bad;
2899                     break;
2900           }
2901 
2902           /* Retrieve the packet length. */
2903           len = ntohs(ip->ip_len);
2904 
2905           /*
2906            * Check for additional length bogosity
2907            */
2908           if (len < hlen) {
2909                     ip_statinc(IP_STAT_BADLEN);
2910                     goto bad;
2911           }
2912 
2913           /*
2914            * Check that the amount of data in the buffers
2915            * is as at least much as the IP header would have us expect.
2916            * Drop packet if shorter than we expect.
2917            */
2918           if (m->m_pkthdr.len < len) {
2919                     ip_statinc(IP_STAT_TOOSHORT);
2920                     goto bad;
2921           }
2922 
2923           /* Checks out, proceed */
2924           *mp = m;
2925           return 0;
2926 
2927     bad:
2928           *mp = m;
2929           return -1;
2930 }
2931 
#ifdef INET6
/*
 * Same as above, but for IPv6.
 * Cut-and-pasted from ip6_input.c.
 *
 * Checks that the base IPv6 header can be made contiguous and
 * aligned, and that the version field is IPv6.
 *
 * Returns 0 if the packet checks out and -1 otherwise (including
 * when *mp is NULL on entry).  In both cases *mp is updated to the
 * current head of the chain, which may differ from the one passed
 * in.  The packet is not freed here.
 *
 * XXX Should we update ip6stat, or not?
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;
	struct ifnet *inifp;

	if (m == NULL)
		return -1;

	/*
	 * Look up the receive interface before touching the chain.
	 * NOTE(review): per sys/mbuf.h, M_GET_ALIGNED_HDR() leaves m
	 * NULL when it fails, so m must not be dereferenced in the
	 * failure path below -- confirm against the macro definition.
	 */
	inifp = m_get_rcvif_NOMPSAFE(m);

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
	if (M_GET_ALIGNED_HDR(&m, struct ip6_hdr, true) != 0) {
		/* XXXJRT new stat, please */
		ip6_statinc(IP6_STAT_TOOSMALL);
		in6_ifstat_inc(inifp, ifs6_in_hdrerr);
		goto bad;
	}

	ip6 = mtod(m, struct ip6_hdr *);

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6_statinc(IP6_STAT_BADVERS);
		in6_ifstat_inc(m_get_rcvif_NOMPSAFE(m), ifs6_in_hdrerr);
		goto bad;
	}

	/* Checks out, proceed */
	*mp = m;
	return 0;

    bad:
	/* Hand the current chain head back so the caller can dispose of it. */
	*mp = m;
	return -1;
}
#endif /* INET6 */
2975