1 /*        $NetBSD: if.c,v 1.531 2024/12/16 05:18:37 ozaki-r Exp $     */
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by William Studenmund and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the project nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 /*
62  * Copyright (c) 1980, 1986, 1993
63  *        The Regents of the University of California.  All rights reserved.
64  *
65  * Redistribution and use in source and binary forms, with or without
66  * modification, are permitted provided that the following conditions
67  * are met:
68  * 1. Redistributions of source code must retain the above copyright
69  *    notice, this list of conditions and the following disclaimer.
70  * 2. Redistributions in binary form must reproduce the above copyright
71  *    notice, this list of conditions and the following disclaimer in the
72  *    documentation and/or other materials provided with the distribution.
73  * 3. Neither the name of the University nor the names of its contributors
74  *    may be used to endorse or promote products derived from this software
75  *    without specific prior written permission.
76  *
77  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
78  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
79  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
80  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
81  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
82  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
83  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
84  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
85  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
86  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87  * SUCH DAMAGE.
88  *
89  *        @(#)if.c  8.5 (Berkeley) 1/9/95
90  */
91 
92 #include <sys/cdefs.h>
93 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.531 2024/12/16 05:18:37 ozaki-r Exp $");
94 
95 #if defined(_KERNEL_OPT)
96 #include "opt_inet.h"
97 #include "opt_ipsec.h"
98 #include "opt_atalk.h"
99 #include "opt_wlan.h"
100 #include "opt_net_mpsafe.h"
101 #include "opt_mrouting.h"
102 #endif
103 
104 #include <sys/param.h>
105 #include <sys/mbuf.h>
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/socket.h>
110 #include <sys/socketvar.h>
111 #include <sys/domain.h>
112 #include <sys/protosw.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/sysctl.h>
116 #include <sys/syslog.h>
117 #include <sys/kauth.h>
118 #include <sys/kmem.h>
119 #include <sys/xcall.h>
120 #include <sys/cpu.h>
121 #include <sys/intr.h>
122 #include <sys/module_hook.h>
123 #include <sys/compat_stub.h>
124 #include <sys/msan.h>
125 #include <sys/hook.h>
126 
127 #include <net/if.h>
128 #include <net/if_dl.h>
129 #include <net/if_ether.h>
130 #include <net/if_media.h>
131 #include <net80211/ieee80211.h>
132 #include <net80211/ieee80211_ioctl.h>
133 #include <net/if_types.h>
134 #include <net/route.h>
135 #include <sys/module.h>
136 #ifdef NETATALK
137 #include <netatalk/at_extern.h>
138 #include <netatalk/at.h>
139 #endif
140 #include <net/pfil.h>
141 #include <netinet/in.h>
142 #include <netinet/in_var.h>
143 #include <netinet/ip_encap.h>
144 #include <net/bpf.h>
145 
146 #ifdef INET6
147 #include <netinet6/in6_var.h>
148 #include <netinet6/nd6.h>
149 #endif
150 
151 #include "ether.h"
152 
153 #include "bridge.h"
154 #if NBRIDGE > 0
155 #include <net/if_bridgevar.h>
156 #endif
157 
158 #include "carp.h"
159 #if NCARP > 0
160 #include <netinet/ip_carp.h>
161 #endif
162 
163 #include <compat/sys/sockio.h>
164 
165 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
166 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
167 
/*
 * XXX reusing (ifp)->if_snd.ifq_lock rather than having another spin mutex
 * for each ifnet.  It doesn't matter because:
 * - if IFEF_MPSAFE is enabled, if_snd isn't used and lock contentions on
 *   ifq_lock don't happen
 * - if IFEF_MPSAFE is disabled, there is no lock contention on ifq_lock
 *   because if_snd, if_link_state_change and if_link_state_change_process
 *   are all called with KERNEL_LOCK
 */
#define IF_LINK_STATE_CHANGE_LOCK(ifp)		\
	mutex_enter((ifp)->if_snd.ifq_lock)
#define IF_LINK_STATE_CHANGE_UNLOCK(ifp)	\
	mutex_exit((ifp)->if_snd.ifq_lock)
181 
182 /*
183  * Global list of interfaces.
184  */
185 /* DEPRECATED. Remove it once kvm(3) users disappeared */
186 struct ifnet_head             ifnet_list;
187 
188 struct pslist_head            ifnet_pslist;
189 static ifnet_t **             ifindex2ifnet = NULL;
190 static u_int                            if_index = 1;
191 static size_t                           if_indexlim = 0;
192 static uint64_t                         index_gen;
193 /* Mutex to protect the above objects. */
194 kmutex_t                      ifnet_mtx __cacheline_aligned;
195 static struct psref_class     *ifnet_psref_class __read_mostly;
196 static pserialize_t           ifnet_psz;
197 static struct workqueue                 *ifnet_link_state_wq __read_mostly;
198 
199 static struct workqueue                 *if_slowtimo_wq __read_mostly;
200 
201 static kmutex_t                         if_clone_mtx;
202 
203 struct ifnet *lo0ifp;
204 int       ifqmaxlen = IFQ_MAXLEN;
205 
206 struct psref_class            *ifa_psref_class __read_mostly;
207 
208 static int          if_delroute_matcher(struct rtentry *, void *);
209 
210 static bool if_is_unit(const char *);
211 static struct if_clone *if_clone_lookup(const char *, int *);
212 
213 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
214 static int if_cloners_count;
215 
216 /* Packet filtering hook for interfaces. */
217 pfil_head_t *                           if_pfil __read_mostly;
218 
219 static kauth_listener_t if_listener;
220 
221 static int doifioctl(struct socket *, u_long, void *, struct lwp *);
222 static void sysctl_sndq_setup(struct sysctllog **, const char *,
223     struct ifaltq *);
224 static void if_slowtimo_intr(void *);
225 static void if_slowtimo_work(struct work *, void *);
226 static int sysctl_if_watchdog(SYSCTLFN_PROTO);
227 static void sysctl_watchdog_setup(struct ifnet *);
228 static void if_attachdomain1(struct ifnet *);
229 static int ifconf(u_long, void *);
230 static int if_transmit(struct ifnet *, struct mbuf *);
231 static int if_clone_create(const char *);
232 static int if_clone_destroy(const char *);
233 static void if_link_state_change_work(struct work *, void *);
234 static void if_up_locked(struct ifnet *);
235 static void _if_down(struct ifnet *);
236 static void if_down_deactivated(struct ifnet *);
237 
/*
 * State of one softint-driven per-CPU input queue set, as built by
 * if_percpuq_create().
 */
struct if_percpuq {
	struct ifnet	*ipq_ifp;	/* interface the queues feed */
	void		*ipq_si;	/* handle from softint_establish() */
	struct percpu	*ipq_ifqs;	/* struct ifqueue */
};

static struct mbuf *if_percpuq_dequeue(struct if_percpuq *);

static void if_percpuq_drops(void *, void *, struct cpu_info *);
static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO);
static void sysctl_percpuq_setup(struct sysctllog **, const char *,
    struct if_percpuq *);
250 
/*
 * State for deferred if_start processing.
 *
 * NOTE(review): the code that fills in and uses this structure is not
 * in this part of the file; the field notes below are read off the
 * names and types only and should be confirmed there.
 */
struct if_deferred_start {
	struct ifnet	*ids_ifp;	/* presumably the owning interface */
	void		(*ids_if_start)(struct ifnet *); /* start callback */
	void		*ids_si;	/* presumably a softint handle */
};

static void if_deferred_start_softint(void *);
static void if_deferred_start_common(struct ifnet *);
static void if_deferred_start_destroy(struct ifnet *);
260 
261 struct if_slowtimo_data {
262           kmutex_t            isd_lock;
263           struct callout                isd_ch;
264           struct work                   isd_work;
265           struct ifnet                  *isd_ifp;
266           bool                          isd_queued;
267           bool                          isd_dying;
268           bool                          isd_trigger;
269 };
270 
271 /*
272  * Hook for if_vlan - needed by if_agr
273  */
274 struct if_vlan_vlan_input_hook_t if_vlan_vlan_input_hook;
275 
276 static void if_sysctl_setup(struct sysctllog **);
277 
278 static int
if_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)279 if_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
280     void *arg0, void *arg1, void *arg2, void *arg3)
281 {
282           int result;
283           enum kauth_network_req req;
284 
285           result = KAUTH_RESULT_DEFER;
286           req = (enum kauth_network_req)(uintptr_t)arg1;
287 
288           if (action != KAUTH_NETWORK_INTERFACE)
289                     return result;
290 
291           if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) ||
292               (req == KAUTH_REQ_NETWORK_INTERFACE_SET))
293                     result = KAUTH_RESULT_ALLOW;
294 
295           return result;
296 }
297 
298 /*
299  * Network interface utility routines.
300  *
301  * Routines with ifa_ifwith* names take sockaddr *'s as
302  * parameters.
303  */
304 void
ifinit(void)305 ifinit(void)
306 {
307 
308 #if (defined(INET) || defined(INET6))
309           encapinit();
310 #endif
311 
312           if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
313               if_listener_cb, NULL);
314 
315           /* interfaces are available, inform socket code */
316           ifioctl = doifioctl;
317 }
318 
319 /*
320  * XXX Initialization before configure().
321  * XXX hack to get pfil_add_hook working in autoconf.
322  */
323 void
ifinit1(void)324 ifinit1(void)
325 {
326           int error __diagused;
327 
328 #ifdef NET_MPSAFE
329           printf("NET_MPSAFE enabled\n");
330 #endif
331 
332           mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE);
333 
334           TAILQ_INIT(&ifnet_list);
335           mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE);
336           ifnet_psz = pserialize_create();
337           ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET);
338           ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET);
339           error = workqueue_create(&ifnet_link_state_wq, "iflnkst",
340               if_link_state_change_work, NULL, PRI_SOFTNET, IPL_NET,
341               WQ_MPSAFE);
342           KASSERT(error == 0);
343           PSLIST_INIT(&ifnet_pslist);
344 
345           error = workqueue_create(&if_slowtimo_wq, "ifwdog",
346               if_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTCLOCK, WQ_MPSAFE);
347           KASSERTMSG(error == 0, "error=%d", error);
348 
349           if_indexlim = 8;
350 
351           if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL);
352           KASSERT(if_pfil != NULL);
353 
354 #if NETHER > 0 || defined(NETATALK) || defined(WLAN)
355           etherinit();
356 #endif
357 }
358 
359 /* XXX must be after domaininit() */
360 void
ifinit_post(void)361 ifinit_post(void)
362 {
363 
364           if_sysctl_setup(NULL);
365 }
366 
367 ifnet_t *
if_alloc(u_char type)368 if_alloc(u_char type)
369 {
370 
371           return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP);
372 }
373 
374 void
if_free(ifnet_t * ifp)375 if_free(ifnet_t *ifp)
376 {
377 
378           kmem_free(ifp, sizeof(ifnet_t));
379 }
380 
381 void
if_initname(struct ifnet * ifp,const char * name,int unit)382 if_initname(struct ifnet *ifp, const char *name, int unit)
383 {
384 
385           (void)snprintf(ifp->if_xname, sizeof(ifp->if_xname),
386               "%s%d", name, unit);
387 }
388 
/*
 * Null routines used while an interface is going away.  These routines
 * either just return an error or do nothing at all.
 */
393 
/*
 * Null output routine: ignores all arguments and returns ENXIO.  The
 * mbuf is not freed here.
 */
int
if_nulloutput(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *so, const struct rtentry *rt)
{

	return ENXIO;
}
401 
/*
 * Null input routine: does nothing.  The mbuf is neither consumed nor
 * freed.
 */
void
if_nullinput(struct ifnet *ifp, struct mbuf *m)
{

	/* Nothing. */
}
408 
/* Null start routine: does nothing. */
void
if_nullstart(struct ifnet *ifp)
{

	/* Nothing. */
}
415 
/*
 * Null transmit routine: frees the mbuf with m_freem() and returns
 * ENXIO.  Unlike if_nulloutput(), this one takes ownership of m.
 */
int
if_nulltransmit(struct ifnet *ifp, struct mbuf *m)
{

	m_freem(m);
	return ENXIO;
}
423 
424 int
if_nullioctl(struct ifnet * ifp,u_long cmd,void * data)425 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data)
426 {
427 
428           return ENXIO;
429 }
430 
/* Null init routine: does nothing and returns ENXIO. */
int
if_nullinit(struct ifnet *ifp)
{

	return ENXIO;
}
437 
/*
 * Null stop routine: does nothing.  if_register() installs it when a
 * driver supplies no if_stop of its own.
 */
void
if_nullstop(struct ifnet *ifp, int disable)
{

	/* Nothing. */
}
444 
/* Null watchdog (slowtimo) routine: does nothing. */
void
if_nullslowtimo(struct ifnet *ifp)
{

	/* Nothing. */
}
451 
/* Null drain routine: does nothing. */
void
if_nulldrain(struct ifnet *ifp)
{

	/* Nothing. */
}
458 
459 void
if_set_sadl(struct ifnet * ifp,const void * lla,u_char addrlen,bool factory)460 if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory)
461 {
462           struct ifaddr *ifa;
463           struct sockaddr_dl *sdl;
464 
465           ifp->if_addrlen = addrlen;
466           if_alloc_sadl(ifp);
467           ifa = ifp->if_dl;
468           sdl = satosdl(ifa->ifa_addr);
469 
470           (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen);
471           if (factory) {
472                     KASSERT(ifp->if_hwdl == NULL);
473                     ifp->if_hwdl = ifp->if_dl;
474                     ifaref(ifp->if_hwdl);
475           }
476           /* TBD routing socket */
477 }
478 
479 struct ifaddr *
if_dl_create(const struct ifnet * ifp,const struct sockaddr_dl ** sdlp)480 if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp)
481 {
482           unsigned socksize, ifasize;
483           int addrlen, namelen;
484           struct sockaddr_dl *mask, *sdl;
485           struct ifaddr *ifa;
486 
487           namelen = strlen(ifp->if_xname);
488           addrlen = ifp->if_addrlen;
489           socksize = roundup(sockaddr_dl_measure(namelen, addrlen),
490               sizeof(long));
491           ifasize = sizeof(*ifa) + 2 * socksize;
492           ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
493 
494           sdl = (struct sockaddr_dl *)(ifa + 1);
495           mask = (struct sockaddr_dl *)(socksize + (char *)sdl);
496 
497           sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type,
498               ifp->if_xname, namelen, NULL, addrlen);
499           mask->sdl_family = AF_LINK;
500           mask->sdl_len = sockaddr_dl_measure(namelen, 0);
501           memset(&mask->sdl_data[0], 0xff, namelen);
502           ifa->ifa_rtrequest = link_rtrequest;
503           ifa->ifa_addr = (struct sockaddr *)sdl;
504           ifa->ifa_netmask = (struct sockaddr *)mask;
505           ifa_psref_init(ifa);
506 
507           *sdlp = sdl;
508 
509           return ifa;
510 }
511 
512 static void
if_sadl_setrefs(struct ifnet * ifp,struct ifaddr * ifa)513 if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa)
514 {
515           const struct sockaddr_dl *sdl;
516 
517           ifp->if_dl = ifa;
518           ifaref(ifa);
519           sdl = satosdl(ifa->ifa_addr);
520           ifp->if_sadl = sdl;
521 }
522 
523 /*
524  * Allocate the link level name for the specified interface.  This
525  * is an attachment helper.  It must be called after ifp->if_addrlen
526  * is initialized, which may not be the case when if_attach() is
527  * called.
528  */
529 void
if_alloc_sadl(struct ifnet * ifp)530 if_alloc_sadl(struct ifnet *ifp)
531 {
532           struct ifaddr *ifa;
533           const struct sockaddr_dl *sdl;
534 
535           /*
536            * If the interface already has a link name, release it
537            * now.  This is useful for interfaces that can change
538            * link types, and thus switch link names often.
539            */
540           if (ifp->if_sadl != NULL)
541                     if_free_sadl(ifp, 0);
542 
543           ifa = if_dl_create(ifp, &sdl);
544 
545           ifa_insert(ifp, ifa);
546           if_sadl_setrefs(ifp, ifa);
547 }
548 
549 static void
if_deactivate_sadl(struct ifnet * ifp)550 if_deactivate_sadl(struct ifnet *ifp)
551 {
552           struct ifaddr *ifa;
553 
554           KASSERT(ifp->if_dl != NULL);
555 
556           ifa = ifp->if_dl;
557 
558           ifp->if_sadl = NULL;
559 
560           ifp->if_dl = NULL;
561           ifafree(ifa);
562 }
563 
564 static void
if_replace_sadl(struct ifnet * ifp,struct ifaddr * ifa)565 if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa)
566 {
567           struct ifaddr *old;
568 
569           KASSERT(ifp->if_dl != NULL);
570 
571           old = ifp->if_dl;
572 
573           ifaref(ifa);
574           /* XXX Update if_dl and if_sadl atomically */
575           ifp->if_dl = ifa;
576           ifp->if_sadl = satosdl(ifa->ifa_addr);
577 
578           ifafree(old);
579 }
580 
581 void
if_activate_sadl(struct ifnet * ifp,struct ifaddr * ifa0,const struct sockaddr_dl * sdl)582 if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0,
583     const struct sockaddr_dl *sdl)
584 {
585           struct ifaddr *ifa;
586           const int bound = curlwp_bind();
587 
588           KASSERT(ifa_held(ifa0));
589 
590           const int s = splsoftnet();
591 
592           if_replace_sadl(ifp, ifa0);
593 
594           int ss = pserialize_read_enter();
595           IFADDR_READER_FOREACH(ifa, ifp) {
596                     struct psref psref;
597                     ifa_acquire(ifa, &psref);
598                     pserialize_read_exit(ss);
599 
600                     rtinit(ifa, RTM_LLINFO_UPD, 0);
601 
602                     ss = pserialize_read_enter();
603                     ifa_release(ifa, &psref);
604           }
605           pserialize_read_exit(ss);
606 
607           splx(s);
608           curlwp_bindx(bound);
609 }
610 
611 /*
612  * Free the link level name for the specified interface.  This is
613  * a detach helper.  This is called from if_detach().
614  */
615 void
if_free_sadl(struct ifnet * ifp,int factory)616 if_free_sadl(struct ifnet *ifp, int factory)
617 {
618           struct ifaddr *ifa;
619 
620           if (factory && ifp->if_hwdl != NULL) {
621                     ifa = ifp->if_hwdl;
622                     ifp->if_hwdl = NULL;
623                     ifafree(ifa);
624           }
625 
626           ifa = ifp->if_dl;
627           if (ifa == NULL) {
628                     KASSERT(ifp->if_sadl == NULL);
629                     return;
630           }
631 
632           KASSERT(ifp->if_sadl != NULL);
633 
634           const int s = splsoftnet();
635           KASSERT(ifa->ifa_addr->sa_family == AF_LINK);
636           ifa_remove(ifp, ifa);
637           if_deactivate_sadl(ifp);
638           splx(s);
639 }
640 
641 static void
if_getindex(ifnet_t * ifp)642 if_getindex(ifnet_t *ifp)
643 {
644           bool hitlimit = false;
645           char xnamebuf[HOOKNAMSIZ];
646 
647           ifp->if_index_gen = index_gen++;
648           snprintf(xnamebuf, sizeof(xnamebuf), "%s-lshk", ifp->if_xname);
649           ifp->if_linkstate_hooks = simplehook_create(IPL_NET,
650               xnamebuf);
651 
652           ifp->if_index = if_index;
653           if (ifindex2ifnet == NULL) {
654                     if_index++;
655                     goto skip;
656           }
657           while (if_byindex(ifp->if_index)) {
658                     /*
659                      * If we hit USHRT_MAX, we skip back to 0 since
660                      * there are a number of places where the value
661                      * of if_index or if_index itself is compared
662                      * to or stored in an unsigned short.  By
663                      * jumping back, we won't botch those assignments
664                      * or comparisons.
665                      */
666                     if (++if_index == 0) {
667                               if_index = 1;
668                     } else if (if_index == USHRT_MAX) {
669                               /*
670                                * However, if we have to jump back to
671                                * zero *twice* without finding an empty
672                                * slot in ifindex2ifnet[], then there
673                                * there are too many (>65535) interfaces.
674                                */
675                               if (hitlimit)
676                                         panic("too many interfaces");
677                               hitlimit = true;
678                               if_index = 1;
679                     }
680                     ifp->if_index = if_index;
681           }
682 skip:
683           /*
684            * ifindex2ifnet is indexed by if_index. Since if_index will
685            * grow dynamically, it should grow too.
686            */
687           if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
688                     size_t m, n, oldlim;
689                     void *q;
690 
691                     oldlim = if_indexlim;
692                     while (ifp->if_index >= if_indexlim)
693                               if_indexlim <<= 1;
694 
695                     /* grow ifindex2ifnet */
696                     m = oldlim * sizeof(struct ifnet *);
697                     n = if_indexlim * sizeof(struct ifnet *);
698                     q = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
699                     if (ifindex2ifnet != NULL) {
700                               memcpy(q, ifindex2ifnet, m);
701                               free(ifindex2ifnet, M_IFADDR);
702                     }
703                     ifindex2ifnet = (struct ifnet **)q;
704           }
705           ifindex2ifnet[ifp->if_index] = ifp;
706 }
707 
708 /*
709  * Initialize an interface and assign an index for it.
710  *
711  * It must be called prior to a device specific attach routine
712  * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl,
713  * and be followed by if_register:
714  *
715  *     if_initialize(ifp);
716  *     ether_ifattach(ifp, enaddr);
717  *     if_register(ifp);
718  */
719 void
if_initialize(ifnet_t * ifp)720 if_initialize(ifnet_t *ifp)
721 {
722 
723           KASSERT(if_indexlim > 0);
724           TAILQ_INIT(&ifp->if_addrlist);
725 
726           /*
727            * Link level name is allocated later by a separate call to
728            * if_alloc_sadl().
729            */
730 
731           if (ifp->if_snd.ifq_maxlen == 0)
732                     ifp->if_snd.ifq_maxlen = ifqmaxlen;
733 
734           ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
735 
736           ifp->if_link_state = LINK_STATE_UNKNOWN;
737           ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
738           ifp->if_link_scheduled = false;
739 
740           ifp->if_capenable = 0;
741           ifp->if_csum_flags_tx = 0;
742           ifp->if_csum_flags_rx = 0;
743 
744 #ifdef ALTQ
745           ifp->if_snd.altq_type = 0;
746           ifp->if_snd.altq_disc = NULL;
747           ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
748           ifp->if_snd.altq_tbr  = NULL;
749           ifp->if_snd.altq_ifp  = ifp;
750 #endif
751 
752           IFQ_LOCK_INIT(&ifp->if_snd);
753 
754           ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp);
755           pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp);
756 
757           IF_AFDATA_LOCK_INIT(ifp);
758 
759           PSLIST_ENTRY_INIT(ifp, if_pslist_entry);
760           PSLIST_INIT(&ifp->if_addr_pslist);
761           psref_target_init(&ifp->if_psref, ifnet_psref_class);
762           ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
763           LIST_INIT(&ifp->if_multiaddrs);
764           if_stats_init(ifp);
765 
766           IFNET_GLOBAL_LOCK();
767           if_getindex(ifp);
768           IFNET_GLOBAL_UNLOCK();
769 }
770 
771 /*
772  * Register an interface to the list of "active" interfaces.
773  */
774 void
if_register(ifnet_t * ifp)775 if_register(ifnet_t *ifp)
776 {
777           /*
778            * If the driver has not supplied its own if_ioctl or if_stop,
779            * then supply the default.
780            */
781           if (ifp->if_ioctl == NULL)
782                     ifp->if_ioctl = ifioctl_common;
783           if (ifp->if_stop == NULL)
784                     ifp->if_stop = if_nullstop;
785 
786           sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd);
787 
788           if (!STAILQ_EMPTY(&domains))
789                     if_attachdomain1(ifp);
790 
791           /* Announce the interface. */
792           rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
793 
794           if (ifp->if_slowtimo != NULL) {
795                     struct if_slowtimo_data *isd;
796 
797                     isd = kmem_zalloc(sizeof(*isd), KM_SLEEP);
798                     mutex_init(&isd->isd_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK);
799                     callout_init(&isd->isd_ch, CALLOUT_MPSAFE);
800                     callout_setfunc(&isd->isd_ch, if_slowtimo_intr, ifp);
801                     isd->isd_ifp = ifp;
802 
803                     ifp->if_slowtimo_data = isd;
804 
805                     if_slowtimo_intr(ifp);
806 
807                     sysctl_watchdog_setup(ifp);
808           }
809 
810           if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
811                     ifp->if_transmit = if_transmit;
812 
813           IFNET_GLOBAL_LOCK();
814           TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list);
815           IFNET_WRITER_INSERT_TAIL(ifp);
816           IFNET_GLOBAL_UNLOCK();
817 }
818 
/*
 * The if_percpuq framework
 *
 * It allows network device drivers to execute the network stack
 * in softint (so-called softint-based if_input). It utilizes
 * softint and percpu ifqueue. It doesn't distribute any packets
 * between CPUs, unlike pktqueue(9).
 *
 * Currently we support two options for device drivers to apply the framework:
 * - Use it implicitly with fewer changes
 *   - If you use if_attach in driver's _attach function and if_input in
 *     driver's Rx interrupt handler, a packet is queued and a softint handles
 *     the packet implicitly
 * - Use it explicitly in each driver (recommended)
 *   - You can use if_percpuq_* directly in your driver
 *   - In this case, you need to allocate struct if_percpuq in driver's softc
 *   - See wm(4) as a reference implementation
 */
837 
838 static void
if_percpuq_softint(void * arg)839 if_percpuq_softint(void *arg)
840 {
841           struct if_percpuq *ipq = arg;
842           struct ifnet *ifp = ipq->ipq_ifp;
843           struct mbuf *m;
844 
845           while ((m = if_percpuq_dequeue(ipq)) != NULL) {
846                     if_statinc(ifp, if_ipackets);
847                     bpf_mtap(ifp, m, BPF_D_IN);
848 
849                     ifp->_if_input(ifp, m);
850           }
851 }
852 
853 static void
if_percpuq_init_ifq(void * p,void * arg __unused,struct cpu_info * ci __unused)854 if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
855 {
856           struct ifqueue *const ifq = p;
857 
858           memset(ifq, 0, sizeof(*ifq));
859           ifq->ifq_maxlen = IFQ_MAXLEN;
860 }
861 
862 struct if_percpuq *
if_percpuq_create(struct ifnet * ifp)863 if_percpuq_create(struct ifnet *ifp)
864 {
865           struct if_percpuq *ipq;
866           u_int flags = SOFTINT_NET;
867 
868           flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
869 
870           ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP);
871           ipq->ipq_ifp = ifp;
872           ipq->ipq_si = softint_establish(flags, if_percpuq_softint, ipq);
873           ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue));
874           percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL);
875 
876           sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq);
877 
878           return ipq;
879 }
880 
881 static struct mbuf *
if_percpuq_dequeue(struct if_percpuq * ipq)882 if_percpuq_dequeue(struct if_percpuq *ipq)
883 {
884           struct mbuf *m;
885           struct ifqueue *ifq;
886 
887           const int s = splnet();
888           ifq = percpu_getref(ipq->ipq_ifqs);
889           IF_DEQUEUE(ifq, m);
890           percpu_putref(ipq->ipq_ifqs);
891           splx(s);
892 
893           return m;
894 }
895 
896 static void
if_percpuq_purge_ifq(void * p,void * arg __unused,struct cpu_info * ci __unused)897 if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
898 {
899           struct ifqueue *const ifq = p;
900 
901           IF_PURGE(ifq);
902 }
903 
904 void
if_percpuq_destroy(struct if_percpuq * ipq)905 if_percpuq_destroy(struct if_percpuq *ipq)
906 {
907 
908           /* if_detach may already destroy it */
909           if (ipq == NULL)
910                     return;
911 
912           softint_disestablish(ipq->ipq_si);
913           percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL);
914           percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue));
915           kmem_free(ipq, sizeof(*ipq));
916 }
917 
918 void
if_percpuq_enqueue(struct if_percpuq * ipq,struct mbuf * m)919 if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m)
920 {
921           struct ifqueue *ifq;
922 
923           KASSERT(ipq != NULL);
924 
925           const int s = splnet();
926           ifq = percpu_getref(ipq->ipq_ifqs);
927           if (IF_QFULL(ifq)) {
928                     IF_DROP(ifq);
929                     percpu_putref(ipq->ipq_ifqs);
930                     if_statinc(ipq->ipq_ifp, if_iqdrops);
931                     m_freem(m);
932                     goto out;
933           }
934           IF_ENQUEUE(ifq, m);
935           percpu_putref(ipq->ipq_ifqs);
936 
937           softint_schedule(ipq->ipq_si);
938 out:
939           splx(s);
940 }
941 
942 static void
if_percpuq_drops(void * p,void * arg,struct cpu_info * ci __unused)943 if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused)
944 {
945           struct ifqueue *const ifq = p;
946           uint64_t *sum = arg;
947 
948           *sum += ifq->ifq_drops;
949 }
950 
951 static int
sysctl_percpuq_drops_handler(SYSCTLFN_ARGS)952 sysctl_percpuq_drops_handler(SYSCTLFN_ARGS)
953 {
954           struct sysctlnode node;
955           struct if_percpuq *ipq;
956           uint64_t sum = 0;
957           int error;
958 
959           node = *rnode;
960           ipq = node.sysctl_data;
961 
962           percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum);
963 
964           node.sysctl_data = &sum;
965           error = sysctl_lookup(SYSCTLFN_CALL(&node));
966           if (error != 0 || newp == NULL)
967                     return error;
968 
969           return 0;
970 }
971 
972 static void
sysctl_percpuq_setup(struct sysctllog ** clog,const char * ifname,struct if_percpuq * ipq)973 sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname,
974     struct if_percpuq *ipq)
975 {
976           const struct sysctlnode *cnode, *rnode;
977 
978           if (sysctl_createv(clog, 0, NULL, &rnode,
979                            CTLFLAG_PERMANENT,
980                            CTLTYPE_NODE, "interfaces",
981                            SYSCTL_DESCR("Per-interface controls"),
982                            NULL, 0, NULL, 0,
983                            CTL_NET, CTL_CREATE, CTL_EOL) != 0)
984                     goto bad;
985 
986           if (sysctl_createv(clog, 0, &rnode, &rnode,
987                            CTLFLAG_PERMANENT,
988                            CTLTYPE_NODE, ifname,
989                            SYSCTL_DESCR("Interface controls"),
990                            NULL, 0, NULL, 0,
991                            CTL_CREATE, CTL_EOL) != 0)
992                     goto bad;
993 
994           if (sysctl_createv(clog, 0, &rnode, &rnode,
995                            CTLFLAG_PERMANENT,
996                            CTLTYPE_NODE, "rcvq",
997                            SYSCTL_DESCR("Interface input queue controls"),
998                            NULL, 0, NULL, 0,
999                            CTL_CREATE, CTL_EOL) != 0)
1000                     goto bad;
1001 
1002 #ifdef NOTYET
1003           /* XXX Should show each per-CPU queue length? */
1004           if (sysctl_createv(clog, 0, &rnode, &rnode,
1005                            CTLFLAG_PERMANENT,
1006                            CTLTYPE_INT, "len",
1007                            SYSCTL_DESCR("Current input queue length"),
1008                            sysctl_percpuq_len, 0, NULL, 0,
1009                            CTL_CREATE, CTL_EOL) != 0)
1010                     goto bad;
1011 
1012           if (sysctl_createv(clog, 0, &rnode, &cnode,
1013                            CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1014                            CTLTYPE_INT, "maxlen",
1015                            SYSCTL_DESCR("Maximum allowed input queue length"),
1016                            sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0,
1017                            CTL_CREATE, CTL_EOL) != 0)
1018                     goto bad;
1019 #endif
1020 
1021           if (sysctl_createv(clog, 0, &rnode, &cnode,
1022                            CTLFLAG_PERMANENT,
1023                            CTLTYPE_QUAD, "drops",
1024                            SYSCTL_DESCR("Total packets dropped due to full input queue"),
1025                            sysctl_percpuq_drops_handler, 0, (void *)ipq, 0,
1026                            CTL_CREATE, CTL_EOL) != 0)
1027                     goto bad;
1028 
1029           return;
1030 bad:
1031           printf("%s: could not attach sysctl nodes\n", ifname);
1032           return;
1033 }
1034 
1035 /*
1036  * The deferred if_start framework
1037  *
1038  * The common APIs to defer if_start to softint when if_start is requested
1039  * from a device driver running in hardware interrupt context.
1040  */
1041 /*
1042  * Call ifp->if_start (or equivalent) in a dedicated softint for
1043  * deferred if_start.
1044  */
1045 static void
if_deferred_start_softint(void * arg)1046 if_deferred_start_softint(void *arg)
1047 {
1048           struct if_deferred_start *ids = arg;
1049           struct ifnet *ifp = ids->ids_ifp;
1050 
1051           ids->ids_if_start(ifp);
1052 }
1053 
/*
 * Default deferred if_start callback, used when the driver supplies none:
 * call if_start_lock() on the interface at splnet.
 */
static void
if_deferred_start_common(struct ifnet *ifp)
{
	const int spl = splnet();

	if_start_lock(ifp);
	splx(spl);
}
1064 
1065 static inline bool
if_snd_is_used(struct ifnet * ifp)1066 if_snd_is_used(struct ifnet *ifp)
1067 {
1068 
1069           return ALTQ_IS_ENABLED(&ifp->if_snd) ||
1070               ifp->if_transmit == if_transmit ||
1071               ifp->if_transmit == NULL ||
1072               ifp->if_transmit == if_nulltransmit;
1073 }
1074 
1075 /*
1076  * Schedule deferred if_start.
1077  */
1078 void
if_schedule_deferred_start(struct ifnet * ifp)1079 if_schedule_deferred_start(struct ifnet *ifp)
1080 {
1081 
1082           KASSERT(ifp->if_deferred_start != NULL);
1083 
1084           if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd))
1085                     return;
1086 
1087           softint_schedule(ifp->if_deferred_start->ids_si);
1088 }
1089 
1090 /*
1091  * Create an instance of deferred if_start. A driver should call the function
1092  * only if the driver needs deferred if_start. Drivers can setup their own
1093  * deferred if_start function via 2nd argument.
1094  */
1095 void
if_deferred_start_init(struct ifnet * ifp,void (* func)(struct ifnet *))1096 if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *))
1097 {
1098           struct if_deferred_start *ids;
1099           u_int flags = SOFTINT_NET;
1100 
1101           flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
1102 
1103           ids = kmem_zalloc(sizeof(*ids), KM_SLEEP);
1104           ids->ids_ifp = ifp;
1105           ids->ids_si = softint_establish(flags, if_deferred_start_softint, ids);
1106           if (func != NULL)
1107                     ids->ids_if_start = func;
1108           else
1109                     ids->ids_if_start = if_deferred_start_common;
1110 
1111           ifp->if_deferred_start = ids;
1112 }
1113 
1114 static void
if_deferred_start_destroy(struct ifnet * ifp)1115 if_deferred_start_destroy(struct ifnet *ifp)
1116 {
1117 
1118           if (ifp->if_deferred_start == NULL)
1119                     return;
1120 
1121           softint_disestablish(ifp->if_deferred_start->ids_si);
1122           kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start));
1123           ifp->if_deferred_start = NULL;
1124 }
1125 
1126 /*
1127  * The common interface input routine that is called by device drivers,
1128  * which should be used only when the driver's rx handler already runs
1129  * in softint.
1130  */
1131 void
if_input(struct ifnet * ifp,struct mbuf * m)1132 if_input(struct ifnet *ifp, struct mbuf *m)
1133 {
1134 
1135           KASSERT(ifp->if_percpuq == NULL);
1136           KASSERT(!cpu_intr_p());
1137 
1138           if_statinc(ifp, if_ipackets);
1139           bpf_mtap(ifp, m, BPF_D_IN);
1140 
1141           ifp->_if_input(ifp, m);
1142 }
1143 
1144 /*
1145  * DEPRECATED. Use if_initialize and if_register instead.
1146  * See the above comment of if_initialize.
1147  *
1148  * Note that it implicitly enables if_percpuq to make drivers easy to
1149  * migrate softint-based if_input without much changes. If you don't
1150  * want to enable it, use if_initialize instead.
1151  */
1152 void
if_attach(ifnet_t * ifp)1153 if_attach(ifnet_t *ifp)
1154 {
1155 
1156           if_initialize(ifp);
1157           ifp->if_percpuq = if_percpuq_create(ifp);
1158           if_register(ifp);
1159 }
1160 
1161 void
if_attachdomain(void)1162 if_attachdomain(void)
1163 {
1164           struct ifnet *ifp;
1165           const int bound = curlwp_bind();
1166 
1167           int s = pserialize_read_enter();
1168           IFNET_READER_FOREACH(ifp) {
1169                     struct psref psref;
1170                     psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
1171                     pserialize_read_exit(s);
1172                     if_attachdomain1(ifp);
1173                     s = pserialize_read_enter();
1174                     psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
1175           }
1176           pserialize_read_exit(s);
1177           curlwp_bindx(bound);
1178 }
1179 
1180 static void
if_attachdomain1(struct ifnet * ifp)1181 if_attachdomain1(struct ifnet *ifp)
1182 {
1183           struct domain *dp;
1184           const int s = splsoftnet();
1185 
1186           /* address family dependent data region */
1187           memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
1188           DOMAIN_FOREACH(dp) {
1189                     if (dp->dom_ifattach != NULL)
1190                               ifp->if_afdata[dp->dom_family] =
1191                                   (*dp->dom_ifattach)(ifp);
1192           }
1193 
1194           splx(s);
1195 }
1196 
1197 /*
1198  * Deactivate an interface.  This points all of the procedure
1199  * handles at error stubs.  May be called from interrupt context.
1200  */
1201 void
if_deactivate(struct ifnet * ifp)1202 if_deactivate(struct ifnet *ifp)
1203 {
1204           const int s = splsoftnet();
1205 
1206           ifp->if_output       = if_nulloutput;
1207           ifp->_if_input       = if_nullinput;
1208           ifp->if_start        = if_nullstart;
1209           ifp->if_transmit = if_nulltransmit;
1210           ifp->if_ioctl        = if_nullioctl;
1211           ifp->if_init         = if_nullinit;
1212           ifp->if_stop         = if_nullstop;
1213           if (ifp->if_slowtimo)
1214                     ifp->if_slowtimo = if_nullslowtimo;
1215           ifp->if_drain        = if_nulldrain;
1216 
1217           /* No more packets may be enqueued. */
1218           ifp->if_snd.ifq_maxlen = 0;
1219 
1220           splx(s);
1221 }
1222 
1223 bool
if_is_deactivated(const struct ifnet * ifp)1224 if_is_deactivated(const struct ifnet *ifp)
1225 {
1226 
1227           return ifp->if_output == if_nulloutput;
1228 }
1229 
1230 void
if_purgeaddrs(struct ifnet * ifp,int family,void (* purgeaddr)(struct ifaddr *))1231 if_purgeaddrs(struct ifnet *ifp, int family,
1232     void (*purgeaddr)(struct ifaddr *))
1233 {
1234           struct ifaddr *ifa, *nifa;
1235           int s;
1236 
1237           s = pserialize_read_enter();
1238           for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) {
1239                     nifa = IFADDR_READER_NEXT(ifa);
1240                     if (ifa->ifa_addr->sa_family != family)
1241                               continue;
1242                     pserialize_read_exit(s);
1243 
1244                     (*purgeaddr)(ifa);
1245 
1246                     s = pserialize_read_enter();
1247           }
1248           pserialize_read_exit(s);
1249 }
1250 
1251 #ifdef IFAREF_DEBUG
1252 static struct ifaddr **ifa_list;
1253 static int ifa_list_size;
1254 
1255 /* Depends on only one if_attach runs at once */
1256 static void
if_build_ifa_list(struct ifnet * ifp)1257 if_build_ifa_list(struct ifnet *ifp)
1258 {
1259           struct ifaddr *ifa;
1260           int i;
1261 
1262           KASSERT(ifa_list == NULL);
1263           KASSERT(ifa_list_size == 0);
1264 
1265           IFADDR_READER_FOREACH(ifa, ifp)
1266                     ifa_list_size++;
1267 
1268           ifa_list = kmem_alloc(sizeof(*ifa) * ifa_list_size, KM_SLEEP);
1269           i = 0;
1270           IFADDR_READER_FOREACH(ifa, ifp) {
1271                     ifa_list[i++] = ifa;
1272                     ifaref(ifa);
1273           }
1274 }
1275 
1276 static void
if_check_and_free_ifa_list(struct ifnet * ifp)1277 if_check_and_free_ifa_list(struct ifnet *ifp)
1278 {
1279           int i;
1280           struct ifaddr *ifa;
1281 
1282           if (ifa_list == NULL)
1283                     return;
1284 
1285           for (i = 0; i < ifa_list_size; i++) {
1286                     char buf[64];
1287 
1288                     ifa = ifa_list[i];
1289                     sockaddr_format(ifa->ifa_addr, buf, sizeof(buf));
1290                     if (ifa->ifa_refcnt > 1) {
1291                               log(LOG_WARNING,
1292                                   "ifa(%s) still referenced (refcnt=%d)\n",
1293                                   buf, ifa->ifa_refcnt - 1);
1294                     } else
1295                               log(LOG_DEBUG,
1296                                   "ifa(%s) not referenced (refcnt=%d)\n",
1297                                   buf, ifa->ifa_refcnt - 1);
1298                     ifafree(ifa);
1299           }
1300 
1301           kmem_free(ifa_list, sizeof(*ifa) * ifa_list_size);
1302           ifa_list = NULL;
1303           ifa_list_size = 0;
1304 }
1305 #endif
1306 
1307 /*
1308  * Detach an interface from the list of "active" interfaces,
1309  * freeing any resources as we go along.
1310  *
1311  * NOTE: This routine must be called with a valid thread context,
1312  * as it may block.
1313  */
1314 void
if_detach(struct ifnet * ifp)1315 if_detach(struct ifnet *ifp)
1316 {
1317           struct socket so;
1318           struct ifaddr *ifa;
1319 #ifdef IFAREF_DEBUG
1320           struct ifaddr *last_ifa = NULL;
1321 #endif
1322           struct domain *dp;
1323           const struct protosw *pr;
1324           int i, family, purged;
1325 
1326 #ifdef IFAREF_DEBUG
1327           if_build_ifa_list(ifp);
1328 #endif
1329           /*
1330            * XXX It's kind of lame that we have to have the
1331            * XXX socket structure...
1332            */
1333           memset(&so, 0, sizeof(so));
1334 
1335           const int s = splnet();
1336 
1337           sysctl_teardown(&ifp->if_sysctl_log);
1338 
1339           IFNET_LOCK(ifp);
1340 
1341           /*
1342            * Unset all queued link states and pretend a
1343            * link state change is scheduled.
1344            * This stops any more link state changes occurring for this
1345            * interface while it's being detached so it's safe
1346            * to drain the workqueue.
1347            */
1348           IF_LINK_STATE_CHANGE_LOCK(ifp);
1349           ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
1350           ifp->if_link_scheduled = true;
1351           IF_LINK_STATE_CHANGE_UNLOCK(ifp);
1352           workqueue_wait(ifnet_link_state_wq, &ifp->if_link_work);
1353 
1354           if_deactivate(ifp);
1355           IFNET_UNLOCK(ifp);
1356 
1357           /*
1358            * Unlink from the list and wait for all readers to leave
1359            * from pserialize read sections.  Note that we can't do
1360            * psref_target_destroy here.  See below.
1361            */
1362           IFNET_GLOBAL_LOCK();
1363           ifindex2ifnet[ifp->if_index] = NULL;
1364           TAILQ_REMOVE(&ifnet_list, ifp, if_list);
1365           IFNET_WRITER_REMOVE(ifp);
1366           pserialize_perform(ifnet_psz);
1367           IFNET_GLOBAL_UNLOCK();
1368 
1369           if (ifp->if_slowtimo != NULL) {
1370                     struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
1371 
1372                     mutex_enter(&isd->isd_lock);
1373                     isd->isd_dying = true;
1374                     mutex_exit(&isd->isd_lock);
1375                     callout_halt(&isd->isd_ch, NULL);
1376                     workqueue_wait(if_slowtimo_wq, &isd->isd_work);
1377                     callout_destroy(&isd->isd_ch);
1378                     mutex_destroy(&isd->isd_lock);
1379                     kmem_free(isd, sizeof(*isd));
1380 
1381                     ifp->if_slowtimo_data = NULL; /* paraonia */
1382                     ifp->if_slowtimo = NULL;      /* paranoia */
1383           }
1384           if_deferred_start_destroy(ifp);
1385 
1386           /*
1387            * Do an if_down() to give protocols a chance to do something.
1388            */
1389           if_down_deactivated(ifp);
1390 
1391 #ifdef ALTQ
1392           if (ALTQ_IS_ENABLED(&ifp->if_snd))
1393                     altq_disable(&ifp->if_snd);
1394           if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1395                     altq_detach(&ifp->if_snd);
1396 #endif
1397 
1398 #if NCARP > 0
1399           /* Remove the interface from any carp group it is a part of.  */
1400           if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP)
1401                     carp_ifdetach(ifp);
1402 #endif
1403 
1404           /*
1405            * Ensure that all packets on protocol input pktqueues have been
1406            * processed, or, at least, removed from the queues.
1407            *
1408            * A cross-call will ensure that the interrupts have completed.
1409            * FIXME: not quite..
1410            */
1411           pktq_ifdetach();
1412           xc_barrier(0);
1413 
1414           /*
1415            * Rip all the addresses off the interface.  This should make
1416            * all of the routes go away.
1417            *
1418            * pr_usrreq calls can remove an arbitrary number of ifaddrs
1419            * from the list, including our "cursor", ifa.  For safety,
1420            * and to honor the TAILQ abstraction, I just restart the
1421            * loop after each removal.  Note that the loop will exit
1422            * when all of the remaining ifaddrs belong to the AF_LINK
1423            * family.  I am counting on the historical fact that at
1424            * least one pr_usrreq in each address domain removes at
1425            * least one ifaddr.
1426            */
1427 again:
1428           /*
1429            * At this point, no other one tries to remove ifa in the list,
1430            * so we don't need to take a lock or psref.  Avoid using
1431            * IFADDR_READER_FOREACH to pass over an inspection of contract
1432            * violations of pserialize.
1433            */
1434           IFADDR_WRITER_FOREACH(ifa, ifp) {
1435                     family = ifa->ifa_addr->sa_family;
1436 #ifdef IFAREF_DEBUG
1437                     printf("if_detach: ifaddr %p, family %d, refcnt %d\n",
1438                         ifa, family, ifa->ifa_refcnt);
1439                     if (last_ifa != NULL && ifa == last_ifa)
1440                               panic("if_detach: loop detected");
1441                     last_ifa = ifa;
1442 #endif
1443                     if (family == AF_LINK)
1444                               continue;
1445                     dp = pffinddomain(family);
1446                     KASSERTMSG(dp != NULL, "no domain for AF %d", family);
1447                     /*
1448                      * XXX These PURGEIF calls are redundant with the
1449                      * purge-all-families calls below, but are left in for
1450                      * now both to make a smaller change, and to avoid
1451                      * unplanned interactions with clearing of
1452                      * ifp->if_addrlist.
1453                      */
1454                     purged = 0;
1455                     for (pr = dp->dom_protosw;
1456                          pr < dp->dom_protoswNPROTOSW; pr++) {
1457                               so.so_proto = pr;
1458                               if (pr->pr_usrreqs) {
1459                                         (void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
1460                                         purged = 1;
1461                               }
1462                     }
1463                     if (purged == 0) {
1464                               /*
1465                                * XXX What's really the best thing to do
1466                                * XXX here?  --thorpej@NetBSD.org
1467                                */
1468                               printf("if_detach: WARNING: AF %d not purged\n",
1469                                   family);
1470                               ifa_remove(ifp, ifa);
1471                     }
1472                     goto again;
1473           }
1474 
1475           if_free_sadl(ifp, 1);
1476 
1477 restart:
1478           IFADDR_WRITER_FOREACH(ifa, ifp) {
1479                     family = ifa->ifa_addr->sa_family;
1480                     KASSERT(family == AF_LINK);
1481                     ifa_remove(ifp, ifa);
1482                     goto restart;
1483           }
1484 
1485           /* Delete stray routes from the routing table. */
1486           for (i = 0; i <= AF_MAX; i++)
1487                     rt_delete_matched_entries(i, if_delroute_matcher, ifp, false);
1488 
1489           DOMAIN_FOREACH(dp) {
1490                     if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family])
1491                     {
1492                               void *p = ifp->if_afdata[dp->dom_family];
1493                               if (p) {
1494                                         ifp->if_afdata[dp->dom_family] = NULL;
1495                                         (*dp->dom_ifdetach)(ifp, p);
1496                               }
1497                     }
1498 
1499                     /*
1500                      * One would expect multicast memberships (INET and
1501                      * INET6) on UDP sockets to be purged by the PURGEIF
1502                      * calls above, but if all addresses were removed from
1503                      * the interface prior to destruction, the calls will
1504                      * not be made (e.g. ppp, for which pppd(8) generally
1505                      * removes addresses before destroying the interface).
1506                      * Because there is no invariant that multicast
1507                      * memberships only exist for interfaces with IPv4
1508                      * addresses, we must call PURGEIF regardless of
1509                      * addresses.  (Protocols which might store ifnet
1510                      * pointers are marked with PR_PURGEIF.)
1511                      */
1512                     for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
1513                     {
1514                               so.so_proto = pr;
1515                               if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF)
1516                                         (void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
1517                     }
1518           }
1519 
1520           /*
1521            * Must be done after the above pr_purgeif because if_psref may be
1522            * still used in pr_purgeif.
1523            */
1524           psref_target_destroy(&ifp->if_psref, ifnet_psref_class);
1525           PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry);
1526 
1527           pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp);
1528           (void)pfil_head_destroy(ifp->if_pfil);
1529 
1530           /* Announce that the interface is gone. */
1531           rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1532 
1533           IF_AFDATA_LOCK_DESTROY(ifp);
1534 
1535           if (ifp->if_percpuq != NULL) {
1536                     if_percpuq_destroy(ifp->if_percpuq);
1537                     ifp->if_percpuq = NULL;
1538           }
1539 
1540           mutex_obj_free(ifp->if_ioctl_lock);
1541           ifp->if_ioctl_lock = NULL;
1542           mutex_obj_free(ifp->if_snd.ifq_lock);
1543           if_stats_fini(ifp);
1544           KASSERT(!simplehook_has_hooks(ifp->if_linkstate_hooks));
1545           simplehook_destroy(ifp->if_linkstate_hooks);
1546 
1547           splx(s);
1548 
1549 #ifdef IFAREF_DEBUG
1550           if_check_and_free_ifa_list(ifp);
1551 #endif
1552 }
1553 
1554 /*
1555  * Callback for a radix tree walk to delete all references to an
1556  * ifnet.
1557  */
1558 static int
if_delroute_matcher(struct rtentry * rt,void * v)1559 if_delroute_matcher(struct rtentry *rt, void *v)
1560 {
1561           struct ifnet *ifp = (struct ifnet *)v;
1562 
1563           if (rt->rt_ifp == ifp)
1564                     return 1;
1565           else
1566                     return 0;
1567 }
1568 
1569 /*
1570  * Create a clone network interface.
1571  */
1572 static int
if_clone_create(const char * name)1573 if_clone_create(const char *name)
1574 {
1575           struct if_clone *ifc;
1576           struct ifnet *ifp;
1577           struct psref psref;
1578           int unit;
1579 
1580           KASSERT(mutex_owned(&if_clone_mtx));
1581 
1582           ifc = if_clone_lookup(name, &unit);
1583           if (ifc == NULL)
1584                     return EINVAL;
1585 
1586           ifp = if_get(name, &psref);
1587           if (ifp != NULL) {
1588                     if_put(ifp, &psref);
1589                     return EEXIST;
1590           }
1591 
1592           return (*ifc->ifc_create)(ifc, unit);
1593 }
1594 
1595 /*
1596  * Destroy a clone network interface.
1597  */
1598 static int
if_clone_destroy(const char * name)1599 if_clone_destroy(const char *name)
1600 {
1601           struct if_clone *ifc;
1602           struct ifnet *ifp;
1603           struct psref psref;
1604           int error;
1605           int (*if_ioctlfn)(struct ifnet *, u_long, void *);
1606 
1607           KASSERT(mutex_owned(&if_clone_mtx));
1608 
1609           ifc = if_clone_lookup(name, NULL);
1610           if (ifc == NULL)
1611                     return EINVAL;
1612 
1613           if (ifc->ifc_destroy == NULL)
1614                     return EOPNOTSUPP;
1615 
1616           ifp = if_get(name, &psref);
1617           if (ifp == NULL)
1618                     return ENXIO;
1619 
1620           /* We have to disable ioctls here */
1621           IFNET_LOCK(ifp);
1622           if_ioctlfn = ifp->if_ioctl;
1623           ifp->if_ioctl = if_nullioctl;
1624           IFNET_UNLOCK(ifp);
1625 
1626           /*
1627            * We cannot call ifc_destroy with holding ifp.
1628            * Releasing ifp here is safe thanks to if_clone_mtx.
1629            */
1630           if_put(ifp, &psref);
1631 
1632           error = (*ifc->ifc_destroy)(ifp);
1633 
1634           if (error != 0) {
1635                     /* We have to restore if_ioctl on error */
1636                     IFNET_LOCK(ifp);
1637                     ifp->if_ioctl = if_ioctlfn;
1638                     IFNET_UNLOCK(ifp);
1639           }
1640 
1641           return error;
1642 }
1643 
/*
 * Return true when name consists solely of the characters '0'..'9'.
 * An empty string also yields true, since it contains no other character.
 */
static bool
if_is_unit(const char *name)
{
	const char *p;

	for (p = name; *p != '\0'; p++) {
		if (!(*p >= '0' && *p <= '9'))
			return false;
	}

	return true;
}
1656 
1657 /*
1658  * Look up a network interface cloner.
1659  */
1660 static struct if_clone *
if_clone_lookup(const char * name,int * unitp)1661 if_clone_lookup(const char *name, int *unitp)
1662 {
1663           struct if_clone *ifc;
1664           const char *cp;
1665           char *dp, ifname[IFNAMSIZ + 3];
1666           int unit;
1667 
1668           KASSERT(mutex_owned(&if_clone_mtx));
1669 
1670           strcpy(ifname, "if_");
1671           /* separate interface name from unit */
1672           /* TODO: search unit number from backward */
1673           for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ &&
1674               *cp && !if_is_unit(cp);)
1675                     *dp++ = *cp++;
1676 
1677           if (cp == name || cp - name == IFNAMSIZ || !*cp)
1678                     return NULL;        /* No name or unit number */
1679           *dp++ = '\0';
1680 
1681 again:
1682           LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1683                     if (strcmp(ifname + 3, ifc->ifc_name) == 0)
1684                               break;
1685           }
1686 
1687           if (ifc == NULL) {
1688                     int error;
1689                     if (*ifname == '\0')
1690                               return NULL;
1691                     mutex_exit(&if_clone_mtx);
1692                     error = module_autoload(ifname, MODULE_CLASS_DRIVER);
1693                     mutex_enter(&if_clone_mtx);
1694                     if (error)
1695                               return NULL;
1696                     *ifname = '\0';
1697                     goto again;
1698           }
1699 
1700           unit = 0;
1701           while (cp - name < IFNAMSIZ && *cp) {
1702                     if (*cp < '0' || *cp > '9' || unit >= INT_MAX / 10) {
1703                               /* Bogus unit number. */
1704                               return NULL;
1705                     }
1706                     unit = (unit * 10) + (*cp++ - '0');
1707           }
1708 
1709           if (unitp != NULL)
1710                     *unitp = unit;
1711           return ifc;
1712 }
1713 
1714 /*
1715  * Register a network interface cloner.
1716  */
1717 void
if_clone_attach(struct if_clone * ifc)1718 if_clone_attach(struct if_clone *ifc)
1719 {
1720 
1721           mutex_enter(&if_clone_mtx);
1722           LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1723           if_cloners_count++;
1724           mutex_exit(&if_clone_mtx);
1725 }
1726 
1727 /*
1728  * Unregister a network interface cloner.
1729  */
1730 void
if_clone_detach(struct if_clone * ifc)1731 if_clone_detach(struct if_clone *ifc)
1732 {
1733 
1734           mutex_enter(&if_clone_mtx);
1735           LIST_REMOVE(ifc, ifc_list);
1736           if_cloners_count--;
1737           mutex_exit(&if_clone_mtx);
1738 }
1739 
1740 /*
1741  * Provide list of interface cloners to userspace.
1742  */
1743 int
if_clone_list(int buf_count,char * buffer,int * total)1744 if_clone_list(int buf_count, char *buffer, int *total)
1745 {
1746           char outbuf[IFNAMSIZ], *dst;
1747           struct if_clone *ifc;
1748           int count, error = 0;
1749 
1750           mutex_enter(&if_clone_mtx);
1751           *total = if_cloners_count;
1752           if ((dst = buffer) == NULL) {
1753                     /* Just asking how many there are. */
1754                     goto out;
1755           }
1756 
1757           if (buf_count < 0) {
1758                     error = EINVAL;
1759                     goto out;
1760           }
1761 
1762           count = (if_cloners_count < buf_count) ? if_cloners_count : buf_count;
1763 
1764           for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
1765                ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
1766                     (void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf));
1767                     if (outbuf[sizeof(outbuf) - 1] != '\0') {
1768                               error = ENAMETOOLONG;
1769                               goto out;
1770                     }
1771                     error = copyout(outbuf, dst, sizeof(outbuf));
1772                     if (error != 0)
1773                               break;
1774           }
1775 
1776 out:
1777           mutex_exit(&if_clone_mtx);
1778           return error;
1779 }
1780 
1781 void
ifa_psref_init(struct ifaddr * ifa)1782 ifa_psref_init(struct ifaddr *ifa)
1783 {
1784 
1785           psref_target_init(&ifa->ifa_psref, ifa_psref_class);
1786 }
1787 
1788 void
ifaref(struct ifaddr * ifa)1789 ifaref(struct ifaddr *ifa)
1790 {
1791 
1792           atomic_inc_uint(&ifa->ifa_refcnt);
1793 }
1794 
1795 void
ifafree(struct ifaddr * ifa)1796 ifafree(struct ifaddr *ifa)
1797 {
1798           KASSERT(ifa != NULL);
1799           KASSERTMSG(ifa->ifa_refcnt > 0, "ifa_refcnt=%d", ifa->ifa_refcnt);
1800 
1801           membar_release();
1802           if (atomic_dec_uint_nv(&ifa->ifa_refcnt) != 0)
1803                     return;
1804           membar_acquire();
1805           free(ifa, M_IFADDR);
1806 }
1807 
1808 bool
ifa_is_destroying(struct ifaddr * ifa)1809 ifa_is_destroying(struct ifaddr *ifa)
1810 {
1811 
1812           return ISSET(ifa->ifa_flags, IFA_DESTROYING);
1813 }
1814 
1815 void
ifa_insert(struct ifnet * ifp,struct ifaddr * ifa)1816 ifa_insert(struct ifnet *ifp, struct ifaddr *ifa)
1817 {
1818 
1819           ifa->ifa_ifp = ifp;
1820 
1821           /*
1822            * Check MP-safety for IFEF_MPSAFE drivers.
1823            * Check !IFF_RUNNING for initialization routines that normally don't
1824            * take IFNET_LOCK but it's safe because there is no competitor.
1825            * XXX there are false positive cases because IFF_RUNNING can be off on
1826            * if_stop.
1827            */
1828           KASSERT(!if_is_mpsafe(ifp) || !ISSET(ifp->if_flags, IFF_RUNNING) ||
1829               IFNET_LOCKED(ifp));
1830 
1831           TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
1832           IFADDR_ENTRY_INIT(ifa);
1833           IFADDR_WRITER_INSERT_TAIL(ifp, ifa);
1834 
1835           ifaref(ifa);
1836 }
1837 
1838 void
ifa_remove(struct ifnet * ifp,struct ifaddr * ifa)1839 ifa_remove(struct ifnet *ifp, struct ifaddr *ifa)
1840 {
1841 
1842           KASSERT(ifa->ifa_ifp == ifp);
1843           /*
1844            * Check MP-safety for IFEF_MPSAFE drivers.
1845            * if_is_deactivated indicates ifa_remove is called from if_detach
1846            * where it is safe even if IFNET_LOCK isn't held.
1847            */
1848           KASSERT(!if_is_mpsafe(ifp) || if_is_deactivated(ifp) ||
1849               IFNET_LOCKED(ifp));
1850 
1851           TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
1852           IFADDR_WRITER_REMOVE(ifa);
1853 #ifdef NET_MPSAFE
1854           IFNET_GLOBAL_LOCK();
1855           pserialize_perform(ifnet_psz);
1856           IFNET_GLOBAL_UNLOCK();
1857 #endif
1858 
1859 #ifdef NET_MPSAFE
1860           psref_target_destroy(&ifa->ifa_psref, ifa_psref_class);
1861 #endif
1862           IFADDR_ENTRY_DESTROY(ifa);
1863           ifafree(ifa);
1864 }
1865 
1866 void
ifa_acquire(struct ifaddr * ifa,struct psref * psref)1867 ifa_acquire(struct ifaddr *ifa, struct psref *psref)
1868 {
1869 
1870           PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
1871           psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class);
1872 }
1873 
1874 void
ifa_release(struct ifaddr * ifa,struct psref * psref)1875 ifa_release(struct ifaddr *ifa, struct psref *psref)
1876 {
1877 
1878           if (ifa == NULL)
1879                     return;
1880 
1881           psref_release(psref, &ifa->ifa_psref, ifa_psref_class);
1882 }
1883 
1884 bool
ifa_held(struct ifaddr * ifa)1885 ifa_held(struct ifaddr *ifa)
1886 {
1887 
1888           return psref_held(&ifa->ifa_psref, ifa_psref_class);
1889 }
1890 
/*
 * Return non-zero iff sockaddr_cmp() considers sa1 and sa2 equal.
 */
static inline int
equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
	return sockaddr_cmp(sa1, sa2) == 0;
}
1897 
1898 /*
1899  * Locate an interface based on a complete address.
1900  */
1901 /*ARGSUSED*/
1902 struct ifaddr *
ifa_ifwithaddr(const struct sockaddr * addr)1903 ifa_ifwithaddr(const struct sockaddr *addr)
1904 {
1905           struct ifnet *ifp;
1906           struct ifaddr *ifa;
1907 
1908           IFNET_READER_FOREACH(ifp) {
1909                     if (if_is_deactivated(ifp))
1910                               continue;
1911                     IFADDR_READER_FOREACH(ifa, ifp) {
1912                               if (ifa->ifa_addr->sa_family != addr->sa_family)
1913                                         continue;
1914                               if (equal(addr, ifa->ifa_addr))
1915                                         return ifa;
1916                               if ((ifp->if_flags & IFF_BROADCAST) &&
1917                                   ifa->ifa_broadaddr &&
1918                                   /* IP6 doesn't have broadcast */
1919                                   ifa->ifa_broadaddr->sa_len != 0 &&
1920                                   equal(ifa->ifa_broadaddr, addr))
1921                                         return ifa;
1922                     }
1923           }
1924           return NULL;
1925 }
1926 
1927 struct ifaddr *
ifa_ifwithaddr_psref(const struct sockaddr * addr,struct psref * psref)1928 ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref)
1929 {
1930           struct ifaddr *ifa;
1931           int s = pserialize_read_enter();
1932 
1933           ifa = ifa_ifwithaddr(addr);
1934           if (ifa != NULL)
1935                     ifa_acquire(ifa, psref);
1936           pserialize_read_exit(s);
1937 
1938           return ifa;
1939 }
1940 
1941 /*
1942  * Locate the point to point interface with a given destination address.
1943  */
1944 /*ARGSUSED*/
1945 struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr * addr)1946 ifa_ifwithdstaddr(const struct sockaddr *addr)
1947 {
1948           struct ifnet *ifp;
1949           struct ifaddr *ifa;
1950 
1951           IFNET_READER_FOREACH(ifp) {
1952                     if (if_is_deactivated(ifp))
1953                               continue;
1954                     if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1955                               continue;
1956                     IFADDR_READER_FOREACH(ifa, ifp) {
1957                               if (ifa->ifa_addr->sa_family != addr->sa_family ||
1958                                   ifa->ifa_dstaddr == NULL)
1959                                         continue;
1960                               if (equal(addr, ifa->ifa_dstaddr))
1961                                         return ifa;
1962                     }
1963           }
1964 
1965           return NULL;
1966 }
1967 
1968 struct ifaddr *
ifa_ifwithdstaddr_psref(const struct sockaddr * addr,struct psref * psref)1969 ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref)
1970 {
1971           struct ifaddr *ifa;
1972           int s;
1973 
1974           s = pserialize_read_enter();
1975           ifa = ifa_ifwithdstaddr(addr);
1976           if (ifa != NULL)
1977                     ifa_acquire(ifa, psref);
1978           pserialize_read_exit(s);
1979 
1980           return ifa;
1981 }
1982 
1983 /*
1984  * Find an interface on a specific network.  If many, choice
1985  * is most specific found.
1986  */
1987 struct ifaddr *
ifa_ifwithnet(const struct sockaddr * addr)1988 ifa_ifwithnet(const struct sockaddr *addr)
1989 {
1990           struct ifnet *ifp;
1991           struct ifaddr *ifa, *ifa_maybe = NULL;
1992           const struct sockaddr_dl *sdl;
1993           u_int af = addr->sa_family;
1994           const char *addr_data = addr->sa_data, *cplim;
1995 
1996           if (af == AF_LINK) {
1997                     sdl = satocsdl(addr);
1998                     if (sdl->sdl_index && sdl->sdl_index < if_indexlim &&
1999                         ifindex2ifnet[sdl->sdl_index] &&
2000                         !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) {
2001                               return ifindex2ifnet[sdl->sdl_index]->if_dl;
2002                     }
2003           }
2004 #ifdef NETATALK
2005           if (af == AF_APPLETALK) {
2006                     const struct sockaddr_at *sat, *sat2;
2007                     sat = (const struct sockaddr_at *)addr;
2008                     IFNET_READER_FOREACH(ifp) {
2009                               if (if_is_deactivated(ifp))
2010                                         continue;
2011                               ifa = at_ifawithnet((const struct sockaddr_at *)addr,
2012                                   ifp);
2013                               if (ifa == NULL)
2014                                         continue;
2015                               sat2 = (struct sockaddr_at *)ifa->ifa_addr;
2016                               if (sat2->sat_addr.s_net == sat->sat_addr.s_net)
2017                                         return ifa; /* exact match */
2018                               if (ifa_maybe == NULL) {
2019                                         /* else keep the if with the right range */
2020                                         ifa_maybe = ifa;
2021                               }
2022                     }
2023                     return ifa_maybe;
2024           }
2025 #endif
2026           IFNET_READER_FOREACH(ifp) {
2027                     if (if_is_deactivated(ifp))
2028                               continue;
2029                     IFADDR_READER_FOREACH(ifa, ifp) {
2030                               const char *cp, *cp2, *cp3;
2031 
2032                               if (ifa->ifa_addr->sa_family != af ||
2033                                   ifa->ifa_netmask == NULL)
2034  next:                                  continue;
2035                               cp = addr_data;
2036                               cp2 = ifa->ifa_addr->sa_data;
2037                               cp3 = ifa->ifa_netmask->sa_data;
2038                               cplim = (const char *)ifa->ifa_netmask +
2039                                   ifa->ifa_netmask->sa_len;
2040                               while (cp3 < cplim) {
2041                                         if ((*cp++ ^ *cp2++) & *cp3++) {
2042                                                   /* want to continue for() loop */
2043                                                   goto next;
2044                                         }
2045                               }
2046                               if (ifa_maybe == NULL ||
2047                                   rt_refines(ifa->ifa_netmask,
2048                                                ifa_maybe->ifa_netmask))
2049                                         ifa_maybe = ifa;
2050                     }
2051           }
2052           return ifa_maybe;
2053 }
2054 
2055 struct ifaddr *
ifa_ifwithnet_psref(const struct sockaddr * addr,struct psref * psref)2056 ifa_ifwithnet_psref(const struct sockaddr *addr, struct psref *psref)
2057 {
2058           struct ifaddr *ifa;
2059           int s;
2060 
2061           s = pserialize_read_enter();
2062           ifa = ifa_ifwithnet(addr);
2063           if (ifa != NULL)
2064                     ifa_acquire(ifa, psref);
2065           pserialize_read_exit(s);
2066 
2067           return ifa;
2068 }
2069 
2070 /*
2071  * Find the interface of the address.
2072  */
2073 struct ifaddr *
ifa_ifwithladdr(const struct sockaddr * addr)2074 ifa_ifwithladdr(const struct sockaddr *addr)
2075 {
2076           struct ifaddr *ia;
2077 
2078           if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) ||
2079               (ia = ifa_ifwithnet(addr)))
2080                     return ia;
2081           return NULL;
2082 }
2083 
2084 struct ifaddr *
ifa_ifwithladdr_psref(const struct sockaddr * addr,struct psref * psref)2085 ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref)
2086 {
2087           struct ifaddr *ifa;
2088           int s;
2089 
2090           s = pserialize_read_enter();
2091           ifa = ifa_ifwithladdr(addr);
2092           if (ifa != NULL)
2093                     ifa_acquire(ifa, psref);
2094           pserialize_read_exit(s);
2095 
2096           return ifa;
2097 }
2098 
2099 /*
2100  * Find an interface using a specific address family
2101  */
2102 struct ifaddr *
ifa_ifwithaf(int af)2103 ifa_ifwithaf(int af)
2104 {
2105           struct ifnet *ifp;
2106           struct ifaddr *ifa = NULL;
2107           int s;
2108 
2109           s = pserialize_read_enter();
2110           IFNET_READER_FOREACH(ifp) {
2111                     if (if_is_deactivated(ifp))
2112                               continue;
2113                     IFADDR_READER_FOREACH(ifa, ifp) {
2114                               if (ifa->ifa_addr->sa_family == af)
2115                                         goto out;
2116                     }
2117           }
2118 out:
2119           pserialize_read_exit(s);
2120           return ifa;
2121 }
2122 
2123 /*
2124  * Find an interface address specific to an interface best matching
2125  * a given address.
2126  */
2127 struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr * addr,struct ifnet * ifp)2128 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2129 {
2130           struct ifaddr *ifa;
2131           const char *cp, *cp2, *cp3;
2132           const char *cplim;
2133           struct ifaddr *ifa_maybe = 0;
2134           u_int af = addr->sa_family;
2135 
2136           if (if_is_deactivated(ifp))
2137                     return NULL;
2138 
2139           if (af >= AF_MAX)
2140                     return NULL;
2141 
2142           IFADDR_READER_FOREACH(ifa, ifp) {
2143                     if (ifa->ifa_addr->sa_family != af)
2144                               continue;
2145                     ifa_maybe = ifa;
2146                     if (ifa->ifa_netmask == NULL) {
2147                               if (equal(addr, ifa->ifa_addr) ||
2148                                   (ifa->ifa_dstaddr &&
2149                                    equal(addr, ifa->ifa_dstaddr)))
2150                                         return ifa;
2151                               continue;
2152                     }
2153                     cp = addr->sa_data;
2154                     cp2 = ifa->ifa_addr->sa_data;
2155                     cp3 = ifa->ifa_netmask->sa_data;
2156                     cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2157                     for (; cp3 < cplim; cp3++) {
2158                               if ((*cp++ ^ *cp2++) & *cp3)
2159                                         break;
2160                     }
2161                     if (cp3 == cplim)
2162                               return ifa;
2163           }
2164           return ifa_maybe;
2165 }
2166 
2167 struct ifaddr *
ifaof_ifpforaddr_psref(const struct sockaddr * addr,struct ifnet * ifp,struct psref * psref)2168 ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp,
2169     struct psref *psref)
2170 {
2171           struct ifaddr *ifa;
2172           int s;
2173 
2174           s = pserialize_read_enter();
2175           ifa = ifaof_ifpforaddr(addr, ifp);
2176           if (ifa != NULL)
2177                     ifa_acquire(ifa, psref);
2178           pserialize_read_exit(s);
2179 
2180           return ifa;
2181 }
2182 
2183 /*
2184  * Default action when installing a route with a Link Level gateway.
2185  * Lookup an appropriate real ifa to point to.
2186  * This should be moved to /sys/net/link.c eventually.
2187  */
2188 void
link_rtrequest(int cmd,struct rtentry * rt,const struct rt_addrinfo * info)2189 link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info)
2190 {
2191           struct ifaddr *ifa;
2192           const struct sockaddr *dst;
2193           struct ifnet *ifp;
2194           struct psref psref;
2195 
2196           if (cmd != RTM_ADD || ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
2197                     return;
2198           ifp = rt->rt_ifa->ifa_ifp;
2199           dst = rt_getkey(rt);
2200           if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) {
2201                     rt_replace_ifa(rt, ifa);
2202                     if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2203                               ifa->ifa_rtrequest(cmd, rt, info);
2204                     ifa_release(ifa, &psref);
2205           }
2206 }
2207 
/*
 * Bitmask macros to manage a densely packed link_state change queue
 * stored in the integer q.
 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and
 * LINK_STATE_UP(2) we need 2 bits for each state change.
 * As 0 is a valid state change to store, a slot with all of its bits set
 * is treated as an unset item.
 *
 * LQ_MASK(i)		bits of q occupied by slot i
 * LQ_ITEM(q, i)	value held in slot i
 * LQ_STORE(q, i, v)	overwrite slot i with v
 * LQ_MAX(q)		number of slots q can hold
 * LQ_POP(q, v)		copy slot 0 to v, shift every slot down by one
 *			and mark the top slot unset
 * LQ_PUSH(q, v)	discard slot 0, shift every slot down by one and
 *			place v in the top slot
 * LQ_FIND_UNSET(q, i)	leave in i the index of the first unset slot, or
 *			LQ_MAX(q) when every slot is in use
 */
#define LQ_ITEM_BITS		2
#define LQ_ITEM_MASK		((1 << LQ_ITEM_BITS) - 1)
#define LQ_MASK(i)		(LQ_ITEM_MASK << ((i) * LQ_ITEM_BITS))
#define LINK_STATE_UNSET	LQ_ITEM_MASK
#define LQ_ITEM(q, i)		(((q) & LQ_MASK(i)) >> ((i) * LQ_ITEM_BITS))
#define LQ_STORE(q, i, v)						      \
	do {								      \
		(q) &= ~LQ_MASK(i);					      \
		(q) |= (v) << ((i) * LQ_ITEM_BITS);			      \
	} while (0 /* CONSTCOND */)
#define LQ_MAX(q)		((sizeof(q) * NBBY) / LQ_ITEM_BITS)
#define LQ_POP(q, v)							      \
	do {								      \
		(v) = LQ_ITEM(q, 0);					      \
		(q) >>= LQ_ITEM_BITS;					      \
		(q) |= LINK_STATE_UNSET <<				      \
		    ((LQ_MAX(q) - 1) * LQ_ITEM_BITS);			      \
	} while (0 /* CONSTCOND */)
#define LQ_PUSH(q, v)							      \
	do {								      \
		(q) >>= LQ_ITEM_BITS;					      \
		(q) |= (v) << ((LQ_MAX(q) - 1) * LQ_ITEM_BITS);		      \
	} while (0 /* CONSTCOND */)
#define LQ_FIND_UNSET(q, i)						      \
	for ((i) = 0; (i) < LQ_MAX(q); (i)++) {				      \
		if (LQ_ITEM(q, i) == LINK_STATE_UNSET)			      \
			break;						      \
	}
2241 
2242 /*
2243  * Handle a change in the interface link state and
2244  * queue notifications.
2245  */
2246 void
if_link_state_change(struct ifnet * ifp,int link_state)2247 if_link_state_change(struct ifnet *ifp, int link_state)
2248 {
2249           int idx;
2250 
2251           /* Ensure change is to a valid state */
2252           switch (link_state) {
2253           case LINK_STATE_UNKNOWN:      /* FALLTHROUGH */
2254           case LINK_STATE_DOWN:                   /* FALLTHROUGH */
2255           case LINK_STATE_UP:
2256                     break;
2257           default:
2258 #ifdef DEBUG
2259                     printf("%s: invalid link state %d\n",
2260                         ifp->if_xname, link_state);
2261 #endif
2262                     return;
2263           }
2264 
2265           IF_LINK_STATE_CHANGE_LOCK(ifp);
2266 
2267           /* Find the last unset event in the queue. */
2268           LQ_FIND_UNSET(ifp->if_link_queue, idx);
2269 
2270           if (idx == 0) {
2271                     /*
2272                      * There is no queue of link state changes.
2273                      * As we have the lock we can safely compare against the
2274                      * current link state and return if the same.
2275                      * Otherwise, if scheduled is true then the interface is being
2276                      * detached and the queue is being drained so we need
2277                      * to avoid queuing more work.
2278                      */
2279                      if (ifp->if_link_state == link_state ||
2280                          ifp->if_link_scheduled)
2281                               goto out;
2282           } else {
2283                     /* Ensure link_state doesn't match the last queued state. */
2284                     if (LQ_ITEM(ifp->if_link_queue, idx - 1)
2285                         == (uint8_t)link_state)
2286                               goto out;
2287           }
2288 
2289           /* Handle queue overflow. */
2290           if (idx == LQ_MAX(ifp->if_link_queue)) {
2291                     uint8_t lost;
2292 
2293                     /*
2294                      * The DOWN state must be protected from being pushed off
2295                      * the queue to ensure that userland will always be
2296                      * in a sane state.
2297                      * Because DOWN is protected, there is no need to protect
2298                      * UNKNOWN.
2299                      * It should be invalid to change from any other state to
2300                      * UNKNOWN anyway ...
2301                      */
2302                     lost = LQ_ITEM(ifp->if_link_queue, 0);
2303                     LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state);
2304                     if (lost == LINK_STATE_DOWN) {
2305                               lost = LQ_ITEM(ifp->if_link_queue, 0);
2306                               LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN);
2307                     }
2308                     printf("%s: lost link state change %s\n",
2309                         ifp->if_xname,
2310                         lost == LINK_STATE_UP ? "UP" :
2311                         lost == LINK_STATE_DOWN ? "DOWN" :
2312                         "UNKNOWN");
2313           } else
2314                     LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state);
2315 
2316           if (ifp->if_link_scheduled)
2317                     goto out;
2318 
2319           ifp->if_link_scheduled = true;
2320           workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work, NULL);
2321 
2322 out:
2323           IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2324 }
2325 
2326 /*
2327  * Handle interface link state change notifications.
2328  */
2329 static void
if_link_state_change_process(struct ifnet * ifp,int link_state)2330 if_link_state_change_process(struct ifnet *ifp, int link_state)
2331 {
2332           struct domain *dp;
2333           const int s = splnet();
2334           bool notify;
2335 
2336           KASSERT(!cpu_intr_p());
2337 
2338           IF_LINK_STATE_CHANGE_LOCK(ifp);
2339 
2340           /* Ensure the change is still valid. */
2341           if (ifp->if_link_state == link_state) {
2342                     IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2343                     splx(s);
2344                     return;
2345           }
2346 
2347 #ifdef DEBUG
2348           log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
2349                     link_state == LINK_STATE_UP ? "UP" :
2350                     link_state == LINK_STATE_DOWN ? "DOWN" :
2351                     "UNKNOWN",
2352                     ifp->if_link_state == LINK_STATE_UP ? "UP" :
2353                     ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" :
2354                     "UNKNOWN");
2355 #endif
2356 
2357           /*
2358            * When going from UNKNOWN to UP, we need to mark existing
2359            * addresses as tentative and restart DAD as we may have
2360            * erroneously not found a duplicate.
2361            *
2362            * This needs to happen before rt_ifmsg to avoid a race where
2363            * listeners would have an address and expect it to work right
2364            * away.
2365            */
2366           notify = (link_state == LINK_STATE_UP &&
2367               ifp->if_link_state == LINK_STATE_UNKNOWN);
2368           ifp->if_link_state = link_state;
2369           /* The following routines may sleep so release the spin mutex */
2370           IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2371 
2372           KERNEL_LOCK_UNLESS_NET_MPSAFE();
2373           if (notify) {
2374                     DOMAIN_FOREACH(dp) {
2375                               if (dp->dom_if_link_state_change != NULL)
2376                                         dp->dom_if_link_state_change(ifp,
2377                                             LINK_STATE_DOWN);
2378                     }
2379           }
2380 
2381           /* Notify that the link state has changed. */
2382           rt_ifmsg(ifp);
2383 
2384           simplehook_dohooks(ifp->if_linkstate_hooks);
2385 
2386           DOMAIN_FOREACH(dp) {
2387                     if (dp->dom_if_link_state_change != NULL)
2388                               dp->dom_if_link_state_change(ifp, link_state);
2389           }
2390           KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2391           splx(s);
2392 }
2393 
2394 /*
2395  * Process the interface link state change queue.
2396  */
2397 static void
if_link_state_change_work(struct work * work,void * arg)2398 if_link_state_change_work(struct work *work, void *arg)
2399 {
2400           struct ifnet *ifp = container_of(work, struct ifnet, if_link_work);
2401           uint8_t state;
2402 
2403           KERNEL_LOCK_UNLESS_NET_MPSAFE();
2404           const int s = splnet();
2405 
2406           /*
2407            * Pop a link state change from the queue and process it.
2408            * If there is nothing to process then if_detach() has been called.
2409            * We keep if_link_scheduled = true so the queue can safely drain
2410            * without more work being queued.
2411            */
2412           IF_LINK_STATE_CHANGE_LOCK(ifp);
2413           LQ_POP(ifp->if_link_queue, state);
2414           IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2415           if (state == LINK_STATE_UNSET)
2416                     goto out;
2417 
2418           if_link_state_change_process(ifp, state);
2419 
2420           /* If there is a link state change to come, schedule it. */
2421           IF_LINK_STATE_CHANGE_LOCK(ifp);
2422           if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) {
2423                     ifp->if_link_scheduled = true;
2424                     workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work,
2425                         NULL);
2426           } else
2427                     ifp->if_link_scheduled = false;
2428           IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2429 
2430 out:
2431           splx(s);
2432           KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2433 }
2434 
2435 void *
if_linkstate_change_establish(struct ifnet * ifp,void (* fn)(void *),void * arg)2436 if_linkstate_change_establish(struct ifnet *ifp, void (*fn)(void *), void *arg)
2437 {
2438           khook_t *hk;
2439 
2440           hk = simplehook_establish(ifp->if_linkstate_hooks, fn, arg);
2441 
2442           return (void *)hk;
2443 }
2444 
2445 void
if_linkstate_change_disestablish(struct ifnet * ifp,void * vhook,kmutex_t * lock)2446 if_linkstate_change_disestablish(struct ifnet *ifp, void *vhook,
2447     kmutex_t *lock)
2448 {
2449 
2450           simplehook_disestablish(ifp->if_linkstate_hooks, vhook, lock);
2451 }
2452 
2453 /*
2454  * Used to mark addresses on an interface as DETATCHED or TENTATIVE
2455  * and thus start Duplicate Address Detection without changing the
2456  * real link state.
2457  */
2458 void
if_domain_link_state_change(struct ifnet * ifp,int link_state)2459 if_domain_link_state_change(struct ifnet *ifp, int link_state)
2460 {
2461           struct domain *dp;
2462 
2463           const int s = splnet();
2464           KERNEL_LOCK_UNLESS_NET_MPSAFE();
2465 
2466           DOMAIN_FOREACH(dp) {
2467                     if (dp->dom_if_link_state_change != NULL)
2468                               dp->dom_if_link_state_change(ifp, link_state);
2469           }
2470 
2471           splx(s);
2472           KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2473 }
2474 
2475 /*
2476  * Default action when installing a local route on a point-to-point
2477  * interface.
2478  */
2479 void
p2p_rtrequest(int req,struct rtentry * rt,__unused const struct rt_addrinfo * info)2480 p2p_rtrequest(int req, struct rtentry *rt,
2481     __unused const struct rt_addrinfo *info)
2482 {
2483           struct ifnet *ifp = rt->rt_ifp;
2484           struct ifaddr *ifa, *lo0ifa;
2485           int s = pserialize_read_enter();
2486 
2487           switch (req) {
2488           case RTM_ADD:
2489                     if ((rt->rt_flags & RTF_LOCAL) == 0)
2490                               break;
2491 
2492                     rt->rt_ifp = lo0ifp;
2493 
2494                     if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
2495                               break;
2496 
2497                     IFADDR_READER_FOREACH(ifa, ifp) {
2498                               if (equal(rt_getkey(rt), ifa->ifa_addr))
2499                                         break;
2500                     }
2501                     if (ifa == NULL)
2502                               break;
2503 
2504                     /*
2505                      * Ensure lo0 has an address of the same family.
2506                      */
2507                     IFADDR_READER_FOREACH(lo0ifa, lo0ifp) {
2508                               if (lo0ifa->ifa_addr->sa_family ==
2509                                   ifa->ifa_addr->sa_family)
2510                                         break;
2511                     }
2512                     if (lo0ifa == NULL)
2513                               break;
2514 
2515                     /*
2516                      * Make sure to set rt->rt_ifa to the interface
2517                      * address we are using, otherwise we will have trouble
2518                      * with source address selection.
2519                      */
2520                     if (ifa != rt->rt_ifa)
2521                               rt_replace_ifa(rt, ifa);
2522                     break;
2523           case RTM_DELETE:
2524           default:
2525                     break;
2526           }
2527           pserialize_read_exit(s);
2528 }
2529 
/*
 * _if_down(ifp)
 *
 *	Common body of if_down_locked() and if_down_deactivated().
 *	Clears IFF_UP, timestamps the change, reports PRC_IFDOWN for every
 *	address on the interface, purges the send queue and then announces
 *	the change via rt_ifmsg() and each domain's dom_if_down hook.
 */
static void
_if_down(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct domain *dp;
	struct psref psref;

	ifp->if_flags &= ~IFF_UP;
	nanotime(&ifp->if_lastchange);

	/*
	 * Walk the address list.  Each address is pinned with
	 * ifa_acquire() so that the pserialize read section can be left
	 * around the pfctlinput() call; the section is re-entered before
	 * the address is released and the iteration continues.
	 */
	const int bound = curlwp_bind();
	int s = pserialize_read_enter();
	IFADDR_READER_FOREACH(ifa, ifp) {
		ifa_acquire(ifa, &psref);
		pserialize_read_exit(s);

		pfctlinput(PRC_IFDOWN, ifa->ifa_addr);

		s = pserialize_read_enter();
		ifa_release(ifa, &psref);
	}
	pserialize_read_exit(s);
	curlwp_bindx(bound);

	/* Drop whatever is still waiting in the send queue. */
	IFQ_PURGE(&ifp->if_snd);
#if NCARP > 0
	if (ifp->if_carp)
		carp_carpdev_state(ifp);
#endif
	rt_ifmsg(ifp);
	/* Give every domain that registered a hook a chance to react. */
	DOMAIN_FOREACH(dp) {
		if (dp->dom_if_down)
			dp->dom_if_down(ifp);
	}
}
2565 
/*
 * Take down an interface that has already been deactivated.  The
 * deactivated state is asserted; no IFNET_LOCKED assertion is made here.
 */
static void
if_down_deactivated(struct ifnet *ifp)
{

	KASSERT(if_is_deactivated(ifp));
	_if_down(ifp);
}
2573 
/*
 * Mark an interface down and notify protocols of the transition.
 * The caller must already hold IFNET_LOCK(ifp); this is asserted.
 */
void
if_down_locked(struct ifnet *ifp)
{

	KASSERT(IFNET_LOCKED(ifp));
	_if_down(ifp);
}
2581 
/*
 * Mark an interface down and notify protocols of
 * the transition.  Takes and releases IFNET_LOCK(ifp) around
 * if_down_locked(), so the caller must not already hold it.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{

	IFNET_LOCK(ifp);
	if_down_locked(ifp);
	IFNET_UNLOCK(ifp);
}
2595 
/*
 * Mark an interface up and notify protocols of the transition.
 *
 * Must be called with IFNET_LOCK(ifp) (the if_ioctl_lock) held, and the
 * interface must not be deactivated; both conditions are asserted.
 */
static void
if_up_locked(struct ifnet *ifp)
{
#ifdef notyet
	struct ifaddr *ifa;
#endif
	struct domain *dp;

	KASSERT(IFNET_LOCKED(ifp));

	KASSERT(!if_is_deactivated(ifp));
	ifp->if_flags |= IFF_UP;
	nanotime(&ifp->if_lastchange);
#ifdef notyet
	/* this has no effect on IP, and will kill all ISO connections XXX */
	IFADDR_READER_FOREACH(ifa, ifp)
		pfctlinput(PRC_IFUP, ifa->ifa_addr);
#endif
#if NCARP > 0
	if (ifp->if_carp)
		carp_carpdev_state(ifp);
#endif
	rt_ifmsg(ifp);
	/* Give every domain that registered a hook a chance to react. */
	DOMAIN_FOREACH(dp) {
		if (dp->dom_if_up)
			dp->dom_if_up(ifp);
	}
}
2627 
2628 /*
2629  * Handle interface slowtimo timer routine.  Called
2630  * from softclock, we decrement timer (if set) and
2631  * call the appropriate interface routine on expiration.
2632  */
2633 static bool
if_slowtimo_countdown(struct ifnet * ifp)2634 if_slowtimo_countdown(struct ifnet *ifp)
2635 {
2636           bool fire = false;
2637           const int s = splnet();
2638 
2639           KERNEL_LOCK(1, NULL);
2640           if (ifp->if_timer != 0 && --ifp->if_timer == 0)
2641                     fire = true;
2642           KERNEL_UNLOCK_ONE(NULL);
2643           splx(s);
2644 
2645           return fire;
2646 }
2647 
2648 static void
if_slowtimo_intr(void * arg)2649 if_slowtimo_intr(void *arg)
2650 {
2651           struct ifnet *ifp = arg;
2652           struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
2653 
2654           mutex_enter(&isd->isd_lock);
2655           if (!isd->isd_dying) {
2656                     if (isd->isd_trigger || if_slowtimo_countdown(ifp)) {
2657                               if (!isd->isd_queued) {
2658                                         isd->isd_queued = true;
2659                                         workqueue_enqueue(if_slowtimo_wq,
2660                                             &isd->isd_work, NULL);
2661                               }
2662                     } else
2663                               callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
2664           }
2665           mutex_exit(&isd->isd_lock);
2666 }
2667 
2668 static void
if_slowtimo_work(struct work * work,void * arg)2669 if_slowtimo_work(struct work *work, void *arg)
2670 {
2671           struct if_slowtimo_data *isd =
2672               container_of(work, struct if_slowtimo_data, isd_work);
2673           struct ifnet *ifp = isd->isd_ifp;
2674           const int s = splnet();
2675 
2676           KERNEL_LOCK(1, NULL);
2677           (*ifp->if_slowtimo)(ifp);
2678           KERNEL_UNLOCK_ONE(NULL);
2679           splx(s);
2680 
2681           mutex_enter(&isd->isd_lock);
2682           if (isd->isd_trigger) {
2683                     isd->isd_trigger = false;
2684                     printf("%s: watchdog triggered\n", ifp->if_xname);
2685           }
2686           isd->isd_queued = false;
2687           if (!isd->isd_dying)
2688                     callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
2689           mutex_exit(&isd->isd_lock);
2690 }
2691 
2692 static int
sysctl_if_watchdog(SYSCTLFN_ARGS)2693 sysctl_if_watchdog(SYSCTLFN_ARGS)
2694 {
2695           struct sysctlnode node = *rnode;
2696           struct ifnet *ifp = node.sysctl_data;
2697           struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
2698           int arg = 0;
2699           int error;
2700 
2701           node.sysctl_data = &arg;
2702           error = sysctl_lookup(SYSCTLFN_CALL(&node));
2703           if (error || newp == NULL)
2704                     return error;
2705           if (arg) {
2706                     mutex_enter(&isd->isd_lock);
2707                     KASSERT(!isd->isd_dying);
2708                     isd->isd_trigger = true;
2709                     callout_schedule(&isd->isd_ch, 0);
2710                     mutex_exit(&isd->isd_lock);
2711           }
2712 
2713           return 0;
2714 }
2715 
/*
 * Create the sysctl nodes net.interfaces.<xname>.watchdog.trigger for
 * ifp, recording them in ifp->if_sysctl_log.  The leaf is a read/write
 * integer handled by sysctl_if_watchdog() with ifp as its data pointer.
 * On any failure a message is printed and the function returns; nodes
 * created before the failure are not torn down here.
 */
static void
sysctl_watchdog_setup(struct ifnet *ifp)
{
	struct sysctllog **clog = &ifp->if_sysctl_log;
	const struct sysctlnode *rnode;

	/* net.interfaces */
	if (sysctl_createv(clog, 0, NULL, &rnode,
		CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces",
		SYSCTL_DESCR("Per-interface controls"),
		NULL, 0, NULL, 0,
		CTL_NET, CTL_CREATE, CTL_EOL) != 0)
		goto bad;
	/* net.interfaces.<xname> */
	if (sysctl_createv(clog, 0, &rnode, &rnode,
		CTLFLAG_PERMANENT, CTLTYPE_NODE, ifp->if_xname,
		SYSCTL_DESCR("Interface controls"),
		NULL, 0, NULL, 0,
		CTL_CREATE, CTL_EOL) != 0)
		goto bad;
	/* net.interfaces.<xname>.watchdog */
	if (sysctl_createv(clog, 0, &rnode, &rnode,
		CTLFLAG_PERMANENT, CTLTYPE_NODE, "watchdog",
		SYSCTL_DESCR("Interface watchdog controls"),
		NULL, 0, NULL, 0,
		CTL_CREATE, CTL_EOL) != 0)
		goto bad;
	/* net.interfaces.<xname>.watchdog.trigger */
	if (sysctl_createv(clog, 0, &rnode, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "trigger",
		SYSCTL_DESCR("Trigger watchdog timeout"),
		sysctl_if_watchdog, 0, (int *)ifp, 0,
		CTL_CREATE, CTL_EOL) != 0)
		goto bad;

	return;

bad:
	printf("%s: could not attach sysctl watchdog nodes\n", ifp->if_xname);
}
2752 
/*
 * Mark an interface up and notify protocols of
 * the transition.  Takes and releases IFNET_LOCK(ifp) around
 * if_up_locked(), so the caller must not already hold it.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{

	IFNET_LOCK(ifp);
	if_up_locked(ifp);
	IFNET_UNLOCK(ifp);
}
2766 
2767 /*
2768  * Set/clear promiscuous mode on interface ifp based on the truth value
2769  * of pswitch.  The calls are reference counted so that only the first
2770  * "on" request actually has an effect, as does the final "off" request.
2771  * Results are undefined if the "off" and "on" requests are not matched.
2772  */
2773 int
ifpromisc_locked(struct ifnet * ifp,int pswitch)2774 ifpromisc_locked(struct ifnet *ifp, int pswitch)
2775 {
2776           int pcount, ret = 0;
2777           u_short nflags;
2778 
2779           KASSERT(IFNET_LOCKED(ifp));
2780 
2781           pcount = ifp->if_pcount;
2782           if (pswitch) {
2783                     /*
2784                      * Allow the device to be "placed" into promiscuous
2785                      * mode even if it is not configured up.  It will
2786                      * consult IFF_PROMISC when it is brought up.
2787                      */
2788                     if (ifp->if_pcount++ != 0)
2789                               goto out;
2790                     nflags = ifp->if_flags | IFF_PROMISC;
2791           } else {
2792                     if (--ifp->if_pcount > 0)
2793                               goto out;
2794                     nflags = ifp->if_flags & ~IFF_PROMISC;
2795           }
2796           ret = if_flags_set(ifp, nflags);
2797           /* Restore interface state if not successful. */
2798           if (ret != 0)
2799                     ifp->if_pcount = pcount;
2800 
2801 out:
2802           return ret;
2803 }
2804 
/*
 * Locking wrapper around ifpromisc_locked(): takes IFNET_LOCK(ifp),
 * applies the promiscuous-mode request and returns its result.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int e;

	IFNET_LOCK(ifp);
	e = ifpromisc_locked(ifp, pswitch);
	IFNET_UNLOCK(ifp);

	return e;
}
2816 
2817 /*
2818  * if_ioctl(ifp, cmd, data)
2819  *
2820  *        Apply an ioctl command to the interface.  Returns 0 on success,
2821  *        nonzero errno(3) number on failure.
2822  *
2823  *        For SIOCADDMULTI/SIOCDELMULTI, caller need not hold locks -- it
2824  *        is the driver's responsibility to take any internal locks.
2825  *        (Kernel logic should generally invoke these only through
2826  *        if_mcast_op.)
2827  *
2828  *        For all other ioctls, caller must hold ifp->if_ioctl_lock,
2829  *        a.k.a. IFNET_LOCK.  May sleep.
2830  */
2831 int
if_ioctl(struct ifnet * ifp,u_long cmd,void * data)2832 if_ioctl(struct ifnet *ifp, u_long cmd, void *data)
2833 {
2834 
2835           switch (cmd) {
2836           case SIOCADDMULTI:
2837           case SIOCDELMULTI:
2838                     break;
2839           default:
2840                     KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
2841           }
2842 
2843           return (*ifp->if_ioctl)(ifp, cmd, data);
2844 }
2845 
/*
 * if_init(ifp)
 *
 *	Prepare the hardware underlying ifp to process packets
 *	according to its current configuration.  Returns 0 on success,
 *	nonzero errno(3) number on failure.
 *
 *	This is a checked call through ifp->if_init; the return value
 *	is whatever that routine returns.
 *
 *	May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 *	IFNET_LOCK.
 */
int
if_init(struct ifnet *ifp)
{

	KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);

	return (*ifp->if_init)(ifp);
}
2864 
/*
 * if_stop(ifp, disable)
 *
 *	Stop the hardware underlying ifp from processing packets.
 *
 *	If disable is true, ... XXX(?)
 *	(disable is passed through unchanged to ifp->if_stop; its
 *	meaning is defined by the driver.)
 *
 *	May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 *	IFNET_LOCK.
 */
void
if_stop(struct ifnet *ifp, int disable)
{

	KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);

	(*ifp->if_stop)(ifp, disable);
}
2883 
2884 /*
2885  * Map interface name to
2886  * interface structure pointer.
2887  */
2888 struct ifnet *
ifunit(const char * name)2889 ifunit(const char *name)
2890 {
2891           struct ifnet *ifp;
2892           const char *cp = name;
2893           u_int unit = 0;
2894           u_int i;
2895 
2896           /*
2897            * If the entire name is a number, treat it as an ifindex.
2898            */
2899           for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++)
2900                     unit = unit * 10 + (*cp - '0');
2901 
2902           /*
2903            * If the number took all of the name, then it's a valid ifindex.
2904            */
2905           if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
2906                     return if_byindex(unit);
2907 
2908           ifp = NULL;
2909           const int s = pserialize_read_enter();
2910           IFNET_READER_FOREACH(ifp) {
2911                     if (if_is_deactivated(ifp))
2912                               continue;
2913                     if (strcmp(ifp->if_xname, name) == 0)
2914                               goto out;
2915           }
2916 out:
2917           pserialize_read_exit(s);
2918           return ifp;
2919 }
2920 
2921 /*
2922  * Get a reference of an ifnet object by an interface name.
2923  * The returned reference is protected by psref(9). The caller
2924  * must release a returned reference by if_put after use.
2925  */
2926 struct ifnet *
if_get(const char * name,struct psref * psref)2927 if_get(const char *name, struct psref *psref)
2928 {
2929           struct ifnet *ifp;
2930           const char *cp = name;
2931           u_int unit = 0;
2932           u_int i;
2933 
2934           /*
2935            * If the entire name is a number, treat it as an ifindex.
2936            */
2937           for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++)
2938                     unit = unit * 10 + (*cp - '0');
2939 
2940           /*
2941            * If the number took all of the name, then it's a valid ifindex.
2942            */
2943           if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
2944                     return if_get_byindex(unit, psref);
2945 
2946           ifp = NULL;
2947           const int s = pserialize_read_enter();
2948           IFNET_READER_FOREACH(ifp) {
2949                     if (if_is_deactivated(ifp))
2950                               continue;
2951                     if (strcmp(ifp->if_xname, name) == 0) {
2952                               PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
2953                               psref_acquire(psref, &ifp->if_psref,
2954                                   ifnet_psref_class);
2955                               goto out;
2956                     }
2957           }
2958 out:
2959           pserialize_read_exit(s);
2960           return ifp;
2961 }
2962 
/*
 * Release a reference of an ifnet object given by if_get, if_get_byindex
 * or if_get_bylla.  Passing a NULL ifp is allowed and does nothing, so
 * callers need not check the result of a failed lookup first.
 */
void
if_put(const struct ifnet *ifp, struct psref *psref)
{

	if (ifp == NULL)
		return;

	psref_release(psref, &ifp->if_psref, ifnet_psref_class);
}
2976 
/*
 * Return ifp having idx. Return NULL if not found.  Normally if_byindex
 * should be used.
 *
 * An idx at or beyond if_indexlim yields NULL; otherwise the result is
 * the ifindex2ifnet[] slot as is, with no check for deactivation.
 */
ifnet_t *
_if_byindex(u_int idx)
{

	return (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL;
}
2987 
2988 /*
2989  * Return ifp having idx. Return NULL if not found or the found ifp is
2990  * already deactivated.
2991  */
2992 ifnet_t *
if_byindex(u_int idx)2993 if_byindex(u_int idx)
2994 {
2995           ifnet_t *ifp;
2996 
2997           ifp = _if_byindex(idx);
2998           if (ifp != NULL && if_is_deactivated(ifp))
2999                     ifp = NULL;
3000           return ifp;
3001 }
3002 
3003 /*
3004  * Get a reference of an ifnet object by an interface index.
3005  * The returned reference is protected by psref(9). The caller
3006  * must release a returned reference by if_put after use.
3007  */
3008 ifnet_t *
if_get_byindex(u_int idx,struct psref * psref)3009 if_get_byindex(u_int idx, struct psref *psref)
3010 {
3011           ifnet_t *ifp;
3012 
3013           const int s = pserialize_read_enter();
3014           ifp = if_byindex(idx);
3015           if (__predict_true(ifp != NULL)) {
3016                     PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
3017                     psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
3018           }
3019           pserialize_read_exit(s);
3020 
3021           return ifp;
3022 }
3023 
3024 ifnet_t *
if_get_bylla(const void * lla,unsigned char lla_len,struct psref * psref)3025 if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref)
3026 {
3027           ifnet_t *ifp;
3028 
3029           const int s = pserialize_read_enter();
3030           IFNET_READER_FOREACH(ifp) {
3031                     if (if_is_deactivated(ifp))
3032                               continue;
3033                     if (ifp->if_addrlen != lla_len)
3034                               continue;
3035                     if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) {
3036                               psref_acquire(psref, &ifp->if_psref,
3037                                   ifnet_psref_class);
3038                               break;
3039                     }
3040           }
3041           pserialize_read_exit(s);
3042 
3043           return ifp;
3044 }
3045 
/*
 * Take a psref(9) reference on ifp.
 *
 * Note that it's safe only if the passed ifp is guaranteed to not be freed,
 * for example using pserialize or the ifp is already held or some other
 * object is held which guarantees the ifp to not be freed indirectly.
 */
void
if_acquire(struct ifnet *ifp, struct psref *psref)
{

	KASSERT(ifp->if_index != 0);
	psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
}
3058 
/*
 * Return the result of psref_held() for ifp's psref target in the
 * ifnet psref class.
 */
bool
if_held(struct ifnet *ifp)
{

	return psref_held(&ifp->if_psref, ifnet_psref_class);
}
3065 
3066 /*
3067  * Some tunnel interfaces can nest, e.g. IPv4 over IPv4 gif(4) tunnel over
3068  * IPv4. Check the tunnel nesting count.
3069  * Return > 0, if tunnel nesting count is more than limit.
3070  * Return 0, if tunnel nesting count is equal or less than limit.
3071  */
3072 int
if_tunnel_check_nesting(struct ifnet * ifp,struct mbuf * m,int limit)3073 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, int limit)
3074 {
3075           struct m_tag *mtag;
3076           int *count;
3077 
3078           mtag = m_tag_find(m, PACKET_TAG_TUNNEL_INFO);
3079           if (mtag != NULL) {
3080                     count = (int *)(mtag + 1);
3081                     if (++(*count) > limit) {
3082                               log(LOG_NOTICE,
3083                                   "%s: recursively called too many times(%d)\n",
3084                                   ifp->if_xname, *count);
3085                               return EIO;
3086                     }
3087           } else {
3088                     mtag = m_tag_get(PACKET_TAG_TUNNEL_INFO, sizeof(*count),
3089                         M_NOWAIT);
3090                     if (mtag != NULL) {
3091                               m_tag_prepend(m, mtag);
3092                               count = (int *)(mtag + 1);
3093                               *count = 0;
3094                     } else {
3095                               log(LOG_DEBUG, "%s: m_tag_get() failed, "
3096                                   "recursion calls are not prevented.\n",
3097                                   ifp->if_xname);
3098                     }
3099           }
3100 
3101           return 0;
3102 }
3103 
/*
 * percpu constructor for a struct tunnel_ro: allocate the zeroed
 * tr_ro object and the mutex object that tr_lock points to.
 * Registered by if_tunnel_alloc_ro_percpu().
 */
static void
if_tunnel_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct tunnel_ro *tro = p;

	tro->tr_ro = kmem_zalloc(sizeof(*tro->tr_ro), KM_SLEEP);
	tro->tr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}
3112 
/*
 * percpu destructor for a struct tunnel_ro: undo
 * if_tunnel_ro_init_pc() by calling rtcache_free() on tr_ro, freeing
 * its memory, and freeing the mutex object.
 */
static void
if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct tunnel_ro *tro = p;

	rtcache_free(tro->tr_ro);
	kmem_free(tro->tr_ro, sizeof(*tro->tr_ro));

	mutex_obj_free(tro->tr_lock);
}
3123 
/*
 * Create percpu storage holding one struct tunnel_ro per CPU, set up
 * by if_tunnel_ro_init_pc() and torn down by if_tunnel_ro_fini_pc().
 * Release it with if_tunnel_free_ro_percpu().
 */
percpu_t *
if_tunnel_alloc_ro_percpu(void)
{

	return percpu_create(sizeof(struct tunnel_ro),
	    if_tunnel_ro_init_pc, if_tunnel_ro_fini_pc, NULL);
}
3131 
/*
 * Free percpu storage obtained from if_tunnel_alloc_ro_percpu().
 */
void
if_tunnel_free_ro_percpu(percpu_t *ro_percpu)
{

	percpu_free(ro_percpu, sizeof(struct tunnel_ro));
}
3138 
3139 
/*
 * percpu_foreach() callback: call rtcache_free() on one CPU's tr_ro
 * while holding that entry's tr_lock.
 */
static void
if_tunnel_rtcache_free_pc(void *p, void *arg __unused,
    struct cpu_info *ci __unused)
{
	struct tunnel_ro *tro = p;

	mutex_enter(tro->tr_lock);
	rtcache_free(tro->tr_ro);
	mutex_exit(tro->tr_lock);
}
3150 
/*
 * Run if_tunnel_rtcache_free_pc() on every CPU's entry of ro_percpu.
 */
void
if_tunnel_ro_percpu_rtcache_free(percpu_t *ro_percpu)
{

	percpu_foreach(ro_percpu, if_tunnel_rtcache_free_pc, NULL);
}
3156 
/*
 * Fill *ifi with a snapshot of ifp's statistics and descriptive
 * fields.  zero_stats is passed through to if_stats_to_if_data().
 */
void
if_export_if_data(ifnet_t * const ifp, struct if_data *ifi, bool zero_stats)
{

	/* Collect the volatile stats first; this zeros *ifi. */
	if_stats_to_if_data(ifp, ifi, zero_stats);

	/* Then copy the descriptive fields straight from the ifnet. */
	ifi->ifi_type = ifp->if_type;
	ifi->ifi_addrlen = ifp->if_addrlen;
	ifi->ifi_hdrlen = ifp->if_hdrlen;
	ifi->ifi_link_state = ifp->if_link_state;
	ifi->ifi_mtu = ifp->if_mtu;
	ifi->ifi_metric = ifp->if_metric;
	ifi->ifi_baudrate = ifp->if_baudrate;
	ifi->ifi_lastchange = ifp->if_lastchange;
}
3173 
3174 /* common */
3175 int
ifioctl_common(struct ifnet * ifp,u_long cmd,void * data)3176 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data)
3177 {
3178           struct ifreq *ifr;
3179           struct ifcapreq *ifcr;
3180           struct ifdatareq *ifdr;
3181           unsigned short flags;
3182           char *descr;
3183           int error;
3184 
3185           switch (cmd) {
3186           case SIOCSIFCAP:
3187                     ifcr = data;
3188                     if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0)
3189                               return EINVAL;
3190 
3191                     if (ifcr->ifcr_capenable == ifp->if_capenable)
3192                               return 0;
3193 
3194                     ifp->if_capenable = ifcr->ifcr_capenable;
3195 
3196                     /* Pre-compute the checksum flags mask. */
3197                     ifp->if_csum_flags_tx = 0;
3198                     ifp->if_csum_flags_rx = 0;
3199                     if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx)
3200                               ifp->if_csum_flags_tx |= M_CSUM_IPv4;
3201                     if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx)
3202                               ifp->if_csum_flags_rx |= M_CSUM_IPv4;
3203 
3204                     if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx)
3205                               ifp->if_csum_flags_tx |= M_CSUM_TCPv4;
3206                     if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx)
3207                               ifp->if_csum_flags_rx |= M_CSUM_TCPv4;
3208 
3209                     if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx)
3210                               ifp->if_csum_flags_tx |= M_CSUM_UDPv4;
3211                     if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx)
3212                               ifp->if_csum_flags_rx |= M_CSUM_UDPv4;
3213 
3214                     if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx)
3215                               ifp->if_csum_flags_tx |= M_CSUM_TCPv6;
3216                     if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx)
3217                               ifp->if_csum_flags_rx |= M_CSUM_TCPv6;
3218 
3219                     if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx)
3220                               ifp->if_csum_flags_tx |= M_CSUM_UDPv6;
3221                     if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx)
3222                               ifp->if_csum_flags_rx |= M_CSUM_UDPv6;
3223 
3224                     if (ifp->if_capenable & IFCAP_TSOv4)
3225                               ifp->if_csum_flags_tx |= M_CSUM_TSOv4;
3226                     if (ifp->if_capenable & IFCAP_TSOv6)
3227                               ifp->if_csum_flags_tx |= M_CSUM_TSOv6;
3228 
3229 #if NBRIDGE > 0
3230                     if (ifp->if_bridge != NULL)
3231                               bridge_calc_csum_flags(ifp->if_bridge);
3232 #endif
3233 
3234                     if (ifp->if_flags & IFF_UP)
3235                               return ENETRESET;
3236                     return 0;
3237           case SIOCSIFFLAGS:
3238                     ifr = data;
3239                     /*
3240                      * If if_is_mpsafe(ifp), KERNEL_LOCK isn't held here, but if_up
3241                      * and if_down aren't MP-safe yet, so we must hold the lock.
3242                      */
3243                     KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
3244                     if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
3245                               const int s = splsoftnet();
3246                               if_down_locked(ifp);
3247                               splx(s);
3248                     }
3249                     if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
3250                               const int s = splsoftnet();
3251                               if_up_locked(ifp);
3252                               splx(s);
3253                     }
3254                     KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
3255                     flags = (ifp->if_flags & IFF_CANTCHANGE) |
3256                         (ifr->ifr_flags &~ IFF_CANTCHANGE);
3257                     if (ifp->if_flags != flags) {
3258                               ifp->if_flags = flags;
3259                               /* Notify that the flags have changed. */
3260                               rt_ifmsg(ifp);
3261                     }
3262                     break;
3263           case SIOCGIFFLAGS:
3264                     ifr = data;
3265                     ifr->ifr_flags = ifp->if_flags;
3266                     break;
3267 
3268           case SIOCGIFMETRIC:
3269                     ifr = data;
3270                     ifr->ifr_metric = ifp->if_metric;
3271                     break;
3272 
3273           case SIOCGIFMTU:
3274                     ifr = data;
3275                     ifr->ifr_mtu = ifp->if_mtu;
3276                     break;
3277 
3278           case SIOCGIFDLT:
3279                     ifr = data;
3280                     ifr->ifr_dlt = ifp->if_dlt;
3281                     break;
3282 
3283           case SIOCGIFCAP:
3284                     ifcr = data;
3285                     ifcr->ifcr_capabilities = ifp->if_capabilities;
3286                     ifcr->ifcr_capenable = ifp->if_capenable;
3287                     break;
3288 
3289           case SIOCSIFMETRIC:
3290                     ifr = data;
3291                     ifp->if_metric = ifr->ifr_metric;
3292                     break;
3293 
3294           case SIOCGIFDATA:
3295                     ifdr = data;
3296                     if_export_if_data(ifp, &ifdr->ifdr_data, false);
3297                     break;
3298 
3299           case SIOCGIFINDEX:
3300                     ifr = data;
3301                     ifr->ifr_index = ifp->if_index;
3302                     break;
3303 
3304           case SIOCZIFDATA:
3305                     ifdr = data;
3306                     if_export_if_data(ifp, &ifdr->ifdr_data, true);
3307                     getnanotime(&ifp->if_lastchange);
3308                     break;
3309           case SIOCSIFMTU:
3310                     ifr = data;
3311                     if (ifp->if_mtu == ifr->ifr_mtu)
3312                               break;
3313                     ifp->if_mtu = ifr->ifr_mtu;
3314                     return ENETRESET;
3315           case SIOCSIFDESCR:
3316                     error = kauth_authorize_network(kauth_cred_get(),
3317                         KAUTH_NETWORK_INTERFACE,
3318                         KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
3319                         NULL);
3320                     if (error)
3321                               return error;
3322 
3323                     ifr = data;
3324 
3325                     if (ifr->ifr_buflen > IFDESCRSIZE)
3326                               return ENAMETOOLONG;
3327 
3328                     if (ifr->ifr_buf == NULL || ifr->ifr_buflen == 0) {
3329                               /* unset description */
3330                               descr = NULL;
3331                     } else {
3332                               descr = kmem_zalloc(IFDESCRSIZE, KM_SLEEP);
3333                               /*
3334                                * copy (IFDESCRSIZE - 1) bytes to ensure
3335                                * terminating nul
3336                                */
3337                               error = copyin(ifr->ifr_buf, descr, IFDESCRSIZE - 1);
3338                               if (error) {
3339                                         kmem_free(descr, IFDESCRSIZE);
3340                                         return error;
3341                               }
3342                     }
3343 
3344                     if (ifp->if_description != NULL)
3345                               kmem_free(ifp->if_description, IFDESCRSIZE);
3346 
3347                     ifp->if_description = descr;
3348                     break;
3349 
3350           case SIOCGIFDESCR:
3351                     ifr = data;
3352                     descr = ifp->if_description;
3353 
3354                     if (descr == NULL)
3355                               return ENOMSG;
3356 
3357                     if (ifr->ifr_buflen < IFDESCRSIZE)
3358                               return EINVAL;
3359 
3360                     error = copyout(descr, ifr->ifr_buf, IFDESCRSIZE);
3361                     if (error)
3362                               return error;
3363                     break;
3364 
3365           default:
3366                     return ENOTTY;
3367           }
3368           return 0;
3369 }
3370 
/*
 * Get (SIOCGIFADDRPREF) or set (SIOCSIFADDRPREF) the preference of one
 * address configured on ifp.  data points to a struct if_addrprefreq
 * whose ifap_addr selects the address.
 *
 * Returns 0 on success, EOPNOTSUPP for any other command, EINVAL if
 * the address family does not match the socket or the address length
 * is not the one expected for its family, EADDRNOTAVAIL if ifp has no
 * such address, or the kauth(9) error when setting is not authorized.
 */
int
ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
{
	struct if_addrprefreq *ifap = (struct if_addrprefreq *)data;
	struct ifaddr *ifa;
	const struct sockaddr *any, *sa;
	union {
		struct sockaddr sa;
		struct sockaddr_storage ss;
	} u, v;
	int s, error = 0;

	switch (cmd) {
	case SIOCSIFADDRPREF:
		/* Changing a preference requires authorization. */
		error = kauth_authorize_network(kauth_cred_get(),
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
		    NULL);
		if (error)
			return error;
		break;
	case SIOCGIFADDRPREF:
		break;
	default:
		return EOPNOTSUPP;
	}

	/* sanity checks */
	if (data == NULL || ifp == NULL) {
		panic("invalid argument to %s", __func__);
		/*NOTREACHED*/
	}

	/*
	 * The address must be of the socket's family and have the same
	 * length as that family's "any" address.
	 */
	sa = sstocsa(&ifap->ifap_addr);
	if (sa->sa_family != sofamily(so))
		return EINVAL;
	if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len)
		return EINVAL;

	sockaddr_externalize(&v.sa, sizeof(v.ss), sa);

	/*
	 * Find the matching address on ifp, comparing the externalized
	 * forms of both addresses.
	 */
	s = pserialize_read_enter();
	IFADDR_READER_FOREACH(ifa, ifp) {
		if (ifa->ifa_addr->sa_family != sa->sa_family)
			continue;
		sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr);
		if (sockaddr_cmp(&u.sa, &v.sa) == 0)
			break;
	}
	if (ifa == NULL) {
		error = EADDRNOTAVAIL;
		goto out;
	}

	switch (cmd) {
	case SIOCSIFADDRPREF:
		ifa->ifa_preference = ifap->ifap_preference;
		goto out;
	case SIOCGIFADDRPREF:
		/* fill in the if_addrprefreq structure */
		(void)sockaddr_copy(sstosa(&ifap->ifap_addr),
		    sizeof(ifap->ifap_addr), ifa->ifa_addr);
		ifap->ifap_preference = ifa->ifa_preference;
		goto out;
	default:
		/* Not reachable: other commands were rejected above. */
		error = EOPNOTSUPP;
	}
out:
	pserialize_read_exit(s);
	return error;
}
3443 
3444 /*
3445  * Interface ioctls.
3446  */
3447 static int
doifioctl(struct socket * so,u_long cmd,void * data,struct lwp * l)3448 doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l)
3449 {
3450           struct ifnet *ifp;
3451           struct ifreq *ifr;
3452           int error = 0;
3453           u_long ocmd = cmd;
3454           u_short oif_flags;
3455           struct ifreq ifrb;
3456           struct oifreq *oifr = NULL;
3457           int r;
3458           struct psref psref;
3459           bool do_if43_post = false;
3460           bool do_ifm80_post = false;
3461 
3462           switch (cmd) {
3463           case SIOCGIFCONF:
3464                     return ifconf(cmd, data);
3465           case SIOCINITIFADDR:
3466                     return EPERM;
3467           default:
3468                     MODULE_HOOK_CALL(uipc_syscalls_40_hook, (cmd, data), enosys(),
3469                         error);
3470                     if (error != ENOSYS)
3471                               return error;
3472                     MODULE_HOOK_CALL(uipc_syscalls_50_hook, (l, cmd, data),
3473                         enosys(), error);
3474                     if (error != ENOSYS)
3475                               return error;
3476                     error = 0;
3477                     break;
3478           }
3479 
3480           ifr = data;
3481           /* Pre-conversion */
3482           MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), error);
3483           if (cmd != ocmd) {
3484                     oifr = data;
3485                     data = ifr = &ifrb;
3486                     IFREQO2N_43(oifr, ifr);
3487                     do_if43_post = true;
3488           }
3489           MODULE_HOOK_CALL(ifmedia_80_pre_hook, (ifr, &cmd, &do_ifm80_post),
3490               enosys(), error);
3491 
3492           switch (cmd) {
3493           case SIOCIFCREATE:
3494           case SIOCIFDESTROY: {
3495                     const int bound = curlwp_bind();
3496                     if (l != NULL) {
3497                               ifp = if_get(ifr->ifr_name, &psref);
3498                               error = kauth_authorize_network(l->l_cred,
3499                                   KAUTH_NETWORK_INTERFACE,
3500                                   KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
3501                                   KAUTH_ARG(cmd), NULL);
3502                               if (ifp != NULL)
3503                                         if_put(ifp, &psref);
3504                               if (error != 0) {
3505                                         curlwp_bindx(bound);
3506                                         return error;
3507                               }
3508                     }
3509                     KERNEL_LOCK_UNLESS_NET_MPSAFE();
3510                     mutex_enter(&if_clone_mtx);
3511                     r = (cmd == SIOCIFCREATE) ?
3512                               if_clone_create(ifr->ifr_name) :
3513                               if_clone_destroy(ifr->ifr_name);
3514                     mutex_exit(&if_clone_mtx);
3515                     KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
3516                     curlwp_bindx(bound);
3517                     return r;
3518               }
3519           case SIOCIFGCLONERS: {
3520                     struct if_clonereq *req = (struct if_clonereq *)data;
3521                     return if_clone_list(req->ifcr_count, req->ifcr_buffer,
3522                         &req->ifcr_total);
3523               }
3524           }
3525 
3526           if ((cmd & IOC_IN) == 0 || IOCPARM_LEN(cmd) < sizeof(ifr->ifr_name))
3527                     return EINVAL;
3528 
3529           const int bound = curlwp_bind();
3530           ifp = if_get(ifr->ifr_name, &psref);
3531           if (ifp == NULL) {
3532                     curlwp_bindx(bound);
3533                     return ENXIO;
3534           }
3535 
3536           switch (cmd) {
3537           case SIOCALIFADDR:
3538           case SIOCDLIFADDR:
3539           case SIOCSIFADDRPREF:
3540           case SIOCSIFFLAGS:
3541           case SIOCSIFCAP:
3542           case SIOCSIFMETRIC:
3543           case SIOCZIFDATA:
3544           case SIOCSIFMTU:
3545           case SIOCSIFPHYADDR:
3546           case SIOCDIFPHYADDR:
3547 #ifdef INET6
3548           case SIOCSIFPHYADDR_IN6:
3549 #endif
3550           case SIOCSLIFPHYADDR:
3551           case SIOCADDMULTI:
3552           case SIOCDELMULTI:
3553           case SIOCSETHERCAP:
3554           case SIOCSIFMEDIA:
3555           case SIOCSDRVSPEC:
3556           case SIOCG80211:
3557           case SIOCS80211:
3558           case SIOCS80211NWID:
3559           case SIOCS80211NWKEY:
3560           case SIOCS80211POWER:
3561           case SIOCS80211BSSID:
3562           case SIOCS80211CHANNEL:
3563           case SIOCSLINKSTR:
3564                     if (l != NULL) {
3565                               error = kauth_authorize_network(l->l_cred,
3566                                   KAUTH_NETWORK_INTERFACE,
3567                                   KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
3568                                   KAUTH_ARG(cmd), NULL);
3569                               if (error != 0)
3570                                         goto out;
3571                     }
3572           }
3573 
3574           oif_flags = ifp->if_flags;
3575 
3576           KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp);
3577           IFNET_LOCK(ifp);
3578 
3579           error = if_ioctl(ifp, cmd, data);
3580           if (error != ENOTTY)
3581                     ;
3582           else if (so->so_proto == NULL)
3583                     error = EOPNOTSUPP;
3584           else {
3585                     KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
3586                     MODULE_HOOK_CALL(if_ifioctl_43_hook,
3587                                    (so, ocmd, cmd, data, l), enosys(), error);
3588                     if (error == ENOSYS)
3589                               error = (*so->so_proto->pr_usrreqs->pr_ioctl)(so,
3590                                   cmd, data, ifp);
3591                     KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
3592           }
3593 
3594           if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) {
3595                     if ((ifp->if_flags & IFF_UP) != 0) {
3596                               const int s = splsoftnet();
3597                               if_up_locked(ifp);
3598                               splx(s);
3599                     }
3600           }
3601 
3602           /* Post-conversion */
3603           if (do_ifm80_post && (error == 0))
3604                     MODULE_HOOK_CALL(ifmedia_80_post_hook, (ifr, cmd),
3605                         enosys(), error);
3606           if (do_if43_post)
3607                     IFREQN2O_43(oifr, ifr);
3608 
3609           IFNET_UNLOCK(ifp);
3610           KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp);
3611 out:
3612           if_put(ifp, &psref);
3613           curlwp_bindx(bound);
3614           return error;
3615 }
3616 
3617 /*
3618  * Return interface configuration
3619  * of system.  List may be used
3620  * in later ioctl's (above) to get
3621  * other information.
3622  *
3623  * Each record is a struct ifreq.  Before the addition of
3624  * sockaddr_storage, the API rule was that sockaddr flavors that did
3625  * not fit would extend beyond the struct ifreq, with the next struct
3626  * ifreq starting sa_len beyond the struct sockaddr.  Because the
3627  * union in struct ifreq includes struct sockaddr_storage, every kind
3628  * of sockaddr must fit.  Thus, there are no longer any overlength
3629  * records.
3630  *
3631  * Records are added to the user buffer if they fit, and ifc_len is
3632  * adjusted to the length that was written.  Thus, the user is only
3633  * assured of getting the complete list if ifc_len on return is at
3634  * least sizeof(struct ifreq) less than it was on entry.
3635  *
3636  * If the user buffer pointer is NULL, this routine copies no data and
3637  * returns the amount of space that would be needed.
3638  *
3639  * Invariants:
3640  * ifrp points to the next part of the user's buffer to be used.  If
3641  * ifrp != NULL, space holds the number of bytes remaining that we may
3642  * write at ifrp.  Otherwise, space holds the number of bytes that
3643  * would have been written had there been adequate space.
3644  */
3645 /*ARGSUSED*/
3646 static int
ifconf(u_long cmd,void * data)3647 ifconf(u_long cmd, void *data)
3648 {
3649           struct ifconf *ifc = (struct ifconf *)data;
3650           struct ifnet *ifp;
3651           struct ifaddr *ifa;
3652           struct ifreq ifr, *ifrp = NULL;
3653           int space = 0, error = 0;
3654           const int sz = (int)sizeof(struct ifreq);
3655           const bool docopy = ifc->ifc_req != NULL;
3656           struct psref psref;
3657 
3658           if (docopy) {
3659                     if (ifc->ifc_len < 0)
3660                               return EINVAL;
3661 
3662                     space = ifc->ifc_len;
3663                     ifrp = ifc->ifc_req;
3664           }
3665           memset(&ifr, 0, sizeof(ifr));
3666 
3667           const int bound = curlwp_bind();
3668           int s = pserialize_read_enter();
3669           IFNET_READER_FOREACH(ifp) {
3670                     psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
3671                     pserialize_read_exit(s);
3672 
3673                     (void)strncpy(ifr.ifr_name, ifp->if_xname,
3674                         sizeof(ifr.ifr_name));
3675                     if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') {
3676                               error = ENAMETOOLONG;
3677                               goto release_exit;
3678                     }
3679                     if (IFADDR_READER_EMPTY(ifp)) {
3680                               /* Interface with no addresses - send zero sockaddr. */
3681                               memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr));
3682                               if (!docopy) {
3683                                         space += sz;
3684                                         goto next;
3685                               }
3686                               if (space >= sz) {
3687                                         error = copyout(&ifr, ifrp, sz);
3688                                         if (error != 0)
3689                                                   goto release_exit;
3690                                         ifrp++;
3691                                         space -= sz;
3692                               }
3693                     }
3694 
3695                     s = pserialize_read_enter();
3696                     IFADDR_READER_FOREACH(ifa, ifp) {
3697                               struct sockaddr *sa = ifa->ifa_addr;
3698                               /* all sockaddrs must fit in sockaddr_storage */
3699                               KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru));
3700 
3701                               if (!docopy) {
3702                                         space += sz;
3703                                         continue;
3704                               }
3705                               memcpy(&ifr.ifr_space, sa, sa->sa_len);
3706                               pserialize_read_exit(s);
3707 
3708                               if (space >= sz) {
3709                                         error = copyout(&ifr, ifrp, sz);
3710                                         if (error != 0)
3711                                                   goto release_exit;
3712                                         ifrp++; space -= sz;
3713                               }
3714                               s = pserialize_read_enter();
3715                     }
3716                     pserialize_read_exit(s);
3717 
3718 next:
3719                     s = pserialize_read_enter();
3720                     psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
3721           }
3722           pserialize_read_exit(s);
3723           curlwp_bindx(bound);
3724 
3725           if (docopy) {
3726                     KASSERT(0 <= space && space <= ifc->ifc_len);
3727                     ifc->ifc_len -= space;
3728           } else {
3729                     KASSERT(space >= 0);
3730                     ifc->ifc_len = space;
3731           }
3732           return 0;
3733 
3734 release_exit:
3735           psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
3736           curlwp_bindx(bound);
3737           return error;
3738 }
3739 
3740 int
ifreq_setaddr(u_long cmd,struct ifreq * ifr,const struct sockaddr * sa)3741 ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa)
3742 {
3743           uint8_t len = sizeof(ifr->ifr_ifru.ifru_space);
3744           struct ifreq ifrb;
3745           struct oifreq *oifr = NULL;
3746           u_long ocmd = cmd;
3747           int hook;
3748 
3749           MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), hook);
3750           if (hook != ENOSYS) {
3751                     if (cmd != ocmd) {
3752                               oifr = (struct oifreq *)(void *)ifr;
3753                               ifr = &ifrb;
3754                               IFREQO2N_43(oifr, ifr);
3755                                         len = sizeof(oifr->ifr_addr);
3756                     }
3757           }
3758 
3759           if (len < sa->sa_len)
3760                     return EFBIG;
3761 
3762           memset(&ifr->ifr_addr, 0, len);
3763           sockaddr_copy(&ifr->ifr_addr, len, sa);
3764 
3765           if (cmd != ocmd)
3766                     IFREQN2O_43(oifr, ifr);
3767           return 0;
3768 }
3769 
3770 /*
3771  * wrapper function for the drivers which doesn't have if_transmit().
3772  */
3773 static int
if_transmit(struct ifnet * ifp,struct mbuf * m)3774 if_transmit(struct ifnet *ifp, struct mbuf *m)
3775 {
3776           int error;
3777           size_t pktlen = m->m_pkthdr.len;
3778           bool mcast = (m->m_flags & M_MCAST) != 0;
3779 
3780           const int s = splnet();
3781 
3782           IFQ_ENQUEUE(&ifp->if_snd, m, error);
3783           if (error != 0) {
3784                     /* mbuf is already freed */
3785                     goto out;
3786           }
3787 
3788           net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
3789           if_statadd_ref(ifp, nsr, if_obytes, pktlen);
3790           if (mcast)
3791                     if_statinc_ref(ifp, nsr, if_omcasts);
3792           IF_STAT_PUTREF(ifp);
3793 
3794           if ((ifp->if_flags & IFF_OACTIVE) == 0)
3795                     if_start_lock(ifp);
3796 out:
3797           splx(s);
3798 
3799           return error;
3800 }
3801 
3802 int
if_transmit_lock(struct ifnet * ifp,struct mbuf * m)3803 if_transmit_lock(struct ifnet *ifp, struct mbuf *m)
3804 {
3805           int error;
3806 
3807           kmsan_check_mbuf(m);
3808 
3809 #ifdef ALTQ
3810           KERNEL_LOCK(1, NULL);
3811           if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
3812                     error = if_transmit(ifp, m);
3813                     KERNEL_UNLOCK_ONE(NULL);
3814           } else {
3815                     KERNEL_UNLOCK_ONE(NULL);
3816                     error = (*ifp->if_transmit)(ifp, m);
3817                     /* mbuf is already freed */
3818           }
3819 #else /* !ALTQ */
3820           error = (*ifp->if_transmit)(ifp, m);
3821           /* mbuf is already freed */
3822 #endif /* !ALTQ */
3823 
3824           return error;
3825 }
3826 
/*
 * Queue message on interface, and start output if interface
 * not yet active.
 *
 * This is a plain alias for if_transmit_lock(); its return value is
 * passed through unchanged.
 */
int
ifq_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	const int error = if_transmit_lock(ifp, m);

	return error;
}
3837 
3838 /*
3839  * Queue message on interface, possibly using a second fast queue
3840  */
3841 int
ifq_enqueue2(struct ifnet * ifp,struct ifqueue * ifq,struct mbuf * m)3842 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m)
3843 {
3844           int error = 0;
3845 
3846           if (ifq != NULL
3847 #ifdef ALTQ
3848               && ALTQ_IS_ENABLED(&ifp->if_snd) == 0
3849 #endif
3850               ) {
3851                     if (IF_QFULL(ifq)) {
3852                               IF_DROP(&ifp->if_snd);
3853                               m_freem(m);
3854                               if (error == 0)
3855                                         error = ENOBUFS;
3856                     } else
3857                               IF_ENQUEUE(ifq, m);
3858           } else
3859                     IFQ_ENQUEUE(&ifp->if_snd, m, error);
3860           if (error != 0) {
3861                     if_statinc(ifp, if_oerrors);
3862                     return error;
3863           }
3864           return 0;
3865 }
3866 
3867 int
if_addr_init(ifnet_t * ifp,struct ifaddr * ifa,const bool src)3868 if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src)
3869 {
3870           int rc;
3871 
3872           KASSERT(IFNET_LOCKED(ifp));
3873           if (ifp->if_initaddr != NULL)
3874                     rc = (*ifp->if_initaddr)(ifp, ifa, src);
3875           else if (src || (rc = if_ioctl(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY)
3876                     rc = if_ioctl(ifp, SIOCINITIFADDR, ifa);
3877 
3878           return rc;
3879 }
3880 
3881 int
if_do_dad(struct ifnet * ifp)3882 if_do_dad(struct ifnet *ifp)
3883 {
3884           if ((ifp->if_flags & IFF_LOOPBACK) != 0)
3885                     return 0;
3886 
3887           switch (ifp->if_type) {
3888           case IFT_FAITH:
3889                     /*
3890                      * These interfaces do not have the IFF_LOOPBACK flag,
3891                      * but loop packets back.  We do not have to do DAD on such
3892                      * interfaces.  We should even omit it, because loop-backed
3893                      * responses would confuse the DAD procedure.
3894                      */
3895                     return 0;
3896           default:
3897                     /*
3898                      * Our DAD routine requires the interface up and running.
3899                      * However, some interfaces can be up before the RUNNING
3900                      * status.  Additionally, users may try to assign addresses
3901                      * before the interface becomes up (or running).
3902                      * We simply skip DAD in such a case as a work around.
3903                      * XXX: we should rather mark "tentative" on such addresses,
3904                      * and do DAD after the interface becomes ready.
3905                      */
3906                     if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
3907                         (IFF_UP | IFF_RUNNING))
3908                               return 0;
3909 
3910                     return 1;
3911           }
3912 }
3913 
3914 /*
3915  * if_flags_set(ifp, flags)
3916  *
3917  *        Ask ifp to change ifp->if_flags to flags, as if with the
3918  *        SIOCSIFFLAGS ioctl command.
3919  *
3920  *        May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
3921  *        IFNET_LOCK.
3922  */
3923 int
if_flags_set(ifnet_t * ifp,const u_short flags)3924 if_flags_set(ifnet_t *ifp, const u_short flags)
3925 {
3926           int rc;
3927 
3928           KASSERT(IFNET_LOCKED(ifp));
3929 
3930           if (ifp->if_setflags != NULL)
3931                     rc = (*ifp->if_setflags)(ifp, flags);
3932           else {
3933                     u_short cantflags, chgdflags;
3934                     struct ifreq ifr;
3935 
3936                     chgdflags = ifp->if_flags ^ flags;
3937                     cantflags = chgdflags & IFF_CANTCHANGE;
3938 
3939                     if (cantflags != 0)
3940                               ifp->if_flags ^= cantflags;
3941 
3942                     /*
3943                      * Traditionally, we do not call if_ioctl after
3944                      * setting/clearing only IFF_PROMISC if the interface
3945                      * isn't IFF_UP.  Uphold that tradition.
3946                      */
3947                     if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0)
3948                               return 0;
3949 
3950                     memset(&ifr, 0, sizeof(ifr));
3951 
3952                     ifr.ifr_flags = flags & ~IFF_CANTCHANGE;
3953                     rc = if_ioctl(ifp, SIOCSIFFLAGS, &ifr);
3954 
3955                     if (rc != 0 && cantflags != 0)
3956                               ifp->if_flags ^= cantflags;
3957           }
3958 
3959           return rc;
3960 }
3961 
3962 /*
3963  * if_mcast_op(ifp, cmd, sa)
3964  *
3965  *        Apply a multicast command, SIOCADDMULTI/SIOCDELMULTI, to the
3966  *        interface.  Returns 0 on success, nonzero errno(3) number on
3967  *        failure.
3968  *
3969  *        May sleep.
3970  *
3971  *        Use this, not if_ioctl, for the multicast commands.
3972  */
3973 int
if_mcast_op(ifnet_t * ifp,const unsigned long cmd,const struct sockaddr * sa)3974 if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa)
3975 {
3976           int rc;
3977           struct ifreq ifr;
3978 
3979           switch (cmd) {
3980           case SIOCADDMULTI:
3981           case SIOCDELMULTI:
3982                     break;
3983           default:
3984                     panic("invalid ifnet multicast command: 0x%lx", cmd);
3985           }
3986 
3987           ifreq_setaddr(cmd, &ifr, sa);
3988           rc = if_ioctl(ifp, cmd, &ifr);
3989 
3990           return rc;
3991 }
3992 
3993 static void
sysctl_sndq_setup(struct sysctllog ** clog,const char * ifname,struct ifaltq * ifq)3994 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname,
3995     struct ifaltq *ifq)
3996 {
3997           const struct sysctlnode *cnode, *rnode;
3998 
3999           if (sysctl_createv(clog, 0, NULL, &rnode,
4000                            CTLFLAG_PERMANENT,
4001                            CTLTYPE_NODE, "interfaces",
4002                            SYSCTL_DESCR("Per-interface controls"),
4003                            NULL, 0, NULL, 0,
4004                            CTL_NET, CTL_CREATE, CTL_EOL) != 0)
4005                     goto bad;
4006 
4007           if (sysctl_createv(clog, 0, &rnode, &rnode,
4008                            CTLFLAG_PERMANENT,
4009                            CTLTYPE_NODE, ifname,
4010                            SYSCTL_DESCR("Interface controls"),
4011                            NULL, 0, NULL, 0,
4012                            CTL_CREATE, CTL_EOL) != 0)
4013                     goto bad;
4014 
4015           if (sysctl_createv(clog, 0, &rnode, &rnode,
4016                            CTLFLAG_PERMANENT,
4017                            CTLTYPE_NODE, "sndq",
4018                            SYSCTL_DESCR("Interface output queue controls"),
4019                            NULL, 0, NULL, 0,
4020                            CTL_CREATE, CTL_EOL) != 0)
4021                     goto bad;
4022 
4023           if (sysctl_createv(clog, 0, &rnode, &cnode,
4024                            CTLFLAG_PERMANENT,
4025                            CTLTYPE_INT, "len",
4026                            SYSCTL_DESCR("Current output queue length"),
4027                            NULL, 0, &ifq->ifq_len, 0,
4028                            CTL_CREATE, CTL_EOL) != 0)
4029                     goto bad;
4030 
4031           if (sysctl_createv(clog, 0, &rnode, &cnode,
4032                            CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
4033                            CTLTYPE_INT, "maxlen",
4034                            SYSCTL_DESCR("Maximum allowed output queue length"),
4035                            NULL, 0, &ifq->ifq_maxlen, 0,
4036                            CTL_CREATE, CTL_EOL) != 0)
4037                     goto bad;
4038 
4039           if (sysctl_createv(clog, 0, &rnode, &cnode,
4040                            CTLFLAG_PERMANENT,
4041                            CTLTYPE_QUAD, "drops",
4042                            SYSCTL_DESCR("Packets dropped due to full output queue"),
4043                            NULL, 0, &ifq->ifq_drops, 0,
4044                            CTL_CREATE, CTL_EOL) != 0)
4045                     goto bad;
4046 
4047           return;
4048 bad:
4049           printf("%s: could not attach sysctl nodes\n", ifname);
4050           return;
4051 }
4052 
4053 static int
if_sdl_sysctl(SYSCTLFN_ARGS)4054 if_sdl_sysctl(SYSCTLFN_ARGS)
4055 {
4056           struct ifnet *ifp;
4057           const struct sockaddr_dl *sdl;
4058           struct psref psref;
4059           int error = 0;
4060 
4061           if (namelen != 1)
4062                     return EINVAL;
4063 
4064           const int bound = curlwp_bind();
4065           ifp = if_get_byindex(name[0], &psref);
4066           if (ifp == NULL) {
4067                     error = ENODEV;
4068                     goto out0;
4069           }
4070 
4071           sdl = ifp->if_sadl;
4072           if (sdl == NULL) {
4073                     *oldlenp = 0;
4074                     goto out1;
4075           }
4076 
4077           if (oldp == NULL) {
4078                     *oldlenp = sdl->sdl_alen;
4079                     goto out1;
4080           }
4081 
4082           if (*oldlenp >= sdl->sdl_alen)
4083                     *oldlenp = sdl->sdl_alen;
4084           error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen],
4085               oldp, *oldlenp);
4086 out1:
4087           if_put(ifp, &psref);
4088 out0:
4089           curlwp_bindx(bound);
4090           return error;
4091 }
4092 
4093 static void
if_sysctl_setup(struct sysctllog ** clog)4094 if_sysctl_setup(struct sysctllog **clog)
4095 {
4096           const struct sysctlnode *rnode = NULL;
4097 
4098           sysctl_createv(clog, 0, NULL, &rnode,
4099                            CTLFLAG_PERMANENT,
4100                            CTLTYPE_NODE, "sdl",
4101                            SYSCTL_DESCR("Get active link-layer address"),
4102                            if_sdl_sysctl, 0, NULL, 0,
4103                            CTL_NET, CTL_CREATE, CTL_EOL);
4104 }
4105