xref: /freebsd-13-stable/sys/net/if.c (revision 6b81815307b00d37b81f71d68a51f67c72a2edb0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)if.c	8.5 (Berkeley) 1/9/95
32  */
33 
34 #include "opt_bpf.h"
35 #include "opt_inet6.h"
36 #include "opt_inet.h"
37 
38 #include <sys/param.h>
39 #include <sys/conf.h>
40 #include <sys/eventhandler.h>
41 #include <sys/malloc.h>
42 #include <sys/domainset.h>
43 #include <sys/sbuf.h>
44 #include <sys/bus.h>
45 #include <sys/epoch.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/protosw.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/refcount.h>
56 #include <sys/module.h>
57 #include <sys/rwlock.h>
58 #include <sys/sockio.h>
59 #include <sys/syslog.h>
60 #include <sys/sysctl.h>
61 #include <sys/sysent.h>
62 #include <sys/taskqueue.h>
63 #include <sys/domain.h>
64 #include <sys/jail.h>
65 #include <sys/priv.h>
66 
67 #include <machine/stdarg.h>
68 #include <vm/uma.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_arp.h>
74 #include <net/if_clone.h>
75 #include <net/if_dl.h>
76 #include <net/if_types.h>
77 #include <net/if_var.h>
78 #include <net/if_media.h>
79 #include <net/if_vlan_var.h>
80 #include <net/radix.h>
81 #include <net/route.h>
82 #include <net/route/route_ctl.h>
83 #include <net/vnet.h>
84 
85 #if defined(INET) || defined(INET6)
86 #include <net/ethernet.h>
87 #include <netinet/in.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip_carp.h>
91 #ifdef INET
92 #include <net/debugnet.h>
93 #include <netinet/if_ether.h>
94 #endif /* INET */
95 #ifdef INET6
96 #include <netinet6/in6_var.h>
97 #include <netinet6/in6_ifattach.h>
98 #endif /* INET6 */
99 #endif /* INET || INET6 */
100 
101 #include <security/mac/mac_framework.h>
102 
103 /*
104  * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name
105  * and ifr_ifru when it is used in SIOCGIFCONF.
106  */
107 _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) ==
108     offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru");
109 
110 __read_mostly epoch_t net_epoch_preempt;
111 #ifdef COMPAT_FREEBSD32
112 #include <sys/mount.h>
113 #include <compat/freebsd32/freebsd32.h>
114 
/*
 * Buffer descriptor with both members stored as 32-bit integers; the
 * trailing comments name the native types they stand in for.
 * Presumably the 32-bit counterpart of struct ifreq_buffer -- confirm.
 */
struct ifreq_buffer32 {
	uint32_t	length;		/* (size_t) */
	uint32_t	buffer;		/* (void *) */
};
119 
/*
 * Interface request structure used for socket
 * ioctl's.  All interface ioctl's must have parameter
 * definitions which begin with ifr_name.  The
 * remainder may be interface specific.
 *
 * This variant carries ifru_buffer as a struct ifreq_buffer32 and
 * ifru_data as a 32-bit integer.
 */
struct ifreq32 {
	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
	union {
		struct sockaddr	ifru_addr;
		struct sockaddr	ifru_dstaddr;
		struct sockaddr	ifru_broadaddr;
		struct ifreq_buffer32 ifru_buffer;
		short		ifru_flags[2];
		short		ifru_index;
		int		ifru_jid;
		int		ifru_metric;
		int		ifru_mtu;
		int		ifru_phys;
		int		ifru_media;
		uint32_t	ifru_data;
		int		ifru_cap[2];
		u_int		ifru_fib;
		u_char		ifru_vlan_pcp;
	} ifr_ifru;
};
/*
 * Compile-time checks: struct ifreq32 must have the same total size as
 * struct ifreq, and the union must sit at the same offset in both.
 */
CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
    __offsetof(struct ifreq32, ifr_ifru));
149 
/*
 * Interface configuration request with the buffer/request pointer held
 * as a 32-bit integer, plus the ioctl command number that uses it.
 */
struct ifconf32 {
	int32_t	ifc_len;
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
158 
/*
 * Driver-specific request with the data pointer held as a 32-bit
 * integer.  The two command numbers below are the existing
 * SIOC[SG]DRVSPEC commands re-typed for this structure.
 */
struct ifdrv32 {
	char		ifd_name[IFNAMSIZ];
	uint32_t	ifd_cmd;
	uint32_t	ifd_len;
	uint32_t	ifd_data;
};
#define SIOCSDRVSPEC32	_IOC_NEWTYPE(SIOCSDRVSPEC, struct ifdrv32)
#define SIOCGDRVSPEC32	_IOC_NEWTYPE(SIOCGDRVSPEC, struct ifdrv32)
167 
/*
 * Interface group request with the group-list pointer held as a 32-bit
 * integer.  The command numbers below are the existing group ioctls
 * re-typed for this structure.
 */
struct ifgroupreq32 {
	char	ifgr_name[IFNAMSIZ];
	u_int	ifgr_len;
	union {
		char		ifgru_group[IFNAMSIZ];
		uint32_t	ifgru_groups;
	} ifgr_ifgru;
};
#define	SIOCAIFGROUP32	_IOC_NEWTYPE(SIOCAIFGROUP, struct ifgroupreq32)
#define	SIOCGIFGROUP32	_IOC_NEWTYPE(SIOCGIFGROUP, struct ifgroupreq32)
#define	SIOCDIFGROUP32	_IOC_NEWTYPE(SIOCDIFGROUP, struct ifgroupreq32)
#define	SIOCGIFGMEMB32	_IOC_NEWTYPE(SIOCGIFGMEMB, struct ifgroupreq32)
180 
/*
 * Media request with the ifm_ulist pointer held as a 32-bit integer.
 * The command numbers below are the existing media ioctls re-typed for
 * this structure.
 */
struct ifmediareq32 {
	char		ifm_name[IFNAMSIZ];
	int		ifm_current;
	int		ifm_mask;
	int		ifm_status;
	int		ifm_active;
	int		ifm_count;
	uint32_t	ifm_ulist;	/* (int *) */
};
#define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
#define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
192 #endif /* COMPAT_FREEBSD32 */
193 
/*
 * Overlay of the native interface request and, when COMPAT_FREEBSD32 is
 * configured, its 32-bit variant, so one buffer can be viewed as either.
 */
union ifreq_union {
	struct ifreq	ifr;
#ifdef COMPAT_FREEBSD32
	struct ifreq32	ifr32;
#endif
};
200 
/* Sysctl tree nodes: net.link and net.link.generic. */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Generic link-management");

/* net.link.ifqmaxlen: exported read-only/tunable (CTLFLAG_RDTUN). */
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
    &ifqmaxlen, 0, "max send queue size");

/* Log link state change events */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/* Log promiscuous mode change events */
static int log_promisc_mode_change = 1;

/*
 * NOTE(review): this is the only SYSCTL_INT here that passes 1 rather
 * than 0 as the value argument following the pointer -- confirm that the
 * value is ignored when a pointer is supplied.
 */
SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
	&log_promisc_mode_change, 1,
	"log promiscuous mode change events");

/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

/* Malloc type used for interface description strings. */
static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
234 
/*
 * Function-pointer hooks defined in this file.  None is given a non-NULL
 * initial value here; presumably each is filled in by the module that
 * implements it -- confirm against the respective modules.
 */
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

/* Token-bucket dequeue hook; explicitly NULL until something sets it. */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
259 
260 /*
261  * XXX: Style; these should be sorted alphabetically, and unprototyped
262  * static functions should be prototyped. Currently they are sorted by
263  * declaration order.
264  */
265 static void	if_attachdomain(void *);
266 static void	if_attachdomain1(struct ifnet *);
267 static int	ifconf(u_long, caddr_t);
268 static void	*if_grow(void);
269 static void	if_input_default(struct ifnet *, struct mbuf *);
270 static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
271 static void	if_route(struct ifnet *, int flag, int fam);
272 static int	if_setflag(struct ifnet *, int, int, int *, int);
273 static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
274 static void	if_unroute(struct ifnet *, int flag, int fam);
275 static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
276 static void	do_link_state_change(void *, int);
277 static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
278 static int	if_getgroupmembers(struct ifgroupreq *);
279 static void	if_delgroups(struct ifnet *);
280 static void	if_attach_internal(struct ifnet *, bool);
281 static void	if_detach_internal(struct ifnet *, bool);
282 static void	if_siocaddmulti(void *, int);
283 static void	if_link_ifnet(struct ifnet *);
284 static bool	if_unlink_ifnet(struct ifnet *, bool);
285 #ifdef VIMAGE
286 static void	if_vmove(struct ifnet *, struct vnet *);
287 #endif
288 
289 #ifdef INET6
290 /*
291  * XXX: declare here to avoid to include many inet6 related files..
292  * should be more generalized?
293  */
294 extern void	nd6_setmtu(struct ifnet *);
295 #endif
296 
/* ipsec helper hooks */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

/* Highest interface index currently in use (per vnet). */
VNET_DEFINE(int, if_index);
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/*
 * Current capacity of the index table.  if_grow() doubles this each time
 * it installs a table, including the very first one.
 */
VNET_DEFINE_STATIC(int, if_indexlim) = 8;

/* Table of ifnet by index. */
VNET_DEFINE(struct ifnet **, ifindex_table);

#define	V_if_indexlim		VNET(if_indexlim)
#define	V_ifindex_table		VNET(ifindex_table)

/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock.
 * This may be acquired to stabilise the list, or we may rely on NET_EPOCH.
 */
struct sx ifnet_sxlock;
SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);

/* Separate recursive sx lock taken around interface detach/vmove. */
struct sx ifnet_detach_sxlock;
SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
    SX_RECURSE);

#ifdef VIMAGE
/* True while a vnet is flagged for shutdown and not yet past VNET_DONE. */
#define	VNET_IS_SHUTTING_DOWN(_vnet)					\
    ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
#endif

/* Per-interface-type layer 2 common structure alloc/free callbacks. */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
337 
338 struct ifnet *
ifnet_byindex(u_short idx)339 ifnet_byindex(u_short idx)
340 {
341 
342 	if (__predict_false(idx > V_if_index))
343 		return (NULL);
344 
345 	return (V_ifindex_table[idx]);
346 }
347 
348 struct ifnet *
ifnet_byindex_ref(u_short idx)349 ifnet_byindex_ref(u_short idx)
350 {
351 	struct ifnet *ifp;
352 
353 	NET_EPOCH_ASSERT();
354 
355 	ifp = ifnet_byindex(idx);
356 	if (ifp == NULL || (ifp->if_flags & IFF_DYING))
357 		return (NULL);
358 	if (!if_try_ref(ifp))
359 		return (NULL);
360 	return (ifp);
361 }
362 
363 /*
364  * Allocate an ifindex array entry; return 0 on success or an error on
365  * failure.
366  */
367 static u_short
ifindex_alloc(void ** old)368 ifindex_alloc(void **old)
369 {
370 	u_short idx;
371 
372 	IFNET_WLOCK_ASSERT();
373 	/*
374 	 * Try to find an empty slot below V_if_index.  If we fail, take the
375 	 * next slot.
376 	 */
377 	for (idx = 1; idx <= V_if_index; idx++) {
378 		if (V_ifindex_table[idx] == NULL)
379 			break;
380 	}
381 
382 	/* Catch if_index overflow. */
383 	if (idx >= V_if_indexlim) {
384 		*old = if_grow();
385 		return (USHRT_MAX);
386 	}
387 	if (idx > V_if_index)
388 		V_if_index = idx;
389 	return (idx);
390 }
391 
392 static void
ifindex_free(u_short idx)393 ifindex_free(u_short idx)
394 {
395 
396 	IFNET_WLOCK_ASSERT();
397 
398 	V_ifindex_table[idx] = NULL;
399 	while (V_if_index > 0 &&
400 	    V_ifindex_table[V_if_index] == NULL)
401 		V_if_index--;
402 }
403 
404 static void
ifnet_setbyindex(u_short idx,struct ifnet * ifp)405 ifnet_setbyindex(u_short idx, struct ifnet *ifp)
406 {
407 
408 	ifp->if_index = idx;
409 	V_ifindex_table[idx] = ifp;
410 }
411 
412 struct ifaddr *
ifaddr_byindex(u_short idx)413 ifaddr_byindex(u_short idx)
414 {
415 	struct ifnet *ifp;
416 	struct ifaddr *ifa = NULL;
417 
418 	NET_EPOCH_ASSERT();
419 
420 	ifp = ifnet_byindex(idx);
421 	if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
422 		ifa_ref(ifa);
423 	return (ifa);
424 }
425 
426 /*
427  * Network interface utility routines.
428  *
429  * Routines with ifa_ifwith* names take sockaddr *'s as
430  * parameters.
431  */
432 
433 static void
vnet_if_init(const void * unused __unused)434 vnet_if_init(const void *unused __unused)
435 {
436 	void *old;
437 
438 	CK_STAILQ_INIT(&V_ifnet);
439 	CK_STAILQ_INIT(&V_ifg_head);
440 	IFNET_WLOCK();
441 	old = if_grow();				/* create initial table */
442 	IFNET_WUNLOCK();
443 	epoch_wait_preempt(net_epoch_preempt);
444 	free(old, M_IFNET);
445 	vnet_if_clone_init();
446 }
447 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
448     NULL);
449 
#ifdef VIMAGE
/*
 * Per-vnet teardown: check that no interfaces or interface groups remain
 * and release the index table.
 */
static void
vnet_if_uninit(const void *unused __unused)
{

	VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
	    "not empty", __func__, __LINE__, &V_ifnet));
	VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
	    "not empty", __func__, __LINE__, &V_ifg_head));

	free(V_ifindex_table, M_IFNET);
}
VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
    vnet_if_uninit, NULL);
#endif
465 
466 static void
if_link_ifnet(struct ifnet * ifp)467 if_link_ifnet(struct ifnet *ifp)
468 {
469 
470 	IFNET_WLOCK();
471 	CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
472 #ifdef VIMAGE
473 	curvnet->vnet_ifcnt++;
474 #endif
475 	IFNET_WUNLOCK();
476 }
477 
478 static bool
if_unlink_ifnet(struct ifnet * ifp,bool vmove)479 if_unlink_ifnet(struct ifnet *ifp, bool vmove)
480 {
481 	struct ifnet *iter;
482 	int found = 0;
483 
484 	IFNET_WLOCK();
485 	CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
486 		if (iter == ifp) {
487 			CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link);
488 			if (!vmove)
489 				ifp->if_flags |= IFF_DYING;
490 			found = 1;
491 			break;
492 		}
493 #ifdef VIMAGE
494 	curvnet->vnet_ifcnt--;
495 #endif
496 	IFNET_WUNLOCK();
497 
498 	return (found);
499 }
500 
#ifdef VIMAGE
/*
 * On vnet teardown, hand every interface whose home vnet differs from
 * the current one back to its home vnet.
 *
 * This runs in two phases.  First, under the ifnet write lock, the
 * borrowed interfaces are unlinked from V_ifnet and collected in a
 * temporary array.  Second, with that lock dropped, each collected
 * interface is moved home under ifnet_detach_sxlock.
 */
static void
vnet_if_return(const void *unused __unused)
{
	struct ifnet *ifp, *next;
	struct ifnet **queue;
	int found, nqueued;

	nqueued = 0;

	/*
	 * We need to protect our access to the V_ifnet tailq. Ordinarily we'd
	 * enter NET_EPOCH, but that's not possible, because if_vmove() calls
	 * if_detach_internal(), which waits for NET_EPOCH callbacks to
	 * complete. We can't do that from within NET_EPOCH.
	 *
	 * However, we can also use the IFNET_xLOCK, which is the V_ifnet
	 * read/write lock. We cannot hold the lock as we call if_vmove()
	 * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib
	 * ctx lock.
	 */
	IFNET_WLOCK();

	/* One slot per interface currently counted in this vnet. */
	queue = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt,
	    M_IFNET, M_WAITOK | M_ZERO);

	/* Return all inherited interfaces to their parent vnets. */
	CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, next) {
		if (ifp->if_home_vnet == ifp->if_vnet)
			continue;

		found = if_unlink_ifnet(ifp, true);
		MPASS(found);

		queue[nqueued++] = ifp;
	}
	IFNET_WUNLOCK();

	for (int k = 0; k < nqueued; k++) {
		sx_xlock(&ifnet_detach_sxlock);
		if_vmove(queue[k], queue[k]->if_home_vnet);
		sx_xunlock(&ifnet_detach_sxlock);
	}

	free(queue, M_IFNET);
}
VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    vnet_if_return, NULL);
#endif
549 
550 static void *
if_grow(void)551 if_grow(void)
552 {
553 	int oldlim;
554 	u_int n;
555 	struct ifnet **e;
556 	void *old;
557 
558 	old = NULL;
559 	IFNET_WLOCK_ASSERT();
560 	oldlim = V_if_indexlim;
561 	IFNET_WUNLOCK();
562 	n = (oldlim << 1) * sizeof(*e);
563 	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
564 	IFNET_WLOCK();
565 	if (V_if_indexlim != oldlim) {
566 		free(e, M_IFNET);
567 		return (NULL);
568 	}
569 	if (V_ifindex_table != NULL) {
570 		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
571 		old = V_ifindex_table;
572 	}
573 	V_if_indexlim <<= 1;
574 	V_ifindex_table = e;
575 	return (old);
576 }
577 
578 /*
579  * Allocate a struct ifnet and an index for an interface.  A layer 2
580  * common structure will also be allocated if an allocation routine is
581  * registered for the passed type.
582  */
583 struct ifnet *
if_alloc_domain(u_char type,int numa_domain)584 if_alloc_domain(u_char type, int numa_domain)
585 {
586 	struct ifnet *ifp;
587 	u_short idx;
588 	void *old;
589 
590 	KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
591 	if (numa_domain == IF_NODOM)
592 		ifp = malloc(sizeof(struct ifnet), M_IFNET,
593 		    M_WAITOK | M_ZERO);
594 	else
595 		ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET,
596 		    DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO);
597 	ifp->if_type = type;
598 	ifp->if_alloctype = type;
599 	ifp->if_numa_domain = numa_domain;
600 #ifdef VIMAGE
601 	ifp->if_vnet = curvnet;
602 #endif
603 	if (if_com_alloc[type] != NULL) {
604 		ifp->if_l2com = if_com_alloc[type](type, ifp);
605 		KASSERT(ifp->if_l2com, ("%s: if_com_alloc[%u] failed", __func__,
606 		    type));
607 	}
608 
609 	IF_ADDR_LOCK_INIT(ifp);
610 	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
611 	TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
612 	ifp->if_afdata_initialized = 0;
613 	IF_AFDATA_LOCK_INIT(ifp);
614 	CK_STAILQ_INIT(&ifp->if_addrhead);
615 	CK_STAILQ_INIT(&ifp->if_multiaddrs);
616 	CK_STAILQ_INIT(&ifp->if_groups);
617 #ifdef MAC
618 	mac_ifnet_init(ifp);
619 #endif
620 	ifq_init(&ifp->if_snd, ifp);
621 
622 	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
623 	for (int i = 0; i < IFCOUNTERS; i++)
624 		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
625 	ifp->if_get_counter = if_get_counter_default;
626 	ifp->if_pcp = IFNET_PCP_NONE;
627 
628 restart:
629 	IFNET_WLOCK();
630 	idx = ifindex_alloc(&old);
631 	if (__predict_false(idx == USHRT_MAX)) {
632 		IFNET_WUNLOCK();
633 		epoch_wait_preempt(net_epoch_preempt);
634 		free(old, M_IFNET);
635 		goto restart;
636 	}
637 	ifnet_setbyindex(idx, ifp);
638 	IFNET_WUNLOCK();
639 
640 	return (ifp);
641 }
642 
643 struct ifnet *
if_alloc_dev(u_char type,device_t dev)644 if_alloc_dev(u_char type, device_t dev)
645 {
646 	int numa_domain;
647 
648 	if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0)
649 		return (if_alloc_domain(type, IF_NODOM));
650 	return (if_alloc_domain(type, numa_domain));
651 }
652 
653 struct ifnet *
if_alloc(u_char type)654 if_alloc(u_char type)
655 {
656 
657 	return (if_alloc_domain(type, IF_NODOM));
658 }
659 /*
660  * Do the actual work of freeing a struct ifnet, and layer 2 common
661  * structure.  This call is made when the last reference to an
662  * interface is released.
663  */
664 static void
if_free_internal(struct ifnet * ifp)665 if_free_internal(struct ifnet *ifp)
666 {
667 
668 	KASSERT((ifp->if_flags & IFF_DYING),
669 	    ("if_free_internal: interface not dying"));
670 
671 	if (if_com_free[ifp->if_alloctype] != NULL)
672 		if_com_free[ifp->if_alloctype](ifp->if_l2com,
673 		    ifp->if_alloctype);
674 
675 #ifdef MAC
676 	mac_ifnet_destroy(ifp);
677 #endif /* MAC */
678 	IF_AFDATA_DESTROY(ifp);
679 	IF_ADDR_LOCK_DESTROY(ifp);
680 	ifq_delete(&ifp->if_snd);
681 
682 	for (int i = 0; i < IFCOUNTERS; i++)
683 		counter_u64_free(ifp->if_counters[i]);
684 
685 	if_freedescr(ifp->if_description);
686 	free(ifp->if_hw_addr, M_IFADDR);
687 	free(ifp, M_IFNET);
688 }
689 
690 static void
if_destroy(epoch_context_t ctx)691 if_destroy(epoch_context_t ctx)
692 {
693 	struct ifnet *ifp;
694 
695 	ifp = __containerof(ctx, struct ifnet, if_epoch_ctx);
696 	if_free_internal(ifp);
697 }
698 
699 /*
700  * Deregister an interface and free the associated storage.
701  */
702 void
if_free(struct ifnet * ifp)703 if_free(struct ifnet *ifp)
704 {
705 
706 	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
707 
708 	CURVNET_SET_QUIET(ifp->if_vnet);
709 	IFNET_WLOCK();
710 	KASSERT(ifp == ifnet_byindex(ifp->if_index),
711 	    ("%s: freeing unallocated ifnet", ifp->if_xname));
712 
713 	ifindex_free(ifp->if_index);
714 	IFNET_WUNLOCK();
715 
716 	if (refcount_release(&ifp->if_refcount))
717 		NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx);
718 	CURVNET_RESTORE();
719 }
720 
721 /*
722  * Interfaces to keep an ifnet type-stable despite the possibility of the
723  * driver calling if_free().  If there are additional references, we defer
724  * freeing the underlying data structure.
725  */
726 void
if_ref(struct ifnet * ifp)727 if_ref(struct ifnet *ifp)
728 {
729 	u_int old;
730 
731 	/* We don't assert the ifnet list lock here, but arguably should. */
732 	old = refcount_acquire(&ifp->if_refcount);
733 	KASSERT(old > 0, ("%s: ifp %p has 0 refs", __func__, ifp));
734 }
735 
736 bool
if_try_ref(struct ifnet * ifp)737 if_try_ref(struct ifnet *ifp)
738 {
739 	NET_EPOCH_ASSERT();
740 	return (refcount_acquire_if_not_zero(&ifp->if_refcount));
741 }
742 
743 void
if_rele(struct ifnet * ifp)744 if_rele(struct ifnet *ifp)
745 {
746 
747 	if (!refcount_release(&ifp->if_refcount))
748 		return;
749 	NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx);
750 }
751 
752 void
ifq_init(struct ifaltq * ifq,struct ifnet * ifp)753 ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
754 {
755 
756 	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
757 
758 	if (ifq->ifq_maxlen == 0)
759 		ifq->ifq_maxlen = ifqmaxlen;
760 
761 	ifq->altq_type = 0;
762 	ifq->altq_disc = NULL;
763 	ifq->altq_flags &= ALTQF_CANTCHANGE;
764 	ifq->altq_tbr  = NULL;
765 	ifq->altq_ifp  = ifp;
766 }
767 
768 void
ifq_delete(struct ifaltq * ifq)769 ifq_delete(struct ifaltq *ifq)
770 {
771 	mtx_destroy(&ifq->ifq_mtx);
772 }
773 
774 /*
775  * Perform generic interface initialization tasks and attach the interface
776  * to the list of "active" interfaces.  If vmove flag is set on entry
777  * to if_attach_internal(), perform only a limited subset of initialization
778  * tasks, given that we are moving from one vnet to another an ifnet which
779  * has already been fully initialized.
780  *
781  * Note that if_detach_internal() removes group membership unconditionally
782  * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
783  * Thus, when if_vmove() is applied to a cloned interface, group membership
784  * is lost while a cloned one always joins a group whose name is
785  * ifc->ifc_name.  To recover this after if_detach_internal() and
786  * if_attach_internal(), the cloner should be specified to
787  * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
788  * attempts to join a group whose name is ifc->ifc_name.
789  *
790  * XXX:
791  *  - The decision to return void and thus require this function to
792  *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
795  */
796 void
if_attach(struct ifnet * ifp)797 if_attach(struct ifnet *ifp)
798 {
799 
800 	if_attach_internal(ifp, false);
801 }
802 
803 /*
804  * Compute the least common TSO limit.
805  */
806 void
if_hw_tsomax_common(if_t ifp,struct ifnet_hw_tsomax * pmax)807 if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
808 {
809 	/*
810 	 * 1) If there is no limit currently, take the limit from
811 	 * the network adapter.
812 	 *
813 	 * 2) If the network adapter has a limit below the current
814 	 * limit, apply it.
815 	 */
816 	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
817 	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
818 		pmax->tsomaxbytes = ifp->if_hw_tsomax;
819 	}
820 	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
821 	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
822 		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
823 	}
824 	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
825 	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
826 		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
827 	}
828 }
829 
830 /*
831  * Update TSO limit of a network adapter.
832  *
833  * Returns zero if no change. Else non-zero.
834  */
835 int
if_hw_tsomax_update(if_t ifp,struct ifnet_hw_tsomax * pmax)836 if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
837 {
838 	int retval = 0;
839 	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
840 		ifp->if_hw_tsomax = pmax->tsomaxbytes;
841 		retval++;
842 	}
843 	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
844 		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
845 		retval++;
846 	}
847 	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
848 		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
849 		retval++;
850 	}
851 	return (retval);
852 }
853 
854 static void
if_attach_internal(struct ifnet * ifp,bool vmove)855 if_attach_internal(struct ifnet *ifp, bool vmove)
856 {
857 	unsigned socksize, ifasize;
858 	int namelen, masklen;
859 	struct sockaddr_dl *sdl;
860 	struct ifaddr *ifa;
861 
862 	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
863 		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
864 		    ifp->if_xname);
865 
866 #ifdef VIMAGE
867 	CURVNET_ASSERT_SET();
868 	ifp->if_vnet = curvnet;
869 	if (ifp->if_home_vnet == NULL)
870 		ifp->if_home_vnet = curvnet;
871 #endif
872 
873 	if_addgroup(ifp, IFG_ALL);
874 
875 #ifdef VIMAGE
876 	/* Restore group membership for cloned interface. */
877 	if (vmove)
878 		if_clone_restoregroup(ifp);
879 #endif
880 
881 	getmicrotime(&ifp->if_lastchange);
882 	ifp->if_epoch = time_uptime;
883 
884 	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
885 	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
886 	    ("transmit and qflush must both either be set or both be NULL"));
887 	if (ifp->if_transmit == NULL) {
888 		ifp->if_transmit = if_transmit;
889 		ifp->if_qflush = if_qflush;
890 	}
891 	if (ifp->if_input == NULL)
892 		ifp->if_input = if_input_default;
893 
894 	if (ifp->if_requestencap == NULL)
895 		ifp->if_requestencap = if_requestencap_default;
896 
897 	if (!vmove) {
898 #ifdef MAC
899 		mac_ifnet_create(ifp);
900 #endif
901 
902 		/*
903 		 * Create a Link Level name for this device.
904 		 */
905 		namelen = strlen(ifp->if_xname);
906 		/*
907 		 * Always save enough space for any possiable name so we
908 		 * can do a rename in place later.
909 		 */
910 		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
911 		socksize = masklen + ifp->if_addrlen;
912 		if (socksize < sizeof(*sdl))
913 			socksize = sizeof(*sdl);
914 		socksize = roundup2(socksize, sizeof(long));
915 		ifasize = sizeof(*ifa) + 2 * socksize;
916 		ifa = ifa_alloc(ifasize, M_WAITOK);
917 		sdl = (struct sockaddr_dl *)(ifa + 1);
918 		sdl->sdl_len = socksize;
919 		sdl->sdl_family = AF_LINK;
920 		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
921 		sdl->sdl_nlen = namelen;
922 		sdl->sdl_index = ifp->if_index;
923 		sdl->sdl_type = ifp->if_type;
924 		ifp->if_addr = ifa;
925 		ifa->ifa_ifp = ifp;
926 		ifa->ifa_addr = (struct sockaddr *)sdl;
927 		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
928 		ifa->ifa_netmask = (struct sockaddr *)sdl;
929 		sdl->sdl_len = masklen;
930 		while (namelen != 0)
931 			sdl->sdl_data[--namelen] = 0xff;
932 		CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
933 		/* Reliably crash if used uninitialized. */
934 		ifp->if_broadcastaddr = NULL;
935 
936 		if (ifp->if_type == IFT_ETHER) {
937 			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
938 			    M_WAITOK | M_ZERO);
939 		}
940 
941 #if defined(INET) || defined(INET6)
942 		/* Use defaults for TSO, if nothing is set */
943 		if (ifp->if_hw_tsomax == 0 &&
944 		    ifp->if_hw_tsomaxsegcount == 0 &&
945 		    ifp->if_hw_tsomaxsegsize == 0) {
946 			/*
947 			 * The TSO defaults needs to be such that an
948 			 * NFS mbuf list of 35 mbufs totalling just
949 			 * below 64K works and that a chain of mbufs
950 			 * can be defragged into at most 32 segments:
951 			 */
952 			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
953 			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
954 			ifp->if_hw_tsomaxsegcount = 35;
955 			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
956 
957 			/* XXX some drivers set IFCAP_TSO after ethernet attach */
958 			if (ifp->if_capabilities & IFCAP_TSO) {
959 				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
960 				    ifp->if_hw_tsomax,
961 				    ifp->if_hw_tsomaxsegcount,
962 				    ifp->if_hw_tsomaxsegsize);
963 			}
964 		}
965 #endif
966 	}
967 #ifdef VIMAGE
968 	else {
969 		/*
970 		 * Update the interface index in the link layer address
971 		 * of the interface.
972 		 */
973 		for (ifa = ifp->if_addr; ifa != NULL;
974 		    ifa = CK_STAILQ_NEXT(ifa, ifa_link)) {
975 			if (ifa->ifa_addr->sa_family == AF_LINK) {
976 				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
977 				sdl->sdl_index = ifp->if_index;
978 			}
979 		}
980 	}
981 #endif
982 
983 	if_link_ifnet(ifp);
984 
985 	if (domain_init_status >= 2)
986 		if_attachdomain1(ifp);
987 
988 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
989 	if (IS_DEFAULT_VNET(curvnet))
990 		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
991 }
992 
993 static void
if_epochalloc(void * dummy __unused)994 if_epochalloc(void *dummy __unused)
995 {
996 
997 	net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT);
998 }
999 SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL);
1000 
1001 static void
if_attachdomain(void * dummy)1002 if_attachdomain(void *dummy)
1003 {
1004 	struct ifnet *ifp;
1005 
1006 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
1007 		if_attachdomain1(ifp);
1008 }
1009 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
1010     if_attachdomain, NULL);
1011 
1012 static void
if_attachdomain1(struct ifnet * ifp)1013 if_attachdomain1(struct ifnet *ifp)
1014 {
1015 	struct domain *dp;
1016 
1017 	/*
1018 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
1019 	 * cannot lock ifp->if_afdata initialization, entirely.
1020 	 */
1021 	IF_AFDATA_LOCK(ifp);
1022 	if (ifp->if_afdata_initialized >= domain_init_status) {
1023 		IF_AFDATA_UNLOCK(ifp);
1024 		log(LOG_WARNING, "%s called more than once on %s\n",
1025 		    __func__, ifp->if_xname);
1026 		return;
1027 	}
1028 	ifp->if_afdata_initialized = domain_init_status;
1029 	IF_AFDATA_UNLOCK(ifp);
1030 
1031 	/* address family dependent data region */
1032 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
1033 	for (dp = domains; dp; dp = dp->dom_next) {
1034 		if (dp->dom_ifattach)
1035 			ifp->if_afdata[dp->dom_family] =
1036 			    (*dp->dom_ifattach)(ifp);
1037 	}
1038 }
1039 
1040 /*
1041  * Remove any unicast or broadcast network addresses from an interface.
1042  */
1043 void
if_purgeaddrs(struct ifnet * ifp)1044 if_purgeaddrs(struct ifnet *ifp)
1045 {
1046 	struct ifaddr *ifa;
1047 
1048 	while (1) {
1049 		struct epoch_tracker et;
1050 
1051 		NET_EPOCH_ENTER(et);
1052 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1053 			if (ifa->ifa_addr->sa_family != AF_LINK)
1054 				break;
1055 		}
1056 		NET_EPOCH_EXIT(et);
1057 
1058 		if (ifa == NULL)
1059 			break;
1060 #ifdef INET
1061 		/* XXX: Ugly!! ad hoc just for INET */
1062 		if (ifa->ifa_addr->sa_family == AF_INET) {
1063 			struct ifaliasreq ifr;
1064 
1065 			bzero(&ifr, sizeof(ifr));
1066 			ifr.ifra_addr = *ifa->ifa_addr;
1067 			if (ifa->ifa_dstaddr)
1068 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
1069 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
1070 			    NULL) == 0)
1071 				continue;
1072 		}
1073 #endif /* INET */
1074 #ifdef INET6
1075 		if (ifa->ifa_addr->sa_family == AF_INET6) {
1076 			in6_purgeifaddr((struct in6_ifaddr *)ifa);
1077 			/* ifp_addrhead is already updated */
1078 			continue;
1079 		}
1080 #endif /* INET6 */
1081 		IF_ADDR_WLOCK(ifp);
1082 		CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
1083 		IF_ADDR_WUNLOCK(ifp);
1084 		ifa_free(ifa);
1085 	}
1086 }
1087 
1088 /*
1089  * Remove any multicast network addresses from an interface when an ifnet
1090  * is going away.
1091  */
1092 static void
if_purgemaddrs(struct ifnet * ifp)1093 if_purgemaddrs(struct ifnet *ifp)
1094 {
1095 	struct ifmultiaddr *ifma;
1096 
1097 	IF_ADDR_WLOCK(ifp);
1098 	while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
1099 		ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
1100 		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
1101 		if_delmulti_locked(ifp, ifma, 1);
1102 	}
1103 	IF_ADDR_WUNLOCK(ifp);
1104 }
1105 
1106 /*
1107  * Detach an interface, removing it from the list of "active" interfaces.
1108  * If vmove flag is set on entry to if_detach_internal(), perform only a
1109  * limited subset of cleanup tasks, given that we are moving an ifnet from
1110  * one vnet to another, where it must be fully operational.
1111  *
1112  * XXXRW: There are some significant questions about event ordering, and
1113  * how to prevent things from starting to use the interface during detach.
1114  */
1115 void
if_detach(struct ifnet * ifp)1116 if_detach(struct ifnet *ifp)
1117 {
1118 	bool found;
1119 
1120 	CURVNET_SET_QUIET(ifp->if_vnet);
1121 	found = if_unlink_ifnet(ifp, false);
1122 	if (found) {
1123 		sx_xlock(&ifnet_detach_sxlock);
1124 		if_detach_internal(ifp, false);
1125 		sx_xunlock(&ifnet_detach_sxlock);
1126 	}
1127 	CURVNET_RESTORE();
1128 }
1129 
1130 /*
1131  * The vmove flag, if set, indicates that we are called from a callpath
1132  * that is moving an interface to a different vnet instance.
1133  *
1134  * The shutdown flag, if set, indicates that we are called in the
1135  * process of shutting down a vnet instance.  Currently only the
1136  * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
1137  * on a vnet instance shutdown without this flag being set, e.g., when
1138  * the cloned interfaces are destoyed as first thing of teardown.
1139  */
1140 static void
if_detach_internal(struct ifnet * ifp,bool vmove)1141 if_detach_internal(struct ifnet *ifp, bool vmove)
1142 {
1143 	struct ifaddr *ifa;
1144 	int i;
1145 	struct domain *dp;
1146 #ifdef VIMAGE
1147 	bool shutdown;
1148 
1149 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
1150 #endif
1151 
1152 	/*
1153 	 * At this point we know the interface still was on the ifnet list
1154 	 * and we removed it so we are in a stable state.
1155 	 */
1156 	NET_EPOCH_WAIT();
1157 
1158 	/*
1159 	 * Ensure all pending EPOCH(9) callbacks have been executed. This
1160 	 * fixes issues about late destruction of multicast options
1161 	 * which lead to leave group calls, which in turn access the
1162 	 * belonging ifnet structure:
1163 	 */
1164 	NET_EPOCH_DRAIN_CALLBACKS();
1165 
1166 	/*
1167 	 * In any case (destroy or vmove) detach us from the groups
1168 	 * and remove/wait for pending events on the taskq.
1169 	 * XXX-BZ in theory an interface could still enqueue a taskq change?
1170 	 */
1171 	if_delgroups(ifp);
1172 
1173 	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
1174 	taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask);
1175 
1176 	if_down(ifp);
1177 
1178 #ifdef VIMAGE
1179 	/*
1180 	 * On VNET shutdown abort here as the stack teardown will do all
1181 	 * the work top-down for us.
1182 	 */
1183 	if (shutdown) {
1184 		/* Give interface users the chance to clean up. */
1185 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1186 
1187 		/*
1188 		 * In case of a vmove we are done here without error.
1189 		 * If we would signal an error it would lead to the same
1190 		 * abort as if we did not find the ifnet anymore.
1191 		 * if_detach() calls us in void context and does not care
1192 		 * about an early abort notification, so life is splendid :)
1193 		 */
1194 		goto finish_vnet_shutdown;
1195 	}
1196 #endif
1197 
1198 	/*
1199 	 * At this point we are not tearing down a VNET and are either
1200 	 * going to destroy or vmove the interface and have to cleanup
1201 	 * accordingly.
1202 	 */
1203 
1204 	/*
1205 	 * Remove routes and flush queues.
1206 	 */
1207 #ifdef ALTQ
1208 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1209 		altq_disable(&ifp->if_snd);
1210 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1211 		altq_detach(&ifp->if_snd);
1212 #endif
1213 
1214 	if_purgeaddrs(ifp);
1215 
1216 #ifdef INET
1217 	in_ifdetach(ifp);
1218 #endif
1219 
1220 #ifdef INET6
1221 	/*
1222 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
1223 	 * before removing routing entries below, since IPv6 interface direct
1224 	 * routes are expected to be removed by the IPv6-specific kernel API.
1225 	 * Otherwise, the kernel will detect some inconsistency and bark it.
1226 	 */
1227 	in6_ifdetach(ifp);
1228 #endif
1229 	if_purgemaddrs(ifp);
1230 
1231 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1232 	if (IS_DEFAULT_VNET(curvnet))
1233 		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
1234 
1235 	if (!vmove) {
1236 		/*
1237 		 * Prevent further calls into the device driver via ifnet.
1238 		 */
1239 		if_dead(ifp);
1240 
1241 		/*
1242 		 * Clean up all addresses.
1243 		 */
1244 		IF_ADDR_WLOCK(ifp);
1245 		if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) {
1246 			ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
1247 			CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
1248 			IF_ADDR_WUNLOCK(ifp);
1249 			ifa_free(ifa);
1250 		} else
1251 			IF_ADDR_WUNLOCK(ifp);
1252 	}
1253 
1254 	rt_flushifroutes(ifp);
1255 
1256 #ifdef VIMAGE
1257 finish_vnet_shutdown:
1258 #endif
1259 	/*
1260 	 * We cannot hold the lock over dom_ifdetach calls as they might
1261 	 * sleep, for example trying to drain a callout, thus open up the
1262 	 * theoretical race with re-attaching.
1263 	 */
1264 	IF_AFDATA_LOCK(ifp);
1265 	i = ifp->if_afdata_initialized;
1266 	ifp->if_afdata_initialized = 0;
1267 	IF_AFDATA_UNLOCK(ifp);
1268 	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
1269 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
1270 			(*dp->dom_ifdetach)(ifp,
1271 			    ifp->if_afdata[dp->dom_family]);
1272 			ifp->if_afdata[dp->dom_family] = NULL;
1273 		}
1274 	}
1275 }
1276 
1277 #ifdef VIMAGE
1278 /*
1279  * if_vmove() performs a limited version of if_detach() in current
1280  * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
1281  * An attempt is made to shrink if_index in current vnet, find an
1282  * unused if_index in target vnet and calls if_grow() if necessary,
1283  * and finally find an unused if_xname for the target vnet.
1284  */
1285 static void
if_vmove(struct ifnet * ifp,struct vnet * new_vnet)1286 if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
1287 {
1288 	void *old;
1289 
1290 #ifdef DEV_BPF
1291 	/*
1292 	 * Detach BPF file descriptors from its interface.
1293 	 */
1294 	bpf_ifdetach(ifp);
1295 #endif
1296 
1297 	/*
1298 	 * Detach from current vnet, but preserve LLADDR info, do not
1299 	 * mark as dead etc. so that the ifnet can be reattached later.
1300 	 */
1301 	if_detach_internal(ifp, true);
1302 
1303 	/*
1304 	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
1305 	 * the if_index for that vnet if possible.
1306 	 *
1307 	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
1308 	 * or we'd lock on one vnet and unlock on another.
1309 	 */
1310 	IFNET_WLOCK();
1311 	ifindex_free(ifp->if_index);
1312 	IFNET_WUNLOCK();
1313 
1314 	/*
1315 	 * Perform interface-specific reassignment tasks, if provided by
1316 	 * the driver.
1317 	 */
1318 	if (ifp->if_reassign != NULL)
1319 		ifp->if_reassign(ifp, new_vnet, NULL);
1320 
1321 	/*
1322 	 * Switch to the context of the target vnet.
1323 	 */
1324 	CURVNET_SET_QUIET(new_vnet);
1325  restart:
1326 	IFNET_WLOCK();
1327 	ifp->if_index = ifindex_alloc(&old);
1328 	if (__predict_false(ifp->if_index == USHRT_MAX)) {
1329 		IFNET_WUNLOCK();
1330 		epoch_wait_preempt(net_epoch_preempt);
1331 		free(old, M_IFNET);
1332 		goto restart;
1333 	}
1334 	ifnet_setbyindex(ifp->if_index, ifp);
1335 	IFNET_WUNLOCK();
1336 
1337 	if_attach_internal(ifp, true);
1338 
1339 	CURVNET_RESTORE();
1340 }
1341 
1342 /*
1343  * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1344  */
1345 static int
if_vmove_loan(struct thread * td,struct ifnet * ifp,char * ifname,int jid)1346 if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1347 {
1348 	struct prison *pr;
1349 	struct ifnet *difp;
1350 	bool found;
1351 	bool shutdown;
1352 
1353 	/* Try to find the prison within our visibility. */
1354 	sx_slock(&allprison_lock);
1355 	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1356 	sx_sunlock(&allprison_lock);
1357 	if (pr == NULL)
1358 		return (ENXIO);
1359 	prison_hold_locked(pr);
1360 	mtx_unlock(&pr->pr_mtx);
1361 
1362 	/* Do not try to move the iface from and to the same prison. */
1363 	if (pr->pr_vnet == ifp->if_vnet) {
1364 		prison_free(pr);
1365 		return (EEXIST);
1366 	}
1367 
1368 	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1369 	/* XXX Lock interfaces to avoid races. */
1370 	CURVNET_SET_QUIET(pr->pr_vnet);
1371 	difp = ifunit(ifname);
1372 	CURVNET_RESTORE();
1373 	if (difp != NULL) {
1374 		prison_free(pr);
1375 		return (EEXIST);
1376 	}
1377 	sx_xlock(&ifnet_detach_sxlock);
1378 
1379 	/* Make sure the VNET is stable. */
1380 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
1381 	if (shutdown) {
1382 		sx_xunlock(&ifnet_detach_sxlock);
1383 		prison_free(pr);
1384 		return (EBUSY);
1385 	}
1386 
1387 	found = if_unlink_ifnet(ifp, true);
1388 	if (! found) {
1389 		sx_xunlock(&ifnet_detach_sxlock);
1390 		prison_free(pr);
1391 		return (ENODEV);
1392 	}
1393 
1394 	/* Move the interface into the child jail/vnet. */
1395 	if_vmove(ifp, pr->pr_vnet);
1396 
1397 	/* Report the new if_xname back to the userland. */
1398 	sprintf(ifname, "%s", ifp->if_xname);
1399 
1400 	sx_xunlock(&ifnet_detach_sxlock);
1401 
1402 	prison_free(pr);
1403 	return (0);
1404 }
1405 
1406 static int
if_vmove_reclaim(struct thread * td,char * ifname,int jid)1407 if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1408 {
1409 	struct prison *pr;
1410 	struct vnet *vnet_dst;
1411 	struct ifnet *ifp;
1412 	int found;
1413  	bool shutdown;
1414 
1415 	/* Try to find the prison within our visibility. */
1416 	sx_slock(&allprison_lock);
1417 	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1418 	sx_sunlock(&allprison_lock);
1419 	if (pr == NULL)
1420 		return (ENXIO);
1421 	prison_hold_locked(pr);
1422 	mtx_unlock(&pr->pr_mtx);
1423 
1424 	/* Make sure the named iface exists in the source prison/vnet. */
1425 	CURVNET_SET(pr->pr_vnet);
1426 	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1427 	if (ifp == NULL) {
1428 		CURVNET_RESTORE();
1429 		prison_free(pr);
1430 		return (ENXIO);
1431 	}
1432 
1433 	/* Do not try to move the iface from and to the same prison. */
1434 	vnet_dst = TD_TO_VNET(td);
1435 	if (vnet_dst == ifp->if_vnet) {
1436 		CURVNET_RESTORE();
1437 		prison_free(pr);
1438 		return (EEXIST);
1439 	}
1440 
1441 	/* Make sure the VNET is stable. */
1442 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
1443 	if (shutdown) {
1444 		CURVNET_RESTORE();
1445 		prison_free(pr);
1446 		return (EBUSY);
1447 	}
1448 
1449 	/* Get interface back from child jail/vnet. */
1450 	found = if_unlink_ifnet(ifp, true);
1451 	MPASS(found);
1452 	sx_xlock(&ifnet_detach_sxlock);
1453 	if_vmove(ifp, vnet_dst);
1454 	sx_xunlock(&ifnet_detach_sxlock);
1455 	CURVNET_RESTORE();
1456 
1457 	/* Report the new if_xname back to the userland. */
1458 	sprintf(ifname, "%s", ifp->if_xname);
1459 
1460 	prison_free(pr);
1461 	return (0);
1462 }
1463 #endif /* VIMAGE */
1464 
1465 /*
1466  * Add a group to an interface
1467  */
1468 int
if_addgroup(struct ifnet * ifp,const char * groupname)1469 if_addgroup(struct ifnet *ifp, const char *groupname)
1470 {
1471 	struct ifg_list		*ifgl;
1472 	struct ifg_group	*ifg = NULL;
1473 	struct ifg_member	*ifgm;
1474 	int 			 new = 0;
1475 
1476 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1477 	    groupname[strlen(groupname) - 1] <= '9')
1478 		return (EINVAL);
1479 
1480 	IFNET_WLOCK();
1481 	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1482 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1483 			IFNET_WUNLOCK();
1484 			return (EEXIST);
1485 		}
1486 
1487 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) {
1488 	    	IFNET_WUNLOCK();
1489 		return (ENOMEM);
1490 	}
1491 
1492 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
1493 		free(ifgl, M_TEMP);
1494 		IFNET_WUNLOCK();
1495 		return (ENOMEM);
1496 	}
1497 
1498 	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1499 		if (!strcmp(ifg->ifg_group, groupname))
1500 			break;
1501 
1502 	if (ifg == NULL) {
1503 		if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) {
1504 			free(ifgl, M_TEMP);
1505 			free(ifgm, M_TEMP);
1506 			IFNET_WUNLOCK();
1507 			return (ENOMEM);
1508 		}
1509 		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1510 		ifg->ifg_refcnt = 0;
1511 		CK_STAILQ_INIT(&ifg->ifg_members);
1512 		CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1513 		new = 1;
1514 	}
1515 
1516 	ifg->ifg_refcnt++;
1517 	ifgl->ifgl_group = ifg;
1518 	ifgm->ifgm_ifp = ifp;
1519 
1520 	IF_ADDR_WLOCK(ifp);
1521 	CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1522 	CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1523 	IF_ADDR_WUNLOCK(ifp);
1524 
1525 	IFNET_WUNLOCK();
1526 
1527 	if (new)
1528 		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1529 	EVENTHANDLER_INVOKE(group_change_event, groupname);
1530 
1531 	return (0);
1532 }
1533 
1534 /*
1535  * Helper function to remove a group out of an interface.  Expects the global
1536  * ifnet lock to be write-locked, and drops it before returning.
1537  */
1538 static void
_if_delgroup_locked(struct ifnet * ifp,struct ifg_list * ifgl,const char * groupname)1539 _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
1540     const char *groupname)
1541 {
1542 	struct ifg_member *ifgm;
1543 	bool freeifgl;
1544 
1545 	IFNET_WLOCK_ASSERT();
1546 
1547 	IF_ADDR_WLOCK(ifp);
1548 	CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
1549 	IF_ADDR_WUNLOCK(ifp);
1550 
1551 	CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
1552 		if (ifgm->ifgm_ifp == ifp) {
1553 			CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1554 			    ifg_member, ifgm_next);
1555 			break;
1556 		}
1557 	}
1558 
1559 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1560 		CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
1561 		    ifg_next);
1562 		freeifgl = true;
1563 	} else {
1564 		freeifgl = false;
1565 	}
1566 	IFNET_WUNLOCK();
1567 
1568 	NET_EPOCH_WAIT();
1569 	EVENTHANDLER_INVOKE(group_change_event, groupname);
1570 	if (freeifgl) {
1571 		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1572 		free(ifgl->ifgl_group, M_TEMP);
1573 	}
1574 	free(ifgm, M_TEMP);
1575 	free(ifgl, M_TEMP);
1576 }
1577 
1578 /*
1579  * Remove a group from an interface
1580  */
1581 int
if_delgroup(struct ifnet * ifp,const char * groupname)1582 if_delgroup(struct ifnet *ifp, const char *groupname)
1583 {
1584 	struct ifg_list *ifgl;
1585 
1586 	IFNET_WLOCK();
1587 	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1588 		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
1589 			break;
1590 	if (ifgl == NULL) {
1591 		IFNET_WUNLOCK();
1592 		return (ENOENT);
1593 	}
1594 
1595 	_if_delgroup_locked(ifp, ifgl, groupname);
1596 
1597 	return (0);
1598 }
1599 
1600 /*
1601  * Remove an interface from all groups
1602  */
1603 static void
if_delgroups(struct ifnet * ifp)1604 if_delgroups(struct ifnet *ifp)
1605 {
1606 	struct ifg_list *ifgl;
1607 	char groupname[IFNAMSIZ];
1608 
1609 	IFNET_WLOCK();
1610 	while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
1611 		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1612 		_if_delgroup_locked(ifp, ifgl, groupname);
1613 		IFNET_WLOCK();
1614 	}
1615 	IFNET_WUNLOCK();
1616 }
1617 
1618 /*
1619  * Stores all groups from an interface in memory pointed to by ifgr.
1620  */
1621 static int
if_getgroup(struct ifgroupreq * ifgr,struct ifnet * ifp)1622 if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
1623 {
1624 	int			 len, error;
1625 	struct ifg_list		*ifgl;
1626 	struct ifg_req		 ifgrq, *ifgp;
1627 
1628 	NET_EPOCH_ASSERT();
1629 
1630 	if (ifgr->ifgr_len == 0) {
1631 		CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1632 			ifgr->ifgr_len += sizeof(struct ifg_req);
1633 		return (0);
1634 	}
1635 
1636 	len = ifgr->ifgr_len;
1637 	ifgp = ifgr->ifgr_groups;
1638 	/* XXX: wire */
1639 	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1640 		if (len < sizeof(ifgrq))
1641 			return (EINVAL);
1642 		bzero(&ifgrq, sizeof ifgrq);
1643 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1644 		    sizeof(ifgrq.ifgrq_group));
1645 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req))))
1646 			return (error);
1647 		len -= sizeof(ifgrq);
1648 		ifgp++;
1649 	}
1650 
1651 	return (0);
1652 }
1653 
1654 /*
1655  * Stores all members of a group in memory pointed to by igfr
1656  */
1657 static int
if_getgroupmembers(struct ifgroupreq * ifgr)1658 if_getgroupmembers(struct ifgroupreq *ifgr)
1659 {
1660 	struct ifg_group	*ifg;
1661 	struct ifg_member	*ifgm;
1662 	struct ifg_req		 ifgrq, *ifgp;
1663 	int			 len, error;
1664 
1665 	IFNET_RLOCK();
1666 	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1667 		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
1668 			break;
1669 	if (ifg == NULL) {
1670 		IFNET_RUNLOCK();
1671 		return (ENOENT);
1672 	}
1673 
1674 	if (ifgr->ifgr_len == 0) {
1675 		CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1676 			ifgr->ifgr_len += sizeof(ifgrq);
1677 		IFNET_RUNLOCK();
1678 		return (0);
1679 	}
1680 
1681 	len = ifgr->ifgr_len;
1682 	ifgp = ifgr->ifgr_groups;
1683 	CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1684 		if (len < sizeof(ifgrq)) {
1685 			IFNET_RUNLOCK();
1686 			return (EINVAL);
1687 		}
1688 		bzero(&ifgrq, sizeof ifgrq);
1689 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1690 		    sizeof(ifgrq.ifgrq_member));
1691 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1692 			IFNET_RUNLOCK();
1693 			return (error);
1694 		}
1695 		len -= sizeof(ifgrq);
1696 		ifgp++;
1697 	}
1698 	IFNET_RUNLOCK();
1699 
1700 	return (0);
1701 }
1702 
1703 /*
1704  * Return counter values from counter(9)s stored in ifnet.
1705  */
1706 uint64_t
if_get_counter_default(struct ifnet * ifp,ift_counter cnt)1707 if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1708 {
1709 
1710 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1711 
1712 	return (counter_u64_fetch(ifp->if_counters[cnt]));
1713 }
1714 
1715 /*
1716  * Increase an ifnet counter. Usually used for counters shared
1717  * between the stack and a driver, but function supports them all.
1718  */
1719 void
if_inc_counter(struct ifnet * ifp,ift_counter cnt,int64_t inc)1720 if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1721 {
1722 
1723 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1724 
1725 	counter_u64_add(ifp->if_counters[cnt], inc);
1726 }
1727 
1728 /*
1729  * Copy data from ifnet to userland API structure if_data.
1730  */
1731 void
if_data_copy(struct ifnet * ifp,struct if_data * ifd)1732 if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1733 {
1734 
1735 	ifd->ifi_type = ifp->if_type;
1736 	ifd->ifi_physical = 0;
1737 	ifd->ifi_addrlen = ifp->if_addrlen;
1738 	ifd->ifi_hdrlen = ifp->if_hdrlen;
1739 	ifd->ifi_link_state = ifp->if_link_state;
1740 	ifd->ifi_vhid = 0;
1741 	ifd->ifi_datalen = sizeof(struct if_data);
1742 	ifd->ifi_mtu = ifp->if_mtu;
1743 	ifd->ifi_metric = ifp->if_metric;
1744 	ifd->ifi_baudrate = ifp->if_baudrate;
1745 	ifd->ifi_hwassist = ifp->if_hwassist;
1746 	ifd->ifi_epoch = ifp->if_epoch;
1747 	ifd->ifi_lastchange = ifp->if_lastchange;
1748 
1749 	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1750 	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1751 	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1752 	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1753 	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1754 	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1755 	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1756 	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1757 	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1758 	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1759 	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1760 	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1761 }
1762 
1763 /*
1764  * Initialization, destruction and refcounting functions for ifaddrs.
1765  */
1766 struct ifaddr *
ifa_alloc(size_t size,int flags)1767 ifa_alloc(size_t size, int flags)
1768 {
1769 	struct ifaddr *ifa;
1770 
1771 	KASSERT(size >= sizeof(struct ifaddr),
1772 	    ("%s: invalid size %zu", __func__, size));
1773 
1774 	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1775 	if (ifa == NULL)
1776 		return (NULL);
1777 
1778 	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1779 		goto fail;
1780 	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1781 		goto fail;
1782 	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1783 		goto fail;
1784 	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1785 		goto fail;
1786 
1787 	refcount_init(&ifa->ifa_refcnt, 1);
1788 
1789 	return (ifa);
1790 
1791 fail:
1792 	/* free(NULL) is okay */
1793 	counter_u64_free(ifa->ifa_opackets);
1794 	counter_u64_free(ifa->ifa_ipackets);
1795 	counter_u64_free(ifa->ifa_obytes);
1796 	counter_u64_free(ifa->ifa_ibytes);
1797 	free(ifa, M_IFADDR);
1798 
1799 	return (NULL);
1800 }
1801 
1802 void
ifa_ref(struct ifaddr * ifa)1803 ifa_ref(struct ifaddr *ifa)
1804 {
1805 	u_int old;
1806 
1807 	old = refcount_acquire(&ifa->ifa_refcnt);
1808 	KASSERT(old > 0, ("%s: ifa %p has 0 refs", __func__, ifa));
1809 }
1810 
1811 int
ifa_try_ref(struct ifaddr * ifa)1812 ifa_try_ref(struct ifaddr *ifa)
1813 {
1814 
1815 	NET_EPOCH_ASSERT();
1816 	return (refcount_acquire_if_not_zero(&ifa->ifa_refcnt));
1817 }
1818 
1819 static void
ifa_destroy(epoch_context_t ctx)1820 ifa_destroy(epoch_context_t ctx)
1821 {
1822 	struct ifaddr *ifa;
1823 
1824 	ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx);
1825 	counter_u64_free(ifa->ifa_opackets);
1826 	counter_u64_free(ifa->ifa_ipackets);
1827 	counter_u64_free(ifa->ifa_obytes);
1828 	counter_u64_free(ifa->ifa_ibytes);
1829 	free(ifa, M_IFADDR);
1830 }
1831 
1832 void
ifa_free(struct ifaddr * ifa)1833 ifa_free(struct ifaddr *ifa)
1834 {
1835 
1836 	if (refcount_release(&ifa->ifa_refcnt))
1837 		NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx);
1838 }
1839 
/*
 * XXX: sockaddr_dl has a deeper structure than the sockaddr structs used
 * to represent other address families, so it needs a comparison of its
 * own: two of them compare equal when their sdl_len fields match and the
 * link-level address bytes (CLLADDR(), a1's sdl_alen of them) are
 * identical.
 */

#define	sa_dl_equal(a1, a2)						\
	(((const struct sockaddr_dl *)(a1))->sdl_len ==			\
	    ((const struct sockaddr_dl *)(a2))->sdl_len &&		\
	    bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	    CLLADDR((const struct sockaddr_dl *)(a2)),			\
	    ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)
1852 
1853 /*
1854  * Locate an interface based on a complete address.
1855  */
1856 /*ARGSUSED*/
1857 struct ifaddr *
ifa_ifwithaddr(const struct sockaddr * addr)1858 ifa_ifwithaddr(const struct sockaddr *addr)
1859 {
1860 	struct ifnet *ifp;
1861 	struct ifaddr *ifa;
1862 
1863 	NET_EPOCH_ASSERT();
1864 
1865 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1866 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1867 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1868 				continue;
1869 			if (sa_equal(addr, ifa->ifa_addr)) {
1870 				goto done;
1871 			}
1872 			/* IP6 doesn't have broadcast */
1873 			if ((ifp->if_flags & IFF_BROADCAST) &&
1874 			    ifa->ifa_broadaddr &&
1875 			    ifa->ifa_broadaddr->sa_len != 0 &&
1876 			    sa_equal(ifa->ifa_broadaddr, addr)) {
1877 				goto done;
1878 			}
1879 		}
1880 	}
1881 	ifa = NULL;
1882 done:
1883 	return (ifa);
1884 }
1885 
1886 int
ifa_ifwithaddr_check(const struct sockaddr * addr)1887 ifa_ifwithaddr_check(const struct sockaddr *addr)
1888 {
1889 	struct epoch_tracker et;
1890 	int rc;
1891 
1892 	NET_EPOCH_ENTER(et);
1893 	rc = (ifa_ifwithaddr(addr) != NULL);
1894 	NET_EPOCH_EXIT(et);
1895 	return (rc);
1896 }
1897 
1898 /*
1899  * Locate an interface based on the broadcast address.
1900  */
1901 /* ARGSUSED */
1902 struct ifaddr *
ifa_ifwithbroadaddr(const struct sockaddr * addr,int fibnum)1903 ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1904 {
1905 	struct ifnet *ifp;
1906 	struct ifaddr *ifa;
1907 
1908 	NET_EPOCH_ASSERT();
1909 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1910 		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1911 			continue;
1912 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1913 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1914 				continue;
1915 			if ((ifp->if_flags & IFF_BROADCAST) &&
1916 			    ifa->ifa_broadaddr &&
1917 			    ifa->ifa_broadaddr->sa_len != 0 &&
1918 			    sa_equal(ifa->ifa_broadaddr, addr)) {
1919 				goto done;
1920 			}
1921 		}
1922 	}
1923 	ifa = NULL;
1924 done:
1925 	return (ifa);
1926 }
1927 
1928 /*
1929  * Locate the point to point interface with a given destination address.
1930  */
1931 /*ARGSUSED*/
1932 struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr * addr,int fibnum)1933 ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1934 {
1935 	struct ifnet *ifp;
1936 	struct ifaddr *ifa;
1937 
1938 	NET_EPOCH_ASSERT();
1939 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1940 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1941 			continue;
1942 		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1943 			continue;
1944 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1945 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1946 				continue;
1947 			if (ifa->ifa_dstaddr != NULL &&
1948 			    sa_equal(addr, ifa->ifa_dstaddr)) {
1949 				goto done;
1950 			}
1951 		}
1952 	}
1953 	ifa = NULL;
1954 done:
1955 	return (ifa);
1956 }
1957 
1958 /*
1959  * Find an interface on a specific network.  If many, choice
1960  * is most specific found.
1961  */
1962 struct ifaddr *
ifa_ifwithnet(const struct sockaddr * addr,int ignore_ptp,int fibnum)1963 ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
1964 {
1965 	struct ifnet *ifp;
1966 	struct ifaddr *ifa;
1967 	struct ifaddr *ifa_maybe = NULL;
1968 	u_int af = addr->sa_family;
1969 	const char *addr_data = addr->sa_data, *cplim;
1970 	const struct sockaddr_dl *sdl;
1971 
1972 	NET_EPOCH_ASSERT();
1973 	/*
1974 	 * AF_LINK addresses can be looked up directly by their index number,
1975 	 * so do that if we can.
1976 	 */
1977 	if (af == AF_LINK) {
1978 		sdl = (const struct sockaddr_dl *)addr;
1979 		if (sdl->sdl_index && sdl->sdl_index <= V_if_index) {
1980 			ifp = ifnet_byindex(sdl->sdl_index);
1981 			if (ifp == NULL)
1982 				return (NULL);
1983 
1984 			return (ifp->if_addr);
1985 		}
1986 	}
1987 
1988 	/*
1989 	 * Scan though each interface, looking for ones that have addresses
1990 	 * in this address family and the requested fib.
1991 	 */
1992 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1993 		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1994 			continue;
1995 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1996 			const char *cp, *cp2, *cp3;
1997 
1998 			if (ifa->ifa_addr->sa_family != af)
1999 next:				continue;
2000 			if (af == AF_INET &&
2001 			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
2002 				/*
2003 				 * This is a bit broken as it doesn't
2004 				 * take into account that the remote end may
2005 				 * be a single node in the network we are
2006 				 * looking for.
2007 				 * The trouble is that we don't know the
2008 				 * netmask for the remote end.
2009 				 */
2010 				if (ifa->ifa_dstaddr != NULL &&
2011 				    sa_equal(addr, ifa->ifa_dstaddr)) {
2012 					goto done;
2013 				}
2014 			} else {
2015 				/*
2016 				 * Scan all the bits in the ifa's address.
2017 				 * If a bit dissagrees with what we are
2018 				 * looking for, mask it with the netmask
2019 				 * to see if it really matters.
2020 				 * (A byte at a time)
2021 				 */
2022 				if (ifa->ifa_netmask == 0)
2023 					continue;
2024 				cp = addr_data;
2025 				cp2 = ifa->ifa_addr->sa_data;
2026 				cp3 = ifa->ifa_netmask->sa_data;
2027 				cplim = ifa->ifa_netmask->sa_len
2028 					+ (char *)ifa->ifa_netmask;
2029 				while (cp3 < cplim)
2030 					if ((*cp++ ^ *cp2++) & *cp3++)
2031 						goto next; /* next address! */
2032 				/*
2033 				 * If the netmask of what we just found
2034 				 * is more specific than what we had before
2035 				 * (if we had one), or if the virtual status
2036 				 * of new prefix is better than of the old one,
2037 				 * then remember the new one before continuing
2038 				 * to search for an even better one.
2039 				 */
2040 				if (ifa_maybe == NULL ||
2041 				    ifa_preferred(ifa_maybe, ifa) ||
2042 				    rn_refines((caddr_t)ifa->ifa_netmask,
2043 				    (caddr_t)ifa_maybe->ifa_netmask)) {
2044 					ifa_maybe = ifa;
2045 				}
2046 			}
2047 		}
2048 	}
2049 	ifa = ifa_maybe;
2050 	ifa_maybe = NULL;
2051 done:
2052 	return (ifa);
2053 }
2054 
2055 /*
2056  * Find an interface address specific to an interface best matching
2057  * a given address.
2058  */
2059 struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr * addr,struct ifnet * ifp)2060 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2061 {
2062 	struct ifaddr *ifa;
2063 	const char *cp, *cp2, *cp3;
2064 	char *cplim;
2065 	struct ifaddr *ifa_maybe = NULL;
2066 	u_int af = addr->sa_family;
2067 
2068 	if (af >= AF_MAX)
2069 		return (NULL);
2070 
2071 	NET_EPOCH_ASSERT();
2072 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2073 		if (ifa->ifa_addr->sa_family != af)
2074 			continue;
2075 		if (ifa_maybe == NULL)
2076 			ifa_maybe = ifa;
2077 		if (ifa->ifa_netmask == 0) {
2078 			if (sa_equal(addr, ifa->ifa_addr) ||
2079 			    (ifa->ifa_dstaddr &&
2080 			    sa_equal(addr, ifa->ifa_dstaddr)))
2081 				goto done;
2082 			continue;
2083 		}
2084 		if (ifp->if_flags & IFF_POINTOPOINT) {
2085 			if (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))
2086 				goto done;
2087 		} else {
2088 			cp = addr->sa_data;
2089 			cp2 = ifa->ifa_addr->sa_data;
2090 			cp3 = ifa->ifa_netmask->sa_data;
2091 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2092 			for (; cp3 < cplim; cp3++)
2093 				if ((*cp++ ^ *cp2++) & *cp3)
2094 					break;
2095 			if (cp3 == cplim)
2096 				goto done;
2097 		}
2098 	}
2099 	ifa = ifa_maybe;
2100 done:
2101 	return (ifa);
2102 }
2103 
2104 /*
2105  * See whether new ifa is better than current one:
2106  * 1) A non-virtual one is preferred over virtual.
2107  * 2) A virtual in master state preferred over any other state.
2108  *
2109  * Used in several address selecting functions.
2110  */
2111 int
ifa_preferred(struct ifaddr * cur,struct ifaddr * next)2112 ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2113 {
2114 
2115 	return (cur->ifa_carp && (!next->ifa_carp ||
2116 	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2117 }
2118 
2119 struct sockaddr_dl *
link_alloc_sdl(size_t size,int flags)2120 link_alloc_sdl(size_t size, int flags)
2121 {
2122 
2123 	return (malloc(size, M_TEMP, flags));
2124 }
2125 
2126 void
link_free_sdl(struct sockaddr * sa)2127 link_free_sdl(struct sockaddr *sa)
2128 {
2129 	free(sa, M_TEMP);
2130 }
2131 
2132 /*
2133  * Fills in given sdl with interface basic info.
2134  * Returns pointer to filled sdl.
2135  */
2136 struct sockaddr_dl *
link_init_sdl(struct ifnet * ifp,struct sockaddr * paddr,u_char iftype)2137 link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2138 {
2139 	struct sockaddr_dl *sdl;
2140 
2141 	sdl = (struct sockaddr_dl *)paddr;
2142 	memset(sdl, 0, sizeof(struct sockaddr_dl));
2143 	sdl->sdl_len = sizeof(struct sockaddr_dl);
2144 	sdl->sdl_family = AF_LINK;
2145 	sdl->sdl_index = ifp->if_index;
2146 	sdl->sdl_type = iftype;
2147 
2148 	return (sdl);
2149 }
2150 
2151 /*
2152  * Mark an interface down and notify protocols of
2153  * the transition.
2154  */
2155 static void
if_unroute(struct ifnet * ifp,int flag,int fam)2156 if_unroute(struct ifnet *ifp, int flag, int fam)
2157 {
2158 	struct ifaddr *ifa;
2159 	struct epoch_tracker et;
2160 
2161 	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2162 
2163 	ifp->if_flags &= ~flag;
2164 	getmicrotime(&ifp->if_lastchange);
2165 	NET_EPOCH_ENTER(et);
2166 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2167 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2168 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2169 	NET_EPOCH_EXIT(et);
2170 	ifp->if_qflush(ifp);
2171 
2172 	if (ifp->if_carp)
2173 		(*carp_linkstate_p)(ifp);
2174 	rt_ifmsg_14(ifp, IFF_UP);
2175 }
2176 
2177 /*
2178  * Mark an interface up and notify protocols of
2179  * the transition.
2180  */
2181 static void
if_route(struct ifnet * ifp,int flag,int fam)2182 if_route(struct ifnet *ifp, int flag, int fam)
2183 {
2184 	struct ifaddr *ifa;
2185 	struct epoch_tracker et;
2186 
2187 	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
2188 
2189 	ifp->if_flags |= flag;
2190 	getmicrotime(&ifp->if_lastchange);
2191 	NET_EPOCH_ENTER(et);
2192 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
2193 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
2194 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
2195 	NET_EPOCH_EXIT(et);
2196 	if (ifp->if_carp)
2197 		(*carp_linkstate_p)(ifp);
2198 	rt_ifmsg_14(ifp, IFF_UP);
2199 #ifdef INET6
2200 	in6_if_up(ifp);
2201 #endif
2202 }
2203 
/*
 * VLAN hook pointers.  They are only defined here, without an initialiser;
 * presumably if_vlan fills them in (see the XXX notes) -- confirm against
 * if_vlan.  In this file vlan_link_state_p is called from
 * do_link_state_change() for interfaces whose if_vlantrunk is non-NULL.
 */
2204 void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
2205 void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
2206 struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
2207 struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
2208 int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
2209 int	(*vlan_pcp_p)(struct ifnet *, uint16_t *);
2210 int	(*vlan_setcookie_p)(struct ifnet *, void *);
2211 void	*(*vlan_cookie_p)(struct ifnet *);
2212 
2213 /*
2214  * Handle a change in the interface link state. To avoid LORs
2215  * between driver lock and upper layer locks, as well as possible
2216  * recursions, we post event to taskqueue, and all job
2217  * is done in static do_link_state_change().
2218  */
2219 void
if_link_state_change(struct ifnet * ifp,int link_state)2220 if_link_state_change(struct ifnet *ifp, int link_state)
2221 {
2222 	/* Return if state hasn't changed. */
2223 	if (ifp->if_link_state == link_state)
2224 		return;
2225 
2226 	ifp->if_link_state = link_state;
2227 
2228 	/* XXXGL: reference ifp? */
2229 	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2230 }
2231 
2232 static void
do_link_state_change(void * arg,int pending)2233 do_link_state_change(void *arg, int pending)
2234 {
2235 	struct ifnet *ifp;
2236 	int link_state;
2237 
2238 	ifp = arg;
2239 	link_state = ifp->if_link_state;
2240 
2241 	CURVNET_SET(ifp->if_vnet);
2242 	rt_ifmsg_14(ifp, 0);
2243 	if (ifp->if_vlantrunk != NULL)
2244 		(*vlan_link_state_p)(ifp);
2245 
2246 	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2247 	    ifp->if_l2com != NULL)
2248 		(*ng_ether_link_state_p)(ifp, link_state);
2249 	if (ifp->if_carp)
2250 		(*carp_linkstate_p)(ifp);
2251 	if (ifp->if_bridge)
2252 		ifp->if_bridge_linkstate(ifp);
2253 	if (ifp->if_lagg)
2254 		(*lagg_linkstate_p)(ifp, link_state);
2255 
2256 	if (IS_DEFAULT_VNET(curvnet))
2257 		devctl_notify("IFNET", ifp->if_xname,
2258 		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2259 		    NULL);
2260 	if (pending > 1)
2261 		if_printf(ifp, "%d link states coalesced\n", pending);
2262 	if (log_link_state_change)
2263 		if_printf(ifp, "link state changed to %s\n",
2264 		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2265 	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2266 	CURVNET_RESTORE();
2267 }
2268 
2269 /*
2270  * Mark an interface down and notify protocols of
2271  * the transition.
2272  */
2273 void
if_down(struct ifnet * ifp)2274 if_down(struct ifnet *ifp)
2275 {
2276 
2277 	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2278 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2279 }
2280 
2281 /*
2282  * Mark an interface up and notify protocols of
2283  * the transition.
2284  */
2285 void
if_up(struct ifnet * ifp)2286 if_up(struct ifnet *ifp)
2287 {
2288 
2289 	if_route(ifp, IFF_UP, AF_UNSPEC);
2290 	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2291 }
2292 
2293 /*
2294  * Flush an interface queue.
2295  */
2296 void
if_qflush(struct ifnet * ifp)2297 if_qflush(struct ifnet *ifp)
2298 {
2299 	struct mbuf *m, *n;
2300 	struct ifaltq *ifq;
2301 
2302 	ifq = &ifp->if_snd;
2303 	IFQ_LOCK(ifq);
2304 #ifdef ALTQ
2305 	if (ALTQ_IS_ENABLED(ifq))
2306 		ALTQ_PURGE(ifq);
2307 #endif
2308 	n = ifq->ifq_head;
2309 	while ((m = n) != NULL) {
2310 		n = m->m_nextpkt;
2311 		m_freem(m);
2312 	}
2313 	ifq->ifq_head = 0;
2314 	ifq->ifq_tail = 0;
2315 	ifq->ifq_len = 0;
2316 	IFQ_UNLOCK(ifq);
2317 }
2318 
2319 /*
2320  * Map interface name to interface structure pointer, with or without
2321  * returning a reference.
2322  */
2323 struct ifnet *
ifunit_ref(const char * name)2324 ifunit_ref(const char *name)
2325 {
2326 	struct epoch_tracker et;
2327 	struct ifnet *ifp;
2328 
2329 	NET_EPOCH_ENTER(et);
2330 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2331 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2332 		    !(ifp->if_flags & IFF_DYING))
2333 			break;
2334 	}
2335 	if (ifp != NULL) {
2336 		if_ref(ifp);
2337 	}
2338 
2339 	NET_EPOCH_EXIT(et);
2340 	return (ifp);
2341 }
2342 
2343 struct ifnet *
ifunit(const char * name)2344 ifunit(const char *name)
2345 {
2346 	struct epoch_tracker et;
2347 	struct ifnet *ifp;
2348 
2349 	NET_EPOCH_ENTER(et);
2350 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2351 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2352 			break;
2353 	}
2354 	NET_EPOCH_EXIT(et);
2355 	return (ifp);
2356 }
2357 
2358 void *
ifr_buffer_get_buffer(void * data)2359 ifr_buffer_get_buffer(void *data)
2360 {
2361 	union ifreq_union *ifrup;
2362 
2363 	ifrup = data;
2364 #ifdef COMPAT_FREEBSD32
2365 	if (SV_CURPROC_FLAG(SV_ILP32))
2366 		return ((void *)(uintptr_t)
2367 		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2368 #endif
2369 	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2370 }
2371 
2372 static void
ifr_buffer_set_buffer_null(void * data)2373 ifr_buffer_set_buffer_null(void *data)
2374 {
2375 	union ifreq_union *ifrup;
2376 
2377 	ifrup = data;
2378 #ifdef COMPAT_FREEBSD32
2379 	if (SV_CURPROC_FLAG(SV_ILP32))
2380 		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2381 	else
2382 #endif
2383 		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2384 }
2385 
2386 size_t
ifr_buffer_get_length(void * data)2387 ifr_buffer_get_length(void *data)
2388 {
2389 	union ifreq_union *ifrup;
2390 
2391 	ifrup = data;
2392 #ifdef COMPAT_FREEBSD32
2393 	if (SV_CURPROC_FLAG(SV_ILP32))
2394 		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2395 #endif
2396 	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2397 }
2398 
2399 static void
ifr_buffer_set_length(void * data,size_t len)2400 ifr_buffer_set_length(void *data, size_t len)
2401 {
2402 	union ifreq_union *ifrup;
2403 
2404 	ifrup = data;
2405 #ifdef COMPAT_FREEBSD32
2406 	if (SV_CURPROC_FLAG(SV_ILP32))
2407 		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2408 	else
2409 #endif
2410 		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2411 }
2412 
2413 void *
ifr_data_get_ptr(void * ifrp)2414 ifr_data_get_ptr(void *ifrp)
2415 {
2416 	union ifreq_union *ifrup;
2417 
2418 	ifrup = ifrp;
2419 #ifdef COMPAT_FREEBSD32
2420 	if (SV_CURPROC_FLAG(SV_ILP32))
2421 		return ((void *)(uintptr_t)
2422 		    ifrup->ifr32.ifr_ifru.ifru_data);
2423 #endif
2424 		return (ifrup->ifr.ifr_ifru.ifru_data);
2425 }
2426 
2427 /*
2428  * Hardware specific interface ioctls.
 *
 * Handles the interface-level requests that are decided here rather than by
 * the socket's protocol.  "cmd" selects the request, "ifp" is the target
 * interface, "data" is the request argument (viewed as a struct ifreq for
 * most commands and as a struct ifgroupreq for the group commands) and
 * "td" is the calling thread, used for the privilege checks.
 *
 * Returns 0 on success or an errno value.  ENOIOCTL means that cmd is not
 * one of the requests handled by this function.  Most set-type requests
 * call priv_check() first and return its error directly.  Requests that are
 * forwarded to the driver return EOPNOTSUPP when the interface has no
 * if_ioctl method; SIOCSIFFLAGS is the exception and simply skips the
 * driver call.
2429  */
2430 int
ifhwioctl(u_long cmd,struct ifnet * ifp,caddr_t data,struct thread * td)2431 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2432 {
2433 	struct ifreq *ifr;
2434 	int error = 0, do_ifup = 0;
2435 	int new_flags, temp_flags;
2436 	size_t namelen, onamelen;
2437 	size_t descrlen;
2438 	char *descrbuf;
2439 	char new_name[IFNAMSIZ];
2440 	struct ifaddr *ifa;
2441 	struct sockaddr_dl *sdl;
2442 
2443 	ifr = (struct ifreq *)data;
2444 	switch (cmd) {
2445 	case SIOCGIFINDEX:
2446 		ifr->ifr_index = ifp->if_index;
2447 		break;
2448 
2449 	case SIOCGIFFLAGS:
		/*
		 * Stack-owned and driver-owned flags are merged and then
		 * reported as a low 16-bit part and a high part.
		 */
2450 		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2451 		ifr->ifr_flags = temp_flags & 0xffff;
2452 		ifr->ifr_flagshigh = temp_flags >> 16;
2453 		break;
2454 
2455 	case SIOCGIFCAP:
2456 		ifr->ifr_reqcap = ifp->if_capabilities;
2457 		ifr->ifr_curcap = ifp->if_capenable;
2458 		break;
2459 
2460 	case SIOCGIFDATA:
2461 	{
2462 		struct if_data ifd;
2463 
2464 		/* Ensure uninitialised padding is not leaked. */
2465 		memset(&ifd, 0, sizeof(ifd));
2466 
2467 		if_data_copy(ifp, &ifd);
2468 		error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd));
2469 		break;
2470 	}
2471 
2472 #ifdef MAC
2473 	case SIOCGIFMAC:
2474 		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2475 		break;
2476 #endif
2477 
2478 	case SIOCGIFMETRIC:
2479 		ifr->ifr_metric = ifp->if_metric;
2480 		break;
2481 
2482 	case SIOCGIFMTU:
2483 		ifr->ifr_mtu = ifp->if_mtu;
2484 		break;
2485 
2486 	case SIOCGIFPHYS:
2487 		/* XXXGL: did this ever worked? */
2488 		ifr->ifr_phys = 0;
2489 		break;
2490 
2491 	case SIOCGIFDESCR:
2492 		error = 0;
2493 		sx_slock(&ifdescr_sx);
2494 		if (ifp->if_description == NULL)
2495 			error = ENOMSG;
2496 		else {
2497 			/* space for terminating nul */
2498 			descrlen = strlen(ifp->if_description) + 1;
			/*
			 * If the caller's buffer is too small nothing is
			 * copied and the buffer pointer in the request is
			 * cleared; the required length is reported either
			 * way.
			 */
2499 			if (ifr_buffer_get_length(ifr) < descrlen)
2500 				ifr_buffer_set_buffer_null(ifr);
2501 			else
2502 				error = copyout(ifp->if_description,
2503 				    ifr_buffer_get_buffer(ifr), descrlen);
2504 			ifr_buffer_set_length(ifr, descrlen);
2505 		}
2506 		sx_sunlock(&ifdescr_sx);
2507 		break;
2508 
2509 	case SIOCSIFDESCR:
2510 		error = priv_check(td, PRIV_NET_SETIFDESCR);
2511 		if (error)
2512 			return (error);
2513 
2514 		/*
2515 		 * Copy only (length-1) bytes to make sure that
2516 		 * if_description is always nul terminated.  The
2517 		 * length parameter is supposed to count the
2518 		 * terminating nul in.
2519 		 */
2520 		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
2521 			return (ENAMETOOLONG);
2522 		else if (ifr_buffer_get_length(ifr) == 0)
			/* A zero length passes NULL to if_setdescr(). */
2523 			descrbuf = NULL;
2524 		else {
2525 			descrbuf = if_allocdescr(ifr_buffer_get_length(ifr), M_WAITOK);
2526 			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
2527 			    ifr_buffer_get_length(ifr) - 1);
2528 			if (error) {
2529 				if_freedescr(descrbuf);
2530 				break;
2531 			}
2532 		}
2533 
2534 		if_setdescr(ifp, descrbuf);
2535 		getmicrotime(&ifp->if_lastchange);
2536 		break;
2537 
2538 	case SIOCGIFFIB:
2539 		ifr->ifr_fib = ifp->if_fib;
2540 		break;
2541 
2542 	case SIOCSIFFIB:
2543 		error = priv_check(td, PRIV_NET_SETIFFIB);
2544 		if (error)
2545 			return (error);
2546 		if (ifr->ifr_fib >= rt_numfibs)
2547 			return (EINVAL);
2548 
2549 		ifp->if_fib = ifr->ifr_fib;
2550 		break;
2551 
2552 	case SIOCSIFFLAGS:
2553 		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2554 		if (error)
2555 			return (error);
2556 		/*
2557 		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2558 		 * check, so we don't need special handling here yet.
2559 		 */
2560 		new_flags = (ifr->ifr_flags & 0xffff) |
2561 		    (ifr->ifr_flagshigh << 16);
		/*
		 * An up -> down transition is carried out right away.  A
		 * down -> up transition is only noted here and performed
		 * after the driver has been given the new flags.
		 */
2562 		if (ifp->if_flags & IFF_UP &&
2563 		    (new_flags & IFF_UP) == 0) {
2564 			if_down(ifp);
2565 		} else if (new_flags & IFF_UP &&
2566 		    (ifp->if_flags & IFF_UP) == 0) {
2567 			do_ifup = 1;
2568 		}
2569 		/* See if permanently promiscuous mode bit is about to flip */
2570 		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2571 			if (new_flags & IFF_PPROMISC)
2572 				ifp->if_flags |= IFF_PROMISC;
			/* Only leave promiscuous mode if nobody else holds it. */
2573 			else if (ifp->if_pcount == 0)
2574 				ifp->if_flags &= ~IFF_PROMISC;
2575 			if (log_promisc_mode_change)
2576                                 if_printf(ifp, "permanently promiscuous mode %s\n",
2577                                     ((new_flags & IFF_PPROMISC) ?
2578                                      "enabled" : "disabled"));
2579 		}
		/*
		 * Bits in IFF_CANTCHANGE keep their current value; all other
		 * bits are taken from the request.
		 */
2580 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2581 			(new_flags &~ IFF_CANTCHANGE);
2582 		if (ifp->if_ioctl) {
			/* The driver's result is deliberately ignored. */
2583 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2584 		}
2585 		if (do_ifup)
2586 			if_up(ifp);
2587 		getmicrotime(&ifp->if_lastchange);
2588 		break;
2589 
2590 	case SIOCSIFCAP:
2591 		error = priv_check(td, PRIV_NET_SETIFCAP);
2592 		if (error)
2593 			return (error);
2594 		if (ifp->if_ioctl == NULL)
2595 			return (EOPNOTSUPP);
		/* Reject capabilities the interface does not advertise. */
2596 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2597 			return (EINVAL);
2598 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2599 		if (error == 0)
2600 			getmicrotime(&ifp->if_lastchange);
2601 		break;
2602 
2603 #ifdef MAC
2604 	case SIOCSIFMAC:
2605 		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2606 		break;
2607 #endif
2608 
2609 	case SIOCSIFNAME:
2610 		error = priv_check(td, PRIV_NET_SETIFNAME);
2611 		if (error)
2612 			return (error);
2613 		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
2614 		    NULL);
2615 		if (error != 0)
2616 			return (error);
2617 		if (new_name[0] == '\0')
2618 			return (EINVAL);
		/* Renaming to the current name succeeds without doing anything. */
2619 		if (strcmp(new_name, ifp->if_xname) == 0)
2620 			break;
2621 		if (ifunit(new_name) != NULL)
2622 			return (EEXIST);
2623 
2624 		/*
2625 		 * XXX: Locking.  Nothing else seems to lock if_flags,
2626 		 * and there are numerous other races with the
2627 		 * ifunit() checks not being atomic with namespace
2628 		 * changes (renames, vmoves, if_attach, etc).
2629 		 */
2630 		ifp->if_flags |= IFF_RENAMING;
2631 
		/*
		 * A rename is announced as a departure under the old name
		 * followed by an arrival under the new one.
		 */
2632 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
2633 
2634 		if_printf(ifp, "changing name to '%s'\n", new_name);
2635 
2636 		IF_ADDR_WLOCK(ifp);
2637 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		/* The name is also stored in the link-level sockaddr_dl. */
2638 		ifa = ifp->if_addr;
2639 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2640 		namelen = strlen(new_name);
2641 		onamelen = sdl->sdl_nlen;
2642 		/*
2643 		 * Move the address if needed.  This is safe because we
2644 		 * allocate space for a name of length IFNAMSIZ when we
2645 		 * create this in if_attach().
2646 		 */
2647 		if (namelen != onamelen) {
2648 			bcopy(sdl->sdl_data + onamelen,
2649 			    sdl->sdl_data + namelen, sdl->sdl_alen);
2650 		}
2651 		bcopy(new_name, sdl->sdl_data, namelen);
2652 		sdl->sdl_nlen = namelen;
		/*
		 * Rebuild the matching netmask: clear the span of the old
		 * name, then set 0xff over the span of the new name.
		 */
2653 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2654 		bzero(sdl->sdl_data, onamelen);
2655 		while (namelen != 0)
2656 			sdl->sdl_data[--namelen] = 0xff;
2657 		IF_ADDR_WUNLOCK(ifp);
2658 
2659 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
2660 
2661 		ifp->if_flags &= ~IFF_RENAMING;
2662 		break;
2663 
2664 #ifdef VIMAGE
2665 	case SIOCSIFVNET:
2666 		error = priv_check(td, PRIV_NET_SETIFVNET);
2667 		if (error)
2668 			return (error);
2669 		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2670 		break;
2671 #endif
2672 
2673 	case SIOCSIFMETRIC:
2674 		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2675 		if (error)
2676 			return (error);
2677 		ifp->if_metric = ifr->ifr_metric;
2678 		getmicrotime(&ifp->if_lastchange);
2679 		break;
2680 
2681 	case SIOCSIFPHYS:
2682 		error = priv_check(td, PRIV_NET_SETIFPHYS);
2683 		if (error)
2684 			return (error);
2685 		if (ifp->if_ioctl == NULL)
2686 			return (EOPNOTSUPP);
2687 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2688 		if (error == 0)
2689 			getmicrotime(&ifp->if_lastchange);
2690 		break;
2691 
2692 	case SIOCSIFMTU:
2693 	{
		/* Remembered so that a change made by the driver is noticed. */
2694 		u_long oldmtu = ifp->if_mtu;
2695 
2696 		error = priv_check(td, PRIV_NET_SETIFMTU);
2697 		if (error)
2698 			return (error);
2699 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2700 			return (EINVAL);
2701 		if (ifp->if_ioctl == NULL)
2702 			return (EOPNOTSUPP);
2703 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2704 		if (error == 0) {
2705 			getmicrotime(&ifp->if_lastchange);
2706 			rt_ifmsg_14(ifp, 0);
2707 #ifdef INET
2708 			DEBUGNET_NOTIFY_MTU(ifp);
2709 #endif
2710 		}
2711 		/*
2712 		 * If the link MTU changed, do network layer specific procedure.
2713 		 */
2714 		if (ifp->if_mtu != oldmtu) {
2715 #ifdef INET6
2716 			nd6_setmtu(ifp);
2717 #endif
2718 			rt_updatemtu(ifp);
2719 		}
2720 		break;
2721 	}
2722 
2723 	case SIOCADDMULTI:
2724 	case SIOCDELMULTI:
2725 		if (cmd == SIOCADDMULTI)
2726 			error = priv_check(td, PRIV_NET_ADDMULTI);
2727 		else
2728 			error = priv_check(td, PRIV_NET_DELMULTI);
2729 		if (error)
2730 			return (error);
2731 
2732 		/* Don't allow group membership on non-multicast interfaces. */
2733 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2734 			return (EOPNOTSUPP);
2735 
2736 		/* Don't let users screw up protocols' entries. */
2737 		if (ifr->ifr_addr.sa_family != AF_LINK)
2738 			return (EINVAL);
2739 
2740 		if (cmd == SIOCADDMULTI) {
2741 			struct epoch_tracker et;
2742 			struct ifmultiaddr *ifma;
2743 
2744 			/*
2745 			 * Userland is only permitted to join groups once
2746 			 * via the if_addmulti() KPI, because it cannot hold
2747 			 * struct ifmultiaddr * between calls. It may also
2748 			 * lose a race while we check if the membership
2749 			 * already exists.
2750 			 */
2751 			NET_EPOCH_ENTER(et);
2752 			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2753 			NET_EPOCH_EXIT(et);
2754 			if (ifma != NULL)
2755 				error = EADDRINUSE;
2756 			else
2757 				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2758 		} else {
2759 			error = if_delmulti(ifp, &ifr->ifr_addr);
2760 		}
2761 		if (error == 0)
2762 			getmicrotime(&ifp->if_lastchange);
2763 		break;
2764 
	/* Privileged requests that are handed to the driver unchanged. */
2765 	case SIOCSIFPHYADDR:
2766 	case SIOCDIFPHYADDR:
2767 #ifdef INET6
2768 	case SIOCSIFPHYADDR_IN6:
2769 #endif
2770 	case SIOCSIFMEDIA:
2771 	case SIOCSIFGENERIC:
2772 		error = priv_check(td, PRIV_NET_HWIOCTL);
2773 		if (error)
2774 			return (error);
2775 		if (ifp->if_ioctl == NULL)
2776 			return (EOPNOTSUPP);
2777 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2778 		if (error == 0)
2779 			getmicrotime(&ifp->if_lastchange);
2780 		break;
2781 
	/* Requests handed to the driver without a privilege check. */
2782 	case SIOCGIFSTATUS:
2783 	case SIOCGIFPSRCADDR:
2784 	case SIOCGIFPDSTADDR:
2785 	case SIOCGIFMEDIA:
2786 	case SIOCGIFXMEDIA:
2787 	case SIOCGIFGENERIC:
2788 	case SIOCGIFRSSKEY:
2789 	case SIOCGIFRSSHASH:
2790 	case SIOCGIFDOWNREASON:
2791 		if (ifp->if_ioctl == NULL)
2792 			return (EOPNOTSUPP);
2793 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2794 		break;
2795 
2796 	case SIOCSIFLLADDR:
2797 		error = priv_check(td, PRIV_NET_SETLLADDR);
2798 		if (error)
2799 			return (error);
2800 		error = if_setlladdr(ifp,
2801 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2802 		break;
2803 
2804 	case SIOCGHWADDR:
2805 		error = if_gethwaddr(ifp, ifr);
2806 		break;
2807 
2808 	case SIOCAIFGROUP:
2809 		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2810 		if (error)
2811 			return (error);
2812 		error = if_addgroup(ifp,
2813 		    ((struct ifgroupreq *)data)->ifgr_group);
2814 		if (error != 0)
2815 			return (error);
2816 		break;
2817 
2818 	case SIOCGIFGROUP:
2819 	{
2820 		struct epoch_tracker et;
2821 
2822 		NET_EPOCH_ENTER(et);
2823 		error = if_getgroup((struct ifgroupreq *)data, ifp);
2824 		NET_EPOCH_EXIT(et);
2825 		break;
2826 	}
2827 
2828 	case SIOCDIFGROUP:
2829 		error = priv_check(td, PRIV_NET_DELIFGROUP);
2830 		if (error)
2831 			return (error);
2832 		error = if_delgroup(ifp,
2833 		    ((struct ifgroupreq *)data)->ifgr_group);
2834 		if (error != 0)
2835 			return (error);
2836 		break;
2837 
2838 	default:
		/* Not handled here; the caller decides what to try next. */
2839 		error = ENOIOCTL;
2840 		break;
2841 	}
2842 	return (error);
2843 }
2844 
2845 /*
2846  * Interface ioctls.
 *
 * Entry point for interface ioctl requests made on socket "so".  "cmd" and
 * "data" are the request and its argument, "td" is the calling thread.
 * Returns 0 or an errno value.
 *
 * The request is processed in this order:
 *  1. The socket's vnet becomes the current vnet; on VIMAGE kernels EBUSY
 *     is returned if that vnet is shutting down.
 *  2. On COMPAT_FREEBSD32 kernels the 32-bit request layouts are converted
 *     into native structures held in "thunk", and cmd/data are redirected
 *     to them.
 *  3. Requests that do not operate on an already looked-up interface
 *     (SIOCGIFCONF, clone create/destroy/list, group members, vnet reclaim,
 *     carp) are served directly.
 *  4. Otherwise the interface named in the request is looked up with a
 *     reference (ENXIO if absent) and ifhwioctl() is tried.
 *  5. If ifhwioctl() answers ENOIOCTL the request goes to the protocol's
 *     pru_control method and, if that answers EOPNOTSUPP, to the driver's
 *     if_ioctl method (except for the legacy address-setting requests).
 *  6. On success the converted 32-bit results are copied back.
2847  */
2848 int
ifioctl(struct socket * so,u_long cmd,caddr_t data,struct thread * td)2849 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2850 {
2851 #ifdef COMPAT_FREEBSD32
	/* Native-layout copies of the 32-bit requests that need converting. */
2852 	union {
2853 		struct ifconf ifc;
2854 		struct ifdrv ifd;
2855 		struct ifgroupreq ifgr;
2856 		struct ifmediareq ifmr;
2857 	} thunk;
2858 	u_long saved_cmd;
2859 	struct ifconf32 *ifc32;
2860 	struct ifdrv32 *ifd32;
2861 	struct ifgroupreq32 *ifgr32;
2862 	struct ifmediareq32 *ifmr32;
2863 #endif
2864 	struct ifnet *ifp;
2865 	struct ifreq *ifr;
2866 	int error;
2867 	int oif_flags;
2868 #ifdef VIMAGE
2869 	bool shutdown;
2870 #endif
2871 
2872 	CURVNET_SET(so->so_vnet);
2873 #ifdef VIMAGE
2874 	/* Make sure the VNET is stable. */
2875 	shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
2876 	if (shutdown) {
2877 		CURVNET_RESTORE();
2878 		return (EBUSY);
2879 	}
2880 #endif
2881 
2882 #ifdef COMPAT_FREEBSD32
	/*
	 * Convert 32-bit requests to their native form.  The original
	 * command is kept in saved_cmd so that the results can be copied
	 * back at the end.
	 */
2883 	saved_cmd = cmd;
2884 	switch (cmd) {
2885 	case SIOCGIFCONF32:
2886 		ifc32 = (struct ifconf32 *)data;
2887 		thunk.ifc.ifc_len = ifc32->ifc_len;
2888 		thunk.ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
2889 		data = (caddr_t)&thunk.ifc;
2890 		cmd = SIOCGIFCONF;
2891 		break;
2892 	case SIOCGDRVSPEC32:
2893 	case SIOCSDRVSPEC32:
2894 		ifd32 = (struct ifdrv32 *)data;
2895 		memcpy(thunk.ifd.ifd_name, ifd32->ifd_name,
2896 		    sizeof(thunk.ifd.ifd_name));
2897 		thunk.ifd.ifd_cmd = ifd32->ifd_cmd;
2898 		thunk.ifd.ifd_len = ifd32->ifd_len;
2899 		thunk.ifd.ifd_data = PTRIN(ifd32->ifd_data);
2900 		data = (caddr_t)&thunk.ifd;
2901 		cmd = _IOC_NEWTYPE(cmd, struct ifdrv);
2902 		break;
2903 	case SIOCAIFGROUP32:
2904 	case SIOCGIFGROUP32:
2905 	case SIOCDIFGROUP32:
2906 	case SIOCGIFGMEMB32:
2907 		ifgr32 = (struct ifgroupreq32 *)data;
2908 		memcpy(thunk.ifgr.ifgr_name, ifgr32->ifgr_name,
2909 		    sizeof(thunk.ifgr.ifgr_name));
2910 		thunk.ifgr.ifgr_len = ifgr32->ifgr_len;
		/*
		 * Add/delete requests carry a group name, the get requests
		 * carry a pointer; only the relevant one is converted.
		 */
2911 		switch (cmd) {
2912 		case SIOCAIFGROUP32:
2913 		case SIOCDIFGROUP32:
2914 			memcpy(thunk.ifgr.ifgr_group, ifgr32->ifgr_group,
2915 			    sizeof(thunk.ifgr.ifgr_group));
2916 			break;
2917 		case SIOCGIFGROUP32:
2918 		case SIOCGIFGMEMB32:
2919 			thunk.ifgr.ifgr_groups = PTRIN(ifgr32->ifgr_groups);
2920 			break;
2921 		}
2922 		data = (caddr_t)&thunk.ifgr;
2923 		cmd = _IOC_NEWTYPE(cmd, struct ifgroupreq);
2924 		break;
2925 	case SIOCGIFMEDIA32:
2926 	case SIOCGIFXMEDIA32:
2927 		ifmr32 = (struct ifmediareq32 *)data;
2928 		memcpy(thunk.ifmr.ifm_name, ifmr32->ifm_name,
2929 		    sizeof(thunk.ifmr.ifm_name));
2930 		thunk.ifmr.ifm_current = ifmr32->ifm_current;
2931 		thunk.ifmr.ifm_mask = ifmr32->ifm_mask;
2932 		thunk.ifmr.ifm_status = ifmr32->ifm_status;
2933 		thunk.ifmr.ifm_active = ifmr32->ifm_active;
2934 		thunk.ifmr.ifm_count = ifmr32->ifm_count;
2935 		thunk.ifmr.ifm_ulist = PTRIN(ifmr32->ifm_ulist);
2936 		data = (caddr_t)&thunk.ifmr;
2937 		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
2938 		break;
2939 	}
2940 #endif
2941 
	/* SIOCGIFCONF takes a struct ifconf, not a struct ifreq. */
2942 	switch (cmd) {
2943 	case SIOCGIFCONF:
2944 		error = ifconf(cmd, data);
2945 		goto out_noref;
2946 	}
2947 
	/*
	 * Requests served here never reach the ifunit_ref() lookup below, so
	 * they leave through out_noref.
	 */
2948 	ifr = (struct ifreq *)data;
2949 	switch (cmd) {
2950 #ifdef VIMAGE
2951 	case SIOCSIFRVNET:
2952 		error = priv_check(td, PRIV_NET_SETIFVNET);
2953 		if (error == 0)
2954 			error = if_vmove_reclaim(td, ifr->ifr_name,
2955 			    ifr->ifr_jid);
2956 		goto out_noref;
2957 #endif
2958 	case SIOCIFCREATE:
2959 	case SIOCIFCREATE2:
2960 		error = priv_check(td, PRIV_NET_IFCREATE);
		/* Only SIOCIFCREATE2 passes the request's data pointer on. */
2961 		if (error == 0)
2962 			error = if_clone_create(ifr->ifr_name,
2963 			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
2964 			    ifr_data_get_ptr(ifr) : NULL);
2965 		goto out_noref;
2966 	case SIOCIFDESTROY:
2967 		error = priv_check(td, PRIV_NET_IFDESTROY);
2968 
2969 		if (error == 0) {
2970 			sx_xlock(&ifnet_detach_sxlock);
2971 			error = if_clone_destroy(ifr->ifr_name);
2972 			sx_xunlock(&ifnet_detach_sxlock);
2973 		}
2974 		goto out_noref;
2975 
2976 	case SIOCIFGCLONERS:
2977 		error = if_clone_list((struct if_clonereq *)data);
2978 		goto out_noref;
2979 
2980 	case SIOCGIFGMEMB:
2981 		error = if_getgroupmembers((struct ifgroupreq *)data);
2982 		goto out_noref;
2983 
2984 #if defined(INET) || defined(INET6)
2985 	case SIOCSVH:
2986 	case SIOCGVH:
2987 		if (carp_ioctl_p == NULL)
2988 			error = EPROTONOSUPPORT;
2989 		else
2990 			error = (*carp_ioctl_p)(ifr, cmd, td);
2991 		goto out_noref;
2992 #endif
2993 	}
2994 
	/* Everything else operates on the interface named in the request. */
2995 	ifp = ifunit_ref(ifr->ifr_name);
2996 	if (ifp == NULL) {
2997 		error = ENXIO;
2998 		goto out_noref;
2999 	}
3000 
	/* Any answer other than ENOIOCTL from ifhwioctl() is final. */
3001 	error = ifhwioctl(cmd, ifp, data, td);
3002 	if (error != ENOIOCTL)
3003 		goto out_ref;
3004 
	/* Snapshot of the flags, to detect an IFF_UP change further down. */
3005 	oif_flags = ifp->if_flags;
3006 	if (so->so_proto == NULL) {
3007 		error = EOPNOTSUPP;
3008 		goto out_ref;
3009 	}
3010 
3011 	/*
3012 	 * Pass the request on to the socket control method, and if the
3013 	 * latter returns EOPNOTSUPP, directly to the interface.
3014 	 *
3015 	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
3016 	 * trust SIOCSIFADDR et al to come from an already privileged
3017 	 * layer, and do not perform any credentials checks or input
3018 	 * validation.
3019 	 */
3020 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
3021 	    ifp, td));
3022 	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
3023 	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
3024 	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
3025 		error = (*ifp->if_ioctl)(ifp, cmd, data);
3026 
	/* If the request brought the interface up, run the IPv6 up hook. */
3027 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
3028 #ifdef INET6
3029 		if (ifp->if_flags & IFF_UP)
3030 			in6_if_up(ifp);
3031 #endif
3032 	}
3033 
3034 out_ref:
	/* Drop the reference taken by ifunit_ref(). */
3035 	if_rele(ifp);
3036 out_noref:
3037 	CURVNET_RESTORE();
3038 #ifdef COMPAT_FREEBSD32
	/*
	 * Copy results back into the caller's 32-bit structure, on success
	 * only.
	 */
3039 	if (error != 0)
3040 		return (error);
3041 	switch (saved_cmd) {
3042 	case SIOCGIFCONF32:
3043 		ifc32->ifc_len = thunk.ifc.ifc_len;
3044 		break;
3045 	case SIOCGDRVSPEC32:
3046 		/*
3047 		 * SIOCGDRVSPEC is IOWR, but nothing actually touches
3048 		 * the struct so just assert that ifd_len (the only
3049 		 * field it might make sense to update) hasn't
3050 		 * changed.
3051 		 */
3052 		KASSERT(thunk.ifd.ifd_len == ifd32->ifd_len,
3053 		    ("ifd_len was updated %u -> %zu", ifd32->ifd_len,
3054 			thunk.ifd.ifd_len));
3055 		break;
3056 	case SIOCGIFGROUP32:
3057 	case SIOCGIFGMEMB32:
3058 		ifgr32->ifgr_len = thunk.ifgr.ifgr_len;
3059 		break;
3060 	case SIOCGIFMEDIA32:
3061 	case SIOCGIFXMEDIA32:
3062 		ifmr32->ifm_current = thunk.ifmr.ifm_current;
3063 		ifmr32->ifm_mask = thunk.ifmr.ifm_mask;
3064 		ifmr32->ifm_status = thunk.ifmr.ifm_status;
3065 		ifmr32->ifm_active = thunk.ifmr.ifm_active;
3066 		ifmr32->ifm_count = thunk.ifmr.ifm_count;
3067 		break;
3068 	}
3069 #endif
3070 	return (error);
3071 }
3072 
3073 /*
3074  * The code common to handling reference counted flags,
3075  * e.g., in ifpromisc() and if_allmulti().
3076  * The "pflag" argument can specify a permanent mode flag to check,
3077  * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
3078  *
3079  * Only to be used on stack-owned flags, not driver-owned flags.
3080  */
3081 static int
if_setflag(struct ifnet * ifp,int flag,int pflag,int * refcount,int onswitch)3082 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
3083 {
3084 	struct ifreq ifr;
3085 	int error;
3086 	int oldflags, oldcount;
3087 
3088 	/* Sanity checks to catch programming errors */
3089 	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
3090 	    ("%s: setting driver-owned flag %d", __func__, flag));
3091 
3092 	if (onswitch)
3093 		KASSERT(*refcount >= 0,
3094 		    ("%s: increment negative refcount %d for flag %d",
3095 		    __func__, *refcount, flag));
3096 	else
3097 		KASSERT(*refcount > 0,
3098 		    ("%s: decrement non-positive refcount %d for flag %d",
3099 		    __func__, *refcount, flag));
3100 
3101 	/* In case this mode is permanent, just touch refcount */
3102 	if (ifp->if_flags & pflag) {
3103 		*refcount += onswitch ? 1 : -1;
3104 		return (0);
3105 	}
3106 
3107 	/* Save ifnet parameters for if_ioctl() may fail */
3108 	oldcount = *refcount;
3109 	oldflags = ifp->if_flags;
3110 
3111 	/*
3112 	 * See if we aren't the only and touching refcount is enough.
3113 	 * Actually toggle interface flag if we are the first or last.
3114 	 */
3115 	if (onswitch) {
3116 		if ((*refcount)++)
3117 			return (0);
3118 		ifp->if_flags |= flag;
3119 	} else {
3120 		if (--(*refcount))
3121 			return (0);
3122 		ifp->if_flags &= ~flag;
3123 	}
3124 
3125 	/* Call down the driver since we've changed interface flags */
3126 	if (ifp->if_ioctl == NULL) {
3127 		error = EOPNOTSUPP;
3128 		goto recover;
3129 	}
3130 	ifr.ifr_flags = ifp->if_flags & 0xffff;
3131 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3132 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3133 	if (error)
3134 		goto recover;
3135 	/* Notify userland that interface flags have changed */
3136 	rt_ifmsg_14(ifp, flag);
3137 	return (0);
3138 
3139 recover:
3140 	/* Recover after driver error */
3141 	*refcount = oldcount;
3142 	ifp->if_flags = oldflags;
3143 	return (error);
3144 }
3145 
3146 /*
3147  * Set/clear promiscuous mode on interface ifp based on the truth value
3148  * of pswitch.  The calls are reference counted so that only the first
3149  * "on" request actually has an effect, as does the final "off" request.
3150  * Results are undefined if the "off" and "on" requests are not matched.
3151  */
3152 int
ifpromisc(struct ifnet * ifp,int pswitch)3153 ifpromisc(struct ifnet *ifp, int pswitch)
3154 {
3155 	int error;
3156 	int oldflags = ifp->if_flags;
3157 
3158 	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3159 			   &ifp->if_pcount, pswitch);
3160 	/* If promiscuous mode status has changed, log a message */
3161 	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3162             log_promisc_mode_change)
3163 		if_printf(ifp, "promiscuous mode %s\n",
3164 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3165 	return (error);
3166 }
3167 
3168 /*
3169  * Return interface configuration
3170  * of system.  List may be used
3171  * in later ioctl's (above) to get
3172  * other information.
3173  */
3174 /*ARGSUSED*/
3175 static int
ifconf(u_long cmd,caddr_t data)3176 ifconf(u_long cmd, caddr_t data)
3177 {
3178 	struct ifconf *ifc = (struct ifconf *)data;
3179 	struct ifnet *ifp;
3180 	struct ifaddr *ifa;
3181 	struct ifreq ifr;
3182 	struct sbuf *sb;
3183 	int error, full = 0, valid_len, max_len;
3184 
3185 	/* Limit initial buffer size to maxphys to avoid DoS from userspace. */
3186 	max_len = maxphys - 1;
3187 
3188 	/* Prevent hostile input from being able to crash the system */
3189 	if (ifc->ifc_len <= 0)
3190 		return (EINVAL);
3191 
3192 again:
3193 	if (ifc->ifc_len <= max_len) {
3194 		max_len = ifc->ifc_len;
3195 		full = 1;
3196 	}
3197 	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
3198 	max_len = 0;
3199 	valid_len = 0;
3200 
3201 	IFNET_RLOCK();
3202 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
3203 		struct epoch_tracker et;
3204 		int addrs;
3205 
3206 		/*
3207 		 * Zero the ifr to make sure we don't disclose the contents
3208 		 * of the stack.
3209 		 */
3210 		memset(&ifr, 0, sizeof(ifr));
3211 
3212 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
3213 		    >= sizeof(ifr.ifr_name)) {
3214 			sbuf_delete(sb);
3215 			IFNET_RUNLOCK();
3216 			return (ENAMETOOLONG);
3217 		}
3218 
3219 		addrs = 0;
3220 		NET_EPOCH_ENTER(et);
3221 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3222 			struct sockaddr *sa = ifa->ifa_addr;
3223 
3224 			if (prison_if(curthread->td_ucred, sa) != 0)
3225 				continue;
3226 			addrs++;
3227 			if (sa->sa_len <= sizeof(*sa)) {
3228 				if (sa->sa_len < sizeof(*sa)) {
3229 					memset(&ifr.ifr_ifru.ifru_addr, 0,
3230 					    sizeof(ifr.ifr_ifru.ifru_addr));
3231 					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
3232 					    sa->sa_len);
3233 				} else
3234 					ifr.ifr_ifru.ifru_addr = *sa;
3235 				sbuf_bcat(sb, &ifr, sizeof(ifr));
3236 				max_len += sizeof(ifr);
3237 			} else {
3238 				sbuf_bcat(sb, &ifr,
3239 				    offsetof(struct ifreq, ifr_addr));
3240 				max_len += offsetof(struct ifreq, ifr_addr);
3241 				sbuf_bcat(sb, sa, sa->sa_len);
3242 				max_len += sa->sa_len;
3243 			}
3244 
3245 			if (sbuf_error(sb) == 0)
3246 				valid_len = sbuf_len(sb);
3247 		}
3248 		NET_EPOCH_EXIT(et);
3249 		if (addrs == 0) {
3250 			sbuf_bcat(sb, &ifr, sizeof(ifr));
3251 			max_len += sizeof(ifr);
3252 
3253 			if (sbuf_error(sb) == 0)
3254 				valid_len = sbuf_len(sb);
3255 		}
3256 	}
3257 	IFNET_RUNLOCK();
3258 
3259 	/*
3260 	 * If we didn't allocate enough space (uncommon), try again.  If
3261 	 * we have already allocated as much space as we are allowed,
3262 	 * return what we've got.
3263 	 */
3264 	if (valid_len != max_len && !full) {
3265 		sbuf_delete(sb);
3266 		goto again;
3267 	}
3268 
3269 	ifc->ifc_len = valid_len;
3270 	sbuf_finish(sb);
3271 	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
3272 	sbuf_delete(sb);
3273 	return (error);
3274 }
3275 
3276 /*
3277  * Just like ifpromisc(), but for all-multicast-reception mode.
3278  */
3279 int
if_allmulti(struct ifnet * ifp,int onswitch)3280 if_allmulti(struct ifnet *ifp, int onswitch)
3281 {
3282 
3283 	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3284 }
3285 
3286 struct ifmultiaddr *
if_findmulti(struct ifnet * ifp,const struct sockaddr * sa)3287 if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3288 {
3289 	struct ifmultiaddr *ifma;
3290 
3291 	IF_ADDR_LOCK_ASSERT(ifp);
3292 
3293 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3294 		if (sa->sa_family == AF_LINK) {
3295 			if (sa_dl_equal(ifma->ifma_addr, sa))
3296 				break;
3297 		} else {
3298 			if (sa_equal(ifma->ifma_addr, sa))
3299 				break;
3300 		}
3301 	}
3302 
3303 	return ifma;
3304 }
3305 
3306 /*
3307  * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3308  * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3309  * the ifnet multicast address list here, so the caller must do that and
3310  * other setup work (such as notifying the device driver).  The reference
3311  * count is initialized to 1.
3312  */
3313 static struct ifmultiaddr *
if_allocmulti(struct ifnet * ifp,struct sockaddr * sa,struct sockaddr * llsa,int mflags)3314 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3315     int mflags)
3316 {
3317 	struct ifmultiaddr *ifma;
3318 	struct sockaddr *dupsa;
3319 
3320 	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3321 	    M_ZERO);
3322 	if (ifma == NULL)
3323 		return (NULL);
3324 
3325 	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3326 	if (dupsa == NULL) {
3327 		free(ifma, M_IFMADDR);
3328 		return (NULL);
3329 	}
3330 	bcopy(sa, dupsa, sa->sa_len);
3331 	ifma->ifma_addr = dupsa;
3332 
3333 	ifma->ifma_ifp = ifp;
3334 	ifma->ifma_refcount = 1;
3335 	ifma->ifma_protospec = NULL;
3336 
3337 	if (llsa == NULL) {
3338 		ifma->ifma_lladdr = NULL;
3339 		return (ifma);
3340 	}
3341 
3342 	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3343 	if (dupsa == NULL) {
3344 		free(ifma->ifma_addr, M_IFMADDR);
3345 		free(ifma, M_IFMADDR);
3346 		return (NULL);
3347 	}
3348 	bcopy(llsa, dupsa, llsa->sa_len);
3349 	ifma->ifma_lladdr = dupsa;
3350 
3351 	return (ifma);
3352 }
3353 
3354 /*
3355  * if_freemulti: free ifmultiaddr structure and possibly attached related
3356  * addresses.  The caller is responsible for implementing reference
3357  * counting, notifying the driver, handling routing messages, and releasing
3358  * any dependent link layer state.
3359  */
3360 #ifdef MCAST_VERBOSE
3361 extern void kdb_backtrace(void);
3362 #endif
3363 static void
if_freemulti_internal(struct ifmultiaddr * ifma)3364 if_freemulti_internal(struct ifmultiaddr *ifma)
3365 {
3366 
3367 	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3368 	    ifma->ifma_refcount));
3369 
3370 	if (ifma->ifma_lladdr != NULL)
3371 		free(ifma->ifma_lladdr, M_IFMADDR);
3372 #ifdef MCAST_VERBOSE
3373 	kdb_backtrace();
3374 	printf("%s freeing ifma: %p\n", __func__, ifma);
3375 #endif
3376 	free(ifma->ifma_addr, M_IFMADDR);
3377 	free(ifma, M_IFMADDR);
3378 }
3379 
3380 static void
if_destroymulti(epoch_context_t ctx)3381 if_destroymulti(epoch_context_t ctx)
3382 {
3383 	struct ifmultiaddr *ifma;
3384 
3385 	ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx);
3386 	if_freemulti_internal(ifma);
3387 }
3388 
3389 void
if_freemulti(struct ifmultiaddr * ifma)3390 if_freemulti(struct ifmultiaddr *ifma)
3391 {
3392 	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d",
3393 	    ifma->ifma_refcount));
3394 
3395 	NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx);
3396 }
3397 
3398 /*
3399  * Register an additional multicast address with a network interface.
3400  *
3401  * - If the address is already present, bump the reference count on the
3402  *   address and return.
3403  * - If the address is not link-layer, look up a link layer address.
3404  * - Allocate address structures for one or both addresses, and attach to the
3405  *   multicast address list on the interface.  If automatically adding a link
3406  *   layer address, the protocol address will own a reference to the link
3407  *   layer address, to be freed when it is freed.
3408  * - Notify the network device driver of an addition to the multicast address
3409  *   list.
3410  *
3411  * 'sa' points to caller-owned memory with the desired multicast address.
3412  *
3413  * 'retifma' will be used to return a pointer to the resulting multicast
3414  * address reference, if desired.
3415  */
3416 int
if_addmulti(struct ifnet * ifp,struct sockaddr * sa,struct ifmultiaddr ** retifma)3417 if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
3418     struct ifmultiaddr **retifma)
3419 {
3420 	struct ifmultiaddr *ifma, *ll_ifma;
3421 	struct sockaddr *llsa;
3422 	struct sockaddr_dl sdl;
3423 	int error;
3424 
3425 #ifdef INET
3426 	IN_MULTI_LIST_UNLOCK_ASSERT();
3427 #endif
3428 #ifdef INET6
3429 	IN6_MULTI_LIST_UNLOCK_ASSERT();
3430 #endif
3431 	/*
3432 	 * If the address is already present, return a new reference to it;
3433 	 * otherwise, allocate storage and set up a new address.
3434 	 */
3435 	IF_ADDR_WLOCK(ifp);
3436 	ifma = if_findmulti(ifp, sa);
3437 	if (ifma != NULL) {
3438 		ifma->ifma_refcount++;
3439 		if (retifma != NULL)
3440 			*retifma = ifma;
3441 		IF_ADDR_WUNLOCK(ifp);
3442 		return (0);
3443 	}
3444 
3445 	/*
3446 	 * The address isn't already present; resolve the protocol address
3447 	 * into a link layer address, and then look that up, bump its
3448 	 * refcount or allocate an ifma for that also.
3449 	 * Most link layer resolving functions returns address data which
3450 	 * fits inside default sockaddr_dl structure. However callback
3451 	 * can allocate another sockaddr structure, in that case we need to
3452 	 * free it later.
3453 	 */
3454 	llsa = NULL;
3455 	ll_ifma = NULL;
3456 	if (ifp->if_resolvemulti != NULL) {
3457 		/* Provide called function with buffer size information */
3458 		sdl.sdl_len = sizeof(sdl);
3459 		llsa = (struct sockaddr *)&sdl;
3460 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
3461 		if (error)
3462 			goto unlock_out;
3463 	}
3464 
3465 	/*
3466 	 * Allocate the new address.  Don't hook it up yet, as we may also
3467 	 * need to allocate a link layer multicast address.
3468 	 */
3469 	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
3470 	if (ifma == NULL) {
3471 		error = ENOMEM;
3472 		goto free_llsa_out;
3473 	}
3474 
3475 	/*
3476 	 * If a link layer address is found, we'll need to see if it's
3477 	 * already present in the address list, or allocate is as well.
3478 	 * When this block finishes, the link layer address will be on the
3479 	 * list.
3480 	 */
3481 	if (llsa != NULL) {
3482 		ll_ifma = if_findmulti(ifp, llsa);
3483 		if (ll_ifma == NULL) {
3484 			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
3485 			if (ll_ifma == NULL) {
3486 				--ifma->ifma_refcount;
3487 				if_freemulti(ifma);
3488 				error = ENOMEM;
3489 				goto free_llsa_out;
3490 			}
3491 			ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
3492 			CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
3493 			    ifma_link);
3494 		} else
3495 			ll_ifma->ifma_refcount++;
3496 		ifma->ifma_llifma = ll_ifma;
3497 	}
3498 
3499 	/*
3500 	 * We now have a new multicast address, ifma, and possibly a new or
3501 	 * referenced link layer address.  Add the primary address to the
3502 	 * ifnet address list.
3503 	 */
3504 	ifma->ifma_flags |= IFMA_F_ENQUEUED;
3505 	CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
3506 
3507 	if (retifma != NULL)
3508 		*retifma = ifma;
3509 
3510 	/*
3511 	 * Must generate the message while holding the lock so that 'ifma'
3512 	 * pointer is still valid.
3513 	 */
3514 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
3515 	IF_ADDR_WUNLOCK(ifp);
3516 
3517 	/*
3518 	 * We are certain we have added something, so call down to the
3519 	 * interface to let them know about it.
3520 	 */
3521 	if (ifp->if_ioctl != NULL) {
3522 		if (THREAD_CAN_SLEEP())
3523 			(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3524 		else
3525 			taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask);
3526 	}
3527 
3528 	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3529 		link_free_sdl(llsa);
3530 
3531 	return (0);
3532 
3533 free_llsa_out:
3534 	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
3535 		link_free_sdl(llsa);
3536 
3537 unlock_out:
3538 	IF_ADDR_WUNLOCK(ifp);
3539 	return (error);
3540 }
3541 
3542 static void
if_siocaddmulti(void * arg,int pending)3543 if_siocaddmulti(void *arg, int pending)
3544 {
3545 	struct ifnet *ifp;
3546 
3547 	ifp = arg;
3548 #ifdef DIAGNOSTIC
3549 	if (pending > 1)
3550 		if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending);
3551 #endif
3552 	CURVNET_SET(ifp->if_vnet);
3553 	(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3554 	CURVNET_RESTORE();
3555 }
3556 
3557 /*
3558  * Delete a multicast group membership by network-layer group address.
3559  *
3560  * Returns ENOENT if the entry could not be found. If ifp no longer
3561  * exists, results are undefined. This entry point should only be used
3562  * from subsystems which do appropriate locking to hold ifp for the
3563  * duration of the call.
3564  * Network-layer protocol domains must use if_delmulti_ifma().
3565  */
3566 int
if_delmulti(struct ifnet * ifp,struct sockaddr * sa)3567 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3568 {
3569 	struct ifmultiaddr *ifma;
3570 	int lastref;
3571 
3572 	KASSERT(ifp, ("%s: NULL ifp", __func__));
3573 
3574 	IF_ADDR_WLOCK(ifp);
3575 	lastref = 0;
3576 	ifma = if_findmulti(ifp, sa);
3577 	if (ifma != NULL)
3578 		lastref = if_delmulti_locked(ifp, ifma, 0);
3579 	IF_ADDR_WUNLOCK(ifp);
3580 
3581 	if (ifma == NULL)
3582 		return (ENOENT);
3583 
3584 	if (lastref && ifp->if_ioctl != NULL) {
3585 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3586 	}
3587 
3588 	return (0);
3589 }
3590 
3591 /*
3592  * Delete all multicast group membership for an interface.
3593  * Should be used to quickly flush all multicast filters.
3594  */
3595 void
if_delallmulti(struct ifnet * ifp)3596 if_delallmulti(struct ifnet *ifp)
3597 {
3598 	struct ifmultiaddr *ifma;
3599 	struct ifmultiaddr *next;
3600 
3601 	IF_ADDR_WLOCK(ifp);
3602 	CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3603 		if_delmulti_locked(ifp, ifma, 0);
3604 	IF_ADDR_WUNLOCK(ifp);
3605 }
3606 
/*
 * Convenience wrapper: if_delmulti_ifma_flags() with no flags set.
 */
void
if_delmulti_ifma(struct ifmultiaddr *ifma)
{
	const int no_flags = 0;

	if_delmulti_ifma_flags(ifma, no_flags);
}
3612 
3613 /*
3614  * Delete a multicast group membership by group membership pointer.
3615  * Network-layer protocol domains must use this routine.
3616  *
3617  * It is safe to call this routine if the ifp disappeared.
3618  */
3619 void
if_delmulti_ifma_flags(struct ifmultiaddr * ifma,int flags)3620 if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
3621 {
3622 	struct ifnet *ifp;
3623 	int lastref;
3624 	MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma);
3625 #ifdef INET
3626 	IN_MULTI_LIST_UNLOCK_ASSERT();
3627 #endif
3628 	ifp = ifma->ifma_ifp;
3629 #ifdef DIAGNOSTIC
3630 	if (ifp == NULL) {
3631 		printf("%s: ifma_ifp seems to be detached\n", __func__);
3632 	} else {
3633 		struct epoch_tracker et;
3634 		struct ifnet *oifp;
3635 
3636 		NET_EPOCH_ENTER(et);
3637 		CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
3638 			if (ifp == oifp)
3639 				break;
3640 		NET_EPOCH_EXIT(et);
3641 		if (ifp != oifp)
3642 			ifp = NULL;
3643 	}
3644 #endif
3645 	/*
3646 	 * If and only if the ifnet instance exists: Acquire the address lock.
3647 	 */
3648 	if (ifp != NULL)
3649 		IF_ADDR_WLOCK(ifp);
3650 
3651 	lastref = if_delmulti_locked(ifp, ifma, flags);
3652 
3653 	if (ifp != NULL) {
3654 		/*
3655 		 * If and only if the ifnet instance exists:
3656 		 *  Release the address lock.
3657 		 *  If the group was left: update the hardware hash filter.
3658 		 */
3659 		IF_ADDR_WUNLOCK(ifp);
3660 		if (lastref && ifp->if_ioctl != NULL) {
3661 			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3662 		}
3663 	}
3664 }
3665 
3666 /*
3667  * Perform deletion of network-layer and/or link-layer multicast address.
3668  *
3669  * Return 0 if the reference count was decremented.
3670  * Return 1 if the final reference was released, indicating that the
3671  * hardware hash filter should be reprogrammed.
3672  */
3673 static int
if_delmulti_locked(struct ifnet * ifp,struct ifmultiaddr * ifma,int detaching)3674 if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
3675 {
3676 	struct ifmultiaddr *ll_ifma;
3677 
3678 	if (ifp != NULL && ifma->ifma_ifp != NULL) {
3679 		KASSERT(ifma->ifma_ifp == ifp,
3680 		    ("%s: inconsistent ifp %p", __func__, ifp));
3681 		IF_ADDR_WLOCK_ASSERT(ifp);
3682 	}
3683 
3684 	ifp = ifma->ifma_ifp;
3685 	MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : "");
3686 
3687 	/*
3688 	 * If the ifnet is detaching, null out references to ifnet,
3689 	 * so that upper protocol layers will notice, and not attempt
3690 	 * to obtain locks for an ifnet which no longer exists. The
3691 	 * routing socket announcement must happen before the ifnet
3692 	 * instance is detached from the system.
3693 	 */
3694 	if (detaching) {
3695 #ifdef DIAGNOSTIC
3696 		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
3697 #endif
3698 		/*
3699 		 * ifp may already be nulled out if we are being reentered
3700 		 * to delete the ll_ifma.
3701 		 */
3702 		if (ifp != NULL) {
3703 			rt_newmaddrmsg(RTM_DELMADDR, ifma);
3704 			ifma->ifma_ifp = NULL;
3705 		}
3706 	}
3707 
3708 	if (--ifma->ifma_refcount > 0)
3709 		return 0;
3710 
3711 	if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) {
3712 		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
3713 		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
3714 	}
3715 	/*
3716 	 * If this ifma is a network-layer ifma, a link-layer ifma may
3717 	 * have been associated with it. Release it first if so.
3718 	 */
3719 	ll_ifma = ifma->ifma_llifma;
3720 	if (ll_ifma != NULL) {
3721 		KASSERT(ifma->ifma_lladdr != NULL,
3722 		    ("%s: llifma w/o lladdr", __func__));
3723 		if (detaching)
3724 			ll_ifma->ifma_ifp = NULL;	/* XXX */
3725 		if (--ll_ifma->ifma_refcount == 0) {
3726 			if (ifp != NULL) {
3727 				if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
3728 					CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
3729 						ifma_link);
3730 					ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
3731 				}
3732 			}
3733 			if_freemulti(ll_ifma);
3734 		}
3735 	}
3736 #ifdef INVARIANTS
3737 	if (ifp) {
3738 		struct ifmultiaddr *ifmatmp;
3739 
3740 		CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link)
3741 			MPASS(ifma != ifmatmp);
3742 	}
3743 #endif
3744 	if_freemulti(ifma);
3745 	/*
3746 	 * The last reference to this instance of struct ifmultiaddr
3747 	 * was released; the hardware should be notified of this change.
3748 	 */
3749 	return 1;
3750 }
3751 
3752 /*
3753  * Set the link layer address on an interface.
3754  *
3755  * At this time we only support certain types of interfaces,
3756  * and we don't allow the length of the address to change.
3757  *
3758  * Set noinline to be dtrace-friendly
3759  */
3760 __noinline int
if_setlladdr(struct ifnet * ifp,const u_char * lladdr,int len)3761 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3762 {
3763 	struct sockaddr_dl *sdl;
3764 	struct ifaddr *ifa;
3765 	struct ifreq ifr;
3766 
3767 	ifa = ifp->if_addr;
3768 	if (ifa == NULL)
3769 		return (EINVAL);
3770 
3771 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3772 	if (sdl == NULL)
3773 		return (EINVAL);
3774 
3775 	if (len != sdl->sdl_alen)	/* don't allow length to change */
3776 		return (EINVAL);
3777 
3778 	switch (ifp->if_type) {
3779 	case IFT_ETHER:
3780 	case IFT_XETHER:
3781 	case IFT_L2VLAN:
3782 	case IFT_BRIDGE:
3783 	case IFT_IEEE8023ADLAG:
3784 		bcopy(lladdr, LLADDR(sdl), len);
3785 		break;
3786 	default:
3787 		return (ENODEV);
3788 	}
3789 
3790 	/*
3791 	 * If the interface is already up, we need
3792 	 * to re-init it in order to reprogram its
3793 	 * address filter.
3794 	 */
3795 	if ((ifp->if_flags & IFF_UP) != 0) {
3796 		if (ifp->if_ioctl) {
3797 			ifp->if_flags &= ~IFF_UP;
3798 			ifr.ifr_flags = ifp->if_flags & 0xffff;
3799 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3800 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3801 			ifp->if_flags |= IFF_UP;
3802 			ifr.ifr_flags = ifp->if_flags & 0xffff;
3803 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3804 			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3805 		}
3806 	}
3807 	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3808 
3809 	return (0);
3810 }
3811 
3812 /*
3813  * Compat function for handling basic encapsulation requests.
3814  * Not converted stacks (FDDI, IB, ..) supports traditional
3815  * output model: ARP (and other similar L2 protocols) are handled
3816  * inside output routine, arpresolve/nd6_resolve() returns MAC
3817  * address instead of full prepend.
3818  *
3819  * This function creates calculated header==MAC for IPv4/IPv6 and
3820  * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3821  * address families.
3822  */
3823 static int
if_requestencap_default(struct ifnet * ifp,struct if_encap_req * req)3824 if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3825 {
3826 
3827 	if (req->rtype != IFENCAP_LL)
3828 		return (EOPNOTSUPP);
3829 
3830 	if (req->bufsize < req->lladdr_len)
3831 		return (ENOMEM);
3832 
3833 	switch (req->family) {
3834 	case AF_INET:
3835 	case AF_INET6:
3836 		break;
3837 	default:
3838 		return (EAFNOSUPPORT);
3839 	}
3840 
3841 	/* Copy lladdr to storage as is */
3842 	memmove(req->buf, req->lladdr, req->lladdr_len);
3843 	req->bufsize = req->lladdr_len;
3844 	req->lladdr_off = 0;
3845 
3846 	return (0);
3847 }
3848 
3849 /*
3850  * Tunnel interfaces can nest, also they may cause infinite recursion
3851  * calls when misconfigured. We'll prevent this by detecting loops.
3852  * High nesting level may cause stack exhaustion. We'll prevent this
3853  * by introducing upper limit.
3854  *
3855  * Return 0, if tunnel nesting count is equal or less than limit.
3856  */
3857 int
if_tunnel_check_nesting(struct ifnet * ifp,struct mbuf * m,uint32_t cookie,int limit)3858 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie,
3859     int limit)
3860 {
3861 	struct m_tag *mtag;
3862 	int count;
3863 
3864 	count = 1;
3865 	mtag = NULL;
3866 	while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) {
3867 		if (*(struct ifnet **)(mtag + 1) == ifp) {
3868 			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
3869 			return (EIO);
3870 		}
3871 		count++;
3872 	}
3873 	if (count > limit) {
3874 		log(LOG_NOTICE,
3875 		    "%s: if_output recursively called too many times(%d)\n",
3876 		    if_name(ifp), count);
3877 		return (EIO);
3878 	}
3879 	mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT);
3880 	if (mtag == NULL)
3881 		return (ENOMEM);
3882 	*(struct ifnet **)(mtag + 1) = ifp;
3883 	m_tag_prepend(m, mtag);
3884 	return (0);
3885 }
3886 
3887 /*
3888  * Get the link layer address that was read from the hardware at attach.
3889  *
3890  * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
3891  * their component interfaces as IFT_IEEE8023ADLAG.
3892  */
3893 int
if_gethwaddr(struct ifnet * ifp,struct ifreq * ifr)3894 if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
3895 {
3896 
3897 	if (ifp->if_hw_addr == NULL)
3898 		return (ENODEV);
3899 
3900 	switch (ifp->if_type) {
3901 	case IFT_ETHER:
3902 	case IFT_IEEE8023ADLAG:
3903 		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
3904 		return (0);
3905 	default:
3906 		return (ENODEV);
3907 	}
3908 }
3909 
3910 /*
3911  * The name argument must be a pointer to storage which will last as
3912  * long as the interface does.  For physical devices, the result of
3913  * device_get_name(dev) is a good choice and for pseudo-devices a
3914  * static string works well.
3915  */
3916 void
if_initname(struct ifnet * ifp,const char * name,int unit)3917 if_initname(struct ifnet *ifp, const char *name, int unit)
3918 {
3919 	ifp->if_dname = name;
3920 	ifp->if_dunit = unit;
3921 	if (unit != IF_DUNIT_NONE)
3922 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
3923 	else
3924 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
3925 }
3926 
3927 static int
if_vlog(struct ifnet * ifp,int pri,const char * fmt,va_list ap)3928 if_vlog(struct ifnet *ifp, int pri, const char *fmt, va_list ap)
3929 {
3930 	char if_fmt[256];
3931 
3932 	snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt);
3933 	vlog(pri, if_fmt, ap);
3934 	return (0);
3935 }
3936 
3937 
3938 int
if_printf(struct ifnet * ifp,const char * fmt,...)3939 if_printf(struct ifnet *ifp, const char *fmt, ...)
3940 {
3941 	va_list ap;
3942 
3943 	va_start(ap, fmt);
3944 	if_vlog(ifp, LOG_INFO, fmt, ap);
3945 	va_end(ap);
3946 	return (0);
3947 }
3948 
/*
 * printf-style logging at the caller-chosen priority 'pri', prefixed
 * with the interface name.  Always returns 0.
 */
int
if_log(struct ifnet *ifp, int pri, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	if_vlog(ifp, pri, fmt, args);
	va_end(args);
	return (0);
}
3959 
3960 void
if_start(struct ifnet * ifp)3961 if_start(struct ifnet *ifp)
3962 {
3963 
3964 	(*(ifp)->if_start)(ifp);
3965 }
3966 
/*
 * Backwards compatibility transmit method for drivers that have not
 * implemented their own: hand the mbuf to the interface queue with
 * IFQ_HANDOFF() and return the status it stores.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int status;

	IFQ_HANDOFF(ifp, m, status);
	return (status);
}
3979 
3980 static void
if_input_default(struct ifnet * ifp __unused,struct mbuf * m)3981 if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
3982 {
3983 
3984 	m_freem(m);
3985 }
3986 
3987 int
if_handoff(struct ifqueue * ifq,struct mbuf * m,struct ifnet * ifp,int adjust)3988 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
3989 {
3990 	int active = 0;
3991 
3992 	IF_LOCK(ifq);
3993 	if (_IF_QFULL(ifq)) {
3994 		IF_UNLOCK(ifq);
3995 		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3996 		m_freem(m);
3997 		return (0);
3998 	}
3999 	if (ifp != NULL) {
4000 		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
4001 		if (m->m_flags & (M_BCAST|M_MCAST))
4002 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
4003 		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
4004 	}
4005 	_IF_ENQUEUE(ifq, m);
4006 	IF_UNLOCK(ifq);
4007 	if (ifp != NULL && !active)
4008 		(*(ifp)->if_start)(ifp);
4009 	return (1);
4010 }
4011 
4012 void
if_register_com_alloc(u_char type,if_com_alloc_t * a,if_com_free_t * f)4013 if_register_com_alloc(u_char type,
4014     if_com_alloc_t *a, if_com_free_t *f)
4015 {
4016 
4017 	KASSERT(if_com_alloc[type] == NULL,
4018 	    ("if_register_com_alloc: %d already registered", type));
4019 	KASSERT(if_com_free[type] == NULL,
4020 	    ("if_register_com_alloc: %d free already registered", type));
4021 
4022 	if_com_alloc[type] = a;
4023 	if_com_free[type] = f;
4024 }
4025 
4026 void
if_deregister_com_alloc(u_char type)4027 if_deregister_com_alloc(u_char type)
4028 {
4029 
4030 	KASSERT(if_com_alloc[type] != NULL,
4031 	    ("if_deregister_com_alloc: %d not registered", type));
4032 	KASSERT(if_com_free[type] != NULL,
4033 	    ("if_deregister_com_alloc: %d free not registered", type));
4034 
4035 	/*
4036 	 * Ensure all pending EPOCH(9) callbacks have been executed. This
4037 	 * fixes issues about late invocation of if_destroy(), which leads
4038 	 * to memory leak from if_com_alloc[type] allocated if_l2com.
4039 	 */
4040 	NET_EPOCH_DRAIN_CALLBACKS();
4041 
4042 	if_com_alloc[type] = NULL;
4043 	if_com_free[type] = NULL;
4044 }
4045 
4046 /* API for driver access to network stack owned ifnet.*/
4047 uint64_t
if_setbaudrate(struct ifnet * ifp,uint64_t baudrate)4048 if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
4049 {
4050 	uint64_t oldbrate;
4051 
4052 	oldbrate = ifp->if_baudrate;
4053 	ifp->if_baudrate = baudrate;
4054 	return (oldbrate);
4055 }
4056 
4057 uint64_t
if_getbaudrate(if_t ifp)4058 if_getbaudrate(if_t ifp)
4059 {
4060 
4061 	return (((struct ifnet *)ifp)->if_baudrate);
4062 }
4063 
4064 int
if_setcapabilities(if_t ifp,int capabilities)4065 if_setcapabilities(if_t ifp, int capabilities)
4066 {
4067 	((struct ifnet *)ifp)->if_capabilities = capabilities;
4068 	return (0);
4069 }
4070 
4071 int
if_setcapabilitiesbit(if_t ifp,int setbit,int clearbit)4072 if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
4073 {
4074 	((struct ifnet *)ifp)->if_capabilities |= setbit;
4075 	((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
4076 
4077 	return (0);
4078 }
4079 
4080 int
if_getcapabilities(if_t ifp)4081 if_getcapabilities(if_t ifp)
4082 {
4083 	return ((struct ifnet *)ifp)->if_capabilities;
4084 }
4085 
4086 int
if_setcapenable(if_t ifp,int capabilities)4087 if_setcapenable(if_t ifp, int capabilities)
4088 {
4089 	((struct ifnet *)ifp)->if_capenable = capabilities;
4090 	return (0);
4091 }
4092 
4093 int
if_setcapenablebit(if_t ifp,int setcap,int clearcap)4094 if_setcapenablebit(if_t ifp, int setcap, int clearcap)
4095 {
4096 	if(setcap)
4097 		((struct ifnet *)ifp)->if_capenable |= setcap;
4098 	if(clearcap)
4099 		((struct ifnet *)ifp)->if_capenable &= ~clearcap;
4100 
4101 	return (0);
4102 }
4103 
4104 const char *
if_getdname(if_t ifp)4105 if_getdname(if_t ifp)
4106 {
4107 	return ((struct ifnet *)ifp)->if_dname;
4108 }
4109 
4110 int
if_togglecapenable(if_t ifp,int togglecap)4111 if_togglecapenable(if_t ifp, int togglecap)
4112 {
4113 	((struct ifnet *)ifp)->if_capenable ^= togglecap;
4114 	return (0);
4115 }
4116 
4117 int
if_getcapenable(if_t ifp)4118 if_getcapenable(if_t ifp)
4119 {
4120 	return ((struct ifnet *)ifp)->if_capenable;
4121 }
4122 
4123 void
if_setdescr(if_t ifp,char * descrbuf)4124 if_setdescr(if_t ifp, char *descrbuf)
4125 {
4126 	sx_xlock(&ifdescr_sx);
4127 	char *odescrbuf = ifp->if_description;
4128 	ifp->if_description = descrbuf;
4129 	sx_xunlock(&ifdescr_sx);
4130 
4131 	if_freedescr(odescrbuf);
4132 }
4133 
4134 char *
if_allocdescr(size_t sz,int malloc_flag)4135 if_allocdescr(size_t sz, int malloc_flag)
4136 {
4137 	malloc_flag &= (M_WAITOK | M_NOWAIT);
4138 	return (malloc(sz, M_IFDESCR, M_ZERO | malloc_flag));
4139 }
4140 
4141 void
if_freedescr(char * descrbuf)4142 if_freedescr(char *descrbuf)
4143 {
4144 	free(descrbuf, M_IFDESCR);
4145 }
4146 
4147 /*
4148  * This is largely undesirable because it ties ifnet to a device, but does
4149  * provide flexiblity for an embedded product vendor. Should be used with
4150  * the understanding that it violates the interface boundaries, and should be
4151  * a last resort only.
4152  */
4153 int
if_setdev(if_t ifp,void * dev)4154 if_setdev(if_t ifp, void *dev)
4155 {
4156 	return (0);
4157 }
4158 
4159 int
if_setdrvflagbits(if_t ifp,int set_flags,int clear_flags)4160 if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
4161 {
4162 	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
4163 	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
4164 
4165 	return (0);
4166 }
4167 
4168 int
if_getdrvflags(if_t ifp)4169 if_getdrvflags(if_t ifp)
4170 {
4171 	return ((struct ifnet *)ifp)->if_drv_flags;
4172 }
4173 
4174 int
if_setdrvflags(if_t ifp,int flags)4175 if_setdrvflags(if_t ifp, int flags)
4176 {
4177 	((struct ifnet *)ifp)->if_drv_flags = flags;
4178 	return (0);
4179 }
4180 
4181 int
if_setflags(if_t ifp,int flags)4182 if_setflags(if_t ifp, int flags)
4183 {
4184 
4185 	ifp->if_flags = flags;
4186 	return (0);
4187 }
4188 
4189 int
if_setflagbits(if_t ifp,int set,int clear)4190 if_setflagbits(if_t ifp, int set, int clear)
4191 {
4192 	((struct ifnet *)ifp)->if_flags |= set;
4193 	((struct ifnet *)ifp)->if_flags &= ~clear;
4194 
4195 	return (0);
4196 }
4197 
4198 int
if_getflags(if_t ifp)4199 if_getflags(if_t ifp)
4200 {
4201 	return ((struct ifnet *)ifp)->if_flags;
4202 }
4203 
4204 int
if_clearhwassist(if_t ifp)4205 if_clearhwassist(if_t ifp)
4206 {
4207 	((struct ifnet *)ifp)->if_hwassist = 0;
4208 	return (0);
4209 }
4210 
4211 int
if_sethwassistbits(if_t ifp,int toset,int toclear)4212 if_sethwassistbits(if_t ifp, int toset, int toclear)
4213 {
4214 	((struct ifnet *)ifp)->if_hwassist |= toset;
4215 	((struct ifnet *)ifp)->if_hwassist &= ~toclear;
4216 
4217 	return (0);
4218 }
4219 
4220 int
if_sethwassist(if_t ifp,int hwassist_bit)4221 if_sethwassist(if_t ifp, int hwassist_bit)
4222 {
4223 	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
4224 	return (0);
4225 }
4226 
4227 int
if_gethwassist(if_t ifp)4228 if_gethwassist(if_t ifp)
4229 {
4230 	return ((struct ifnet *)ifp)->if_hwassist;
4231 }
4232 
4233 int
if_setmtu(if_t ifp,int mtu)4234 if_setmtu(if_t ifp, int mtu)
4235 {
4236 	((struct ifnet *)ifp)->if_mtu = mtu;
4237 	return (0);
4238 }
4239 
4240 int
if_getmtu(if_t ifp)4241 if_getmtu(if_t ifp)
4242 {
4243 	return ((struct ifnet *)ifp)->if_mtu;
4244 }
4245 
4246 int
if_getmtu_family(if_t ifp,int family)4247 if_getmtu_family(if_t ifp, int family)
4248 {
4249 	struct domain *dp;
4250 
4251 	for (dp = domains; dp; dp = dp->dom_next) {
4252 		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4253 			return (dp->dom_ifmtu((struct ifnet *)ifp));
4254 	}
4255 
4256 	return (((struct ifnet *)ifp)->if_mtu);
4257 }
4258 
/*
 * Methods for drivers to access interface unicast and multicast
 * link-level addresses.  Drivers shall know neither 'struct ifaddr'
 * nor 'struct ifmultiaddr'.
 */
4264 u_int
if_lladdr_count(if_t ifp)4265 if_lladdr_count(if_t ifp)
4266 {
4267 	struct epoch_tracker et;
4268 	struct ifaddr *ifa;
4269 	u_int count;
4270 
4271 	count = 0;
4272 	NET_EPOCH_ENTER(et);
4273 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
4274 		if (ifa->ifa_addr->sa_family == AF_LINK)
4275 			count++;
4276 	NET_EPOCH_EXIT(et);
4277 
4278 	return (count);
4279 }
4280 
4281 u_int
if_foreach_lladdr(if_t ifp,iflladdr_cb_t cb,void * cb_arg)4282 if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
4283 {
4284 	struct epoch_tracker et;
4285 	struct ifaddr *ifa;
4286 	u_int count;
4287 
4288 	MPASS(cb);
4289 
4290 	count = 0;
4291 	NET_EPOCH_ENTER(et);
4292 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
4293 		if (ifa->ifa_addr->sa_family != AF_LINK)
4294 			continue;
4295 		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr,
4296 		    count);
4297 	}
4298 	NET_EPOCH_EXIT(et);
4299 
4300 	return (count);
4301 }
4302 
4303 u_int
if_llmaddr_count(if_t ifp)4304 if_llmaddr_count(if_t ifp)
4305 {
4306 	struct epoch_tracker et;
4307 	struct ifmultiaddr *ifma;
4308 	int count;
4309 
4310 	count = 0;
4311 	NET_EPOCH_ENTER(et);
4312 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4313 		if (ifma->ifma_addr->sa_family == AF_LINK)
4314 			count++;
4315 	NET_EPOCH_EXIT(et);
4316 
4317 	return (count);
4318 }
4319 
4320 u_int
if_foreach_llmaddr(if_t ifp,iflladdr_cb_t cb,void * cb_arg)4321 if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
4322 {
4323 	struct epoch_tracker et;
4324 	struct ifmultiaddr *ifma;
4325 	u_int count;
4326 
4327 	MPASS(cb);
4328 
4329 	count = 0;
4330 	NET_EPOCH_ENTER(et);
4331 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4332 		if (ifma->ifma_addr->sa_family != AF_LINK)
4333 			continue;
4334 		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr,
4335 		    count);
4336 	}
4337 	NET_EPOCH_EXIT(et);
4338 
4339 	return (count);
4340 }
4341 
4342 int
if_setsoftc(if_t ifp,void * softc)4343 if_setsoftc(if_t ifp, void *softc)
4344 {
4345 	((struct ifnet *)ifp)->if_softc = softc;
4346 	return (0);
4347 }
4348 
4349 void *
if_getsoftc(if_t ifp)4350 if_getsoftc(if_t ifp)
4351 {
4352 	return ((struct ifnet *)ifp)->if_softc;
4353 }
4354 
4355 void
if_setrcvif(struct mbuf * m,if_t ifp)4356 if_setrcvif(struct mbuf *m, if_t ifp)
4357 {
4358 
4359 	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
4360 	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4361 }
4362 
4363 void
if_setvtag(struct mbuf * m,uint16_t tag)4364 if_setvtag(struct mbuf *m, uint16_t tag)
4365 {
4366 	m->m_pkthdr.ether_vtag = tag;
4367 }
4368 
4369 uint16_t
if_getvtag(struct mbuf * m)4370 if_getvtag(struct mbuf *m)
4371 {
4372 
4373 	return (m->m_pkthdr.ether_vtag);
4374 }
4375 
4376 int
if_sendq_empty(if_t ifp)4377 if_sendq_empty(if_t ifp)
4378 {
4379 	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
4380 }
4381 
4382 struct ifaddr *
if_getifaddr(if_t ifp)4383 if_getifaddr(if_t ifp)
4384 {
4385 	return ((struct ifnet *)ifp)->if_addr;
4386 }
4387 
4388 int
if_setsendqready(if_t ifp)4389 if_setsendqready(if_t ifp)
4390 {
4391 	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
4392 	return (0);
4393 }
4394 
4395 int
if_setsendqlen(if_t ifp,int tx_desc_count)4396 if_setsendqlen(if_t ifp, int tx_desc_count)
4397 {
4398 	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
4399 	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
4400 
4401 	return (0);
4402 }
4403 
4404 int
if_vlantrunkinuse(if_t ifp)4405 if_vlantrunkinuse(if_t ifp)
4406 {
4407 	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
4408 }
4409 
4410 int
if_input(if_t ifp,struct mbuf * sendmp)4411 if_input(if_t ifp, struct mbuf* sendmp)
4412 {
4413 	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
4414 	return (0);
4415 
4416 }
4417 
4418 struct mbuf *
if_dequeue(if_t ifp)4419 if_dequeue(if_t ifp)
4420 {
4421 	struct mbuf *m;
4422 	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
4423 
4424 	return (m);
4425 }
4426 
4427 int
if_sendq_prepend(if_t ifp,struct mbuf * m)4428 if_sendq_prepend(if_t ifp, struct mbuf *m)
4429 {
4430 	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
4431 	return (0);
4432 }
4433 
4434 int
if_setifheaderlen(if_t ifp,int len)4435 if_setifheaderlen(if_t ifp, int len)
4436 {
4437 	((struct ifnet *)ifp)->if_hdrlen = len;
4438 	return (0);
4439 }
4440 
4441 caddr_t
if_getlladdr(if_t ifp)4442 if_getlladdr(if_t ifp)
4443 {
4444 	return (IF_LLADDR((struct ifnet *)ifp));
4445 }
4446 
4447 void *
if_gethandle(u_char type)4448 if_gethandle(u_char type)
4449 {
4450 	return (if_alloc(type));
4451 }
4452 
4453 void
if_bpfmtap(if_t ifh,struct mbuf * m)4454 if_bpfmtap(if_t ifh, struct mbuf *m)
4455 {
4456 	struct ifnet *ifp = (struct ifnet *)ifh;
4457 
4458 	BPF_MTAP(ifp, m);
4459 }
4460 
4461 void
if_etherbpfmtap(if_t ifh,struct mbuf * m)4462 if_etherbpfmtap(if_t ifh, struct mbuf *m)
4463 {
4464 	struct ifnet *ifp = (struct ifnet *)ifh;
4465 
4466 	ETHER_BPF_MTAP(ifp, m);
4467 }
4468 
4469 void
if_vlancap(if_t ifh)4470 if_vlancap(if_t ifh)
4471 {
4472 	struct ifnet *ifp = (struct ifnet *)ifh;
4473 	VLAN_CAPABILITIES(ifp);
4474 }
4475 
4476 int
if_sethwtsomax(if_t ifp,u_int if_hw_tsomax)4477 if_sethwtsomax(if_t ifp, u_int if_hw_tsomax)
4478 {
4479 
4480 	((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax;
4481         return (0);
4482 }
4483 
4484 int
if_sethwtsomaxsegcount(if_t ifp,u_int if_hw_tsomaxsegcount)4485 if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount)
4486 {
4487 
4488 	((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount;
4489         return (0);
4490 }
4491 
4492 int
if_sethwtsomaxsegsize(if_t ifp,u_int if_hw_tsomaxsegsize)4493 if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize)
4494 {
4495 
4496 	((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize;
4497         return (0);
4498 }
4499 
4500 u_int
if_gethwtsomax(if_t ifp)4501 if_gethwtsomax(if_t ifp)
4502 {
4503 
4504 	return (((struct ifnet *)ifp)->if_hw_tsomax);
4505 }
4506 
4507 u_int
if_gethwtsomaxsegcount(if_t ifp)4508 if_gethwtsomaxsegcount(if_t ifp)
4509 {
4510 
4511 	return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount);
4512 }
4513 
4514 u_int
if_gethwtsomaxsegsize(if_t ifp)4515 if_gethwtsomaxsegsize(if_t ifp)
4516 {
4517 
4518 	return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize);
4519 }
4520 
4521 void
if_setinitfn(if_t ifp,void (* init_fn)(void *))4522 if_setinitfn(if_t ifp, void (*init_fn)(void *))
4523 {
4524 	((struct ifnet *)ifp)->if_init = init_fn;
4525 }
4526 
4527 void
if_setioctlfn(if_t ifp,int (* ioctl_fn)(if_t,u_long,caddr_t))4528 if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
4529 {
4530 	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
4531 }
4532 
4533 void
if_setstartfn(if_t ifp,void (* start_fn)(if_t))4534 if_setstartfn(if_t ifp, void (*start_fn)(if_t))
4535 {
4536 	((struct ifnet *)ifp)->if_start = (void *)start_fn;
4537 }
4538 
4539 void
if_settransmitfn(if_t ifp,if_transmit_fn_t start_fn)4540 if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
4541 {
4542 	((struct ifnet *)ifp)->if_transmit = start_fn;
4543 }
4544 
/*
 * Install flush_fn as the interface's if_qflush method.
 */
void
if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
{
	struct ifnet *ifn = (struct ifnet *)ifp;

	ifn->if_qflush = flush_fn;
}
4550 
4551 void
if_setgetcounterfn(if_t ifp,if_get_counter_t fn)4552 if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
4553 {
4554 
4555 	ifp->if_get_counter = fn;
4556 }
4557 
4558 /* Revisit these - These are inline functions originally. */
4559 int
drbr_inuse_drv(if_t ifh,struct buf_ring * br)4560 drbr_inuse_drv(if_t ifh, struct buf_ring *br)
4561 {
4562 	return drbr_inuse(ifh, br);
4563 }
4564 
4565 struct mbuf*
drbr_dequeue_drv(if_t ifh,struct buf_ring * br)4566 drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
4567 {
4568 	return drbr_dequeue(ifh, br);
4569 }
4570 
4571 int
drbr_needs_enqueue_drv(if_t ifh,struct buf_ring * br)4572 drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
4573 {
4574 	return drbr_needs_enqueue(ifh, br);
4575 }
4576 
4577 int
drbr_enqueue_drv(if_t ifh,struct buf_ring * br,struct mbuf * m)4578 drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
4579 {
4580 	return drbr_enqueue(ifh, br, m);
4581 
4582 }
4583