1 /*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2 
3 /*-
4  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5  * Nottingham University 1987.
6  *
7  * This source may be freely distributed, however I would be interested
8  * in any changes that are made.
9  *
10  * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
12  * roots in a similar driver written by Phil Cockcroft (formerly) at
13  * UCL. This driver is based much more on read/write/poll mode of
14  * operation though.
15  *
16  * $FreeBSD: stable/10/sys/net/if_tun.c 326692 2017-12-08 15:26:57Z hselasky $
17  */
18 
19 #include "opt_atalk.h"
20 #include "opt_inet.h"
21 #include "opt_inet6.h"
22 #include "opt_ipx.h"
23 
24 #include <sys/param.h>
25 #include <sys/priv.h>
26 #include <sys/proc.h>
27 #include <sys/systm.h>
28 #include <sys/jail.h>
29 #include <sys/mbuf.h>
30 #include <sys/module.h>
31 #include <sys/socket.h>
32 #include <sys/fcntl.h>
33 #include <sys/filio.h>
34 #include <sys/sockio.h>
35 #include <sys/ttycom.h>
36 #include <sys/poll.h>
37 #include <sys/selinfo.h>
38 #include <sys/signalvar.h>
39 #include <sys/filedesc.h>
40 #include <sys/kernel.h>
41 #include <sys/sysctl.h>
42 #include <sys/conf.h>
43 #include <sys/uio.h>
44 #include <sys/malloc.h>
45 #include <sys/random.h>
46 
47 #include <net/if.h>
48 #include <net/if_clone.h>
49 #include <net/if_types.h>
50 #include <net/netisr.h>
51 #include <net/route.h>
52 #include <net/vnet.h>
53 #ifdef INET
54 #include <netinet/in.h>
55 #endif
56 #include <net/bpf.h>
57 #include <net/if_tun.h>
58 
59 #include <sys/queue.h>
60 #include <sys/condvar.h>
61 
62 #include <security/mac/mac_framework.h>
63 
64 /*
65  * tun_list is protected by global tunmtx.  Other mutable fields are
66  * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
67  * static for the duration of a tunnel interface.
68  */
69 struct tun_softc {
70 	TAILQ_ENTRY(tun_softc)	tun_list;
71 	struct cdev *tun_dev;
72 	u_short	tun_flags;		/* misc flags */
73 #define	TUN_OPEN	0x0001
74 #define	TUN_INITED	0x0002
75 #define	TUN_RCOLL	0x0004
76 #define	TUN_IASET	0x0008
77 #define	TUN_DSTADDR	0x0010
78 #define	TUN_LMODE	0x0020
79 #define	TUN_RWAIT	0x0040
80 #define	TUN_ASYNC	0x0080
81 #define	TUN_IFHEAD	0x0100
82 
83 #define TUN_READY       (TUN_OPEN | TUN_INITED)
84 
85 	/*
86 	 * XXXRW: tun_pid is used to exclusively lock /dev/tun.  Is this
87 	 * actually needed?  Can we just return EBUSY if already open?
88 	 * Problem is that this involved inherent races when a tun device
89 	 * is handed off from one process to another, as opposed to just
90 	 * being slightly stale informationally.
91 	 */
92 	pid_t	tun_pid;		/* owning pid */
93 	struct	ifnet *tun_ifp;		/* the interface */
94 	struct  sigio *tun_sigio;	/* information for async I/O */
95 	struct	selinfo	tun_rsel;	/* read select */
96 	struct mtx	tun_mtx;	/* protect mutable softc fields */
97 	struct cv	tun_cv;		/* protect against ref'd dev destroy */
98 };
99 #define TUN2IFP(sc)	((sc)->tun_ifp)
100 
101 #define TUNDEBUG	if (tundebug) if_printf
102 
103 /*
104  * All mutable global variables in if_tun are locked using tunmtx, with
105  * the exception of tundebug, which is used unlocked, and tunclones,
106  * which is static after setup.
107  */
108 static struct mtx tunmtx;
109 static const char tunname[] = "tun";
110 static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
111 static int tundebug = 0;
112 static int tundclone = 1;
113 static struct clonedevs *tunclones;
114 static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
115 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
116 
117 SYSCTL_DECL(_net_link);
118 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
119     "IP tunnel software network interface.");
120 SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
121     "Enable legacy devfs interface creation.");
122 
123 TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone);
124 
125 static void	tunclone(void *arg, struct ucred *cred, char *name,
126 		    int namelen, struct cdev **dev);
127 static void	tuncreate(const char *name, struct cdev *dev);
128 static int	tunifioctl(struct ifnet *, u_long, caddr_t);
129 static void	tuninit(struct ifnet *);
130 static int	tunmodevent(module_t, int, void *);
131 static int	tunoutput(struct ifnet *, struct mbuf *,
132 		    const struct sockaddr *, struct route *ro);
133 static void	tunstart(struct ifnet *);
134 
135 static int	tun_clone_create(struct if_clone *, int, caddr_t);
136 static void	tun_clone_destroy(struct ifnet *);
137 static struct if_clone *tun_cloner;
138 
139 static d_open_t		tunopen;
140 static d_close_t	tunclose;
141 static d_read_t		tunread;
142 static d_write_t	tunwrite;
143 static d_ioctl_t	tunioctl;
144 static d_poll_t		tunpoll;
145 static d_kqfilter_t	tunkqfilter;
146 
147 static int		tunkqread(struct knote *, long);
148 static int		tunkqwrite(struct knote *, long);
149 static void		tunkqdetach(struct knote *);
150 
151 static struct filterops tun_read_filterops = {
152 	.f_isfd =	1,
153 	.f_attach =	NULL,
154 	.f_detach =	tunkqdetach,
155 	.f_event =	tunkqread,
156 };
157 
158 static struct filterops tun_write_filterops = {
159 	.f_isfd =	1,
160 	.f_attach =	NULL,
161 	.f_detach =	tunkqdetach,
162 	.f_event =	tunkqwrite,
163 };
164 
165 static struct cdevsw tun_cdevsw = {
166 	.d_version =	D_VERSION,
167 	.d_flags =	D_NEEDMINOR,
168 	.d_open =	tunopen,
169 	.d_close =	tunclose,
170 	.d_read =	tunread,
171 	.d_write =	tunwrite,
172 	.d_ioctl =	tunioctl,
173 	.d_poll =	tunpoll,
174 	.d_kqfilter =	tunkqfilter,
175 	.d_name =	tunname,
176 };
177 
178 static int
tun_clone_create(struct if_clone * ifc,int unit,caddr_t params)179 tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
180 {
181 	struct cdev *dev;
182 	int i;
183 
184 	/* find any existing device, or allocate new unit number */
185 	i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
186 	if (i) {
187 		/* No preexisting struct cdev *, create one */
188 		dev = make_dev(&tun_cdevsw, unit,
189 		    UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
190 	}
191 	tuncreate(tunname, dev);
192 
193 	return (0);
194 }
195 
196 static void
tunclone(void * arg,struct ucred * cred,char * name,int namelen,struct cdev ** dev)197 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
198     struct cdev **dev)
199 {
200 	char devname[SPECNAMELEN + 1];
201 	int u, i, append_unit;
202 
203 	if (*dev != NULL)
204 		return;
205 
206 	/*
207 	 * If tun cloning is enabled, only the superuser can create an
208 	 * interface.
209 	 */
210 	if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
211 		return;
212 
213 	if (strcmp(name, tunname) == 0) {
214 		u = -1;
215 	} else if (dev_stdclone(name, NULL, tunname, &u) != 1)
216 		return;	/* Don't recognise the name */
217 	if (u != -1 && u > IF_MAXUNIT)
218 		return;	/* Unit number too high */
219 
220 	if (u == -1)
221 		append_unit = 1;
222 	else
223 		append_unit = 0;
224 
225 	CURVNET_SET(CRED_TO_VNET(cred));
226 	/* find any existing device, or allocate new unit number */
227 	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
228 	if (i) {
229 		if (append_unit) {
230 			namelen = snprintf(devname, sizeof(devname), "%s%d",
231 			    name, u);
232 			name = devname;
233 		}
234 		/* No preexisting struct cdev *, create one */
235 		*dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
236 		    UID_UUCP, GID_DIALER, 0600, "%s", name);
237 	}
238 
239 	if_clone_create(name, namelen, NULL);
240 	CURVNET_RESTORE();
241 }
242 
243 static void
tun_destroy(struct tun_softc * tp)244 tun_destroy(struct tun_softc *tp)
245 {
246 	struct cdev *dev;
247 
248 	mtx_lock(&tp->tun_mtx);
249 	if ((tp->tun_flags & TUN_OPEN) != 0)
250 		cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
251 	else
252 		mtx_unlock(&tp->tun_mtx);
253 
254 	CURVNET_SET(TUN2IFP(tp)->if_vnet);
255 	dev = tp->tun_dev;
256 	bpfdetach(TUN2IFP(tp));
257 	if_detach(TUN2IFP(tp));
258 	if_free(TUN2IFP(tp));
259 	destroy_dev(dev);
260 	seldrain(&tp->tun_rsel);
261 	knlist_clear(&tp->tun_rsel.si_note, 0);
262 	knlist_destroy(&tp->tun_rsel.si_note);
263 	mtx_destroy(&tp->tun_mtx);
264 	cv_destroy(&tp->tun_cv);
265 	free(tp, M_TUN);
266 	CURVNET_RESTORE();
267 }
268 
269 static void
tun_clone_destroy(struct ifnet * ifp)270 tun_clone_destroy(struct ifnet *ifp)
271 {
272 	struct tun_softc *tp = ifp->if_softc;
273 
274 	mtx_lock(&tunmtx);
275 	TAILQ_REMOVE(&tunhead, tp, tun_list);
276 	mtx_unlock(&tunmtx);
277 	tun_destroy(tp);
278 }
279 
280 static int
tunmodevent(module_t mod,int type,void * data)281 tunmodevent(module_t mod, int type, void *data)
282 {
283 	static eventhandler_tag tag;
284 	struct tun_softc *tp;
285 
286 	switch (type) {
287 	case MOD_LOAD:
288 		mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
289 		clone_setup(&tunclones);
290 		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
291 		if (tag == NULL)
292 			return (ENOMEM);
293 		tun_cloner = if_clone_simple(tunname, tun_clone_create,
294 		    tun_clone_destroy, 0);
295 		break;
296 	case MOD_UNLOAD:
297 		if_clone_detach(tun_cloner);
298 		EVENTHANDLER_DEREGISTER(dev_clone, tag);
299 		drain_dev_clone_events();
300 
301 		mtx_lock(&tunmtx);
302 		while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
303 			TAILQ_REMOVE(&tunhead, tp, tun_list);
304 			mtx_unlock(&tunmtx);
305 			tun_destroy(tp);
306 			mtx_lock(&tunmtx);
307 		}
308 		mtx_unlock(&tunmtx);
309 		clone_cleanup(&tunclones);
310 		mtx_destroy(&tunmtx);
311 		break;
312 	default:
313 		return EOPNOTSUPP;
314 	}
315 	return 0;
316 }
317 
318 static moduledata_t tun_mod = {
319 	"if_tun",
320 	tunmodevent,
321 	0
322 };
323 
324 DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
325 MODULE_VERSION(if_tun, 1);
326 
327 static void
tunstart(struct ifnet * ifp)328 tunstart(struct ifnet *ifp)
329 {
330 	struct tun_softc *tp = ifp->if_softc;
331 	struct mbuf *m;
332 
333 	TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
334 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
335 		IFQ_LOCK(&ifp->if_snd);
336 		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
337 		if (m == NULL) {
338 			IFQ_UNLOCK(&ifp->if_snd);
339 			return;
340 		}
341 		IFQ_UNLOCK(&ifp->if_snd);
342 	}
343 
344 	mtx_lock(&tp->tun_mtx);
345 	if (tp->tun_flags & TUN_RWAIT) {
346 		tp->tun_flags &= ~TUN_RWAIT;
347 		wakeup(tp);
348 	}
349 	selwakeuppri(&tp->tun_rsel, PZERO + 1);
350 	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
351 	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
352 		mtx_unlock(&tp->tun_mtx);
353 		pgsigio(&tp->tun_sigio, SIGIO, 0);
354 	} else
355 		mtx_unlock(&tp->tun_mtx);
356 }
357 
358 /* XXX: should return an error code so it can fail. */
359 static void
tuncreate(const char * name,struct cdev * dev)360 tuncreate(const char *name, struct cdev *dev)
361 {
362 	struct tun_softc *sc;
363 	struct ifnet *ifp;
364 
365 	sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
366 	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
367 	cv_init(&sc->tun_cv, "tun_condvar");
368 	sc->tun_flags = TUN_INITED;
369 	sc->tun_dev = dev;
370 	mtx_lock(&tunmtx);
371 	TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
372 	mtx_unlock(&tunmtx);
373 
374 	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
375 	if (ifp == NULL)
376 		panic("%s%d: failed to if_alloc() interface.\n",
377 		    name, dev2unit(dev));
378 	if_initname(ifp, name, dev2unit(dev));
379 	ifp->if_mtu = TUNMTU;
380 	ifp->if_ioctl = tunifioctl;
381 	ifp->if_output = tunoutput;
382 	ifp->if_start = tunstart;
383 	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
384 	ifp->if_softc = sc;
385 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
386 	ifp->if_snd.ifq_drv_maxlen = 0;
387 	IFQ_SET_READY(&ifp->if_snd);
388 	knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
389 	ifp->if_capabilities |= IFCAP_LINKSTATE;
390 	ifp->if_capenable |= IFCAP_LINKSTATE;
391 
392 	if_attach(ifp);
393 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
394 	dev->si_drv1 = sc;
395 	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
396 	    ifp->if_xname, dev2unit(dev));
397 }
398 
399 static int
tunopen(struct cdev * dev,int flag,int mode,struct thread * td)400 tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
401 {
402 	struct ifnet	*ifp;
403 	struct tun_softc *tp;
404 
405 	/*
406 	 * XXXRW: Non-atomic test and set of dev->si_drv1 requires
407 	 * synchronization.
408 	 */
409 	tp = dev->si_drv1;
410 	if (!tp) {
411 		tuncreate(tunname, dev);
412 		tp = dev->si_drv1;
413 	}
414 
415 	/*
416 	 * XXXRW: This use of tun_pid is subject to error due to the
417 	 * fact that a reference to the tunnel can live beyond the
418 	 * death of the process that created it.  Can we replace this
419 	 * with a simple busy flag?
420 	 */
421 	mtx_lock(&tp->tun_mtx);
422 	if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
423 		mtx_unlock(&tp->tun_mtx);
424 		return (EBUSY);
425 	}
426 	tp->tun_pid = td->td_proc->p_pid;
427 
428 	tp->tun_flags |= TUN_OPEN;
429 	ifp = TUN2IFP(tp);
430 	if_link_state_change(ifp, LINK_STATE_UP);
431 	TUNDEBUG(ifp, "open\n");
432 	mtx_unlock(&tp->tun_mtx);
433 
434 	return (0);
435 }
436 
437 /*
438  * tunclose - close the device - mark i/f down & delete
439  * routing info
440  */
441 static	int
tunclose(struct cdev * dev,int foo,int bar,struct thread * td)442 tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
443 {
444 	struct tun_softc *tp;
445 	struct ifnet *ifp;
446 
447 	tp = dev->si_drv1;
448 	ifp = TUN2IFP(tp);
449 
450 	mtx_lock(&tp->tun_mtx);
451 	tp->tun_flags &= ~TUN_OPEN;
452 	tp->tun_pid = 0;
453 
454 	/*
455 	 * junk all pending output
456 	 */
457 	CURVNET_SET(ifp->if_vnet);
458 	IFQ_PURGE(&ifp->if_snd);
459 
460 	if (ifp->if_flags & IFF_UP) {
461 		mtx_unlock(&tp->tun_mtx);
462 		if_down(ifp);
463 		mtx_lock(&tp->tun_mtx);
464 	}
465 
466 	/* Delete all addresses and routes which reference this interface. */
467 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
468 		struct ifaddr *ifa;
469 
470 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
471 		mtx_unlock(&tp->tun_mtx);
472 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
473 			/* deal w/IPv4 PtP destination; unlocked read */
474 			if (ifa->ifa_addr->sa_family == AF_INET) {
475 				rtinit(ifa, (int)RTM_DELETE,
476 				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
477 			} else {
478 				rtinit(ifa, (int)RTM_DELETE, 0);
479 			}
480 		}
481 		if_purgeaddrs(ifp);
482 		mtx_lock(&tp->tun_mtx);
483 	}
484 	if_link_state_change(ifp, LINK_STATE_DOWN);
485 	CURVNET_RESTORE();
486 
487 	funsetown(&tp->tun_sigio);
488 	selwakeuppri(&tp->tun_rsel, PZERO + 1);
489 	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
490 	TUNDEBUG (ifp, "closed\n");
491 
492 	cv_broadcast(&tp->tun_cv);
493 	mtx_unlock(&tp->tun_mtx);
494 	return (0);
495 }
496 
497 static void
tuninit(struct ifnet * ifp)498 tuninit(struct ifnet *ifp)
499 {
500 	struct tun_softc *tp = ifp->if_softc;
501 #ifdef INET
502 	struct ifaddr *ifa;
503 #endif
504 
505 	TUNDEBUG(ifp, "tuninit\n");
506 
507 	mtx_lock(&tp->tun_mtx);
508 	ifp->if_flags |= IFF_UP;
509 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
510 	getmicrotime(&ifp->if_lastchange);
511 
512 #ifdef INET
513 	if_addr_rlock(ifp);
514 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
515 		if (ifa->ifa_addr->sa_family == AF_INET) {
516 			struct sockaddr_in *si;
517 
518 			si = (struct sockaddr_in *)ifa->ifa_addr;
519 			if (si->sin_addr.s_addr)
520 				tp->tun_flags |= TUN_IASET;
521 
522 			si = (struct sockaddr_in *)ifa->ifa_dstaddr;
523 			if (si && si->sin_addr.s_addr)
524 				tp->tun_flags |= TUN_DSTADDR;
525 		}
526 	}
527 	if_addr_runlock(ifp);
528 #endif
529 	mtx_unlock(&tp->tun_mtx);
530 }
531 
532 /*
533  * Process an ioctl request.
534  */
535 static int
tunifioctl(struct ifnet * ifp,u_long cmd,caddr_t data)536 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
537 {
538 	struct ifreq *ifr = (struct ifreq *)data;
539 	struct tun_softc *tp = ifp->if_softc;
540 	struct ifstat *ifs;
541 	int		error = 0;
542 
543 	switch(cmd) {
544 	case SIOCGIFSTATUS:
545 		ifs = (struct ifstat *)data;
546 		mtx_lock(&tp->tun_mtx);
547 		if (tp->tun_pid)
548 			sprintf(ifs->ascii + strlen(ifs->ascii),
549 			    "\tOpened by PID %d\n", tp->tun_pid);
550 		mtx_unlock(&tp->tun_mtx);
551 		break;
552 	case SIOCSIFADDR:
553 		tuninit(ifp);
554 		TUNDEBUG(ifp, "address set\n");
555 		break;
556 	case SIOCSIFMTU:
557 		ifp->if_mtu = ifr->ifr_mtu;
558 		TUNDEBUG(ifp, "mtu set\n");
559 		break;
560 	case SIOCSIFFLAGS:
561 	case SIOCADDMULTI:
562 	case SIOCDELMULTI:
563 		break;
564 	default:
565 		error = EINVAL;
566 	}
567 	return (error);
568 }
569 
570 /*
571  * tunoutput - queue packets from higher level ready to put out.
572  */
573 static int
tunoutput(struct ifnet * ifp,struct mbuf * m0,const struct sockaddr * dst,struct route * ro)574 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
575     struct route *ro)
576 {
577 	struct tun_softc *tp = ifp->if_softc;
578 	u_short cached_tun_flags;
579 	int error;
580 	u_int32_t af;
581 
582 	TUNDEBUG (ifp, "tunoutput\n");
583 
584 #ifdef MAC
585 	error = mac_ifnet_check_transmit(ifp, m0);
586 	if (error) {
587 		m_freem(m0);
588 		return (error);
589 	}
590 #endif
591 
592 	/* Could be unlocked read? */
593 	mtx_lock(&tp->tun_mtx);
594 	cached_tun_flags = tp->tun_flags;
595 	mtx_unlock(&tp->tun_mtx);
596 	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
597 		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
598 		m_freem (m0);
599 		return (EHOSTDOWN);
600 	}
601 
602 	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
603 		m_freem (m0);
604 		return (EHOSTDOWN);
605 	}
606 
607 	/* BPF writes need to be handled specially. */
608 	if (dst->sa_family == AF_UNSPEC)
609 		bcopy(dst->sa_data, &af, sizeof(af));
610 	else
611 		af = dst->sa_family;
612 
613 	if (bpf_peers_present(ifp->if_bpf))
614 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
615 
616 	/* prepend sockaddr? this may abort if the mbuf allocation fails */
617 	if (cached_tun_flags & TUN_LMODE) {
618 		/* allocate space for sockaddr */
619 		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
620 
621 		/* if allocation failed drop packet */
622 		if (m0 == NULL) {
623 			ifp->if_iqdrops++;
624 			ifp->if_oerrors++;
625 			return (ENOBUFS);
626 		} else {
627 			bcopy(dst, m0->m_data, dst->sa_len);
628 		}
629 	}
630 
631 	if (cached_tun_flags & TUN_IFHEAD) {
632 		/* Prepend the address family */
633 		M_PREPEND(m0, 4, M_NOWAIT);
634 
635 		/* if allocation failed drop packet */
636 		if (m0 == NULL) {
637 			ifp->if_iqdrops++;
638 			ifp->if_oerrors++;
639 			return (ENOBUFS);
640 		} else
641 			*(u_int32_t *)m0->m_data = htonl(af);
642 	} else {
643 #ifdef INET
644 		if (af != AF_INET)
645 #endif
646 		{
647 			m_freem(m0);
648 			return (EAFNOSUPPORT);
649 		}
650 	}
651 
652 	error = (ifp->if_transmit)(ifp, m0);
653 	if (error)
654 		return (ENOBUFS);
655 	ifp->if_opackets++;
656 	return (0);
657 }
658 
659 /*
660  * the cdevsw interface is now pretty minimal.
661  */
662 static	int
tunioctl(struct cdev * dev,u_long cmd,caddr_t data,int flag,struct thread * td)663 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
664     struct thread *td)
665 {
666 	int		error;
667 	struct tun_softc *tp = dev->si_drv1;
668 	struct tuninfo *tunp;
669 
670 	switch (cmd) {
671 	case TUNSIFINFO:
672 		tunp = (struct tuninfo *)data;
673 		if (tunp->mtu < IF_MINMTU)
674 			return (EINVAL);
675 		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
676 			error = priv_check(td, PRIV_NET_SETIFMTU);
677 			if (error)
678 				return (error);
679 		}
680 		if (TUN2IFP(tp)->if_type != tunp->type)
681 			return (EPROTOTYPE);
682 		mtx_lock(&tp->tun_mtx);
683 		TUN2IFP(tp)->if_mtu = tunp->mtu;
684 		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
685 		mtx_unlock(&tp->tun_mtx);
686 		break;
687 	case TUNGIFINFO:
688 		tunp = (struct tuninfo *)data;
689 		mtx_lock(&tp->tun_mtx);
690 		tunp->mtu = TUN2IFP(tp)->if_mtu;
691 		tunp->type = TUN2IFP(tp)->if_type;
692 		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
693 		mtx_unlock(&tp->tun_mtx);
694 		break;
695 	case TUNSDEBUG:
696 		tundebug = *(int *)data;
697 		break;
698 	case TUNGDEBUG:
699 		*(int *)data = tundebug;
700 		break;
701 	case TUNSLMODE:
702 		mtx_lock(&tp->tun_mtx);
703 		if (*(int *)data) {
704 			tp->tun_flags |= TUN_LMODE;
705 			tp->tun_flags &= ~TUN_IFHEAD;
706 		} else
707 			tp->tun_flags &= ~TUN_LMODE;
708 		mtx_unlock(&tp->tun_mtx);
709 		break;
710 	case TUNSIFHEAD:
711 		mtx_lock(&tp->tun_mtx);
712 		if (*(int *)data) {
713 			tp->tun_flags |= TUN_IFHEAD;
714 			tp->tun_flags &= ~TUN_LMODE;
715 		} else
716 			tp->tun_flags &= ~TUN_IFHEAD;
717 		mtx_unlock(&tp->tun_mtx);
718 		break;
719 	case TUNGIFHEAD:
720 		mtx_lock(&tp->tun_mtx);
721 		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
722 		mtx_unlock(&tp->tun_mtx);
723 		break;
724 	case TUNSIFMODE:
725 		/* deny this if UP */
726 		if (TUN2IFP(tp)->if_flags & IFF_UP)
727 			return(EBUSY);
728 
729 		switch (*(int *)data & ~IFF_MULTICAST) {
730 		case IFF_POINTOPOINT:
731 		case IFF_BROADCAST:
732 			mtx_lock(&tp->tun_mtx);
733 			TUN2IFP(tp)->if_flags &=
734 			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
735 			TUN2IFP(tp)->if_flags |= *(int *)data;
736 			mtx_unlock(&tp->tun_mtx);
737 			break;
738 		default:
739 			return(EINVAL);
740 		}
741 		break;
742 	case TUNSIFPID:
743 		mtx_lock(&tp->tun_mtx);
744 		tp->tun_pid = curthread->td_proc->p_pid;
745 		mtx_unlock(&tp->tun_mtx);
746 		break;
747 	case FIONBIO:
748 		break;
749 	case FIOASYNC:
750 		mtx_lock(&tp->tun_mtx);
751 		if (*(int *)data)
752 			tp->tun_flags |= TUN_ASYNC;
753 		else
754 			tp->tun_flags &= ~TUN_ASYNC;
755 		mtx_unlock(&tp->tun_mtx);
756 		break;
757 	case FIONREAD:
758 		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
759 			struct mbuf *mb;
760 			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
761 			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
762 			for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
763 				*(int *)data += mb->m_len;
764 			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
765 		} else
766 			*(int *)data = 0;
767 		break;
768 	case FIOSETOWN:
769 		return (fsetown(*(int *)data, &tp->tun_sigio));
770 
771 	case FIOGETOWN:
772 		*(int *)data = fgetown(&tp->tun_sigio);
773 		return (0);
774 
775 	/* This is deprecated, FIOSETOWN should be used instead. */
776 	case TIOCSPGRP:
777 		return (fsetown(-(*(int *)data), &tp->tun_sigio));
778 
779 	/* This is deprecated, FIOGETOWN should be used instead. */
780 	case TIOCGPGRP:
781 		*(int *)data = -fgetown(&tp->tun_sigio);
782 		return (0);
783 
784 	default:
785 		return (ENOTTY);
786 	}
787 	return (0);
788 }
789 
790 /*
791  * The cdevsw read interface - reads a packet at a time, or at
792  * least as much of a packet as can be read.
793  */
794 static	int
tunread(struct cdev * dev,struct uio * uio,int flag)795 tunread(struct cdev *dev, struct uio *uio, int flag)
796 {
797 	struct tun_softc *tp = dev->si_drv1;
798 	struct ifnet	*ifp = TUN2IFP(tp);
799 	struct mbuf	*m;
800 	int		error=0, len;
801 
802 	TUNDEBUG (ifp, "read\n");
803 	mtx_lock(&tp->tun_mtx);
804 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
805 		mtx_unlock(&tp->tun_mtx);
806 		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
807 		return (EHOSTDOWN);
808 	}
809 
810 	tp->tun_flags &= ~TUN_RWAIT;
811 
812 	do {
813 		IFQ_DEQUEUE(&ifp->if_snd, m);
814 		if (m == NULL) {
815 			if (flag & O_NONBLOCK) {
816 				mtx_unlock(&tp->tun_mtx);
817 				return (EWOULDBLOCK);
818 			}
819 			tp->tun_flags |= TUN_RWAIT;
820 			error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
821 			    "tunread", 0);
822 			if (error != 0) {
823 				mtx_unlock(&tp->tun_mtx);
824 				return (error);
825 			}
826 		}
827 	} while (m == NULL);
828 	mtx_unlock(&tp->tun_mtx);
829 
830 	while (m && uio->uio_resid > 0 && error == 0) {
831 		len = min(uio->uio_resid, m->m_len);
832 		if (len != 0)
833 			error = uiomove(mtod(m, void *), len, uio);
834 		m = m_free(m);
835 	}
836 
837 	if (m) {
838 		TUNDEBUG(ifp, "Dropping mbuf\n");
839 		m_freem(m);
840 	}
841 	return (error);
842 }
843 
844 /*
845  * the cdevsw write interface - an atomic write is a packet - or else!
846  */
847 static	int
tunwrite(struct cdev * dev,struct uio * uio,int flag)848 tunwrite(struct cdev *dev, struct uio *uio, int flag)
849 {
850 	struct tun_softc *tp = dev->si_drv1;
851 	struct ifnet	*ifp = TUN2IFP(tp);
852 	struct mbuf	*m;
853 	uint32_t	family;
854 	int 		isr;
855 
856 	TUNDEBUG(ifp, "tunwrite\n");
857 
858 	if ((ifp->if_flags & IFF_UP) != IFF_UP)
859 		/* ignore silently */
860 		return (0);
861 
862 	if (uio->uio_resid == 0)
863 		return (0);
864 
865 	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
866 		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
867 		return (EIO);
868 	}
869 
870 	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
871 		ifp->if_ierrors++;
872 		return (ENOBUFS);
873 	}
874 
875 	m->m_pkthdr.rcvif = ifp;
876 #ifdef MAC
877 	mac_ifnet_create_mbuf(ifp, m);
878 #endif
879 
880 	/* Could be unlocked read? */
881 	mtx_lock(&tp->tun_mtx);
882 	if (tp->tun_flags & TUN_IFHEAD) {
883 		mtx_unlock(&tp->tun_mtx);
884 		if (m->m_len < sizeof(family) &&
885 		    (m = m_pullup(m, sizeof(family))) == NULL)
886 			return (ENOBUFS);
887 		family = ntohl(*mtod(m, u_int32_t *));
888 		m_adj(m, sizeof(family));
889 	} else {
890 		mtx_unlock(&tp->tun_mtx);
891 		family = AF_INET;
892 	}
893 
894 	BPF_MTAP2(ifp, &family, sizeof(family), m);
895 
896 	switch (family) {
897 #ifdef INET
898 	case AF_INET:
899 		isr = NETISR_IP;
900 		break;
901 #endif
902 #ifdef INET6
903 	case AF_INET6:
904 		isr = NETISR_IPV6;
905 		break;
906 #endif
907 #ifdef IPX
908 	case AF_IPX:
909 		isr = NETISR_IPX;
910 		break;
911 #endif
912 #ifdef NETATALK
913 	case AF_APPLETALK:
914 		isr = NETISR_ATALK2;
915 		break;
916 #endif
917 	default:
918 		m_freem(m);
919 		return (EAFNOSUPPORT);
920 	}
921 	if (harvest.point_to_point)
922 		random_harvest(&(m->m_data), 12, 2, RANDOM_NET_TUN);
923 	ifp->if_ibytes += m->m_pkthdr.len;
924 	ifp->if_ipackets++;
925 	CURVNET_SET(ifp->if_vnet);
926 	M_SETFIB(m, ifp->if_fib);
927 	netisr_dispatch(isr, m);
928 	CURVNET_RESTORE();
929 	return (0);
930 }
931 
932 /*
933  * tunpoll - the poll interface, this is only useful on reads
934  * really. The write detect always returns true, write never blocks
935  * anyway, it either accepts the packet or drops it.
936  */
937 static	int
tunpoll(struct cdev * dev,int events,struct thread * td)938 tunpoll(struct cdev *dev, int events, struct thread *td)
939 {
940 	struct tun_softc *tp = dev->si_drv1;
941 	struct ifnet	*ifp = TUN2IFP(tp);
942 	int		revents = 0;
943 	struct mbuf	*m;
944 
945 	TUNDEBUG(ifp, "tunpoll\n");
946 
947 	if (events & (POLLIN | POLLRDNORM)) {
948 		IFQ_LOCK(&ifp->if_snd);
949 		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
950 		if (m != NULL) {
951 			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
952 			revents |= events & (POLLIN | POLLRDNORM);
953 		} else {
954 			TUNDEBUG(ifp, "tunpoll waiting\n");
955 			selrecord(td, &tp->tun_rsel);
956 		}
957 		IFQ_UNLOCK(&ifp->if_snd);
958 	}
959 	if (events & (POLLOUT | POLLWRNORM))
960 		revents |= events & (POLLOUT | POLLWRNORM);
961 
962 	return (revents);
963 }
964 
965 /*
966  * tunkqfilter - support for the kevent() system call.
967  */
968 static int
tunkqfilter(struct cdev * dev,struct knote * kn)969 tunkqfilter(struct cdev *dev, struct knote *kn)
970 {
971 	struct tun_softc	*tp = dev->si_drv1;
972 	struct ifnet	*ifp = TUN2IFP(tp);
973 
974 	switch(kn->kn_filter) {
975 	case EVFILT_READ:
976 		TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
977 		    ifp->if_xname, dev2unit(dev));
978 		kn->kn_fop = &tun_read_filterops;
979 		break;
980 
981 	case EVFILT_WRITE:
982 		TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
983 		    ifp->if_xname, dev2unit(dev));
984 		kn->kn_fop = &tun_write_filterops;
985 		break;
986 
987 	default:
988 		TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
989 		    ifp->if_xname, dev2unit(dev));
990 		return(EINVAL);
991 	}
992 
993 	kn->kn_hook = tp;
994 	knlist_add(&tp->tun_rsel.si_note, kn, 0);
995 
996 	return (0);
997 }
998 
999 /*
1000  * Return true of there is data in the interface queue.
1001  */
1002 static int
tunkqread(struct knote * kn,long hint)1003 tunkqread(struct knote *kn, long hint)
1004 {
1005 	int			ret;
1006 	struct tun_softc	*tp = kn->kn_hook;
1007 	struct cdev		*dev = tp->tun_dev;
1008 	struct ifnet	*ifp = TUN2IFP(tp);
1009 
1010 	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
1011 		TUNDEBUG(ifp,
1012 		    "%s have data in the queue.  Len = %d, minor = %#x\n",
1013 		    ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
1014 		ret = 1;
1015 	} else {
1016 		TUNDEBUG(ifp,
1017 		    "%s waiting for data, minor = %#x\n", ifp->if_xname,
1018 		    dev2unit(dev));
1019 		ret = 0;
1020 	}
1021 
1022 	return (ret);
1023 }
1024 
1025 /*
1026  * Always can write, always return MTU in kn->data.
1027  */
1028 static int
tunkqwrite(struct knote * kn,long hint)1029 tunkqwrite(struct knote *kn, long hint)
1030 {
1031 	struct tun_softc	*tp = kn->kn_hook;
1032 	struct ifnet	*ifp = TUN2IFP(tp);
1033 
1034 	kn->kn_data = ifp->if_mtu;
1035 
1036 	return (1);
1037 }
1038 
1039 static void
tunkqdetach(struct knote * kn)1040 tunkqdetach(struct knote *kn)
1041 {
1042 	struct tun_softc	*tp = kn->kn_hook;
1043 
1044 	knlist_remove(&tp->tun_rsel.si_note, kn, 0);
1045 }
1046