xref: /freebsd-13-stable/sys/kern/uipc_domain.c (revision 64dfe0a94bfda5fb858d0e5642e3f82e8dc22dd7)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
32  */
33 
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/socket.h>
37 #include <sys/protosw.h>
38 #include <sys/domain.h>
39 #include <sys/eventhandler.h>
40 #include <sys/epoch.h>
41 #include <sys/mbuf.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/rmlock.h>
46 #include <sys/socketvar.h>
47 #include <sys/systm.h>
48 
49 #include <net/vnet.h>
50 
/*
 * System initialization
 *
 * Note: domain initialization takes place on a per-domain basis
 * as a result of traversing a SYSINIT linker set.  Most likely,
 * each domain would want to call DOMAIN_SET(9) itself, which
 * would cause the domain to be added just after domaininit()
 * is called during startup.
 *
 * See DOMAIN_SET(9) for details on its use.
 */
62 
63 static void domaininit(void *);
64 SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL);
65 
66 static void domainfinalize(void *);
67 SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize,
68     NULL);
69 
70 static struct callout pffast_callout;
71 static struct callout pfslow_callout;
72 
73 static void	pffasttimo(void *);
74 static void	pfslowtimo(void *);
75 
76 static struct rmlock pftimo_lock;
77 RM_SYSINIT(pftimo_lock, &pftimo_lock, "pftimo");
78 
79 static LIST_HEAD(, protosw) pffast_list =
80     LIST_HEAD_INITIALIZER(pffast_list);
81 static LIST_HEAD(, protosw) pfslow_list =
82     LIST_HEAD_INITIALIZER(pfslow_list);
83 
84 struct domain *domains;		/* registered protocol domains */
85 int domain_init_status = 0;
86 static struct mtx dom_mtx;		/* domain list lock */
87 MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF);
88 
89 /*
90  * Dummy protocol specific user requests function pointer array.
91  * All functions return EOPNOTSUPP.
92  */
93 struct pr_usrreqs nousrreqs = {
94 	.pru_accept =		pru_accept_notsupp,
95 	.pru_attach =		pru_attach_notsupp,
96 	.pru_bind =		pru_bind_notsupp,
97 	.pru_connect =		pru_connect_notsupp,
98 	.pru_connect2 =		pru_connect2_notsupp,
99 	.pru_control =		pru_control_notsupp,
100 	.pru_disconnect	=	pru_disconnect_notsupp,
101 	.pru_listen =		pru_listen_notsupp,
102 	.pru_peeraddr =		pru_peeraddr_notsupp,
103 	.pru_rcvd =		pru_rcvd_notsupp,
104 	.pru_rcvoob =		pru_rcvoob_notsupp,
105 	.pru_send =		pru_send_notsupp,
106 	.pru_sense =		pru_sense_null,
107 	.pru_shutdown =		pru_shutdown_notsupp,
108 	.pru_sockaddr =		pru_sockaddr_notsupp,
109 	.pru_sosend =		pru_sosend_notsupp,
110 	.pru_soreceive =	pru_soreceive_notsupp,
111 	.pru_sopoll =		pru_sopoll_notsupp,
112 };
113 
114 static void
protosw_init(struct protosw * pr)115 protosw_init(struct protosw *pr)
116 {
117 	struct pr_usrreqs *pu;
118 
119 	pu = pr->pr_usrreqs;
120 	KASSERT(pu != NULL, ("protosw_init: %ssw[%d] has no usrreqs!",
121 	    pr->pr_domain->dom_name,
122 	    (int)(pr - pr->pr_domain->dom_protosw)));
123 
124 	/*
125 	 * Protocol switch methods fall into three categories: mandatory,
126 	 * mandatory but protosw_init() provides a default, and optional.
127 	 *
128 	 * For true protocols (i.e., pru_attach != NULL), KASSERT truly
129 	 * mandatory methods with no defaults, and initialize defaults for
130 	 * other mandatory methods if the protocol hasn't defined an
131 	 * implementation (NULL function pointer).
132 	 */
133 #if 0
134 	if (pu->pru_attach != NULL) {
135 		KASSERT(pu->pru_abort != NULL,
136 		    ("protosw_init: %ssw[%d] pru_abort NULL",
137 		    pr->pr_domain->dom_name,
138 		    (int)(pr - pr->pr_domain->dom_protosw)));
139 		KASSERT(pu->pru_send != NULL,
140 		    ("protosw_init: %ssw[%d] pru_send NULL",
141 		    pr->pr_domain->dom_name,
142 		    (int)(pr - pr->pr_domain->dom_protosw)));
143 	}
144 #endif
145 
146 #define DEFAULT(foo, bar)	if ((foo) == NULL)  (foo) = (bar)
147 	DEFAULT(pu->pru_accept, pru_accept_notsupp);
148 	DEFAULT(pu->pru_aio_queue, pru_aio_queue_notsupp);
149 	DEFAULT(pu->pru_bind, pru_bind_notsupp);
150 	DEFAULT(pu->pru_bindat, pru_bindat_notsupp);
151 	DEFAULT(pu->pru_connect, pru_connect_notsupp);
152 	DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
153 	DEFAULT(pu->pru_connectat, pru_connectat_notsupp);
154 	DEFAULT(pu->pru_control, pru_control_notsupp);
155 	DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
156 	DEFAULT(pu->pru_listen, pru_listen_notsupp);
157 	DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp);
158 	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
159 	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
160 	DEFAULT(pu->pru_sense, pru_sense_null);
161 	DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp);
162 	DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp);
163 	DEFAULT(pu->pru_sosend, sosend_generic);
164 	DEFAULT(pu->pru_soreceive, soreceive_generic);
165 	DEFAULT(pu->pru_sopoll, sopoll_generic);
166 	DEFAULT(pu->pru_ready, pru_ready_notsupp);
167 #undef DEFAULT
168 	if (pr->pr_init)
169 		(*pr->pr_init)();
170 }
171 
172 /*
173  * Add a new protocol domain to the list of supported domains
174  * Note: you can't unload it again because a socket may be using it.
175  * XXX can't fail at this time.
176  */
177 void
domain_init(void * arg)178 domain_init(void *arg)
179 {
180 	struct domain *dp = arg;
181 	struct protosw *pr;
182 
183 	if (dp->dom_init)
184 		(*dp->dom_init)();
185 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
186 		protosw_init(pr);
187 
188 		/*
189 		 * Note that with VIMAGE enabled, domain_init() will be
190 		 * re-invoked for each new vnet that's created.  The below lists
191 		 * are intended to be system-wide, so avoid altering global
192 		 * state for non-default vnets.
193 		 */
194 		if (IS_DEFAULT_VNET(curvnet)) {
195 			rm_wlock(&pftimo_lock);
196 			if (pr->pr_fasttimo != NULL)
197 				LIST_INSERT_HEAD(&pffast_list, pr,
198 				    pr_fasttimos);
199 			if (pr->pr_slowtimo != NULL)
200 				LIST_INSERT_HEAD(&pfslow_list, pr,
201 				    pr_slowtimos);
202 			rm_wunlock(&pftimo_lock);
203 		}
204 	}
205 
206 	/*
207 	 * update global information about maximums
208 	 */
209 	max_hdr = max_linkhdr + max_protohdr;
210 	max_datalen = MHLEN - max_hdr;
211 	if (max_datalen < 1)
212 		panic("%s: max_datalen < 1", __func__);
213 }
214 
#ifdef VIMAGE
/*
 * Per-vnet domain initialization.  'arg' is the struct domain, passed
 * straight through to domain_init().
 */
void
vnet_domain_init(void *arg)
{

	/* Virtualized case is no different -- call init functions. */
	domain_init(arg);
}

/*
 * Per-vnet domain teardown: call the domain's dom_destroy hook if it has
 * one.  'arg' is the struct domain.
 */
void
vnet_domain_uninit(void *arg)
{
	struct domain *dp = arg;

	if (dp->dom_destroy != NULL)
		(*dp->dom_destroy)();
}
#endif
233 
234 /*
235  * Add a new protocol domain to the list of supported domains
236  * Note: you cant unload it again because a socket may be using it.
237  * XXX can't fail at this time.
238  */
239 void
domain_add(void * data)240 domain_add(void *data)
241 {
242 	struct domain *dp;
243 
244 	dp = (struct domain *)data;
245 	mtx_lock(&dom_mtx);
246 	dp->dom_next = domains;
247 	domains = dp;
248 
249 	KASSERT(domain_init_status >= 1,
250 	    ("attempt to domain_add(%s) before domaininit()",
251 	    dp->dom_name));
252 #ifndef INVARIANTS
253 	if (domain_init_status < 1)
254 		printf("WARNING: attempt to domain_add(%s) before "
255 		    "domaininit()\n", dp->dom_name);
256 #endif
257 #ifdef notyet
258 	KASSERT(domain_init_status < 2,
259 	    ("attempt to domain_add(%s) after domainfinalize()",
260 	    dp->dom_name));
261 #else
262 	if (domain_init_status >= 2)
263 		printf("WARNING: attempt to domain_add(%s) after "
264 		    "domainfinalize()\n", dp->dom_name);
265 #endif
266 	mtx_unlock(&dom_mtx);
267 }
268 
269 void
domain_remove(void * data)270 domain_remove(void *data)
271 {
272 	struct domain *dp = (struct domain *)data;
273 
274 	if (dp->dom_family != PF_NETLINK)
275 		return;
276 
277 	mtx_lock(&dom_mtx);
278 	if (domains == dp) {
279 		domains = dp->dom_next;
280 	} else {
281 		struct domain *curr;
282 		for (curr = domains; curr != NULL; curr = curr->dom_next) {
283 			if (curr->dom_next == dp) {
284 				curr->dom_next = dp->dom_next;
285 				break;
286 			}
287 		}
288 	}
289 	mtx_unlock(&dom_mtx);
290 }
291 
292 /* ARGSUSED*/
293 static void
domaininit(void * dummy)294 domaininit(void *dummy)
295 {
296 
297 	if (max_linkhdr < 16)		/* XXX */
298 		max_linkhdr = 16;
299 
300 	callout_init(&pffast_callout, 1);
301 	callout_init(&pfslow_callout, 1);
302 
303 	mtx_lock(&dom_mtx);
304 	KASSERT(domain_init_status == 0, ("domaininit called too late!"));
305 	domain_init_status = 1;
306 	mtx_unlock(&dom_mtx);
307 }
308 
309 /* ARGSUSED*/
310 static void
domainfinalize(void * dummy)311 domainfinalize(void *dummy)
312 {
313 
314 	mtx_lock(&dom_mtx);
315 	KASSERT(domain_init_status == 1, ("domainfinalize called too late!"));
316 	domain_init_status = 2;
317 	mtx_unlock(&dom_mtx);
318 
319 	callout_reset(&pffast_callout, 1, pffasttimo, NULL);
320 	callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
321 }
322 
323 struct domain *
pffinddomain(int family)324 pffinddomain(int family)
325 {
326 	struct domain *dp;
327 
328 	for (dp = domains; dp != NULL; dp = dp->dom_next)
329 		if (dp->dom_family == family)
330 			return (dp);
331 	return (NULL);
332 }
333 
334 struct protosw *
pffindtype(int family,int type)335 pffindtype(int family, int type)
336 {
337 	struct domain *dp;
338 	struct protosw *pr;
339 
340 	dp = pffinddomain(family);
341 	if (dp == NULL)
342 		return (NULL);
343 
344 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
345 		if (pr->pr_type && pr->pr_type == type)
346 			return (pr);
347 	return (NULL);
348 }
349 
350 struct protosw *
pffindproto(int family,int protocol,int type)351 pffindproto(int family, int protocol, int type)
352 {
353 	struct domain *dp;
354 	struct protosw *pr;
355 	struct protosw *maybe;
356 
357 	maybe = NULL;
358 	if (family == 0)
359 		return (NULL);
360 
361 	dp = pffinddomain(family);
362 	if (dp == NULL)
363 		return (NULL);
364 
365 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
366 		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
367 			return (pr);
368 
369 		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
370 		    pr->pr_protocol == 0 && maybe == NULL)
371 			maybe = pr;
372 	}
373 	return (maybe);
374 }
375 
376 /*
377  * The caller must make sure that the new protocol is fully set up and ready to
378  * accept requests before it is registered.
379  */
380 int
pf_proto_register(int family,struct protosw * npr)381 pf_proto_register(int family, struct protosw *npr)
382 {
383 	VNET_ITERATOR_DECL(vnet_iter);
384 	struct domain *dp;
385 	struct protosw *pr, *fpr;
386 
387 	/* Sanity checks. */
388 	if (family == 0)
389 		return (EPFNOSUPPORT);
390 	if (npr->pr_type == 0)
391 		return (EPROTOTYPE);
392 	if (npr->pr_protocol == 0)
393 		return (EPROTONOSUPPORT);
394 	if (npr->pr_usrreqs == NULL)
395 		return (ENXIO);
396 
397 	/* Try to find the specified domain based on the family. */
398 	dp = pffinddomain(family);
399 	if (dp == NULL)
400 		return (EPFNOSUPPORT);
401 
402 	/* Initialize backpointer to struct domain. */
403 	npr->pr_domain = dp;
404 	fpr = NULL;
405 
406 	/*
407 	 * Protect us against races when two protocol registrations for
408 	 * the same protocol happen at the same time.
409 	 */
410 	mtx_lock(&dom_mtx);
411 
412 	/* The new protocol must not yet exist. */
413 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
414 		if ((pr->pr_type == npr->pr_type) &&
415 		    (pr->pr_protocol == npr->pr_protocol)) {
416 			mtx_unlock(&dom_mtx);
417 			return (EEXIST);	/* XXX: Check only protocol? */
418 		}
419 		/* While here, remember the first free spacer. */
420 		if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER))
421 			fpr = pr;
422 	}
423 
424 	/* If no free spacer is found we can't add the new protocol. */
425 	if (fpr == NULL) {
426 		mtx_unlock(&dom_mtx);
427 		return (ENOMEM);
428 	}
429 
430 	/* Copy the new struct protosw over the spacer. */
431 	bcopy(npr, fpr, sizeof(*fpr));
432 
433 	rm_wlock(&pftimo_lock);
434 	if (fpr->pr_fasttimo != NULL)
435 		LIST_INSERT_HEAD(&pffast_list, fpr, pr_fasttimos);
436 	if (fpr->pr_slowtimo != NULL)
437 		LIST_INSERT_HEAD(&pfslow_list, fpr, pr_slowtimos);
438 	rm_wunlock(&pftimo_lock);
439 
440 	/* Job is done, no more protection required. */
441 	mtx_unlock(&dom_mtx);
442 
443 	/* Initialize and activate the protocol. */
444 	VNET_LIST_RLOCK();
445 	VNET_FOREACH(vnet_iter) {
446 		CURVNET_SET_QUIET(vnet_iter);
447 		protosw_init(fpr);
448 		CURVNET_RESTORE();
449 	}
450 	VNET_LIST_RUNLOCK();
451 
452 	return (0);
453 }
454 
455 /*
456  * The caller must make sure the protocol and its functions correctly shut down
457  * all sockets and release all locks and memory references.
458  */
459 int
pf_proto_unregister(int family,int protocol,int type)460 pf_proto_unregister(int family, int protocol, int type)
461 {
462 	struct domain *dp;
463 	struct protosw *pr, *dpr;
464 
465 	/* Sanity checks. */
466 	if (family == 0)
467 		return (EPFNOSUPPORT);
468 	if (protocol == 0)
469 		return (EPROTONOSUPPORT);
470 	if (type == 0)
471 		return (EPROTOTYPE);
472 
473 	/* Try to find the specified domain based on the family type. */
474 	dp = pffinddomain(family);
475 	if (dp == NULL)
476 		return (EPFNOSUPPORT);
477 
478 	dpr = NULL;
479 
480 	/* Lock out everyone else while we are manipulating the protosw. */
481 	mtx_lock(&dom_mtx);
482 
483 	/* The protocol must exist and only once. */
484 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
485 		if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) {
486 			if (dpr != NULL) {
487 				mtx_unlock(&dom_mtx);
488 				return (EMLINK);   /* Should not happen! */
489 			} else
490 				dpr = pr;
491 		}
492 	}
493 
494 	/* Protocol does not exist. */
495 	if (dpr == NULL) {
496 		mtx_unlock(&dom_mtx);
497 		return (EPROTONOSUPPORT);
498 	}
499 
500 	rm_wlock(&pftimo_lock);
501 	if (dpr->pr_fasttimo != NULL)
502 		LIST_REMOVE(dpr, pr_fasttimos);
503 	if (dpr->pr_slowtimo != NULL)
504 		LIST_REMOVE(dpr, pr_slowtimos);
505 	rm_wunlock(&pftimo_lock);
506 
507 	/* De-orbit the protocol and make the slot available again. */
508 	dpr->pr_type = 0;
509 	dpr->pr_domain = dp;
510 	dpr->pr_protocol = PROTO_SPACER;
511 	dpr->pr_flags = 0;
512 	dpr->pr_input = NULL;
513 	dpr->pr_output = NULL;
514 	dpr->pr_ctlinput = NULL;
515 	dpr->pr_ctloutput = NULL;
516 	dpr->pr_init = NULL;
517 	dpr->pr_fasttimo = NULL;
518 	dpr->pr_slowtimo = NULL;
519 	dpr->pr_drain = NULL;
520 	dpr->pr_usrreqs = &nousrreqs;
521 
522 	/* Job is done, not more protection required. */
523 	mtx_unlock(&dom_mtx);
524 
525 	return (0);
526 }
527 
528 void
pfctlinput(int cmd,struct sockaddr * sa)529 pfctlinput(int cmd, struct sockaddr *sa)
530 {
531 	struct domain *dp;
532 	struct protosw *pr;
533 
534 	NET_EPOCH_ASSERT();
535 
536 	for (dp = domains; dp; dp = dp->dom_next)
537 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
538 			if (pr->pr_ctlinput)
539 				(*pr->pr_ctlinput)(cmd, sa, (void *)0);
540 }
541 
542 static void
pfslowtimo(void * arg)543 pfslowtimo(void *arg)
544 {
545 	struct rm_priotracker tracker;
546 	struct epoch_tracker et;
547 	struct protosw *pr;
548 
549 	rm_rlock(&pftimo_lock, &tracker);
550 	NET_EPOCH_ENTER(et);
551 	LIST_FOREACH(pr, &pfslow_list, pr_slowtimos) {
552 		(*pr->pr_slowtimo)();
553 	}
554 	NET_EPOCH_EXIT(et);
555 	rm_runlock(&pftimo_lock, &tracker);
556 	callout_reset(&pfslow_callout, hz / PR_SLOWHZ, pfslowtimo, NULL);
557 }
558 
559 static void
pffasttimo(void * arg)560 pffasttimo(void *arg)
561 {
562 	struct rm_priotracker tracker;
563 	struct epoch_tracker et;
564 	struct protosw *pr;
565 
566 	rm_rlock(&pftimo_lock, &tracker);
567 	NET_EPOCH_ENTER(et);
568 	LIST_FOREACH(pr, &pffast_list, pr_fasttimos) {
569 		(*pr->pr_fasttimo)();
570 	}
571 	NET_EPOCH_EXIT(et);
572 	rm_runlock(&pftimo_lock, &tracker);
573 	callout_reset(&pffast_callout, hz / PR_FASTHZ, pffasttimo, NULL);
574 }
575