1 /* $OpenBSD: uipc_usrreq.c,v 1.215 2025/01/31 13:40:33 bluhm Exp $ */
2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1989, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/unpcb.h>
45 #include <sys/un.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/mbuf.h>
51 #include <sys/task.h>
52 #include <sys/pledge.h>
53 #include <sys/pool.h>
54 #include <sys/rwlock.h>
55 #include <sys/mutex.h>
56 #include <sys/sysctl.h>
57 #include <sys/lock.h>
58 #include <sys/refcnt.h>
59
60 #include "kcov.h"
61 #if NKCOV > 0
62 #include <sys/kcov.h>
63 #endif
64
65 /*
66 * Locks used to protect global data and struct members:
67 * I immutable after creation
68 * D unp_df_lock
69 * G unp_gc_lock
70 * M unp_ino_mtx
71 * R unp_rights_mtx
72 * a atomic
73 * s socket lock
74 */
75
76 struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
77 struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");
78
79 struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
80 struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
81
82 /*
83 * Stack of sets of files that were passed over a socket but were
84 * not received and need to be closed.
85 */
86 struct unp_deferral {
87 SLIST_ENTRY(unp_deferral) ud_link; /* [D] */
88 int ud_n; /* [I] */
89 /* followed by ud_n struct fdpass */
90 struct fdpass ud_fp[]; /* [I] */
91 };
92
93 void uipc_setaddr(const struct unpcb *, struct mbuf *);
94 void unp_discard(struct fdpass *, int);
95 void unp_remove_gcrefs(struct fdpass *, int);
96 void unp_restore_gcrefs(struct fdpass *, int);
97 void unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
98 int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
99 static inline void unp_ref(struct unpcb *);
100 static inline void unp_rele(struct unpcb *);
101 struct socket *unp_solock_peer(struct socket *);
102
103 struct pool unpcb_pool;
104 struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);
105
106 /*
107 * Unix communications domain.
108 *
109 * TODO:
110 * RDM
111 * rethink name space problems
112 * need a proper out-of-band
113 */
114 const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
115
116 /* [G] list of all UNIX domain sockets, for unp_gc() */
117 LIST_HEAD(unp_head, unpcb) unp_head =
118 LIST_HEAD_INITIALIZER(unp_head);
119 /* [D] list of sets of files that were sent over sockets that are now closed */
120 SLIST_HEAD(,unp_deferral) unp_deferred =
121 SLIST_HEAD_INITIALIZER(unp_deferred);
122
123 ino_t unp_ino; /* [U] prototype for fake inode numbers */
124 int unp_rights; /* [R] file descriptors in flight */
125 int unp_defer; /* [G] number of deferred fp to close by the GC task */
126 int unp_gcing; /* [G] GC task currently running */
127
128 const struct pr_usrreqs uipc_usrreqs = {
129 .pru_attach = uipc_attach,
130 .pru_detach = uipc_detach,
131 .pru_bind = uipc_bind,
132 .pru_listen = uipc_listen,
133 .pru_connect = uipc_connect,
134 .pru_accept = uipc_accept,
135 .pru_disconnect = uipc_disconnect,
136 .pru_shutdown = uipc_shutdown,
137 .pru_rcvd = uipc_rcvd,
138 .pru_send = uipc_send,
139 .pru_abort = uipc_abort,
140 .pru_sense = uipc_sense,
141 .pru_sockaddr = uipc_sockaddr,
142 .pru_peeraddr = uipc_peeraddr,
143 .pru_connect2 = uipc_connect2,
144 };
145
146 const struct pr_usrreqs uipc_dgram_usrreqs = {
147 .pru_attach = uipc_attach,
148 .pru_detach = uipc_detach,
149 .pru_bind = uipc_bind,
150 .pru_listen = uipc_listen,
151 .pru_connect = uipc_connect,
152 .pru_disconnect = uipc_disconnect,
153 .pru_shutdown = uipc_dgram_shutdown,
154 .pru_send = uipc_dgram_send,
155 .pru_sense = uipc_sense,
156 .pru_sockaddr = uipc_sockaddr,
157 .pru_peeraddr = uipc_peeraddr,
158 .pru_connect2 = uipc_connect2,
159 };
160
161 void
unp_init(void)162 unp_init(void)
163 {
164 pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
165 IPL_SOFTNET, 0, "unpcb", NULL);
166 }
167
168 static inline void
unp_ref(struct unpcb * unp)169 unp_ref(struct unpcb *unp)
170 {
171 refcnt_take(&unp->unp_refcnt);
172 }
173
174 static inline void
unp_rele(struct unpcb * unp)175 unp_rele(struct unpcb *unp)
176 {
177 refcnt_rele_wake(&unp->unp_refcnt);
178 }
179
180 struct socket *
unp_solock_peer(struct socket * so)181 unp_solock_peer(struct socket *so)
182 {
183 struct unpcb *unp, *unp2;
184 struct socket *so2;
185
186 unp = so->so_pcb;
187
188 again:
189 if ((unp2 = unp->unp_conn) == NULL)
190 return NULL;
191
192 so2 = unp2->unp_socket;
193
194 if (so < so2)
195 solock(so2);
196 else if (so > so2) {
197 unp_ref(unp2);
198 sounlock(so);
199 solock(so2);
200 solock(so);
201
202 /* Datagram socket could be reconnected due to re-lock. */
203 if (unp->unp_conn != unp2) {
204 sounlock(so2);
205 unp_rele(unp2);
206 goto again;
207 }
208
209 unp_rele(unp2);
210 }
211
212 return so2;
213 }
214
215 void
uipc_setaddr(const struct unpcb * unp,struct mbuf * nam)216 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
217 {
218 if (unp != NULL && unp->unp_addr != NULL) {
219 nam->m_len = unp->unp_addr->m_len;
220 memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
221 nam->m_len);
222 } else {
223 nam->m_len = sizeof(sun_noname);
224 memcpy(mtod(nam, struct sockaddr *), &sun_noname,
225 nam->m_len);
226 }
227 }
228
229 /*
230 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
231 * for stream sockets, although the total for sender and receiver is
232 * actually only PIPSIZ.
233 * Datagram sockets really use the sendspace as the maximum datagram size,
234 * and don't really want to reserve the sendspace. Their recvspace should
235 * be large enough for at least one max-size datagram plus address.
236 */
237 #define PIPSIZ 32768
238 u_int unpst_sendspace = PIPSIZ; /* [a] */
239 u_int unpst_recvspace = PIPSIZ; /* [a] */
240 u_int unpsq_sendspace = PIPSIZ; /* [a] */
241 u_int unpsq_recvspace = PIPSIZ; /* [a] */
242 u_int unpdg_sendspace = 8192; /* [a] really max datagram size */
243 u_int unpdg_recvspace = PIPSIZ; /* [a] */
244
245 const struct sysctl_bounded_args unpstctl_vars[] = {
246 { UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
247 { UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
248 };
249 const struct sysctl_bounded_args unpsqctl_vars[] = {
250 { UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
251 { UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
252 };
253 const struct sysctl_bounded_args unpdgctl_vars[] = {
254 { UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
255 { UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
256 };
257
258 int
uipc_attach(struct socket * so,int proto,int wait)259 uipc_attach(struct socket *so, int proto, int wait)
260 {
261 struct unpcb *unp;
262 int error;
263
264 if (so->so_pcb)
265 return EISCONN;
266 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
267 switch (so->so_type) {
268
269 case SOCK_STREAM:
270 error = soreserve(so,
271 atomic_load_int(&unpst_sendspace),
272 atomic_load_int(&unpst_recvspace));
273 break;
274
275 case SOCK_SEQPACKET:
276 error = soreserve(so,
277 atomic_load_int(&unpsq_sendspace),
278 atomic_load_int(&unpsq_recvspace));
279 break;
280
281 case SOCK_DGRAM:
282 error = soreserve(so,
283 atomic_load_int(&unpdg_sendspace),
284 atomic_load_int(&unpdg_recvspace));
285 break;
286
287 default:
288 panic("unp_attach");
289 }
290 if (error)
291 return (error);
292 }
293 unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
294 PR_ZERO);
295 if (unp == NULL)
296 return (ENOBUFS);
297 refcnt_init(&unp->unp_refcnt);
298 unp->unp_socket = so;
299 so->so_pcb = unp;
300 getnanotime(&unp->unp_ctime);
301
302 rw_enter_write(&unp_gc_lock);
303 LIST_INSERT_HEAD(&unp_head, unp, unp_link);
304 rw_exit_write(&unp_gc_lock);
305
306 return (0);
307 }
308
309 int
uipc_detach(struct socket * so)310 uipc_detach(struct socket *so)
311 {
312 struct unpcb *unp = sotounpcb(so);
313
314 if (unp == NULL)
315 return (EINVAL);
316
317 unp_detach(unp);
318
319 return (0);
320 }
321
322 int
uipc_bind(struct socket * so,struct mbuf * nam,struct proc * p)323 uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
324 {
325 struct unpcb *unp = sotounpcb(so);
326 struct sockaddr_un *soun;
327 struct mbuf *nam2;
328 struct vnode *vp;
329 struct vattr vattr;
330 int error;
331 struct nameidata nd;
332 size_t pathlen;
333
334 if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
335 return (EINVAL);
336 if (unp->unp_vnode != NULL)
337 return (EINVAL);
338 if ((error = unp_nam2sun(nam, &soun, &pathlen)))
339 return (error);
340
341 unp->unp_flags |= UNP_BINDING;
342
343 /*
344 * Enforce `i_lock' -> `solock' because fifo subsystem
345 * requires it. The socket can't be closed concurrently
346 * because the file descriptor reference is still held.
347 */
348
349 sounlock(unp->unp_socket);
350
351 nam2 = m_getclr(M_WAITOK, MT_SONAME);
352 nam2->m_len = sizeof(struct sockaddr_un);
353 memcpy(mtod(nam2, struct sockaddr_un *), soun,
354 offsetof(struct sockaddr_un, sun_path) + pathlen);
355 /* No need to NUL terminate: m_getclr() returns zero'd mbufs. */
356
357 soun = mtod(nam2, struct sockaddr_un *);
358
359 /* Fixup sun_len to keep it in sync with m_len. */
360 soun->sun_len = nam2->m_len;
361
362 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
363 soun->sun_path, p);
364 nd.ni_pledge = PLEDGE_UNIX;
365 nd.ni_unveil = UNVEIL_CREATE;
366
367 KERNEL_LOCK();
368 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
369 error = namei(&nd);
370 if (error != 0) {
371 m_freem(nam2);
372 solock(unp->unp_socket);
373 goto out;
374 }
375 vp = nd.ni_vp;
376 if (vp != NULL) {
377 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
378 if (nd.ni_dvp == vp)
379 vrele(nd.ni_dvp);
380 else
381 vput(nd.ni_dvp);
382 vrele(vp);
383 m_freem(nam2);
384 error = EADDRINUSE;
385 solock(unp->unp_socket);
386 goto out;
387 }
388 vattr_null(&vattr);
389 vattr.va_type = VSOCK;
390 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
391 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
392 vput(nd.ni_dvp);
393 if (error) {
394 m_freem(nam2);
395 solock(unp->unp_socket);
396 goto out;
397 }
398 solock(unp->unp_socket);
399 unp->unp_addr = nam2;
400 vp = nd.ni_vp;
401 vp->v_socket = unp->unp_socket;
402 unp->unp_vnode = vp;
403 unp->unp_connid.uid = p->p_ucred->cr_uid;
404 unp->unp_connid.gid = p->p_ucred->cr_gid;
405 unp->unp_connid.pid = p->p_p->ps_pid;
406 unp->unp_flags |= UNP_FEIDSBIND;
407 VOP_UNLOCK(vp);
408 out:
409 KERNEL_UNLOCK();
410 unp->unp_flags &= ~UNP_BINDING;
411
412 return (error);
413 }
414
415 int
uipc_listen(struct socket * so)416 uipc_listen(struct socket *so)
417 {
418 struct unpcb *unp = sotounpcb(so);
419
420 if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
421 return (EINVAL);
422 if (unp->unp_vnode == NULL)
423 return (EINVAL);
424 return (0);
425 }
426
427 int
uipc_connect(struct socket * so,struct mbuf * nam)428 uipc_connect(struct socket *so, struct mbuf *nam)
429 {
430 return unp_connect(so, nam, curproc);
431 }
432
433 int
uipc_accept(struct socket * so,struct mbuf * nam)434 uipc_accept(struct socket *so, struct mbuf *nam)
435 {
436 struct socket *so2;
437 struct unpcb *unp = sotounpcb(so);
438
439 /*
440 * Pass back name of connected socket, if it was bound and
441 * we are still connected (our peer may have closed already!).
442 */
443 so2 = unp_solock_peer(so);
444 uipc_setaddr(unp->unp_conn, nam);
445
446 if (so2 != NULL && so2 != so)
447 sounlock(so2);
448 return (0);
449 }
450
/*
 * Break the connection of `so', if there is one.  Always returns 0.
 */
int
uipc_disconnect(struct socket *so)
{
	unp_disconnect(sotounpcb(so));

	return (0);
}
459
460 int
uipc_shutdown(struct socket * so)461 uipc_shutdown(struct socket *so)
462 {
463 struct unpcb *unp = sotounpcb(so);
464 struct socket *so2;
465
466 socantsendmore(so);
467
468 if (unp->unp_conn != NULL) {
469 so2 = unp->unp_conn->unp_socket;
470 socantrcvmore(so2);
471 }
472
473 return (0);
474 }
475
/*
 * Datagram flavour of shutdown: only `so' itself is marked as unable
 * to send, a peer is not touched.  Always returns 0.
 */
int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);

	return (0);
}
482
483 void
uipc_rcvd(struct socket * so)484 uipc_rcvd(struct socket *so)
485 {
486 struct unpcb *unp = sotounpcb(so);
487 struct socket *so2;
488
489 if (unp->unp_conn == NULL)
490 return;
491 so2 = unp->unp_conn->unp_socket;
492
493 /*
494 * Adjust backpressure on sender
495 * and wakeup any waiting to write.
496 */
497 mtx_enter(&so->so_rcv.sb_mtx);
498 mtx_enter(&so2->so_snd.sb_mtx);
499 so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
500 so2->so_snd.sb_cc = so->so_rcv.sb_cc;
501 mtx_leave(&so2->so_snd.sb_mtx);
502 mtx_leave(&so->so_rcv.sb_mtx);
503 sowwakeup(so2);
504 }
505
506 int
uipc_send(struct socket * so,struct mbuf * m,struct mbuf * nam,struct mbuf * control)507 uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
508 struct mbuf *control)
509 {
510 struct unpcb *unp = sotounpcb(so);
511 struct socket *so2;
512 int error = 0, dowakeup = 0;
513
514 if (control) {
515 sounlock(so);
516 error = unp_internalize(control, curproc);
517 solock(so);
518 if (error)
519 goto out;
520 }
521
522 /*
523 * We hold both solock() and `sb_mtx' mutex while modifying
524 * SS_CANTSENDMORE flag. solock() is enough to check it.
525 */
526 if (so->so_snd.sb_state & SS_CANTSENDMORE) {
527 error = EPIPE;
528 goto dispose;
529 }
530 if (unp->unp_conn == NULL) {
531 error = ENOTCONN;
532 goto dispose;
533 }
534
535 so2 = unp->unp_conn->unp_socket;
536
537 /*
538 * Send to paired receive port, and then raise
539 * send buffer counts to maintain backpressure.
540 * Wake up readers.
541 */
542 /*
543 * sbappend*() should be serialized together
544 * with so_snd modification.
545 */
546 mtx_enter(&so2->so_rcv.sb_mtx);
547 mtx_enter(&so->so_snd.sb_mtx);
548 if (control) {
549 if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
550 control = NULL;
551 } else {
552 mtx_leave(&so->so_snd.sb_mtx);
553 mtx_leave(&so2->so_rcv.sb_mtx);
554 error = ENOBUFS;
555 goto dispose;
556 }
557 } else if (so->so_type == SOCK_SEQPACKET)
558 sbappendrecord(so2, &so2->so_rcv, m);
559 else
560 sbappend(so2, &so2->so_rcv, m);
561 so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
562 so->so_snd.sb_cc = so2->so_rcv.sb_cc;
563 if (so2->so_rcv.sb_cc > 0)
564 dowakeup = 1;
565 mtx_leave(&so->so_snd.sb_mtx);
566 mtx_leave(&so2->so_rcv.sb_mtx);
567
568 if (dowakeup)
569 sorwakeup(so2);
570
571 m = NULL;
572
573 dispose:
574 /* we need to undo unp_internalize in case of errors */
575 if (control && error)
576 unp_dispose(control);
577
578 out:
579 m_freem(control);
580 m_freem(m);
581
582 return (error);
583 }
584
/*
 * Send the datagram `m', optionally with the control message `control'.
 *
 * With a destination in `nam' the socket must not be connected; it gets
 * connected for this single datagram and disconnected again afterwards.
 * Without `nam' the existing connection is used.
 *
 * Returns EISCONN for a destination on a connected socket, ENOTCONN
 * when there is neither destination nor connection, ECONNREFUSED when
 * the connection vanished right after unp_connect(), ENOBUFS when the
 * receive buffer of the peer took nothing, or the error of
 * unp_internalize() or unp_connect().  Both mbuf chains are consumed in
 * every case.
 */
int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, appended;

	if (control != NULL) {
		/* unp_internalize() is called without the socket lock. */
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error != 0)
			goto out;
	}

	if (nam != NULL) {
		if (unp->unp_conn != NULL) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error != 0)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		error = (nam != NULL) ? ECONNREFUSED : ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/* The receiver gets to see our address, if we have one. */
	from = (unp->unp_addr != NULL) ?
	    mtod(unp->unp_addr, struct sockaddr *) : &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	appended = sbappendaddr(so2, &so2->so_rcv, from, m, control);
	if (appended) {
		/* Both chains belong to the receive buffer now. */
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (appended)
		sorwakeup(so2);

	/* Undo the connection made just for this datagram. */
	if (nam != NULL)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control != NULL && error != 0)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}
652
/*
 * Abort `so': detach its pcb and release the socket with sofree().
 */
void
uipc_abort(struct socket *so)
{
	unp_detach(sotounpcb(so));
	sofree(so, 1);
}
661
662 int
uipc_sense(struct socket * so,struct stat * sb)663 uipc_sense(struct socket *so, struct stat *sb)
664 {
665 struct unpcb *unp = sotounpcb(so);
666
667 sb->st_blksize = so->so_snd.sb_hiwat;
668 sb->st_dev = NODEV;
669 mtx_enter(&unp_ino_mtx);
670 if (unp->unp_ino == 0)
671 unp->unp_ino = unp_ino++;
672 mtx_leave(&unp_ino_mtx);
673 sb->st_atim.tv_sec =
674 sb->st_mtim.tv_sec =
675 sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
676 sb->st_atim.tv_nsec =
677 sb->st_mtim.tv_nsec =
678 sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
679 sb->st_ino = unp->unp_ino;
680
681 return (0);
682 }
683
/*
 * Store the address `so' is bound to in `nam'; an unbound socket
 * reports the anonymous address.  Always returns 0.
 */
int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	uipc_setaddr(sotounpcb(so), nam);

	return (0);
}
692
693 int
uipc_peeraddr(struct socket * so,struct mbuf * nam)694 uipc_peeraddr(struct socket *so, struct mbuf *nam)
695 {
696 struct unpcb *unp = sotounpcb(so);
697 struct socket *so2;
698
699 so2 = unp_solock_peer(so);
700 uipc_setaddr(unp->unp_conn, nam);
701 if (so2 != NULL && so2 != so)
702 sounlock(so2);
703 return (0);
704 }
705
706 int
uipc_connect2(struct socket * so,struct socket * so2)707 uipc_connect2(struct socket *so, struct socket *so2)
708 {
709 struct unpcb *unp = sotounpcb(so), *unp2;
710 int error;
711
712 if ((error = unp_connect2(so, so2)))
713 return (error);
714
715 unp->unp_connid.uid = curproc->p_ucred->cr_uid;
716 unp->unp_connid.gid = curproc->p_ucred->cr_gid;
717 unp->unp_connid.pid = curproc->p_p->ps_pid;
718 unp->unp_flags |= UNP_FEIDS;
719 unp2 = sotounpcb(so2);
720 unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
721 unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
722 unp2->unp_connid.pid = curproc->p_p->ps_pid;
723 unp2->unp_flags |= UNP_FEIDS;
724
725 return (0);
726 }
727
728 int
uipc_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)729 uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
730 size_t newlen)
731 {
732 int *valp = &unp_defer;
733
734 /* All sysctl names at this level are terminal. */
735 switch (name[0]) {
736 case SOCK_STREAM:
737 if (namelen != 2)
738 return (ENOTDIR);
739 return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
740 name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
741 case SOCK_SEQPACKET:
742 if (namelen != 2)
743 return (ENOTDIR);
744 return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
745 name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
746 case SOCK_DGRAM:
747 if (namelen != 2)
748 return (ENOTDIR);
749 return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
750 name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
751 case NET_UNIX_INFLIGHT:
752 valp = &unp_rights;
753 /* FALLTHROUGH */
754 case NET_UNIX_DEFERRED:
755 if (namelen != 1)
756 return (ENOTDIR);
757 return sysctl_rdint(oldp, oldlenp, newp, *valp);
758 default:
759 return (ENOPROTOOPT);
760 }
761 }
762
763 void
unp_detach(struct unpcb * unp)764 unp_detach(struct unpcb *unp)
765 {
766 struct socket *so = unp->unp_socket;
767 struct vnode *vp = unp->unp_vnode;
768 struct unpcb *unp2;
769
770 unp->unp_vnode = NULL;
771
772 rw_enter_write(&unp_gc_lock);
773 LIST_REMOVE(unp, unp_link);
774 rw_exit_write(&unp_gc_lock);
775
776 if (vp != NULL) {
777 /* Enforce `i_lock' -> solock() lock order. */
778 sounlock(so);
779 VOP_LOCK(vp, LK_EXCLUSIVE);
780 vp->v_socket = NULL;
781
782 KERNEL_LOCK();
783 vput(vp);
784 KERNEL_UNLOCK();
785 solock(so);
786 }
787
788 if (unp->unp_conn != NULL) {
789 /*
790 * Datagram socket could be connected to itself.
791 * Such socket will be disconnected here.
792 */
793 unp_disconnect(unp);
794 }
795
796 while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
797 struct socket *so2 = unp2->unp_socket;
798
799 if (so < so2)
800 solock(so2);
801 else {
802 unp_ref(unp2);
803 sounlock(so);
804 solock(so2);
805 solock(so);
806
807 if (unp2->unp_conn != unp) {
808 /* `unp2' was disconnected due to re-lock. */
809 sounlock(so2);
810 unp_rele(unp2);
811 continue;
812 }
813
814 unp_rele(unp2);
815 }
816
817 unp2->unp_conn = NULL;
818 SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
819 so2->so_error = ECONNRESET;
820 so2->so_state &= ~SS_ISCONNECTED;
821
822 sounlock(so2);
823 }
824
825 sounlock(so);
826 refcnt_finalize(&unp->unp_refcnt, "unpfinal");
827 solock(so);
828
829 soisdisconnected(so);
830 so->so_pcb = NULL;
831 m_freem(unp->unp_addr);
832 pool_put(&unpcb_pool, unp);
833 if (unp_rights)
834 task_add(systqmp, &unp_gc_task);
835 }
836
/*
 * Connect `so' to the socket bound to the path in `nam', on behalf of
 * `p'.
 *
 * The path is looked up and has to name a socket vnode that `p' may
 * write to and that is still tied to a socket of the same type.  For
 * protocols with PR_CONNREQUIRED the socket found has to be listening;
 * a new socket is spawned from it with sonewconn() and becomes the
 * peer.  Otherwise `so' is connected to the socket found.
 *
 * Returns EISCONN when a bind or connect is in progress, ENOTSOCK,
 * ECONNREFUSED or EPROTOTYPE for an unsuitable target, and otherwise
 * whatever unp_nam2sun(), namei(), VOP_ACCESS() or unp_connect2()
 * report.
 *
 * The lock of `so' is dropped while the filesystem is being worked on;
 * UNP_CONNECTING is set for that period.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2, *unp3;
	struct socket *so2, *so3, *peerso;
	struct sockaddr_un *soun;
	struct nameidata nd;
	struct vnode *vp;
	int error;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);

	error = unp_nam2sun(nam, &soun, NULL);
	if (error)
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */
	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}

	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto put;

	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
		/* Connect to the socket found. */
		peerso = so2;
		solock_pair(so, peerso);
	} else {
		solock(so2);

		/* Only a listening socket can spawn a new connection. */
		so3 = NULL;
		if (so2->so_options & SO_ACCEPTCONN)
			so3 = sonewconn(so2, 0, M_WAIT);
		if (so3 == NULL) {
			sounlock(so2);
			error = ECONNREFUSED;
			goto put;
		}

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		sounlock(so2);
		sounlock(so3);
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr != NULL) {
			unp3->unp_addr = m_copym(unp2->unp_addr, 0,
			    M_COPYALL, M_NOWAIT);
		}

		/* The new socket remembers who connected to it ... */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		/* ... and we remember who did the bind. */
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		peerso = so3;
	}

	error = unp_connect2(so, peerso);

	/*
	 * `peerso' can't be PRU_ABORT'ed concurrently
	 */
	sounlock_pair(so, peerso);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}
952
953 int
unp_connect2(struct socket * so,struct socket * so2)954 unp_connect2(struct socket *so, struct socket *so2)
955 {
956 struct unpcb *unp = sotounpcb(so);
957 struct unpcb *unp2;
958
959 soassertlocked(so);
960 soassertlocked(so2);
961
962 if (so2->so_type != so->so_type)
963 return (EPROTOTYPE);
964 unp2 = sotounpcb(so2);
965 unp->unp_conn = unp2;
966 switch (so->so_type) {
967
968 case SOCK_DGRAM:
969 SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
970 soisconnected(so);
971 break;
972
973 case SOCK_STREAM:
974 case SOCK_SEQPACKET:
975 unp2->unp_conn = unp;
976 soisconnected(so);
977 soisconnected(so2);
978 break;
979
980 default:
981 panic("unp_connect2");
982 }
983 return (0);
984 }
985
986 void
unp_disconnect(struct unpcb * unp)987 unp_disconnect(struct unpcb *unp)
988 {
989 struct socket *so2;
990 struct unpcb *unp2;
991
992 if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
993 return;
994
995 unp2 = unp->unp_conn;
996 unp->unp_conn = NULL;
997
998 switch (unp->unp_socket->so_type) {
999
1000 case SOCK_DGRAM:
1001 SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
1002 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1003 break;
1004
1005 case SOCK_STREAM:
1006 case SOCK_SEQPACKET:
1007 unp->unp_socket->so_snd.sb_mbcnt = 0;
1008 unp->unp_socket->so_snd.sb_cc = 0;
1009 soisdisconnected(unp->unp_socket);
1010 unp2->unp_conn = NULL;
1011 unp2->unp_socket->so_snd.sb_mbcnt = 0;
1012 unp2->unp_socket->so_snd.sb_cc = 0;
1013 soisdisconnected(unp2->unp_socket);
1014 break;
1015 }
1016
1017 if (so2 != unp->unp_socket)
1018 sounlock(so2);
1019 }
1020
1021 static struct unpcb *
fptounp(struct file * fp)1022 fptounp(struct file *fp)
1023 {
1024 struct socket *so;
1025
1026 if (fp->f_type != DTYPE_SOCKET)
1027 return (NULL);
1028 if ((so = fp->f_data) == NULL)
1029 return (NULL);
1030 if (so->so_proto->pr_domain != &unixdomain)
1031 return (NULL);
1032 return (sotounpcb(so));
1033 }
1034
1035 int
unp_externalize(struct mbuf * rights,socklen_t controllen,int flags)1036 unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
1037 {
1038 struct proc *p = curproc; /* XXX */
1039 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1040 struct filedesc *fdp = p->p_fd;
1041 int i, *fds = NULL;
1042 struct fdpass *rp;
1043 struct file *fp;
1044 int nfds, error = 0;
1045
1046 /*
1047 * This code only works because SCM_RIGHTS is the only supported
1048 * control message type on unix sockets. Enforce this here.
1049 */
1050 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
1051 return EINVAL;
1052
1053 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
1054 sizeof(struct fdpass);
1055 if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
1056 controllen = 0;
1057 else
1058 controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
1059 if (nfds > controllen / sizeof(int)) {
1060 error = EMSGSIZE;
1061 goto out;
1062 }
1063
1064 /* Make sure the recipient should be able to see the descriptors.. */
1065 rp = (struct fdpass *)CMSG_DATA(cm);
1066
1067 /* fdp->fd_rdir requires KERNEL_LOCK() */
1068 KERNEL_LOCK();
1069
1070 for (i = 0; i < nfds; i++) {
1071 fp = rp->fp;
1072 rp++;
1073 error = pledge_recvfd(p, fp);
1074 if (error)
1075 break;
1076
1077 /*
1078 * No to block devices. If passing a directory,
1079 * make sure that it is underneath the root.
1080 */
1081 if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
1082 struct vnode *vp = (struct vnode *)fp->f_data;
1083
1084 if (vp->v_type == VBLK ||
1085 (vp->v_type == VDIR &&
1086 !vn_isunder(vp, fdp->fd_rdir, p))) {
1087 error = EPERM;
1088 break;
1089 }
1090 }
1091 }
1092
1093 KERNEL_UNLOCK();
1094
1095 if (error)
1096 goto out;
1097
1098 fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);
1099
1100 fdplock(fdp);
1101 restart:
1102 /*
1103 * First loop -- allocate file descriptor table slots for the
1104 * new descriptors.
1105 */
1106 rp = ((struct fdpass *)CMSG_DATA(cm));
1107 for (i = 0; i < nfds; i++) {
1108 if ((error = fdalloc(p, 0, &fds[i])) != 0) {
1109 /*
1110 * Back out what we've done so far.
1111 */
1112 for (--i; i >= 0; i--)
1113 fdremove(fdp, fds[i]);
1114
1115 if (error == ENOSPC) {
1116 fdexpand(p);
1117 goto restart;
1118 }
1119
1120 fdpunlock(fdp);
1121
1122 /*
1123 * This is the error that has historically
1124 * been returned, and some callers may
1125 * expect it.
1126 */
1127
1128 error = EMSGSIZE;
1129 goto out;
1130 }
1131
1132 /*
1133 * Make the slot reference the descriptor so that
1134 * fdalloc() works properly.. We finalize it all
1135 * in the loop below.
1136 */
1137 mtx_enter(&fdp->fd_fplock);
1138 KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
1139 fdp->fd_ofiles[fds[i]] = rp->fp;
1140 mtx_leave(&fdp->fd_fplock);
1141
1142 fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
1143 if (flags & MSG_CMSG_CLOEXEC)
1144 fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;
1145
1146 rp++;
1147 }
1148
1149 /*
1150 * Keep `fdp' locked to prevent concurrent close() of just
1151 * inserted descriptors. Such descriptors could have the only
1152 * `f_count' reference which is now shared between control
1153 * message and `fdp'.
1154 */
1155
1156 /*
1157 * Now that adding them has succeeded, update all of the
1158 * descriptor passing state.
1159 */
1160 rp = (struct fdpass *)CMSG_DATA(cm);
1161
1162 for (i = 0; i < nfds; i++) {
1163 struct unpcb *unp;
1164
1165 fp = rp->fp;
1166 rp++;
1167 if ((unp = fptounp(fp)) != NULL) {
1168 rw_enter_write(&unp_gc_lock);
1169 unp->unp_msgcount--;
1170 rw_exit_write(&unp_gc_lock);
1171 }
1172 }
1173 fdpunlock(fdp);
1174
1175 mtx_enter(&unp_rights_mtx);
1176 unp_rights -= nfds;
1177 mtx_leave(&unp_rights_mtx);
1178
1179 /*
1180 * Copy temporary array to message and adjust length, in case of
1181 * transition from large struct file pointers to ints.
1182 */
1183 memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
1184 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
1185 rights->m_len = CMSG_LEN(nfds * sizeof(int));
1186 out:
1187 if (fds != NULL)
1188 free(fds, M_TEMP, nfds * sizeof(int));
1189
1190 if (error) {
1191 if (nfds > 0) {
1192 /*
1193 * No lock required. We are the only `cm' holder.
1194 */
1195 rp = ((struct fdpass *)CMSG_DATA(cm));
1196 unp_discard(rp, nfds);
1197 }
1198 }
1199
1200 return (error);
1201 }
1202
/*
 * Convert the SCM_RIGHTS control message held in `control' from an
 * array of int descriptors into an array of struct fdpass, in place.
 *
 * Each descriptor is looked up in the file table of `p'; the file
 * pointer and the descriptor's UF_PLEDGED flag are stored in the
 * message.  `unp_rights' is charged with the number of descriptors and
 * `unp_msgcount' is bumped for every file that fptounp() maps to a
 * unpcb.
 *
 * Returns 0 on success, otherwise an errno value:
 *	EINVAL	 malformed header, not SOL_SOCKET/SCM_RIGHTS, or a kqueue
 *		 (with NKCOV also a kcov) descriptor was passed
 *	EMFILE	 `unp_rights' would exceed maxfiles / 10
 *	E2BIG	 not enough room although `control' already has M_EXT set
 *	ENOBUFS	 MCLGET() did not attach external storage
 *	EBADF	 fd_getfile() found no file for a descriptor
 *	EDEADLK	 a file's `f_count' reached FDUP_MAX_COUNT
 * or whatever pledge_sendfd() returned.
 *
 * On failure the `unp_rights', `unp_msgcount' and file reference
 * changes made here are undone; the length fields of the message are
 * not restored.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	/* Number of descriptors: payload bytes divided by sizeof(int). */
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/*
	 * Charge the in-flight descriptor count up front; every error
	 * path past this point goes through `nospace' to give it back.
	 */
	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/*
		 * copy the data back into the cluster; `cm' is re-read
		 * because the mbuf data pointer is fetched anew here
		 */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Both cursors start at the last entry and walk backwards.
	 * NOTE(review): presumably because a struct fdpass is larger
	 * than an int, so expanding from the end never overwrites a
	 * descriptor that has not been read yet -- confirm.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		/* All checks passed: record the file and its pledge flag. */
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	/*
	 * `fp' is the file that failed one of the checks above, or NULL
	 * when fd_getfile() itself failed.
	 */
	if (fp != NULL)
		FRELE(fp, p);
	/*
	 * Back out what we just did.  `i' counts the entries already
	 * converted and `rp' points one slot below the last one filled
	 * in, hence the increment before each use.
	 */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	/* Return the charge taken on `unp_rights' at the top. */
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}
1334
/*
 * Garbage collector for descriptors in flight; `arg' is unused.
 * NOTE(review): presumably the handler of `unp_gc_task', which
 * unp_discard() schedules -- confirm at the task's initialisation.
 *
 * First closes every file queued on `unp_deferred', then looks for
 * sockets whose file is referenced only from SCM_RIGHTS messages
 * (`f_count' equals `unp_msgcount') and which are not reachable from a
 * live socket's receive buffer.  The receive buffers of such sockets
 * are emptied and the files in them handed to unp_discard().
 *
 * Only one instance runs at a time: if `unp_gcing' is already set the
 * call returns immediately.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	/*
	 * `unp_gcing' is tested and set under `unp_gc_lock'; the lock
	 * is dropped again while the deferred list is processed.
	 */
	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		/*
		 * `unp_df_lock' is released while this record is
		 * processed and retaken before the list is looked at
		 * again.
		 */
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	/* Number of sockets currently marked UNP_GCDEAD. */
	nunref = 0;

	/* Held from here until the `unlock' label. */
	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		mtx_enter(&so->so_rcv.sb_mtx);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		/* Incremented by unp_restore_gcrefs() during the scan. */
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			mtx_enter(&so->so_rcv.sb_mtx);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			mtx_leave(&so->so_rcv.sb_mtx);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				struct sockbuf *sb = &unp->unp_socket->so_rcv;
				struct mbuf *m;

				/*
				 * This socket could still be connected
				 * and if so its `so_rcv' is still
				 * accessible by concurrent PRU_SEND
				 * thread.
				 */

				/*
				 * Detach the mbuf chain and reset the
				 * buffer state under `sb_mtx'; the
				 * detached chain is then processed
				 * without that mutex.
				 */
				mtx_enter(&sb->sb_mtx);
				m = sb->sb_mb;
				memset(&sb->sb_startzero, 0,
				    (caddr_t)&sb->sb_endzero -
				        (caddr_t)&sb->sb_startzero);
				sb->sb_timeo_nsecs = INFSLP;
				mtx_leave(&sb->sb_mtx);

				unp_scan(m, unp_discard);
				m_purge(m);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}
1478
1479 void
unp_dispose(struct mbuf * m)1480 unp_dispose(struct mbuf *m)
1481 {
1482
1483 if (m)
1484 unp_scan(m, unp_discard);
1485 }
1486
1487 void
unp_scan(struct mbuf * m0,void (* op)(struct fdpass *,int))1488 unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
1489 {
1490 struct mbuf *m;
1491 struct fdpass *rp;
1492 struct cmsghdr *cm;
1493 int qfds;
1494
1495 while (m0) {
1496 for (m = m0; m; m = m->m_next) {
1497 if (m->m_type == MT_CONTROL &&
1498 m->m_len >= sizeof(*cm)) {
1499 cm = mtod(m, struct cmsghdr *);
1500 if (cm->cmsg_level != SOL_SOCKET ||
1501 cm->cmsg_type != SCM_RIGHTS)
1502 continue;
1503 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
1504 / sizeof(struct fdpass);
1505 if (qfds > 0) {
1506 rp = (struct fdpass *)CMSG_DATA(cm);
1507 op(rp, qfds);
1508 }
1509 break; /* XXX, but saves time */
1510 }
1511 }
1512 m0 = m0->m_nextpkt;
1513 }
1514 }
1515
1516 void
unp_discard(struct fdpass * rp,int nfds)1517 unp_discard(struct fdpass *rp, int nfds)
1518 {
1519 struct unp_deferral *defer;
1520
1521 /* copy the file pointers to a deferral structure */
1522 defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
1523 defer->ud_n = nfds;
1524 memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
1525 memset(rp, 0, sizeof(*rp) * nfds);
1526
1527 rw_enter_write(&unp_df_lock);
1528 SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
1529 rw_exit_write(&unp_df_lock);
1530
1531 task_add(systqmp, &unp_gc_task);
1532 }
1533
1534 void
unp_remove_gcrefs(struct fdpass * rp,int nfds)1535 unp_remove_gcrefs(struct fdpass *rp, int nfds)
1536 {
1537 struct unpcb *unp;
1538 int i;
1539
1540 rw_assert_wrlock(&unp_gc_lock);
1541
1542 for (i = 0; i < nfds; i++) {
1543 if (rp[i].fp == NULL)
1544 continue;
1545 if ((unp = fptounp(rp[i].fp)) == NULL)
1546 continue;
1547 if (unp->unp_gcflags & UNP_GCDEAD) {
1548 KASSERT(unp->unp_gcrefs > 0);
1549 unp->unp_gcrefs--;
1550 }
1551 }
1552 }
1553
1554 void
unp_restore_gcrefs(struct fdpass * rp,int nfds)1555 unp_restore_gcrefs(struct fdpass *rp, int nfds)
1556 {
1557 struct unpcb *unp;
1558 int i;
1559
1560 rw_assert_wrlock(&unp_gc_lock);
1561
1562 for (i = 0; i < nfds; i++) {
1563 if (rp[i].fp == NULL)
1564 continue;
1565 if ((unp = fptounp(rp[i].fp)) == NULL)
1566 continue;
1567 if (unp->unp_gcflags & UNP_GCDEAD) {
1568 unp->unp_gcrefs++;
1569 unp_defer++;
1570 }
1571 }
1572 }
1573
1574 int
unp_nam2sun(struct mbuf * nam,struct sockaddr_un ** sun,size_t * pathlen)1575 unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
1576 {
1577 struct sockaddr *sa = mtod(nam, struct sockaddr *);
1578 size_t size, len;
1579
1580 if (nam->m_len < offsetof(struct sockaddr, sa_data))
1581 return EINVAL;
1582 if (sa->sa_family != AF_UNIX)
1583 return EAFNOSUPPORT;
1584 if (sa->sa_len != nam->m_len)
1585 return EINVAL;
1586 if (sa->sa_len > sizeof(struct sockaddr_un))
1587 return EINVAL;
1588 *sun = (struct sockaddr_un *)sa;
1589
1590 /* ensure that sun_path is NUL terminated and fits */
1591 size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
1592 len = strnlen((*sun)->sun_path, size);
1593 if (len == sizeof((*sun)->sun_path))
1594 return EINVAL;
1595 if (len == size) {
1596 if (m_trailingspace(nam) == 0)
1597 return EINVAL;
1598 nam->m_len++;
1599 (*sun)->sun_len++;
1600 (*sun)->sun_path[len] = '\0';
1601 }
1602 if (pathlen != NULL)
1603 *pathlen = len;
1604
1605 return 0;
1606 }
1607