1 /*	$OpenBSD: uipc_usrreq.c,v 1.31 2006/02/27 23:38:11 miod Exp $	*/
2 /*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/unpcb.h>
44 #include <sys/un.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/mbuf.h>
50 
51 /*
52  * Unix communications domain.
53  *
54  * TODO:
55  *	SEQPACKET, RDM
56  *	rethink name space problems
57  *	need a proper out-of-band
58  */
59 struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
60 ino_t	unp_ino;			/* prototype for fake inode numbers */
61 
62 /*ARGSUSED*/
63 int
uipc_usrreq(struct socket * so,int req,struct mbuf * m,struct mbuf * nam,struct mbuf * control)64 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
65     struct mbuf *control)
66 {
67 	struct unpcb *unp = sotounpcb(so);
68 	struct socket *so2;
69 	int error = 0;
70 	struct proc *p = curproc;	/* XXX */
71 
72 	if (req == PRU_CONTROL)
73 		return (EOPNOTSUPP);
74 	if (req != PRU_SEND && control && control->m_len) {
75 		error = EOPNOTSUPP;
76 		goto release;
77 	}
78 	if (unp == NULL && req != PRU_ATTACH) {
79 		error = EINVAL;
80 		goto release;
81 	}
82 	switch (req) {
83 
84 	case PRU_ATTACH:
85 		if (unp) {
86 			error = EISCONN;
87 			break;
88 		}
89 		error = unp_attach(so);
90 		break;
91 
92 	case PRU_DETACH:
93 		unp_detach(unp);
94 		break;
95 
96 	case PRU_BIND:
97 		error = unp_bind(unp, nam, p);
98 		break;
99 
100 	case PRU_LISTEN:
101 		if (unp->unp_vnode == NULL)
102 			error = EINVAL;
103 		break;
104 
105 	case PRU_CONNECT:
106 		error = unp_connect(so, nam, p);
107 		break;
108 
109 	case PRU_CONNECT2:
110 		error = unp_connect2(so, (struct socket *)nam);
111 		break;
112 
113 	case PRU_DISCONNECT:
114 		unp_disconnect(unp);
115 		break;
116 
117 	case PRU_ACCEPT:
118 		/*
119 		 * Pass back name of connected socket,
120 		 * if it was bound and we are still connected
121 		 * (our peer may have closed already!).
122 		 */
123 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
124 			nam->m_len = unp->unp_conn->unp_addr->m_len;
125 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
126 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
127 		} else {
128 			nam->m_len = sizeof(sun_noname);
129 			*(mtod(nam, struct sockaddr *)) = sun_noname;
130 		}
131 		break;
132 
133 	case PRU_SHUTDOWN:
134 		socantsendmore(so);
135 		unp_shutdown(unp);
136 		break;
137 
138 	case PRU_RCVD:
139 		switch (so->so_type) {
140 
141 		case SOCK_DGRAM:
142 			panic("uipc 1");
143 			/*NOTREACHED*/
144 
145 		case SOCK_STREAM:
146 #define	rcv (&so->so_rcv)
147 #define snd (&so2->so_snd)
148 			if (unp->unp_conn == NULL)
149 				break;
150 			so2 = unp->unp_conn->unp_socket;
151 			/*
152 			 * Adjust backpressure on sender
153 			 * and wakeup any waiting to write.
154 			 */
155 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
156 			unp->unp_mbcnt = rcv->sb_mbcnt;
157 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
158 			unp->unp_cc = rcv->sb_cc;
159 			sowwakeup(so2);
160 #undef snd
161 #undef rcv
162 			break;
163 
164 		default:
165 			panic("uipc 2");
166 		}
167 		break;
168 
169 	case PRU_SEND:
170 		if (control && (error = unp_internalize(control, p)))
171 			break;
172 		switch (so->so_type) {
173 
174 		case SOCK_DGRAM: {
175 			struct sockaddr *from;
176 
177 			if (nam) {
178 				if (unp->unp_conn) {
179 					error = EISCONN;
180 					break;
181 				}
182 				error = unp_connect(so, nam, p);
183 				if (error)
184 					break;
185 			} else {
186 				if (unp->unp_conn == NULL) {
187 					error = ENOTCONN;
188 					break;
189 				}
190 			}
191 			so2 = unp->unp_conn->unp_socket;
192 			if (unp->unp_addr)
193 				from = mtod(unp->unp_addr, struct sockaddr *);
194 			else
195 				from = &sun_noname;
196 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
197 				sorwakeup(so2);
198 				m = NULL;
199 				control = NULL;
200 			} else
201 				error = ENOBUFS;
202 			if (nam)
203 				unp_disconnect(unp);
204 			break;
205 		}
206 
207 		case SOCK_STREAM:
208 #define	rcv (&so2->so_rcv)
209 #define	snd (&so->so_snd)
210 			if (so->so_state & SS_CANTSENDMORE) {
211 				error = EPIPE;
212 				break;
213 			}
214 			if (unp->unp_conn == NULL) {
215 				error = ENOTCONN;
216 				break;
217 			}
218 			so2 = unp->unp_conn->unp_socket;
219 			/*
220 			 * Send to paired receive port, and then reduce
221 			 * send buffer hiwater marks to maintain backpressure.
222 			 * Wake up readers.
223 			 */
224 			if (control) {
225 				if (sbappendcontrol(rcv, m, control))
226 					control = NULL;
227 			} else
228 				sbappend(rcv, m);
229 			snd->sb_mbmax -=
230 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
231 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
232 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
233 			unp->unp_conn->unp_cc = rcv->sb_cc;
234 			sorwakeup(so2);
235 			m = NULL;
236 #undef snd
237 #undef rcv
238 			break;
239 
240 		default:
241 			panic("uipc 4");
242 		}
243 		break;
244 
245 	case PRU_ABORT:
246 		unp_drop(unp, ECONNABORTED);
247 		break;
248 
249 	case PRU_SENSE:
250 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
251 		if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
252 			so2 = unp->unp_conn->unp_socket;
253 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
254 		}
255 		((struct stat *) m)->st_dev = NODEV;
256 		if (unp->unp_ino == 0)
257 			unp->unp_ino = unp_ino++;
258 		((struct stat *) m)->st_atimespec =
259 		    ((struct stat *) m)->st_mtimespec =
260 		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
261 		((struct stat *) m)->st_ino = unp->unp_ino;
262 		return (0);
263 
264 	case PRU_RCVOOB:
265 		return (EOPNOTSUPP);
266 
267 	case PRU_SENDOOB:
268 		error = EOPNOTSUPP;
269 		break;
270 
271 	case PRU_SOCKADDR:
272 		if (unp->unp_addr) {
273 			nam->m_len = unp->unp_addr->m_len;
274 			bcopy(mtod(unp->unp_addr, caddr_t),
275 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
276 		} else
277 			nam->m_len = 0;
278 		break;
279 
280 	case PRU_PEERADDR:
281 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
282 			nam->m_len = unp->unp_conn->unp_addr->m_len;
283 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
284 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
285 		} else
286 			nam->m_len = 0;
287 		break;
288 
289 	case PRU_PEEREID:
290 		if (unp->unp_flags & UNP_FEIDS) {
291 			nam->m_len = sizeof(struct unpcbid);
292 			bcopy((caddr_t)(&(unp->unp_connid)),
293 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
294 		} else
295 			nam->m_len = 0;
296 		break;
297 
298 	case PRU_SLOWTIMO:
299 		break;
300 
301 	default:
302 		panic("piusrreq");
303 	}
304 release:
305 	if (control)
306 		m_freem(control);
307 	if (m)
308 		m_freem(m);
309 	return (error);
310 }
311 
312 /*
313  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
314  * for stream sockets, although the total for sender and receiver is
315  * actually only PIPSIZ.
316  * Datagram sockets really use the sendspace as the maximum datagram size,
317  * and don't really want to reserve the sendspace.  Their recvspace should
318  * be large enough for at least one max-size datagram plus address.
319  */
320 #define	PIPSIZ	4096
321 u_long	unpst_sendspace = PIPSIZ;
322 u_long	unpst_recvspace = PIPSIZ;
323 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
324 u_long	unpdg_recvspace = 4*1024;
325 
326 int	unp_rights;			/* file descriptors in flight */
327 
328 int
unp_attach(struct socket * so)329 unp_attach(struct socket *so)
330 {
331 	struct unpcb *unp;
332 	struct timeval tv;
333 	int error;
334 
335 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
336 		switch (so->so_type) {
337 
338 		case SOCK_STREAM:
339 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
340 			break;
341 
342 		case SOCK_DGRAM:
343 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
344 			break;
345 
346 		default:
347 			panic("unp_attach");
348 		}
349 		if (error)
350 			return (error);
351 	}
352 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
353 	if (unp == NULL)
354 		return (ENOBUFS);
355 	bzero((caddr_t)unp, sizeof(*unp));
356 	unp->unp_socket = so;
357 	so->so_pcb = unp;
358 	microtime(&tv);
359 	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
360 	return (0);
361 }
362 
363 void
unp_detach(struct unpcb * unp)364 unp_detach(struct unpcb *unp)
365 {
366 
367 	if (unp->unp_vnode) {
368 		unp->unp_vnode->v_socket = NULL;
369 		vrele(unp->unp_vnode);
370 		unp->unp_vnode = NULL;
371 	}
372 	if (unp->unp_conn)
373 		unp_disconnect(unp);
374 	while (unp->unp_refs)
375 		unp_drop(unp->unp_refs, ECONNRESET);
376 	soisdisconnected(unp->unp_socket);
377 	unp->unp_socket->so_pcb = NULL;
378 	m_freem(unp->unp_addr);
379 	if (unp_rights) {
380 		/*
381 		 * Normally the receive buffer is flushed later,
382 		 * in sofree, but if our receive buffer holds references
383 		 * to descriptors that are now garbage, we will dispose
384 		 * of those descriptor references after the garbage collector
385 		 * gets them (resulting in a "panic: closef: count < 0").
386 		 */
387 		sorflush(unp->unp_socket);
388 		free(unp, M_PCB);
389 		unp_gc();
390 	} else
391 		free(unp, M_PCB);
392 }
393 
394 int
unp_bind(struct unpcb * unp,struct mbuf * nam,struct proc * p)395 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
396 {
397 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
398 	struct vnode *vp;
399 	struct vattr vattr;
400 	int error, namelen;
401 	struct nameidata nd;
402 	char buf[MLEN];
403 
404 	if (unp->unp_vnode != NULL)
405 		return (EINVAL);
406 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
407 	if (namelen <= 0 || namelen >= MLEN)
408 		return EINVAL;
409 	strncpy(buf, soun->sun_path, namelen);
410 	buf[namelen] = 0;       /* null-terminate the string */
411 	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p);
412 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
413 	if ((error = namei(&nd)) != 0)
414 		return (error);
415 	vp = nd.ni_vp;
416 	if (vp != NULL) {
417 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
418 		if (nd.ni_dvp == vp)
419 			vrele(nd.ni_dvp);
420 		else
421 			vput(nd.ni_dvp);
422 		vrele(vp);
423 		return (EADDRINUSE);
424 	}
425 	VATTR_NULL(&vattr);
426 	vattr.va_type = VSOCK;
427 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
428 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
429 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
430 	if (error)
431 		return (error);
432 	vp = nd.ni_vp;
433 	vp->v_socket = unp->unp_socket;
434 	unp->unp_vnode = vp;
435 	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
436 	VOP_UNLOCK(vp, 0, p);
437 	return (0);
438 }
439 
/*
 * Connect a socket to the socket bound at the path held in nam.
 *
 * The path is NUL-terminated in place; if the address fills the mbuf's
 * internal data area exactly and its last byte is not already NUL,
 * EMSGSIZE is returned.  The name must resolve to a VSOCK vnode that the
 * caller may write to, with a socket of the same type attached.  For
 * protocols flagged PR_CONNREQUIRED the target must be accepting
 * connections; a new socket is spawned from it with sonewconn(), given a
 * copy of the listener's address, and tagged with the caller's effective
 * uid and gid before the two ends are joined.
 *
 * Returns 0 or an errno value.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct nameidata nd;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	caddr_t end;
	int error;

	/* One past the last byte of the address. */
	end = mtod(nam, caddr_t) + nam->m_len;
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		/* No room for a terminator: one must already be there. */
		if (end[-1] != 0)
			return (EMSGSIZE);
	} else
		*end = 0;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	error = namei(&nd);
	if (error != 0)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto out;

	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		so3 = sonewconn(so2, 0);
		if (so3 == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
		/* Record who connected, for PRU_PEEREID. */
		unp3->unp_connid.unp_euid = p->p_ucred->cr_uid;
		unp3->unp_connid.unp_egid = p->p_ucred->cr_gid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
	}
	error = unp_connect2(so, so2);
out:
	vput(vp);
	return (error);
}
495 
496 int
unp_connect2(struct socket * so,struct socket * so2)497 unp_connect2(struct socket *so, struct socket *so2)
498 {
499 	struct unpcb *unp = sotounpcb(so);
500 	struct unpcb *unp2;
501 
502 	if (so2->so_type != so->so_type)
503 		return (EPROTOTYPE);
504 	unp2 = sotounpcb(so2);
505 	unp->unp_conn = unp2;
506 	switch (so->so_type) {
507 
508 	case SOCK_DGRAM:
509 		unp->unp_nextref = unp2->unp_refs;
510 		unp2->unp_refs = unp;
511 		soisconnected(so);
512 		break;
513 
514 	case SOCK_STREAM:
515 		unp2->unp_conn = unp;
516 		soisconnected(so);
517 		soisconnected(so2);
518 		break;
519 
520 	default:
521 		panic("unp_connect2");
522 	}
523 	return (0);
524 }
525 
526 void
unp_disconnect(struct unpcb * unp)527 unp_disconnect(struct unpcb *unp)
528 {
529 	struct unpcb *unp2 = unp->unp_conn;
530 
531 	if (unp2 == NULL)
532 		return;
533 	unp->unp_conn = NULL;
534 	switch (unp->unp_socket->so_type) {
535 
536 	case SOCK_DGRAM:
537 		if (unp2->unp_refs == unp)
538 			unp2->unp_refs = unp->unp_nextref;
539 		else {
540 			unp2 = unp2->unp_refs;
541 			for (;;) {
542 				if (unp2 == NULL)
543 					panic("unp_disconnect");
544 				if (unp2->unp_nextref == unp)
545 					break;
546 				unp2 = unp2->unp_nextref;
547 			}
548 			unp2->unp_nextref = unp->unp_nextref;
549 		}
550 		unp->unp_nextref = NULL;
551 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
552 		break;
553 
554 	case SOCK_STREAM:
555 		soisdisconnected(unp->unp_socket);
556 		unp2->unp_conn = NULL;
557 		soisdisconnected(unp2->unp_socket);
558 		break;
559 	}
560 }
561 
562 #ifdef notdef
unp_abort(struct unpcb * unp)563 unp_abort(struct unpcb *unp)
564 {
565 	unp_detach(unp);
566 }
567 #endif
568 
569 void
unp_shutdown(struct unpcb * unp)570 unp_shutdown(struct unpcb *unp)
571 {
572 	struct socket *so;
573 
574 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
575 	    (so = unp->unp_conn->unp_socket))
576 		socantrcvmore(so);
577 }
578 
579 void
unp_drop(struct unpcb * unp,int errno)580 unp_drop(struct unpcb *unp, int errno)
581 {
582 	struct socket *so = unp->unp_socket;
583 
584 	so->so_error = errno;
585 	unp_disconnect(unp);
586 	if (so->so_head) {
587 		so->so_pcb = NULL;
588 		sofree(so);
589 		m_freem(unp->unp_addr);
590 		free(unp, M_PCB);
591 	}
592 }
593 
594 #ifdef notdef
unp_drain(void)595 unp_drain(void)
596 {
597 
598 }
599 #endif
600 
601 int
unp_externalize(struct mbuf * rights)602 unp_externalize(struct mbuf *rights)
603 {
604 	struct proc *p = curproc;		/* XXX */
605 	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
606 	int i, *fdp;
607 	struct file **rp;
608 	struct file *fp;
609 	int nfds, error = 0;
610 
611 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
612 	    sizeof(struct file *);
613 	rp = (struct file **)CMSG_DATA(cm);
614 
615 	fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
616 
617 #ifdef notyet
618 	/* Make sure the recipient should be able to see the descriptors.. */
619 	if (p->p_cwdi->cwdi_rdir != NULL) {
620 		rp = (struct file **)CMSG_DATA(cm);
621 		for (i = 0; i < nfds; i++) {
622 			fp = *rp++;
623 			/*
624 			 * If we are in a chroot'ed directory, and
625 			 * someone wants to pass us a directory, make
626 			 * sure it's inside the subtree we're allowed
627 			 * to access.
628 			 */
629 			if (fp->f_type == DTYPE_VNODE) {
630 				struct vnode *vp = (struct vnode *)fp->f_data;
631 				if ((vp->v_type == VDIR) &&
632 				    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
633 					error = EPERM;
634 					break;
635 				}
636 			}
637 		}
638 	}
639 #endif
640 
641 restart:
642 	fdplock(p->p_fd, p);
643 	if (error != 0) {
644 		rp = ((struct file **)CMSG_DATA(cm));
645 		for (i = 0; i < nfds; i++) {
646 			fp = *rp;
647 			/*
648 			 * zero the pointer before calling unp_discard,
649 			 * since it may end up in unp_gc()..
650 			 */
651 			*rp++ = NULL;
652 			unp_discard(fp);
653 		}
654 		goto out;
655 	}
656 
657 	/*
658 	 * First loop -- allocate file descriptor table slots for the
659 	 * new descriptors.
660 	 */
661 	rp = ((struct file **)CMSG_DATA(cm));
662 	for (i = 0; i < nfds; i++) {
663 		bcopy(rp, &fp, sizeof(fp));
664 		rp++;
665 		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
666 			/*
667 			 * Back out what we've done so far.
668 			 */
669 			for (--i; i >= 0; i--)
670 				fdremove(p->p_fd, fdp[i]);
671 
672 			if (error == ENOSPC) {
673 				fdexpand(p);
674 				error = 0;
675 			} else {
676 				/*
677 				 * This is the error that has historically
678 				 * been returned, and some callers may
679 				 * expect it.
680 				 */
681 				error = EMSGSIZE;
682 			}
683 			fdpunlock(p->p_fd);
684 			goto restart;
685 		}
686 
687 		/*
688 		 * Make the slot reference the descriptor so that
689 		 * fdalloc() works properly.. We finalize it all
690 		 * in the loop below.
691 		 */
692 		p->p_fd->fd_ofiles[fdp[i]] = fp;
693 	}
694 
695 	/*
696 	 * Now that adding them has succeeded, update all of the
697 	 * descriptor passing state.
698 	 */
699 	rp = (struct file **)CMSG_DATA(cm);
700 	for (i = 0; i < nfds; i++) {
701 		fp = *rp++;
702 		fp->f_msgcount--;
703 		unp_rights--;
704 	}
705 
706 	/*
707 	 * Copy temporary array to message and adjust length, in case of
708 	 * transition from large struct file pointers to ints.
709 	 */
710 	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
711 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
712 	rights->m_len = CMSG_SPACE(nfds * sizeof(int));
713  out:
714 	fdpunlock(p->p_fd);
715 	free(fdp, M_TEMP);
716 	return (error);
717 }
718 
719 int
unp_internalize(struct mbuf * control,struct proc * p)720 unp_internalize(struct mbuf *control, struct proc *p)
721 {
722 	struct filedesc *fdp = p->p_fd;
723 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
724 	struct file **rp, *fp;
725 	int i, error;
726 	int nfds, *ip, fd, neededspace;
727 
728 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
729 	    cm->cmsg_len != control->m_len)
730 		return (EINVAL);
731 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);
732 
733 	/* Make sure we have room for the struct file pointers */
734 morespace:
735 	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
736 	    control->m_len;
737 	if (neededspace > M_TRAILINGSPACE(control)) {
738 		/* if we already have a cluster, the message is just too big */
739 		if (control->m_flags & M_EXT)
740 			return (E2BIG);
741 
742 		/* allocate a cluster and try again */
743 		MCLGET(control, M_WAIT);
744 		if ((control->m_flags & M_EXT) == 0)
745 			return (ENOBUFS);       /* allocation failed */
746 
747 		/* copy the data to the cluster */
748 		memcpy(mtod(control, char *), cm, cm->cmsg_len);
749 		cm = mtod(control, struct cmsghdr *);
750 		goto morespace;
751 	}
752 
753 	/* adjust message & mbuf to note amount of space actually used. */
754 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
755 	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
756 
757 	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
758 	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
759 	for (i = 0; i < nfds; i++) {
760 		bcopy(ip, &fd, sizeof fd);
761 		ip--;
762 		if ((fp = fd_getfile(fdp, fd)) == NULL) {
763 			error = EBADF;
764 			goto fail;
765 		}
766 		if (fp->f_count == LONG_MAX-2 ||
767 		    fp->f_msgcount == LONG_MAX-2) {
768 			error = EDEADLK;
769 			goto fail;
770 		}
771 		bcopy(&fp, rp, sizeof fp);
772 		rp--;
773 		fp->f_count++;
774 		fp->f_msgcount++;
775 		unp_rights++;
776 	}
777 	return (0);
778 fail:
779 	/* Back out what we just did. */
780 	for ( ; i > 0; i--) {
781 		bcopy(rp, &fp, sizeof(fp));
782 		rp++;
783 		fp->f_count--;
784 		fp->f_msgcount--;
785 		unp_rights--;
786 	}
787 
788 	return (error);
789 }
790 
791 int	unp_defer, unp_gcing;
792 extern	struct domain unixdomain;
793 
794 void
unp_gc(void)795 unp_gc(void)
796 {
797 	struct file *fp, *nextfp;
798 	struct socket *so;
799 	struct file **extra_ref, **fpp;
800 	int nunref, i;
801 
802 	if (unp_gcing)
803 		return;
804 	unp_gcing = 1;
805 	unp_defer = 0;
806 	LIST_FOREACH(fp, &filehead, f_list)
807 		fp->f_flag &= ~(FMARK|FDEFER);
808 	do {
809 		LIST_FOREACH(fp, &filehead, f_list) {
810 			if (fp->f_flag & FDEFER) {
811 				fp->f_flag &= ~FDEFER;
812 				unp_defer--;
813 			} else {
814 				if (fp->f_count == 0)
815 					continue;
816 				if (fp->f_flag & FMARK)
817 					continue;
818 				if (fp->f_count == fp->f_msgcount)
819 					continue;
820 			}
821 			fp->f_flag |= FMARK;
822 
823 			if (fp->f_type != DTYPE_SOCKET ||
824 			    (so = (struct socket *)fp->f_data) == NULL)
825 				continue;
826 			if (so->so_proto->pr_domain != &unixdomain ||
827 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
828 				continue;
829 #ifdef notdef
830 			if (so->so_rcv.sb_flags & SB_LOCK) {
831 				/*
832 				 * This is problematical; it's not clear
833 				 * we need to wait for the sockbuf to be
834 				 * unlocked (on a uniprocessor, at least),
835 				 * and it's also not clear what to do
836 				 * if sbwait returns an error due to receipt
837 				 * of a signal.  If sbwait does return
838 				 * an error, we'll go into an infinite
839 				 * loop.  Delete all of this for now.
840 				 */
841 				(void) sbwait(&so->so_rcv);
842 				goto restart;
843 			}
844 #endif
845 			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
846 		}
847 	} while (unp_defer);
848 	/*
849 	 * We grab an extra reference to each of the file table entries
850 	 * that are not otherwise accessible and then free the rights
851 	 * that are stored in messages on them.
852 	 *
853 	 * The bug in the original code is a little tricky, so I'll describe
854 	 * what's wrong with it here.
855 	 *
856 	 * It is incorrect to simply unp_discard each entry for f_msgcount
857 	 * times -- consider the case of sockets A and B that contain
858 	 * references to each other.  On a last close of some other socket,
859 	 * we trigger a gc since the number of outstanding rights (unp_rights)
860 	 * is non-zero.  If during the sweep phase the gc code un_discards,
861 	 * we end up doing a (full) closef on the descriptor.  A closef on A
862 	 * results in the following chain.  Closef calls soo_close, which
863 	 * calls soclose.   Soclose calls first (through the switch
864 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
865 	 * returns because the previous instance had set unp_gcing, and
866 	 * we return all the way back to soclose, which marks the socket
867 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
868 	 * to free up the rights that are queued in messages on the socket A,
869 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
870 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
871 	 * instance of unp_discard just calls closef on B.
872 	 *
873 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
874 	 * which results in another closef on A.  Unfortunately, A is already
875 	 * being closed, and the descriptor has already been marked with
876 	 * SS_NOFDREF, and soclose panics at this point.
877 	 *
878 	 * Here, we first take an extra reference to each inaccessible
879 	 * descriptor.  Then, we call sorflush ourself, since we know
880 	 * it is a Unix domain socket anyhow.  After we destroy all the
881 	 * rights carried in messages, we do a last closef to get rid
882 	 * of our extra reference.  This is the last close, and the
883 	 * unp_detach etc will shut down the socket.
884 	 *
885 	 * 91/09/19, bsy@cs.cmu.edu
886 	 */
887 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
888 	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
889 	    fp != NULL; fp = nextfp) {
890 		nextfp = LIST_NEXT(fp, f_list);
891 		if (fp->f_count == 0)
892 			continue;
893 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
894 			*fpp++ = fp;
895 			nunref++;
896 			FREF(fp);
897 			fp->f_count++;
898 		}
899 	}
900 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
901 	        if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
902 		        sorflush((struct socket *)(*fpp)->f_data);
903 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
904 		(void) closef(*fpp, NULL);
905 	free((caddr_t)extra_ref, M_FILE);
906 	unp_gcing = 0;
907 }
908 
909 void
unp_dispose(struct mbuf * m)910 unp_dispose(struct mbuf *m)
911 {
912 
913 	if (m)
914 		unp_scan(m, unp_discard, 1);
915 }
916 
917 void
unp_scan(struct mbuf * m0,void (* op)(struct file *),int discard)918 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
919 {
920 	struct mbuf *m;
921 	struct file **rp, *fp;
922 	struct cmsghdr *cm;
923 	int i;
924 	int qfds;
925 
926 	while (m0) {
927 		for (m = m0; m; m = m->m_next) {
928 			if (m->m_type == MT_CONTROL &&
929 			    m->m_len >= sizeof(*cm)) {
930 				cm = mtod(m, struct cmsghdr *);
931 				if (cm->cmsg_level != SOL_SOCKET ||
932 				    cm->cmsg_type != SCM_RIGHTS)
933 					continue;
934 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
935 				    / sizeof(struct file *);
936 				rp = (struct file **)CMSG_DATA(cm);
937 				for (i = 0; i < qfds; i++) {
938 					fp = *rp;
939 					if (discard)
940 						*rp = 0;
941 					(*op)(fp);
942 					rp++;
943 				}
944 				break;		/* XXX, but saves time */
945 			}
946 		}
947 		m0 = m0->m_nextpkt;
948 	}
949 }
950 
951 void
unp_mark(struct file * fp)952 unp_mark(struct file *fp)
953 {
954 	if (fp == NULL)
955 		return;
956 
957 	if (fp->f_flag & FMARK)
958 		return;
959 
960 	if (fp->f_flag & FDEFER)
961 		return;
962 
963 	if (fp->f_type == DTYPE_SOCKET) {
964 		unp_defer++;
965 		fp->f_flag |= FDEFER;
966 	} else {
967 		fp->f_flag |= FMARK;
968 	}
969 }
970 
971 void
unp_discard(struct file * fp)972 unp_discard(struct file *fp)
973 {
974 
975 	if (fp == NULL)
976 		return;
977 	FREF(fp);
978 	fp->f_msgcount--;
979 	unp_rights--;
980 	(void) closef(fp, NULL);
981 }
982