1 /* $OpenBSD: uipc_usrreq.c,v 1.31 2006/02/27 23:38:11 miod Exp $ */
2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1989, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/unpcb.h>
44 #include <sys/un.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/mbuf.h>
50
51 /*
52 * Unix communications domain.
53 *
54 * TODO:
55 * SEQPACKET, RDM
56 * rethink name space problems
57 * need a proper out-of-band
58 */
/*
 * Placeholder address handed out when a socket has no bound name
 * (used for accept on an unbound peer and as the datagram source address).
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
/* Next fake inode number; assigned lazily to a pcb on its first PRU_SENSE. */
ino_t	unp_ino;			/* prototype for fake inode numbers */
61
62 /*ARGSUSED*/
63 int
uipc_usrreq(struct socket * so,int req,struct mbuf * m,struct mbuf * nam,struct mbuf * control)64 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
65 struct mbuf *control)
66 {
67 struct unpcb *unp = sotounpcb(so);
68 struct socket *so2;
69 int error = 0;
70 struct proc *p = curproc; /* XXX */
71
72 if (req == PRU_CONTROL)
73 return (EOPNOTSUPP);
74 if (req != PRU_SEND && control && control->m_len) {
75 error = EOPNOTSUPP;
76 goto release;
77 }
78 if (unp == NULL && req != PRU_ATTACH) {
79 error = EINVAL;
80 goto release;
81 }
82 switch (req) {
83
84 case PRU_ATTACH:
85 if (unp) {
86 error = EISCONN;
87 break;
88 }
89 error = unp_attach(so);
90 break;
91
92 case PRU_DETACH:
93 unp_detach(unp);
94 break;
95
96 case PRU_BIND:
97 error = unp_bind(unp, nam, p);
98 break;
99
100 case PRU_LISTEN:
101 if (unp->unp_vnode == NULL)
102 error = EINVAL;
103 break;
104
105 case PRU_CONNECT:
106 error = unp_connect(so, nam, p);
107 break;
108
109 case PRU_CONNECT2:
110 error = unp_connect2(so, (struct socket *)nam);
111 break;
112
113 case PRU_DISCONNECT:
114 unp_disconnect(unp);
115 break;
116
117 case PRU_ACCEPT:
118 /*
119 * Pass back name of connected socket,
120 * if it was bound and we are still connected
121 * (our peer may have closed already!).
122 */
123 if (unp->unp_conn && unp->unp_conn->unp_addr) {
124 nam->m_len = unp->unp_conn->unp_addr->m_len;
125 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
126 mtod(nam, caddr_t), (unsigned)nam->m_len);
127 } else {
128 nam->m_len = sizeof(sun_noname);
129 *(mtod(nam, struct sockaddr *)) = sun_noname;
130 }
131 break;
132
133 case PRU_SHUTDOWN:
134 socantsendmore(so);
135 unp_shutdown(unp);
136 break;
137
138 case PRU_RCVD:
139 switch (so->so_type) {
140
141 case SOCK_DGRAM:
142 panic("uipc 1");
143 /*NOTREACHED*/
144
145 case SOCK_STREAM:
146 #define rcv (&so->so_rcv)
147 #define snd (&so2->so_snd)
148 if (unp->unp_conn == NULL)
149 break;
150 so2 = unp->unp_conn->unp_socket;
151 /*
152 * Adjust backpressure on sender
153 * and wakeup any waiting to write.
154 */
155 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
156 unp->unp_mbcnt = rcv->sb_mbcnt;
157 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
158 unp->unp_cc = rcv->sb_cc;
159 sowwakeup(so2);
160 #undef snd
161 #undef rcv
162 break;
163
164 default:
165 panic("uipc 2");
166 }
167 break;
168
169 case PRU_SEND:
170 if (control && (error = unp_internalize(control, p)))
171 break;
172 switch (so->so_type) {
173
174 case SOCK_DGRAM: {
175 struct sockaddr *from;
176
177 if (nam) {
178 if (unp->unp_conn) {
179 error = EISCONN;
180 break;
181 }
182 error = unp_connect(so, nam, p);
183 if (error)
184 break;
185 } else {
186 if (unp->unp_conn == NULL) {
187 error = ENOTCONN;
188 break;
189 }
190 }
191 so2 = unp->unp_conn->unp_socket;
192 if (unp->unp_addr)
193 from = mtod(unp->unp_addr, struct sockaddr *);
194 else
195 from = &sun_noname;
196 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
197 sorwakeup(so2);
198 m = NULL;
199 control = NULL;
200 } else
201 error = ENOBUFS;
202 if (nam)
203 unp_disconnect(unp);
204 break;
205 }
206
207 case SOCK_STREAM:
208 #define rcv (&so2->so_rcv)
209 #define snd (&so->so_snd)
210 if (so->so_state & SS_CANTSENDMORE) {
211 error = EPIPE;
212 break;
213 }
214 if (unp->unp_conn == NULL) {
215 error = ENOTCONN;
216 break;
217 }
218 so2 = unp->unp_conn->unp_socket;
219 /*
220 * Send to paired receive port, and then reduce
221 * send buffer hiwater marks to maintain backpressure.
222 * Wake up readers.
223 */
224 if (control) {
225 if (sbappendcontrol(rcv, m, control))
226 control = NULL;
227 } else
228 sbappend(rcv, m);
229 snd->sb_mbmax -=
230 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
231 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
232 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
233 unp->unp_conn->unp_cc = rcv->sb_cc;
234 sorwakeup(so2);
235 m = NULL;
236 #undef snd
237 #undef rcv
238 break;
239
240 default:
241 panic("uipc 4");
242 }
243 break;
244
245 case PRU_ABORT:
246 unp_drop(unp, ECONNABORTED);
247 break;
248
249 case PRU_SENSE:
250 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
251 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
252 so2 = unp->unp_conn->unp_socket;
253 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
254 }
255 ((struct stat *) m)->st_dev = NODEV;
256 if (unp->unp_ino == 0)
257 unp->unp_ino = unp_ino++;
258 ((struct stat *) m)->st_atimespec =
259 ((struct stat *) m)->st_mtimespec =
260 ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
261 ((struct stat *) m)->st_ino = unp->unp_ino;
262 return (0);
263
264 case PRU_RCVOOB:
265 return (EOPNOTSUPP);
266
267 case PRU_SENDOOB:
268 error = EOPNOTSUPP;
269 break;
270
271 case PRU_SOCKADDR:
272 if (unp->unp_addr) {
273 nam->m_len = unp->unp_addr->m_len;
274 bcopy(mtod(unp->unp_addr, caddr_t),
275 mtod(nam, caddr_t), (unsigned)nam->m_len);
276 } else
277 nam->m_len = 0;
278 break;
279
280 case PRU_PEERADDR:
281 if (unp->unp_conn && unp->unp_conn->unp_addr) {
282 nam->m_len = unp->unp_conn->unp_addr->m_len;
283 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
284 mtod(nam, caddr_t), (unsigned)nam->m_len);
285 } else
286 nam->m_len = 0;
287 break;
288
289 case PRU_PEEREID:
290 if (unp->unp_flags & UNP_FEIDS) {
291 nam->m_len = sizeof(struct unpcbid);
292 bcopy((caddr_t)(&(unp->unp_connid)),
293 mtod(nam, caddr_t), (unsigned)nam->m_len);
294 } else
295 nam->m_len = 0;
296 break;
297
298 case PRU_SLOWTIMO:
299 break;
300
301 default:
302 panic("piusrreq");
303 }
304 release:
305 if (control)
306 m_freem(control);
307 if (m)
308 m_freem(m);
309 return (error);
310 }
311
312 /*
313 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
314 * for stream sockets, although the total for sender and receiver is
315 * actually only PIPSIZ.
316 * Datagram sockets really use the sendspace as the maximum datagram size,
317 * and don't really want to reserve the sendspace. Their recvspace should
318 * be large enough for at least one max-size datagram plus address.
319 */
#define	PIPSIZ	4096
/* Default reservations passed to soreserve() by unp_attach(). */
u_long	unpst_sendspace = PIPSIZ;	/* stream send space */
u_long	unpst_recvspace = PIPSIZ;	/* stream receive space */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive space */

/* Incremented by unp_internalize(), decremented when rights are received or discarded. */
int	unp_rights;			/* file descriptors in flight */
327
328 int
unp_attach(struct socket * so)329 unp_attach(struct socket *so)
330 {
331 struct unpcb *unp;
332 struct timeval tv;
333 int error;
334
335 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
336 switch (so->so_type) {
337
338 case SOCK_STREAM:
339 error = soreserve(so, unpst_sendspace, unpst_recvspace);
340 break;
341
342 case SOCK_DGRAM:
343 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
344 break;
345
346 default:
347 panic("unp_attach");
348 }
349 if (error)
350 return (error);
351 }
352 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
353 if (unp == NULL)
354 return (ENOBUFS);
355 bzero((caddr_t)unp, sizeof(*unp));
356 unp->unp_socket = so;
357 so->so_pcb = unp;
358 microtime(&tv);
359 TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
360 return (0);
361 }
362
363 void
unp_detach(struct unpcb * unp)364 unp_detach(struct unpcb *unp)
365 {
366
367 if (unp->unp_vnode) {
368 unp->unp_vnode->v_socket = NULL;
369 vrele(unp->unp_vnode);
370 unp->unp_vnode = NULL;
371 }
372 if (unp->unp_conn)
373 unp_disconnect(unp);
374 while (unp->unp_refs)
375 unp_drop(unp->unp_refs, ECONNRESET);
376 soisdisconnected(unp->unp_socket);
377 unp->unp_socket->so_pcb = NULL;
378 m_freem(unp->unp_addr);
379 if (unp_rights) {
380 /*
381 * Normally the receive buffer is flushed later,
382 * in sofree, but if our receive buffer holds references
383 * to descriptors that are now garbage, we will dispose
384 * of those descriptor references after the garbage collector
385 * gets them (resulting in a "panic: closef: count < 0").
386 */
387 sorflush(unp->unp_socket);
388 free(unp, M_PCB);
389 unp_gc();
390 } else
391 free(unp, M_PCB);
392 }
393
394 int
unp_bind(struct unpcb * unp,struct mbuf * nam,struct proc * p)395 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
396 {
397 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
398 struct vnode *vp;
399 struct vattr vattr;
400 int error, namelen;
401 struct nameidata nd;
402 char buf[MLEN];
403
404 if (unp->unp_vnode != NULL)
405 return (EINVAL);
406 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
407 if (namelen <= 0 || namelen >= MLEN)
408 return EINVAL;
409 strncpy(buf, soun->sun_path, namelen);
410 buf[namelen] = 0; /* null-terminate the string */
411 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p);
412 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
413 if ((error = namei(&nd)) != 0)
414 return (error);
415 vp = nd.ni_vp;
416 if (vp != NULL) {
417 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
418 if (nd.ni_dvp == vp)
419 vrele(nd.ni_dvp);
420 else
421 vput(nd.ni_dvp);
422 vrele(vp);
423 return (EADDRINUSE);
424 }
425 VATTR_NULL(&vattr);
426 vattr.va_type = VSOCK;
427 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
428 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
429 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
430 if (error)
431 return (error);
432 vp = nd.ni_vp;
433 vp->v_socket = unp->unp_socket;
434 unp->unp_vnode = vp;
435 unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
436 VOP_UNLOCK(vp, 0, p);
437 return (0);
438 }
439
/*
 * Connect socket `so' to the socket bound at the pathname in `nam'.
 *
 * Looks the path up, checks that it names a socket vnode writable by
 * `p' with a live socket of the same type behind it, and then joins
 * the two with unp_connect2().  For connection-oriented protocols a
 * fresh socket is spawned from the listener; it inherits the
 * listener's name and records the connecting process's effective ids.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct nameidata nd;
	struct vnode *vp;
	struct socket *peer, *newso;
	struct unpcb *headpcb, *newpcb;
	caddr_t end;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);

	/*
	 * Make sure the path is NUL-terminated: if the name fills the
	 * mbuf's internal buffer its last byte must already be 0,
	 * otherwise store a terminator just past the name.
	 */
	end = mtod(nam, caddr_t) + nam->m_len;
	if (end == &nam->m_dat[MLEN]) {		/* XXX */
		if (end[-1] != 0)
			return (EMSGSIZE);
	} else
		*end = 0;

	error = namei(&nd);
	if (error != 0)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto done;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto done;

	peer = vp->v_socket;
	if (peer == NULL) {
		error = ECONNREFUSED;
		goto done;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto done;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Peer must be listening and able to spawn a new socket. */
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto done;
		}
		newso = sonewconn(peer, 0);
		if (newso == 0) {
			error = ECONNREFUSED;
			goto done;
		}
		headpcb = sotounpcb(peer);
		newpcb = sotounpcb(newso);
		if (headpcb->unp_addr)
			newpcb->unp_addr =
			    m_copy(headpcb->unp_addr, 0, (int)M_COPYALL);
		newpcb->unp_connid.unp_euid = p->p_ucred->cr_uid;
		newpcb->unp_connid.unp_egid = p->p_ucred->cr_gid;
		newpcb->unp_flags |= UNP_FEIDS;
		peer = newso;
	}
	error = unp_connect2(so, peer);
done:
	vput(vp);
	return (error);
}
495
496 int
unp_connect2(struct socket * so,struct socket * so2)497 unp_connect2(struct socket *so, struct socket *so2)
498 {
499 struct unpcb *unp = sotounpcb(so);
500 struct unpcb *unp2;
501
502 if (so2->so_type != so->so_type)
503 return (EPROTOTYPE);
504 unp2 = sotounpcb(so2);
505 unp->unp_conn = unp2;
506 switch (so->so_type) {
507
508 case SOCK_DGRAM:
509 unp->unp_nextref = unp2->unp_refs;
510 unp2->unp_refs = unp;
511 soisconnected(so);
512 break;
513
514 case SOCK_STREAM:
515 unp2->unp_conn = unp;
516 soisconnected(so);
517 soisconnected(so2);
518 break;
519
520 default:
521 panic("unp_connect2");
522 }
523 return (0);
524 }
525
526 void
unp_disconnect(struct unpcb * unp)527 unp_disconnect(struct unpcb *unp)
528 {
529 struct unpcb *unp2 = unp->unp_conn;
530
531 if (unp2 == NULL)
532 return;
533 unp->unp_conn = NULL;
534 switch (unp->unp_socket->so_type) {
535
536 case SOCK_DGRAM:
537 if (unp2->unp_refs == unp)
538 unp2->unp_refs = unp->unp_nextref;
539 else {
540 unp2 = unp2->unp_refs;
541 for (;;) {
542 if (unp2 == NULL)
543 panic("unp_disconnect");
544 if (unp2->unp_nextref == unp)
545 break;
546 unp2 = unp2->unp_nextref;
547 }
548 unp2->unp_nextref = unp->unp_nextref;
549 }
550 unp->unp_nextref = NULL;
551 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
552 break;
553
554 case SOCK_STREAM:
555 soisdisconnected(unp->unp_socket);
556 unp2->unp_conn = NULL;
557 soisdisconnected(unp2->unp_socket);
558 break;
559 }
560 }
561
#ifdef notdef
/*
 * Compiled out (notdef): aborting would simply detach the pcb.
 */
unp_abort(struct unpcb *unp)
{
	unp_detach(unp);
}
#endif
568
569 void
unp_shutdown(struct unpcb * unp)570 unp_shutdown(struct unpcb *unp)
571 {
572 struct socket *so;
573
574 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
575 (so = unp->unp_conn->unp_socket))
576 socantrcvmore(so);
577 }
578
579 void
unp_drop(struct unpcb * unp,int errno)580 unp_drop(struct unpcb *unp, int errno)
581 {
582 struct socket *so = unp->unp_socket;
583
584 so->so_error = errno;
585 unp_disconnect(unp);
586 if (so->so_head) {
587 so->so_pcb = NULL;
588 sofree(so);
589 m_freem(unp->unp_addr);
590 free(unp, M_PCB);
591 }
592 }
593
#ifdef notdef
/*
 * Compiled out (notdef): empty placeholder, nothing to drain.
 */
unp_drain(void)
{

}
#endif
600
601 int
unp_externalize(struct mbuf * rights)602 unp_externalize(struct mbuf *rights)
603 {
604 struct proc *p = curproc; /* XXX */
605 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
606 int i, *fdp;
607 struct file **rp;
608 struct file *fp;
609 int nfds, error = 0;
610
611 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
612 sizeof(struct file *);
613 rp = (struct file **)CMSG_DATA(cm);
614
615 fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
616
617 #ifdef notyet
618 /* Make sure the recipient should be able to see the descriptors.. */
619 if (p->p_cwdi->cwdi_rdir != NULL) {
620 rp = (struct file **)CMSG_DATA(cm);
621 for (i = 0; i < nfds; i++) {
622 fp = *rp++;
623 /*
624 * If we are in a chroot'ed directory, and
625 * someone wants to pass us a directory, make
626 * sure it's inside the subtree we're allowed
627 * to access.
628 */
629 if (fp->f_type == DTYPE_VNODE) {
630 struct vnode *vp = (struct vnode *)fp->f_data;
631 if ((vp->v_type == VDIR) &&
632 !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
633 error = EPERM;
634 break;
635 }
636 }
637 }
638 }
639 #endif
640
641 restart:
642 fdplock(p->p_fd, p);
643 if (error != 0) {
644 rp = ((struct file **)CMSG_DATA(cm));
645 for (i = 0; i < nfds; i++) {
646 fp = *rp;
647 /*
648 * zero the pointer before calling unp_discard,
649 * since it may end up in unp_gc()..
650 */
651 *rp++ = NULL;
652 unp_discard(fp);
653 }
654 goto out;
655 }
656
657 /*
658 * First loop -- allocate file descriptor table slots for the
659 * new descriptors.
660 */
661 rp = ((struct file **)CMSG_DATA(cm));
662 for (i = 0; i < nfds; i++) {
663 bcopy(rp, &fp, sizeof(fp));
664 rp++;
665 if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
666 /*
667 * Back out what we've done so far.
668 */
669 for (--i; i >= 0; i--)
670 fdremove(p->p_fd, fdp[i]);
671
672 if (error == ENOSPC) {
673 fdexpand(p);
674 error = 0;
675 } else {
676 /*
677 * This is the error that has historically
678 * been returned, and some callers may
679 * expect it.
680 */
681 error = EMSGSIZE;
682 }
683 fdpunlock(p->p_fd);
684 goto restart;
685 }
686
687 /*
688 * Make the slot reference the descriptor so that
689 * fdalloc() works properly.. We finalize it all
690 * in the loop below.
691 */
692 p->p_fd->fd_ofiles[fdp[i]] = fp;
693 }
694
695 /*
696 * Now that adding them has succeeded, update all of the
697 * descriptor passing state.
698 */
699 rp = (struct file **)CMSG_DATA(cm);
700 for (i = 0; i < nfds; i++) {
701 fp = *rp++;
702 fp->f_msgcount--;
703 unp_rights--;
704 }
705
706 /*
707 * Copy temporary array to message and adjust length, in case of
708 * transition from large struct file pointers to ints.
709 */
710 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
711 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
712 rights->m_len = CMSG_SPACE(nfds * sizeof(int));
713 out:
714 fdpunlock(p->p_fd);
715 free(fdp, M_TEMP);
716 return (error);
717 }
718
719 int
unp_internalize(struct mbuf * control,struct proc * p)720 unp_internalize(struct mbuf *control, struct proc *p)
721 {
722 struct filedesc *fdp = p->p_fd;
723 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
724 struct file **rp, *fp;
725 int i, error;
726 int nfds, *ip, fd, neededspace;
727
728 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
729 cm->cmsg_len != control->m_len)
730 return (EINVAL);
731 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);
732
733 /* Make sure we have room for the struct file pointers */
734 morespace:
735 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
736 control->m_len;
737 if (neededspace > M_TRAILINGSPACE(control)) {
738 /* if we already have a cluster, the message is just too big */
739 if (control->m_flags & M_EXT)
740 return (E2BIG);
741
742 /* allocate a cluster and try again */
743 MCLGET(control, M_WAIT);
744 if ((control->m_flags & M_EXT) == 0)
745 return (ENOBUFS); /* allocation failed */
746
747 /* copy the data to the cluster */
748 memcpy(mtod(control, char *), cm, cm->cmsg_len);
749 cm = mtod(control, struct cmsghdr *);
750 goto morespace;
751 }
752
753 /* adjust message & mbuf to note amount of space actually used. */
754 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
755 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
756
757 ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
758 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
759 for (i = 0; i < nfds; i++) {
760 bcopy(ip, &fd, sizeof fd);
761 ip--;
762 if ((fp = fd_getfile(fdp, fd)) == NULL) {
763 error = EBADF;
764 goto fail;
765 }
766 if (fp->f_count == LONG_MAX-2 ||
767 fp->f_msgcount == LONG_MAX-2) {
768 error = EDEADLK;
769 goto fail;
770 }
771 bcopy(&fp, rp, sizeof fp);
772 rp--;
773 fp->f_count++;
774 fp->f_msgcount++;
775 unp_rights++;
776 }
777 return (0);
778 fail:
779 /* Back out what we just did. */
780 for ( ; i > 0; i--) {
781 bcopy(rp, &fp, sizeof(fp));
782 rp++;
783 fp->f_count--;
784 fp->f_msgcount--;
785 unp_rights--;
786 }
787
788 return (error);
789 }
790
/*
 * Garbage collector state:
 *   unp_defer - number of files currently flagged FDEFER, i.e. socket
 *               files found by unp_mark() that unp_gc() still has to scan
 *   unp_gcing - non-zero while unp_gc() is running (re-entry guard)
 */
int	unp_defer, unp_gcing;
extern	struct domain unixdomain;
793
794 void
unp_gc(void)795 unp_gc(void)
796 {
797 struct file *fp, *nextfp;
798 struct socket *so;
799 struct file **extra_ref, **fpp;
800 int nunref, i;
801
802 if (unp_gcing)
803 return;
804 unp_gcing = 1;
805 unp_defer = 0;
806 LIST_FOREACH(fp, &filehead, f_list)
807 fp->f_flag &= ~(FMARK|FDEFER);
808 do {
809 LIST_FOREACH(fp, &filehead, f_list) {
810 if (fp->f_flag & FDEFER) {
811 fp->f_flag &= ~FDEFER;
812 unp_defer--;
813 } else {
814 if (fp->f_count == 0)
815 continue;
816 if (fp->f_flag & FMARK)
817 continue;
818 if (fp->f_count == fp->f_msgcount)
819 continue;
820 }
821 fp->f_flag |= FMARK;
822
823 if (fp->f_type != DTYPE_SOCKET ||
824 (so = (struct socket *)fp->f_data) == NULL)
825 continue;
826 if (so->so_proto->pr_domain != &unixdomain ||
827 (so->so_proto->pr_flags&PR_RIGHTS) == 0)
828 continue;
829 #ifdef notdef
830 if (so->so_rcv.sb_flags & SB_LOCK) {
831 /*
832 * This is problematical; it's not clear
833 * we need to wait for the sockbuf to be
834 * unlocked (on a uniprocessor, at least),
835 * and it's also not clear what to do
836 * if sbwait returns an error due to receipt
837 * of a signal. If sbwait does return
838 * an error, we'll go into an infinite
839 * loop. Delete all of this for now.
840 */
841 (void) sbwait(&so->so_rcv);
842 goto restart;
843 }
844 #endif
845 unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
846 }
847 } while (unp_defer);
848 /*
849 * We grab an extra reference to each of the file table entries
850 * that are not otherwise accessible and then free the rights
851 * that are stored in messages on them.
852 *
853 * The bug in the original code is a little tricky, so I'll describe
854 * what's wrong with it here.
855 *
856 * It is incorrect to simply unp_discard each entry for f_msgcount
857 * times -- consider the case of sockets A and B that contain
858 * references to each other. On a last close of some other socket,
859 * we trigger a gc since the number of outstanding rights (unp_rights)
860 * is non-zero. If during the sweep phase the gc code un_discards,
861 * we end up doing a (full) closef on the descriptor. A closef on A
862 * results in the following chain. Closef calls soo_close, which
863 * calls soclose. Soclose calls first (through the switch
864 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
865 * returns because the previous instance had set unp_gcing, and
866 * we return all the way back to soclose, which marks the socket
867 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
868 * to free up the rights that are queued in messages on the socket A,
869 * i.e., the reference on B. The sorflush calls via the dom_dispose
870 * switch unp_dispose, which unp_scans with unp_discard. This second
871 * instance of unp_discard just calls closef on B.
872 *
873 * Well, a similar chain occurs on B, resulting in a sorflush on B,
874 * which results in another closef on A. Unfortunately, A is already
875 * being closed, and the descriptor has already been marked with
876 * SS_NOFDREF, and soclose panics at this point.
877 *
878 * Here, we first take an extra reference to each inaccessible
879 * descriptor. Then, we call sorflush ourself, since we know
880 * it is a Unix domain socket anyhow. After we destroy all the
881 * rights carried in messages, we do a last closef to get rid
882 * of our extra reference. This is the last close, and the
883 * unp_detach etc will shut down the socket.
884 *
885 * 91/09/19, bsy@cs.cmu.edu
886 */
887 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
888 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
889 fp != NULL; fp = nextfp) {
890 nextfp = LIST_NEXT(fp, f_list);
891 if (fp->f_count == 0)
892 continue;
893 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
894 *fpp++ = fp;
895 nunref++;
896 FREF(fp);
897 fp->f_count++;
898 }
899 }
900 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
901 if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
902 sorflush((struct socket *)(*fpp)->f_data);
903 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
904 (void) closef(*fpp, NULL);
905 free((caddr_t)extra_ref, M_FILE);
906 unp_gcing = 0;
907 }
908
909 void
unp_dispose(struct mbuf * m)910 unp_dispose(struct mbuf *m)
911 {
912
913 if (m)
914 unp_scan(m, unp_discard, 1);
915 }
916
917 void
unp_scan(struct mbuf * m0,void (* op)(struct file *),int discard)918 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
919 {
920 struct mbuf *m;
921 struct file **rp, *fp;
922 struct cmsghdr *cm;
923 int i;
924 int qfds;
925
926 while (m0) {
927 for (m = m0; m; m = m->m_next) {
928 if (m->m_type == MT_CONTROL &&
929 m->m_len >= sizeof(*cm)) {
930 cm = mtod(m, struct cmsghdr *);
931 if (cm->cmsg_level != SOL_SOCKET ||
932 cm->cmsg_type != SCM_RIGHTS)
933 continue;
934 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
935 / sizeof(struct file *);
936 rp = (struct file **)CMSG_DATA(cm);
937 for (i = 0; i < qfds; i++) {
938 fp = *rp;
939 if (discard)
940 *rp = 0;
941 (*op)(fp);
942 rp++;
943 }
944 break; /* XXX, but saves time */
945 }
946 }
947 m0 = m0->m_nextpkt;
948 }
949 }
950
951 void
unp_mark(struct file * fp)952 unp_mark(struct file *fp)
953 {
954 if (fp == NULL)
955 return;
956
957 if (fp->f_flag & FMARK)
958 return;
959
960 if (fp->f_flag & FDEFER)
961 return;
962
963 if (fp->f_type == DTYPE_SOCKET) {
964 unp_defer++;
965 fp->f_flag |= FDEFER;
966 } else {
967 fp->f_flag |= FMARK;
968 }
969 }
970
971 void
unp_discard(struct file * fp)972 unp_discard(struct file *fp)
973 {
974
975 if (fp == NULL)
976 return;
977 FREF(fp);
978 fp->f_msgcount--;
979 unp_rights--;
980 (void) closef(fp, NULL);
981 }
982