1 /** $MirOS: src/sys/kern/sys_generic.c,v 1.2 2005/03/06 21:28:03 tg Exp $ */
2 /* $OpenBSD: sys_generic.c,v 1.47 2003/12/10 23:10:08 millert Exp $ */
3 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */
4
5 /*
6 * Copyright (c) 1996 Theo de Raadt
7 * Copyright (c) 1982, 1986, 1989, 1993
8 * The Regents of the University of California. All rights reserved.
9 * (c) UNIX System Laboratories, Inc.
10 * All or some portions of this file are derived from material licensed
11 * to the University of California by American Telephone and Telegraph
12 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
13 * the permission of UNIX System Laboratories, Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
40 */
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/filedesc.h>
45 #include <sys/ioctl.h>
46 #include <sys/file.h>
47 #include <sys/proc.h>
48 #include <sys/resourcevar.h>
49 #include <sys/socketvar.h>
50 #include <sys/signalvar.h>
51 #include <sys/uio.h>
52 #include <sys/kernel.h>
53 #include <sys/stat.h>
54 #include <sys/malloc.h>
55 #include <sys/poll.h>
56 #ifdef KTRACE
57 #include <sys/ktrace.h>
58 #endif
59
60 #include <sys/mount.h>
61 #include <sys/syscallargs.h>
62
63 #include <uvm/uvm_extern.h>
64
65 int selscan(struct proc *, fd_set *, fd_set *, int, register_t *);
66 int seltrue(dev_t, int, struct proc *);
67 void pollscan(struct proc *, struct pollfd *, u_int, register_t *);
68
69 /*
70 * Read system call.
71 */
72 /* ARGSUSED */
73 int
sys_read(p,v,retval)74 sys_read(p, v, retval)
75 struct proc *p;
76 void *v;
77 register_t *retval;
78 {
79 struct sys_read_args /* {
80 syscallarg(int) fd;
81 syscallarg(void *) buf;
82 syscallarg(size_t) nbyte;
83 } */ *uap = v;
84 int fd = SCARG(uap, fd);
85 struct file *fp;
86 struct filedesc *fdp = p->p_fd;
87
88 if ((fp = fd_getfile(fdp, fd)) == NULL)
89 return (EBADF);
90 if ((fp->f_flag & FREAD) == 0)
91 return (EBADF);
92
93 FREF(fp);
94
95 /* dofileread() will FRELE the descriptor for us */
96 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
97 &fp->f_offset, retval));
98 }
99
100 int
dofileread(p,fd,fp,buf,nbyte,offset,retval)101 dofileread(p, fd, fp, buf, nbyte, offset, retval)
102 struct proc *p;
103 int fd;
104 struct file *fp;
105 void *buf;
106 size_t nbyte;
107 off_t *offset;
108 register_t *retval;
109 {
110 struct uio auio;
111 struct iovec aiov;
112 long cnt, error = 0;
113 #ifdef KTRACE
114 struct iovec ktriov;
115 #endif
116
117 aiov.iov_base = buf;
118 aiov.iov_len = nbyte;
119 auio.uio_iov = &aiov;
120 auio.uio_iovcnt = 1;
121 auio.uio_resid = nbyte;
122 auio.uio_rw = UIO_READ;
123 auio.uio_segflg = UIO_USERSPACE;
124 auio.uio_procp = p;
125
126 /*
127 * Reads return ssize_t because -1 is returned on error. Therefore
128 * we must restrict the length to SSIZE_MAX to avoid garbage return
129 * values.
130 */
131 if (auio.uio_resid > SSIZE_MAX) {
132 error = EINVAL;
133 goto out;
134 }
135
136 #ifdef KTRACE
137 /*
138 * if tracing, save a copy of iovec
139 */
140 if (KTRPOINT(p, KTR_GENIO))
141 ktriov = aiov;
142 #endif
143 cnt = auio.uio_resid;
144 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
145 if (error)
146 if (auio.uio_resid != cnt && (error == ERESTART ||
147 error == EINTR || error == EWOULDBLOCK))
148 error = 0;
149 cnt -= auio.uio_resid;
150 #ifdef KTRACE
151 if (KTRPOINT(p, KTR_GENIO) && error == 0)
152 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
153 #endif
154 *retval = cnt;
155 out:
156 FRELE(fp);
157 return (error);
158 }
159
160 /*
161 * Scatter read system call.
162 */
163 int
sys_readv(p,v,retval)164 sys_readv(p, v, retval)
165 struct proc *p;
166 void *v;
167 register_t *retval;
168 {
169 struct sys_readv_args /* {
170 syscallarg(int) fd;
171 syscallarg(const struct iovec *) iovp;
172 syscallarg(int) iovcnt;
173 } */ *uap = v;
174 int fd = SCARG(uap, fd);
175 struct file *fp;
176 struct filedesc *fdp = p->p_fd;
177
178 if ((fp = fd_getfile(fdp, fd)) == NULL)
179 return (EBADF);
180 if ((fp->f_flag & FREAD) == 0)
181 return (EBADF);
182
183 FREF(fp);
184
185 /* dofilereadv() will FRELE the descriptor for us */
186 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
187 &fp->f_offset, retval));
188 }
189
190 int
dofilereadv(p,fd,fp,iovp,iovcnt,offset,retval)191 dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
192 struct proc *p;
193 int fd;
194 struct file *fp;
195 const struct iovec *iovp;
196 int iovcnt;
197 off_t *offset;
198 register_t *retval;
199 {
200 struct uio auio;
201 struct iovec *iov;
202 struct iovec *needfree;
203 struct iovec aiov[UIO_SMALLIOV];
204 long i, cnt, error = 0;
205 u_int iovlen;
206 #ifdef KTRACE
207 struct iovec *ktriov = NULL;
208 #endif
209
210 /* note: can't use iovlen until iovcnt is validated */
211 iovlen = iovcnt * sizeof(struct iovec);
212 if ((u_int)iovcnt > UIO_SMALLIOV) {
213 if ((u_int)iovcnt > IOV_MAX) {
214 error = EINVAL;
215 goto out;
216 }
217 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
218 } else if ((u_int)iovcnt > 0) {
219 iov = aiov;
220 needfree = NULL;
221 } else {
222 error = EINVAL;
223 goto out;
224 }
225
226 auio.uio_iov = iov;
227 auio.uio_iovcnt = iovcnt;
228 auio.uio_rw = UIO_READ;
229 auio.uio_segflg = UIO_USERSPACE;
230 auio.uio_procp = p;
231 error = copyin(iovp, iov, iovlen);
232 if (error)
233 goto done;
234 auio.uio_resid = 0;
235 for (i = 0; i < iovcnt; i++) {
236 auio.uio_resid += iov->iov_len;
237 /*
238 * Reads return ssize_t because -1 is returned on error.
239 * Therefore we must restrict the length to SSIZE_MAX to
240 * avoid garbage return values.
241 */
242 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
243 error = EINVAL;
244 goto done;
245 }
246 iov++;
247 }
248 #ifdef KTRACE
249 /*
250 * if tracing, save a copy of iovec
251 */
252 if (KTRPOINT(p, KTR_GENIO)) {
253 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
254 bcopy(auio.uio_iov, ktriov, iovlen);
255 }
256 #endif
257 cnt = auio.uio_resid;
258 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
259 if (error)
260 if (auio.uio_resid != cnt && (error == ERESTART ||
261 error == EINTR || error == EWOULDBLOCK))
262 error = 0;
263 cnt -= auio.uio_resid;
264 #ifdef KTRACE
265 if (ktriov != NULL) {
266 if (error == 0)
267 ktrgenio(p, fd, UIO_READ, ktriov, cnt,
268 error);
269 free(ktriov, M_TEMP);
270 }
271 #endif
272 *retval = cnt;
273 done:
274 if (needfree)
275 free(needfree, M_IOV);
276 out:
277 FRELE(fp);
278 return (error);
279 }
280
281 /*
282 * Write system call
283 */
284 int
sys_write(p,v,retval)285 sys_write(p, v, retval)
286 struct proc *p;
287 void *v;
288 register_t *retval;
289 {
290 struct sys_write_args /* {
291 syscallarg(int) fd;
292 syscallarg(const void *) buf;
293 syscallarg(size_t) nbyte;
294 } */ *uap = v;
295 int fd = SCARG(uap, fd);
296 struct file *fp;
297 struct filedesc *fdp = p->p_fd;
298
299 if ((fp = fd_getfile(fdp, fd)) == NULL)
300 return (EBADF);
301 if ((fp->f_flag & FWRITE) == 0)
302 return (EBADF);
303
304 FREF(fp);
305
306 /* dofilewrite() will FRELE the descriptor for us */
307 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
308 &fp->f_offset, retval));
309 }
310
311 int
dofilewrite(p,fd,fp,buf,nbyte,offset,retval)312 dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
313 struct proc *p;
314 int fd;
315 struct file *fp;
316 const void *buf;
317 size_t nbyte;
318 off_t *offset;
319 register_t *retval;
320 {
321 struct uio auio;
322 struct iovec aiov;
323 long cnt, error = 0;
324 #ifdef KTRACE
325 struct iovec ktriov;
326 #endif
327
328 aiov.iov_base = (void *)buf; /* XXX kills const */
329 aiov.iov_len = nbyte;
330 auio.uio_iov = &aiov;
331 auio.uio_iovcnt = 1;
332 auio.uio_resid = nbyte;
333 auio.uio_rw = UIO_WRITE;
334 auio.uio_segflg = UIO_USERSPACE;
335 auio.uio_procp = p;
336
337 /*
338 * Writes return ssize_t because -1 is returned on error. Therefore
339 * we must restrict the length to SSIZE_MAX to avoid garbage return
340 * values.
341 */
342 if (auio.uio_resid > SSIZE_MAX) {
343 error = EINVAL;
344 goto out;
345 }
346
347 #ifdef KTRACE
348 /*
349 * if tracing, save a copy of iovec
350 */
351 if (KTRPOINT(p, KTR_GENIO))
352 ktriov = aiov;
353 #endif
354 cnt = auio.uio_resid;
355 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
356 if (error) {
357 if (auio.uio_resid != cnt && (error == ERESTART ||
358 error == EINTR || error == EWOULDBLOCK))
359 error = 0;
360 if (error == EPIPE)
361 psignal(p, SIGPIPE);
362 }
363 cnt -= auio.uio_resid;
364 #ifdef KTRACE
365 if (KTRPOINT(p, KTR_GENIO) && error == 0)
366 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
367 #endif
368 *retval = cnt;
369 out:
370 FRELE(fp);
371 return (error);
372 }
373
374 /*
375 * Gather write system call
376 */
377 int
sys_writev(p,v,retval)378 sys_writev(p, v, retval)
379 struct proc *p;
380 void *v;
381 register_t *retval;
382 {
383 struct sys_writev_args /* {
384 syscallarg(int) fd;
385 syscallarg(const struct iovec *) iovp;
386 syscallarg(int) iovcnt;
387 } */ *uap = v;
388 int fd = SCARG(uap, fd);
389 struct file *fp;
390 struct filedesc *fdp = p->p_fd;
391
392 if ((fp = fd_getfile(fdp, fd)) == NULL)
393 return (EBADF);
394 if ((fp->f_flag & FWRITE) == 0)
395 return (EBADF);
396
397 FREF(fp);
398
399 /* dofilewritev() will FRELE the descriptor for us */
400 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
401 &fp->f_offset, retval));
402 }
403
404 int
dofilewritev(p,fd,fp,iovp,iovcnt,offset,retval)405 dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
406 struct proc *p;
407 int fd;
408 struct file *fp;
409 const struct iovec *iovp;
410 int iovcnt;
411 off_t *offset;
412 register_t *retval;
413 {
414 struct uio auio;
415 struct iovec *iov;
416 struct iovec *needfree;
417 struct iovec aiov[UIO_SMALLIOV];
418 long i, cnt, error = 0;
419 u_int iovlen;
420 #ifdef KTRACE
421 struct iovec *ktriov = NULL;
422 #endif
423
424 /* note: can't use iovlen until iovcnt is validated */
425 iovlen = iovcnt * sizeof(struct iovec);
426 if ((u_int)iovcnt > UIO_SMALLIOV) {
427 if ((u_int)iovcnt > IOV_MAX) {
428 error = EINVAL;
429 goto out;
430 }
431 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
432 } else if ((u_int)iovcnt > 0) {
433 iov = aiov;
434 needfree = NULL;
435 } else {
436 error = EINVAL;
437 goto out;
438 }
439
440 auio.uio_iov = iov;
441 auio.uio_iovcnt = iovcnt;
442 auio.uio_rw = UIO_WRITE;
443 auio.uio_segflg = UIO_USERSPACE;
444 auio.uio_procp = p;
445 error = copyin(iovp, iov, iovlen);
446 if (error)
447 goto done;
448 auio.uio_resid = 0;
449 for (i = 0; i < iovcnt; i++) {
450 auio.uio_resid += iov->iov_len;
451 /*
452 * Writes return ssize_t because -1 is returned on error.
453 * Therefore we must restrict the length to SSIZE_MAX to
454 * avoid garbage return values.
455 */
456 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
457 error = EINVAL;
458 goto done;
459 }
460 iov++;
461 }
462 #ifdef KTRACE
463 /*
464 * if tracing, save a copy of iovec
465 */
466 if (KTRPOINT(p, KTR_GENIO)) {
467 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
468 bcopy(auio.uio_iov, ktriov, iovlen);
469 }
470 #endif
471 cnt = auio.uio_resid;
472 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
473 if (error) {
474 if (auio.uio_resid != cnt && (error == ERESTART ||
475 error == EINTR || error == EWOULDBLOCK))
476 error = 0;
477 if (error == EPIPE)
478 psignal(p, SIGPIPE);
479 }
480 cnt -= auio.uio_resid;
481 #ifdef KTRACE
482 if (ktriov != NULL) {
483 if (error == 0)
484 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt,
485 error);
486 free(ktriov, M_TEMP);
487 }
488 #endif
489 *retval = cnt;
490 done:
491 if (needfree)
492 free(needfree, M_IOV);
493 out:
494 FRELE(fp);
495 return (error);
496 }
497
498 /*
499 * Ioctl system call
500 */
501 /* ARGSUSED */
502 int
sys_ioctl(p,v,retval)503 sys_ioctl(p, v, retval)
504 struct proc *p;
505 void *v;
506 register_t *retval;
507 {
508 struct sys_ioctl_args /* {
509 syscallarg(int) fd;
510 syscallarg(u_long) com;
511 syscallarg(void *) data;
512 } */ *uap = v;
513 struct file *fp;
514 struct filedesc *fdp;
515 u_long com;
516 int error;
517 u_int size;
518 caddr_t data, memp;
519 int tmp;
520 #define STK_PARAMS 128
521 char stkbuf[STK_PARAMS];
522
523 fdp = p->p_fd;
524 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
525 return (EBADF);
526
527 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
528 return (EBADF);
529
530 switch (com = SCARG(uap, com)) {
531 case FIONCLEX:
532 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
533 return (0);
534 case FIOCLEX:
535 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
536 return (0);
537 }
538
539 /*
540 * Interpret high order word to find amount of data to be
541 * copied to/from the user's address space.
542 */
543 size = IOCPARM_LEN(com);
544 if (size > IOCPARM_MAX)
545 return (ENOTTY);
546 FREF(fp);
547 memp = NULL;
548 if (size > sizeof (stkbuf)) {
549 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
550 data = memp;
551 } else
552 data = stkbuf;
553 if (com&IOC_IN) {
554 if (size) {
555 error = copyin(SCARG(uap, data), data, (u_int)size);
556 if (error) {
557 goto out;
558 }
559 } else
560 *(caddr_t *)data = SCARG(uap, data);
561 } else if ((com&IOC_OUT) && size)
562 /*
563 * Zero the buffer so the user always
564 * gets back something deterministic.
565 */
566 bzero(data, size);
567 else if (com&IOC_VOID)
568 *(caddr_t *)data = SCARG(uap, data);
569
570 switch (com) {
571
572 case FIONBIO:
573 if ((tmp = *(int *)data) != 0)
574 fp->f_flag |= FNONBLOCK;
575 else
576 fp->f_flag &= ~FNONBLOCK;
577 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
578 break;
579
580 case FIOASYNC:
581 if ((tmp = *(int *)data) != 0)
582 fp->f_flag |= FASYNC;
583 else
584 fp->f_flag &= ~FASYNC;
585 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
586 break;
587
588 case FIOSETOWN:
589 tmp = *(int *)data;
590 if (fp->f_type == DTYPE_SOCKET) {
591 struct socket *so = (struct socket *)fp->f_data;
592
593 so->so_pgid = tmp;
594 so->so_siguid = p->p_cred->p_ruid;
595 so->so_sigeuid = p->p_ucred->cr_uid;
596 error = 0;
597 break;
598 }
599 if (tmp <= 0) {
600 tmp = -tmp;
601 } else {
602 struct proc *p1 = pfind(tmp);
603 if (p1 == 0) {
604 error = ESRCH;
605 break;
606 }
607 tmp = p1->p_pgrp->pg_id;
608 }
609 error = (*fp->f_ops->fo_ioctl)
610 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
611 break;
612
613 case FIOGETOWN:
614 if (fp->f_type == DTYPE_SOCKET) {
615 error = 0;
616 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
617 break;
618 }
619 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
620 *(int *)data = -*(int *)data;
621 break;
622
623 default:
624 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
625 /*
626 * Copy any data to user, size was
627 * already set and checked above.
628 */
629 if (error == 0 && (com&IOC_OUT) && size)
630 error = copyout(data, SCARG(uap, data), (u_int)size);
631 break;
632 }
633 out:
634 FRELE(fp);
635 if (memp)
636 free(memp, M_IOCTLOPS);
637 return (error);
638 }
639
/* Its address is the wait channel select/poll sleepers tsleep() on. */
int selwait;
/* Collision counter: bumped by selwakeup(), sampled by select/poll. */
int nselcoll;
641
642 /*
643 * Select system call.
644 */
645 int
sys_select(struct proc * p,void * v,register_t * retval)646 sys_select(struct proc *p, void *v, register_t *retval)
647 {
648 struct sys_select_args /* {
649 syscallarg(int) nd;
650 syscallarg(fd_set *) in;
651 syscallarg(fd_set *) ou;
652 syscallarg(fd_set *) ex;
653 syscallarg(struct timeval *) tv;
654 } */ *uap = v;
655 fd_set bits[6], *pibits[3], *pobits[3];
656 struct timeval atv;
657 int s, ncoll, error = 0, timo = 0;
658 u_int nd, ni;
659
660 nd = SCARG(uap, nd);
661 if (nd > p->p_fd->fd_nfiles) {
662 /* forgiving; slightly wrong */
663 nd = p->p_fd->fd_nfiles;
664 }
665 ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
666 if (nd > FD_SETSIZE) {
667 caddr_t mbits;
668
669 mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
670 bzero(mbits, ni * 6);
671 pibits[0] = (fd_set *)&mbits[ni * 0];
672 pibits[1] = (fd_set *)&mbits[ni * 1];
673 pibits[2] = (fd_set *)&mbits[ni * 2];
674 pobits[0] = (fd_set *)&mbits[ni * 3];
675 pobits[1] = (fd_set *)&mbits[ni * 4];
676 pobits[2] = (fd_set *)&mbits[ni * 5];
677 } else {
678 bzero(bits, sizeof(bits));
679 pibits[0] = &bits[0];
680 pibits[1] = &bits[1];
681 pibits[2] = &bits[2];
682 pobits[0] = &bits[3];
683 pobits[1] = &bits[4];
684 pobits[2] = &bits[5];
685 }
686
687 #define getbits(name, x) \
688 if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
689 pibits[x], ni))) \
690 goto done;
691 getbits(in, 0);
692 getbits(ou, 1);
693 getbits(ex, 2);
694 #undef getbits
695
696 if (SCARG(uap, tv)) {
697 error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
698 if (error)
699 goto done;
700 if (itimerfix(&atv)) {
701 error = EINVAL;
702 goto done;
703 }
704 s = splclock();
705 timeradd(&atv, &time, &atv);
706 splx(s);
707 } else
708 timo = 0;
709 retry:
710 ncoll = nselcoll;
711 p->p_flag |= P_SELECT;
712 error = selscan(p, pibits[0], pobits[0], nd, retval);
713 if (error || *retval)
714 goto done;
715 if (SCARG(uap, tv)) {
716 /*
717 * We have to recalculate the timeout on every retry.
718 */
719 timo = hzto(&atv);
720 if (timo <= 0)
721 goto done;
722 }
723 s = splhigh();
724 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
725 splx(s);
726 goto retry;
727 }
728 p->p_flag &= ~P_SELECT;
729 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
730 splx(s);
731 if (error == 0)
732 goto retry;
733 done:
734 p->p_flag &= ~P_SELECT;
735 /* select is not restarted after signals... */
736 if (error == ERESTART)
737 error = EINTR;
738 if (error == EWOULDBLOCK)
739 error = 0;
740 #define putbits(name, x) \
741 if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
742 SCARG(uap, name), ni))) \
743 error = error2;
744 if (error == 0) {
745 int error2;
746
747 putbits(in, 0);
748 putbits(ou, 1);
749 putbits(ex, 2);
750 #undef putbits
751 }
752
753 if (pibits[0] != &bits[0])
754 free(pibits[0], M_TEMP);
755 return (error);
756 }
757
758 int
selscan(p,ibits,obits,nfd,retval)759 selscan(p, ibits, obits, nfd, retval)
760 struct proc *p;
761 fd_set *ibits, *obits;
762 int nfd;
763 register_t *retval;
764 {
765 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
766 register struct filedesc *fdp = p->p_fd;
767 register int msk, i, j, fd;
768 register fd_mask bits;
769 struct file *fp;
770 int ni, n = 0;
771 static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };
772
773 /*
774 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
775 * up to the next byte) otherwise the fd_set's are normal sized.
776 */
777 ni = sizeof(fd_set);
778 if (nfd > FD_SETSIZE)
779 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);
780
781 for (msk = 0; msk < 3; msk++) {
782 fd_set *pibits = (fd_set *)&cibits[msk*ni];
783 fd_set *pobits = (fd_set *)&cobits[msk*ni];
784
785 for (i = 0; i < nfd; i += NFDBITS) {
786 bits = pibits->fds_bits[i/NFDBITS];
787 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
788 bits &= ~(1 << j);
789 if ((fp = fd_getfile(fdp, fd)) == NULL)
790 return (EBADF);
791 FREF(fp);
792 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
793 FD_SET(fd, pobits);
794 n++;
795 }
796 FRELE(fp);
797 }
798 }
799 }
800 *retval = n;
801 return (0);
802 }
803
804 /*ARGSUSED*/
805 int
seltrue(dev,events,p)806 seltrue(dev, events, p)
807 dev_t dev;
808 int events;
809 struct proc *p;
810 {
811
812 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
813 }
814
815 /*
816 * Record a select request.
817 */
818 void
selrecord(selector,sip)819 selrecord(selector, sip)
820 struct proc *selector;
821 struct selinfo *sip;
822 {
823 struct proc *p;
824 pid_t mypid;
825
826 mypid = selector->p_pid;
827 if (sip->si_selpid == mypid)
828 return;
829 if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
830 p->p_wchan == (caddr_t)&selwait)
831 sip->si_flags |= SI_COLL;
832 else
833 sip->si_selpid = mypid;
834 }
835
836 /*
837 * Do a wakeup when a selectable event occurs.
838 */
839 void
selwakeup(sip)840 selwakeup(sip)
841 register struct selinfo *sip;
842 {
843 register struct proc *p;
844 int s;
845
846 if (sip->si_selpid == 0)
847 return;
848 if (sip->si_flags & SI_COLL) {
849 nselcoll++;
850 sip->si_flags &= ~SI_COLL;
851 wakeup(&selwait);
852 }
853 p = pfind(sip->si_selpid);
854 sip->si_selpid = 0;
855 if (p != NULL) {
856 s = splhigh();
857 if (p->p_wchan == (caddr_t)&selwait) {
858 if (p->p_stat == SSLEEP)
859 setrunnable(p);
860 else
861 unsleep(p);
862 } else if (p->p_flag & P_SELECT)
863 p->p_flag &= ~P_SELECT;
864 splx(s);
865 }
866 }
867
868 void
pollscan(p,pl,nfd,retval)869 pollscan(p, pl, nfd, retval)
870 struct proc *p;
871 struct pollfd *pl;
872 u_int nfd;
873 register_t *retval;
874 {
875 struct filedesc *fdp = p->p_fd;
876 struct file *fp;
877 u_int i;
878 int n = 0;
879
880 for (i = 0; i < nfd; i++, pl++) {
881 /* Check the file descriptor. */
882 if (pl->fd < 0) {
883 pl->revents = 0;
884 continue;
885 }
886 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
887 pl->revents = POLLNVAL;
888 n++;
889 continue;
890 }
891 FREF(fp);
892 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
893 FRELE(fp);
894 if (pl->revents != 0)
895 n++;
896 }
897 *retval = n;
898 }
899
900 /*
901 * We are using the same mechanism as select only we encode/decode args
902 * differently.
903 */
904 int
sys_poll(struct proc * p,void * v,register_t * retval)905 sys_poll(struct proc *p, void *v, register_t *retval)
906 {
907 struct sys_poll_args /* {
908 syscallarg(struct pollfd *) fds;
909 syscallarg(u_int) nfds;
910 syscallarg(int) timeout;
911 } */ *uap = v;
912 size_t sz;
913 struct pollfd pfds[4], *pl = pfds;
914 int msec = SCARG(uap, timeout);
915 struct timeval atv;
916 int timo = 0, ncoll, i, s, error;
917 extern int nselcoll, selwait;
918 u_int nfds = SCARG(uap, nfds);
919
920 /* Standards say no more than MAX_OPEN; this is possibly better. */
921 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
922 return (EINVAL);
923
924 sz = sizeof(struct pollfd) * nfds;
925
926 /* optimize for the default case, of a small nfds value */
927 if (sz > sizeof(pfds))
928 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);
929
930 if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
931 goto bad;
932
933 for (i = 0; i < nfds; i++)
934 pl[i].revents = 0;
935
936 if (msec != INFTIM) {
937 atv.tv_sec = msec / 1000;
938 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;
939
940 if (itimerfix(&atv)) {
941 error = EINVAL;
942 goto done;
943 }
944 s = splclock();
945 timeradd(&atv, &time, &atv);
946 splx(s);
947 } else
948 timo = 0;
949
950 retry:
951 ncoll = nselcoll;
952 p->p_flag |= P_SELECT;
953 pollscan(p, pl, nfds, retval);
954 if (*retval)
955 goto done;
956 if (msec != INFTIM) {
957 /*
958 * We have to recalculate the timeout on every retry.
959 */
960 timo = hzto(&atv);
961 if (timo <= 0)
962 goto done;
963 }
964 s = splhigh();
965 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
966 splx(s);
967 goto retry;
968 }
969 p->p_flag &= ~P_SELECT;
970 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
971 splx(s);
972 if (error == 0)
973 goto retry;
974
975 done:
976 p->p_flag &= ~P_SELECT;
977 /*
978 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
979 * ignored (since the whole point is to see what would block).
980 */
981 switch (error) {
982 case ERESTART:
983 error = EINTR;
984 break;
985 case EWOULDBLOCK:
986 case 0:
987 error = copyout(pl, SCARG(uap, fds), sz);
988 break;
989 }
990 bad:
991 if (pl != pfds)
992 free(pl, M_TEMP);
993 return (error);
994 }
995