1 /** $MirOS: src/sys/kern/kern_descrip.c,v 1.3 2006/01/31 10:27:03 tg Exp $ */
2 /* $OpenBSD: kern_descrip.c,v 1.68 2004/01/12 18:06:51 tedu Exp $ */
3 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */
4
5 /*
6 * Copyright (c) 1982, 1986, 1989, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
8 * (c) UNIX System Laboratories, Inc.
9 * All or some portions of this file are derived from material licensed
10 * to the University of California by American Telephone and Telegraph
11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12 * the permission of UNIX System Laboratories, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <sys/proc.h>
47 #include <sys/file.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/stat.h>
51 #include <sys/ioctl.h>
52 #include <sys/fcntl.h>
53 #include <sys/malloc.h>
54 #include <sys/syslog.h>
55 #include <sys/ucred.h>
56 #include <sys/unistd.h>
57 #include <sys/resourcevar.h>
58 #include <sys/conf.h>
59 #include <sys/mount.h>
60 #include <sys/syscallargs.h>
61 #include <sys/event.h>
62 #include <sys/pool.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <sys/pipe.h>
67
68 /*
69 * Descriptor management.
70 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */

/*
 * Forward declarations for helpers defined later in this file.
 * fd_used()/fd_unused() maintain the descriptor bitmaps and the
 * fd_lastfile/fd_freefile hints; find_next_zero() and find_last_set()
 * search those bitmaps; finishdup() is shared by dup, dup2 and
 * fcntl(F_DUPFD).
 */
static __inline void fd_used(struct filedesc *, int);
static __inline void fd_unused(struct filedesc *, int);
static __inline int find_next_zero(u_int *, int, u_int);
int finishdup(struct proc *, struct file *, int, int, register_t *);
int find_last_set(struct filedesc *, int);

/*
 * Allocation pools, initialized in filedesc_init():
 * file_pool hands out struct file, fdesc_pool hands out struct filedesc0.
 */
struct pool file_pool;
struct pool fdesc_pool;
82
83 void
filedesc_init()84 filedesc_init()
85 {
86 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
87 &pool_allocator_nointr);
88 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
89 &pool_allocator_nointr);
90 LIST_INIT(&filehead);
91 }
92
93 static __inline int
find_next_zero(u_int * bitmap,int want,u_int bits)94 find_next_zero (u_int *bitmap, int want, u_int bits)
95 {
96 int i, off, maxoff;
97 u_int sub;
98
99 if (want > bits)
100 return -1;
101
102 off = want >> NDENTRYSHIFT;
103 i = want & NDENTRYMASK;
104 if (i) {
105 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
106 if (sub != ~0)
107 goto found;
108 off++;
109 }
110
111 maxoff = NDLOSLOTS(bits);
112 while (off < maxoff) {
113 if ((sub = bitmap[off]) != ~0)
114 goto found;
115 off++;
116 }
117
118 return -1;
119
120 found:
121 return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
122 }
123
124 int
find_last_set(struct filedesc * fd,int last)125 find_last_set(struct filedesc *fd, int last)
126 {
127 int off, i;
128 struct file **ofiles = fd->fd_ofiles;
129 u_int *bitmap = fd->fd_lomap;
130
131 off = (last - 1) >> NDENTRYSHIFT;
132
133 while (off >= 0 && !bitmap[off])
134 off--;
135 if (off < 0)
136 return 0;
137
138 i = ((off + 1) << NDENTRYSHIFT) - 1;
139 if (i >= last)
140 i = last - 1;
141
142 while (i > 0 && ofiles[i] == NULL)
143 i--;
144 return i;
145 }
146
147 static __inline void
fd_used(fdp,fd)148 fd_used(fdp, fd)
149 struct filedesc *fdp;
150 int fd;
151 {
152 u_int off = fd >> NDENTRYSHIFT;
153
154 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
155 if (fdp->fd_lomap[off] == ~0)
156 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
157
158 if (fd > fdp->fd_lastfile)
159 fdp->fd_lastfile = fd;
160 }
161
162 static __inline void
fd_unused(fdp,fd)163 fd_unused(fdp, fd)
164 struct filedesc *fdp;
165 int fd;
166 {
167 u_int off = fd >> NDENTRYSHIFT;
168
169 if (fd < fdp->fd_freefile)
170 fdp->fd_freefile = fd;
171
172 if (fdp->fd_lomap[off] == ~0)
173 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK));
174 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
175
176 #ifdef DIAGNOSTIC
177 if (fd > fdp->fd_lastfile)
178 panic("fd_unused: fd_lastfile inconsistent");
179 #endif
180 if (fd == fdp->fd_lastfile)
181 fdp->fd_lastfile = find_last_set(fdp, fd);
182 }
183
184 struct file *
fd_getfile(fdp,fd)185 fd_getfile(fdp, fd)
186 struct filedesc *fdp;
187 int fd;
188 {
189 struct file *fp;
190
191 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
192 return (NULL);
193
194 if (!FILE_IS_USABLE(fp))
195 return (NULL);
196
197 return (fp);
198 }
199
200 /*
201 * System calls on descriptors.
202 */
203
204 /*
205 * Duplicate a file descriptor.
206 */
207 /* ARGSUSED */
208 int
sys_dup(p,v,retval)209 sys_dup(p, v, retval)
210 struct proc *p;
211 void *v;
212 register_t *retval;
213 {
214 struct sys_dup_args /* {
215 syscallarg(int) fd;
216 } */ *uap = v;
217 struct filedesc *fdp = p->p_fd;
218 int old = SCARG(uap, fd);
219 struct file *fp;
220 int new;
221 int error;
222
223 restart:
224 if ((fp = fd_getfile(fdp, old)) == NULL)
225 return (EBADF);
226 FREF(fp);
227 fdplock(fdp, p);
228 if ((error = fdalloc(p, 0, &new)) != 0) {
229 FRELE(fp);
230 if (error == ENOSPC) {
231 fdexpand(p);
232 fdpunlock(fdp);
233 goto restart;
234 }
235 goto out;
236 }
237 error = finishdup(p, fp, old, new, retval);
238
239 out:
240 fdpunlock(fdp);
241 return (error);
242 }
243
244 /*
245 * Duplicate a file descriptor to a particular value.
246 */
247 /* ARGSUSED */
248 int
sys_dup2(p,v,retval)249 sys_dup2(p, v, retval)
250 struct proc *p;
251 void *v;
252 register_t *retval;
253 {
254 struct sys_dup2_args /* {
255 syscallarg(int) from;
256 syscallarg(int) to;
257 } */ *uap = v;
258 int old = SCARG(uap, from), new = SCARG(uap, to);
259 struct filedesc *fdp = p->p_fd;
260 struct file *fp;
261 int i, error;
262
263 restart:
264 if ((fp = fd_getfile(fdp, old)) == NULL)
265 return (EBADF);
266 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
267 (u_int)new >= maxfiles)
268 return (EBADF);
269 if (old == new) {
270 /*
271 * NOTE! This doesn't clear the close-on-exec flag. This might
272 * or might not be the intended behavior from the start, but
273 * this is what everyone else does.
274 */
275 *retval = new;
276 return (0);
277 }
278 FREF(fp);
279 fdplock(fdp, p);
280 if (new >= fdp->fd_nfiles) {
281 if ((error = fdalloc(p, new, &i)) != 0) {
282 FRELE(fp);
283 if (error == ENOSPC) {
284 fdexpand(p);
285 fdpunlock(fdp);
286 goto restart;
287 }
288 goto out;
289 }
290 if (new != i)
291 panic("dup2: fdalloc");
292 }
293 /* finishdup() does FRELE */
294 error = finishdup(p, fp, old, new, retval);
295
296 out:
297 fdpunlock(fdp);
298 return (error);
299 }
300
/*
 * The file control system call.
 *
 * Looks up descriptor `fd', takes a use reference on the file and
 * dispatches on `cmd':
 *   F_DUPFD           duplicate onto the lowest free descriptor >= arg
 *   F_GETFD/F_SETFD   read/write the per-descriptor close-on-exec flag
 *   F_GETFL/F_SETFL   read/write the file status flags
 *   F_GETOWN/F_SETOWN read/write the owner used for signal delivery
 *   F_GETLK/F_SETLK/F_SETLKW  POSIX advisory record locks (vnodes only)
 *
 * Returns 0 or an errno value; EBADF if `fd' is not usable, EINVAL for
 * an unknown command. Every path except F_DUPFD (which returns
 * directly) leaves through `out', where the use reference is dropped.
 */
/* ARGSUSED */
int
sys_fcntl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_fcntl_args /* {
		syscallarg(int) fd;
		syscallarg(int) cmd;
		syscallarg(void *) arg;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	int i, tmp, newmin, flg = F_POSIX;
	struct flock fl;
	int error = 0;

restart:
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	FREF(fp);
	switch (SCARG(uap, cmd)) {

	case F_DUPFD:
		/* arg is the lowest acceptable new descriptor number. */
		newmin = (long)SCARG(uap, arg);
		if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    (u_int)newmin >= maxfiles) {
			error = EINVAL;
			break;
		}
		fdplock(fdp, p);
		if ((error = fdalloc(p, newmin, &i)) != 0) {
			if (error == ENOSPC) {
				/* Grow the table, drop everything, retry. */
				fdexpand(p);
				FRELE(fp);
				fdpunlock(fdp);
				goto restart;
			}
		}
		/* finishdup will FRELE for us. */
		if (!error)
			error = finishdup(p, fp, fd, i, retval);
		else
			FRELE(fp);

		fdpunlock(fdp);
		return (error);

	case F_GETFD:
		/* Report 1 if close-on-exec is set, else 0. */
		*retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
		break;

	case F_SETFD:
		/* Only bit 0 of arg is looked at. */
		if ((long)SCARG(uap, arg) & 1)
			fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
		else
			fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
		break;

	case F_GETFL:
		*retval = OFLAGS(fp->f_flag);
		break;

	case F_SETFL:
		/* Replace the fcntl-settable flags, then tell the object. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
		tmp = fp->f_flag & FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		if (error)
			break;
		tmp = fp->f_flag & FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		if (!error)
			break;
		/*
		 * FIOASYNC failed: back out non-blocking mode as well and
		 * return the FIOASYNC error.
		 */
		fp->f_flag &= ~FNONBLOCK;
		tmp = 0;
		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case F_GETOWN:
		/* Sockets store the owner directly. */
		if (fp->f_type == DTYPE_SOCKET) {
			*retval = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		/* Everything else: ask via TIOCGPGRP and negate the answer. */
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCGPGRP, (caddr_t)&tmp, p);
		*retval = -tmp;
		break;

	case F_SETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			/* Record the owner plus the caller's real/effective uid. */
			so->so_pgid = (long)SCARG(uap, arg);
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			break;
		}
		/*
		 * Non-sockets take a process group id via TIOCSPGRP:
		 * arg <= 0 is negated, arg > 0 is treated as a pid and
		 * translated to that process's group.
		 */
		if ((long)SCARG(uap, arg) <= 0) {
			SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg));
		} else {
			struct proc *p1 = pfind((long)SCARG(uap, arg));
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			SCARG(uap, arg) = (void *)(long)p1->p_pgrp->pg_id;
		}
		error = ((*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p));
		break;

	case F_SETLKW:
		/* Same as F_SETLK, but with F_WAIT added to the flags. */
		flg |= F_WAIT;
		/* FALLTHROUGH */

	case F_SETLK:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			break;
		}
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			break;
		/* Turn a SEEK_CUR-relative start into an absolute offset. */
		if (fl.l_whence == SEEK_CUR) {
			if (fl.l_start == 0 && fl.l_len < 0) {
				/* lockf(3) compliance hack */
				fl.l_len = -fl.l_len;
				fl.l_start = fp->f_offset - fl.l_len;
			} else
				fl.l_start += fp->f_offset;
		}
		switch (fl.l_type) {

		case F_RDLCK:
			/* A read lock needs the file open for reading. */
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				goto out;
			}
			/* Flag the process; closef() tests P_ADVLOCK. */
			p->p_flag |= P_ADVLOCK;
			error = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
			goto out;

		case F_WRLCK:
			/* A write lock needs the file open for writing. */
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				goto out;
			}
			p->p_flag |= P_ADVLOCK;
			error = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
			goto out;

		case F_UNLCK:
			error = (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
				F_POSIX));
			goto out;

		default:
			error = EINVAL;
			goto out;
		}

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			break;
		}
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			break;
		/* Same SEEK_CUR normalization as for F_SETLK above. */
		if (fl.l_whence == SEEK_CUR) {
			if (fl.l_start == 0 && fl.l_len < 0) {
				/* lockf(3) compliance hack */
				fl.l_len = -fl.l_len;
				fl.l_start = fp->f_offset - fl.l_len;
			} else
				fl.l_start += fp->f_offset;
		}
		if (fl.l_type != F_RDLCK &&
		    fl.l_type != F_WRLCK &&
		    fl.l_type != F_UNLCK &&
		    fl.l_type != 0) {
			error = EINVAL;
			break;
		}
		error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX);
		if (error)
			break;
		/* Hand the (possibly updated) lock description back. */
		error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
		    sizeof (fl)));
		break;

	default:
		error = EINVAL;
		break;
	}
out:
	FRELE(fp);
	return (error);
}
513
514 /*
515 * Common code for dup, dup2, and fcntl(F_DUPFD).
516 */
517 int
finishdup(struct proc * p,struct file * fp,int old,int new,register_t * retval)518 finishdup(struct proc *p, struct file *fp, int old, int new, register_t *retval)
519 {
520 struct file *oldfp;
521 struct filedesc *fdp = p->p_fd;
522
523 if (fp->f_count == LONG_MAX-2) {
524 FRELE(fp);
525 return (EDEADLK);
526 }
527
528 /*
529 * Don't fd_getfile here. We want to closef LARVAL files and
530 * closef can deal with that.
531 */
532 oldfp = fdp->fd_ofiles[new];
533 if (oldfp != NULL)
534 FREF(oldfp);
535
536 fdp->fd_ofiles[new] = fp;
537 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
538 fp->f_count++;
539 FRELE(fp);
540 if (oldfp == NULL)
541 fd_used(fdp, new);
542 *retval = new;
543
544 if (oldfp != NULL) {
545 if (new < fdp->fd_knlistsize)
546 knote_fdclose(p, new);
547 closef(oldfp, p);
548 }
549
550 return (0);
551 }
552
553 void
fdremove(fdp,fd)554 fdremove(fdp, fd)
555 struct filedesc *fdp;
556 int fd;
557 {
558 fdp->fd_ofiles[fd] = NULL;
559 fd_unused(fdp, fd);
560 }
561
562 int
fdrelease(p,fd)563 fdrelease(p, fd)
564 struct proc *p;
565 int fd;
566 {
567 struct filedesc *fdp = p->p_fd;
568 struct file **fpp, *fp;
569
570 /*
571 * Don't fd_getfile here. We want to closef LARVAL files and closef
572 * can deal with that.
573 */
574 fpp = &fdp->fd_ofiles[fd];
575 fp = *fpp;
576 if (fp == NULL)
577 return (EBADF);
578 FREF(fp);
579 *fpp = NULL;
580 fdp->fd_ofileflags[fd] = 0;
581 fd_unused(fdp, fd);
582 if (fd < fdp->fd_knlistsize)
583 knote_fdclose(p, fd);
584 return (closef(fp, p));
585 }
586
587 /*
588 * Close a file descriptor.
589 */
590 /* ARGSUSED */
591 int
sys_close(p,v,retval)592 sys_close(p, v, retval)
593 struct proc *p;
594 void *v;
595 register_t *retval;
596 {
597 struct sys_close_args /* {
598 syscallarg(int) fd;
599 } */ *uap = v;
600 int fd = SCARG(uap, fd), error;
601 struct filedesc *fdp = p->p_fd;
602
603 if (fd_getfile(fdp, fd) == NULL)
604 return (EBADF);
605 fdplock(fdp, p);
606 error = fdrelease(p, fd);
607 fdpunlock(fdp);
608
609 return (error);
610 }
611
612 /*
613 * Return status information about a file descriptor.
614 */
615 /* ARGSUSED */
616 int
sys_fstat(p,v,retval)617 sys_fstat(p, v, retval)
618 struct proc *p;
619 void *v;
620 register_t *retval;
621 {
622 struct sys_fstat_args /* {
623 syscallarg(int) fd;
624 syscallarg(struct stat *) sb;
625 } */ *uap = v;
626 int fd = SCARG(uap, fd);
627 struct filedesc *fdp = p->p_fd;
628 struct file *fp;
629 struct stat ub;
630 int error;
631
632 if ((fp = fd_getfile(fdp, fd)) == NULL)
633 return (EBADF);
634 FREF(fp);
635 error = (*fp->f_ops->fo_stat)(fp, &ub, p);
636 FRELE(fp);
637 if (error == 0) {
638 /* Don't let non-root see generation numbers
639 (for NFS security) */
640 if (suser(p, 0))
641 ub.st_gen = 0;
642 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb),
643 sizeof (ub));
644 }
645 return (error);
646 }
647
648 /*
649 * Return pathconf information about a file descriptor.
650 */
651 /* ARGSUSED */
652 int
sys_fpathconf(p,v,retval)653 sys_fpathconf(p, v, retval)
654 struct proc *p;
655 void *v;
656 register_t *retval;
657 {
658 struct sys_fpathconf_args /* {
659 syscallarg(int) fd;
660 syscallarg(int) name;
661 } */ *uap = v;
662 int fd = SCARG(uap, fd);
663 struct filedesc *fdp = p->p_fd;
664 struct file *fp;
665 struct vnode *vp;
666 int error;
667
668 if ((fp = fd_getfile(fdp, fd)) == NULL)
669 return (EBADF);
670 FREF(fp);
671 switch (fp->f_type) {
672 case DTYPE_PIPE:
673 case DTYPE_SOCKET:
674 if (SCARG(uap, name) != _PC_PIPE_BUF) {
675 error = EINVAL;
676 break;
677 }
678 *retval = PIPE_BUF;
679 error = 0;
680 break;
681
682 case DTYPE_VNODE:
683 vp = (struct vnode *)fp->f_data;
684 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
685 break;
686
687 default:
688 error = EOPNOTSUPP;
689 break;
690 }
691 FRELE(fp);
692 return (error);
693 }
694
/*
 * Allocate a file descriptor for the process.
 *
 * p:      process whose descriptor table is searched
 * want:   lowest descriptor number the caller will accept
 * result: on success, receives the allocated descriptor
 *
 * Returns 0 on success (the descriptor is marked used, but no file is
 * attached yet), EMFILE when the table has already reached the
 * process/system limit, or ENOSPC when there is no free slot in the
 * current table but it may still grow; callers in this file respond to
 * ENOSPC by calling fdexpand() and retrying.
 */
int
fdalloc(p, want, result)
	struct proc *p;
	int want;
	int *result;
{
	struct filedesc *fdp = p->p_fd;
	int lim, last, i;
	u_int new, off;

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
restart:
	/* Effective limit and the number of slots we may search now. */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	last = min(fdp->fd_nfiles, lim);
	if ((i = want) < fdp->fd_freefile)
		i = fdp->fd_freefile;
	off = i >> NDENTRYSHIFT;
	/*
	 * Level one: a clear bit in fd_himap names a fd_lomap word that
	 * is not completely full (see fd_used()).
	 */
	new = find_next_zero(fdp->fd_himap, off,
	    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
	if (new != -1) {
		/*
		 * Level two: find the free bit inside that word. Only the
		 * word containing the start position needs a bit offset.
		 */
		i = find_next_zero(&fdp->fd_lomap[new],
		    new > off ? 0 : i & NDENTRYMASK,
		    NDENTRIES);
		if (i == -1) {
			/*
			 * Free file descriptor in this block was
			 * below want, try again with higher want.
			 */
			want = (new + 1) << NDENTRYSHIFT;
			goto restart;
		}
		/* Convert the in-word bit index to a descriptor number. */
		i += (new << NDENTRYSHIFT);
		if (i < last) {
			fd_used(fdp, i);
			/* Only move the hint when the search started at or below it. */
			if (want <= fdp->fd_freefile)
				fdp->fd_freefile = i;
			*result = i;
			return (0);
		}
	}
	if (fdp->fd_nfiles >= lim)
		return (EMFILE);

	return (ENOSPC);
}
747
/*
 * Grow the descriptor table of process `p'.
 *
 * The new capacity is NDEXTENT if the table is currently smaller than
 * that, otherwise twice the current size. Existing entries and flags
 * are copied, the added tail is zeroed, and the bitmaps are reallocated
 * when the new size needs more high-level bitmap words. Allocations
 * use M_WAITOK; the function has no failure return.
 */
void
fdexpand(p)
	struct proc *p;
{
	struct filedesc *fdp = p->p_fd;
	int nfiles, i;
	struct file **newofile;
	char *newofileflags;
	u_int *newhimap, *newlomap;

	/*
	 * No space in current array.
	 */
	if (fdp->fd_nfiles < NDEXTENT)
		nfiles = NDEXTENT;
	else
		nfiles = 2 * fdp->fd_nfiles;

	/*
	 * One allocation holds both arrays: nfiles file pointers followed
	 * directly by nfiles flag bytes.
	 * NOTE(review): this presumes OFILESIZE covers one pointer plus one
	 * flag byte per descriptor; it is defined outside this file.
	 */
	newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
	newofileflags = (char *) &newofile[nfiles];

	/*
	 * Copy the existing ofile and ofileflags arrays
	 * and zero the new portion of each array.
	 */
	bcopy(fdp->fd_ofiles, newofile,
	    (i = sizeof(struct file *) * fdp->fd_nfiles));
	bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
	bcopy(fdp->fd_ofileflags, newofileflags,
	    (i = sizeof(char) * fdp->fd_nfiles));
	bzero(newofileflags + i, nfiles * sizeof(char) - i);

	/*
	 * A table of NDFILE entries is the array set up by fdinit()
	 * (fd_dfiles, part of the filedesc0 itself), so only larger
	 * tables are freed.
	 */
	if (fdp->fd_nfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);

	/* Reallocate the bitmaps only if more high-level words are needed. */
	if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
		newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int),
		    M_FILEDESC, M_WAITOK);
		newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(u_int),
		    M_FILEDESC, M_WAITOK);

		/* Copy the old high-level map and clear the added words. */
		bcopy(fdp->fd_himap, newhimap,
		    (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)));
		bzero((char *)newhimap + i,
		    NDHISLOTS(nfiles) * sizeof(u_int) - i);

		/* Same for the low-level map. */
		bcopy(fdp->fd_lomap, newlomap,
		    (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)));
		bzero((char *)newlomap + i,
		    NDLOSLOTS(nfiles) * sizeof(u_int) - i);

		/* The initial maps (fd_dhimap/fd_dlomap) are never freed. */
		if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
			free(fdp->fd_himap, M_FILEDESC);
			free(fdp->fd_lomap, M_FILEDESC);
		}
		fdp->fd_himap = newhimap;
		fdp->fd_lomap = newlomap;
	}
	/* Publish the new arrays and capacity. */
	fdp->fd_ofiles = newofile;
	fdp->fd_ofileflags = newofileflags;
	fdp->fd_nfiles = nfiles;
}
810
811 /*
812 * Create a new open file structure and allocate
813 * a file descriptor for the process that refers to it.
814 */
815 int
falloc(p,resultfp,resultfd)816 falloc(p, resultfp, resultfd)
817 struct proc *p;
818 struct file **resultfp;
819 int *resultfd;
820 {
821 struct file *fp, *fq;
822 int error, i;
823
824 restart:
825 if ((error = fdalloc(p, 0, &i)) != 0) {
826 if (error == ENOSPC) {
827 fdexpand(p);
828 goto restart;
829 }
830 return (error);
831 }
832 if (nfiles >= maxfiles) {
833 fd_unused(p->p_fd, i);
834 tablefull("file");
835 return (ENFILE);
836 }
837 /*
838 * Allocate a new file descriptor.
839 * If the process has file descriptor zero open, add to the list
840 * of open files at that point, otherwise put it at the front of
841 * the list of open files.
842 */
843 nfiles++;
844 fp = pool_get(&file_pool, PR_WAITOK);
845 bzero(fp, sizeof(struct file));
846 fp->f_iflags = FIF_LARVAL;
847 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
848 LIST_INSERT_AFTER(fq, fp, f_list);
849 } else {
850 LIST_INSERT_HEAD(&filehead, fp, f_list);
851 }
852 p->p_fd->fd_ofiles[i] = fp;
853 fp->f_count = 1;
854 fp->f_cred = p->p_ucred;
855 crhold(fp->f_cred);
856 if (resultfp)
857 *resultfp = fp;
858 if (resultfd)
859 *resultfd = i;
860 FREF(fp);
861 return (0);
862 }
863
864 /*
865 * Build a new filedesc structure.
866 */
867 struct filedesc *
fdinit(struct proc * p)868 fdinit(struct proc *p)
869 {
870 struct filedesc0 *newfdp;
871 extern int cmask;
872
873 newfdp = pool_get(&fdesc_pool, PR_WAITOK);
874 bzero(newfdp, sizeof(struct filedesc0));
875 if (p != NULL) {
876 struct filedesc *fdp = p->p_fd;
877
878 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
879 VREF(newfdp->fd_fd.fd_cdir);
880 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
881 if (newfdp->fd_fd.fd_rdir)
882 VREF(newfdp->fd_fd.fd_rdir);
883 }
884 rw_init(&newfdp->fd_fd.fd_lock);
885
886 /* Create the file descriptor table. */
887 newfdp->fd_fd.fd_refcnt = 1;
888 newfdp->fd_fd.fd_cmask = cmask;
889 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
890 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
891 newfdp->fd_fd.fd_nfiles = NDFILE;
892 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
893 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
894 newfdp->fd_fd.fd_knlistsize = -1;
895
896 newfdp->fd_fd.fd_freefile = 0;
897 newfdp->fd_fd.fd_lastfile = 0;
898
899 return (&newfdp->fd_fd);
900 }
901
902 /*
903 * Share a filedesc structure.
904 */
905 struct filedesc *
fdshare(p)906 fdshare(p)
907 struct proc *p;
908 {
909 p->p_fd->fd_refcnt++;
910 return (p->p_fd);
911 }
912
913 /*
914 * Copy a filedesc structure.
915 */
916 struct filedesc *
fdcopy(p)917 fdcopy(p)
918 struct proc *p;
919 {
920 struct filedesc *newfdp, *fdp = p->p_fd;
921 struct file **fpp;
922 int i;
923
924 newfdp = pool_get(&fdesc_pool, PR_WAITOK);
925 bcopy(fdp, newfdp, sizeof(struct filedesc));
926 if (newfdp->fd_cdir)
927 VREF(newfdp->fd_cdir);
928 if (newfdp->fd_rdir)
929 VREF(newfdp->fd_rdir);
930 newfdp->fd_refcnt = 1;
931
932 /*
933 * If the number of open files fits in the internal arrays
934 * of the open file structure, use them, otherwise allocate
935 * additional memory for the number of descriptors currently
936 * in use.
937 */
938 if (newfdp->fd_lastfile < NDFILE) {
939 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
940 newfdp->fd_ofileflags =
941 ((struct filedesc0 *) newfdp)->fd_dfileflags;
942 i = NDFILE;
943 } else {
944 /*
945 * Compute the smallest multiple of NDEXTENT needed
946 * for the file descriptors currently in use,
947 * allowing the table to shrink.
948 */
949 i = newfdp->fd_nfiles;
950 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
951 i /= 2;
952 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
953 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
954 }
955 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
956 newfdp->fd_himap =
957 ((struct filedesc0 *) newfdp)->fd_dhimap;
958 newfdp->fd_lomap =
959 ((struct filedesc0 *) newfdp)->fd_dlomap;
960 } else {
961 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(u_int),
962 M_FILEDESC, M_WAITOK);
963 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(u_int),
964 M_FILEDESC, M_WAITOK);
965 }
966 newfdp->fd_nfiles = i;
967 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
968 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
969 bcopy(fdp->fd_himap, newfdp->fd_himap, NDHISLOTS(i) * sizeof(u_int));
970 bcopy(fdp->fd_lomap, newfdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int));
971
972 /*
973 * kq descriptors cannot be copied.
974 */
975 if (newfdp->fd_knlistsize != -1) {
976 fpp = newfdp->fd_ofiles;
977 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++)
978 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
979 fdremove(newfdp, i);
980 newfdp->fd_knlist = NULL;
981 newfdp->fd_knlistsize = -1;
982 newfdp->fd_knhash = NULL;
983 newfdp->fd_knhashmask = 0;
984 }
985
986 fpp = newfdp->fd_ofiles;
987 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++)
988 if (*fpp != NULL) {
989 /*
990 * XXX Gruesome hack. If count gets too high, fail
991 * to copy an fd, since fdcopy()'s callers do not
992 * permit it to indicate failure yet.
993 */
994 if ((*fpp)->f_count == LONG_MAX-2)
995 fdremove(newfdp, i);
996 else
997 (*fpp)->f_count++;
998 }
999 return (newfdp);
1000 }
1001
1002 /*
1003 * Release a filedesc structure.
1004 */
1005 void
fdfree(p)1006 fdfree(p)
1007 struct proc *p;
1008 {
1009 struct filedesc *fdp = p->p_fd;
1010 struct file **fpp, *fp;
1011 int i;
1012
1013 if (--fdp->fd_refcnt > 0)
1014 return;
1015 fpp = fdp->fd_ofiles;
1016 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
1017 fp = *fpp;
1018 if (fp != NULL) {
1019 FREF(fp);
1020 *fpp = NULL;
1021 (void) closef(fp, p);
1022 }
1023 }
1024 p->p_fd = NULL;
1025 if (fdp->fd_nfiles > NDFILE)
1026 free(fdp->fd_ofiles, M_FILEDESC);
1027 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
1028 free(fdp->fd_himap, M_FILEDESC);
1029 free(fdp->fd_lomap, M_FILEDESC);
1030 }
1031 if (fdp->fd_cdir)
1032 vrele(fdp->fd_cdir);
1033 if (fdp->fd_rdir)
1034 vrele(fdp->fd_rdir);
1035 if (fdp->fd_knlist)
1036 FREE(fdp->fd_knlist, M_TEMP);
1037 if (fdp->fd_knhash)
1038 FREE(fdp->fd_knhash, M_TEMP);
1039 pool_put(&fdesc_pool, fdp);
1040 }
1041
/*
 * Internal form of close.
 * Decrement reference count on file structure.
 * Note: p may be NULL when closing a file
 * that was being passed in a message.
 *
 * The fp must have its usecount bumped and will be FRELEd here.
 *
 * Returns 0 when fp is NULL, when another closer has already set
 * FIF_WANTCLOSE, or when other references remain; otherwise returns
 * the result of the file's fo_close operation (0 if it has no ops).
 * On that last path the file structure is freed.
 */
int
closef(struct file *fp, struct proc *p)
{
	struct vnode *vp;
	struct flock lf;
	int error;

	if (fp == NULL)
		return (0);

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process. This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
		/* Whole-file unlock request, owned by the process. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = (struct vnode *)fp->f_data;
		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
	}

	/*
	 * Some files passed to this function could be accessed
	 * without a FILE_IS_USABLE check (and in some cases it's perfectly
	 * legal), we must beware of files where someone already won the
	 * race to FIF_WANTCLOSE.
	 */
	if ((fp->f_iflags & FIF_WANTCLOSE) != 0) {
		FRELE(fp);
		return (0);
	}

	/* Not the last reference: just drop ours and leave. */
	if (--fp->f_count > 0) {
		FRELE(fp);
		return (0);
	}

#ifdef DIAGNOSTIC
	if (fp->f_count < 0)
		panic("closef: count < 0");
#endif

	/* Wait for the last usecount to drain. */
	fp->f_iflags |= FIF_WANTCLOSE;
	while (fp->f_usecount > 1)
		tsleep(&fp->f_usecount, PRIBIO, "closef", 0);

	/* Release a flock(2)-style lock; those are owned by the file itself. */
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = (struct vnode *)fp->f_data;
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	}
	/* f_ops may be unset, e.g. for a file that never left the larval state. */
	if (fp->f_ops)
		error = (*fp->f_ops->fo_close)(fp, p);
	else
		error = 0;

	/* Free fp */
	LIST_REMOVE(fp, f_list);
	crfree(fp->f_cred);
#ifdef DIAGNOSTIC
	if (fp->f_count != 0 || fp->f_usecount != 1)
		panic("closef: count: %ld/%d", fp->f_count, fp->f_usecount);
#endif
	nfiles--;
	pool_put(&file_pool, fp);

	return (error);
}
1128
1129 /*
1130 * Apply an advisory lock on a file descriptor.
1131 *
1132 * Just attempt to get a record lock of the requested type on
1133 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1134 */
1135 /* ARGSUSED */
1136 int
sys_flock(p,v,retval)1137 sys_flock(p, v, retval)
1138 struct proc *p;
1139 void *v;
1140 register_t *retval;
1141 {
1142 struct sys_flock_args /* {
1143 syscallarg(int) fd;
1144 syscallarg(int) how;
1145 } */ *uap = v;
1146 int fd = SCARG(uap, fd);
1147 int how = SCARG(uap, how);
1148 struct filedesc *fdp = p->p_fd;
1149 struct file *fp;
1150 struct vnode *vp;
1151 struct flock lf;
1152 int error;
1153
1154 if ((fp = fd_getfile(fdp, fd)) == NULL)
1155 return (EBADF);
1156 if (fp->f_type != DTYPE_VNODE)
1157 return (EOPNOTSUPP);
1158 vp = (struct vnode *)fp->f_data;
1159 lf.l_whence = SEEK_SET;
1160 lf.l_start = 0;
1161 lf.l_len = 0;
1162 if (how & LOCK_UN) {
1163 lf.l_type = F_UNLCK;
1164 fp->f_flag &= ~FHASLOCK;
1165 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1166 goto out;
1167 }
1168 if (how & LOCK_EX)
1169 lf.l_type = F_WRLCK;
1170 else if (how & LOCK_SH)
1171 lf.l_type = F_RDLCK;
1172 else {
1173 error = EINVAL;
1174 goto out;
1175 }
1176 fp->f_flag |= FHASLOCK;
1177 if (how & LOCK_NB)
1178 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1179 else
1180 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
1181 out:
1182 return (error);
1183 }
1184
1185 /*
1186 * File Descriptor pseudo-device driver (/dev/fd/).
1187 *
1188 * Opening minor device N dup()s the file (if any) connected to file
1189 * descriptor N belonging to the calling process. Note that this driver
1190 * consists of only the ``open()'' routine, because all subsequent
1191 * references to this file will be direct to the other driver.
1192 */
1193 /* ARGSUSED */
1194 int
filedescopen(dev,mode,type,p)1195 filedescopen(dev, mode, type, p)
1196 dev_t dev;
1197 int mode, type;
1198 struct proc *p;
1199 {
1200
1201 /*
1202 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1203 * the file descriptor being sought for duplication. The error
1204 * return ensures that the vnode for this device will be released
1205 * by vn_open. Open will detect this special error and take the
1206 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1207 * will simply report the error.
1208 */
1209 p->p_dupfd = minor(dev);
1210 return (ENODEV);
1211 }
1212
1213 /*
1214 * Duplicate the specified descriptor to a free descriptor.
1215 */
1216 int
dupfdopen(fdp,indx,dfd,mode,error)1217 dupfdopen(fdp, indx, dfd, mode, error)
1218 struct filedesc *fdp;
1219 int indx, dfd;
1220 int mode;
1221 int error;
1222 {
1223 struct file *wfp;
1224
1225 /*
1226 * Assume that the filename was user-specified; applications do
1227 * not tend to opens of /dev/fd/# when they can just call dup()
1228 */
1229 if ((curproc->p_flag & (P_SUGIDEXEC | P_SUGID))) {
1230 if (curproc->p_descfd == 255)
1231 return (EPERM);
1232 if (curproc->p_descfd != curproc->p_dupfd)
1233 return (EPERM);
1234 }
1235
1236 /*
1237 * If the to-be-dup'd fd number is greater than the allowed number
1238 * of file descriptors, or the fd to be dup'd has already been
1239 * closed, reject. Note, there is no need to check for new == old
1240 * because fd_getfile will return NULL if the file at indx is
1241 * newly created by falloc (FIF_LARVAL).
1242 */
1243 if ((wfp = fd_getfile(fdp, dfd)) == NULL)
1244 return (EBADF);
1245
1246 /*
1247 * There are two cases of interest here.
1248 *
1249 * For ENODEV simply dup (dfd) to file descriptor
1250 * (indx) and return.
1251 *
1252 * For ENXIO steal away the file structure from (dfd) and
1253 * store it in (indx). (dfd) is effectively closed by
1254 * this operation.
1255 *
1256 * Any other error code is just returned.
1257 */
1258 switch (error) {
1259 case ENODEV:
1260 /*
1261 * Check that the mode the file is being opened for is a
1262 * subset of the mode of the existing descriptor.
1263 */
1264 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1265 return (EACCES);
1266 if (wfp->f_count == LONG_MAX-2)
1267 return (EDEADLK);
1268 fdp->fd_ofiles[indx] = wfp;
1269 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1270 wfp->f_count++;
1271 fd_used(fdp, indx);
1272 return (0);
1273
1274 case ENXIO:
1275 /*
1276 * Steal away the file pointer from dfd, and stuff it into indx.
1277 */
1278 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1279 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1280 fdp->fd_ofiles[dfd] = NULL;
1281 fdp->fd_ofileflags[dfd] = 0;
1282 /*
1283 * Complete the clean up of the filedesc structure by
1284 * recomputing the various hints.
1285 */
1286 fd_used(fdp, indx);
1287 fd_unused(fdp, dfd);
1288 return (0);
1289
1290 default:
1291 return (error);
1292 }
1293 /* NOTREACHED */
1294 }
1295
1296 /*
1297 * Close any files on exec?
1298 */
1299 void
fdcloseexec(p)1300 fdcloseexec(p)
1301 struct proc *p;
1302 {
1303 struct filedesc *fdp = p->p_fd;
1304 int fd;
1305
1306 for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1307 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1308 (void) fdrelease(p, fd);
1309 }
1310
1311 int
sys_closefrom(struct proc * p,void * v,register_t * retval)1312 sys_closefrom(struct proc *p, void *v, register_t *retval)
1313 {
1314 struct sys_closefrom_args *uap = v;
1315 struct filedesc *fdp = p->p_fd;
1316 u_int startfd, i;
1317
1318 startfd = SCARG(uap, fd);
1319 fdplock(fdp, p);
1320
1321 if (startfd > fdp->fd_lastfile) {
1322 fdpunlock(fdp);
1323 return (EBADF);
1324 }
1325
1326 for (i = startfd; i <= fdp->fd_lastfile; i++)
1327 fdrelease(p, i);
1328
1329 fdpunlock(fdp);
1330 return (0);
1331 }
1332