1 /** $MirOS: src/sys/kern/vfs_subr.c,v 1.3 2005/07/04 00:10:43 tg Exp $ */
2 /* $OpenBSD: vfs_subr.c,v 1.114 2005/05/26 00:33:45 pedro Exp $ */
3 /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
4
5 /*
6 * Copyright (c) 1989, 1993
7 * The Regents of the University of California. All rights reserved.
8 * (c) UNIX System Laboratories, Inc.
9 * All or some portions of this file are derived from material licensed
10 * to the University of California by American Telephone and Telegraph
11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12 * the permission of UNIX System Laboratories, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
39 */
40
41 /*
42 * External virtual filesystem routines
43 */
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/time.h>
50 #include <sys/fcntl.h>
51 #include <sys/kernel.h>
52 #include <sys/vnode.h>
53 #include <sys/stat.h>
54 #include <sys/namei.h>
55 #include <sys/ucred.h>
56 #include <sys/buf.h>
57 #include <sys/errno.h>
58 #include <sys/malloc.h>
59 #include <sys/domain.h>
60 #include <sys/mbuf.h>
61 #include <sys/syscallargs.h>
62 #include <sys/pool.h>
63
64 #include <uvm/uvm_extern.h>
65 #include <sys/sysctl.h>
66
67 #include <miscfs/specfs/specdev.h>
68
/*
 * Conversion tables between mode-word file types and vnode types.
 * NOTE(review): judging by the names these back IFTOVT()/VTTOIF()-style
 * lookups declared elsewhere -- confirm against the vnode header.
 */
/* 16 entries; slots with no matching vnode type hold VNON, the last VBAD. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* 9 entries of S_IF* values; the first entry is 0 (no type bits). */
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
77
/* Global behaviour switches; the meaning of each is given on its line. */
int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */
81
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp) links buffer bp at the head of list dp through its
 * b_vnbufs entry.
 *
 * bufremvn(bp) unlinks bp from that list and then stores NOLIST in its
 * next pointer.  It is wrapped in do { } while (0) so that it expands to
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	LIST_NEXT(bp, b_vnbufs) = NOLIST;				\
} while (0)
90
91 struct freelst vnode_hold_list; /* list of vnodes referencing buffers */
92 struct freelst vnode_free_list; /* vnode free list */
93
94 struct mntlist mountlist; /* mounted filesystem list */
95 struct simplelock mountlist_slock;
96 static struct simplelock mntid_slock;
97 struct simplelock mntvnode_slock;
98 struct simplelock vnode_free_list_slock;
99 struct simplelock spechash_slock;
100
101 void vclean(struct vnode *, int, struct proc *);
102
103 void insmntque(struct vnode *, struct mount *);
104 int getdevvp(dev_t, struct vnode **, enum vtype);
105
106 int vfs_hang_addrlist(struct mount *, struct netexport *,
107 struct export_args *);
108 int vfs_free_netcred(struct radix_node *, void *);
109 void vfs_free_addrlist(struct netexport *);
110 void vputonfreelist(struct vnode *);
111
112 int vflush_vnode(struct vnode *, void *);
113
114 #ifdef DEBUG
115 void printlockedvnodes(void);
116 #endif
117
118 #define VN_KNOTE(vp, b) \
119 KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))
120
121 struct pool vnode_pool;
122
123 /*
124 * Initialize the vnode management data structures.
125 */
126 void
vntblinit()127 vntblinit()
128 {
129
130 pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
131 &pool_allocator_nointr);
132 simple_lock_init(&mntvnode_slock);
133 simple_lock_init(&mntid_slock);
134 simple_lock_init(&spechash_slock);
135 TAILQ_INIT(&vnode_hold_list);
136 TAILQ_INIT(&vnode_free_list);
137 simple_lock_init(&vnode_free_list_slock);
138 CIRCLEQ_INIT(&mountlist);
139 simple_lock_init(&mountlist_slock);
140 /*
141 * Initialize the filesystem syncer.
142 */
143 vn_initialize_syncerd();
144 }
145
146 /*
147 * Mark a mount point as busy. Used to synchronize access and to delay
148 * unmounting. Interlock is not released on failure.
149 *
150 * historical behavior:
151 * - LK_NOWAIT means that we should just ignore the mount point if it's
152 * being unmounted.
153 * - no flags means that we should sleep on the mountpoint and then
154 * fail.
155 */
156 int
vfs_busy(struct mount * mp,int flags,struct simplelock * interlkp,struct proc * p)157 vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp,
158 struct proc *p)
159 {
160 int lkflags;
161
162 switch (flags) {
163 case LK_NOWAIT:
164 lkflags = LK_SHARED|LK_NOWAIT;
165 break;
166 case 0:
167 lkflags = LK_SHARED;
168 break;
169 default:
170 lkflags = flags;
171 }
172
173 /*
174 * Always sleepfail. We will only sleep for an exclusive lock
175 * and the exclusive lock will only be acquired when unmounting.
176 */
177 lkflags |= LK_SLEEPFAIL;
178
179 if (interlkp)
180 lkflags |= LK_INTERLOCK;
181 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
182 return (ENOENT);
183 return (0);
184 }
185
186
187 /*
188 * Free a busy file system
189 */
190 void
vfs_unbusy(struct mount * mp,struct proc * p)191 vfs_unbusy(struct mount *mp, struct proc *p)
192 {
193 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
194 }
195
196 int
vfs_isbusy(struct mount * mp)197 vfs_isbusy(struct mount *mp)
198 {
199 return (lockstatus(&mp->mnt_lock));
200 }
201
202 /*
203 * Lookup a filesystem type, and if found allocate and initialize
204 * a mount structure for it.
205 *
206 * Devname is usually updated by mount(8) after booting.
207 */
208 int
vfs_rootmountalloc(fstypename,devname,mpp)209 vfs_rootmountalloc(fstypename, devname, mpp)
210 char *fstypename;
211 char *devname;
212 struct mount **mpp;
213 {
214 struct proc *p = curproc; /* XXX */
215 struct vfsconf *vfsp;
216 struct mount *mp;
217
218 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
219 if (!strcmp(vfsp->vfc_name, fstypename))
220 break;
221 if (vfsp == NULL)
222 return (ENODEV);
223 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
224 bzero((char *)mp, (u_long)sizeof(struct mount));
225 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
226 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
227 LIST_INIT(&mp->mnt_vnodelist);
228 mp->mnt_vfc = vfsp;
229 mp->mnt_op = vfsp->vfc_vfsops;
230 mp->mnt_flag = MNT_RDONLY;
231 mp->mnt_vnodecovered = NULLVP;
232 vfsp->vfc_refcount++;
233 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
234 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
235 mp->mnt_stat.f_mntonname[0] = '/';
236 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
237 *mpp = mp;
238 return (0);
239 }
240
241 /*
242 * Find an appropriate filesystem to use for the root. If a filesystem
243 * has not been preselected, walk through the list of known filesystems
244 * trying those that have mountroot routines, and try them until one
245 * works or we have tried them all.
246 */
247 int
vfs_mountroot()248 vfs_mountroot()
249 {
250 struct vfsconf *vfsp;
251 int error;
252
253 if (mountroot != NULL)
254 return ((*mountroot)());
255 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
256 if (vfsp->vfc_mountroot == NULL)
257 continue;
258 if ((error = (*vfsp->vfc_mountroot)()) == 0)
259 return (0);
260 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
261 }
262 return (ENODEV);
263 }
264
265 /*
266 * Lookup a mount point by filesystem identifier.
267 */
268 struct mount *
vfs_getvfs(fsid)269 vfs_getvfs(fsid)
270 fsid_t *fsid;
271 {
272 register struct mount *mp;
273
274 simple_lock(&mountlist_slock);
275 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
276 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
277 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
278 simple_unlock(&mountlist_slock);
279 return (mp);
280 }
281 }
282 simple_unlock(&mountlist_slock);
283 return ((struct mount *)0);
284 }
285
286
287 /*
288 * Get a new unique fsid
289 */
290 void
vfs_getnewfsid(mp)291 vfs_getnewfsid(mp)
292 struct mount *mp;
293 {
294 static u_short xxxfs_mntid;
295
296 fsid_t tfsid;
297 int mtype;
298
299 simple_lock(&mntid_slock);
300 mtype = mp->mnt_vfc->vfc_typenum;
301 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
302 mp->mnt_stat.f_fsid.val[1] = mtype;
303 if (xxxfs_mntid == 0)
304 ++xxxfs_mntid;
305 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
306 tfsid.val[1] = mtype;
307 if (!CIRCLEQ_EMPTY(&mountlist)) {
308 while (vfs_getvfs(&tfsid)) {
309 tfsid.val[0]++;
310 xxxfs_mntid++;
311 }
312 }
313 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
314 simple_unlock(&mntid_slock);
315 }
316
/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 *
 * For every character the running value is shifted left by two bits and
 * XORed with the character.  The arithmetic is carried out on an
 * unsigned long so that long names wrap around instead of overflowing a
 * signed shift (undefined behaviour); each character is first widened
 * as a (possibly signed) char, so results for names that did not
 * overflow are unchanged.
 *
 * Returns 0 for the empty string.
 */
long
makefstype(char *type)
{
	unsigned long hash = 0;

	for (; *type != '\0'; type++) {
		hash <<= 2;
		hash ^= (unsigned long)(long)*type;
	}
	return ((long)hash);
}
334
335 /*
336 * Set vnode attributes to VNOVAL
337 */
338 void
vattr_null(vap)339 vattr_null(vap)
340 register struct vattr *vap;
341 {
342
343 vap->va_type = VNON;
344 /* XXX These next two used to be one line, but for a GCC bug. */
345 vap->va_size = VNOVAL;
346 vap->va_bytes = VNOVAL;
347 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
348 vap->va_fsid = vap->va_fileid =
349 vap->va_blocksize = vap->va_rdev =
350 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
351 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
352 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
353 vap->va_flags = vap->va_gen = VNOVAL;
354 vap->va_vaflags = 0;
355 }
356
/*
 * Routines having to do with the management of the vnode table.
 */
/* Operations vector installed on a vnode once it has been cleaned. */
extern int (**dead_vnodeop_p)(void *);
/* Count of vnodes allocated from the pool so far. */
long numvnodes;
362
363 /*
364 * Return the next vnode from the free list.
365 */
366 int
getnewvnode(tag,mp,vops,vpp)367 getnewvnode(tag, mp, vops, vpp)
368 enum vtagtype tag;
369 struct mount *mp;
370 int (**vops)(void *);
371 struct vnode **vpp;
372 {
373 struct proc *p = curproc; /* XXX */
374 struct freelst *listhd;
375 static int toggle;
376 struct vnode *vp;
377 int s;
378
379 /*
380 * We must choose whether to allocate a new vnode or recycle an
381 * existing one. The criterion for allocating a new one is that
382 * the total number of vnodes is less than the number desired or
383 * there are no vnodes on either free list. Generally we only
384 * want to recycle vnodes that have no buffers associated with
385 * them, so we look first on the vnode_free_list. If it is empty,
386 * we next consider vnodes with referencing buffers on the
387 * vnode_hold_list. The toggle ensures that half the time we
388 * will use a buffer from the vnode_hold_list, and half the time
389 * we will allocate a new one unless the list has grown to twice
390 * the desired size. We are reticent to recycle vnodes from the
391 * vnode_hold_list because we will lose the identity of all its
392 * referencing buffers.
393 */
394 toggle ^= 1;
395 if (numvnodes > 2 * desiredvnodes)
396 toggle = 0;
397
398 simple_lock(&vnode_free_list_slock);
399 s = splbio();
400 if ((numvnodes < desiredvnodes) ||
401 ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
402 ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
403 splx(s);
404 simple_unlock(&vnode_free_list_slock);
405 vp = pool_get(&vnode_pool, PR_WAITOK);
406 bzero((char *)vp, sizeof *vp);
407 simple_lock_init(&vp->v_interlock);
408 numvnodes++;
409 } else {
410 for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
411 vp = TAILQ_NEXT(vp, v_freelist)) {
412 if (simple_lock_try(&vp->v_interlock)) {
413 if (VOP_ISLOCKED(vp) == 0)
414 break;
415 else
416 simple_unlock(&vp->v_interlock);
417 }
418 }
419 /*
420 * Unless this is a bad time of the month, at most
421 * the first NCPUS items on the free list are
422 * locked, so this is close enough to being empty.
423 */
424 if (vp == NULL) {
425 splx(s);
426 simple_unlock(&vnode_free_list_slock);
427 tablefull("vnode");
428 *vpp = 0;
429 return (ENFILE);
430 }
431 if (vp->v_usecount) {
432 vprint("free vnode", vp);
433 panic("free vnode isn't");
434 }
435
436 TAILQ_REMOVE(listhd, vp, v_freelist);
437 vp->v_bioflag &= ~VBIOONFREELIST;
438 splx(s);
439
440 simple_unlock(&vnode_free_list_slock);
441 if (vp->v_type != VBAD)
442 vgonel(vp, p);
443 else
444 simple_unlock(&vp->v_interlock);
445 #ifdef DIAGNOSTIC
446 if (vp->v_data) {
447 vprint("cleaned vnode", vp);
448 panic("cleaned vnode isn't");
449 }
450 s = splbio();
451 if (vp->v_numoutput)
452 panic("Clean vnode has pending I/O's");
453 splx(s);
454 #endif
455 vp->v_flag = 0;
456 vp->v_socket = 0;
457 }
458 vp->v_type = VNON;
459 cache_purge(vp);
460 vp->v_vnlock = NULL;
461 vp->v_tag = tag;
462 vp->v_op = vops;
463 insmntque(vp, mp);
464 *vpp = vp;
465 vp->v_usecount = 1;
466 vp->v_data = 0;
467 simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
468 return (0);
469 }
470
471 /*
472 * Move a vnode from one mount queue to another.
473 */
474 void
insmntque(vp,mp)475 insmntque(vp, mp)
476 register struct vnode *vp;
477 register struct mount *mp;
478 {
479 simple_lock(&mntvnode_slock);
480
481 /*
482 * Delete from old mount point vnode list, if on one.
483 */
484 if (vp->v_mount != NULL)
485 LIST_REMOVE(vp, v_mntvnodes);
486 /*
487 * Insert into list of vnodes for the new mount point, if available.
488 */
489 if ((vp->v_mount = mp) != NULL)
490 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
491
492 simple_unlock(&mntvnode_slock);
493 }
494
495
496 /*
497 * Create a vnode for a block device.
498 * Used for root filesystem, argdev, and swap areas.
499 * Also used for memory file system special devices.
500 */
501 int
bdevvp(dev,vpp)502 bdevvp(dev, vpp)
503 dev_t dev;
504 struct vnode **vpp;
505 {
506
507 return (getdevvp(dev, vpp, VBLK));
508 }
509
510 /*
511 * Create a vnode for a character device.
512 * Used for kernfs and some console handling.
513 */
514 int
cdevvp(dev,vpp)515 cdevvp(dev, vpp)
516 dev_t dev;
517 struct vnode **vpp;
518 {
519
520 return (getdevvp(dev, vpp, VCHR));
521 }
522
523 /*
524 * Create a vnode for a device.
525 * Used by bdevvp (block device) for root file system etc.,
526 * and by cdevvp (character device) for console and kernfs.
527 */
528 int
getdevvp(dev,vpp,type)529 getdevvp(dev, vpp, type)
530 dev_t dev;
531 struct vnode **vpp;
532 enum vtype type;
533 {
534 register struct vnode *vp;
535 struct vnode *nvp;
536 int error;
537
538 if (dev == NODEV) {
539 *vpp = NULLVP;
540 return (0);
541 }
542 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
543 if (error) {
544 *vpp = NULLVP;
545 return (error);
546 }
547 vp = nvp;
548 vp->v_type = type;
549 if ((nvp = checkalias(vp, dev, NULL)) != 0) {
550 vput(vp);
551 vp = nvp;
552 }
553 *vpp = vp;
554 return (0);
555 }
556
557 /*
558 * Check to see if the new vnode represents a special device
559 * for which we already have a vnode (either because of
560 * bdevvp() or because of a different vnode representing
561 * the same block device). If such an alias exists, deallocate
562 * the existing contents and return the aliased vnode. The
563 * caller is responsible for filling it with its new contents.
564 */
565 struct vnode *
checkalias(nvp,nvp_rdev,mp)566 checkalias(nvp, nvp_rdev, mp)
567 register struct vnode *nvp;
568 dev_t nvp_rdev;
569 struct mount *mp;
570 {
571 struct proc *p = curproc;
572 register struct vnode *vp;
573 struct vnode **vpp;
574
575 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
576 return (NULLVP);
577
578 vpp = &speclisth[SPECHASH(nvp_rdev)];
579 loop:
580 simple_lock(&spechash_slock);
581 for (vp = *vpp; vp; vp = vp->v_specnext) {
582 simple_lock(&vp->v_interlock);
583 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
584 simple_unlock(&vp->v_interlock);
585 continue;
586 }
587 /*
588 * Alias, but not in use, so flush it out.
589 */
590 if (vp->v_usecount == 0) {
591 simple_unlock(&spechash_slock);
592 vgonel(vp, p);
593 goto loop;
594 }
595 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
596 simple_unlock(&spechash_slock);
597 goto loop;
598 }
599 break;
600 }
601
602 /*
603 * Common case is actually in the if statement
604 */
605 if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
606 MALLOC(nvp->v_specinfo, struct specinfo *,
607 sizeof(struct specinfo), M_VNODE, M_WAITOK);
608 nvp->v_rdev = nvp_rdev;
609 nvp->v_hashchain = vpp;
610 nvp->v_specnext = *vpp;
611 nvp->v_specmountpoint = NULL;
612 nvp->v_speclockf = NULL;
613 simple_unlock(&spechash_slock);
614 *vpp = nvp;
615 if (vp != NULLVP) {
616 nvp->v_flag |= VALIASED;
617 vp->v_flag |= VALIASED;
618 vput(vp);
619 }
620 return (NULLVP);
621 }
622
623 /*
624 * This code is the uncommon case. It is called in case
625 * we found an alias that was VT_NON && vtype of VBLK
626 * This means we found a block device that was created
627 * using bdevvp.
628 * An example of such a vnode is the root partition device vnode
629 * created in ffs_mountroot.
630 *
631 * The vnodes created by bdevvp should not be aliased (why?).
632 */
633
634 simple_unlock(&spechash_slock);
635 VOP_UNLOCK(vp, 0, p);
636 simple_lock(&vp->v_interlock);
637 vclean(vp, 0, p);
638 vp->v_vnlock = NULL;
639 vp->v_op = nvp->v_op;
640 vp->v_tag = nvp->v_tag;
641 nvp->v_type = VNON;
642 insmntque(vp, mp);
643 return (vp);
644 }
645
646 /*
647 * Grab a particular vnode from the free list, increment its
648 * reference count and lock it. If the vnode lock bit is set,
649 * the vnode is being eliminated in vgone. In that case, we
650 * cannot grab it, so the process is awakened when the
651 * transition is completed, and an error code is returned to
652 * indicate that the vnode is no longer usable, possibly
653 * having been changed to a new file system type.
654 */
655 int
vget(vp,flags,p)656 vget(vp, flags, p)
657 struct vnode *vp;
658 int flags;
659 struct proc *p;
660 {
661 int error, s, onfreelist;
662
663 /*
664 * If the vnode is in the process of being cleaned out for
665 * another use, we wait for the cleaning to finish and then
666 * return failure. Cleaning is determined by checking that
667 * the VXLOCK flag is set.
668 */
669 if ((flags & LK_INTERLOCK) == 0) {
670 simple_lock(&vp->v_interlock);
671 flags |= LK_INTERLOCK;
672 }
673
674 if (vp->v_flag & VXLOCK) {
675 if (flags & LK_NOWAIT) {
676 simple_unlock(&vp->v_interlock);
677 return (EBUSY);
678 }
679
680 vp->v_flag |= VXWANT;
681 ltsleep(vp, PINOD | PNORELOCK, "vget", 0, &vp->v_interlock);
682 return (ENOENT);
683 }
684
685 onfreelist = vp->v_bioflag & VBIOONFREELIST;
686 if (vp->v_usecount == 0 && onfreelist) {
687 s = splbio();
688 simple_lock(&vnode_free_list_slock);
689 if (vp->v_holdcnt > 0)
690 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
691 else
692 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
693 simple_unlock(&vnode_free_list_slock);
694 vp->v_bioflag &= ~VBIOONFREELIST;
695 splx(s);
696 }
697
698 vp->v_usecount++;
699 if (flags & LK_TYPE_MASK) {
700 if ((error = vn_lock(vp, flags, p)) != 0) {
701 vp->v_usecount--;
702 if (vp->v_usecount == 0 && onfreelist)
703 vputonfreelist(vp);
704
705 simple_unlock(&vp->v_interlock);
706 }
707 return (error);
708 }
709
710 simple_unlock(&vp->v_interlock);
711
712 return (0);
713 }
714
715
#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 *
 * Adds one to the use count of vp under its interlock.  Panics when the
 * use count is zero, since vget() has to be used in that case.
 */
void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */
731
732 void
vputonfreelist(struct vnode * vp)733 vputonfreelist(struct vnode *vp)
734 {
735 int s;
736 struct freelst *lst;
737
738 s = splbio();
739 #ifdef DIAGNOSTIC
740 if (vp->v_usecount != 0)
741 panic("Use count is not zero!");
742
743 if (vp->v_bioflag & VBIOONFREELIST) {
744 vprint("vnode already on free list: ", vp);
745 panic("vnode already on free list");
746 }
747 #endif
748
749 vp->v_bioflag |= VBIOONFREELIST;
750
751 if (vp->v_holdcnt > 0)
752 lst = &vnode_hold_list;
753 else
754 lst = &vnode_free_list;
755
756 if (vp->v_type == VBAD)
757 TAILQ_INSERT_HEAD(lst, vp, v_freelist);
758 else
759 TAILQ_INSERT_TAIL(lst, vp, v_freelist);
760
761 splx(s);
762 }
763
764 /*
765 * vput(), just unlock and vrele()
766 */
767 void
vput(vp)768 vput(vp)
769 register struct vnode *vp;
770 {
771 struct proc *p = curproc; /* XXX */
772
773 #ifdef DIAGNOSTIC
774 if (vp == NULL)
775 panic("vput: null vp");
776 #endif
777 simple_lock(&vp->v_interlock);
778
779 #ifdef DIAGNOSTIC
780 if (vp->v_usecount == 0) {
781 vprint("vput: bad ref count", vp);
782 panic("vput: ref cnt");
783 }
784 #endif
785 vp->v_usecount--;
786 if (vp->v_usecount > 0) {
787 simple_unlock(&vp->v_interlock);
788 VOP_UNLOCK(vp, 0, p);
789 return;
790 }
791
792 #ifdef DIAGNOSTIC
793 if (vp->v_writecount != 0) {
794 vprint("vput: bad writecount", vp);
795 panic("vput: v_writecount != 0");
796 }
797 #endif
798 simple_unlock(&vp->v_interlock);
799
800 VOP_INACTIVE(vp, p);
801
802 simple_lock(&vp->v_interlock);
803
804 if (vp->v_usecount == 0)
805 vputonfreelist(vp);
806
807 simple_unlock(&vp->v_interlock);
808 }
809
810 /*
811 * Vnode release - use for active VNODES.
812 * If count drops to zero, call inactive routine and return to freelist.
813 */
814 void
vrele(vp)815 vrele(vp)
816 register struct vnode *vp;
817 {
818 struct proc *p = curproc; /* XXX */
819
820 #ifdef DIAGNOSTIC
821 if (vp == NULL)
822 panic("vrele: null vp");
823 #endif
824 simple_lock(&vp->v_interlock);
825 #ifdef DIAGNOSTIC
826 if (vp->v_usecount == 0) {
827 vprint("vrele: bad ref count", vp);
828 panic("vrele: ref cnt");
829 }
830 #endif
831 vp->v_usecount--;
832 if (vp->v_usecount > 0) {
833 simple_unlock(&vp->v_interlock);
834 return;
835 }
836
837 #ifdef DIAGNOSTIC
838 if (vp->v_writecount != 0) {
839 vprint("vrele: bad writecount", vp);
840 panic("vrele: v_writecount != 0");
841 }
842 #endif
843 if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p)) {
844 vprint("vrele: cannot lock", vp);
845 return;
846 }
847
848 VOP_INACTIVE(vp, p);
849
850 simple_lock(&vp->v_interlock);
851
852 if (vp->v_usecount == 0)
853 vputonfreelist(vp);
854
855 simple_unlock(&vp->v_interlock);
856 }
857
858 void vhold(struct vnode *vp);
859
860 /*
861 * Page or buffer structure gets a reference.
862 */
863 void
vhold(vp)864 vhold(vp)
865 register struct vnode *vp;
866 {
867
868 /*
869 * If it is on the freelist and the hold count is currently
870 * zero, move it to the hold list.
871 */
872 simple_lock(&vp->v_interlock);
873 if ((vp->v_bioflag & VBIOONFREELIST) &&
874 vp->v_holdcnt == 0 && vp->v_usecount == 0) {
875 simple_lock(&vnode_free_list_slock);
876 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
877 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
878 simple_unlock(&vnode_free_list_slock);
879 }
880 vp->v_holdcnt++;
881 simple_unlock(&vp->v_interlock);
882 }
883
884 /*
885 * Remove any vnodes in the vnode table belonging to mount point mp.
886 *
887 * If MNT_NOFORCE is specified, there should not be any active ones,
888 * return error if any are found (nb: this is a user error, not a
889 * system error). If MNT_FORCE is specified, detach any active vnodes
890 * that are found.
891 */
#ifdef DEBUG
/* When non-zero, vflush_vnode() prints each vnode it finds busy. */
int busyprt = 0;	/* print out busy vnodes */
/* Debug control record naming the switch and pointing at it. */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
896
897 int
vfs_mount_foreach_vnode(struct mount * mp,int (* func)(struct vnode *,void *),void * arg)898 vfs_mount_foreach_vnode(struct mount *mp,
899 int (*func)(struct vnode *, void *), void *arg) {
900 struct vnode *vp, *nvp;
901 int error = 0;
902
903 simple_lock(&mntvnode_slock);
904 loop:
905 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
906 if (vp->v_mount != mp)
907 goto loop;
908 nvp = LIST_NEXT(vp, v_mntvnodes);
909 simple_lock(&vp->v_interlock);
910 simple_unlock(&mntvnode_slock);
911
912 error = func(vp, arg);
913
914 simple_lock(&mntvnode_slock);
915
916 if (error != 0)
917 break;
918 }
919 simple_unlock(&mntvnode_slock);
920
921 return (error);
922 }
923
/* Argument bundle handed from vflush() to vflush_vnode(). */
struct vflush_args {
	struct vnode *skipvp;	/* vnode to leave untouched */
	int busy;		/* count of vnodes found busy */
	int flags;		/* SKIPSYSTEM / WRITECLOSE / FORCECLOSE bits */
};
929
930 int
vflush_vnode(struct vnode * vp,void * arg)931 vflush_vnode(struct vnode *vp, void *arg) {
932 struct vflush_args *va = arg;
933 struct proc *p = curproc;
934
935 if (vp == va->skipvp) {
936 simple_unlock(&vp->v_interlock);
937 return (0);
938 }
939
940 if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
941 simple_unlock(&vp->v_interlock);
942 return (0);
943 }
944
945 /*
946 * If WRITECLOSE is set, only flush out regular file
947 * vnodes open for writing.
948 */
949 if ((va->flags & WRITECLOSE) &&
950 (vp->v_writecount == 0 || vp->v_type != VREG)) {
951 simple_unlock(&vp->v_interlock);
952 return (0);
953 }
954
955 /*
956 * With v_usecount == 0, all we need to do is clear
957 * out the vnode data structures and we are done.
958 */
959 if (vp->v_usecount == 0) {
960 vgonel(vp, p);
961 return (0);
962 }
963
964 /*
965 * If FORCECLOSE is set, forcibly close the vnode.
966 * For block or character devices, revert to an
967 * anonymous device. For all other files, just kill them.
968 */
969 if (va->flags & FORCECLOSE) {
970 if (vp->v_type != VBLK && vp->v_type != VCHR) {
971 vgonel(vp, p);
972 } else {
973 vclean(vp, 0, p);
974 vp->v_op = spec_vnodeop_p;
975 insmntque(vp, (struct mount *)0);
976 }
977 return (0);
978 }
979
980 #ifdef DEBUG
981 if (busyprt)
982 vprint("vflush: busy vnode", vp);
983 #endif
984 simple_unlock(&vp->v_interlock);
985 va->busy++;
986 return (0);
987 }
988
989 int
vflush(mp,skipvp,flags)990 vflush(mp, skipvp, flags)
991 struct mount *mp;
992 struct vnode *skipvp;
993 int flags;
994 {
995 struct vflush_args va;
996 va.skipvp = skipvp;
997 va.busy = 0;
998 va.flags = flags;
999
1000 vfs_mount_foreach_vnode(mp, vflush_vnode, &va);
1001
1002 if (va.busy)
1003 return (EBUSY);
1004 return (0);
1005 }
1006
1007 /*
1008 * Disassociate the underlying file system from a vnode.
1009 * The vnode interlock is held on entry.
1010 */
1011 void
vclean(vp,flags,p)1012 vclean(vp, flags, p)
1013 register struct vnode *vp;
1014 int flags;
1015 struct proc *p;
1016 {
1017 int active;
1018
1019 /*
1020 * Check to see if the vnode is in use.
1021 * If so we have to reference it before we clean it out
1022 * so that its count cannot fall to zero and generate a
1023 * race against ourselves to recycle it.
1024 */
1025 if ((active = vp->v_usecount) != 0)
1026 vp->v_usecount++;
1027
1028 /*
1029 * Prevent the vnode from being recycled or
1030 * brought into use while we clean it out.
1031 */
1032 if (vp->v_flag & VXLOCK)
1033 panic("vclean: deadlock");
1034 vp->v_flag |= VXLOCK;
1035 /*
1036 * Even if the count is zero, the VOP_INACTIVE routine may still
1037 * have the object locked while it cleans it out. The VOP_LOCK
1038 * ensures that the VOP_INACTIVE routine is done with its work.
1039 * For active vnodes, it ensures that no other activity can
1040 * occur while the underlying object is being cleaned out.
1041 */
1042 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1043
1044 /*
1045 * Clean out any VM data associated with the vnode.
1046 */
1047 uvm_vnp_terminate(vp);
1048 /*
1049 * Clean out any buffers associated with the vnode.
1050 */
1051 if (flags & DOCLOSE)
1052 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1053 /*
1054 * If purging an active vnode, it must be closed and
1055 * deactivated before being reclaimed. Note that the
1056 * VOP_INACTIVE will unlock the vnode
1057 */
1058 if (active) {
1059 if (flags & DOCLOSE)
1060 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1061 VOP_INACTIVE(vp, p);
1062 } else {
1063 /*
1064 * Any other processes trying to obtain this lock must first
1065 * wait for VXLOCK to clear, then call the new lock operation.
1066 */
1067 VOP_UNLOCK(vp, 0, p);
1068 }
1069
1070 /*
1071 * Reclaim the vnode.
1072 */
1073 if (VOP_RECLAIM(vp, p))
1074 panic("vclean: cannot reclaim");
1075 if (active) {
1076 simple_lock(&vp->v_interlock);
1077
1078 vp->v_usecount--;
1079 if (vp->v_usecount == 0) {
1080 if (vp->v_holdcnt > 0)
1081 panic("vclean: not clean");
1082 vputonfreelist(vp);
1083 }
1084
1085 simple_unlock(&vp->v_interlock);
1086 }
1087
1088 /*
1089 * Done with purge, notify sleepers of the grim news.
1090 */
1091 vp->v_op = dead_vnodeop_p;
1092 simple_lock(&vp->v_selectinfo.vsi_lock);
1093 VN_KNOTE(vp, NOTE_REVOKE);
1094 simple_unlock(&vp->v_selectinfo.vsi_lock);
1095 vp->v_tag = VT_NON;
1096 vp->v_flag &= ~VXLOCK;
1097 #ifdef VFSDEBUG
1098 vp->v_flag &= ~VLOCKSWORK;
1099 #endif
1100 if (vp->v_flag & VXWANT) {
1101 vp->v_flag &= ~VXWANT;
1102 wakeup(vp);
1103 }
1104 }
1105
1106 /*
1107 * Recycle an unused vnode to the front of the free list.
1108 * Release the passed interlock if the vnode will be recycled.
1109 */
1110 int
vrecycle(vp,inter_lkp,p)1111 vrecycle(vp, inter_lkp, p)
1112 struct vnode *vp;
1113 struct simplelock *inter_lkp;
1114 struct proc *p;
1115 {
1116
1117 simple_lock(&vp->v_interlock);
1118 if (vp->v_usecount == 0) {
1119 if (inter_lkp)
1120 simple_unlock(inter_lkp);
1121 vgonel(vp, p);
1122 return (1);
1123 }
1124 simple_unlock(&vp->v_interlock);
1125 return (0);
1126 }
1127
1128
1129 /*
1130 * Eliminate all activity associated with a vnode
1131 * in preparation for reuse.
1132 */
1133 void
vgone(vp)1134 vgone(vp)
1135 register struct vnode *vp;
1136 {
1137 struct proc *p = curproc;
1138
1139 simple_lock (&vp->v_interlock);
1140 vgonel(vp, p);
1141 }
1142
1143 /*
1144 * vgone, with the vp interlock held.
1145 */
1146 void
vgonel(vp,p)1147 vgonel(vp, p)
1148 struct vnode *vp;
1149 struct proc *p;
1150 {
1151 register struct vnode *vq;
1152 struct vnode *vx;
1153 struct mount *mp;
1154 int flags;
1155
1156 /*
1157 * If a vgone (or vclean) is already in progress,
1158 * wait until it is done and return.
1159 */
1160 if (vp->v_flag & VXLOCK) {
1161 vp->v_flag |= VXWANT;
1162 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1163 return;
1164 }
1165
1166 /*
1167 * Clean out the filesystem specific data.
1168 */
1169 vclean(vp, DOCLOSE, p);
1170 /*
1171 * Delete from old mount point vnode list, if on one.
1172 */
1173 if (vp->v_mount != NULL)
1174 insmntque(vp, (struct mount *)0);
1175 /*
1176 * If special device, remove it from special device alias list
1177 * if it is on one.
1178 */
1179 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1180 simple_lock(&spechash_slock);
1181 if (*vp->v_hashchain == vp) {
1182 *vp->v_hashchain = vp->v_specnext;
1183 } else {
1184 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1185 if (vq->v_specnext != vp)
1186 continue;
1187 vq->v_specnext = vp->v_specnext;
1188 break;
1189 }
1190 if (vq == NULL)
1191 panic("missing bdev");
1192 }
1193 if (vp->v_flag & VALIASED) {
1194 vx = NULL;
1195 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1196 if (vq->v_rdev != vp->v_rdev ||
1197 vq->v_type != vp->v_type)
1198 continue;
1199 if (vx)
1200 break;
1201 vx = vq;
1202 }
1203 if (vx == NULL)
1204 panic("missing alias");
1205 if (vq == NULL)
1206 vx->v_flag &= ~VALIASED;
1207 vp->v_flag &= ~VALIASED;
1208 }
1209 simple_unlock(&spechash_slock);
1210
1211 /*
1212 * If we have a mount point associated with the vnode, we must
1213 * flush it out now, as to not leave a dangling zombie mount
1214 * point laying around in VFS.
1215 */
1216 mp = vp->v_specmountpoint;
1217 if (mp != NULL) {
1218 if (!vfs_busy(mp, LK_EXCLUSIVE, NULL, p)) {
1219 flags = MNT_FORCE | MNT_DOOMED;
1220 dounmount(mp, flags, p, NULL);
1221 }
1222 }
1223
1224 FREE(vp->v_specinfo, M_VNODE);
1225 vp->v_specinfo = NULL;
1226 }
1227 /*
1228 * If it is on the freelist and not already at the head,
1229 * move it to the head of the list.
1230 */
1231 vp->v_type = VBAD;
1232
1233 /*
1234 * Move onto the free list, unless we were called from
1235 * getnewvnode and we're not on any free list
1236 */
1237 if (vp->v_usecount == 0 &&
1238 (vp->v_bioflag & VBIOONFREELIST)) {
1239 int s;
1240
1241 simple_lock(&vnode_free_list_slock);
1242 s = splbio();
1243
1244 if (vp->v_holdcnt > 0)
1245 panic("vgonel: not clean");
1246
1247 if (TAILQ_FIRST(&vnode_free_list) != vp) {
1248 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1249 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1250 }
1251 splx(s);
1252 simple_unlock(&vnode_free_list_slock);
1253 }
1254 }
1255
1256 /*
1257 * Lookup a vnode by device number.
1258 */
1259 int
vfinddev(dev,type,vpp)1260 vfinddev(dev, type, vpp)
1261 dev_t dev;
1262 enum vtype type;
1263 struct vnode **vpp;
1264 {
1265 register struct vnode *vp;
1266 int rc =0;
1267
1268 simple_lock(&spechash_slock);
1269 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1270 if (dev != vp->v_rdev || type != vp->v_type)
1271 continue;
1272 *vpp = vp;
1273 rc = 1;
1274 break;
1275 }
1276 simple_unlock(&spechash_slock);
1277 return (rc);
1278 }
1279
1280 /*
1281 * Revoke all the vnodes corresponding to the specified minor number
1282 * range (endpoints inclusive) of the specified major.
1283 */
1284 void
vdevgone(maj,minl,minh,type)1285 vdevgone(maj, minl, minh, type)
1286 int maj, minl, minh;
1287 enum vtype type;
1288 {
1289 struct vnode *vp;
1290 int mn;
1291
1292 for (mn = minl; mn <= minh; mn++)
1293 if (vfinddev(makedev(maj, mn), type, &vp))
1294 VOP_REVOKE(vp, REVOKEALL);
1295 }
1296
1297 /*
1298 * Calculate the total number of references to a special device.
1299 */
1300 int
vcount(vp)1301 vcount(vp)
1302 struct vnode *vp;
1303 {
1304 struct vnode *vq, *vnext;
1305 int count;
1306
1307 loop:
1308 if ((vp->v_flag & VALIASED) == 0)
1309 return (vp->v_usecount);
1310 simple_lock(&spechash_slock);
1311 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1312 vnext = vq->v_specnext;
1313 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1314 continue;
1315 /*
1316 * Alias, but not in use, so flush it out.
1317 */
1318 if (vq->v_usecount == 0 && vq != vp) {
1319 simple_unlock(&spechash_slock);
1320 vgone(vq);
1321 goto loop;
1322 }
1323 count += vq->v_usecount;
1324 }
1325 simple_unlock(&spechash_slock);
1326 return (count);
1327 }
1328
1329 /*
1330 * Print out a description of a vnode.
1331 */
1332 static char *typename[] =
1333 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1334
1335 void
vprint(label,vp)1336 vprint(label, vp)
1337 char *label;
1338 register struct vnode *vp;
1339 {
1340 char buf[64];
1341
1342 if (label != NULL)
1343 printf("%s: ", label);
1344 printf("type %s, usecount %u, writecount %u, holdcount %u,",
1345 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1346 vp->v_holdcnt);
1347 buf[0] = '\0';
1348 if (vp->v_flag & VROOT)
1349 strlcat(buf, "|VROOT", sizeof buf);
1350 if (vp->v_flag & VTEXT)
1351 strlcat(buf, "|VTEXT", sizeof buf);
1352 if (vp->v_flag & VSYSTEM)
1353 strlcat(buf, "|VSYSTEM", sizeof buf);
1354 if (vp->v_flag & VXLOCK)
1355 strlcat(buf, "|VXLOCK", sizeof buf);
1356 if (vp->v_flag & VXWANT)
1357 strlcat(buf, "|VXWANT", sizeof buf);
1358 if (vp->v_bioflag & VBIOWAIT)
1359 strlcat(buf, "|VBIOWAIT", sizeof buf);
1360 if (vp->v_bioflag & VBIOONFREELIST)
1361 strlcat(buf, "|VBIOONFREELIST", sizeof buf);
1362 if (vp->v_bioflag & VBIOONSYNCLIST)
1363 strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
1364 if (vp->v_flag & VALIASED)
1365 strlcat(buf, "|VALIASED", sizeof buf);
1366 if (buf[0] != '\0')
1367 printf(" flags (%s)", &buf[1]);
1368 if (vp->v_data == NULL) {
1369 printf("\n");
1370 } else {
1371 printf("\n\t");
1372 VOP_PRINT(vp);
1373 }
1374 }
1375
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 *
 * Walks the mount list; mounts that cannot be busied without waiting
 * are skipped.  Every vnode of a busied mount for which VOP_ISLOCKED()
 * is true is printed with vprint().
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	mp = CIRCLEQ_FIRST(&mountlist);
	while (mp != CIRCLEQ_END(&mountlist)) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
			for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
			    vp = LIST_NEXT(vp, v_mntvnodes)) {
				if (VOP_ISLOCKED(vp))
					vprint((char *)0, vp);
			}
			/* Retake the list lock before stepping to the next mount. */
			simple_lock(&mountlist_slock);
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			vfs_unbusy(mp, p);
		} else {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		mp = nmp;
	}
	simple_unlock(&mountlist_slock);

}
#endif
1408
1409 /*
1410 * Top level filesystem related information gathering.
1411 */
1412 int
vfs_sysctl(name,namelen,oldp,oldlenp,newp,newlen,p)1413 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1414 int *name;
1415 u_int namelen;
1416 void *oldp;
1417 size_t *oldlenp;
1418 void *newp;
1419 size_t newlen;
1420 struct proc *p;
1421 {
1422 struct vfsconf *vfsp;
1423
1424 /* all sysctl names at this level are at least name and field */
1425 if (namelen < 2)
1426 return (ENOTDIR); /* overloaded */
1427 if (name[0] != VFS_GENERIC) {
1428 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1429 if (vfsp->vfc_typenum == name[0])
1430 break;
1431 if (vfsp == NULL)
1432 return (EOPNOTSUPP);
1433 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1434 oldp, oldlenp, newp, newlen, p));
1435 }
1436 switch (name[1]) {
1437 case VFS_MAXTYPENUM:
1438 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1439 case VFS_CONF:
1440 if (namelen < 3)
1441 return (ENOTDIR); /* overloaded */
1442 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1443 if (vfsp->vfc_typenum == name[2])
1444 break;
1445 if (vfsp == NULL)
1446 return (EOPNOTSUPP);
1447 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1448 sizeof(struct vfsconf)));
1449 }
1450 return (EOPNOTSUPP);
1451 }
1452
1453 int kinfo_vdebug = 1;
1454 int kinfo_vgetfailed;
1455 #define KINFO_VNODESLOP 10
1456 /*
1457 * Dump vnode list (via sysctl).
1458 * Copyout address of vnode followed by vnode.
1459 */
1460 /* ARGSUSED */
1461 int
sysctl_vnode(where,sizep,p)1462 sysctl_vnode(where, sizep, p)
1463 char *where;
1464 size_t *sizep;
1465 struct proc *p;
1466 {
1467 register struct mount *mp, *nmp;
1468 struct vnode *vp, *nvp;
1469 register char *bp = where, *savebp;
1470 char *ewhere;
1471 int error;
1472
1473 if (where == NULL) {
1474 *sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
1475 return (0);
1476 }
1477 ewhere = where + *sizep;
1478
1479 simple_lock(&mountlist_slock);
1480 for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1481 mp = nmp) {
1482 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1483 nmp = CIRCLEQ_NEXT(mp, mnt_list);
1484 continue;
1485 }
1486 savebp = bp;
1487 again:
1488 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
1489 vp = nvp) {
1490 /*
1491 * Check that the vp is still associated with
1492 * this filesystem. RACE: could have been
1493 * recycled onto the same filesystem.
1494 */
1495 if (vp->v_mount != mp) {
1496 simple_unlock(&mntvnode_slock);
1497 if (kinfo_vdebug)
1498 printf("kinfo: vp changed\n");
1499 bp = savebp;
1500 goto again;
1501 }
1502 nvp = LIST_NEXT(vp, v_mntvnodes);
1503 if (bp + sizeof(struct e_vnode) > ewhere) {
1504 simple_unlock(&mntvnode_slock);
1505 *sizep = bp - where;
1506 vfs_unbusy(mp, p);
1507 return (ENOMEM);
1508 }
1509 if ((error = copyout(&vp,
1510 &((struct e_vnode *)bp)->vptr,
1511 sizeof(struct vnode *))) ||
1512 (error = copyout(vp,
1513 &((struct e_vnode *)bp)->vnode,
1514 sizeof(struct vnode)))) {
1515 vfs_unbusy(mp, p);
1516 return (error);
1517 }
1518 bp += sizeof(struct e_vnode);
1519 simple_lock(&mntvnode_slock);
1520 }
1521
1522 simple_unlock(&mntvnode_slock);
1523 simple_lock(&mountlist_slock);
1524 nmp = CIRCLEQ_NEXT(mp, mnt_list);
1525 vfs_unbusy(mp, p);
1526 }
1527
1528 simple_unlock(&mountlist_slock);
1529
1530 *sizep = bp - where;
1531 return (0);
1532 }
1533
1534 /*
1535 * Check to see if a filesystem is mounted on a block device.
1536 */
1537 int
vfs_mountedon(vp)1538 vfs_mountedon(vp)
1539 register struct vnode *vp;
1540 {
1541 register struct vnode *vq;
1542 int error = 0;
1543
1544 if (vp->v_specmountpoint != NULL)
1545 return (EBUSY);
1546 if (vp->v_flag & VALIASED) {
1547 simple_lock(&spechash_slock);
1548 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1549 if (vq->v_rdev != vp->v_rdev ||
1550 vq->v_type != vp->v_type)
1551 continue;
1552 if (vq->v_specmountpoint != NULL) {
1553 error = EBUSY;
1554 break;
1555 }
1556 }
1557 simple_unlock(&spechash_slock);
1558 }
1559 return (error);
1560 }
1561
1562 /*
1563 * Build hash lists of net addresses and hang them off the mount point.
1564 * Called by ufs_mount() to set up the lists of export addresses.
1565 */
1566 int
vfs_hang_addrlist(mp,nep,argp)1567 vfs_hang_addrlist(mp, nep, argp)
1568 struct mount *mp;
1569 struct netexport *nep;
1570 struct export_args *argp;
1571 {
1572 register struct netcred *np;
1573 register struct radix_node_head *rnh;
1574 register int i;
1575 struct radix_node *rn;
1576 struct sockaddr *saddr, *smask = 0;
1577 struct domain *dom;
1578 int error;
1579
1580 if (argp->ex_addrlen == 0) {
1581 if (mp->mnt_flag & MNT_DEFEXPORTED)
1582 return (EPERM);
1583 np = &nep->ne_defexported;
1584 np->netc_exflags = argp->ex_flags;
1585 np->netc_anon = argp->ex_anon;
1586 np->netc_anon.cr_ref = 1;
1587 mp->mnt_flag |= MNT_DEFEXPORTED;
1588 return (0);
1589 }
1590 if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
1591 argp->ex_addrlen < 0 || argp->ex_masklen < 0)
1592 return (EINVAL);
1593 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1594 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1595 bzero(np, i);
1596 saddr = (struct sockaddr *)(np + 1);
1597 error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
1598 if (error)
1599 goto out;
1600 if (saddr->sa_len > argp->ex_addrlen)
1601 saddr->sa_len = argp->ex_addrlen;
1602 if (argp->ex_masklen) {
1603 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1604 error = copyin(argp->ex_mask, smask, argp->ex_masklen);
1605 if (error)
1606 goto out;
1607 if (smask->sa_len > argp->ex_masklen)
1608 smask->sa_len = argp->ex_masklen;
1609 }
1610 i = saddr->sa_family;
1611 if (i < 0 || i > AF_MAX) {
1612 error = EINVAL;
1613 goto out;
1614 }
1615 if ((rnh = nep->ne_rtable[i]) == 0) {
1616 /*
1617 * Seems silly to initialize every AF when most are not
1618 * used, do so on demand here
1619 */
1620 for (dom = domains; dom; dom = dom->dom_next)
1621 if (dom->dom_family == i && dom->dom_rtattach) {
1622 dom->dom_rtattach((void **)&nep->ne_rtable[i],
1623 dom->dom_rtoffset);
1624 break;
1625 }
1626 if ((rnh = nep->ne_rtable[i]) == 0) {
1627 error = ENOBUFS;
1628 goto out;
1629 }
1630 }
1631 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1632 np->netc_rnodes);
1633 if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1634 error = EPERM;
1635 goto out;
1636 }
1637 np->netc_exflags = argp->ex_flags;
1638 np->netc_anon = argp->ex_anon;
1639 np->netc_anon.cr_ref = 1;
1640 return (0);
1641 out:
1642 free(np, M_NETADDR);
1643 return (error);
1644 }
1645
1646 /* ARGSUSED */
1647 int
vfs_free_netcred(rn,w)1648 vfs_free_netcred(rn, w)
1649 struct radix_node *rn;
1650 void *w;
1651 {
1652 register struct radix_node_head *rnh = (struct radix_node_head *)w;
1653
1654 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
1655 free(rn, M_NETADDR);
1656 return (0);
1657 }
1658
1659 /*
1660 * Free the net address hash lists that are hanging off the mount points.
1661 */
1662 void
vfs_free_addrlist(nep)1663 vfs_free_addrlist(nep)
1664 struct netexport *nep;
1665 {
1666 register int i;
1667 register struct radix_node_head *rnh;
1668
1669 for (i = 0; i <= AF_MAX; i++)
1670 if ((rnh = nep->ne_rtable[i]) != NULL) {
1671 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1672 free(rnh, M_RTABLE);
1673 nep->ne_rtable[i] = 0;
1674 }
1675 }
1676
1677 int
vfs_export(mp,nep,argp)1678 vfs_export(mp, nep, argp)
1679 struct mount *mp;
1680 struct netexport *nep;
1681 struct export_args *argp;
1682 {
1683 int error;
1684
1685 if (argp->ex_flags & MNT_DELEXPORT) {
1686 vfs_free_addrlist(nep);
1687 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1688 }
1689 if (argp->ex_flags & MNT_EXPORTED) {
1690 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1691 return (error);
1692 mp->mnt_flag |= MNT_EXPORTED;
1693 }
1694 return (0);
1695 }
1696
1697 struct netcred *
vfs_export_lookup(mp,nep,nam)1698 vfs_export_lookup(mp, nep, nam)
1699 register struct mount *mp;
1700 struct netexport *nep;
1701 struct mbuf *nam;
1702 {
1703 register struct netcred *np;
1704 register struct radix_node_head *rnh;
1705 struct sockaddr *saddr;
1706
1707 np = NULL;
1708 if (mp->mnt_flag & MNT_EXPORTED) {
1709 /*
1710 * Lookup in the export list first.
1711 */
1712 if (nam != NULL) {
1713 saddr = mtod(nam, struct sockaddr *);
1714 rnh = nep->ne_rtable[saddr->sa_family];
1715 if (rnh != NULL) {
1716 np = (struct netcred *)
1717 (*rnh->rnh_matchaddr)((caddr_t)saddr,
1718 rnh);
1719 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1720 np = NULL;
1721 }
1722 }
1723 /*
1724 * If no address match, use the default if it exists.
1725 */
1726 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1727 np = &nep->ne_defexported;
1728 }
1729 return (np);
1730 }
1731
1732 /*
1733 * Do the usual access checking.
1734 * file_mode, uid and gid are from the vnode in question,
1735 * while acc_mode and cred are from the VOP_ACCESS parameter list
1736 */
1737 int
vaccess(file_mode,uid,gid,acc_mode,cred)1738 vaccess(file_mode, uid, gid, acc_mode, cred)
1739 mode_t file_mode;
1740 uid_t uid;
1741 gid_t gid;
1742 mode_t acc_mode;
1743 struct ucred *cred;
1744 {
1745 mode_t mask;
1746
1747 /* User id 0 always gets access. */
1748 if (cred->cr_uid == 0)
1749 return 0;
1750
1751 mask = 0;
1752
1753 /* Otherwise, check the owner. */
1754 if (cred->cr_uid == uid) {
1755 if (acc_mode & VEXEC)
1756 mask |= S_IXUSR;
1757 if (acc_mode & VREAD)
1758 mask |= S_IRUSR;
1759 if (acc_mode & VWRITE)
1760 mask |= S_IWUSR;
1761 return (file_mode & mask) == mask ? 0 : EACCES;
1762 }
1763
1764 /* Otherwise, check the groups. */
1765 if (cred->cr_gid == gid || groupmember(gid, cred)) {
1766 if (acc_mode & VEXEC)
1767 mask |= S_IXGRP;
1768 if (acc_mode & VREAD)
1769 mask |= S_IRGRP;
1770 if (acc_mode & VWRITE)
1771 mask |= S_IWGRP;
1772 return (file_mode & mask) == mask ? 0 : EACCES;
1773 }
1774
1775 /* Otherwise, check everyone else. */
1776 if (acc_mode & VEXEC)
1777 mask |= S_IXOTH;
1778 if (acc_mode & VREAD)
1779 mask |= S_IROTH;
1780 if (acc_mode & VWRITE)
1781 mask |= S_IWOTH;
1782 return (file_mode & mask) == mask ? 0 : EACCES;
1783 }
1784
1785 /*
1786 * Unmount all file systems.
1787 * We traverse the list in reverse order under the assumption that doing so
1788 * will avoid needing to worry about dependencies.
1789 */
1790 void
vfs_unmountall(void)1791 vfs_unmountall(void)
1792 {
1793 struct mount *mp, *nmp;
1794 int allerror, error, again = 1;
1795 struct proc *p = curproc;
1796
1797 retry:
1798 allerror = 0;
1799 for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1800 mp = nmp) {
1801 nmp = CIRCLEQ_PREV(mp, mnt_list);
1802 if ((vfs_busy(mp, LK_EXCLUSIVE|LK_NOWAIT, NULL, p)) != 0)
1803 continue;
1804 if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
1805 printf("unmount of %s failed with error %d\n",
1806 mp->mnt_stat.f_mntonname, error);
1807 allerror = 1;
1808 }
1809 }
1810
1811 if (allerror) {
1812 printf("WARNING: some file systems would not unmount\n");
1813 if (again) {
1814 printf("retrying\n");
1815 again = 0;
1816 goto retry;
1817 }
1818 }
1819 }
1820
1821 /*
1822 * Sync and unmount file systems before shutting down.
1823 */
1824 void
vfs_shutdown()1825 vfs_shutdown()
1826 {
1827 #ifdef ACCOUNTING
1828 extern void acct_shutdown(void);
1829
1830 acct_shutdown();
1831 #endif
1832
1833 /* XXX Should suspend scheduling. */
1834 (void) spl0();
1835
1836 printf("syncing disks... ");
1837
1838 if (panicstr == 0) {
1839 /* Sync before unmount, in case we hang on something. */
1840 sys_sync(&proc0, (void *)0, (register_t *)0);
1841
1842 /* Unmount file systems. */
1843 vfs_unmountall();
1844 }
1845
1846 if (vfs_syncwait(1))
1847 printf("giving up\n");
1848 else
1849 printf("done\n");
1850 }
1851
1852 /*
1853 * perform sync() operation and wait for buffers to flush.
1854 * assumtions: called w/ scheduler disabled and physical io enabled
1855 * for now called at spl0() XXX
1856 */
1857 int
vfs_syncwait(verbose)1858 vfs_syncwait(verbose)
1859 int verbose;
1860 {
1861 register struct buf *bp;
1862 int iter, nbusy, dcount, s;
1863 struct proc *p;
1864
1865 p = curproc? curproc : &proc0;
1866 sys_sync(p, (void *)0, (register_t *)0);
1867
1868 /* Wait for sync to finish. */
1869 dcount = 10000;
1870 for (iter = 0; iter < 20; iter++) {
1871 nbusy = 0;
1872 for (bp = &buf[nbuf]; --bp >= buf; ) {
1873 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1874 nbusy++;
1875 /*
1876 * With soft updates, some buffers that are
1877 * written will be remarked as dirty until other
1878 * buffers are written.
1879 */
1880 if (bp->b_flags & B_DELWRI) {
1881 s = splbio();
1882 bremfree(bp);
1883 bp->b_flags |= B_BUSY;
1884 splx(s);
1885 nbusy++;
1886 bawrite(bp);
1887 if (dcount-- <= 0) {
1888 if (verbose)
1889 printf("softdep ");
1890 return 1;
1891 }
1892 }
1893 }
1894 if (nbusy == 0)
1895 break;
1896 if (verbose)
1897 printf("%d ", nbusy);
1898 DELAY(40000 * iter);
1899 }
1900
1901 return nbusy;
1902 }
1903
1904 /*
1905 * posix file system related system variables.
1906 */
1907 int
fs_posix_sysctl(name,namelen,oldp,oldlenp,newp,newlen,p)1908 fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1909 int *name;
1910 u_int namelen;
1911 void *oldp;
1912 size_t *oldlenp;
1913 void *newp;
1914 size_t newlen;
1915 struct proc *p;
1916 {
1917 /* all sysctl names at this level are terminal */
1918 if (namelen != 1)
1919 return (ENOTDIR);
1920
1921 switch (name[0]) {
1922 case FS_POSIX_SETUID:
1923 if (newp && securelevel > 0)
1924 return (EPERM);
1925 return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
1926 default:
1927 return (EOPNOTSUPP);
1928 }
1929 /* NOTREACHED */
1930 }
1931
1932 /*
1933 * file system related system variables.
1934 */
1935 int
fs_sysctl(name,namelen,oldp,oldlenp,newp,newlen,p)1936 fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1937 int *name;
1938 u_int namelen;
1939 void *oldp;
1940 size_t *oldlenp;
1941 void *newp;
1942 size_t newlen;
1943 struct proc *p;
1944 {
1945 sysctlfn *fn;
1946
1947 switch (name[0]) {
1948 case FS_POSIX:
1949 fn = fs_posix_sysctl;
1950 break;
1951 default:
1952 return (EOPNOTSUPP);
1953 }
1954 return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
1955 }
1956
1957
1958 /*
1959 * Routines dealing with vnodes and buffers
1960 */
1961
1962 /*
1963 * Wait for all outstanding I/Os to complete
1964 *
1965 * Manipulates v_numoutput. Must be called at splbio()
1966 */
1967 int
vwaitforio(vp,slpflag,wmesg,timeo)1968 vwaitforio(vp, slpflag, wmesg, timeo)
1969 struct vnode *vp;
1970 int slpflag, timeo;
1971 char *wmesg;
1972 {
1973 int error = 0;
1974
1975 splassert(IPL_BIO);
1976
1977 while (vp->v_numoutput) {
1978 vp->v_bioflag |= VBIOWAIT;
1979 error = tsleep(&vp->v_numoutput,
1980 slpflag | (PRIBIO + 1), wmesg, timeo);
1981 if (error)
1982 break;
1983 }
1984
1985 return (error);
1986 }
1987
1988 /*
1989 * Update outstanding I/O count and do wakeup if requested.
1990 *
1991 * Manipulates v_numoutput. Must be called at splbio()
1992 */
1993 void
vwakeup(vp)1994 vwakeup(vp)
1995 struct vnode *vp;
1996 {
1997 splassert(IPL_BIO);
1998
1999 if (vp != NULL) {
2000 if (vp->v_numoutput-- == 0)
2001 panic("vwakeup: neg numoutput");
2002 if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
2003 vp->v_bioflag &= ~VBIOWAIT;
2004 wakeup(&vp->v_numoutput);
2005 }
2006 }
2007 }
2008
2009 /*
2010 * Flush out and invalidate all buffers associated with a vnode.
2011 * Called with the underlying object locked.
2012 */
2013 int
vinvalbuf(vp,flags,cred,p,slpflag,slptimeo)2014 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
2015 register struct vnode *vp;
2016 int flags;
2017 struct ucred *cred;
2018 struct proc *p;
2019 int slpflag, slptimeo;
2020 {
2021 register struct buf *bp;
2022 struct buf *nbp, *blist;
2023 int s, error;
2024
2025 if (flags & V_SAVE) {
2026 s = splbio();
2027 vwaitforio(vp, 0, "vinvalbuf", 0);
2028 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2029 splx(s);
2030 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
2031 return (error);
2032 s = splbio();
2033 if (vp->v_numoutput > 0 ||
2034 !LIST_EMPTY(&vp->v_dirtyblkhd))
2035 panic("vinvalbuf: dirty bufs");
2036 }
2037 splx(s);
2038 }
2039 loop:
2040 s = splbio();
2041 for (;;) {
2042 if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
2043 (flags & V_SAVEMETA))
2044 while (blist && blist->b_lblkno < 0)
2045 blist = LIST_NEXT(blist, b_vnbufs);
2046 if (blist == NULL &&
2047 (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
2048 (flags & V_SAVEMETA))
2049 while (blist && blist->b_lblkno < 0)
2050 blist = LIST_NEXT(blist, b_vnbufs);
2051 if (!blist)
2052 break;
2053
2054 for (bp = blist; bp; bp = nbp) {
2055 nbp = LIST_NEXT(bp, b_vnbufs);
2056 if (flags & V_SAVEMETA && bp->b_lblkno < 0)
2057 continue;
2058 if (bp->b_flags & B_BUSY) {
2059 bp->b_flags |= B_WANTED;
2060 error = tsleep(bp, slpflag | (PRIBIO + 1),
2061 "vinvalbuf", slptimeo);
2062 if (error) {
2063 splx(s);
2064 return (error);
2065 }
2066 break;
2067 }
2068 bremfree(bp);
2069 bp->b_flags |= B_BUSY;
2070 /*
2071 * XXX Since there are no node locks for NFS, I believe
2072 * there is a slight chance that a delayed write will
2073 * occur while sleeping just above, so check for it.
2074 */
2075 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
2076 splx(s);
2077 (void) VOP_BWRITE(bp);
2078 goto loop;
2079 }
2080 bp->b_flags |= B_INVAL;
2081 brelse(bp);
2082 }
2083 }
2084 if (!(flags & V_SAVEMETA) &&
2085 (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
2086 panic("vinvalbuf: flush failed");
2087 splx(s);
2088 return (0);
2089 }
2090
2091 void
vflushbuf(vp,sync)2092 vflushbuf(vp, sync)
2093 register struct vnode *vp;
2094 int sync;
2095 {
2096 register struct buf *bp, *nbp;
2097 int s;
2098
2099 loop:
2100 s = splbio();
2101 for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
2102 bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
2103 nbp = LIST_NEXT(bp, b_vnbufs);
2104 if ((bp->b_flags & B_BUSY))
2105 continue;
2106 if ((bp->b_flags & B_DELWRI) == 0)
2107 panic("vflushbuf: not dirty");
2108 bremfree(bp);
2109 bp->b_flags |= B_BUSY;
2110 splx(s);
2111 /*
2112 * Wait for I/O associated with indirect blocks to complete,
2113 * since there is no way to quickly wait for them below.
2114 */
2115 if (bp->b_vp == vp || sync == 0)
2116 (void) bawrite(bp);
2117 else
2118 (void) bwrite(bp);
2119 goto loop;
2120 }
2121 if (sync == 0) {
2122 splx(s);
2123 return;
2124 }
2125 vwaitforio(vp, 0, "vflushbuf", 0);
2126 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2127 splx(s);
2128 vprint("vflushbuf: dirty", vp);
2129 goto loop;
2130 }
2131 splx(s);
2132 }
2133
2134 /*
2135 * Associate a buffer with a vnode.
2136 *
2137 * Manipulates buffer vnode queues. Must be called at splbio().
2138 */
2139 void
bgetvp(vp,bp)2140 bgetvp(vp, bp)
2141 register struct vnode *vp;
2142 register struct buf *bp;
2143 {
2144 splassert(IPL_BIO);
2145
2146
2147 if (bp->b_vp)
2148 panic("bgetvp: not free");
2149 vhold(vp);
2150 bp->b_vp = vp;
2151 if (vp->v_type == VBLK || vp->v_type == VCHR)
2152 bp->b_dev = vp->v_rdev;
2153 else
2154 bp->b_dev = NODEV;
2155 /*
2156 * Insert onto list for new vnode.
2157 */
2158 bufinsvn(bp, &vp->v_cleanblkhd);
2159 }
2160
2161 /*
2162 * Disassociate a buffer from a vnode.
2163 *
2164 * Manipulates vnode buffer queues. Must be called at splbio().
2165 */
2166 void
brelvp(bp)2167 brelvp(bp)
2168 register struct buf *bp;
2169 {
2170 struct vnode *vp;
2171
2172 splassert(IPL_BIO);
2173
2174 if ((vp = bp->b_vp) == (struct vnode *) 0)
2175 panic("brelvp: NULL");
2176 /*
2177 * Delete from old vnode list, if on one.
2178 */
2179 if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
2180 bufremvn(bp);
2181 if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2182 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2183 vp->v_bioflag &= ~VBIOONSYNCLIST;
2184 LIST_REMOVE(vp, v_synclist);
2185 }
2186 bp->b_vp = (struct vnode *) 0;
2187
2188 simple_lock(&vp->v_interlock);
2189 #ifdef DIAGNOSTIC
2190 if (vp->v_holdcnt == 0)
2191 panic("brelvp: holdcnt");
2192 #endif
2193 vp->v_holdcnt--;
2194
2195 /*
2196 * If it is on the holdlist and the hold count drops to
2197 * zero, move it to the free list.
2198 */
2199 if ((vp->v_bioflag & VBIOONFREELIST) &&
2200 vp->v_holdcnt == 0 && vp->v_usecount == 0) {
2201 simple_lock(&vnode_free_list_slock);
2202 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
2203 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2204 simple_unlock(&vnode_free_list_slock);
2205 }
2206 simple_unlock(&vp->v_interlock);
2207 }
2208
2209 /*
2210 * Replaces the current vnode associated with the buffer, if any,
2211 * with a new vnode.
2212 *
2213 * If an output I/O is pending on the buffer, the old vnode
2214 * I/O count is adjusted.
2215 *
2216 * Ignores vnode buffer queues. Must be called at splbio().
2217 */
2218 void
buf_replacevnode(bp,newvp)2219 buf_replacevnode(bp, newvp)
2220 struct buf *bp;
2221 struct vnode *newvp;
2222 {
2223 struct vnode *oldvp = bp->b_vp;
2224
2225 splassert(IPL_BIO);
2226
2227 if (oldvp)
2228 brelvp(bp);
2229
2230 if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
2231 newvp->v_numoutput++; /* put it on swapdev */
2232 vwakeup(oldvp);
2233 }
2234
2235 bgetvp(newvp, bp);
2236 bufremvn(bp);
2237 }
2238
2239 /*
2240 * Used to assign buffers to the appropriate clean or dirty list on
2241 * the vnode and to add newly dirty vnodes to the appropriate
2242 * filesystem syncer list.
2243 *
2244 * Manipulates vnode buffer queues. Must be called at splbio().
2245 */
2246 void
reassignbuf(bp)2247 reassignbuf(bp)
2248 struct buf *bp;
2249 {
2250 struct buflists *listheadp;
2251 int delay;
2252 struct vnode *vp = bp->b_vp;
2253
2254 splassert(IPL_BIO);
2255
2256 /*
2257 * Delete from old vnode list, if on one.
2258 */
2259 if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
2260 bufremvn(bp);
2261
2262 /*
2263 * If dirty, put on list of dirty buffers;
2264 * otherwise insert onto list of clean buffers.
2265 */
2266 if ((bp->b_flags & B_DELWRI) == 0) {
2267 listheadp = &vp->v_cleanblkhd;
2268 if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2269 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2270 vp->v_bioflag &= ~VBIOONSYNCLIST;
2271 LIST_REMOVE(vp, v_synclist);
2272 }
2273 } else {
2274 listheadp = &vp->v_dirtyblkhd;
2275 if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
2276 switch (vp->v_type) {
2277 case VDIR:
2278 delay = syncdelay / 2;
2279 break;
2280 case VBLK:
2281 if (vp->v_specmountpoint != NULL) {
2282 delay = syncdelay / 3;
2283 break;
2284 }
2285 /* fall through */
2286 default:
2287 delay = syncdelay;
2288 }
2289 vn_syncer_add_to_worklist(vp, delay);
2290 }
2291 }
2292 bufinsvn(bp, listheadp);
2293 }
2294
2295 int
vfs_register(vfs)2296 vfs_register(vfs)
2297 struct vfsconf *vfs;
2298 {
2299 struct vfsconf *vfsp;
2300 struct vfsconf **vfspp;
2301
2302 #ifdef DIAGNOSTIC
2303 /* Paranoia? */
2304 if (vfs->vfc_refcount != 0)
2305 printf("vfs_register called with vfc_refcount > 0\n");
2306 #endif
2307
2308 /* Check if filesystem already known */
2309 for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2310 vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
2311 if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2312 return (EEXIST);
2313
2314 if (vfs->vfc_typenum > maxvfsconf)
2315 maxvfsconf = vfs->vfc_typenum;
2316
2317 vfs->vfc_next = NULL;
2318
2319 /* Add to the end of the list */
2320 *vfspp = vfs;
2321
2322 /* Call vfs_init() */
2323 if (vfs->vfc_vfsops->vfs_init)
2324 (*(vfs->vfc_vfsops->vfs_init))(vfs);
2325
2326 return 0;
2327 }
2328
2329 int
vfs_unregister(vfs)2330 vfs_unregister(vfs)
2331 struct vfsconf *vfs;
2332 {
2333 struct vfsconf *vfsp;
2334 struct vfsconf **vfspp;
2335 int maxtypenum;
2336
2337 /* Find our vfsconf struct */
2338 for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2339 vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
2340 if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2341 break;
2342 }
2343
2344 if (!vfsp) /* Not found */
2345 return (ENOENT);
2346
2347 if (vfsp->vfc_refcount) /* In use */
2348 return (EBUSY);
2349
2350 /* Remove from list and free */
2351 *vfspp = vfsp->vfc_next;
2352
2353 maxtypenum = 0;
2354
2355 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2356 if (vfsp->vfc_typenum > maxtypenum)
2357 maxtypenum = vfsp->vfc_typenum;
2358
2359 maxvfsconf = maxtypenum;
2360 return 0;
2361 }
2362
2363 /*
2364 * Check if vnode represents a disk device
2365 */
2366 int
vn_isdisk(vp,errp)2367 vn_isdisk(vp, errp)
2368 struct vnode *vp;
2369 int *errp;
2370 {
2371 if (vp->v_type != VBLK && vp->v_type != VCHR)
2372 return (0);
2373
2374 return (1);
2375 }
2376