1 /* $NetBSD: vfs_subr.c,v 1.502 2024/12/07 02:27:38 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019, 2020
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center, by Charles M. Hannum, by Andrew Doran,
11 * by Marshall Kirk McKusick and Greg Ganger at the University of Michigan.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * Copyright (c) 1989, 1993
37 * The Regents of the University of California. All rights reserved.
38 * (c) UNIX System Laboratories, Inc.
39 * All or some portions of this file are derived from material licensed
40 * to the University of California by American Telephone and Telegraph
41 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 * the permission of UNIX System Laboratories, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
69 */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.502 2024/12/07 02:27:38 riastradh Exp $");
73
74 #ifdef _KERNEL_OPT
75 #include "opt_compat_43.h"
76 #include "opt_compat_netbsd.h"
77 #include "opt_ddb.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/types.h>
82
83 #include <sys/buf.h>
84 #include <sys/conf.h>
85 #include <sys/dirent.h>
86 #include <sys/errno.h>
87 #include <sys/filedesc.h>
88 #include <sys/fstrans.h>
89 #include <sys/kauth.h>
90 #include <sys/kernel.h>
91 #include <sys/kmem.h>
92 #include <sys/module.h>
93 #include <sys/mount.h>
94 #include <sys/namei.h>
95 #include <sys/sdt.h>
96 #include <sys/stat.h>
97 #include <sys/syscallargs.h>
98 #include <sys/sysctl.h>
99 #include <sys/systm.h>
100 #include <sys/vnode_impl.h>
101
102 #include <miscfs/deadfs/deadfs.h>
103 #include <miscfs/genfs/genfs.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <uvm/uvm_ddb.h>
107
/*
 * SDT (statically defined tracing) probes for the syncer.  The comment
 * following each argument type names the value passed at the probe site.
 */

/* Vnode entries on the syncer work queues. */
SDT_PROBE_DEFINE3(vfs, syncer, worklist, vnode__add,
    "struct vnode *"/*vp*/,
    "int"/*delayx*/,
    "int"/*slot*/);
SDT_PROBE_DEFINE4(vfs, syncer, worklist, vnode__update,
    "struct vnode *"/*vp*/,
    "int"/*delayx*/,
    "int"/*oslot*/,
    "int"/*nslot*/);
SDT_PROBE_DEFINE1(vfs, syncer, worklist, vnode__remove,
    "struct vnode *"/*vp*/);

/* Mount points tracked by the syncer. */
SDT_PROBE_DEFINE3(vfs, syncer, worklist, mount__add,
    "struct mount *"/*mp*/,
    "int"/*vdelay*/,
    "int"/*slot*/);
SDT_PROBE_DEFINE4(vfs, syncer, worklist, mount__update,
    "struct mount *"/*mp*/,
    "int"/*vdelay*/,
    "int"/*oslot*/,
    "int"/*nslot*/);
SDT_PROBE_DEFINE1(vfs, syncer, worklist, mount__remove,
    "struct mount *"/*mp*/);

/* Progress of one pass of the syncer loop in sched_sync(). */
SDT_PROBE_DEFINE1(vfs, syncer, sync, start,
    "int"/*starttime*/);
SDT_PROBE_DEFINE1(vfs, syncer, sync, mount__start,
    "struct mount *"/*mp*/);
SDT_PROBE_DEFINE2(vfs, syncer, sync, mount__done,
    "struct mount *"/*mp*/,
    "int"/*error*/);
SDT_PROBE_DEFINE1(vfs, syncer, sync, mount__skip,
    "struct mount *"/*mp*/);
SDT_PROBE_DEFINE1(vfs, syncer, sync, vnode__start,
    "struct vnode *"/*vp*/);
SDT_PROBE_DEFINE2(vfs, syncer, sync, vnode__done,
    "struct vnode *"/*vp*/,
    "int"/*error*/);
SDT_PROBE_DEFINE2(vfs, syncer, sync, vnode__fail__lock,
    "struct vnode *"/*vp*/,
    "int"/*error*/);
SDT_PROBE_DEFINE2(vfs, syncer, sync, vnode__fail__vget,
    "struct vnode *"/*vp*/,
    "int"/*error*/);
SDT_PROBE_DEFINE2(vfs, syncer, sync, done,
    "int"/*starttime*/,
    "int"/*endtime*/);
155
/*
 * Conversion tables between file-mode type bits and vnode types.
 *
 * NOTE(review): iftovt_tab is presumably indexed by the S_IFMT bits of a
 * mode reduced to a 4-bit value, and vttoif_tab by enum vtype; the macros
 * that use these tables are not visible here -- confirm before relying
 * on the ordering.
 */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
164
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn() puts bp at the head of the buffer list dp.
 *
 * bufremvn() unlinks bp from the list it is on and stores NOLIST in its
 * le_next link; brelvp() and reassignbuf() test for that value to tell
 * whether a buffer is on a list at all.
 *
 * bufremvn() is wrapped in do { } while (0) so that it expands to a
 * single statement and stays correct as the body of an if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (/*CONSTCOND*/ 0)
173
int doforce = 1;		/* 1 => permit forcible unmounting */

/*
 * Local declarations.
 */

/* Sets up the syncer queues, lock and sysctl nodes; defined further down. */
static void vn_initialize_syncerd(void);
181
/*
 * Bring up the vnode management data structures: the syncer state
 * first, followed by the mount layer and then the vnode layer.
 */
void
vntblinit(void)
{

	vn_initialize_syncerd();
	vfs_mount_sysinit();
	vfs_vnode_sysinit();
}
193
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * If V_SAVE is set in flags, pages are cleaned (PGO_CLEANIT) and the
 * vnode is fsync'ed before its buffers are invalidated; otherwise the
 * buffers are released with BC_INVAL without being written.  catch_p
 * and slptimeo are passed through to bbusy() when a buffer has to be
 * waited for.  The l argument is not used here.
 *
 * Returns 0 on success, or the first error reported by VOP_PUTPAGES(),
 * VOP_FSYNC() or bbusy().
 */
int
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
    bool catch_p, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	/*
	 * NOTE(review): vmobjlock is taken here and never released in this
	 * function; presumably VOP_PUTPAGES() drops it before returning --
	 * confirm against the VOP_PUTPAGES contract.
	 */
	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
		if (error)
			return error;
		KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
	}

	mutex_enter(&bufcache_lock);
restart:
	/* Invalidate everything on the dirty list. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		KASSERT(bp->b_vp == vp);
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch_p, slptimeo, NULL);
		if (error != 0) {
			/* EPASSTHROUGH from bbusy(): rescan from the top. */
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return error;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	/* Then everything on the clean list. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		KASSERT(bp->b_vp == vp);
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, catch_p, slptimeo, NULL);
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;
			mutex_exit(&bufcache_lock);
			return error;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			/*
			 * Write the buffer with bufcache_lock dropped,
			 * then rescan both lists from the start.
			 */
			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
			mutex_exit(&bufcache_lock);
			VOP_BWRITE(bp->b_vp, bp);
			mutex_enter(&bufcache_lock);
			goto restart;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

#ifdef DIAGNOSTIC
	/* Both lists must be empty once the two passes complete. */
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	mutex_exit(&bufcache_lock);

	return 0;
}
274
275 /*
276 * Destroy any in core blocks past the truncation length.
277 * Called with the underlying vnode locked, which should prevent new dirty
278 * buffers from being queued.
279 */
280 int
vtruncbuf(struct vnode * vp,daddr_t lbn,bool catch_p,int slptimeo)281 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch_p, int slptimeo)
282 {
283 struct buf *bp, *nbp;
284 int error;
285 voff_t off;
286
287 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
288 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
289 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
290 if (error) {
291 return error;
292 }
293
294 mutex_enter(&bufcache_lock);
295 restart:
296 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
297 KASSERT(bp->b_vp == vp);
298 nbp = LIST_NEXT(bp, b_vnbufs);
299 if (bp->b_lblkno < lbn)
300 continue;
301 error = bbusy(bp, catch_p, slptimeo, NULL);
302 if (error != 0) {
303 if (error == EPASSTHROUGH)
304 goto restart;
305 mutex_exit(&bufcache_lock);
306 return error;
307 }
308 brelsel(bp, BC_INVAL | BC_VFLUSH);
309 }
310
311 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
312 KASSERT(bp->b_vp == vp);
313 nbp = LIST_NEXT(bp, b_vnbufs);
314 if (bp->b_lblkno < lbn)
315 continue;
316 error = bbusy(bp, catch_p, slptimeo, NULL);
317 if (error != 0) {
318 if (error == EPASSTHROUGH)
319 goto restart;
320 mutex_exit(&bufcache_lock);
321 return error;
322 }
323 brelsel(bp, BC_INVAL | BC_VFLUSH);
324 }
325 mutex_exit(&bufcache_lock);
326
327 return 0;
328 }
329
/*
 * Flush all dirty buffers from a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * flags takes FSYNC_* bits: FSYNC_WAIT makes the flush synchronous (the
 * function then waits for v_numoutput to drain and rescans if dirty
 * buffers remain), FSYNC_LAZY adds PGO_LAZY to the page flush.
 *
 * Returns 0, or the error from bwrite() on the synchronous write path.
 * The result of VOP_PUTPAGES() is deliberately ignored.
 */
int
vflushbuf(struct vnode *vp, int flags)
{
	struct buf *bp, *nbp;
	int error, pflags;
	bool dirty, sync;

	sync = (flags & FSYNC_WAIT) != 0;
	pflags = PGO_CLEANIT | PGO_ALLPAGES |
	    (sync ? PGO_SYNCIO : 0) |
	    ((flags & FSYNC_LAZY) ? PGO_LAZY : 0);
	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
	(void) VOP_PUTPAGES(vp, 0, 0, pflags);

loop:
	mutex_enter(&bufcache_lock);
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		KASSERT(bp->b_vp == vp);
		nbp = LIST_NEXT(bp, b_vnbufs);
		/* Skip buffers somebody else already owns. */
		if ((bp->b_cflags & BC_BUSY))
			continue;
		if ((bp->b_oflags & BO_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		/* Claim the buffer, then write it with the lock dropped. */
		bp->b_cflags |= BC_BUSY | BC_VFLUSH;
		mutex_exit(&bufcache_lock);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		/*
		 * NOTE(review): given the KASSERT(bp->b_vp == vp) above,
		 * this condition looks always true, which would leave the
		 * bwrite() branch unreachable -- confirm.
		 */
		if (bp->b_vp == vp || !sync)
			(void) bawrite(bp);
		else {
			error = bwrite(bp);
			if (error)
				return error;
		}
		/* The list may have changed while unlocked; rescan. */
		goto loop;
	}
	mutex_exit(&bufcache_lock);

	if (!sync)
		return 0;

	/* Synchronous flush: wait until all outstanding writes finish. */
	mutex_enter(vp->v_interlock);
	while (vp->v_numoutput != 0)
		cv_wait(&vp->v_cv, vp->v_interlock);
	dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
	mutex_exit(vp->v_interlock);

	if (dirty) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}

	return 0;
}
391
392 /*
393 * Create a vnode for a block device.
394 * Used for root filesystem and swap areas.
395 * Also used for memory file system special devices.
396 */
397 int
bdevvp(dev_t dev,vnode_t ** vpp)398 bdevvp(dev_t dev, vnode_t **vpp)
399 {
400 struct vattr va;
401
402 vattr_null(&va);
403 va.va_type = VBLK;
404 va.va_rdev = dev;
405
406 return vcache_new(dead_rootmount, NULL, &va, NOCRED, NULL, vpp);
407 }
408
409 /*
410 * Create a vnode for a character device.
411 * Used for kernfs and some console handling.
412 */
413 int
cdevvp(dev_t dev,vnode_t ** vpp)414 cdevvp(dev_t dev, vnode_t **vpp)
415 {
416 struct vattr va;
417
418 vattr_null(&va);
419 va.va_type = VCHR;
420 va.va_rdev = dev;
421
422 return vcache_new(dead_rootmount, NULL, &va, NOCRED, NULL, vpp);
423 }
424
425 /*
426 * Associate a buffer with a vnode. There must already be a hold on
427 * the vnode.
428 */
429 void
bgetvp(struct vnode * vp,struct buf * bp)430 bgetvp(struct vnode *vp, struct buf *bp)
431 {
432
433 KASSERT(bp->b_vp == NULL);
434 KASSERT(bp->b_objlock == &buffer_lock);
435 KASSERT(mutex_owned(vp->v_interlock));
436 KASSERT(mutex_owned(&bufcache_lock));
437 KASSERT((bp->b_cflags & BC_BUSY) != 0);
438 KASSERT(!cv_has_waiters(&bp->b_done));
439
440 vholdl(vp);
441 bp->b_vp = vp;
442 if (vp->v_type == VBLK || vp->v_type == VCHR)
443 bp->b_dev = vp->v_rdev;
444 else
445 bp->b_dev = NODEV;
446
447 /*
448 * Insert onto list for new vnode.
449 */
450 bufinsvn(bp, &vp->v_cleanblkhd);
451 bp->b_objlock = vp->v_interlock;
452 }
453
454 /*
455 * Disassociate a buffer from a vnode.
456 */
457 void
brelvp(struct buf * bp)458 brelvp(struct buf *bp)
459 {
460 struct vnode *vp = bp->b_vp;
461
462 KASSERT(vp != NULL);
463 KASSERT(bp->b_objlock == vp->v_interlock);
464 KASSERT(mutex_owned(vp->v_interlock));
465 KASSERT(mutex_owned(&bufcache_lock));
466 KASSERT((bp->b_cflags & BC_BUSY) != 0);
467 KASSERT(!cv_has_waiters(&bp->b_done));
468
469 /*
470 * Delete from old vnode list, if on one.
471 */
472 if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
473 bufremvn(bp);
474
475 if ((vp->v_iflag & (VI_ONWORKLST | VI_PAGES)) == VI_ONWORKLST &&
476 LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
477 vn_syncer_remove_from_worklist(vp);
478
479 bp->b_objlock = &buffer_lock;
480 bp->b_vp = NULL;
481 holdrelel(vp);
482 }
483
484 /*
485 * Reassign a buffer from one vnode list to another.
486 * The list reassignment must be within the same vnode.
487 * Used to assign file specific control information
488 * (indirect blocks) to the list to which they belong.
489 */
490 void
reassignbuf(struct buf * bp,struct vnode * vp)491 reassignbuf(struct buf *bp, struct vnode *vp)
492 {
493 struct buflists *listheadp;
494 int delayx;
495
496 KASSERT(mutex_owned(&bufcache_lock));
497 KASSERT(bp->b_objlock == vp->v_interlock);
498 KASSERT(mutex_owned(vp->v_interlock));
499 KASSERT((bp->b_cflags & BC_BUSY) != 0);
500
501 /*
502 * Delete from old vnode list, if on one.
503 */
504 if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
505 bufremvn(bp);
506
507 /*
508 * If dirty, put on list of dirty buffers;
509 * otherwise insert onto list of clean buffers.
510 */
511 if ((bp->b_oflags & BO_DELWRI) == 0) {
512 listheadp = &vp->v_cleanblkhd;
513 if ((vp->v_iflag & (VI_ONWORKLST | VI_PAGES)) ==
514 VI_ONWORKLST &&
515 LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
516 vn_syncer_remove_from_worklist(vp);
517 } else {
518 listheadp = &vp->v_dirtyblkhd;
519 if ((vp->v_iflag & VI_ONWORKLST) == 0) {
520 switch (vp->v_type) {
521 case VDIR:
522 delayx = dirdelay;
523 break;
524 case VBLK:
525 if (spec_node_getmountedfs(vp) != NULL) {
526 delayx = metadelay;
527 break;
528 }
529 /* fall through */
530 default:
531 delayx = filedelay;
532 break;
533 }
534 if (!vp->v_mount ||
535 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
536 vn_syncer_add_to_worklist(vp, delayx);
537 }
538 }
539 bufinsvn(bp, listheadp);
540 }
541
542 /*
543 * Lookup a vnode by device number and return it referenced.
544 */
545 int
vfinddev(dev_t dev,enum vtype type,vnode_t ** vpp)546 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
547 {
548
549 return (spec_node_lookup_by_dev(type, dev, VDEAD_NOWAIT, vpp) == 0);
550 }
551
552 /*
553 * Revoke all the vnodes corresponding to the specified minor number
554 * range (endpoints inclusive) of the specified major.
555 */
556 void
vdevgone(int maj,int minl,int minh,enum vtype type)557 vdevgone(int maj, int minl, int minh, enum vtype type)
558 {
559 vnode_t *vp;
560 dev_t dev;
561 int mn;
562
563 for (mn = minl; mn <= minh; mn++) {
564 dev = makedev(maj, mn);
565 /*
566 * Notify anyone trying to get at this device that it
567 * has been detached, and then revoke it.
568 */
569 switch (type) {
570 case VBLK:
571 bdev_detached(dev);
572 break;
573 case VCHR:
574 cdev_detached(dev);
575 break;
576 default:
577 panic("invalid specnode type: %d", type);
578 }
579 /*
580 * Passing 0 as flags, instead of VDEAD_NOWAIT, means
581 * spec_node_lookup_by_dev will wait for vnodes it
582 * finds concurrently being revoked before returning.
583 */
584 while (spec_node_lookup_by_dev(type, dev, 0, &vp) == 0) {
585 VOP_REVOKE(vp, REVOKEALL);
586 vrele(vp);
587 }
588 }
589 }
590
591 /*
592 * The filesystem synchronizer mechanism - syncer.
593 *
594 * It is useful to delay writes of file data and filesystem metadata for
595 * a certain amount of time so that quickly created and deleted files need
596 * not waste disk bandwidth being created and removed. To implement this,
597 * vnodes are appended to a "workitem" queue.
598 *
 * Most pending metadata should not wait for more than ten seconds.  Thus,
 * metadata for file systems mounted on block devices is delayed only about
 * a third of the time that file data is delayed.  Similarly, directory
 * updates are more critical, so they are only delayed about half the time
 * that file data is delayed.
603 *
 * There are SYNCER_MAXDELAY + 2 queues that are processed in a round-robin
 * manner at a rate of one each second (driven off the filesystem syncer
 * thread).  The syncer_delayno variable indicates the next queue that is
607 * to be processed. Items that need to be processed soon are placed in
608 * this queue:
609 *
610 * syncer_workitem_pending[syncer_delayno]
611 *
 * A delay of e.g. fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) % syncer_last]
616 *
617 * Flag VI_ONWORKLST indicates that vnode is added into the queue.
618 */
619
/* Initial value of syncer_maxdelay; also sizes the queue array (+ 2). */
#define	SYNCER_MAXDELAY	32

/* One syncer work queue: a tail queue of vnode_impl entries. */
typedef TAILQ_HEAD(synclist, vnode_impl) synclist_t;

static void	vn_syncer_add1(struct vnode *, int);
static void	sysctl_vfs_syncfs_setup(struct sysctllog **);

/*
 * Defines and variables for the syncer process.
 *
 * syncdelay, filedelay, dirdelay and metadelay are exported read/write
 * through sysctl by sysctl_vfs_syncfs_setup(); lockdelay is not.
 */
int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;			/* max time to delay syncing data */
time_t filedelay = 30;			/* time to delay syncing files */
time_t dirdelay  = 15;			/* time to delay syncing directories */
time_t metadelay = 10;			/* time to delay syncing metadata */
time_t lockdelay = 1;			/* time to delay if locking fails */

static kmutex_t		syncer_data_lock; /* short term lock on data structs */

/* Index of the queue sched_sync() will process next. */
static int		syncer_delayno = 0;
/* Number of queues in syncer_workitem_pending; set at initialization. */
static long		syncer_last;
/* Array of syncer_last work queues, allocated at initialization. */
static synclist_t *	syncer_workitem_pending;
642
643 static void
vn_initialize_syncerd(void)644 vn_initialize_syncerd(void)
645 {
646 int i;
647
648 syncer_last = SYNCER_MAXDELAY + 2;
649
650 sysctl_vfs_syncfs_setup(NULL);
651
652 syncer_workitem_pending =
653 kmem_alloc(syncer_last * sizeof (struct synclist), KM_SLEEP);
654
655 for (i = 0; i < syncer_last; i++)
656 TAILQ_INIT(&syncer_workitem_pending[i]);
657
658 mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_NONE);
659 }
660
661 /*
662 * Return delay factor appropriate for the given file system. For
663 * WAPBL we use the sync vnode to burst out metadata updates: sync
664 * those file systems more frequently.
665 */
666 static inline int
sync_delay(struct mount * mp)667 sync_delay(struct mount *mp)
668 {
669
670 return mp->mnt_wapbl != NULL ? metadelay : syncdelay;
671 }
672
673 /*
674 * Compute the next slot index from delay.
675 */
676 static inline int
sync_delay_slot(int delayx)677 sync_delay_slot(int delayx)
678 {
679
680 if (delayx > syncer_maxdelay - 2)
681 delayx = syncer_maxdelay - 2;
682 return (syncer_delayno + delayx) % syncer_last;
683 }
684
685 /*
686 * Add an item to the syncer work queue.
687 */
688 static void
vn_syncer_add1(struct vnode * vp,int delayx)689 vn_syncer_add1(struct vnode *vp, int delayx)
690 {
691 synclist_t *slp;
692 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
693
694 KASSERT(mutex_owned(&syncer_data_lock));
695
696 if (vp->v_iflag & VI_ONWORKLST) {
697 /*
698 * Remove in order to adjust the position of the vnode.
699 * Note: called from sched_sync(), which will not hold
700 * interlock, therefore we cannot modify v_iflag here.
701 */
702 slp = &syncer_workitem_pending[vip->vi_synclist_slot];
703 TAILQ_REMOVE(slp, vip, vi_synclist);
704 } else {
705 KASSERT(mutex_owned(vp->v_interlock));
706 vp->v_iflag |= VI_ONWORKLST;
707 }
708
709 vip->vi_synclist_slot = sync_delay_slot(delayx);
710
711 slp = &syncer_workitem_pending[vip->vi_synclist_slot];
712 TAILQ_INSERT_TAIL(slp, vip, vi_synclist);
713 }
714
715 void
vn_syncer_add_to_worklist(struct vnode * vp,int delayx)716 vn_syncer_add_to_worklist(struct vnode *vp, int delayx)
717 {
718 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
719
720 KASSERT(mutex_owned(vp->v_interlock));
721
722 mutex_enter(&syncer_data_lock);
723 vn_syncer_add1(vp, delayx);
724 SDT_PROBE3(vfs, syncer, worklist, vnode__add,
725 vp, delayx, vip->vi_synclist_slot);
726 mutex_exit(&syncer_data_lock);
727 }
728
729 /*
730 * Remove an item from the syncer work queue.
731 */
732 void
vn_syncer_remove_from_worklist(struct vnode * vp)733 vn_syncer_remove_from_worklist(struct vnode *vp)
734 {
735 synclist_t *slp;
736 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
737
738 KASSERT(mutex_owned(vp->v_interlock));
739
740 if (vp->v_iflag & VI_ONWORKLST) {
741 mutex_enter(&syncer_data_lock);
742 SDT_PROBE1(vfs, syncer, worklist, vnode__remove, vp);
743 vp->v_iflag &= ~VI_ONWORKLST;
744 slp = &syncer_workitem_pending[vip->vi_synclist_slot];
745 TAILQ_REMOVE(slp, vip, vi_synclist);
746 mutex_exit(&syncer_data_lock);
747 }
748 }
749
750 /*
751 * Add this mount point to the syncer.
752 */
753 void
vfs_syncer_add_to_worklist(struct mount * mp)754 vfs_syncer_add_to_worklist(struct mount *mp)
755 {
756 static int start, incr, next;
757 int vdelay;
758
759 KASSERT(mutex_owned(mp->mnt_updating));
760 KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) == 0);
761
762 /*
763 * We attempt to scatter the mount points on the list
764 * so that they will go off at evenly distributed times
765 * even if all the filesystems are mounted at once.
766 */
767
768 next += incr;
769 if (next == 0 || next > syncer_maxdelay) {
770 start /= 2;
771 incr /= 2;
772 if (start == 0) {
773 start = syncer_maxdelay / 2;
774 incr = syncer_maxdelay;
775 }
776 next = start;
777 }
778 mp->mnt_iflag |= IMNT_ONWORKLIST;
779 vdelay = sync_delay(mp);
780 mp->mnt_synclist_slot = vdelay > 0 ? next % vdelay : 0;
781 SDT_PROBE3(vfs, syncer, worklist, mount__add,
782 mp, vdelay, mp->mnt_synclist_slot);
783 }
784
785 /*
786 * Remove the mount point from the syncer.
787 */
788 void
vfs_syncer_remove_from_worklist(struct mount * mp)789 vfs_syncer_remove_from_worklist(struct mount *mp)
790 {
791
792 KASSERT(mutex_owned(mp->mnt_updating));
793 KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) != 0);
794
795 SDT_PROBE1(vfs, syncer, worklist, mount__remove, mp);
796 mp->mnt_iflag &= ~IMNT_ONWORKLIST;
797 }
798
799 /*
800 * Try lazy sync, return true on success.
801 */
802 static bool
lazy_sync_vnode(struct vnode * vp)803 lazy_sync_vnode(struct vnode *vp)
804 {
805 bool synced;
806 int error;
807
808 KASSERT(mutex_owned(&syncer_data_lock));
809
810 synced = false;
811 if ((error = vcache_tryvget(vp)) == 0) {
812 mutex_exit(&syncer_data_lock);
813 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT)) == 0) {
814 synced = true;
815 SDT_PROBE1(vfs, syncer, sync, vnode__start, vp);
816 error = VOP_FSYNC(vp, curlwp->l_cred,
817 FSYNC_LAZY, 0, 0);
818 SDT_PROBE2(vfs, syncer, sync, vnode__done, vp, error);
819 vput(vp);
820 } else {
821 SDT_PROBE2(vfs, syncer, sync, vnode__fail__lock,
822 vp, error);
823 vrele(vp);
824 }
825 mutex_enter(&syncer_data_lock);
826 } else {
827 SDT_PROBE2(vfs, syncer, sync, vnode__fail__vget, vp, error);
828 }
829 return synced;
830 }
831
/*
 * System filesystem synchronizer daemon.
 *
 * Never returns.  Each pass of the outer loop (1) lazily syncs every
 * mount point whose slot equals syncer_delayno and moves it to a later
 * slot, (2) advances syncer_delayno, (3) drains the vnode queue for
 * the slot just left, requeueing vnodes that are still at its head
 * after the sync attempt, and (4) pauses for about a second if the
 * pass finished within the same second it started.  The arg parameter
 * is not used.
 */
void
sched_sync(void *arg)
{
	mount_iterator_t *iter;
	synclist_t *slp;
	struct vnode_impl *vi;
	struct vnode *vp;
	struct mount *mp;
	time_t starttime, endtime;
	int vdelay, oslot, nslot, delayx;
	bool synced;
	int error;

	for (;;) {
		starttime = time_second;
		SDT_PROBE1(vfs, syncer, sync, start, starttime);

		/*
		 * Sync mounts whose dirty time has expired.
		 */
		mountlist_iterator_init(&iter);
		while ((mp = mountlist_iterator_trynext(iter)) != NULL) {
			/* Skip mounts not registered or not due in this slot. */
			if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0 ||
			    mp->mnt_synclist_slot != syncer_delayno) {
				SDT_PROBE1(vfs, syncer, sync, mount__skip,
				    mp);
				continue;
			}

			/* Reschedule the mount before syncing it. */
			vdelay = sync_delay(mp);
			oslot = mp->mnt_synclist_slot;
			nslot = sync_delay_slot(vdelay);
			mp->mnt_synclist_slot = nslot;
			SDT_PROBE4(vfs, syncer, worklist, mount__update,
			    mp, vdelay, oslot, nslot);

			SDT_PROBE1(vfs, syncer, sync, mount__start, mp);
			error = VFS_SYNC(mp, MNT_LAZY, curlwp->l_cred);
			SDT_PROBE2(vfs, syncer, sync, mount__done,
			    mp, error);
		}
		mountlist_iterator_destroy(iter);

		mutex_enter(&syncer_data_lock);

		/*
		 * Push files whose dirty time has expired.
		 */
		/*
		 * slp keeps pointing at the queue for the current slot
		 * while syncer_delayno moves on (wrapping at syncer_last),
		 * so requeues below are computed relative to the next slot.
		 */
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno >= syncer_last)
			syncer_delayno = 0;

		while ((vi = TAILQ_FIRST(slp)) != NULL) {
			vp = VIMPL_TO_VNODE(vi);
			/* Drops and retakes syncer_data_lock internally. */
			synced = lazy_sync_vnode(vp);

			/*
			 * XXX The vnode may have been recycled, in which
			 * case it may have a new identity.
			 */
			vi = TAILQ_FIRST(slp);
			if (vi != NULL && VIMPL_TO_VNODE(vi) == vp) {
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 *
				 * Try again sooner rather than later if
				 * we were unable to lock the vnode.  Lock
				 * failure should not prevent us from doing
				 * the sync "soon".
				 *
				 * If we locked it yet arrive here, it's
				 * likely that lazy sync is in progress and
				 * so the vnode still has dirty metadata.
				 * syncdelay is mainly to get this vnode out
				 * of the way so we do not consider it again
				 * "soon" in this loop, so the delay time is
				 * not critical as long as it is not "soon".
				 * While write-back strategy is the file
				 * system's domain, we expect write-back to
				 * occur no later than syncdelay seconds
				 * into the future.
				 */
				delayx = synced ? syncdelay : lockdelay;
				oslot = vi->vi_synclist_slot;
				vn_syncer_add1(vp, delayx);
				nslot = vi->vi_synclist_slot;
				SDT_PROBE4(vfs, syncer, worklist,
				    vnode__update,
				    vp, delayx, oslot, nslot);
			}
		}

		endtime = time_second;

		SDT_PROBE2(vfs, syncer, sync, done, starttime, endtime);

		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (endtime == starttime) {
			kpause("syncer", false, hz, &syncer_data_lock);
		}
		mutex_exit(&syncer_data_lock);
	}
}
949
950 static void
sysctl_vfs_syncfs_setup(struct sysctllog ** clog)951 sysctl_vfs_syncfs_setup(struct sysctllog **clog)
952 {
953 const struct sysctlnode *rnode, *cnode;
954
955 sysctl_createv(clog, 0, NULL, &rnode,
956 CTLFLAG_PERMANENT,
957 CTLTYPE_NODE, "sync",
958 SYSCTL_DESCR("syncer options"),
959 NULL, 0, NULL, 0,
960 CTL_VFS, CTL_CREATE, CTL_EOL);
961
962 sysctl_createv(clog, 0, &rnode, &cnode,
963 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
964 CTLTYPE_QUAD, "delay",
965 SYSCTL_DESCR("max time to delay syncing data"),
966 NULL, 0, &syncdelay, 0,
967 CTL_CREATE, CTL_EOL);
968
969 sysctl_createv(clog, 0, &rnode, &cnode,
970 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
971 CTLTYPE_QUAD, "filedelay",
972 SYSCTL_DESCR("time to delay syncing files"),
973 NULL, 0, &filedelay, 0,
974 CTL_CREATE, CTL_EOL);
975
976 sysctl_createv(clog, 0, &rnode, &cnode,
977 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
978 CTLTYPE_QUAD, "dirdelay",
979 SYSCTL_DESCR("time to delay syncing directories"),
980 NULL, 0, &dirdelay, 0,
981 CTL_CREATE, CTL_EOL);
982
983 sysctl_createv(clog, 0, &rnode, &cnode,
984 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
985 CTLTYPE_QUAD, "metadelay",
986 SYSCTL_DESCR("time to delay syncing metadata"),
987 NULL, 0, &metadelay, 0,
988 CTL_CREATE, CTL_EOL);
989 }
990
991 /*
992 * sysctl helper routine to return list of supported fstypes
993 */
994 int
sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)995 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
996 {
997 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
998 char *where = oldp;
999 struct vfsops *v;
1000 size_t needed, left, slen;
1001 int error, first;
1002
1003 if (newp != NULL)
1004 return SET_ERROR(EPERM);
1005 if (namelen != 0)
1006 return SET_ERROR(EINVAL);
1007
1008 first = 1;
1009 error = 0;
1010 needed = 0;
1011 left = *oldlenp;
1012
1013 sysctl_unlock();
1014 mutex_enter(&vfs_list_lock);
1015 LIST_FOREACH(v, &vfs_list, vfs_list) {
1016 if (where == NULL)
1017 needed += strlen(v->vfs_name) + 1;
1018 else {
1019 memset(bf, 0, sizeof(bf));
1020 if (first) {
1021 strncpy(bf, v->vfs_name, sizeof(bf));
1022 first = 0;
1023 } else {
1024 bf[0] = ' ';
1025 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1026 }
1027 bf[sizeof(bf)-1] = '\0';
1028 slen = strlen(bf);
1029 if (left < slen + 1)
1030 break;
1031 v->vfs_refcount++;
1032 mutex_exit(&vfs_list_lock);
1033 /* +1 to copy out the trailing NUL byte */
1034 error = copyout(bf, where, slen + 1);
1035 mutex_enter(&vfs_list_lock);
1036 v->vfs_refcount--;
1037 if (error)
1038 break;
1039 where += slen;
1040 needed += slen;
1041 left -= slen;
1042 }
1043 }
1044 mutex_exit(&vfs_list_lock);
1045 sysctl_relock();
1046 *oldlenp = needed;
1047 return error;
1048 }
1049
1050 int kinfo_vdebug = 1;
1051 int kinfo_vgetfailed;
1052
1053 #define KINFO_VNODESLOP 10
1054
1055 /*
1056 * Dump vnode list (via sysctl).
1057 * Copyout address of vnode followed by vnode.
1058 */
1059 int
sysctl_kern_vnode(SYSCTLFN_ARGS)1060 sysctl_kern_vnode(SYSCTLFN_ARGS)
1061 {
1062 char *where = oldp;
1063 size_t *sizep = oldlenp;
1064 struct mount *mp;
1065 vnode_t *vp, vbuf;
1066 mount_iterator_t *iter;
1067 struct vnode_iterator *marker;
1068 char *bp = where;
1069 char *ewhere;
1070 int error;
1071
1072 if (namelen != 0)
1073 return SET_ERROR(EOPNOTSUPP);
1074 if (newp != NULL)
1075 return SET_ERROR(EPERM);
1076
1077 #define VPTRSZ sizeof(vnode_t *)
1078 #define VNODESZ sizeof(vnode_t)
1079 if (where == NULL) {
1080 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1081 return 0;
1082 }
1083 ewhere = where + *sizep;
1084
1085 sysctl_unlock();
1086 mountlist_iterator_init(&iter);
1087 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1088 vfs_vnode_iterator_init(mp, &marker);
1089 while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) {
1090 if (bp + VPTRSZ + VNODESZ > ewhere) {
1091 vrele(vp);
1092 vfs_vnode_iterator_destroy(marker);
1093 mountlist_iterator_destroy(iter);
1094 sysctl_relock();
1095 *sizep = bp - where;
1096 return SET_ERROR(ENOMEM);
1097 }
1098 memcpy(&vbuf, vp, VNODESZ);
1099 if ((error = copyout(&vp, bp, VPTRSZ)) ||
1100 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1101 vrele(vp);
1102 vfs_vnode_iterator_destroy(marker);
1103 mountlist_iterator_destroy(iter);
1104 sysctl_relock();
1105 return error;
1106 }
1107 vrele(vp);
1108 bp += VPTRSZ + VNODESZ;
1109 }
1110 vfs_vnode_iterator_destroy(marker);
1111 }
1112 mountlist_iterator_destroy(iter);
1113 sysctl_relock();
1114
1115 *sizep = bp - where;
1116 return 0;
1117 }
1118
1119 /*
1120 * Set vnode attributes to VNOVAL
1121 */
1122 void
vattr_null(struct vattr * vap)1123 vattr_null(struct vattr *vap)
1124 {
1125
1126 memset(vap, 0, sizeof(*vap));
1127
1128 vap->va_type = VNON;
1129
1130 /*
1131 * Assign individually so that it is safe even if size and
1132 * sign of each member are varied.
1133 */
1134 vap->va_mode = VNOVAL;
1135 vap->va_nlink = VNOVAL;
1136 vap->va_uid = VNOVAL;
1137 vap->va_gid = VNOVAL;
1138 vap->va_fsid = VNOVAL;
1139 vap->va_fileid = VNOVAL;
1140 vap->va_size = VNOVAL;
1141 vap->va_blocksize = VNOVAL;
1142 vap->va_atime.tv_sec =
1143 vap->va_mtime.tv_sec =
1144 vap->va_ctime.tv_sec =
1145 vap->va_birthtime.tv_sec = VNOVAL;
1146 vap->va_atime.tv_nsec =
1147 vap->va_mtime.tv_nsec =
1148 vap->va_ctime.tv_nsec =
1149 vap->va_birthtime.tv_nsec = VNOVAL;
1150 vap->va_gen = VNOVAL;
1151 vap->va_flags = VNOVAL;
1152 vap->va_rdev = VNOVAL;
1153 vap->va_bytes = VNOVAL;
1154 }
1155
1156 /*
1157 * Vnode state to string.
1158 */
1159 const char *
vstate_name(enum vnode_state state)1160 vstate_name(enum vnode_state state)
1161 {
1162
1163 switch (state) {
1164 case VS_ACTIVE:
1165 return "ACTIVE";
1166 case VS_MARKER:
1167 return "MARKER";
1168 case VS_LOADING:
1169 return "LOADING";
1170 case VS_LOADED:
1171 return "LOADED";
1172 case VS_BLOCKED:
1173 return "BLOCKED";
1174 case VS_RECLAIMING:
1175 return "RECLAIMING";
1176 case VS_RECLAIMED:
1177 return "RECLAIMED";
1178 default:
1179 return "ILLEGAL";
1180 }
1181 }
1182
1183 /*
1184 * Print a description of a vnode (common part).
1185 */
1186 static void
1187 vprint_common(struct vnode *vp, const char *prefix,
1188 void (*pr)(const char *, ...) __printflike(1, 2))
1189 {
1190 int n;
1191 char bf[96];
1192 const uint8_t *cp;
1193 vnode_impl_t *vip;
1194 const char * const vnode_tags[] = { VNODE_TAGS };
1195 const char * const vnode_types[] = { VNODE_TYPES };
1196 const char vnode_flagbits[] = VNODE_FLAGBITS;
1197
1198 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
1199 #define ARRAY_PRINT(idx, arr) \
1200 ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1201
1202 vip = VNODE_TO_VIMPL(vp);
1203
1204 snprintb(bf, sizeof(bf),
1205 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
1206
1207 (*pr)("vnode %p flags %s\n", vp, bf);
1208 (*pr)("%stag %s(%d) type %s(%d) mount %p typedata %p\n", prefix,
1209 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
1210 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1211 vp->v_mount, vp->v_mountedhere);
1212 (*pr)("%susecount %d writecount %d holdcount %d\n", prefix,
1213 vrefcnt(vp), vp->v_writecount, vp->v_holdcnt);
1214 (*pr)("%ssize %" PRIx64 " writesize %" PRIx64 " numoutput %d\n",
1215 prefix, vp->v_size, vp->v_writesize, vp->v_numoutput);
1216 (*pr)("%sdata %p lock %p\n", prefix, vp->v_data, &vip->vi_lock);
1217
1218 (*pr)("%sstate %s key(%p %zd)", prefix, vstate_name(vip->vi_state),
1219 vip->vi_key.vk_mount, vip->vi_key.vk_key_len);
1220 n = vip->vi_key.vk_key_len;
1221 cp = vip->vi_key.vk_key;
1222 while (n-- > 0)
1223 (*pr)(" %02x", *cp++);
1224 (*pr)("\n");
1225 (*pr)("%slrulisthd %p\n", prefix, vip->vi_lrulisthd);
1226
1227 #undef ARRAY_PRINT
1228 #undef ARRAY_SIZE
1229 }
1230
1231 /*
1232 * Print out a description of a vnode.
1233 */
1234 void
vprint(const char * label,struct vnode * vp)1235 vprint(const char *label, struct vnode *vp)
1236 {
1237
1238 if (label != NULL)
1239 printf("%s: ", label);
1240 vprint_common(vp, "\t", printf);
1241 if (vp->v_data != NULL) {
1242 printf("\t");
1243 VOP_PRINT(vp);
1244 }
1245 }
1246
1247 /*
1248 * Given a file system name, look up the vfsops for that
1249 * file system, or return NULL if file system isn't present
1250 * in the kernel.
1251 */
1252 struct vfsops *
vfs_getopsbyname(const char * name)1253 vfs_getopsbyname(const char *name)
1254 {
1255 struct vfsops *v;
1256
1257 mutex_enter(&vfs_list_lock);
1258 LIST_FOREACH(v, &vfs_list, vfs_list) {
1259 if (strcmp(v->vfs_name, name) == 0)
1260 break;
1261 }
1262 if (v != NULL)
1263 v->vfs_refcount++;
1264 mutex_exit(&vfs_list_lock);
1265
1266 return v;
1267 }
1268
1269 void
copy_statvfs_info(struct statvfs * sbp,const struct mount * mp)1270 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
1271 {
1272 const struct statvfs *mbp;
1273
1274 if (sbp == (mbp = &mp->mnt_stat))
1275 return;
1276
1277 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
1278 sbp->f_fsid = mbp->f_fsid;
1279 sbp->f_owner = mbp->f_owner;
1280 sbp->f_flag = mbp->f_flag;
1281 sbp->f_syncwrites = mbp->f_syncwrites;
1282 sbp->f_asyncwrites = mbp->f_asyncwrites;
1283 sbp->f_syncreads = mbp->f_syncreads;
1284 sbp->f_asyncreads = mbp->f_asyncreads;
1285 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
1286 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
1287 sizeof(sbp->f_fstypename));
1288 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
1289 sizeof(sbp->f_mntonname));
1290 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
1291 sizeof(sbp->f_mntfromname));
1292 (void)memcpy(sbp->f_mntfromlabel, mp->mnt_stat.f_mntfromlabel,
1293 sizeof(sbp->f_mntfromlabel));
1294 sbp->f_namemax = mbp->f_namemax;
1295 }
1296
1297 int
set_statvfs_info(const char * onp,int ukon,const char * fromp,int ukfrom,const char * vfsname,struct mount * mp,struct lwp * l)1298 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
1299 const char *vfsname, struct mount *mp, struct lwp *l)
1300 {
1301 int error;
1302 size_t size;
1303 struct statvfs *sfs = &mp->mnt_stat;
1304 int (*fun)(const void *, void *, size_t, size_t *);
1305
1306 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
1307 sizeof(mp->mnt_stat.f_fstypename));
1308
1309 if (onp) {
1310 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1311 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
1312 if (cwdi->cwdi_rdir != NULL) {
1313 size_t len;
1314 char *bp;
1315 char *path = PNBUF_GET();
1316
1317 bp = path + MAXPATHLEN;
1318 *--bp = '\0';
1319 rw_enter(&cwdi->cwdi_lock, RW_READER);
1320 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
1321 path, MAXPATHLEN / 2, 0, l);
1322 rw_exit(&cwdi->cwdi_lock);
1323 if (error) {
1324 PNBUF_PUT(path);
1325 return error;
1326 }
1327
1328 len = strlen(bp);
1329 if (len > sizeof(sfs->f_mntonname) - 1)
1330 len = sizeof(sfs->f_mntonname) - 1;
1331 (void)strncpy(sfs->f_mntonname, bp, len);
1332 PNBUF_PUT(path);
1333
1334 if (len < sizeof(sfs->f_mntonname) - 1) {
1335 error = (*fun)(onp, &sfs->f_mntonname[len],
1336 sizeof(sfs->f_mntonname) - len - 1, &size);
1337 if (error)
1338 return error;
1339 size += len;
1340 } else {
1341 size = len;
1342 }
1343 } else {
1344 error = (*fun)(onp, &sfs->f_mntonname,
1345 sizeof(sfs->f_mntonname) - 1, &size);
1346 if (error)
1347 return error;
1348 }
1349 (void)memset(sfs->f_mntonname + size, 0,
1350 sizeof(sfs->f_mntonname) - size);
1351 }
1352
1353 if (fromp) {
1354 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
1355 error = (*fun)(fromp, sfs->f_mntfromname,
1356 sizeof(sfs->f_mntfromname) - 1, &size);
1357 if (error)
1358 return error;
1359 (void)memset(sfs->f_mntfromname + size, 0,
1360 sizeof(sfs->f_mntfromname) - size);
1361 }
1362 return 0;
1363 }
1364
1365 /*
1366 * Knob to control the precision of file timestamps:
1367 *
1368 * 0 = seconds only; nanoseconds zeroed.
1369 * 1 = seconds and nanoseconds, accurate within 1/HZ.
1370 * 2 = seconds and nanoseconds, truncated to microseconds.
1371 * >=3 = seconds and nanoseconds, maximum precision.
1372 */
1373 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
1374
1375 int vfs_timestamp_precision __read_mostly = TSP_NSEC;
1376
1377 void
vfs_timestamp(struct timespec * tsp)1378 vfs_timestamp(struct timespec *tsp)
1379 {
1380 struct timeval tv;
1381
1382 switch (vfs_timestamp_precision) {
1383 case TSP_SEC:
1384 tsp->tv_sec = time_second;
1385 tsp->tv_nsec = 0;
1386 break;
1387 case TSP_HZ:
1388 getnanotime(tsp);
1389 break;
1390 case TSP_USEC:
1391 microtime(&tv);
1392 TIMEVAL_TO_TIMESPEC(&tv, tsp);
1393 break;
1394 case TSP_NSEC:
1395 default:
1396 nanotime(tsp);
1397 break;
1398 }
1399 }
1400
1401 /*
1402 * The purpose of this routine is to remove granularity from accmode_t,
1403 * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE,
1404 * VADMIN and VAPPEND.
1405 *
1406 * If it returns 0, the caller is supposed to continue with the usual
1407 * access checks using 'accmode' as modified by this routine. If it
1408 * returns nonzero value, the caller is supposed to return that value
1409 * as errno.
1410 *
1411 * Note that after this routine runs, accmode may be zero.
1412 */
1413 int
vfs_unixify_accmode(accmode_t * accmode)1414 vfs_unixify_accmode(accmode_t *accmode)
1415 {
1416
1417 /*
1418 * There is no way to specify explicit "deny" rule using
1419 * file mode or POSIX.1e ACLs.
1420 */
1421 if (*accmode & VEXPLICIT_DENY) {
1422 *accmode = 0;
1423 return 0;
1424 }
1425
1426 /*
1427 * None of these can be translated into usual access bits.
1428 * Also, the common case for NFSv4 ACLs is to not contain
1429 * either of these bits. Caller should check for VWRITE
1430 * on the containing directory instead.
1431 */
1432 if (*accmode & (VDELETE_CHILD | VDELETE))
1433 return SET_ERROR(EPERM);
1434
1435 if (*accmode & VADMIN_PERMS) {
1436 *accmode &= ~VADMIN_PERMS;
1437 *accmode |= VADMIN;
1438 }
1439
1440 /*
1441 * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL
1442 * or VSYNCHRONIZE using file mode or POSIX.1e ACL.
1443 */
1444 *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE);
1445
1446 return 0;
1447 }
1448
/* Recorded root fs time, if known. */
time_t rootfstime;

/*
 * Record t as the root file system time.
 */
void
setrootfstime(time_t t)
{

	rootfstime = t;
}
1456
1457 static const uint8_t vttodt_tab[] = {
1458 [VNON] = DT_UNKNOWN,
1459 [VREG] = DT_REG,
1460 [VDIR] = DT_DIR,
1461 [VBLK] = DT_BLK,
1462 [VCHR] = DT_CHR,
1463 [VLNK] = DT_LNK,
1464 [VSOCK] = DT_SOCK,
1465 [VFIFO] = DT_FIFO,
1466 [VBAD] = DT_UNKNOWN
1467 };
1468
1469 uint8_t
vtype2dt(enum vtype vt)1470 vtype2dt(enum vtype vt)
1471 {
1472
1473 CTASSERT(VBAD == __arraycount(vttodt_tab) - 1);
1474 return vttodt_tab[vt];
1475 }
1476
1477 int
VFS_MOUNT(struct mount * mp,const char * a,void * b,size_t * c)1478 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
1479 {
1480 int mpsafe = mp->mnt_iflag & IMNT_MPSAFE;
1481 int error;
1482
1483 /*
1484 * Note: The first time through, the vfs_mount function may set
1485 * IMNT_MPSAFE, so we have to cache it on entry in order to
1486 * avoid leaking a kernel lock.
1487 *
1488 * XXX Maybe the MPSAFE bit should be set in struct vfsops and
1489 * not in struct mount.
1490 */
1491 if (mpsafe) {
1492 KERNEL_LOCK(1, NULL);
1493 }
1494 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
1495 if (mpsafe) {
1496 KERNEL_UNLOCK_ONE(NULL);
1497 }
1498
1499 return error;
1500 }
1501
1502 int
VFS_START(struct mount * mp,int a)1503 VFS_START(struct mount *mp, int a)
1504 {
1505 int error;
1506
1507 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1508 KERNEL_LOCK(1, NULL);
1509 }
1510 error = (*(mp->mnt_op->vfs_start))(mp, a);
1511 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1512 KERNEL_UNLOCK_ONE(NULL);
1513 }
1514
1515 return error;
1516 }
1517
1518 int
VFS_UNMOUNT(struct mount * mp,int a)1519 VFS_UNMOUNT(struct mount *mp, int a)
1520 {
1521 int error;
1522
1523 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1524 KERNEL_LOCK(1, NULL);
1525 }
1526 error = (*(mp->mnt_op->vfs_unmount))(mp, a);
1527 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1528 KERNEL_UNLOCK_ONE(NULL);
1529 }
1530
1531 return error;
1532 }
1533
1534 int
VFS_ROOT(struct mount * mp,int lktype,struct vnode ** a)1535 VFS_ROOT(struct mount *mp, int lktype, struct vnode **a)
1536 {
1537 int error;
1538
1539 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1540 KERNEL_LOCK(1, NULL);
1541 }
1542 error = (*(mp->mnt_op->vfs_root))(mp, lktype, a);
1543 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1544 KERNEL_UNLOCK_ONE(NULL);
1545 }
1546
1547 return error;
1548 }
1549
1550 int
VFS_QUOTACTL(struct mount * mp,struct quotactl_args * args)1551 VFS_QUOTACTL(struct mount *mp, struct quotactl_args *args)
1552 {
1553 int error;
1554
1555 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1556 KERNEL_LOCK(1, NULL);
1557 }
1558 error = (*(mp->mnt_op->vfs_quotactl))(mp, args);
1559 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1560 KERNEL_UNLOCK_ONE(NULL);
1561 }
1562
1563 return error;
1564 }
1565
1566 int
VFS_STATVFS(struct mount * mp,struct statvfs * a)1567 VFS_STATVFS(struct mount *mp, struct statvfs *a)
1568 {
1569 int error;
1570
1571 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1572 KERNEL_LOCK(1, NULL);
1573 }
1574 error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
1575 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1576 KERNEL_UNLOCK_ONE(NULL);
1577 }
1578
1579 return error;
1580 }
1581
1582 int
VFS_SYNC(struct mount * mp,int a,struct kauth_cred * b)1583 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
1584 {
1585 int error;
1586
1587 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1588 KERNEL_LOCK(1, NULL);
1589 }
1590 error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
1591 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1592 KERNEL_UNLOCK_ONE(NULL);
1593 }
1594
1595 return error;
1596 }
1597
1598 int
VFS_FHTOVP(struct mount * mp,struct fid * a,int b,struct vnode ** c)1599 VFS_FHTOVP(struct mount *mp, struct fid *a, int b, struct vnode **c)
1600 {
1601 int error;
1602
1603 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1604 KERNEL_LOCK(1, NULL);
1605 }
1606 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b, c);
1607 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1608 KERNEL_UNLOCK_ONE(NULL);
1609 }
1610
1611 return error;
1612 }
1613
1614 int
VFS_VPTOFH(struct vnode * vp,struct fid * a,size_t * b)1615 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
1616 {
1617 int error;
1618
1619 if ((vp->v_vflag & VV_MPSAFE) == 0) {
1620 KERNEL_LOCK(1, NULL);
1621 }
1622 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
1623 if ((vp->v_vflag & VV_MPSAFE) == 0) {
1624 KERNEL_UNLOCK_ONE(NULL);
1625 }
1626
1627 return error;
1628 }
1629
1630 int
VFS_SNAPSHOT(struct mount * mp,struct vnode * a,struct timespec * b)1631 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
1632 {
1633 int error;
1634
1635 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1636 KERNEL_LOCK(1, NULL);
1637 }
1638 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
1639 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1640 KERNEL_UNLOCK_ONE(NULL);
1641 }
1642
1643 return error;
1644 }
1645
1646 int
VFS_EXTATTRCTL(struct mount * mp,int a,struct vnode * b,int c,const char * d)1647 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
1648 {
1649 int error;
1650
1651 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
1652 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
1653 KERNEL_UNLOCK_ONE(NULL); /* XXX */
1654
1655 return error;
1656 }
1657
1658 int
VFS_SUSPENDCTL(struct mount * mp,int a)1659 VFS_SUSPENDCTL(struct mount *mp, int a)
1660 {
1661 int error;
1662
1663 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1664 KERNEL_LOCK(1, NULL);
1665 }
1666 error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
1667 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1668 KERNEL_UNLOCK_ONE(NULL);
1669 }
1670
1671 return error;
1672 }
1673
1674 #if defined(DDB) || defined(DEBUGPRINT)
1675 static const char buf_flagbits[] = BUF_FLAGBITS;
1676
1677 void
vfs_buf_print(struct buf * bp,int full,void (* pr)(const char *,...))1678 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
1679 {
1680 char bf[1024];
1681
1682 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
1683 PRIx64 " dev 0x%x\n",
1684 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
1685
1686 snprintb(bf, sizeof(bf),
1687 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
1688 (*pr)(" error %d flags %s\n", bp->b_error, bf);
1689
1690 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1691 bp->b_bufsize, bp->b_bcount, bp->b_resid);
1692 (*pr)(" data %p saveaddr %p\n",
1693 bp->b_data, bp->b_saveaddr);
1694 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
1695 }
1696
1697 void
vfs_vnode_print(struct vnode * vp,int full,void (* pr)(const char *,...))1698 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
1699 {
1700
1701 uvm_object_printit(&vp->v_uobj, full, pr);
1702 (*pr)("\n");
1703 vprint_common(vp, "", pr);
1704 if (full) {
1705 struct buf *bp;
1706
1707 (*pr)("clean bufs:\n");
1708 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
1709 (*pr)(" bp %p\n", bp);
1710 vfs_buf_print(bp, full, pr);
1711 }
1712
1713 (*pr)("dirty bufs:\n");
1714 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
1715 (*pr)(" bp %p\n", bp);
1716 vfs_buf_print(bp, full, pr);
1717 }
1718 }
1719 }
1720
1721 void
vfs_vnode_lock_print(void * vlock,int full,void (* pr)(const char *,...))1722 vfs_vnode_lock_print(void *vlock, int full, void (*pr)(const char *, ...))
1723 {
1724 struct mount *mp;
1725 vnode_impl_t *vip;
1726
1727 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) {
1728 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1729 if (&vip->vi_lock == vlock ||
1730 VIMPL_TO_VNODE(vip)->v_interlock == vlock)
1731 vfs_vnode_print(VIMPL_TO_VNODE(vip), full, pr);
1732 }
1733 }
1734 }
1735
1736 void
vfs_mount_print_all(int full,void (* pr)(const char *,...))1737 vfs_mount_print_all(int full, void (*pr)(const char *, ...))
1738 {
1739 struct mount *mp;
1740 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1741 vfs_mount_print(mp, full, pr);
1742 }
1743
1744 void
vfs_mount_print(struct mount * mp,int full,void (* pr)(const char *,...))1745 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
1746 {
1747 char sbuf[256];
1748
1749 (*pr)("vnodecovered = %p data = %p\n",
1750 mp->mnt_vnodecovered, mp->mnt_data);
1751
1752 (*pr)("fs_bshift %d dev_bshift = %d\n",
1753 mp->mnt_fs_bshift, mp->mnt_dev_bshift);
1754
1755 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1756 (*pr)("flag = %s\n", sbuf);
1757
1758 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1759 (*pr)("iflag = %s\n", sbuf);
1760
1761 (*pr)("refcnt = %d updating @ %p\n", mp->mnt_refcnt, mp->mnt_updating);
1762
1763 (*pr)("statvfs cache:\n");
1764 (*pr)("\tbsize = %lu\n", mp->mnt_stat.f_bsize);
1765 (*pr)("\tfrsize = %lu\n", mp->mnt_stat.f_frsize);
1766 (*pr)("\tiosize = %lu\n", mp->mnt_stat.f_iosize);
1767
1768 (*pr)("\tblocks = %"PRIu64"\n", mp->mnt_stat.f_blocks);
1769 (*pr)("\tbfree = %"PRIu64"\n", mp->mnt_stat.f_bfree);
1770 (*pr)("\tbavail = %"PRIu64"\n", mp->mnt_stat.f_bavail);
1771 (*pr)("\tbresvd = %"PRIu64"\n", mp->mnt_stat.f_bresvd);
1772
1773 (*pr)("\tfiles = %"PRIu64"\n", mp->mnt_stat.f_files);
1774 (*pr)("\tffree = %"PRIu64"\n", mp->mnt_stat.f_ffree);
1775 (*pr)("\tfavail = %"PRIu64"\n", mp->mnt_stat.f_favail);
1776 (*pr)("\tfresvd = %"PRIu64"\n", mp->mnt_stat.f_fresvd);
1777
1778 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
1779 mp->mnt_stat.f_fsidx.__fsid_val[0],
1780 mp->mnt_stat.f_fsidx.__fsid_val[1]);
1781
1782 (*pr)("\towner = %"PRIu32"\n", mp->mnt_stat.f_owner);
1783 (*pr)("\tnamemax = %lu\n", mp->mnt_stat.f_namemax);
1784
1785 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
1786
1787 (*pr)("\tflag = %s\n", sbuf);
1788 (*pr)("\tsyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_syncwrites);
1789 (*pr)("\tasyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_asyncwrites);
1790 (*pr)("\tsyncreads = %" PRIu64 "\n", mp->mnt_stat.f_syncreads);
1791 (*pr)("\tasyncreads = %" PRIu64 "\n", mp->mnt_stat.f_asyncreads);
1792 (*pr)("\tfstypename = %s\n", mp->mnt_stat.f_fstypename);
1793 (*pr)("\tmntonname = %s\n", mp->mnt_stat.f_mntonname);
1794 (*pr)("\tmntfromname = %s\n", mp->mnt_stat.f_mntfromname);
1795
1796 {
1797 int cnt = 0;
1798 vnode_t *vp;
1799 vnode_impl_t *vip;
1800 (*pr)("locked vnodes =");
1801 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1802 vp = VIMPL_TO_VNODE(vip);
1803 if (VOP_ISLOCKED(vp)) {
1804 if ((++cnt % 6) == 0) {
1805 (*pr)(" %p,\n\t", vp);
1806 } else {
1807 (*pr)(" %p,", vp);
1808 }
1809 }
1810 }
1811 (*pr)("\n");
1812 }
1813
1814 if (full) {
1815 int cnt = 0;
1816 vnode_t *vp;
1817 vnode_impl_t *vip;
1818
1819 (*pr)("all vnodes =");
1820 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1821 vp = VIMPL_TO_VNODE(vip);
1822 if (!TAILQ_NEXT(vip, vi_mntvnodes)) {
1823 (*pr)(" %p", vp);
1824 } else if ((++cnt % 6) == 0) {
1825 (*pr)(" %p,\n\t", vp);
1826 } else {
1827 (*pr)(" %p,", vp);
1828 }
1829 }
1830 (*pr)("\n");
1831 }
1832 }
1833
1834 /*
1835 * List all of the locked vnodes in the system.
1836 */
1837 void printlockedvnodes(void);
1838
1839 void
printlockedvnodes(void)1840 printlockedvnodes(void)
1841 {
1842 struct mount *mp;
1843 vnode_t *vp;
1844 vnode_impl_t *vip;
1845
1846 printf("Locked vnodes\n");
1847 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) {
1848 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1849 vp = VIMPL_TO_VNODE(vip);
1850 if (VOP_ISLOCKED(vp))
1851 vprint(NULL, vp);
1852 }
1853 }
1854 }
1855
1856 #endif /* DDB || DEBUGPRINT */
1857