1 /*	$OpenBSD: vnode.h,v 1.61 2005/05/26 00:33:45 pedro Exp $	*/
2 /*	$NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)vnode.h	8.11 (Berkeley) 11/21/94
33  */
34 
35 #include <sys/types.h>
36 #include <sys/queue.h>
37 #include <sys/lock.h>
38 #include <sys/select.h>
39 
40 /* XXX: clean up includes later */
41 #include <uvm/uvm_pglist.h>	/* XXX */
42 #include <sys/lock.h>		/* XXX */
43 #include <uvm/uvm.h>		/* XXX */
44 #include <uvm/uvm_vnode.h>	/* XXX */
45 
46 /*
47  * The vnode is the focus of all file activity in UNIX.  There is a
48  * unique vnode allocated for each active file, each current directory,
49  * each mounted-on file, text file, and the root.
50  */
51 
/*
 * Vnode types.  VNON means no type.
 *
 * Some types select which member of the v_un union in struct vnode is
 * meaningful: VDIR, VSOCK, VCHR/VBLK and VFIFO.
 */
enum vtype {
	VNON,		/* no type */
	VREG,
	VDIR,
	VBLK,
	VCHR,
	VLNK,
	VSOCK,
	VFIFO,
	VBAD
};
56 
57 /*
58  * Vnode tag types.
59  * These are for the benefit of external programs only (e.g., pstat)
60  * and should NEVER be inspected by the kernel.
61  *
62  * Note that v_tag is actually used to tell MFS from FFS, and EXT2FS from
63  * the rest, so don't believe the above comment!
64  */
enum vtagtype {
	VT_NON,		/*  0 */
	VT_UFS,		/*  1 */
	VT_NFS,		/*  2 */
	VT_MFS,		/*  3 */
	VT_MSDOSFS,	/*  4 */
	VT_LFS,		/*  5 */
	VT_LOFS,	/*  6 */
	VT_FDESC,	/*  7 */
	VT_PORTAL,	/*  8 */
	VT_KERNFS,	/*  9 */
	VT_PROCFS,	/* 10 */
	VT_AFS,		/* 11 */
	VT_ISOFS,	/* 12 */
	VT_ADOSFS,	/* 13 */
	VT_EXT2FS,	/* 14 */
	VT_NCPFS,	/* 15 */
	VT_VFS,		/* 16 */
	VT_XFS,		/* 17 */
	VT_NTFS,	/* 18 */
	VT_UDF		/* 19 */
};
70 
71 /*
72  * Each underlying filesystem allocates its own private area and hangs
73  * it from v_data.  If non-null, this area is freed in getnewvnode().
74  */
/*
 * Declares struct buflists, the head type for a list of struct buf.
 * struct vnode uses it for its clean and dirty block lists.
 */
LIST_HEAD(buflists, buf);
76 
77 /*
78  * Reading or writing any of these items requires holding the appropriate lock.
79  * v_freelist is locked by the global vnode_free_list simple lock.
80  * v_mntvnodes is locked by the global mntvnodes simple lock.
 * v_flag, v_usecount, v_holdcnt and v_writecount are
82  *    locked by the v_interlock simple lock.
83  */
84 
/*
 * The in-core vnode.  Which member of v_un is meaningful depends on
 * v_type, as tagged in each member's comment.
 */
struct vnode {
	struct uvm_vnode v_uvm;			/* uvm data */
	int	(**v_op)(void *);		/* vnode operations vector */
	enum	vtype v_type;			/* vnode type */
	u_int	v_flag;				/* vnode flags (VROOT etc., below) */
	u_int   v_usecount;			/* reference count of users */
	/* reference count of writers */
	u_int   v_writecount;
	/* VBIO* flags; these can be read/written in interrupts */
	u_int   v_bioflag;
	u_int   v_holdcnt;			/* buffer references */
	u_int   v_id;				/* capability identifier */
	struct	mount *v_mount;			/* ptr to vfs we are in */
	TAILQ_ENTRY(vnode) v_freelist;		/* vnode freelist */
	LIST_ENTRY(vnode) v_mntvnodes;		/* vnodes for mount point */
	struct	buflists v_cleanblkhd;		/* clean blocklist head */
	struct	buflists v_dirtyblkhd;		/* dirty blocklist head */
	u_int   v_numoutput;			/* num of writes in progress */
	LIST_ENTRY(vnode) v_synclist;		/* vnode with dirty buffers */
	union {
		struct mount	*vu_mountedhere;/* ptr to mounted vfs (VDIR) */
		struct socket	*vu_socket;	/* unix ipc (VSOCK) */
		struct specinfo	*vu_specinfo;	/* device (VCHR, VBLK) */
		struct fifoinfo	*vu_fifoinfo;	/* fifo (VFIFO) */
	} v_un;

	struct  simplelock v_interlock;		/* lock on usecount and flag */
	struct  lock *v_vnlock;			/* used for non-locking fs's */
	enum	vtagtype v_tag;			/* type of underlying data */
	void	*v_data;			/* private data for fs */
	struct {
		struct	simplelock vsi_lock;	/* lock to protect below */
		struct	selinfo vsi_selinfo;	/* identity of poller(s) */
	} v_selectinfo;
};
/* Shorthand accessors for the members of the v_un union. */
#define	v_mountedhere	v_un.vu_mountedhere
#define	v_socket	v_un.vu_socket
#define	v_specinfo	v_un.vu_specinfo
#define	v_fifoinfo	v_un.vu_fifoinfo
124 
125 /*
126  * Vnode flags.
127  */
#define	VROOT		0x0001	/* this vnode is the root of its file system */
#define	VTEXT		0x0002	/* pure text prototype */
#define	VSYSTEM		0x0004	/* in use by the kernel */
#define	VISTTY		0x0008	/* represents a tty */
#define	VXLOCK		0x0100	/* locked while the underlying type changes */
#define	VXWANT		0x0200	/* a process is waiting for this vnode */
#define	VALIASED	0x0800	/* an alias exists for this vnode */
#define	VLOCKSWORK	0x4000	/* file system supports locking discipline */
136 
137 /*
138  * (v_bioflag) Flags that may be manipulated by interrupt handlers
139  */
#define	VBIOWAIT	0x0001	/* someone waits for output to complete */
#define	VBIOONSYNCLIST	0x0002	/* vnode is on the syncer worklist */
#define	VBIOONFREELIST	0x0004	/* vnode is on a free list */
143 
144 /*
145  * Vnode attributes.  A field value of VNOVAL represents a field whose value
146  * is unavailable (getattr) or which is not to be changed (setattr).
147  */
/* Attribute set read by getattr and applied by setattr; see VNOVAL. */
struct vattr {
	enum vtype	va_type;	/* vnode type (for create) */
	mode_t		va_mode;	/* files access mode and type */
	nlink_t		va_nlink;	/* number of references to file */
	uid_t		va_uid;		/* owner user id */
	gid_t		va_gid;		/* owner group id */
	long		va_fsid;	/* file system id (dev for now) */
	long		va_fileid;	/* file id */
	u_quad_t	va_size;	/* file size in bytes */
	long		va_blocksize;	/* blocksize preferred for i/o */
	struct timespec	va_atime;	/* time of last access */
	struct timespec	va_mtime;	/* time of last modification */
	struct timespec	va_ctime;	/* time file changed */
	u_long		va_gen;		/* generation number of file */
	u_long		va_flags;	/* flags defined for file */
	dev_t		va_rdev;	/* device the special file represents */
	u_quad_t	va_bytes;	/* bytes of disk space held by file */
	u_quad_t	va_filerev;	/* file modification number */
	u_int		va_vaflags;	/* operations flags (VA_*), see below */
	long		va_spare;	/* remain quad aligned */
};
169 
170 /*
 * Flags for va_vaflags.
172  */
#define	VA_UTIMES_NULL	0x01	/* the utimes argument was NULL */
#define	VA_EXCLUSIVE	0x02	/* request is an exclusive create */
175 /*
176  * Flags for ioflag.
177  */
#define	IO_UNIT		0x01	/* perform the I/O as one atomic unit */
#define	IO_APPEND	0x02	/* write is appended to the end */
#define	IO_SYNC		0x04	/* perform the I/O synchronously */
#define	IO_NODELOCKED	0x08	/* the underlying node is already locked */
#define	IO_NDELAY	0x10	/* FNDELAY flag is set in the file table */
#define	IO_NOLIMIT	0x20	/* do not enforce limits on this i/o */
184 
185 /*
186  *  Modes.  Some values same as Ixxx entries from inode.h for now.
187  */
#define	VSUID	04000		/* set user id on execution */
#define	VSGID	02000		/* set group id on execution */
#define	VSVTX	01000		/* save swapped text even after use */
#define	VREAD	00400		/* read permission */
#define	VWRITE	00200		/* write permission */
#define	VEXEC	00100		/* execute permission */
194 
/*
 * Token indicating no attribute value yet assigned.  In a struct vattr
 * it marks a field as unavailable (getattr) or as not to be changed
 * (setattr).
 */
#define	VNOVAL	(-1)
199 
200 /*
201  * Structure returned by the KERN_VNODE sysctl
202  */
struct e_vnode {
	struct vnode *vptr;	/* NOTE(review): presumably the vnode's kernel address - confirm */
	struct vnode vnode;	/* a struct vnode embedded by value */
};
207 
208 #ifdef _KERNEL
209 /*
210  * Convert between vnode types and inode formats (since POSIX.1
211  * defines mode word of stat structure in terms of inode formats).
212  */
/* Lookup tables indexed by the IFTOVT() and VTTOIF() macros. */
extern enum vtype	iftovt_tab[];
extern int		vttoif_tab[];
/*
 * IFTOVT(mode): vnode type for the file-format bits of a mode word; the
 *	S_IFMT field is shifted down 12 bits to index iftovt_tab.
 * VTTOIF(indx): vttoif_tab entry for vnode type indx.
 * MAKEIMODE(indx, mode): VTTOIF(indx) OR'ed with mode, as an int.  The
 *	expansion is wrapped in parentheses so the cast cannot be re-parsed
 *	when the macro is the operand of sizeof or is followed by a postfix
 *	operator.
 */
#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
#define MAKEIMODE(indx, mode)	((int)(VTTOIF(indx) | (mode)))
218 
219 /*
220  * Flags to various vnode functions.
221  */
#define	SKIPSYSTEM	0x0001	/* vflush: skip vnodes marked VSYSTEM */
#define	FORCECLOSE	0x0002	/* vflush: force file closure */
#define	WRITECLOSE	0x0004	/* vflush: only close writeable files */
#define	DOCLOSE		0x0008	/* vclean: close active files */
#define	V_SAVE		0x0001	/* vinvalbuf: sync file first */
#define	V_SAVEMETA	0x0002	/* vinvalbuf: leave indirect blocks */

#define	REVOKEALL	0x0001	/* vop_revoke: revoke all aliases */
230 
231 
/* struct freelst: tail-queue head type for the vnode lists declared below. */
TAILQ_HEAD(freelst, vnode);
extern struct freelst vnode_hold_list;	/* free vnodes referencing buffers */
extern struct freelst vnode_free_list;	/* vnode free list */
/* NOTE(review): presumably the lock guarding the lists above - confirm */
extern struct simplelock vnode_free_list_slock;
236 
237 #ifdef DIAGNOSTIC
238 #define	VATTR_NULL(vap)	vattr_null(vap)
239 
240 #define	VREF(vp)	vref(vp)
241 void	vref(struct vnode *);
242 #else
243 #define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
244 
245 static __inline void vref(struct vnode *);
246 #define	VREF(vp)	vref(vp)		/* increase reference */
247 static __inline void
vref(vp)248 vref(vp)
249 	struct vnode *vp;
250 {
251 	simple_lock(&vp->v_interlock);
252 	vp->v_usecount++;
253 	simple_unlock(&vp->v_interlock);
254 }
255 #endif /* DIAGNOSTIC */
256 
/* NULL already cast to struct vnode *. */
#define	NULLVP	((struct vnode *)NULL)
258 
259 /*
260  * Global vnode data.
261  */
extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
extern	int desiredvnodes;		/* number of vnodes desired */
extern	time_t syncdelay;		/* time to delay syncing vnodes */
extern	int rushjob;			/* # of slots syncer should run ASAP */
/* predefined null vattr structure; VATTR_NULL() copies it when !DIAGNOSTIC */
extern	struct vattr va_null;
267 
268 /*
269  * Macro/function to check for client cache inconsistency w.r.t. leasing.
270  */
#define	LEASE_READ	0x1	/* lease check on behalf of readers */
#define	LEASE_WRITE	0x2	/* lease check on behalf of modifiers */
273 
274 #endif /* _KERNEL */
275 
276 
277 /*
 * Mods for extensibility.
279  */
280 
281 /*
282  * Flags for vdesc_flags:
283  */
#define	VDESC_MAX_VPS		16

/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
#define	VDESC_VP0_WILLRELE	0x00000001
#define	VDESC_VP1_WILLRELE	0x00000002
#define	VDESC_VP2_WILLRELE	0x00000004
#define	VDESC_VP3_WILLRELE	0x00000008

#define	VDESC_VP0_WILLUNLOCK	0x00000100
#define	VDESC_VP1_WILLUNLOCK	0x00000200
#define	VDESC_VP2_WILLUNLOCK	0x00000400
#define	VDESC_VP3_WILLUNLOCK	0x00000800

/* Each WILLPUT value is that vp's WILLRELE bit plus its WILLUNLOCK bit. */
#define	VDESC_VP0_WILLPUT	(VDESC_VP0_WILLRELE | VDESC_VP0_WILLUNLOCK)
#define	VDESC_VP1_WILLPUT	(VDESC_VP1_WILLRELE | VDESC_VP1_WILLUNLOCK)
#define	VDESC_VP2_WILLPUT	(VDESC_VP2_WILLRELE | VDESC_VP2_WILLUNLOCK)
#define	VDESC_VP3_WILLPUT	(VDESC_VP3_WILLRELE | VDESC_VP3_WILLUNLOCK)

#define	VDESC_NOMAP_VPP		0x00010000
#define	VDESC_VPP_WILLRELE	0x00020000
300 
/*
 * VDESC_NO_OFFSET is used to identify the end of the offset list
 * and in places where no such field exists.
 *
 * The value is parenthesized, like VNOVAL, so the sign stays attached to
 * the constant wherever the macro is expanded.
 */
#define VDESC_NO_OFFSET (-1)
306 
307 /*
308  * This structure describes the vnode operation taking place.
309  */
/* One descriptor per vnode operation; VDESC(OP) yields the address of one. */
struct vnodeop_desc {
	int	vdesc_offset;		/* offset in vector--first for speed */
	char    *vdesc_name;		/* a readable name for debugging */
	int	vdesc_flags;		/* VDESC_* flags */

	/*
	 * These ops are used by bypass routines to map and locate arguments.
	 * Creds and procs are not needed in bypass routines, but sometimes
	 * they are useful to (for example) transport layers.
	 * Nameidata is useful because it has a cred in it.
	 */
	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
	int	vdesc_vpp_offset;	/* return vpp location */
	int	vdesc_cred_offset;	/* cred location, if any */
	int	vdesc_proc_offset;	/* proc location, if any */
	int	vdesc_componentname_offset; /* componentname location, if any */
	/*
	 * Finally, we've got a list of private data (about each operation)
	 * for each transport layer.  (Support to manage this list is not
	 * yet part of BSD.)
	 */
	caddr_t	*vdesc_transports;
};
333 
334 #ifdef _KERNEL
/*
 * A list of all the operation descs (array of pointers to
 * struct vnodeop_desc).
 */
extern struct vnodeop_desc *vnodeop_descs[];


/*
 * Interlock for scanning list of vnodes attached to a mountpoint
 * (the v_mntvnodes linkage in struct vnode).
 */
extern struct simplelock mntvnode_slock;
345 
346 /*
347  * This macro is very helpful in defining those offsets in the vdesc struct.
348  *
 * This is stolen from X11R4.  I ignored all the fancy stuff for
 * Crays, so if you decide to port this to such a serious machine,
 * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
352  */
/*
 * VOPARG_OFFSET(ptr_type, member): byte offset, as an int, of member
 *	within the structure that ptr_type points to.
 * VOPARG_OFFSETOF(struct_type, member): the same, given the structure
 *	type itself rather than a pointer type.
 * VOPARG_OFFSETTO(RESULT_TYPE, BYTE_OFF, BASE_P): the address BYTE_OFF
 *	bytes past BASE_P, cast to RESULT_TYPE.
 */
#define VOPARG_OFFSET(ptr_type, member)					\
	((int)(((char *)(&(((ptr_type)NULL)->member))) - ((char *)NULL)))
#define VOPARG_OFFSETOF(struct_type, member)				\
	VOPARG_OFFSET(struct_type *, member)
#define VOPARG_OFFSETTO(RESULT_TYPE, BYTE_OFF, BASE_P)			\
	((RESULT_TYPE)(((char *)(BASE_P)) + (BYTE_OFF)))
359 
360 
361 /*
362  * This structure is used to configure the new vnodeops vector.
363  */
/* Pairs one operation descriptor with the function implementing it. */
struct vnodeopv_entry_desc {
	struct vnodeop_desc *opve_op;   /* which operation this is */
	int (*opve_impl)(void *);	/* code implementing this operation */
};
/* Names an operations vector and the entries used to fill it. */
struct vnodeopv_desc {
			/* ptr to the ptr to the vector where op should go */
	int (***opv_desc_vector_p)(void *);
	struct vnodeopv_entry_desc *opv_desc_ops;   /* null terminated list */
};
373 
/*
 * A default routine which just returns an error.
 * Its signature matches the entries of an operations vector
 * (int (*)(void *)).
 */
int vn_default_error(void *);
378 
379 /*
380  * A generic structure.
381  * This can be used by bypass routines to identify generic arguments.
382  */
struct vop_generic_args {
	struct vnodeop_desc *a_desc;	/* descriptor of the operation */
	/* other random data follows, presumably */
};
387 
388 /*
389  * VOCALL calls an op given an ops vector.  We break it out because BSD's
390  * vclean changes the ops vector and then wants to call ops with the old
391  * vector.
392  */
#define VOCALL(VEC, IDX, ARGS)	((*((VEC)[(IDX)]))(ARGS))

/*
 * This call works for vnodes in the kernel.
 *
 * VCALL goes through the vnode's own v_op vector, VDESC names the
 * <OP>_desc descriptor object, and VOFFSET reads the vector slot stored
 * in that descriptor.
 */
#define VCALL(VNODE, IDX, ARGS)	VOCALL((VNODE)->v_op, (IDX), (ARGS))
#define VDESC(OPNAME)		(&__CONCAT(OPNAME,_desc))
#define VOFFSET(OPNAME)		(VDESC(OPNAME)->vdesc_offset)
401 
402 /*
403  * Finally, include the default set of vnode operations.
404  */
405 #include <sys/vnode_if.h>
406 
407 /*
408  * Public vnode manipulation functions.
409  */
/*
 * Forward declarations: the prototypes below only use pointers to these
 * structures, so their full definitions are not needed here.
 */
struct file;
struct filedesc;
struct mount;
struct nameidata;
struct proc;
struct stat;
struct ucred;
struct uio;
struct vattr;
struct vnode;
420 
int	bdevvp(dev_t dev, struct vnode **vpp);
int	cdevvp(dev_t dev, struct vnode **vpp);
/* getnewvnode: hands back a vnode through vpp; a non-null v_data is freed here */
int	getnewvnode(enum vtagtype tag, struct mount *mp,
	    int (**vops)(void *), struct vnode **vpp);
int	getvnode(struct filedesc *fdp, int fd, struct file **fpp);
void	getnewfsid(struct mount *, int);
/* vattr_null: what VATTR_NULL() calls in DIAGNOSTIC kernels */
void	vattr_null(struct vattr *vap);
int	vcount(struct vnode *vp);
int	vfinddev(dev_t, enum vtype, struct vnode **);
void	vflushbuf(struct vnode *vp, int sync);
/* vflush: flags are SKIPSYSTEM, FORCECLOSE and WRITECLOSE */
int	vflush(struct mount *mp, struct vnode *vp, int flags);
void	vntblinit(void);
void    vn_initialize_syncerd(void);
int	vwaitforio(struct vnode *, int, char *, int);
void	vwakeup(struct vnode *);
void	vdevgone(int, int, int, enum vtype);
int	vget(struct vnode *vp, int lockflag, struct proc *p);
void	vgone(struct vnode *vp);
void    vgonel(struct vnode *, struct proc *);
/* vinvalbuf: save takes V_SAVE / V_SAVEMETA */
int	vinvalbuf(struct vnode *vp, int save, struct ucred *cred,
	    struct proc *p, int slpflag, int slptimeo);
void	vprint(char *label, struct vnode *vp);
int	vop_generic_bwrite(void *ap);
void	vn_update(void);
int	vn_close(struct vnode *vp,
	    int flags, struct ucred *cred, struct proc *p);
int	vn_open(struct nameidata *ndp, int fmode, int cmode);
int	vrecycle(struct vnode *vp, struct simplelock *inter_lkp,
	    struct proc *p);
/* vn_rdwr: NOTE(review): ioflg presumably carries the IO_* flags - confirm */
int	vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base,
	    int len, off_t offset, enum uio_seg segflg, int ioflg,
	    struct ucred *cred, size_t *aresid, struct proc *p);
int	vn_lock(struct vnode *vp, int flags, struct proc *p);

/*
 * Generic operation routines; each has the int (*)(void *) signature
 * used for entries of an operations vector.
 */
int	vop_generic_abortop(void *);
int	vop_generic_islocked(void *);
int	vop_generic_lock(void *);
int	vop_generic_unlock(void *);
int	vop_generic_revoke(void *);
int	vop_generic_kqfilter(void *);

int	vn_stat(struct vnode *vp, struct stat *sb, struct proc *p);
int	vn_statfile(struct file *fp, struct stat *sb, struct proc *p);
int	vn_writechk(struct vnode *vp);
void	vn_marktext(struct vnode *vp);
void	vn_syncer_add_to_worklist(struct vnode *vp, int delay);
void    sched_sync(struct proc *);

struct vnode *
	checkalias(struct vnode *vp, dev_t nvp_rdev, struct mount *mp);
void	vput(struct vnode *vp);
void	vrele(struct vnode *vp);
int	vaccess(mode_t file_mode, uid_t uid, gid_t gid,
	    mode_t acc_mode, struct ucred *cred);

int	vn_isdisk(struct vnode *vp, int *errp);

int	softdep_fsync(struct vnode *vp);
479 
480 #endif /* _KERNEL */
481