1 /*     $NetBSD: buf.h,v 1.135 2024/05/12 10:34:56 rillig Exp $ */
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, and by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *        The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *        @(#)buf.h 8.9 (Berkeley) 3/30/95
67  */
68 
69 #ifndef _SYS_BUF_H_
70 #define   _SYS_BUF_H_
71 
72 #include <sys/pool.h>
73 #include <sys/queue.h>
74 #include <sys/mutex.h>
75 #include <sys/condvar.h>
76 #include <sys/rbtree.h>
77 #if defined(_KERNEL)
78 #include <sys/workqueue.h>
79 #endif /* defined(_KERNEL) */
80 
/*
 * Forward declarations.  struct buf is completed further down in this
 * header; the others stay incomplete here and are only ever used through
 * pointers.
 */
struct buf;
struct mount;
struct vnode;
struct kauth_cred;

/*
 * Distinguished non-NULL struct buf pointer constant.
 * NOTE(review): presumably marks a buffer that is on no list; its users
 * live outside this header -- confirm.
 */
#define NOLIST ((struct buf *)0x87654321)

/* Lock referred to by the "c" field marking in struct buf. */
extern kmutex_t bufcache_lock;
/* Generic lock that b_objlock points to when a buffer has no vnode. */
extern kmutex_t buffer_lock;

#if defined(_KERNEL)
/* Pointer to a function taking a buf_t *; declared for kernel builds only. */
extern void (*biodone_vfs)(buf_t *);
#endif
94 
95 /*
96  * The buffer header describes an I/O operation in the kernel.
97  *
98  * Field markings and the corresponding locks:
99  *
100  * b      thread of execution that holds BC_BUSY, does not correspond
101  *          directly to any particular LWP
102  * c      bufcache_lock
103  * o      b_objlock
104  *
105  * For buffers associated with a vnode, b_objlock points to vp->v_interlock.
106  * If not associated with a vnode, it points to the generic buffer_lock.
107  */
108 
/*
 * The b_u union in struct buf gains a struct work member only when _KERNEL
 * is defined.  Check at compile time that struct work is no larger than the
 * TAILQ_ENTRY(buf) member, so the kernel-only member is never the largest
 * member of that union.  Only the size is checked, which is why the
 * arrangement is merely "~safe".
 */
#if defined(_KERNEL)
__CTASSERT(sizeof(struct work) <= sizeof(TAILQ_ENTRY(buf)));
#endif
113 
/*
 * Buffer header.  The letter that starts each field comment names what
 * protects the field:
 *	b - the thread of execution that holds BC_BUSY
 *	c - bufcache_lock
 *	o - b_objlock
 */
struct buf {
	/*
	 * Overlaid linkage storage.  Only u_actq and u_work have b_*
	 * accessor macros; u_rbnode has none in this header.
	 */
	union {
		TAILQ_ENTRY(buf) u_actq;
		rb_node_t u_rbnode;
#if defined(_KERNEL)
		/* u_work is no larger than u_actq (checked by __CTASSERT) */
		struct work u_work;
#endif
	} b_u;					/* b: device driver queue */
#define	b_actq	b_u.u_actq
#define	b_work	b_u.u_work
	void			(*b_iodone)(struct buf *);/* b: call when done */
	int			b_error;	/* b: errno value. */
	int			b_resid;	/* b: remaining I/O; also named
						      b_cylinder */
	u_int			b_flags;	/* b: B_* flags */
	int			b_prio;		/* b: priority for queue
						      (BPRIO_* values) */
	int			b_bufsize;	/* b: allocated size */
	int			b_bcount;	/* b: valid bytes in buffer */
	dev_t			b_dev;		/* b: associated device */
	void			*b_data;	/* b: data area; clrbuf() zeroes
						      b_bcount bytes of it */
	daddr_t			b_blkno;	/* b: physical block number
						      (partition relative) */
	daddr_t			b_rawblkno;	/* b: raw physical block number
						      (volume relative) */
	struct proc		*b_proc;	/* b: proc if B_PHYS */
	void			*b_saveaddr;	/* b: saved b_data for physio */
	struct cpu_info		*b_ci;		/* b: originating CPU */

	/*
	 * b: private data for owner.
	 *  - buffer cache buffers are owned by the corresponding filesystem.
	 *  - non-buffer cache buffers are owned by the subsystem which
	 *    allocated them (filesystem, disk driver, etc).
	 */
	void	*b_private;
	off_t	b_dcookie;		/* NFS: Offset cookie if dir block */

	kcondvar_t		b_busy;		/* c: threads waiting on buf */
	void			*b_unused;	/*  : unused */
	LIST_ENTRY(buf)		b_hash;		/* c: hash chain */
	LIST_ENTRY(buf)		b_vnbufs;	/* c: associated vnode */
	TAILQ_ENTRY(buf)	b_freelist;	/* c: position if not active */
	TAILQ_ENTRY(buf)	b_wapbllist;	/* c: transaction buffer list */
	daddr_t			b_lblkno;	/* c: logical block number */
	int			b_freelistindex;/* c: free list index (BQ_) */
	u_int			b_cflags;	/* c: BC_* flags */
	struct vnode		*b_vp;		/* c: file vnode */

	kcondvar_t		b_done;		/* o: waiting on completion */
	u_int			b_oflags;	/* o: BO_* flags */
	kmutex_t		*b_objlock;	/* o: completion lock */
};
166 
/*
 * For portability with historic industry practice, the cylinder number has
 * to be maintained in the `b_resid' field.  b_cylinder is therefore only a
 * second name for b_resid: the macro rewrites every use of the identifier,
 * so the two can never hold different values at the same time.
 */
#define	b_cylinder b_resid		/* Cylinder number for disksort(). */
172 
/*
 * Flags owned by the buffer cache; they live in b_cflags.
 * Values are spelled as shifts so that the bit position is explicit.
 */
#define	BC_AGE		(1 << 0)	/* Goes to the age queue once I/O is done. */
#define	BC_BUSY		(1 << 4)	/* I/O is in progress. */
#define	BC_INVAL	(1 << 13)	/* Contents are not valid. */
#define	BC_NOCACHE	(1 << 15)	/* Block is not cached after use. */
#define	BC_WANTED	(1 << 23)	/* A process wants this buffer. */
#define	BC_VFLUSH	(1 << 26)	/* Buffer is being synced. */
182 
/*
 * Flags owned by the associated object; they live in b_oflags.
 * Values are spelled as shifts so that the bit position is explicit.
 */
#define	BO_DELWRI	(1 << 7)	/* I/O is delayed until the buffer is reused. */
#define	BO_DONE		(1 << 9)	/* I/O has completed. */
188 
/*
 * Flags owned by whoever holds the buffer; they live in b_flags.
 * Values are spelled as shifts so that the bit position is explicit.
 * B_WRITE is not a bit at all: a write is the absence of B_READ.
 */
#define	B_WRITE		0		/* Write buffer (pseudo flag, no bit). */
#define	B_ASYNC		(1 << 2)	/* Start the I/O without waiting. */
#define	B_COWDONE	(1 << 10)	/* Copy-on-write has already been done. */
#define	B_GATHERED	(1 << 12)	/* LFS: already placed in a segment. */
#define	B_LOCKED	(1 << 14)	/* Locked in core (not reusable). */
#define	B_PHYS		(1 << 18)	/* I/O to user memory. */
#define	B_RAW		(1 << 19)	/* Set by physio for raw transfers. */
#define	B_READ		(1 << 20)	/* Read buffer. */
#define	B_DEVPRIVATE	(1 << 25)	/* Private flag for device drivers. */
#define	B_MEDIA_FUA	(1 << 27)	/* Media: set Force Unit Access. */
#define	B_MEDIA_DPO	(1 << 28)	/* Media: set Disable Page Out. */
203 
/*
 * Bit-name table for printing buffer flags, in the old-style snprintb(3)
 * format: the first character (\20) selects hexadecimal output, and each
 * following entry is one octal-escaped bit number, counted from 1, followed
 * by the name printed when that bit is set (bit N is the value 1 << (N - 1)).
 *
 * The names come from all three flag words (B_*, BC_* and BO_*), whose bit
 * assignments do not overlap.  \30WANTED covers BC_WANTED (0x00800000),
 * which was the one defined flag missing from the table.  \4BAD has no
 * matching flag definition in this header and is kept unchanged.
 */
#define BUF_FLAGBITS \
    "\20\1AGE\3ASYNC\4BAD\5BUSY\10DELWRI" \
    "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
    "\23PHYS\24RAW\25READ\30WANTED\32DEVPRIVATE\33VFLUSH\34MEDIA_FUA" \
    "\35MEDIA_DPO"
208 
/*
 * Direction tests.  B_WRITE is a "pseudo flag" with the value 0, so it can
 * not be tested with a mask; a buffer is a write exactly when B_READ is
 * clear.  Both macros yield 0 or 1.
 */
#define BUF_ISREAD(bp)	(((bp)->b_flags & B_READ) != 0)
#define BUF_ISWRITE(bp)	(((bp)->b_flags & B_READ) == 0)
212 
/* Mask of the media flags (DPO and FUA), to be passed on for nested I/O. */
#define B_MEDIA_FLAGS	(B_MEDIA_DPO | B_MEDIA_FUA)
215 
/*
 * This structure describes a clustered I/O.  It is stored in the b_saveaddr
 * field of the buffer on which the I/O is done.  At I/O completion, the
 * cluster callback uses the structure to parcel the I/O out to the
 * individual buffers, and then frees this structure.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	/*
	 * NOTE(review): struct buf has no b_addr member in this header;
	 * presumably this saves b_data -- confirm.
	 */
	void	*bs_saveaddr;		/* Saved b_addr. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf *bs_children;	/* List of associated buffers. */
};
229 
/*
 * clrbuf(bp): fill the first b_bcount bytes of the buffer's data area
 * (b_data) with zeroes, then reset b_resid to 0.
 *
 * Expands to a single statement.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	clrbuf(bp) do {							\
	memset((bp)->b_data, 0, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
} while (/* CONSTCOND */ 0)
238 
/*
 * Flag bits accepted by the low-level allocation routines.
 * Values are spelled as shifts so that the bit position is explicit.
 */
#define B_CLRBUF	(1 << 0)	/* Clear the buffer that gets allocated. */
#define B_SYNC		(1 << 1)	/* Perform every allocation synchronously. */
#define B_METAONLY	(1 << 2)	/* Hand back the indirect block buffer. */
#define B_CONTIG	(1 << 3)	/* Lay the file out contiguously. */

/*
 * Flag bits accepted by bread() and breadn().  This is a separate flag
 * space: B_MODIFY has the same numeric value as B_CLRBUF.
 */
#define B_MODIFY	(1 << 0)	/* Hint: the caller might modify the buffer. */
247 
248 #ifdef _KERNEL
249 
/*
 * Accessors for the b_prio member of a buffer.
 *
 * BIO_GETPRIO(bp)		yields bp->b_prio.
 * BIO_SETPRIO(bp, prio)	stores prio in bp->b_prio; the value of the
 *				expression is the value stored.
 * BIO_COPYPRIO(bp1, bp2)	copies the priority of bp2 into bp1.
 *
 * Every expansion is fully parenthesized.  BIO_SETPRIO used to expand to a
 * bare assignment, which parsed incorrectly inside a larger expression
 * (for example as an operand of ?: or +).
 */
#define	BIO_GETPRIO(bp)		((bp)->b_prio)
#define	BIO_SETPRIO(bp, prio)	((bp)->b_prio = (prio))
#define	BIO_COPYPRIO(bp1, bp2)	BIO_SETPRIO(bp1, BIO_GETPRIO(bp2))

/*
 * Priority values kept in b_prio.  BPRIO_NPRIO is the number of levels,
 * not a level itself.
 */
#define	BPRIO_NPRIO		3
#define	BPRIO_TIMECRITICAL	2
#define	BPRIO_TIMELIMITED	1
#define	BPRIO_TIMENONCRITICAL	0
#define	BPRIO_DEFAULT		BPRIO_TIMELIMITED
259 
/*
 * Kernel-only function declarations (this part of the header is compiled
 * only when _KERNEL is defined).  Parameter names are omitted throughout,
 * so only the types are documented here.
 */
__BEGIN_DECLS
/*
 * bufferio(9) ops
 */
void	biodone(buf_t *);
int	biowait(buf_t *);
buf_t	*getiobuf(struct vnode *, bool);
void	putiobuf(buf_t *);
void	nestiobuf_setup(buf_t *, buf_t *, int, size_t);
void	nestiobuf_done(buf_t *, int, int);

void	nestiobuf_iodone(buf_t *);
/*
 * physio() takes two callbacks, each of which receives a buf_t *.
 * NOTE(review): struct uio is not forward-declared in this file;
 * presumably an included header provides it -- confirm.
 */
int	physio(void (*)(buf_t *), buf_t *, dev_t, int,
	       void (*)(buf_t *), struct uio *);

/*
 * buffercache(9) ops
 *
 * bread() and breadn() return an int and take a trailing buf_t **;
 * presumably the buffer is handed back through it -- see buffercache(9).
 */
int	bread(struct vnode *, daddr_t, int, int, buf_t **);
int	breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
	       int, buf_t **);
int	bwrite(buf_t *);
void	bawrite(buf_t *);
void	bdwrite(buf_t *);
buf_t	*getblk(struct vnode *, daddr_t, int, int, int);
buf_t	*geteblk(int);
buf_t	*incore(struct vnode *, daddr_t);
int	allocbuf(buf_t *, int, int);
void	brelsel(buf_t *, int);
void	brelse(buf_t *, int);
void	binvalbuf(struct vnode *, daddr_t);

/*
 * So-far indeterminate ops that might belong to either
 * bufferio(9) or buffercache(9).
 */
void	bremfree(buf_t *);
void	bufinit(void);
void	bufinit2(void);
void	minphys(buf_t *);
void	brelvp(buf_t *);
void	reassignbuf(buf_t *, struct vnode *);
void	bgetvp(struct vnode *, buf_t *);
u_long	buf_memcalc(void);
int	buf_drain(int);
int	buf_setvalimit(vsize_t);
#if defined(DDB) || defined(DEBUGPRINT)
/*
 * Declared only when DDB or DEBUGPRINT is defined.  The last argument is a
 * printf-like output callback; __printflike(1, 2) marks its format string
 * and arguments.
 */
void	vfs_buf_print(buf_t *, int, void (*)(const char *, ...)
    __printflike(1, 2));
#endif
void	buf_init(buf_t *);
void	buf_destroy(buf_t *);
int	bbusy(buf_t *, bool, int, kmutex_t *);
u_int	buf_nbuf(void);

void	biohist_init(void);

__END_DECLS
318 #endif /* _KERNEL */
319 #endif /* !_SYS_BUF_H_ */
320