1 /*        $NetBSD: filedesc.h,v 1.71 2024/11/10 16:14:52 riastradh Exp $        */
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 1990, 1993
31  *        The Regents of the University of California.  All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. Neither the name of the University nor the names of its contributors
42  *    may be used to endorse or promote products derived from this software
43  *    without specific prior written permission.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  *
57  *        @(#)filedesc.h      8.1 (Berkeley) 6/2/93
58  */
59 
60 #ifndef _SYS_FILEDESC_H_
61 #define   _SYS_FILEDESC_H_
62 
63 #include <sys/param.h>
64 #include <sys/queue.h>
65 #include <sys/mutex.h>
66 #include <sys/rwlock.h>
67 #include <sys/condvar.h>
68 
69 /*
70  * This structure is used for the management of descriptors.  It may be
71  * shared by multiple processes.
72  *
73  * A process is initially started out with NDFILE descriptors stored within
74  * this structure, selected to be enough for typical applications based on
75  * the historical limit of 20 open files (and the usage of descriptors by
76  * shells).  If these descriptors are exhausted, a larger descriptor table
77  * may be allocated, up to a process' resource limit; the internal arrays
78  * are then unused.  The initial expansion is set to NDEXTENT; each time
79  * it runs out, it is doubled until the resource limit is reached. NDEXTENT
80  * should be the biggest multiple of OFILESIZE (the per-descriptor entry size,
81  * not defined in this header) that fits in a power-of-two sized piece of memory.
82  */
83 #define   NDFILE              20                  /* descriptors built into fdtab_t::dt_ff[] */
84 #define   NDEXTENT  50                  /* initial expansion; 250 bytes in 256-byte alloc */
85 #define   NDENTRIES 32                  /* 32 fds per entry (one uint32_t bitmap word) */
86 #define   NDENTRYMASK         (NDENTRIES - 1)     /* low NDENTRYSHIFT bits set */
87 #define   NDENTRYSHIFT        5                   /* log2(NDENTRIES): shift, not a bit count */
88 #define   NDLOSLOTS(x)        (((x) + NDENTRIES - 1) >> NDENTRYSHIFT) /* x / NDENTRIES, rounded up */
89 #define   NDHISLOTS(x)        ((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT) /* NDLOSLOTS(x) / NDENTRIES, rounded up */
90 #define   NDFDFILE  6                   /* first 6 descriptors are free; sizes fd_dfdfile[] */
91 
92 /*
93  * Process-private descriptor reference, one for each descriptor slot
94  * in use.  Locks:
95  *
96  * :      unlocked
97  * a      atomic operations + filedesc_t::fd_lock in some cases
98  * d      filedesc_t::fd_lock
99  *
100  * Note that ff_exclose and ff_allocated are likely to be byte sized
101  * (bool).  In general adjacent sub-word sized fields must be locked
102  * the same way, but in this case it's ok: ff_exclose can only be
103  * modified while the descriptor slot is live, and ff_allocated when
104  * it's invalid.
105  *
106  * NOTE: ff_exclose should generally be set with fd_set_exclose(), not
107  * written to directly, when implementing flags like O_CLOEXEC or
108  * SOCK_CLOEXEC, so that struct filedesc::fd_exclose is updated as
109  * needed.  See PR kern/58855: close-on-exec is broken for dup3 and
110  * opening cloning devices.
111  */
112 typedef struct fdfile {
113           bool                ff_exclose;         /* :: close on exec; set via fd_set_exclose() */
114           bool                ff_allocated;       /* d: descriptor slot is allocated */
115           u_int               ff_refcnt;          /* a: reference count on structure; NOTE(review): presumably split by FR_CLOSING/FR_MASK -- confirm */
116           struct file         *ff_file; /* d: pointer to file if open */
117           SLIST_HEAD(,knote) ff_knlist; /* d: list head of knotes attached to this fd */
118           kcondvar_t          ff_closing;         /* d: condvar used as notifier for close */
119 } fdfile_t;
120 
121 #define FDFILE_SIZE ((sizeof(fdfile_t)+CACHE_LINE_SIZE-1)/CACHE_LINE_SIZE*CACHE_LINE_SIZE) /* sizeof(fdfile_t) rounded up to a multiple of CACHE_LINE_SIZE */
122 
123 /* Reference count: closing flag in the top bit of 32, count in the rest */
124 #define   FR_CLOSING          (0x80000000)        /* closing: must interlock */
125 #define   FR_MASK             (~FR_CLOSING)       /* reference count: every bit except FR_CLOSING */
126 
127 /*
128  * Open file table, potentially many 'active' tables per filedesc_t
129  * in a multi-threaded process, or with a shared filedesc_t (clone()).
130  * nfiles is first to avoid pointer arithmetic.
131  */
132 typedef struct fdtab {
133           u_int               dt_nfiles;          /* number of open files allocated (first member) */
134           struct fdtab        *dt_link; /* for lists of dtab: next fdtab_t */
135           fdfile_t  *dt_ff[NDFILE];     /* file structures for open fds, one pointer per fd */
136 } fdtab_t;
137 
138 typedef struct filedesc {
139           /*
140            * Built-in fdfile_t records first, since they have strict
141            * alignment requirements.
142            */
143           uint8_t             fd_dfdfile[NDFDFILE][FDFILE_SIZE]; /* NDFDFILE records of FDFILE_SIZE bytes each */
144           /*
145            * All of the remaining fields are locked by fd_lock.
146            */
147           kmutex_t  fd_lock;  /* lock on structure */
148           fdtab_t * volatile fd_dt;     /* active descriptor table */
149           uint32_t  *fd_himap;          /* each bit points to 32 fds */
150           uint32_t  *fd_lomap;          /* bitmap of free fds */
151           struct klist        *fd_knhash;         /* hash of attached non-fd knotes */
152           int                 fd_lastkqfile;      /* max descriptor for kqueue */
153           int                 fd_lastfile;        /* high-water mark of fd_ofiles (no field of that name in this struct) */
154           int                 fd_refcnt;          /* reference count */
155           u_long              fd_knhashmask;      /* size of fd_knhash */
156           int                 fd_freefile;        /* approx. next free file */
157           int                 fd_unused;          /* unused */
158           bool                fd_exclose;         /* true if at least one fd has close-on-exec (EXCLOSE) set */
159           /*
160            * This structure is used when the number of open files is
161            * <= NDFILE, and is then pointed to by fd_dt above.
162            */
163           fdtab_t             fd_dtbuiltin;
164           /*
165            * These arrays are used when the number of open files is <= 1024
166            * (NDENTRIES * NDENTRIES), and are then pointed to by fd_himap/fd_lomap.
167            */
168 #define fd_startzero          fd_dhimap /* area to zero on return to cache starts at fd_dhimap */
169           uint32_t  fd_dhimap[NDENTRIES >> NDENTRYSHIFT]; /* built-in storage for fd_himap */
170           uint32_t  fd_dlomap[NDENTRIES]; /* built-in storage for fd_lomap */
171 } filedesc_t;
172 
173 /*
174  * Working directory, root and umask information.  Serialization:
175  *
176  * a      atomic operations
177  * l      cwdi_lock
178  */
179 typedef struct cwdinfo {
180           struct vnode        *cwdi_cdir;         /* l: current directory */
181           struct vnode        *cwdi_rdir;         /* l: root directory */
182           struct vnode        *cwdi_edir;         /* l: emulation root (if known) */
183           u_int               cwdi_cmask;         /* a: mask for file creation (umask) */
184           u_int               cwdi_refcnt;        /* a: reference count */
185 
186           krwlock_t cwdi_lock /* :: lock on struct; serializes the 'l' fields */
187               __aligned(COHERENCY_UNIT);          /* -> aligned so it gets own cache line */
188 } cwdinfo_t;
189 
190 #ifdef _KERNEL
191 
192 struct fileops;     /* forward declaration: used only by pointer in fd_clone() */
193 struct socket;      /* forward declaration: used only by pointer in fd_getsock*() */
194 struct proc;        /* forward declaration: used only by pointer in the prototypes below */
195 
196 extern struct cwdinfo cwdi0;  /* declared here, defined outside this header */
197 
198 /*
199  * Kernel global variables and routines (visible only under _KERNEL).
200  */
201 void      fd_sys_init(void);
202 int       fd_open(const char*, int, int, int*);
203 int       fd_dupopen(int, bool, int, int *);
204 int       fd_alloc(struct proc *, int, int *);
205 void      fd_tryexpand(struct proc *);
206 int       fd_allocfile(file_t **, int *);
207 void      fd_affix(struct proc *, file_t *, unsigned);
208 void      fd_abort(struct proc *, file_t *, unsigned);
209 filedesc_t *fd_copy(void);
210 filedesc_t *fd_init(filedesc_t *);
211 void      fd_share(proc_t *);
212 void      fd_hold(lwp_t *);
213 void      fd_free(void);
214 void      fd_closeexec(void);
215 void      fd_ktrexecfd(void);
216 int       fd_checkstd(void);
217 file_t    *fd_getfile(unsigned);
218 file_t    *fd_getfile2(proc_t *, unsigned);
219 void      fd_putfile(unsigned);
220 int       fd_getvnode(unsigned, file_t **);
221 int       fd_getsock(unsigned, struct socket **);
222 int       fd_getsock1(unsigned, struct socket **, file_t **);
223 void      fd_putvnode(unsigned);
224 void      fd_putsock(unsigned);
225 int       fd_close(unsigned);
226 int       fd_dup(file_t *, int, int *, bool);
227 int       fd_dup2(file_t *, unsigned, int);
228 int       fd_clone(file_t *, unsigned, int, const struct fileops *, void *);
229 void      fd_set_exclose(struct lwp *, int, bool); /* preferred way to set ff_exclose: keeps filedesc::fd_exclose updated */
230 int       pipe1(struct lwp *, int *, int);
231 int       dodup(struct lwp *, int, int, int, register_t *);
232 
233 void      cwd_sys_init(void);
234 struct cwdinfo *cwdinit(void);
235 void      cwdshare(proc_t *);
236 void      cwdunshare(proc_t *);
237 void      cwdfree(struct cwdinfo *);
238 void      cwdexec(struct proc *);
239 
240 #define GETCWD_CHECK_ACCESS 0x0001 /* flag bit; NOTE(review): presumably for getcwd_common() -- confirm */
241 int       getcwd_common(struct vnode *, struct vnode *, char **, char *, int,
242     int, struct lwp *);
243 int       vnode_to_path(char *, size_t, struct vnode *, struct lwp *,
244     struct proc *);
245 
246 int       closef(file_t *);
247 file_t *fgetdummy(void);
248 void      fputdummy(file_t *);
249 
250 struct stat;        /* forward declaration for do_sys_fstat() */
251 int       do_sys_fstat(int, struct stat *);
252 struct flock;       /* forward declaration for do_fcntl_lock() */
253 int       do_fcntl_lock(int, int, struct flock *);
254 int       do_posix_fadvise(int, off_t, off_t, int);
255 
256 extern kmutex_t filelist_lock;  /* declared here, defined outside this header */
257 extern filedesc_t filedesc0;    /* declared here, defined outside this header */
258 
259 #endif /* _KERNEL */
260 
261 #endif /* !_SYS_FILEDESC_H_ */
262