xref: /dragonfly/sys/kern/vfs_helper.c (revision 2b3f93ea6d1f70880f3e87f3c2cbe0dc0bfc9332)
1 /*
2  * (The copyright below applies to ufs_access())
3  *
4  * Copyright (c) 1982, 1986, 1989, 1993, 1995
5  *        The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)ufs_vnops.c  8.27 (Berkeley) 5/27/95
37  */
38 
39 #include "opt_quota.h"
40 #include "opt_suiddir.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/uio.h>
45 #include <sys/conf.h>
46 #include <sys/kernel.h>
47 #include <sys/fcntl.h>
48 #include <sys/stat.h>
49 #include <sys/mount.h>
50 #include <sys/unistd.h>
51 #include <sys/vnode.h>
52 #include <sys/file.h>                   /* XXX */
53 #include <sys/proc.h>
54 #include <sys/caps.h>
55 #include <sys/jail.h>
56 #include <sys/sysctl.h>
57 #include <sys/sfbuf.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page2.h>
61 
62 #ifdef LWBUF_IS_OPTIMAL
63 
/*
 * Tunable consulted by vop_helper_read_shortcut(): when it is 0 the
 * shortcut returns immediately without touching the uio.  Defaults to 1
 * and is registered read/write (CTLFLAG_RW) under the _vm sysctl node
 * with the name read_shortcut_enable.
 */
64 static int vm_read_shortcut_enable = 1;
65 SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW,
66             &vm_read_shortcut_enable, 0, "Direct vm_object vop_read shortcut");
67 
68 #endif
69 
70 /*
71  * vop_helper_access()
72  *
73  *        Provide standard UNIX semanics for VOP_ACCESS, but without the quota
74  *        code.  This procedure was basically pulled out of UFS.
75  */
76 int
vop_helper_access(struct vop_access_args * ap,uid_t ino_uid,gid_t ino_gid,mode_t ino_mode,u_int32_t ino_flags)77 vop_helper_access(struct vop_access_args *ap, uid_t ino_uid, gid_t ino_gid,
78                       mode_t ino_mode, u_int32_t ino_flags)
79 {
80           struct vnode *vp = ap->a_vp;
81           struct ucred *cred = ap->a_cred;
82           mode_t mask, mode = ap->a_mode;
83           gid_t *gp;
84           int i;
85           uid_t proc_uid;
86           gid_t proc_gid;
87 
88           if (ap->a_flags & AT_EACCESS) {
89                     proc_uid = cred->cr_uid;
90                     proc_gid = cred->cr_gid;
91           } else {
92                     proc_uid = cred->cr_ruid;
93                     proc_gid = cred->cr_rgid;
94           }
95 
96           /*
97            * Disallow write attempts on read-only filesystems;
98            * unless the file is a socket, fifo, or a block or
99            * character device resident on the filesystem.
100            */
101           if (mode & VWRITE) {
102                     switch (vp->v_type) {
103                     case VDIR:
104                     case VLNK:
105                     case VREG:
106                     case VDATABASE:
107                               if (vp->v_mount->mnt_flag & MNT_RDONLY)
108                                         return (EROFS);
109                               break;
110                     default:
111                               break;
112                     }
113           }
114 
115           /* If immutable bit set, nobody gets to write it. */
116           if ((mode & VWRITE) && (ino_flags & IMMUTABLE))
117                     return (EPERM);
118 
119           /* Otherwise, user id 0 always gets access. */
120           if (proc_uid == 0)
121                     return (0);
122 
123           mask = 0;
124 
125           /* Otherwise, check the owner. */
126           if (proc_uid == ino_uid) {
127                     if (mode & VEXEC)
128                               mask |= S_IXUSR;
129                     if (mode & VREAD)
130                               mask |= S_IRUSR;
131                     if (mode & VWRITE)
132                               mask |= S_IWUSR;
133                     return ((ino_mode & mask) == mask ? 0 : EACCES);
134           }
135 
136           /*
137            * Otherwise, check the groups.
138            * We must special-case the primary group to, if needed, check against
139            * the real gid and not the effective one.
140            */
141           if (proc_gid == ino_gid) {
142                     if (mode & VEXEC)
143                               mask |= S_IXGRP;
144                     if (mode & VREAD)
145                               mask |= S_IRGRP;
146                     if (mode & VWRITE)
147                               mask |= S_IWGRP;
148                     return ((ino_mode & mask) == mask ? 0 : EACCES);
149           }
150           for (i = 1, gp = &cred->cr_groups[1]; i < cred->cr_ngroups; i++, gp++)
151                     if (ino_gid == *gp) {
152                               if (mode & VEXEC)
153                                         mask |= S_IXGRP;
154                               if (mode & VREAD)
155                                         mask |= S_IRGRP;
156                               if (mode & VWRITE)
157                                         mask |= S_IWGRP;
158                               return ((ino_mode & mask) == mask ? 0 : EACCES);
159                     }
160 
161           /* Otherwise, check everyone else. */
162           if (mode & VEXEC)
163                     mask |= S_IXOTH;
164           if (mode & VREAD)
165                     mask |= S_IROTH;
166           if (mode & VWRITE)
167                     mask |= S_IWOTH;
168           return ((ino_mode & mask) == mask ? 0 : EACCES);
169 }
170 
171 int
vop_helper_setattr_flags(u_int32_t * ino_flags,u_int32_t vaflags,uid_t uid,struct ucred * cred)172 vop_helper_setattr_flags(u_int32_t *ino_flags, u_int32_t vaflags,
173                                uid_t uid, struct ucred *cred)
174 {
175           int error;
176 
177           /*
178            * If uid doesn't match only a privileged user can change the flags
179            */
180           if (cred->cr_uid != uid &&
181               (error = caps_priv_check(cred, SYSCAP_NOVFS_SYSFLAGS)))
182           {
183                     return(error);
184           }
185           if (cred->cr_uid == 0 &&
186               (!jailed(cred) || PRISON_CAP_ISSET(cred->cr_prison->pr_caps,
187                     PRISON_CAP_VFS_CHFLAGS))) {
188                     if ((*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND)) &&
189                         securelevel > 0)
190                               return (EPERM);
191                     *ino_flags = vaflags;
192           } else {
193                     if (*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND) ||
194                         (vaflags & UF_SETTABLE) != vaflags)
195                               return (EPERM);
196                     *ino_flags &= SF_SETTABLE;
197                     *ino_flags |= vaflags & UF_SETTABLE;
198           }
199           return(0);
200 }
201 
202 /*
203  * This helper function may be used by VFSs to implement UNIX initial
204  * ownership semantics when creating new objects inside directories.
205  */
206 uid_t
vop_helper_create_uid(struct mount * mp,mode_t dmode,uid_t duid,struct ucred * cred,mode_t * modep)207 vop_helper_create_uid(struct mount *mp, mode_t dmode, uid_t duid,
208                           struct ucred *cred, mode_t *modep)
209 {
210 #ifdef SUIDDIR
211           if ((mp->mnt_flag & MNT_SUIDDIR) && (dmode & S_ISUID) &&
212               duid != cred->cr_uid && duid) {
213                     *modep &= ~07111;
214                     return(duid);
215           }
216 #endif
217           return(cred->cr_uid);
218 }
219 
220 /*
221  * This helper may be used by VFSs to implement unix chmod semantics.
222  */
223 int
vop_helper_chmod(struct vnode * vp,mode_t new_mode,struct ucred * cred,uid_t cur_uid,gid_t cur_gid,mode_t * cur_modep)224 vop_helper_chmod(struct vnode *vp, mode_t new_mode, struct ucred *cred,
225                      uid_t cur_uid, gid_t cur_gid, mode_t *cur_modep)
226 {
227           int error;
228 
229           if (cred->cr_uid != cur_uid) {
230                     error = caps_priv_check(cred, SYSCAP_NOVFS_CHMOD);
231                     if (error)
232                               return (error);
233           }
234           if (cred->cr_uid) {
235                     if (vp->v_type != VDIR && (*cur_modep & S_ISTXT))
236                               return (EFTYPE);
237                     if (!groupmember(cur_gid, cred) && (*cur_modep & S_ISGID))
238                               return (EPERM);
239           }
240           *cur_modep &= ~ALLPERMS;
241           *cur_modep |= new_mode & ALLPERMS;
242           return(0);
243 }
244 
245 /*
246  * This helper may be used by VFSs to implement unix chown semantics.
247  */
248 int
vop_helper_chown(struct vnode * vp,uid_t new_uid,gid_t new_gid,struct ucred * cred,uid_t * cur_uidp,gid_t * cur_gidp,mode_t * cur_modep)249 vop_helper_chown(struct vnode *vp, uid_t new_uid, gid_t new_gid,
250                      struct ucred *cred,
251                      uid_t *cur_uidp, gid_t *cur_gidp, mode_t *cur_modep)
252 {
253           gid_t ogid;
254           uid_t ouid;
255           int error;
256 
257           if (new_uid == (uid_t)VNOVAL)
258                     new_uid = *cur_uidp;
259           if (new_gid == (gid_t)VNOVAL)
260                     new_gid = *cur_gidp;
261 
262           /*
263            * If we don't own the file, are trying to change the owner
264            * of the file, or are not a member of the target group,
265            * the caller must be privileged or the call fails.
266            */
267           if ((cred->cr_uid != *cur_uidp || new_uid != *cur_uidp ||
268               (new_gid != *cur_gidp && !(cred->cr_gid == new_gid ||
269               groupmember(new_gid, cred)))) &&
270               (error = caps_priv_check(cred, SYSCAP_NOVFS_CHOWN)))
271           {
272                     return (error);
273           }
274           ogid = *cur_gidp;
275           ouid = *cur_uidp;
276           /* XXX QUOTA CODE */
277           *cur_uidp = new_uid;
278           *cur_gidp = new_gid;
279           /* XXX QUOTA CODE */
280 
281           /*
282            * DragonFly clears both SUID and SGID if either the owner or
283            * group is changed and root isn't doing it.  If root is doing
284            * it we do not clear SUID/SGID.
285            */
286           if (cred->cr_uid != 0 && (ouid != new_uid || ogid != new_gid))
287                     *cur_modep &= ~(S_ISUID | S_ISGID);
288           return(0);
289 }
290 
291 #ifdef LWBUF_IS_OPTIMAL
292 
293 /*
294  * A VFS can call this function to try to dispose of a read request
295  * directly from the VM system, pretty much bypassing almost all VFS
296  * overhead except for atime updates.
297  *
298  * If 0 is returned some or all of the uio was handled.  The caller must
299  * check the uio and handle the remainder.
300  *
301  * The caller must fail on a non-zero error.
302  */
303 int
vop_helper_read_shortcut(struct vop_read_args * ap)304 vop_helper_read_shortcut(struct vop_read_args *ap)
305 {
306           struct vnode *vp;
307           struct uio *uio;
308           struct lwbuf *lwb;
309           struct lwbuf lwb_cache;
310           vm_object_t obj;
311           vm_page_t m;
312           int offset;
313           int n;
314           int error;
315 
316           vp = ap->a_vp;
317           uio = ap->a_uio;
318 
319           /*
320            * We can't short-cut if there is no VM object or this is a special
321            * UIO_NOCOPY read (typically from VOP_STRATEGY()).  We also can't
322            * do this if we cannot extract the filesize from the vnode.
323            */
324           if (vm_read_shortcut_enable == 0)
325                     return(0);
326           if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY)
327                     return(0);
328           if (vp->v_filesize == NOOFFSET)
329                     return(0);
330           if (uio->uio_resid == 0)
331                     return(0);
332 
333           /*
334            * Iterate the uio on a page-by-page basis
335            *
336            * XXX can we leave the object held shared during the uiomove()?
337            */
338           obj = vp->v_object;
339           vm_object_hold_shared(obj);
340 
341           error = 0;
342           while (uio->uio_resid && error == 0) {
343                     offset = (int)uio->uio_offset & PAGE_MASK;
344                     n = PAGE_SIZE - offset;
345                     if (n > uio->uio_resid)
346                               n = uio->uio_resid;
347                     if (vp->v_filesize < uio->uio_offset)
348                               break;
349                     if (uio->uio_offset + n > vp->v_filesize)
350                               n = vp->v_filesize - uio->uio_offset;
351                     if (n == 0)
352                               break;    /* hit EOF */
353 
354                     m = vm_page_lookup_sbusy_try(obj, OFF_TO_IDX(uio->uio_offset),
355                                                        0, PAGE_SIZE);
356                     if (error || m == NULL) {
357                               error = 0;
358                               break;
359                     }
360                     if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
361                               vm_page_sbusy_drop(m);
362                               break;
363                     }
364                     lwb = lwbuf_alloc(m, &lwb_cache);
365 
366                     /*
367                      * Use a no-fault uiomove() to avoid deadlocking against
368                      * our VM object (which could livelock on the same object
369                      * due to shared-vs-exclusive), or deadlocking against
370                      * our busied page.  Returns EFAULT on any fault which
371                      * winds up diving a vnode.
372                      */
373                     error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset,
374                                                   n, uio);
375 
376                     vm_page_flag_set(m, PG_REFERENCED);
377                     lwbuf_free(lwb);
378                     vm_page_sbusy_drop(m);
379           }
380           vm_object_drop(obj);
381 
382           /*
383            * Ignore EFAULT since we used uiomove_nofault(), causes caller
384            * to fall-back to normal code for this case.
385            */
386           if (error == EFAULT)
387                     error = 0;
388 
389           return (error);
390 }
391 
392 #else
393 
/*
 * vop_helper_read_shortcut()
 *
 *	Fallback used when lwbufs are not optimal: it is best to just use
 *	the buffer cache, so nothing is consumed from the uio and 0 is
 *	returned, leaving the whole request to the caller.
 */
int
vop_helper_read_shortcut(struct vop_read_args *ap)
{
	(void)ap;	/* request is left entirely to the caller */

	return (0);
}
403 
404 #endif
405