1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
27 */
28
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
31
32
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/time.h>
36 #include <sys/systm.h>
37 #include <sys/sysmacros.h>
38 #include <sys/resource.h>
39 #include <sys/vfs.h>
40 #include <sys/endian.h>
41 #include <sys/vm.h>
42 #include <sys/vnode.h>
43 #if __FreeBSD_version >= 1300102
44 #include <sys/smr.h>
45 #endif
46 #include <sys/dirent.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/kmem.h>
50 #include <sys/taskq.h>
51 #include <sys/uio.h>
52 #include <sys/atomic.h>
53 #include <sys/namei.h>
54 #include <sys/mman.h>
55 #include <sys/cmn_err.h>
56 #include <sys/kdb.h>
57 #include <sys/sysproto.h>
58 #include <sys/errno.h>
59 #include <sys/unistd.h>
60 #include <sys/zfs_dir.h>
61 #include <sys/zfs_ioctl.h>
62 #include <sys/fs/zfs.h>
63 #include <sys/dmu.h>
64 #include <sys/dmu_objset.h>
65 #include <sys/spa.h>
66 #include <sys/txg.h>
67 #include <sys/dbuf.h>
68 #include <sys/zap.h>
69 #include <sys/sa.h>
70 #include <sys/policy.h>
71 #include <sys/sunddi.h>
72 #include <sys/filio.h>
73 #include <sys/sid.h>
74 #include <sys/zfs_ctldir.h>
75 #include <sys/zfs_fuid.h>
76 #include <sys/zfs_quota.h>
77 #include <sys/zfs_sa.h>
78 #include <sys/zfs_rlock.h>
79 #include <sys/extdirent.h>
80 #include <sys/bio.h>
81 #include <sys/buf.h>
82 #include <sys/sched.h>
83 #include <sys/acl.h>
84 #include <sys/vmmeter.h>
85 #include <vm/vm_param.h>
86 #include <sys/zil.h>
87 #include <sys/zfs_vnops.h>
88
89 #include <vm/vm_object.h>
90
91 #include <sys/extattr.h>
92 #include <sys/priv.h>
93
94 #ifndef VN_OPEN_INVFS
95 #define VN_OPEN_INVFS 0x0
96 #endif
97
98 VFS_SMR_DECLARE;
99
100 #if __FreeBSD_version < 1300103
101 #define NDFREE_PNBUF(ndp) NDFREE((ndp), NDF_ONLY_PNBUF)
102 #endif
103
104 #if __FreeBSD_version >= 1300047
105 #define vm_page_wire_lock(pp)
106 #define vm_page_wire_unlock(pp)
107 #else
108 #define vm_page_wire_lock(pp) vm_page_lock(pp)
109 #define vm_page_wire_unlock(pp) vm_page_unlock(pp)
110 #endif
111
112 #ifdef DEBUG_VFS_LOCKS
113 #define VNCHECKREF(vp) \
114 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \
115 ("%s: wrong ref counts", __func__));
116 #else
117 #define VNCHECKREF(vp)
118 #endif
119
120 #if __FreeBSD_version >= 1400045
121 typedef uint64_t cookie_t;
122 #else
123 typedef ulong_t cookie_t;
124 #endif
125
126 /*
127 * Programming rules.
128 *
129 * Each vnode op performs some logical unit of work. To do this, the ZPL must
130 * properly lock its in-core state, create a DMU transaction, do the work,
131 * record this work in the intent log (ZIL), commit the DMU transaction,
132 * and wait for the intent log to commit if it is a synchronous operation.
133 * Moreover, the vnode ops must work in both normal and log replay context.
134 * The ordering of events is important to avoid deadlocks and references
135 * to freed memory. The example below illustrates the following Big Rules:
136 *
137 * (1) A check must be made in each zfs thread for a mounted file system.
138 * This is done avoiding races using ZFS_ENTER(zfsvfs).
139 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
140 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
141 * can return EIO from the calling function.
142 *
143 * (2) VN_RELE() should always be the last thing except for zil_commit()
144 * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
145 * First, if it's the last reference, the vnode/znode
146 * can be freed, so the zp may point to freed memory. Second, the last
147 * reference will call zfs_zinactive(), which may induce a lot of work --
148 * pushing cached pages (which acquires range locks) and syncing out
149 * cached atime changes. Third, zfs_zinactive() may require a new tx,
150 * which could deadlock the system if you were already holding one.
151 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
152 *
153 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
154 * as they can span dmu_tx_assign() calls.
155 *
156 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
157 * dmu_tx_assign(). This is critical because we don't want to block
158 * while holding locks.
159 *
160 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
161 * reduces lock contention and CPU usage when we must wait (note that if
162 * throughput is constrained by the storage, nearly every transaction
163 * must wait).
164 *
165 * Note, in particular, that if a lock is sometimes acquired before
166 * the tx assigns, and sometimes after (e.g. z_lock), then failing
167 * to use a non-blocking assign can deadlock the system. The scenario:
168 *
169 * Thread A has grabbed a lock before calling dmu_tx_assign().
170 * Thread B is in an already-assigned tx, and blocks for this lock.
171 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
172 * forever, because the previous txg can't quiesce until B's tx commits.
173 *
174 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
175 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
176 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
177 * to indicate that this operation has already called dmu_tx_wait().
178 * This will ensure that we don't retry forever, waiting a short bit
179 * each time.
180 *
181 * (5) If the operation succeeded, generate the intent log entry for it
182 * before dropping locks. This ensures that the ordering of events
183 * in the intent log matches the order in which they actually occurred.
184 * During ZIL replay the zfs_log_* functions will update the sequence
185 * number to indicate the zil transaction has replayed.
186 *
187 * (6) At the end of each vnode op, the DMU tx must always commit,
188 * regardless of whether there were any errors.
189 *
190 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
191 * to ensure that synchronous semantics are provided when necessary.
192 *
193 * In general, this is how things should be ordered in each vnode op:
194 *
195 * ZFS_ENTER(zfsvfs); // exit if unmounted
196 * top:
197 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD())
198 * rw_enter(...); // grab any other locks you need
199 * tx = dmu_tx_create(...); // get DMU tx
200 * dmu_tx_hold_*(); // hold each object you might modify
201 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
202 * if (error) {
203 * rw_exit(...); // drop locks
204 * zfs_dirent_unlock(dl); // unlock directory entry
205 * VN_RELE(...); // release held vnodes
206 * if (error == ERESTART) {
207 * waited = B_TRUE;
208 * dmu_tx_wait(tx);
209 * dmu_tx_abort(tx);
210 * goto top;
211 * }
212 * dmu_tx_abort(tx); // abort DMU tx
213 * ZFS_EXIT(zfsvfs); // finished in zfs
214 * return (error); // really out of space
215 * }
216 * error = do_real_work(); // do whatever this VOP does
217 * if (error == 0)
218 * zfs_log_*(...); // on success, make ZIL entry
219 * dmu_tx_commit(tx); // commit DMU tx -- error or not
220 * rw_exit(...); // drop locks
221 * zfs_dirent_unlock(dl); // unlock directory entry
222 * VN_RELE(...); // release held vnodes
223 * zil_commit(zilog, foid); // synchronous when necessary
224 * ZFS_EXIT(zfsvfs); // finished in zfs
225 * return (error); // done, report error
226 */
227
228 /* ARGSUSED */
229 static int
zfs_open(vnode_t ** vpp,int flag,cred_t * cr)230 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
231 {
232 znode_t *zp = VTOZ(*vpp);
233 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
234
235 ZFS_ENTER(zfsvfs);
236 ZFS_VERIFY_ZP(zp);
237
238 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
239 ((flag & FAPPEND) == 0)) {
240 ZFS_EXIT(zfsvfs);
241 return (SET_ERROR(EPERM));
242 }
243
244 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
245 ZTOV(zp)->v_type == VREG &&
246 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
247 if (fs_vscan(*vpp, cr, 0) != 0) {
248 ZFS_EXIT(zfsvfs);
249 return (SET_ERROR(EACCES));
250 }
251 }
252
253 /* Keep a count of the synchronous opens in the znode */
254 if (flag & (FSYNC | FDSYNC))
255 atomic_inc_32(&zp->z_sync_cnt);
256
257 ZFS_EXIT(zfsvfs);
258 return (0);
259 }
260
261 /* ARGSUSED */
262 static int
zfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr)263 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
264 {
265 znode_t *zp = VTOZ(vp);
266 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
267
268 ZFS_ENTER(zfsvfs);
269 ZFS_VERIFY_ZP(zp);
270
271 /* Decrement the synchronous opens in the znode */
272 if ((flag & (FSYNC | FDSYNC)) && (count == 1))
273 atomic_dec_32(&zp->z_sync_cnt);
274
275 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
276 ZTOV(zp)->v_type == VREG &&
277 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
278 VERIFY0(fs_vscan(vp, cr, 1));
279
280 ZFS_EXIT(zfsvfs);
281 return (0);
282 }
283
284 /* ARGSUSED */
285 static int
zfs_ioctl(vnode_t * vp,ulong_t com,intptr_t data,int flag,cred_t * cred,int * rvalp)286 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
287 int *rvalp)
288 {
289 loff_t off;
290 int error;
291
292 switch (com) {
293 case _FIOFFS:
294 {
295 return (0);
296
297 /*
298 * The following two ioctls are used by bfu. Faking out,
299 * necessary to avoid bfu errors.
300 */
301 }
302 case _FIOGDIO:
303 case _FIOSDIO:
304 {
305 return (0);
306 }
307
308 case F_SEEK_DATA:
309 case F_SEEK_HOLE:
310 {
311 off = *(offset_t *)data;
312 /* offset parameter is in/out */
313 error = zfs_holey(VTOZ(vp), com, &off);
314 if (error)
315 return (error);
316 *(offset_t *)data = off;
317 return (0);
318 }
319 }
320 return (SET_ERROR(ENOTTY));
321 }
322
323 static vm_page_t
page_busy(vnode_t * vp,int64_t start,int64_t off,int64_t nbytes)324 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
325 {
326 vm_object_t obj;
327 vm_page_t pp;
328 int64_t end;
329
330 /*
331 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
332 * aligned boundaries, if the range is not aligned. As a result a
333 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
334 * It may happen that all DEV_BSIZE subranges are marked clean and thus
335 * the whole page would be considered clean despite have some
336 * dirty data.
337 * For this reason we should shrink the range to DEV_BSIZE aligned
338 * boundaries before calling vm_page_clear_dirty.
339 */
340 end = rounddown2(off + nbytes, DEV_BSIZE);
341 off = roundup2(off, DEV_BSIZE);
342 nbytes = end - off;
343
344 obj = vp->v_object;
345 zfs_vmobject_assert_wlocked_12(obj);
346 #if __FreeBSD_version < 1300050
347 for (;;) {
348 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
349 pp->valid) {
350 if (vm_page_xbusied(pp)) {
351 /*
352 * Reference the page before unlocking and
353 * sleeping so that the page daemon is less
354 * likely to reclaim it.
355 */
356 vm_page_reference(pp);
357 vm_page_lock(pp);
358 zfs_vmobject_wunlock(obj);
359 vm_page_busy_sleep(pp, "zfsmwb", true);
360 zfs_vmobject_wlock(obj);
361 continue;
362 }
363 vm_page_sbusy(pp);
364 } else if (pp != NULL) {
365 ASSERT(!pp->valid);
366 pp = NULL;
367 }
368 if (pp != NULL) {
369 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
370 vm_object_pip_add(obj, 1);
371 pmap_remove_write(pp);
372 if (nbytes != 0)
373 vm_page_clear_dirty(pp, off, nbytes);
374 }
375 break;
376 }
377 #else
378 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
379 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
380 VM_ALLOC_IGN_SBUSY);
381 if (pp != NULL) {
382 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
383 vm_object_pip_add(obj, 1);
384 pmap_remove_write(pp);
385 if (nbytes != 0)
386 vm_page_clear_dirty(pp, off, nbytes);
387 }
388 #endif
389 return (pp);
390 }
391
392 static void
page_unbusy(vm_page_t pp)393 page_unbusy(vm_page_t pp)
394 {
395
396 vm_page_sunbusy(pp);
397 #if __FreeBSD_version >= 1300041
398 vm_object_pip_wakeup(pp->object);
399 #else
400 vm_object_pip_subtract(pp->object, 1);
401 #endif
402 }
403
404 #if __FreeBSD_version > 1300051
405 static vm_page_t
page_hold(vnode_t * vp,int64_t start)406 page_hold(vnode_t *vp, int64_t start)
407 {
408 vm_object_t obj;
409 vm_page_t m;
410
411 obj = vp->v_object;
412 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
413 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
414 VM_ALLOC_NOBUSY);
415 return (m);
416 }
417 #else
418 static vm_page_t
page_hold(vnode_t * vp,int64_t start)419 page_hold(vnode_t *vp, int64_t start)
420 {
421 vm_object_t obj;
422 vm_page_t pp;
423
424 obj = vp->v_object;
425 zfs_vmobject_assert_wlocked(obj);
426
427 for (;;) {
428 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
429 pp->valid) {
430 if (vm_page_xbusied(pp)) {
431 /*
432 * Reference the page before unlocking and
433 * sleeping so that the page daemon is less
434 * likely to reclaim it.
435 */
436 vm_page_reference(pp);
437 vm_page_lock(pp);
438 zfs_vmobject_wunlock(obj);
439 vm_page_busy_sleep(pp, "zfsmwb", true);
440 zfs_vmobject_wlock(obj);
441 continue;
442 }
443
444 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
445 vm_page_wire_lock(pp);
446 vm_page_hold(pp);
447 vm_page_wire_unlock(pp);
448
449 } else
450 pp = NULL;
451 break;
452 }
453 return (pp);
454 }
455 #endif
456
457 static void
page_unhold(vm_page_t pp)458 page_unhold(vm_page_t pp)
459 {
460
461 vm_page_wire_lock(pp);
462 #if __FreeBSD_version >= 1300035
463 vm_page_unwire(pp, PQ_ACTIVE);
464 #else
465 vm_page_unhold(pp);
466 #endif
467 vm_page_wire_unlock(pp);
468 }
469
470 /*
471 * When a file is memory mapped, we must keep the IO data synchronized
472 * between the DMU cache and the memory mapped pages. What this means:
473 *
474 * On Write: If we find a memory mapped page, we write to *both*
475 * the page and the dmu buffer.
476 */
477 void
update_pages(znode_t * zp,int64_t start,int len,objset_t * os)478 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
479 {
480 vm_object_t obj;
481 struct sf_buf *sf;
482 vnode_t *vp = ZTOV(zp);
483 caddr_t va;
484 int off;
485
486 ASSERT3P(vp->v_mount, !=, NULL);
487 obj = vp->v_object;
488 ASSERT3P(obj, !=, NULL);
489
490 off = start & PAGEOFFSET;
491 zfs_vmobject_wlock_12(obj);
492 #if __FreeBSD_version >= 1300041
493 vm_object_pip_add(obj, 1);
494 #endif
495 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
496 vm_page_t pp;
497 int nbytes = imin(PAGESIZE - off, len);
498
499 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
500 zfs_vmobject_wunlock_12(obj);
501
502 va = zfs_map_page(pp, &sf);
503 (void) dmu_read(os, zp->z_id, start + off, nbytes,
504 va + off, DMU_READ_PREFETCH);
505 zfs_unmap_page(sf);
506
507 zfs_vmobject_wlock_12(obj);
508 page_unbusy(pp);
509 }
510 len -= nbytes;
511 off = 0;
512 }
513 #if __FreeBSD_version >= 1300041
514 vm_object_pip_wakeup(obj);
515 #else
516 vm_object_pip_wakeupn(obj, 0);
517 #endif
518 zfs_vmobject_wunlock_12(obj);
519 }
520
521 /*
522 * Read with UIO_NOCOPY flag means that sendfile(2) requests
523 * ZFS to populate a range of page cache pages with data.
524 *
525 * NOTE: this function could be optimized to pre-allocate
526 * all pages in advance, drain exclusive busy on all of them,
527 * map them into contiguous KVA region and populate them
528 * in one single dmu_read() call.
529 */
530 int
mappedread_sf(znode_t * zp,int nbytes,zfs_uio_t * uio)531 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
532 {
533 vnode_t *vp = ZTOV(zp);
534 objset_t *os = zp->z_zfsvfs->z_os;
535 struct sf_buf *sf;
536 vm_object_t obj;
537 vm_page_t pp;
538 int64_t start;
539 caddr_t va;
540 int len = nbytes;
541 int error = 0;
542
543 ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
544 ASSERT3P(vp->v_mount, !=, NULL);
545 obj = vp->v_object;
546 ASSERT3P(obj, !=, NULL);
547 ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
548
549 zfs_vmobject_wlock_12(obj);
550 for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
551 int bytes = MIN(PAGESIZE, len);
552
553 pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
554 VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
555 if (vm_page_none_valid(pp)) {
556 zfs_vmobject_wunlock_12(obj);
557 va = zfs_map_page(pp, &sf);
558 error = dmu_read(os, zp->z_id, start, bytes, va,
559 DMU_READ_PREFETCH);
560 if (bytes != PAGESIZE && error == 0)
561 bzero(va + bytes, PAGESIZE - bytes);
562 zfs_unmap_page(sf);
563 zfs_vmobject_wlock_12(obj);
564 #if __FreeBSD_version >= 1300081
565 if (error == 0) {
566 vm_page_valid(pp);
567 vm_page_activate(pp);
568 vm_page_do_sunbusy(pp);
569 } else {
570 zfs_vmobject_wlock(obj);
571 if (!vm_page_wired(pp) && pp->valid == 0 &&
572 vm_page_busy_tryupgrade(pp))
573 vm_page_free(pp);
574 else
575 vm_page_sunbusy(pp);
576 zfs_vmobject_wunlock(obj);
577 }
578 #else
579 vm_page_do_sunbusy(pp);
580 vm_page_lock(pp);
581 if (error) {
582 if (pp->wire_count == 0 && pp->valid == 0 &&
583 !vm_page_busied(pp))
584 vm_page_free(pp);
585 } else {
586 pp->valid = VM_PAGE_BITS_ALL;
587 vm_page_activate(pp);
588 }
589 vm_page_unlock(pp);
590 #endif
591 } else {
592 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
593 vm_page_do_sunbusy(pp);
594 }
595 if (error)
596 break;
597 zfs_uio_advance(uio, bytes);
598 len -= bytes;
599 }
600 zfs_vmobject_wunlock_12(obj);
601 return (error);
602 }
603
604 /*
605 * When a file is memory mapped, we must keep the IO data synchronized
606 * between the DMU cache and the memory mapped pages. What this means:
607 *
608 * On Read: We "read" preferentially from memory mapped pages,
609 * else we default from the dmu buffer.
610 *
611 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
612 * the file is memory mapped.
613 */
614 int
mappedread(znode_t * zp,int nbytes,zfs_uio_t * uio)615 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
616 {
617 vnode_t *vp = ZTOV(zp);
618 vm_object_t obj;
619 int64_t start;
620 int len = nbytes;
621 int off;
622 int error = 0;
623
624 ASSERT3P(vp->v_mount, !=, NULL);
625 obj = vp->v_object;
626 ASSERT3P(obj, !=, NULL);
627
628 start = zfs_uio_offset(uio);
629 off = start & PAGEOFFSET;
630 zfs_vmobject_wlock_12(obj);
631 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
632 vm_page_t pp;
633 uint64_t bytes = MIN(PAGESIZE - off, len);
634
635 if ((pp = page_hold(vp, start))) {
636 struct sf_buf *sf;
637 caddr_t va;
638
639 zfs_vmobject_wunlock_12(obj);
640 va = zfs_map_page(pp, &sf);
641 error = vn_io_fault_uiomove(va + off, bytes,
642 GET_UIO_STRUCT(uio));
643 zfs_unmap_page(sf);
644 zfs_vmobject_wlock_12(obj);
645 page_unhold(pp);
646 } else {
647 zfs_vmobject_wunlock_12(obj);
648 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
649 uio, bytes);
650 zfs_vmobject_wlock_12(obj);
651 }
652 len -= bytes;
653 off = 0;
654 if (error)
655 break;
656 }
657 zfs_vmobject_wunlock_12(obj);
658 return (error);
659 }
660
661 int
zfs_write_simple(znode_t * zp,const void * data,size_t len,loff_t pos,size_t * presid)662 zfs_write_simple(znode_t *zp, const void *data, size_t len,
663 loff_t pos, size_t *presid)
664 {
665 int error = 0;
666 ssize_t resid;
667
668 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
669 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
670
671 if (error) {
672 return (SET_ERROR(error));
673 } else if (presid == NULL) {
674 if (resid != 0) {
675 error = SET_ERROR(EIO);
676 }
677 } else {
678 *presid = resid;
679 }
680 return (error);
681 }
682
683 void
zfs_zrele_async(znode_t * zp)684 zfs_zrele_async(znode_t *zp)
685 {
686 vnode_t *vp = ZTOV(zp);
687 objset_t *os = ITOZSB(vp)->z_os;
688
689 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
690 }
691
/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock(): `arg` is
 * the already referenced vnode, which is stored in *vpp and locked with
 * lkflags.  If locking fails the reference is dropped and the error is
 * returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *vp = arg;
	int rc;

	*vpp = vp;
	rc = vn_lock(vp, lkflags);
	if (rc == 0)
		return (0);

	vrele(vp);
	return (rc);
}
703
704 static int
zfs_lookup_lock(vnode_t * dvp,vnode_t * vp,const char * name,int lkflags)705 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
706 {
707 znode_t *zdp = VTOZ(dvp);
708 zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
709 int error;
710 int ltype;
711
712 if (zfsvfs->z_replay == B_FALSE)
713 ASSERT_VOP_LOCKED(dvp, __func__);
714
715 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
716 ASSERT3P(dvp, ==, vp);
717 vref(dvp);
718 ltype = lkflags & LK_TYPE_MASK;
719 if (ltype != VOP_ISLOCKED(dvp)) {
720 if (ltype == LK_EXCLUSIVE)
721 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
722 else /* if (ltype == LK_SHARED) */
723 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
724
725 /*
726 * Relock for the "." case could leave us with
727 * reclaimed vnode.
728 */
729 if (VN_IS_DOOMED(dvp)) {
730 vrele(dvp);
731 return (SET_ERROR(ENOENT));
732 }
733 }
734 return (0);
735 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
736 /*
737 * Note that in this case, dvp is the child vnode, and we
738 * are looking up the parent vnode - exactly reverse from
739 * normal operation. Unlocking dvp requires some rather
740 * tricky unlock/relock dance to prevent mp from being freed;
741 * use vn_vget_ino_gen() which takes care of all that.
742 *
743 * XXX Note that there is a time window when both vnodes are
744 * unlocked. It is possible, although highly unlikely, that
745 * during that window the parent-child relationship between
746 * the vnodes may change, for example, get reversed.
747 * In that case we would have a wrong lock order for the vnodes.
748 * All other filesystems seem to ignore this problem, so we
749 * do the same here.
750 * A potential solution could be implemented as follows:
751 * - using LK_NOWAIT when locking the second vnode and retrying
752 * if necessary
753 * - checking that the parent-child relationship still holds
754 * after locking both vnodes and retrying if it doesn't
755 */
756 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
757 return (error);
758 } else {
759 error = vn_lock(vp, lkflags);
760 if (error != 0)
761 vrele(vp);
762 return (error);
763 }
764 }
765
766 /*
767 * Lookup an entry in a directory, or an extended attribute directory.
768 * If it exists, return a held vnode reference for it.
769 *
770 * IN: dvp - vnode of directory to search.
771 * nm - name of entry to lookup.
772 * pnp - full pathname to lookup [UNUSED].
773 * flags - LOOKUP_XATTR set if looking for an attribute.
774 * rdir - root directory vnode [UNUSED].
775 * cr - credentials of caller.
776 * ct - caller context
777 *
778 * OUT: vpp - vnode of located entry, NULL if not found.
779 *
780 * RETURN: 0 on success, error code on failure.
781 *
782 * Timestamps:
783 * NA
784 */
785 /* ARGSUSED */
786 static int
zfs_lookup(vnode_t * dvp,const char * nm,vnode_t ** vpp,struct componentname * cnp,int nameiop,cred_t * cr,int flags,boolean_t cached)787 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
788 struct componentname *cnp, int nameiop, cred_t *cr, int flags,
789 boolean_t cached)
790 {
791 znode_t *zdp = VTOZ(dvp);
792 znode_t *zp;
793 zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
794 #if __FreeBSD_version > 1300124
795 seqc_t dvp_seqc;
796 #endif
797 int error = 0;
798
799 /*
800 * Fast path lookup, however we must skip DNLC lookup
801 * for case folding or normalizing lookups because the
802 * DNLC code only stores the passed in name. This means
803 * creating 'a' and removing 'A' on a case insensitive
804 * file system would work, but DNLC still thinks 'a'
805 * exists and won't let you create it again on the next
806 * pass through fast path.
807 */
808 if (!(flags & LOOKUP_XATTR)) {
809 if (dvp->v_type != VDIR) {
810 return (SET_ERROR(ENOTDIR));
811 } else if (zdp->z_sa_hdl == NULL) {
812 return (SET_ERROR(EIO));
813 }
814 }
815
816 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
817 const char *, nm);
818
819 ZFS_ENTER(zfsvfs);
820 ZFS_VERIFY_ZP(zdp);
821
822 #if __FreeBSD_version > 1300124
823 dvp_seqc = vn_seqc_read_notmodify(dvp);
824 #endif
825
826 *vpp = NULL;
827
828 if (flags & LOOKUP_XATTR) {
829 /*
830 * If the xattr property is off, refuse the lookup request.
831 */
832 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
833 ZFS_EXIT(zfsvfs);
834 return (SET_ERROR(EOPNOTSUPP));
835 }
836
837 /*
838 * We don't allow recursive attributes..
839 * Maybe someday we will.
840 */
841 if (zdp->z_pflags & ZFS_XATTR) {
842 ZFS_EXIT(zfsvfs);
843 return (SET_ERROR(EINVAL));
844 }
845
846 if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
847 ZFS_EXIT(zfsvfs);
848 return (error);
849 }
850 *vpp = ZTOV(zp);
851
852 /*
853 * Do we have permission to get into attribute directory?
854 */
855 error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
856 if (error) {
857 vrele(ZTOV(zp));
858 }
859
860 ZFS_EXIT(zfsvfs);
861 return (error);
862 }
863
864 /*
865 * Check accessibility of directory if we're not coming in via
866 * VOP_CACHEDLOOKUP.
867 */
868 if (!cached) {
869 #ifdef NOEXECCHECK
870 if ((cnp->cn_flags & NOEXECCHECK) != 0) {
871 cnp->cn_flags &= ~NOEXECCHECK;
872 } else
873 #endif
874 if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
875 ZFS_EXIT(zfsvfs);
876 return (error);
877 }
878 }
879
880 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
881 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
882 ZFS_EXIT(zfsvfs);
883 return (SET_ERROR(EILSEQ));
884 }
885
886
887 /*
888 * First handle the special cases.
889 */
890 if ((cnp->cn_flags & ISDOTDOT) != 0) {
891 /*
892 * If we are a snapshot mounted under .zfs, return
893 * the vp for the snapshot directory.
894 */
895 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
896 struct componentname cn;
897 vnode_t *zfsctl_vp;
898 int ltype;
899
900 ZFS_EXIT(zfsvfs);
901 ltype = VOP_ISLOCKED(dvp);
902 VOP_UNLOCK1(dvp);
903 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
904 &zfsctl_vp);
905 if (error == 0) {
906 cn.cn_nameptr = "snapshot";
907 cn.cn_namelen = strlen(cn.cn_nameptr);
908 cn.cn_nameiop = cnp->cn_nameiop;
909 cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
910 cn.cn_lkflags = cnp->cn_lkflags;
911 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
912 vput(zfsctl_vp);
913 }
914 vn_lock(dvp, ltype | LK_RETRY);
915 return (error);
916 }
917 }
918 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
919 ZFS_EXIT(zfsvfs);
920 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
921 return (SET_ERROR(ENOTSUP));
922 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
923 return (error);
924 }
925
926 /*
927 * The loop is retry the lookup if the parent-child relationship
928 * changes during the dot-dot locking complexities.
929 */
930 for (;;) {
931 uint64_t parent;
932
933 error = zfs_dirlook(zdp, nm, &zp);
934 if (error == 0)
935 *vpp = ZTOV(zp);
936
937 ZFS_EXIT(zfsvfs);
938 if (error != 0)
939 break;
940
941 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
942 if (error != 0) {
943 /*
944 * If we've got a locking error, then the vnode
945 * got reclaimed because of a force unmount.
946 * We never enter doomed vnodes into the name cache.
947 */
948 *vpp = NULL;
949 return (error);
950 }
951
952 if ((cnp->cn_flags & ISDOTDOT) == 0)
953 break;
954
955 ZFS_ENTER(zfsvfs);
956 if (zdp->z_sa_hdl == NULL) {
957 error = SET_ERROR(EIO);
958 } else {
959 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
960 &parent, sizeof (parent));
961 }
962 if (error != 0) {
963 ZFS_EXIT(zfsvfs);
964 vput(ZTOV(zp));
965 break;
966 }
967 if (zp->z_id == parent) {
968 ZFS_EXIT(zfsvfs);
969 break;
970 }
971 vput(ZTOV(zp));
972 }
973
974 if (error != 0)
975 *vpp = NULL;
976
977 /* Translate errors and add SAVENAME when needed. */
978 if (cnp->cn_flags & ISLASTCN) {
979 switch (nameiop) {
980 case CREATE:
981 case RENAME:
982 if (error == ENOENT) {
983 error = EJUSTRETURN;
984 #if __FreeBSD_version < 1400068
985 cnp->cn_flags |= SAVENAME;
986 #endif
987 break;
988 }
989 fallthrough;
990 case DELETE:
991 #if __FreeBSD_version < 1400068
992 if (error == 0)
993 cnp->cn_flags |= SAVENAME;
994 #endif
995 break;
996 }
997 }
998
999 #if __FreeBSD_version > 1300124
1000 if ((cnp->cn_flags & ISDOTDOT) != 0) {
1001 /*
1002 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
1003 * handle races. In particular different callers may end up
1004 * with different vnodes and will try to add conflicting
1005 * entries to the namecache.
1006 *
1007 * While finding different result may be acceptable in face
1008 * of concurrent modification, adding conflicting entries
1009 * trips over an assert in the namecache.
1010 *
1011 * Ultimately let an entry through once everything settles.
1012 */
1013 if (!vn_seqc_consistent(dvp, dvp_seqc)) {
1014 cnp->cn_flags &= ~MAKEENTRY;
1015 }
1016 }
1017 #endif
1018
1019 /* Insert name into cache (as non-existent) if appropriate. */
1020 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
1021 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
1022 cache_enter(dvp, NULL, cnp);
1023
1024 /* Insert name into cache if appropriate. */
1025 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
1026 error == 0 && (cnp->cn_flags & MAKEENTRY)) {
1027 if (!(cnp->cn_flags & ISLASTCN) ||
1028 (nameiop != DELETE && nameiop != RENAME)) {
1029 cache_enter(dvp, *vpp, cnp);
1030 }
1031 }
1032
1033 return (error);
1034 }
1035
/*
 * Create a new entry in a directory.
 *
 * The name is looked up with ZNEW and any lookup error is returned to the
 * caller; this implementation contains no path that truncates an existing
 * file.  On success the znode of the newly created entry is returned
 * through zpp.
 *
 *	IN:	dzp	- znode of directory to put new file entry in.
 *		name	- name of new file entry.
 *		vap	- attributes of new file.
 *		excl	- flag indicating exclusive or non-exclusive mode
 *			  [not referenced in the body].
 *		mode	- mode to open file with [not referenced in the body].
 *		cr	- credentials of caller.
 *		flag	- large file flag [UNUSED].
 *		vsecp	- ACL to be set
 *
 *	OUT:	zpp	- znode of created entry.  Cleared to NULL once the
 *			  initial validation has passed and set to the new
 *			  znode only when 0 is returned; the earliest
 *			  validation failures return without touching it.
 *
 *	RETURN:	0 on success, error code on failure.  Codes produced directly
 *		here are EINVAL, EILSEQ and EDQUOT; others are propagated
 *		from the helpers that are called.
 *
 * Timestamps:
 *	dzp - ctime|mtime updated if new entry created
 *	 zp - ctime|mtime always, atime if new
 */

/* ARGSUSED */
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	znode_t *zp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	objset_t *os;
	dmu_tx_t *tx;
	int error;
	uid_t uid = crgetuid(cr);
	gid_t gid = crgetgid(cr);
	uint64_t projid = ZFS_DEFAULT_PROJID;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype;
#ifdef DEBUG_VFS_LOCKS
	/* Only consumed by VNCHECKREF() at the "out" label. */
	vnode_t *dvp = ZTOV(dzp);
#endif

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* On a z_utf8 filesystem, reject names that fail u8_validate(). */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/* From here on the caller sees NULL unless creation succeeds. */
	*zpp = NULL;

	/*
	 * Drop the sticky bit from the requested mode when
	 * secpolicy_vnode_stky_modify() returns non-zero for this credential.
	 */
	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	/*
	 * Look the name up with ZNEW.  Any error is returned as-is; on
	 * success no existing znode is expected back (asserted below).
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	/*
	 * Create a new file object and update the directory
	 * to reference it.
	 */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		goto out;
	}

	/*
	 * We only support the creation of regular files in
	 * extended attribute directories.
	 */

	if ((dzp->z_pflags & ZFS_XATTR) &&
	    (vap->va_type != VREG)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap,
	    cr, vsecp, &acl_ids)) != 0)
		goto out;

	/*
	 * Regular files and directories take the project id computed by
	 * zfs_inherit_projid(); everything else keeps ZFS_DEFAULT_PROJID.
	 */
	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
		projid = zfs_inherit_projid(dzp);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
		zfs_acl_ids_free(&acl_ids);
		error = SET_ERROR(EDQUOT);
		goto out;
	}

	/*
	 * Paired with getnewvnode_drop_reserve() on both the assign-failure
	 * path and the success path below.
	 */
	getnewvnode_reserve_();

	tx = dmu_tx_create(os);

	/* Register every hold the transaction needs before assigning it. */
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa &&
	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, acl_ids.z_aclp->z_acl_bytes);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/* Nothing was created: undo the setup and bail out directly. */
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * NOTE(review): the return value of zfs_link_create() is discarded.
	 * If it can fail at this point the new znode would still be logged
	 * and handed back without a directory entry -- confirm.
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
	    vsecp, acl_ids.z_fuidp, vap);
	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

out:
	/*
	 * Reached on success and, via goto, on the failures that occur
	 * before the transaction is created.  dvp is only declared under
	 * DEBUG_VFS_LOCKS; presumably VNCHECKREF() expands to nothing
	 * otherwise -- TODO confirm.
	 */
	VNCHECKREF(dvp);
	if (error == 0) {
		*zpp = zp;
	}

	/*
	 * With os_sync == ZFS_SYNC_ALWAYS the intent log is committed before
	 * returning; this also runs on the error paths that reach "out".
	 */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
1202
1203 /*
1204 * Remove an entry from a directory.
1205 *
1206 * IN: dvp - vnode of directory to remove entry from.
1207 * name - name of entry to remove.
1208 * cr - credentials of caller.
1209 * ct - caller context
1210 * flags - case flags
1211 *
1212 * RETURN: 0 on success, error code on failure.
1213 *
1214 * Timestamps:
1215 * dvp - ctime|mtime
1216 * vp - ctime (if nlink > 0)
1217 */
1218
1219 /*ARGSUSED*/
1220 static int
zfs_remove_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1221 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1222 {
1223 znode_t *dzp = VTOZ(dvp);
1224 znode_t *zp;
1225 znode_t *xzp;
1226 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1227 zilog_t *zilog;
1228 uint64_t xattr_obj;
1229 uint64_t obj = 0;
1230 dmu_tx_t *tx;
1231 boolean_t unlinked;
1232 uint64_t txtype;
1233 int error;
1234
1235
1236 ZFS_ENTER(zfsvfs);
1237 ZFS_VERIFY_ZP(dzp);
1238 zp = VTOZ(vp);
1239 ZFS_VERIFY_ZP(zp);
1240 zilog = zfsvfs->z_log;
1241
1242 xattr_obj = 0;
1243 xzp = NULL;
1244
1245 if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
1246 goto out;
1247 }
1248
1249 /*
1250 * Need to use rmdir for removing directories.
1251 */
1252 if (vp->v_type == VDIR) {
1253 error = SET_ERROR(EPERM);
1254 goto out;
1255 }
1256
1257 vnevent_remove(vp, dvp, name, ct);
1258
1259 obj = zp->z_id;
1260
1261 /* are there any extended attributes? */
1262 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1263 &xattr_obj, sizeof (xattr_obj));
1264 if (error == 0 && xattr_obj) {
1265 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1266 ASSERT0(error);
1267 }
1268
1269 /*
1270 * We may delete the znode now, or we may put it in the unlinked set;
1271 * it depends on whether we're the last link, and on whether there are
1272 * other holds on the vnode. So we dmu_tx_hold() the right things to
1273 * allow for either case.
1274 */
1275 tx = dmu_tx_create(zfsvfs->z_os);
1276 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1277 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1278 zfs_sa_upgrade_txholds(tx, zp);
1279 zfs_sa_upgrade_txholds(tx, dzp);
1280
1281 if (xzp) {
1282 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1283 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1284 }
1285
1286 /* charge as an update -- would be nice not to charge at all */
1287 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1288
1289 /*
1290 * Mark this transaction as typically resulting in a net free of space
1291 */
1292 dmu_tx_mark_netfree(tx);
1293
1294 error = dmu_tx_assign(tx, TXG_WAIT);
1295 if (error) {
1296 dmu_tx_abort(tx);
1297 ZFS_EXIT(zfsvfs);
1298 return (error);
1299 }
1300
1301 /*
1302 * Remove the directory entry.
1303 */
1304 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1305
1306 if (error) {
1307 dmu_tx_commit(tx);
1308 goto out;
1309 }
1310
1311 if (unlinked) {
1312 zfs_unlinked_add(zp, tx);
1313 vp->v_vflag |= VV_NOSYNC;
1314 }
1315 /* XXX check changes to linux vnops */
1316 txtype = TX_REMOVE;
1317 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1318
1319 dmu_tx_commit(tx);
1320 out:
1321
1322 if (xzp)
1323 vrele(ZTOV(xzp));
1324
1325 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1326 zil_commit(zilog, 0);
1327
1328
1329 ZFS_EXIT(zfsvfs);
1330 return (error);
1331 }
1332
1333
1334 static int
zfs_lookup_internal(znode_t * dzp,const char * name,vnode_t ** vpp,struct componentname * cnp,int nameiop)1335 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1336 struct componentname *cnp, int nameiop)
1337 {
1338 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1339 int error;
1340
1341 cnp->cn_nameptr = __DECONST(char *, name);
1342 cnp->cn_namelen = strlen(name);
1343 cnp->cn_nameiop = nameiop;
1344 cnp->cn_flags = ISLASTCN;
1345 #if __FreeBSD_version < 1400068
1346 cnp->cn_flags |= SAVENAME;
1347 #endif
1348 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1349 cnp->cn_cred = kcred;
1350 #if __FreeBSD_version < 1400037
1351 cnp->cn_thread = curthread;
1352 #endif
1353
1354 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1355 struct vop_lookup_args a;
1356
1357 a.a_gen.a_desc = &vop_lookup_desc;
1358 a.a_dvp = ZTOV(dzp);
1359 a.a_vpp = vpp;
1360 a.a_cnp = cnp;
1361 error = vfs_cache_lookup(&a);
1362 } else {
1363 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1364 B_FALSE);
1365 }
1366 #ifdef ZFS_DEBUG
1367 if (error) {
1368 printf("got error %d on name %s on op %d\n", error, name,
1369 nameiop);
1370 kdb_backtrace();
1371 }
1372 #endif
1373 return (error);
1374 }
1375
1376 int
zfs_remove(znode_t * dzp,const char * name,cred_t * cr,int flags)1377 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1378 {
1379 vnode_t *vp;
1380 int error;
1381 struct componentname cn;
1382
1383 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1384 return (error);
1385
1386 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1387 vput(vp);
1388 return (error);
1389 }
/*
 * Create a new directory and insert it into dzp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 *	IN:	dzp	- znode of directory to add subdir to.
 *		dirname	- name of new directory.
 *		vap	- attributes of new directory; va_type is asserted
 *			  to be VDIR.
 *		cr	- credentials of caller.
 *		flags	- case flags [not referenced in the body].
 *		vsecp	- ACL to be set [not referenced in the body; NULL is
 *			  passed to the ACL and logging helpers instead].
 *
 *	OUT:	zpp	- znode of created directory.  Cleared to NULL before
 *			  the existence check and set to the new znode on
 *			  success; the earliest validation failures return
 *			  without touching it.
 *
 *	RETURN:	0 on success, error code on failure.  Codes produced directly
 *		here are EINVAL, EILSEQ and EDQUOT; others are propagated
 *		from the helpers that are called.
 *
 * Timestamps:
 *	dzp - ctime|mtime updated
 *	 zp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
int
zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
    cred_t *cr, int flags, vsecattr_t *vsecp)
{
	znode_t *zp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	uint64_t txtype;
	dmu_tx_t *tx;
	int error;
	uid_t uid = crgetuid(cr);
	gid_t gid = crgetgid(cr);
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;

	ASSERT3U(vap->va_type, ==, VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    ((vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Directories cannot be created inside an xattr directory. */
	if (dzp->z_pflags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/* On a z_utf8 filesystem, reject names that fail u8_validate(). */
	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * acl_ids is created up front, so every failure exit from here
	 * until the commit has to call zfs_acl_ids_free().
	 */
	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
	*zpp = NULL;

	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
	 *
	 * getnewvnode_reserve_() is paired with getnewvnode_drop_reserve()
	 * on both the assign-failure path and the success path below.
	 */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/* Nothing was created: undo the setup and bail out. */
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Now put new name in parent dir.
	 *
	 * NOTE(review): the return value of zfs_link_create() is discarded;
	 * if it can fail here the function still reports success -- confirm.
	 */
	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);

	*zpp = zp;

	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	/* With os_sync == ZFS_SYNC_ALWAYS, commit the intent log now. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}
1552
#if __FreeBSD_version < 1300124
/*
 * Local definition of cache_vop_rmdir(), compiled only when
 * __FreeBSD_version is below 1300124: purge the name cache entries of the
 * parent directory and then of the directory being removed.
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{
	cache_purge(dvp);
	cache_purge(vp);
}
#endif
1562
/*
 * Remove a directory subdir entry.
 *
 *	IN:	dvp	- vnode of directory to remove from.
 *		vp	- vnode of the directory being removed.
 *		name	- name of directory to be removed.
 *		cr	- credentials of caller.
 *
 *	RETURN:	0 on success, error code on failure.  ENOTDIR is returned
 *		when vp is not a directory; other codes are propagated from
 *		the helpers that are called.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t *dzp = VTOZ(dvp);
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	dmu_tx_t *tx;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;


	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	/*
	 * NOTE(review): "ct" is not declared in this function, so this only
	 * builds if vnevent_rmdir() is a macro that discards its arguments
	 * -- confirm.
	 */
	vnevent_rmdir(vp, dvp, name, ct);

	/* Register every hold the transaction needs before assigning it. */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);

	/* Only log the removal if the directory entry was destroyed. */
	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		zfs_log_remove(zilog, tx, txtype, dzp, name,
		    ZFS_NO_OBJECT, B_FALSE);
	}

	/* The tx is assigned, so it is committed whether or not we failed. */
	dmu_tx_commit(tx);

	/* Runs regardless of the zfs_link_destroy() result. */
	cache_vop_rmdir(dvp, vp);
out:
	/*
	 * With os_sync == ZFS_SYNC_ALWAYS the intent log is committed before
	 * returning; this also runs on the error paths that reach "out".
	 */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
1640
1641 int
zfs_rmdir(znode_t * dzp,const char * name,znode_t * cwd,cred_t * cr,int flags)1642 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1643 {
1644 struct componentname cn;
1645 vnode_t *vp;
1646 int error;
1647
1648 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1649 return (error);
1650
1651 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1652 vput(vp);
1653 return (error);
1654 }
1655
/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure).
 *
 *	IN:	vp	- vnode of directory to read.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		cr	- credentials of caller.
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *		eofp	- set to true if end-of-file detected; may be NULL,
 *			  in which case a local variable is used instead.
 *		ncookies - if non-NULL, receives the number of cookies
 *			  stored in *cookies.
 *		cookies	- if ncookies is non-NULL, receives an M_TEMP array
 *			  holding the offset that follows each returned
 *			  entry.  On an error return after the array was
 *			  allocated it is freed here and *cookies/*ncookies
 *			  are reset to NULL/0.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 *
 * The local "flags" is initialized to 0 and never assigned, so the
 * V_RDDIR_ENTFLAGS and V_RDDIR_ACCFILTER branches below do not execute.
 */
/* ARGSUSED */
static int
zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
    int *ncookies, cookie_t **cookies)
{
	znode_t *zp = VTOZ(vp);
	iovec_t *iovp;
	edirent_t *eodp;
	dirent64_t *odp;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os;
	caddr_t outbuf;
	size_t bufsize;
	zap_cursor_t zc;
	zap_attribute_t zap;
	uint_t bytes_wanted;
	uint64_t offset; /* must be unsigned; checks for < 1 */
	uint64_t parent;
	int local_eof;
	int outcount;
	int error;
	uint8_t prefetch;
	boolean_t check_sysattrs;
	uint8_t type;
	int ncooks;
	cookie_t *cooks = NULL;
	int flags = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* The parent object number is what gets reported for "..". */
	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = zfs_uio_offset(uio);
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 *
	 * A single UIO_SYSSPACE iovec is filled in place; anything else is
	 * staged in a temporary buffer and copied out with zfs_uiomove().
	 */
	iovp = GET_UIO_STRUCT(uio)->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		odp = (struct dirent64 *)outbuf;
	} else {
		bufsize = bytes_wanted;
		outbuf = NULL;
		odp = (struct dirent64 *)iovp->iov_base;
	}
	eodp = (struct edirent *)odp;

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file name.
		 */
		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
		    sizeof (((struct dirent *)NULL)->d_name) + 1);
		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
		*cookies = cooks;
		*ncookies = ncooks;
	}
	/*
	 * If this VFS supports the system attribute view interface; and
	 * we're looking at an extended attribute directory; and we care
	 * about normalization conflicts on this vfs; then we must check
	 * for normalization conflicts with the sysattr name space.
	 */
#ifdef TODO
	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
	    (flags & V_RDDIR_ENTFLAGS);
#else
	check_sysattrs = 0;
#endif

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		/* Where to store the offset of the entry after this one. */
		off64_t *next = NULL;

		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if ((error = zap_cursor_retrieve(&zc, &zap))) {
				/* ENOENT means end of directory, not failure. */
				if ((*eofp = (error == ENOENT)) != 0)
					break;
				else
					goto update;
			}

			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers != 1) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset);
				error = SET_ERROR(ENXIO);
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			/*
			 * MacOS X can extract the object type here such as:
			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
			 */
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);

			if (check_sysattrs && !zap.za_normalization_conflict) {
#ifdef TODO
				zap.za_normalization_conflict =
				    xattr_sysattr_casechk(zap.za_name);
#else
				panic("%s:%u: TODO", __func__, __LINE__);
#endif
			}
		}

		if (flags & V_RDDIR_ACCFILTER) {
			/*
			 * If we have no access at all, don't include
			 * this entry in the returned information
			 */
			znode_t *ezp;
			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
				goto skip_entry;
			if (!zfs_has_access(ezp, cr)) {
				vrele(ZTOV(ezp));
				goto skip_entry;
			}
			vrele(ZTOV(ezp));
		}

		if (flags & V_RDDIR_ENTFLAGS)
			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
		else
			reclen = DIRENT64_RECLEN(strlen(zap.za_name));

		/*
		 * Will this entry fit in the buffer?
		 */
		if (outcount + reclen > bufsize) {
			/*
			 * Did we manage to fit anything in the buffer?
			 */
			if (!outcount) {
				error = SET_ERROR(EINVAL);
				goto update;
			}
			break;
		}
		if (flags & V_RDDIR_ENTFLAGS) {
			/*
			 * Add extended flag entry:
			 */
			eodp->ed_ino = objnum;
			eodp->ed_reclen = reclen;
			/* NOTE: ed_off is the offset for the *next* entry */
			next = &(eodp->ed_off);
			eodp->ed_eflags = zap.za_normalization_conflict ?
			    ED_CASE_CONFLICT : 0;
			(void) strncpy(eodp->ed_name, zap.za_name,
			    EDIRENT_NAMELEN(reclen));
			eodp = (edirent_t *)((intptr_t)eodp + reclen);
		} else {
			/*
			 * Add normal entry:
			 */
			odp->d_ino = objnum;
			odp->d_reclen = reclen;
			odp->d_namlen = strlen(zap.za_name);
			/* NOTE: d_off is the offset for the *next* entry. */
			next = &odp->d_off;
			strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
			odp->d_type = type;
			dirent_terminate(odp);
			odp = (dirent64_t *)((intptr_t)odp + reclen);
		}
		outcount += reclen;

		ASSERT3S(outcount, <=, bufsize);

		/* Prefetch znode */
		if (prefetch)
			dmu_prefetch(os, objnum, 0, 0, 0,
			    ZIO_PRIORITY_SYNC_READ);

	skip_entry:
		/*
		 * Move to the next entry, fill in the previous offset.
		 *
		 * Offsets 0, 1 and (when the ctldir is shown) 2 are the
		 * synthetic entries and simply count up; everything else
		 * advances the ZAP cursor and takes its serialized value.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			offset += 1;
		}

		/* Fill the offset right after advancing the cursor. */
		if (next != NULL)
			*next = offset;
		if (cooks != NULL) {
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
		/* Entries were written in place; just account for them. */
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		zfs_uio_resid(uio) -= outcount;
	} else if ((error =
	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
		/*
		 * Reset the pointer.
		 */
		offset = zfs_uio_offset(uio);
	}

update:
	/*
	 * Error exits from inside the loop jump here directly, skipping the
	 * cookie accounting and the copy-out above.
	 */
	zap_cursor_fini(&zc);
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	zfs_uio_setoffset(uio, offset);
	ZFS_EXIT(zfsvfs);
	/* On failure do not hand a cookie array back to the caller. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}
1992
/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 *	IN:	vp	- vnode of file.
 *		vap	- va_mask identifies requested attributes.
 *			  If AT_XVATTR set, then optional attrs are requested
 *		flags	- ATTR_NOACLCHECK (CIFS server context)
 *		cr	- credentials of caller.
 *
 *	OUT:	vap	- attribute values.
 *
 *	RETURN:	0 on success; otherwise the error returned by
 *		sa_bulk_lookup() or by the ACE_READ_ATTRIBUTES check in
 *		zfs_zaccess().
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t mtime[2], ctime[2], crtime[2], rdev;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	/* Sized for the at most four attributes queued below. */
	sa_bulk_attr_t bulk[4];
	int count = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);

	/* Fetch the three timestamps (and rdev for devices) in one lookup. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);

	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
	    (vap->va_uid != crgetuid(cr))) {
		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	vap->va_type = IFTOVT(zp->z_mode);
	vap->va_mode = zp->z_mode & ~S_IFMT;
	vn_fsid(vp, vap);
	vap->va_nodeid = zp->z_id;
	vap->va_nlink = zp->z_links;
	/*
	 * On a root vnode that shows the ctldir, report one extra link
	 * unless the count is already at ZFS_LINK_MAX.
	 */
	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
	    zp->z_links < ZFS_LINK_MAX)
		vap->va_nlink++;
	vap->va_size = zp->z_size;
	/* rdev was only looked up for block/character devices. */
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		vap->va_rdev = zfs_cmpldev(rdev);
	else
		vap->va_rdev = 0;
	vap->va_gen = zp->z_gen;
	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
	vap->va_filerev = zp->z_seq;

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 *
	 * Each block below follows the same pattern: if the attribute was
	 * requested, copy the matching z_pflags bit (or znode field) into
	 * the xoptattr and mark it as returned.
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((zp->z_pflags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((zp->z_pflags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    vp->v_type == VREG) {
			zfs_sa_get_scanstamp(zp, xvap);
		}

		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_REPARSE);
		}
		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
			xoap->xoa_generation = zp->z_gen;
			XVA_SET_RTN(xvap, XAT_GEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
			xoap->xoa_offline =
			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
			XVA_SET_RTN(xvap, XAT_OFFLINE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
			xoap->xoa_sparse =
			    ((zp->z_pflags & ZFS_SPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_SPARSE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
			xoap->xoa_projinherit =
			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
			xoap->xoa_projid = zp->z_projid;
			XVA_SET_RTN(xvap, XAT_PROJID);
		}
	}

	/* atime comes from the znode; the rest from the bulk lookup above. */
	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);


	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
	vap->va_blksize = blksize;
	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */

	if (zp->z_blksz == 0) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}
2205
2206 /*
2207 * Set the file attributes to the values contained in the
2208 * vattr structure.
2209 *
2210 * IN: zp - znode of file to be modified.
2211 * vap - new attribute values.
2212 * If AT_XVATTR set, then optional attrs are being set
2213 * flags - ATTR_UTIME set if non-default time values provided.
2214 * - ATTR_NOACLCHECK (CIFS context only).
2215 * cr - credentials of caller.
2216 * ct - caller context
2217 *
2218 * RETURN: 0 on success, error code on failure.
2219 *
2220 * Timestamps:
2221 * vp - ctime updated, mtime updated if size changed.
2222 */
2223 /* ARGSUSED */
2224 int
zfs_setattr(znode_t * zp,vattr_t * vap,int flags,cred_t * cr)2225 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
2226 {
2227 vnode_t *vp = ZTOV(zp);
2228 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2229 objset_t *os;
2230 zilog_t *zilog;
2231 dmu_tx_t *tx;
2232 vattr_t oldva;
2233 xvattr_t tmpxvattr;
2234 uint_t mask = vap->va_mask;
2235 uint_t saved_mask = 0;
2236 uint64_t saved_mode;
2237 int trim_mask = 0;
2238 uint64_t new_mode;
2239 uint64_t new_uid, new_gid;
2240 uint64_t xattr_obj;
2241 uint64_t mtime[2], ctime[2];
2242 uint64_t projid = ZFS_INVALID_PROJID;
2243 znode_t *attrzp;
2244 int need_policy = FALSE;
2245 int err, err2;
2246 zfs_fuid_info_t *fuidp = NULL;
2247 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2248 xoptattr_t *xoap;
2249 zfs_acl_t *aclp;
2250 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2251 boolean_t fuid_dirtied = B_FALSE;
2252 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2253 int count = 0, xattr_count = 0;
2254
2255 if (mask == 0)
2256 return (0);
2257
2258 if (mask & AT_NOSET)
2259 return (SET_ERROR(EINVAL));
2260
2261 ZFS_ENTER(zfsvfs);
2262 ZFS_VERIFY_ZP(zp);
2263
2264 os = zfsvfs->z_os;
2265 zilog = zfsvfs->z_log;
2266
2267 /*
2268 * Make sure that if we have ephemeral uid/gid or xvattr specified
2269 * that file system is at proper version level
2270 */
2271
2272 if (zfsvfs->z_use_fuids == B_FALSE &&
2273 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2274 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2275 (mask & AT_XVATTR))) {
2276 ZFS_EXIT(zfsvfs);
2277 return (SET_ERROR(EINVAL));
2278 }
2279
2280 if (mask & AT_SIZE && vp->v_type == VDIR) {
2281 ZFS_EXIT(zfsvfs);
2282 return (SET_ERROR(EISDIR));
2283 }
2284
2285 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2286 ZFS_EXIT(zfsvfs);
2287 return (SET_ERROR(EINVAL));
2288 }
2289
2290 /*
2291 * If this is an xvattr_t, then get a pointer to the structure of
2292 * optional attributes. If this is NULL, then we have a vattr_t.
2293 */
2294 xoap = xva_getxoptattr(xvap);
2295
2296 xva_init(&tmpxvattr);
2297
2298 /*
2299 * Immutable files can only alter immutable bit and atime
2300 */
2301 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2302 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2303 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2304 ZFS_EXIT(zfsvfs);
2305 return (SET_ERROR(EPERM));
2306 }
2307
2308 /*
2309 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2310 */
2311
2312 /*
2313 * Verify timestamps doesn't overflow 32 bits.
2314 * ZFS can handle large timestamps, but 32bit syscalls can't
2315 * handle times greater than 2039. This check should be removed
2316 * once large timestamps are fully supported.
2317 */
2318 if (mask & (AT_ATIME | AT_MTIME)) {
2319 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2320 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2321 ZFS_EXIT(zfsvfs);
2322 return (SET_ERROR(EOVERFLOW));
2323 }
2324 }
2325 if (xoap != NULL && (mask & AT_XVATTR)) {
2326 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2327 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2328 ZFS_EXIT(zfsvfs);
2329 return (SET_ERROR(EOVERFLOW));
2330 }
2331
2332 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2333 if (!dmu_objset_projectquota_enabled(os) ||
2334 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2335 ZFS_EXIT(zfsvfs);
2336 return (SET_ERROR(EOPNOTSUPP));
2337 }
2338
2339 projid = xoap->xoa_projid;
2340 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2341 ZFS_EXIT(zfsvfs);
2342 return (SET_ERROR(EINVAL));
2343 }
2344
2345 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2346 projid = ZFS_INVALID_PROJID;
2347 else
2348 need_policy = TRUE;
2349 }
2350
2351 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2352 (xoap->xoa_projinherit !=
2353 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2354 (!dmu_objset_projectquota_enabled(os) ||
2355 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2356 ZFS_EXIT(zfsvfs);
2357 return (SET_ERROR(EOPNOTSUPP));
2358 }
2359 }
2360
2361 attrzp = NULL;
2362 aclp = NULL;
2363
2364 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2365 ZFS_EXIT(zfsvfs);
2366 return (SET_ERROR(EROFS));
2367 }
2368
2369 /*
2370 * First validate permissions
2371 */
2372
2373 if (mask & AT_SIZE) {
2374 /*
2375 * XXX - Note, we are not providing any open
2376 * mode flags here (like FNDELAY), so we may
2377 * block if there are locks present... this
2378 * should be addressed in openat().
2379 */
2380 /* XXX - would it be OK to generate a log record here? */
2381 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2382 if (err) {
2383 ZFS_EXIT(zfsvfs);
2384 return (err);
2385 }
2386 }
2387
2388 if (mask & (AT_ATIME|AT_MTIME) ||
2389 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2390 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2391 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2392 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2393 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2394 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2395 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2396 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2397 skipaclchk, cr);
2398 }
2399
2400 if (mask & (AT_UID|AT_GID)) {
2401 int idmask = (mask & (AT_UID|AT_GID));
2402 int take_owner;
2403 int take_group;
2404
2405 /*
2406 * NOTE: even if a new mode is being set,
2407 * we may clear S_ISUID/S_ISGID bits.
2408 */
2409
2410 if (!(mask & AT_MODE))
2411 vap->va_mode = zp->z_mode;
2412
2413 /*
2414 * Take ownership or chgrp to group we are a member of
2415 */
2416
2417 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2418 take_group = (mask & AT_GID) &&
2419 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2420
2421 /*
2422 * If both AT_UID and AT_GID are set then take_owner and
2423 * take_group must both be set in order to allow taking
2424 * ownership.
2425 *
2426 * Otherwise, send the check through secpolicy_vnode_setattr()
2427 *
2428 */
2429
2430 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2431 ((idmask == AT_UID) && take_owner) ||
2432 ((idmask == AT_GID) && take_group)) {
2433 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2434 skipaclchk, cr) == 0) {
2435 /*
2436 * Remove setuid/setgid for non-privileged users
2437 */
2438 secpolicy_setid_clear(vap, vp, cr);
2439 trim_mask = (mask & (AT_UID|AT_GID));
2440 } else {
2441 need_policy = TRUE;
2442 }
2443 } else {
2444 need_policy = TRUE;
2445 }
2446 }
2447
2448 oldva.va_mode = zp->z_mode;
2449 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2450 if (mask & AT_XVATTR) {
2451 /*
2452 * Update xvattr mask to include only those attributes
2453 * that are actually changing.
2454 *
2455 * the bits will be restored prior to actually setting
2456 * the attributes so the caller thinks they were set.
2457 */
2458 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2459 if (xoap->xoa_appendonly !=
2460 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2461 need_policy = TRUE;
2462 } else {
2463 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2464 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2465 }
2466 }
2467
2468 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2469 if (xoap->xoa_projinherit !=
2470 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2471 need_policy = TRUE;
2472 } else {
2473 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2474 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2475 }
2476 }
2477
2478 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2479 if (xoap->xoa_nounlink !=
2480 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2481 need_policy = TRUE;
2482 } else {
2483 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2484 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2485 }
2486 }
2487
2488 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2489 if (xoap->xoa_immutable !=
2490 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2491 need_policy = TRUE;
2492 } else {
2493 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2494 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2495 }
2496 }
2497
2498 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2499 if (xoap->xoa_nodump !=
2500 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2501 need_policy = TRUE;
2502 } else {
2503 XVA_CLR_REQ(xvap, XAT_NODUMP);
2504 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2505 }
2506 }
2507
2508 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2509 if (xoap->xoa_av_modified !=
2510 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2511 need_policy = TRUE;
2512 } else {
2513 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2514 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2515 }
2516 }
2517
2518 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2519 if ((vp->v_type != VREG &&
2520 xoap->xoa_av_quarantined) ||
2521 xoap->xoa_av_quarantined !=
2522 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2523 need_policy = TRUE;
2524 } else {
2525 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2526 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2527 }
2528 }
2529
2530 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2531 ZFS_EXIT(zfsvfs);
2532 return (SET_ERROR(EPERM));
2533 }
2534
2535 if (need_policy == FALSE &&
2536 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2537 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2538 need_policy = TRUE;
2539 }
2540 }
2541
2542 if (mask & AT_MODE) {
2543 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
2544 err = secpolicy_setid_setsticky_clear(vp, vap,
2545 &oldva, cr);
2546 if (err) {
2547 ZFS_EXIT(zfsvfs);
2548 return (err);
2549 }
2550 trim_mask |= AT_MODE;
2551 } else {
2552 need_policy = TRUE;
2553 }
2554 }
2555
2556 if (need_policy) {
2557 /*
2558 * If trim_mask is set then take ownership
2559 * has been granted or write_acl is present and user
2560 * has the ability to modify mode. In that case remove
2561 * UID|GID and or MODE from mask so that
2562 * secpolicy_vnode_setattr() doesn't revoke it.
2563 */
2564
2565 if (trim_mask) {
2566 saved_mask = vap->va_mask;
2567 vap->va_mask &= ~trim_mask;
2568 if (trim_mask & AT_MODE) {
2569 /*
2570 * Save the mode, as secpolicy_vnode_setattr()
2571 * will overwrite it with ova.va_mode.
2572 */
2573 saved_mode = vap->va_mode;
2574 }
2575 }
2576 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2577 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2578 if (err) {
2579 ZFS_EXIT(zfsvfs);
2580 return (err);
2581 }
2582
2583 if (trim_mask) {
2584 vap->va_mask |= saved_mask;
2585 if (trim_mask & AT_MODE) {
2586 /*
2587 * Recover the mode after
2588 * secpolicy_vnode_setattr().
2589 */
2590 vap->va_mode = saved_mode;
2591 }
2592 }
2593 }
2594
2595 /*
2596 * secpolicy_vnode_setattr, or take ownership may have
2597 * changed va_mask
2598 */
2599 mask = vap->va_mask;
2600
2601 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2602 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2603 &xattr_obj, sizeof (xattr_obj));
2604
2605 if (err == 0 && xattr_obj) {
2606 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2607 if (err == 0) {
2608 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2609 if (err != 0)
2610 vrele(ZTOV(attrzp));
2611 }
2612 if (err)
2613 goto out2;
2614 }
2615 if (mask & AT_UID) {
2616 new_uid = zfs_fuid_create(zfsvfs,
2617 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2618 if (new_uid != zp->z_uid &&
2619 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2620 new_uid)) {
2621 if (attrzp)
2622 vput(ZTOV(attrzp));
2623 err = SET_ERROR(EDQUOT);
2624 goto out2;
2625 }
2626 }
2627
2628 if (mask & AT_GID) {
2629 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2630 cr, ZFS_GROUP, &fuidp);
2631 if (new_gid != zp->z_gid &&
2632 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2633 new_gid)) {
2634 if (attrzp)
2635 vput(ZTOV(attrzp));
2636 err = SET_ERROR(EDQUOT);
2637 goto out2;
2638 }
2639 }
2640
2641 if (projid != ZFS_INVALID_PROJID &&
2642 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2643 if (attrzp)
2644 vput(ZTOV(attrzp));
2645 err = SET_ERROR(EDQUOT);
2646 goto out2;
2647 }
2648 }
2649 tx = dmu_tx_create(os);
2650
2651 if (mask & AT_MODE) {
2652 uint64_t pmode = zp->z_mode;
2653 uint64_t acl_obj;
2654 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2655
2656 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2657 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2658 err = SET_ERROR(EPERM);
2659 goto out;
2660 }
2661
2662 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2663 goto out;
2664
2665 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2666 /*
2667 * Are we upgrading ACL from old V0 format
2668 * to V1 format?
2669 */
2670 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2671 zfs_znode_acl_version(zp) ==
2672 ZFS_ACL_VERSION_INITIAL) {
2673 dmu_tx_hold_free(tx, acl_obj, 0,
2674 DMU_OBJECT_END);
2675 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2676 0, aclp->z_acl_bytes);
2677 } else {
2678 dmu_tx_hold_write(tx, acl_obj, 0,
2679 aclp->z_acl_bytes);
2680 }
2681 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2682 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2683 0, aclp->z_acl_bytes);
2684 }
2685 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2686 } else {
2687 if (((mask & AT_XVATTR) &&
2688 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2689 (projid != ZFS_INVALID_PROJID &&
2690 !(zp->z_pflags & ZFS_PROJID)))
2691 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2692 else
2693 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2694 }
2695
2696 if (attrzp) {
2697 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2698 }
2699
2700 fuid_dirtied = zfsvfs->z_fuid_dirty;
2701 if (fuid_dirtied)
2702 zfs_fuid_txhold(zfsvfs, tx);
2703
2704 zfs_sa_upgrade_txholds(tx, zp);
2705
2706 err = dmu_tx_assign(tx, TXG_WAIT);
2707 if (err)
2708 goto out;
2709
2710 count = 0;
2711 /*
2712 * Set each attribute requested.
2713 * We group settings according to the locks they need to acquire.
2714 *
2715 * Note: you cannot set ctime directly, although it will be
2716 * updated as a side-effect of calling this function.
2717 */
2718
2719 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2720 /*
2721 * For the existed object that is upgraded from old system,
2722 * its on-disk layout has no slot for the project ID attribute.
2723 * But quota accounting logic needs to access related slots by
2724 * offset directly. So we need to adjust old objects' layout
2725 * to make the project ID to some unified and fixed offset.
2726 */
2727 if (attrzp)
2728 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2729 if (err == 0)
2730 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2731
2732 if (unlikely(err == EEXIST))
2733 err = 0;
2734 else if (err != 0)
2735 goto out;
2736 else
2737 projid = ZFS_INVALID_PROJID;
2738 }
2739
2740 if (mask & (AT_UID|AT_GID|AT_MODE))
2741 mutex_enter(&zp->z_acl_lock);
2742
2743 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2744 &zp->z_pflags, sizeof (zp->z_pflags));
2745
2746 if (attrzp) {
2747 if (mask & (AT_UID|AT_GID|AT_MODE))
2748 mutex_enter(&attrzp->z_acl_lock);
2749 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2750 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2751 sizeof (attrzp->z_pflags));
2752 if (projid != ZFS_INVALID_PROJID) {
2753 attrzp->z_projid = projid;
2754 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2755 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2756 sizeof (attrzp->z_projid));
2757 }
2758 }
2759
2760 if (mask & (AT_UID|AT_GID)) {
2761
2762 if (mask & AT_UID) {
2763 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2764 &new_uid, sizeof (new_uid));
2765 zp->z_uid = new_uid;
2766 if (attrzp) {
2767 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2768 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2769 sizeof (new_uid));
2770 attrzp->z_uid = new_uid;
2771 }
2772 }
2773
2774 if (mask & AT_GID) {
2775 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2776 NULL, &new_gid, sizeof (new_gid));
2777 zp->z_gid = new_gid;
2778 if (attrzp) {
2779 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2780 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2781 sizeof (new_gid));
2782 attrzp->z_gid = new_gid;
2783 }
2784 }
2785 if (!(mask & AT_MODE)) {
2786 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2787 NULL, &new_mode, sizeof (new_mode));
2788 new_mode = zp->z_mode;
2789 }
2790 err = zfs_acl_chown_setattr(zp);
2791 ASSERT0(err);
2792 if (attrzp) {
2793 vn_seqc_write_begin(ZTOV(attrzp));
2794 err = zfs_acl_chown_setattr(attrzp);
2795 vn_seqc_write_end(ZTOV(attrzp));
2796 ASSERT0(err);
2797 }
2798 }
2799
2800 if (mask & AT_MODE) {
2801 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2802 &new_mode, sizeof (new_mode));
2803 zp->z_mode = new_mode;
2804 ASSERT3P(aclp, !=, NULL);
2805 err = zfs_aclset_common(zp, aclp, cr, tx);
2806 ASSERT0(err);
2807 if (zp->z_acl_cached)
2808 zfs_acl_free(zp->z_acl_cached);
2809 zp->z_acl_cached = aclp;
2810 aclp = NULL;
2811 }
2812
2813
2814 if (mask & AT_ATIME) {
2815 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2816 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2817 &zp->z_atime, sizeof (zp->z_atime));
2818 }
2819
2820 if (mask & AT_MTIME) {
2821 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2822 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2823 mtime, sizeof (mtime));
2824 }
2825
2826 if (projid != ZFS_INVALID_PROJID) {
2827 zp->z_projid = projid;
2828 SA_ADD_BULK_ATTR(bulk, count,
2829 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2830 sizeof (zp->z_projid));
2831 }
2832
2833 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2834 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2835 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2836 NULL, mtime, sizeof (mtime));
2837 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2838 &ctime, sizeof (ctime));
2839 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2840 } else if (mask != 0) {
2841 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2842 &ctime, sizeof (ctime));
2843 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2844 if (attrzp) {
2845 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2846 SA_ZPL_CTIME(zfsvfs), NULL,
2847 &ctime, sizeof (ctime));
2848 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2849 mtime, ctime);
2850 }
2851 }
2852
2853 /*
2854 * Do this after setting timestamps to prevent timestamp
2855 * update from toggling bit
2856 */
2857
2858 if (xoap && (mask & AT_XVATTR)) {
2859
2860 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2861 xoap->xoa_createtime = vap->va_birthtime;
2862 /*
2863 * restore trimmed off masks
2864 * so that return masks can be set for caller.
2865 */
2866
2867 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2868 XVA_SET_REQ(xvap, XAT_APPENDONLY);
2869 }
2870 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2871 XVA_SET_REQ(xvap, XAT_NOUNLINK);
2872 }
2873 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2874 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2875 }
2876 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2877 XVA_SET_REQ(xvap, XAT_NODUMP);
2878 }
2879 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2880 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2881 }
2882 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2883 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2884 }
2885 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2886 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2887 }
2888
2889 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2890 ASSERT3S(vp->v_type, ==, VREG);
2891
2892 zfs_xvattr_set(zp, xvap, tx);
2893 }
2894
2895 if (fuid_dirtied)
2896 zfs_fuid_sync(zfsvfs, tx);
2897
2898 if (mask != 0)
2899 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2900
2901 if (mask & (AT_UID|AT_GID|AT_MODE))
2902 mutex_exit(&zp->z_acl_lock);
2903
2904 if (attrzp) {
2905 if (mask & (AT_UID|AT_GID|AT_MODE))
2906 mutex_exit(&attrzp->z_acl_lock);
2907 }
2908 out:
2909 if (err == 0 && attrzp) {
2910 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2911 xattr_count, tx);
2912 ASSERT0(err2);
2913 }
2914
2915 if (attrzp)
2916 vput(ZTOV(attrzp));
2917
2918 if (aclp)
2919 zfs_acl_free(aclp);
2920
2921 if (fuidp) {
2922 zfs_fuid_info_free(fuidp);
2923 fuidp = NULL;
2924 }
2925
2926 if (err) {
2927 dmu_tx_abort(tx);
2928 } else {
2929 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2930 dmu_tx_commit(tx);
2931 }
2932
2933 out2:
2934 if (os->os_sync == ZFS_SYNC_ALWAYS)
2935 zil_commit(zilog, 0);
2936
2937 ZFS_EXIT(zfsvfs);
2938 return (err);
2939 }
2940
2941 /*
2942 * Look up the directory entries corresponding to the source and target
2943 * directory/name pairs.
2944 */
2945 static int
zfs_rename_relock_lookup(znode_t * sdzp,const struct componentname * scnp,znode_t ** szpp,znode_t * tdzp,const struct componentname * tcnp,znode_t ** tzpp)2946 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2947 znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2948 znode_t **tzpp)
2949 {
2950 zfsvfs_t *zfsvfs;
2951 znode_t *szp, *tzp;
2952 int error;
2953
2954 /*
2955 * Before using sdzp and tdzp we must ensure that they are live.
2956 * As a porting legacy from illumos we have two things to worry
2957 * about. One is typical for FreeBSD and it is that the vnode is
2958 * not reclaimed (doomed). The other is that the znode is live.
2959 * The current code can invalidate the znode without acquiring the
2960 * corresponding vnode lock if the object represented by the znode
2961 * and vnode is no longer valid after a rollback or receive operation.
2962 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
2963 * that protects the znodes from the invalidation.
2964 */
2965 zfsvfs = sdzp->z_zfsvfs;
2966 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2967 ZFS_ENTER(zfsvfs);
2968 ZFS_VERIFY_ZP(sdzp);
2969 ZFS_VERIFY_ZP(tdzp);
2970
2971 /*
2972 * Re-resolve svp to be certain it still exists and fetch the
2973 * correct vnode.
2974 */
2975 error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2976 if (error != 0) {
2977 /* Source entry invalid or not there. */
2978 if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2979 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2980 error = SET_ERROR(EINVAL);
2981 goto out;
2982 }
2983 *szpp = szp;
2984
2985 /*
2986 * Re-resolve tvp, if it disappeared we just carry on.
2987 */
2988 error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2989 if (error != 0) {
2990 vrele(ZTOV(szp));
2991 if ((tcnp->cn_flags & ISDOTDOT) != 0)
2992 error = SET_ERROR(EINVAL);
2993 goto out;
2994 }
2995 *tzpp = tzp;
2996 out:
2997 ZFS_EXIT(zfsvfs);
2998 return (error);
2999 }
3000
/*
 * Re-acquire the vnode locks needed for a rename and re-resolve the source
 * and target entries.
 *
 * We acquire all but the source directory (sdvp) lock using non-blocking
 * acquisitions.  If we fail to acquire any lock in the path we will drop
 * all held locks, acquire the new lock in a blocking fashion, and then
 * release it and restart the rename.  This acquire/release step ensures
 * that we do not spin on a lock waiting for release.  On error release all
 * vnode locks and decrement references the way tmpfs_rename() would do.
 *
 *	IN:	sdvp	- source directory vnode.
 *		tdvp	- target directory vnode; unlocked on entry by this
 *			  function before the locks are retaken.
 *		scnp	- source component name.
 *		tcnp	- target component name.
 *	INOUT:	svpp	- source vnode; replaced by the freshly looked-up
 *			  vnode (the previous reference is released).
 *		tvpp	- target vnode, may point to NULL; replaced by the
 *			  freshly looked-up vnode or set to NULL.
 *
 *	RETURN:	0 with sdvp, tdvp, *svpp and (if non-NULL) *tvpp locked
 *		exclusively; otherwise an error code with every lock taken
 *		in this function released again.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	struct vnode	*nvp, *svp, *tvp;
	znode_t		*sdzp, *tdzp, *szp, *tzp;
	int		error;

	/* Start from a clean slate: drop the locks the caller passed in. */
	VOP_UNLOCK1(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK1(*tvpp);

relock:
	/* The source directory is the only lock taken in blocking mode. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		if (error != EBUSY)
			goto out;
		/*
		 * Contended: wait for tdvp with nothing else held, then
		 * release it and start over.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK1(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);

	/* With both directories locked, resolve the names again. */
	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		goto out;
	}
	svp = ZTOV(szp);
	tvp = tzp != NULL ? ZTOV(tzp) : NULL;

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		/* The target is looked up again after the restart. */
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK1(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		/* Hand the new source vnode back to the caller and retry. */
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Source locked: swap the caller's reference for the new one. */
	vrele(*svpp);
	*svpp = nvp;

	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK1(sdvp);
			VOP_UNLOCK1(tdvp);
			VOP_UNLOCK1(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/*
			 * Unlock and drop the reference in one step; the
			 * target is looked up again after the restart.
			 */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	return (error);
}
3115
3116 /*
3117 * Note that we must use VRELE_ASYNC in this function as it walks
3118 * up the directory tree and vrele may need to acquire an exclusive
3119 * lock if a last reference to a vnode is dropped.
3120 */
3121 static int
zfs_rename_check(znode_t * szp,znode_t * sdzp,znode_t * tdzp)3122 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
3123 {
3124 zfsvfs_t *zfsvfs;
3125 znode_t *zp, *zp1;
3126 uint64_t parent;
3127 int error;
3128
3129 zfsvfs = tdzp->z_zfsvfs;
3130 if (tdzp == szp)
3131 return (SET_ERROR(EINVAL));
3132 if (tdzp == sdzp)
3133 return (0);
3134 if (tdzp->z_id == zfsvfs->z_root)
3135 return (0);
3136 zp = tdzp;
3137 for (;;) {
3138 ASSERT(!zp->z_unlinked);
3139 if ((error = sa_lookup(zp->z_sa_hdl,
3140 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
3141 break;
3142
3143 if (parent == szp->z_id) {
3144 error = SET_ERROR(EINVAL);
3145 break;
3146 }
3147 if (parent == zfsvfs->z_root)
3148 break;
3149 if (parent == sdzp->z_id)
3150 break;
3151
3152 error = zfs_zget(zfsvfs, parent, &zp1);
3153 if (error != 0)
3154 break;
3155
3156 if (zp != tdzp)
3157 VN_RELE_ASYNC(ZTOV(zp),
3158 dsl_pool_zrele_taskq(
3159 dmu_objset_pool(zfsvfs->z_os)));
3160 zp = zp1;
3161 }
3162
3163 if (error == ENOTDIR)
3164 panic("checkpath: .. not a directory\n");
3165 if (zp != tdzp)
3166 VN_RELE_ASYNC(ZTOV(zp),
3167 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3168 return (error);
3169 }
3170
3171 #if __FreeBSD_version < 1300124
3172 static void
cache_vop_rename(struct vnode * fdvp,struct vnode * fvp,struct vnode * tdvp,struct vnode * tvp,struct componentname * fcnp,struct componentname * tcnp)3173 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
3174 struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
3175 {
3176
3177 cache_purge(fvp);
3178 if (tvp != NULL)
3179 cache_purge(tvp);
3180 cache_purge_negative(tdvp);
3181 }
3182 #endif
3183
3184 static int
3185 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3186 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3187 cred_t *cr);
3188
3189 /*
3190 * Move an entry from the provided source directory to the target
3191 * directory. Change the entry name as indicated.
3192 *
3193 * IN: sdvp - Source directory containing the "old entry".
3194 * scnp - Old entry name.
3195 * tdvp - Target directory to contain the "new entry".
3196 * tcnp - New entry name.
3197 * cr - credentials of caller.
3198 * INOUT: svpp - Source file
3199 * tvpp - Target file, may point to NULL initially
3200 *
3201 * RETURN: 0 on success, error code on failure.
3202 *
3203 * Timestamps:
3204 * sdvp,tdvp - ctime|mtime updated
3205 */
3206 /*ARGSUSED*/
3207 static int
zfs_do_rename(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr)3208 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3209 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3210 cred_t *cr)
3211 {
3212 int error;
3213
3214 ASSERT_VOP_ELOCKED(tdvp, __func__);
3215 if (*tvpp != NULL)
3216 ASSERT_VOP_ELOCKED(*tvpp, __func__);
3217
3218 /* Reject renames across filesystems. */
3219 if ((*svpp)->v_mount != tdvp->v_mount ||
3220 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3221 error = SET_ERROR(EXDEV);
3222 goto out;
3223 }
3224
3225 if (zfsctl_is_node(tdvp)) {
3226 error = SET_ERROR(EXDEV);
3227 goto out;
3228 }
3229
3230 /*
3231 * Lock all four vnodes to ensure safety and semantics of renaming.
3232 */
3233 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3234 if (error != 0) {
3235 /* no vnodes are locked in the case of error here */
3236 return (error);
3237 }
3238
3239 error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3240 VOP_UNLOCK1(sdvp);
3241 VOP_UNLOCK1(*svpp);
3242 out:
3243 if (*tvpp != NULL)
3244 VOP_UNLOCK1(*tvpp);
3245 if (tdvp != *tvpp)
3246 VOP_UNLOCK1(tdvp);
3247
3248 return (error);
3249 }
3250
3251 static int
zfs_do_rename_impl(vnode_t * sdvp,vnode_t ** svpp,struct componentname * scnp,vnode_t * tdvp,vnode_t ** tvpp,struct componentname * tcnp,cred_t * cr)3252 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3253 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3254 cred_t *cr)
3255 {
3256 dmu_tx_t *tx;
3257 zfsvfs_t *zfsvfs;
3258 zilog_t *zilog;
3259 znode_t *tdzp, *sdzp, *tzp, *szp;
3260 const char *snm = scnp->cn_nameptr;
3261 const char *tnm = tcnp->cn_nameptr;
3262 int error;
3263
3264 tdzp = VTOZ(tdvp);
3265 sdzp = VTOZ(sdvp);
3266 zfsvfs = tdzp->z_zfsvfs;
3267
3268 ZFS_ENTER(zfsvfs);
3269 ZFS_VERIFY_ZP(tdzp);
3270 ZFS_VERIFY_ZP(sdzp);
3271 zilog = zfsvfs->z_log;
3272
3273 if (zfsvfs->z_utf8 && u8_validate(tnm,
3274 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3275 error = SET_ERROR(EILSEQ);
3276 goto out;
3277 }
3278
3279 /* If source and target are the same file, there is nothing to do. */
3280 if ((*svpp) == (*tvpp)) {
3281 error = 0;
3282 goto out;
3283 }
3284
3285 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3286 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3287 (*tvpp)->v_mountedhere != NULL)) {
3288 error = SET_ERROR(EXDEV);
3289 goto out;
3290 }
3291
3292 szp = VTOZ(*svpp);
3293 ZFS_VERIFY_ZP(szp);
3294 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3295 if (tzp != NULL)
3296 ZFS_VERIFY_ZP(tzp);
3297
3298 /*
3299 * This is to prevent the creation of links into attribute space
3300 * by renaming a linked file into/outof an attribute directory.
3301 * See the comment in zfs_link() for why this is considered bad.
3302 */
3303 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3304 error = SET_ERROR(EINVAL);
3305 goto out;
3306 }
3307
3308 /*
3309 * If we are using project inheritance, means if the directory has
3310 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3311 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3312 * such case, we only allow renames into our tree when the project
3313 * IDs are the same.
3314 */
3315 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3316 tdzp->z_projid != szp->z_projid) {
3317 error = SET_ERROR(EXDEV);
3318 goto out;
3319 }
3320
3321 /*
3322 * Must have write access at the source to remove the old entry
3323 * and write access at the target to create the new entry.
3324 * Note that if target and source are the same, this can be
3325 * done in a single check.
3326 */
3327 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
3328 goto out;
3329
3330 if ((*svpp)->v_type == VDIR) {
3331 /*
3332 * Avoid ".", "..", and aliases of "." for obvious reasons.
3333 */
3334 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3335 sdzp == szp ||
3336 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3337 error = EINVAL;
3338 goto out;
3339 }
3340
3341 /*
3342 * Check to make sure rename is valid.
3343 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3344 */
3345 if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3346 goto out;
3347 }
3348
3349 /*
3350 * Does target exist?
3351 */
3352 if (tzp) {
3353 /*
3354 * Source and target must be the same type.
3355 */
3356 if ((*svpp)->v_type == VDIR) {
3357 if ((*tvpp)->v_type != VDIR) {
3358 error = SET_ERROR(ENOTDIR);
3359 goto out;
3360 } else {
3361 cache_purge(tdvp);
3362 if (sdvp != tdvp)
3363 cache_purge(sdvp);
3364 }
3365 } else {
3366 if ((*tvpp)->v_type == VDIR) {
3367 error = SET_ERROR(EISDIR);
3368 goto out;
3369 }
3370 }
3371 }
3372
3373 vn_seqc_write_begin(*svpp);
3374 vn_seqc_write_begin(sdvp);
3375 if (*tvpp != NULL)
3376 vn_seqc_write_begin(*tvpp);
3377 if (tdvp != *tvpp)
3378 vn_seqc_write_begin(tdvp);
3379
3380 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3381 if (tzp)
3382 vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3383
3384 /*
3385 * notify the target directory if it is not the same
3386 * as source directory.
3387 */
3388 if (tdvp != sdvp) {
3389 vnevent_rename_dest_dir(tdvp, ct);
3390 }
3391
3392 tx = dmu_tx_create(zfsvfs->z_os);
3393 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3394 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3395 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3396 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3397 if (sdzp != tdzp) {
3398 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3399 zfs_sa_upgrade_txholds(tx, tdzp);
3400 }
3401 if (tzp) {
3402 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3403 zfs_sa_upgrade_txholds(tx, tzp);
3404 }
3405
3406 zfs_sa_upgrade_txholds(tx, szp);
3407 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3408 error = dmu_tx_assign(tx, TXG_WAIT);
3409 if (error) {
3410 dmu_tx_abort(tx);
3411 goto out_seq;
3412 }
3413
3414 if (tzp) /* Attempt to remove the existing target */
3415 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3416
3417 if (error == 0) {
3418 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3419 if (error == 0) {
3420 szp->z_pflags |= ZFS_AV_MODIFIED;
3421
3422 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3423 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3424 ASSERT0(error);
3425
3426 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3427 NULL);
3428 if (error == 0) {
3429 zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3430 snm, tdzp, tnm, szp);
3431
3432 /*
3433 * Update path information for the target vnode
3434 */
3435 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
3436 } else {
3437 /*
3438 * At this point, we have successfully created
3439 * the target name, but have failed to remove
3440 * the source name. Since the create was done
3441 * with the ZRENAMING flag, there are
3442 * complications; for one, the link count is
3443 * wrong. The easiest way to deal with this
3444 * is to remove the newly created target, and
3445 * return the original error. This must
3446 * succeed; fortunately, it is very unlikely to
3447 * fail, since we just created it.
3448 */
3449 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3450 ZRENAMING, NULL));
3451 }
3452 }
3453 if (error == 0) {
3454 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3455 }
3456 }
3457
3458 dmu_tx_commit(tx);
3459
3460 out_seq:
3461 vn_seqc_write_end(*svpp);
3462 vn_seqc_write_end(sdvp);
3463 if (*tvpp != NULL)
3464 vn_seqc_write_end(*tvpp);
3465 if (tdvp != *tvpp)
3466 vn_seqc_write_end(tdvp);
3467
3468 out:
3469 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3470 zil_commit(zilog, 0);
3471 ZFS_EXIT(zfsvfs);
3472
3473 return (error);
3474 }
3475
3476 int
zfs_rename(znode_t * sdzp,const char * sname,znode_t * tdzp,const char * tname,cred_t * cr,int flags)3477 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3478 cred_t *cr, int flags)
3479 {
3480 struct componentname scn, tcn;
3481 vnode_t *sdvp, *tdvp;
3482 vnode_t *svp, *tvp;
3483 int error;
3484 svp = tvp = NULL;
3485
3486 sdvp = ZTOV(sdzp);
3487 tdvp = ZTOV(tdzp);
3488 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3489 if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3490 VOP_UNLOCK1(sdvp);
3491 if (error != 0)
3492 goto fail;
3493 VOP_UNLOCK1(svp);
3494
3495 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3496 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3497 if (error == EJUSTRETURN)
3498 tvp = NULL;
3499 else if (error != 0) {
3500 VOP_UNLOCK1(tdvp);
3501 goto fail;
3502 }
3503
3504 error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3505 fail:
3506 if (svp != NULL)
3507 vrele(svp);
3508 if (tvp != NULL)
3509 vrele(tvp);
3510
3511 return (error);
3512 }
3513
/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 *	IN:	dzp	- Directory to contain new symbolic link.
 *		name	- Name of the new directory entry.
 *		vap	- Attributes of new entry (va_type is asserted to
 *			  be VLNK).
 *		link	- Text stored as the contents of the symlink.
 *		cr	- credentials of caller.
 *		flags	- case flags (not used in this function)
 *
 *	OUT:	zpp	- znode of the symlink, set once the node has been
 *			  created.
 *
 *	RETURN:	0 on success, error code on failure.
 *		EILSEQ if the filesystem is UTF-8 only and name fails
 *		validation, ENAMETOOLONG if link is longer than MAXPATHLEN,
 *		EDQUOT if zfs_acl_ids_overquota() reports over quota.
 *
 * Timestamps:
 *	dzp - ctime|mtime updated
 */
/*ARGSUSED*/
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
    const char *link, znode_t **zpp, cred_t *cr, int flags)
{
	znode_t		*zp;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;

	ASSERT3S(vap->va_type, ==, VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Only the entry name is validated as UTF-8, not the link text. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	/*
	 * From here on acl_ids must be released with zfs_acl_ids_free()
	 * on every exit path.
	 */
	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
	    0 /* projid */)) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * The reservation taken here is dropped with
	 * getnewvnode_drop_reserve() on both the failure and success paths
	 * below.
	 */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	/* Hold at least one byte so that an empty link still gets a hold. */
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	/* Extra write hold for a large ACL when SA is not in use. */
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Store the link text.  Only the SA path assigns to "error"; on the
	 * other path "error" keeps the 0 left by dmu_tx_assign() above.
	 */
	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    __DECONST(void *, link), len, tx);
	else
		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);

	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 *
	 * NOTE(review): the result of zfs_link_create() is discarded, so a
	 * failure here is neither reported nor undone - confirm that this
	 * cannot fail once the ZNEW lookup above has succeeded.
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);

	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	*zpp = zp;

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}
3651
3652 /*
3653 * Return, in the buffer contained in the provided uio structure,
3654 * the symbolic path referred to by vp.
3655 *
3656 * IN: vp - vnode of symbolic link.
3657 * uio - structure to contain the link path.
3658 * cr - credentials of caller.
3659 * ct - caller context
3660 *
3661 * OUT: uio - structure containing the link path.
3662 *
3663 * RETURN: 0 on success, error code on failure.
3664 *
3665 * Timestamps:
3666 * vp - atime updated
3667 */
3668 /* ARGSUSED */
3669 static int
zfs_readlink(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,caller_context_t * ct)3670 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3671 {
3672 znode_t *zp = VTOZ(vp);
3673 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3674 int error;
3675
3676 ZFS_ENTER(zfsvfs);
3677 ZFS_VERIFY_ZP(zp);
3678
3679 if (zp->z_is_sa)
3680 error = sa_lookup_uio(zp->z_sa_hdl,
3681 SA_ZPL_SYMLINK(zfsvfs), uio);
3682 else
3683 error = zfs_sa_readlink(zp, uio);
3684
3685 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3686
3687 ZFS_EXIT(zfsvfs);
3688 return (error);
3689 }
3690
3691 /*
3692 * Insert a new entry into directory tdvp referencing svp.
3693 *
3694 * IN: tdvp - Directory to contain new entry.
3695 * svp - vnode of new entry.
3696 * name - name of new entry.
3697 * cr - credentials of caller.
3698 *
3699 * RETURN: 0 on success, error code on failure.
3700 *
3701 * Timestamps:
3702 * tdvp - ctime|mtime updated
3703 * svp - ctime updated
3704 */
3705 /* ARGSUSED */
3706 int
zfs_link(znode_t * tdzp,znode_t * szp,const char * name,cred_t * cr,int flags)3707 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3708 int flags)
3709 {
3710 znode_t *tzp;
3711 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
3712 zilog_t *zilog;
3713 dmu_tx_t *tx;
3714 int error;
3715 uint64_t parent;
3716 uid_t owner;
3717
3718 ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3719
3720 ZFS_ENTER(zfsvfs);
3721 ZFS_VERIFY_ZP(tdzp);
3722 zilog = zfsvfs->z_log;
3723
3724 /*
3725 * POSIX dictates that we return EPERM here.
3726 * Better choices include ENOTSUP or EISDIR.
3727 */
3728 if (ZTOV(szp)->v_type == VDIR) {
3729 ZFS_EXIT(zfsvfs);
3730 return (SET_ERROR(EPERM));
3731 }
3732
3733 ZFS_VERIFY_ZP(szp);
3734
3735 /*
3736 * If we are using project inheritance, means if the directory has
3737 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3738 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3739 * such case, we only allow hard link creation in our tree when the
3740 * project IDs are the same.
3741 */
3742 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3743 tdzp->z_projid != szp->z_projid) {
3744 ZFS_EXIT(zfsvfs);
3745 return (SET_ERROR(EXDEV));
3746 }
3747
3748 if (szp->z_pflags & (ZFS_APPENDONLY |
3749 ZFS_IMMUTABLE | ZFS_READONLY)) {
3750 ZFS_EXIT(zfsvfs);
3751 return (SET_ERROR(EPERM));
3752 }
3753
3754 /* Prevent links to .zfs/shares files */
3755
3756 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3757 &parent, sizeof (uint64_t))) != 0) {
3758 ZFS_EXIT(zfsvfs);
3759 return (error);
3760 }
3761 if (parent == zfsvfs->z_shares_dir) {
3762 ZFS_EXIT(zfsvfs);
3763 return (SET_ERROR(EPERM));
3764 }
3765
3766 if (zfsvfs->z_utf8 && u8_validate(name,
3767 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3768 ZFS_EXIT(zfsvfs);
3769 return (SET_ERROR(EILSEQ));
3770 }
3771
3772 /*
3773 * We do not support links between attributes and non-attributes
3774 * because of the potential security risk of creating links
3775 * into "normal" file space in order to circumvent restrictions
3776 * imposed in attribute space.
3777 */
3778 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3779 ZFS_EXIT(zfsvfs);
3780 return (SET_ERROR(EINVAL));
3781 }
3782
3783
3784 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3785 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3786 ZFS_EXIT(zfsvfs);
3787 return (SET_ERROR(EPERM));
3788 }
3789
3790 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
3791 ZFS_EXIT(zfsvfs);
3792 return (error);
3793 }
3794
3795 /*
3796 * Attempt to lock directory; fail if entry already exists.
3797 */
3798 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3799 if (error) {
3800 ZFS_EXIT(zfsvfs);
3801 return (error);
3802 }
3803
3804 tx = dmu_tx_create(zfsvfs->z_os);
3805 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3806 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3807 zfs_sa_upgrade_txholds(tx, szp);
3808 zfs_sa_upgrade_txholds(tx, tdzp);
3809 error = dmu_tx_assign(tx, TXG_WAIT);
3810 if (error) {
3811 dmu_tx_abort(tx);
3812 ZFS_EXIT(zfsvfs);
3813 return (error);
3814 }
3815
3816 error = zfs_link_create(tdzp, name, szp, tx, 0);
3817
3818 if (error == 0) {
3819 uint64_t txtype = TX_LINK;
3820 zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3821 }
3822
3823 dmu_tx_commit(tx);
3824
3825 if (error == 0) {
3826 vnevent_link(ZTOV(szp), ct);
3827 }
3828
3829 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3830 zil_commit(zilog, 0);
3831
3832 ZFS_EXIT(zfsvfs);
3833 return (error);
3834 }
3835
3836 /*
3837 * Free or allocate space in a file. Currently, this function only
3838 * supports the `F_FREESP' command. However, this command is somewhat
3839 * misnamed, as its functionality includes the ability to allocate as
3840 * well as free space.
3841 *
3842 * IN: ip - inode of file to free data in.
3843 * cmd - action to take (only F_FREESP supported).
3844 * bfp - section of file to free/alloc.
3845 * flag - current file open mode flags.
3846 * offset - current file offset.
3847 * cr - credentials of caller.
3848 *
3849 * RETURN: 0 on success, error code on failure.
3850 *
3851 * Timestamps:
3852 * ip - ctime|mtime updated
3853 */
3854 /* ARGSUSED */
3855 int
zfs_space(znode_t * zp,int cmd,flock64_t * bfp,int flag,offset_t offset,cred_t * cr)3856 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3857 offset_t offset, cred_t *cr)
3858 {
3859 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3860 uint64_t off, len;
3861 int error;
3862
3863 ZFS_ENTER(zfsvfs);
3864 ZFS_VERIFY_ZP(zp);
3865
3866 if (cmd != F_FREESP) {
3867 ZFS_EXIT(zfsvfs);
3868 return (SET_ERROR(EINVAL));
3869 }
3870
3871 /*
3872 * Callers might not be able to detect properly that we are read-only,
3873 * so check it explicitly here.
3874 */
3875 if (zfs_is_readonly(zfsvfs)) {
3876 ZFS_EXIT(zfsvfs);
3877 return (SET_ERROR(EROFS));
3878 }
3879
3880 if (bfp->l_len < 0) {
3881 ZFS_EXIT(zfsvfs);
3882 return (SET_ERROR(EINVAL));
3883 }
3884
3885 /*
3886 * Permissions aren't checked on Solaris because on this OS
3887 * zfs_space() can only be called with an opened file handle.
3888 * On Linux we can get here through truncate_range() which
3889 * operates directly on inodes, so we need to check access rights.
3890 */
3891 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
3892 ZFS_EXIT(zfsvfs);
3893 return (error);
3894 }
3895
3896 off = bfp->l_start;
3897 len = bfp->l_len; /* 0 means from off to end of file */
3898
3899 error = zfs_freesp(zp, off, len, flag, TRUE);
3900
3901 ZFS_EXIT(zfsvfs);
3902 return (error);
3903 }
3904
3905 /*ARGSUSED*/
3906 static void
zfs_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)3907 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3908 {
3909 znode_t *zp = VTOZ(vp);
3910 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3911 int error;
3912
3913 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3914 if (zp->z_sa_hdl == NULL) {
3915 /*
3916 * The fs has been unmounted, or we did a
3917 * suspend/resume and this file no longer exists.
3918 */
3919 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3920 vrecycle(vp);
3921 return;
3922 }
3923
3924 if (zp->z_unlinked) {
3925 /*
3926 * Fast path to recycle a vnode of a removed file.
3927 */
3928 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3929 vrecycle(vp);
3930 return;
3931 }
3932
3933 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3934 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3935
3936 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3937 zfs_sa_upgrade_txholds(tx, zp);
3938 error = dmu_tx_assign(tx, TXG_WAIT);
3939 if (error) {
3940 dmu_tx_abort(tx);
3941 } else {
3942 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3943 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3944 zp->z_atime_dirty = 0;
3945 dmu_tx_commit(tx);
3946 }
3947 }
3948 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3949 }
3950
3951
3952 CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid));
3953 CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid));
3954
3955 /*ARGSUSED*/
3956 static int
zfs_fid(vnode_t * vp,fid_t * fidp,caller_context_t * ct)3957 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3958 {
3959 znode_t *zp = VTOZ(vp);
3960 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3961 uint32_t gen;
3962 uint64_t gen64;
3963 uint64_t object = zp->z_id;
3964 zfid_short_t *zfid;
3965 int size, i, error;
3966
3967 ZFS_ENTER(zfsvfs);
3968 ZFS_VERIFY_ZP(zp);
3969
3970 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3971 &gen64, sizeof (uint64_t))) != 0) {
3972 ZFS_EXIT(zfsvfs);
3973 return (error);
3974 }
3975
3976 gen = (uint32_t)gen64;
3977
3978 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3979 fidp->fid_len = size;
3980
3981 zfid = (zfid_short_t *)fidp;
3982
3983 zfid->zf_len = size;
3984
3985 for (i = 0; i < sizeof (zfid->zf_object); i++)
3986 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3987
3988 /* Must have a non-zero generation number to distinguish from .zfs */
3989 if (gen == 0)
3990 gen = 1;
3991 for (i = 0; i < sizeof (zfid->zf_gen); i++)
3992 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3993
3994 if (size == LONG_FID_LEN) {
3995 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
3996 zfid_long_t *zlfid;
3997
3998 zlfid = (zfid_long_t *)fidp;
3999
4000 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
4001 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
4002
4003 /* XXX - this should be the generation number for the objset */
4004 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
4005 zlfid->zf_setgen[i] = 0;
4006 }
4007
4008 ZFS_EXIT(zfsvfs);
4009 return (0);
4010 }
4011
4012 static int
zfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)4013 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4014 caller_context_t *ct)
4015 {
4016 znode_t *zp;
4017 zfsvfs_t *zfsvfs;
4018
4019 switch (cmd) {
4020 case _PC_LINK_MAX:
4021 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
4022 return (0);
4023
4024 case _PC_FILESIZEBITS:
4025 *valp = 64;
4026 return (0);
4027 case _PC_MIN_HOLE_SIZE:
4028 *valp = (int)SPA_MINBLOCKSIZE;
4029 return (0);
4030 case _PC_ACL_EXTENDED:
4031 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
4032 zp = VTOZ(vp);
4033 zfsvfs = zp->z_zfsvfs;
4034 ZFS_ENTER(zfsvfs);
4035 ZFS_VERIFY_ZP(zp);
4036 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
4037 ZFS_EXIT(zfsvfs);
4038 #else
4039 *valp = 0;
4040 #endif
4041 return (0);
4042
4043 case _PC_ACL_NFS4:
4044 zp = VTOZ(vp);
4045 zfsvfs = zp->z_zfsvfs;
4046 ZFS_ENTER(zfsvfs);
4047 ZFS_VERIFY_ZP(zp);
4048 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
4049 ZFS_EXIT(zfsvfs);
4050 return (0);
4051
4052 case _PC_ACL_PATH_MAX:
4053 *valp = ACL_MAX_ENTRIES;
4054 return (0);
4055
4056 default:
4057 return (EOPNOTSUPP);
4058 }
4059 }
4060
/*
 * Read the pages ma[0..count-1] of vp in from the DMU.
 *
 *	IN:	vp	- vnode being paged in.
 *		ma	- array of pages to fill.
 *		count	- number of pages in ma.
 *		rbehind	- if not NULL, largest number of pages the caller
 *			  accepts before ma[0].
 *		rahead	- if not NULL, largest number of pages the caller
 *			  accepts after ma[count - 1].
 *
 *	OUT:	rbehind	- if not NULL, the read-behind count used.
 *		rahead	- if not NULL, the read-ahead count used.
 *
 *	RETURN:	zfs_vm_pagerret_ok on success,
 *		zfs_vm_pagerret_bad if the last requested page starts at
 *		or beyond the size recorded in the VM object,
 *		zfs_vm_pagerret_error if dmu_read_pages() fails (the same
 *		value is passed to the enter/verify macros as their error).
 */
static int
zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
    int *rahead)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	vm_object_t object;
	off_t start, end, obj_size;
	uint_t blksz;
	int pgsin_b, pgsin_a;
	int error;

	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);

	/* Byte range [start, end) covered by the requested pages. */
	start = IDX_TO_OFF(ma[0]->pindex);
	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);

	/*
	 * Lock a range covering all required and optional pages.
	 * Note that we need to handle the case of the block size growing.
	 */
	for (;;) {
		blksz = zp->z_blksz;
		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
		    rounddown(start, blksz),
		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
		if (lr == NULL) {
			/*
			 * The lock could not be taken without waiting.
			 * Carry on without it, but report zero read-ahead
			 * and read-behind and clear both pointers so the
			 * optional pages are not considered below.
			 */
			if (rahead != NULL) {
				*rahead = 0;
				rahead = NULL;
			}
			if (rbehind != NULL) {
				*rbehind = 0;
				rbehind = NULL;
			}
			break;
		}
		/* Retry if the block size changed while taking the lock. */
		if (blksz == zp->z_blksz)
			break;
		zfs_rangelock_exit(lr);
	}

	/* Sample the object size under the VM object lock. */
	object = ma[0]->object;
	zfs_vmobject_wlock(object);
	obj_size = object->un_pager.vnp.vnp_size;
	zfs_vmobject_wunlock(object);
	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
		if (lr != NULL)
			zfs_rangelock_exit(lr);
		ZFS_EXIT(zfsvfs);
		return (zfs_vm_pagerret_bad);
	}

	/*
	 * Read-behind: pages between the start of the block containing
	 * "start" and "start" itself, capped by the caller's limit.
	 */
	pgsin_b = 0;
	if (rbehind != NULL) {
		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
		pgsin_b = MIN(*rbehind, pgsin_b);
	}

	/*
	 * Read-ahead: pages between "end" and the end of the block
	 * containing it, clipped to the page-rounded object size and
	 * capped by the caller's limit.
	 */
	pgsin_a = 0;
	if (rahead != NULL) {
		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
		pgsin_a = MIN(*rahead, pgsin_a);
	}

	/*
	 * NB: we need to pass the exact byte size of the data that we expect
	 * to read after accounting for the file size.  This is required because
	 * ZFS will panic if we request DMU to read beyond the end of the last
	 * allocated block.
	 *
	 * (end - PAGE_SIZE) is the offset of the last requested page, so the
	 * final argument is the number of bytes of that page that lie below
	 * obj_size.
	 */
	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));

	if (lr != NULL)
		zfs_rangelock_exit(lr);
	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	/* The read kstat is charged count pages even when the read failed. */
	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);

	ZFS_EXIT(zfsvfs);

	if (error != 0)
		return (zfs_vm_pagerret_error);

	VM_CNT_INC(v_vnodein);
	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
	if (rbehind != NULL)
		*rbehind = pgsin_b;
	if (rahead != NULL)
		*rahead = pgsin_a;
	return (zfs_vm_pagerret_ok);
}
4158
4159 #ifndef _SYS_SYSPROTO_H_
4160 struct vop_getpages_args {
4161 struct vnode *a_vp;
4162 vm_page_t *a_m;
4163 int a_count;
4164 int *a_rbehind;
4165 int *a_rahead;
4166 };
4167 #endif
4168
4169 static int
zfs_freebsd_getpages(struct vop_getpages_args * ap)4170 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4171 {
4172
4173 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4174 ap->a_rahead));
4175 }
4176
4177 static int
zfs_putpages(struct vnode * vp,vm_page_t * ma,size_t len,int flags,int * rtvals)4178 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4179 int *rtvals)
4180 {
4181 znode_t *zp = VTOZ(vp);
4182 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4183 zfs_locked_range_t *lr;
4184 dmu_tx_t *tx;
4185 struct sf_buf *sf;
4186 vm_object_t object;
4187 vm_page_t m;
4188 caddr_t va;
4189 size_t tocopy;
4190 size_t lo_len;
4191 vm_ooffset_t lo_off;
4192 vm_ooffset_t off;
4193 uint_t blksz;
4194 int ncount;
4195 int pcount;
4196 int err;
4197 int i;
4198
4199 object = vp->v_object;
4200 KASSERT(ma[0]->object == object, ("mismatching object"));
4201 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4202
4203 pcount = btoc(len);
4204 ncount = pcount;
4205 for (i = 0; i < pcount; i++)
4206 rtvals[i] = zfs_vm_pagerret_error;
4207
4208 ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
4209 ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
4210
4211 off = IDX_TO_OFF(ma[0]->pindex);
4212 blksz = zp->z_blksz;
4213 lo_off = rounddown(off, blksz);
4214 lo_len = roundup(len + (off - lo_off), blksz);
4215 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4216
4217 zfs_vmobject_wlock(object);
4218 if (len + off > object->un_pager.vnp.vnp_size) {
4219 if (object->un_pager.vnp.vnp_size > off) {
4220 int pgoff;
4221
4222 len = object->un_pager.vnp.vnp_size - off;
4223 ncount = btoc(len);
4224 if ((pgoff = (int)len & PAGE_MASK) != 0) {
4225 /*
4226 * If the object is locked and the following
4227 * conditions hold, then the page's dirty
4228 * field cannot be concurrently changed by a
4229 * pmap operation.
4230 */
4231 m = ma[ncount - 1];
4232 vm_page_assert_sbusied(m);
4233 KASSERT(!pmap_page_is_write_mapped(m),
4234 ("zfs_putpages: page %p is not read-only",
4235 m));
4236 vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4237 pgoff);
4238 }
4239 } else {
4240 len = 0;
4241 ncount = 0;
4242 }
4243 if (ncount < pcount) {
4244 for (i = ncount; i < pcount; i++) {
4245 rtvals[i] = zfs_vm_pagerret_bad;
4246 }
4247 }
4248 }
4249 zfs_vmobject_wunlock(object);
4250
4251 if (ncount == 0)
4252 goto out;
4253
4254 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4255 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4256 (zp->z_projid != ZFS_DEFAULT_PROJID &&
4257 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4258 zp->z_projid))) {
4259 goto out;
4260 }
4261
4262 tx = dmu_tx_create(zfsvfs->z_os);
4263 dmu_tx_hold_write(tx, zp->z_id, off, len);
4264
4265 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4266 zfs_sa_upgrade_txholds(tx, zp);
4267 err = dmu_tx_assign(tx, TXG_WAIT);
4268 if (err != 0) {
4269 dmu_tx_abort(tx);
4270 goto out;
4271 }
4272
4273 if (zp->z_blksz < PAGE_SIZE) {
4274 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4275 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4276 va = zfs_map_page(ma[i], &sf);
4277 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4278 zfs_unmap_page(sf);
4279 }
4280 } else {
4281 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4282 }
4283
4284 if (err == 0) {
4285 uint64_t mtime[2], ctime[2];
4286 sa_bulk_attr_t bulk[3];
4287 int count = 0;
4288
4289 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4290 &mtime, 16);
4291 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4292 &ctime, 16);
4293 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4294 &zp->z_pflags, 8);
4295 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4296 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4297 ASSERT0(err);
4298 /*
4299 * XXX we should be passing a callback to undirty
4300 * but that would make the locking messier
4301 */
4302 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4303 len, 0, NULL, NULL);
4304
4305 zfs_vmobject_wlock(object);
4306 for (i = 0; i < ncount; i++) {
4307 rtvals[i] = zfs_vm_pagerret_ok;
4308 vm_page_undirty(ma[i]);
4309 }
4310 zfs_vmobject_wunlock(object);
4311 VM_CNT_INC(v_vnodeout);
4312 VM_CNT_ADD(v_vnodepgsout, ncount);
4313 }
4314 dmu_tx_commit(tx);
4315
4316 out:
4317 zfs_rangelock_exit(lr);
4318 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
4319 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4320 zil_commit(zfsvfs->z_log, zp->z_id);
4321
4322 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4323
4324 ZFS_EXIT(zfsvfs);
4325 return (rtvals[0]);
4326 }
4327
4328 #ifndef _SYS_SYSPROTO_H_
4329 struct vop_putpages_args {
4330 struct vnode *a_vp;
4331 vm_page_t *a_m;
4332 int a_count;
4333 int a_sync;
4334 int *a_rtvals;
4335 };
4336 #endif
4337
4338 static int
zfs_freebsd_putpages(struct vop_putpages_args * ap)4339 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4340 {
4341
4342 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4343 ap->a_rtvals));
4344 }
4345
4346 #ifndef _SYS_SYSPROTO_H_
4347 struct vop_bmap_args {
4348 struct vnode *a_vp;
4349 daddr_t a_bn;
4350 struct bufobj **a_bop;
4351 daddr_t *a_bnp;
4352 int *a_runp;
4353 int *a_runb;
4354 };
4355 #endif
4356
4357 static int
zfs_freebsd_bmap(struct vop_bmap_args * ap)4358 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4359 {
4360
4361 if (ap->a_bop != NULL)
4362 *ap->a_bop = &ap->a_vp->v_bufobj;
4363 if (ap->a_bnp != NULL)
4364 *ap->a_bnp = ap->a_bn;
4365 if (ap->a_runp != NULL)
4366 *ap->a_runp = 0;
4367 if (ap->a_runb != NULL)
4368 *ap->a_runb = 0;
4369
4370 return (0);
4371 }
4372
4373 #ifndef _SYS_SYSPROTO_H_
4374 struct vop_open_args {
4375 struct vnode *a_vp;
4376 int a_mode;
4377 struct ucred *a_cred;
4378 struct thread *a_td;
4379 };
4380 #endif
4381
4382 static int
zfs_freebsd_open(struct vop_open_args * ap)4383 zfs_freebsd_open(struct vop_open_args *ap)
4384 {
4385 vnode_t *vp = ap->a_vp;
4386 znode_t *zp = VTOZ(vp);
4387 int error;
4388
4389 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4390 if (error == 0)
4391 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4392 return (error);
4393 }
4394
#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int  a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * Forward a close to zfs_close(), passing the fixed values 1 and 0 for
 * its third and fourth arguments.
 */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int fflag = ap->a_fflag;

	return (zfs_close(vp, fflag, 1, 0, ap->a_cred));
}
4410
4411 #ifndef _SYS_SYSPROTO_H_
4412 struct vop_ioctl_args {
4413 struct vnode *a_vp;
4414 ulong_t a_command;
4415 caddr_t a_data;
4416 int a_fflag;
4417 struct ucred *cred;
4418 struct thread *td;
4419 };
4420 #endif
4421
4422 static int
zfs_freebsd_ioctl(struct vop_ioctl_args * ap)4423 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4424 {
4425
4426 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4427 ap->a_fflag, ap->a_cred, NULL));
4428 }
4429
4430 static int
ioflags(int ioflags)4431 ioflags(int ioflags)
4432 {
4433 int flags = 0;
4434
4435 if (ioflags & IO_APPEND)
4436 flags |= FAPPEND;
4437 if (ioflags & IO_NDELAY)
4438 flags |= FNONBLOCK;
4439 if (ioflags & IO_SYNC)
4440 flags |= (FSYNC | FDSYNC | FRSYNC);
4441
4442 return (flags);
4443 }
4444
4445 #ifndef _SYS_SYSPROTO_H_
4446 struct vop_read_args {
4447 struct vnode *a_vp;
4448 struct uio *a_uio;
4449 int a_ioflag;
4450 struct ucred *a_cred;
4451 };
4452 #endif
4453
4454 static int
zfs_freebsd_read(struct vop_read_args * ap)4455 zfs_freebsd_read(struct vop_read_args *ap)
4456 {
4457 zfs_uio_t uio;
4458 zfs_uio_init(&uio, ap->a_uio);
4459 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4460 ap->a_cred));
4461 }
4462
4463 #ifndef _SYS_SYSPROTO_H_
4464 struct vop_write_args {
4465 struct vnode *a_vp;
4466 struct uio *a_uio;
4467 int a_ioflag;
4468 struct ucred *a_cred;
4469 };
4470 #endif
4471
4472 static int
zfs_freebsd_write(struct vop_write_args * ap)4473 zfs_freebsd_write(struct vop_write_args *ap)
4474 {
4475 zfs_uio_t uio;
4476 zfs_uio_init(&uio, ap->a_uio);
4477 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4478 ap->a_cred));
4479 }
4480
#if __FreeBSD_version >= 1300102
/*
 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 *
 * Returns 0 only when the znode is available, is neither quarantined nor
 * an extended attribute, and has ZFS_NO_EXECS_DENIED set.  Every other
 * case returns EAGAIN.
 */
static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
	znode_t *zp = VTOZ_SMR(v->a_vp);
	uint64_t pflags;

	if (__predict_false(zp == NULL))
		return (EAGAIN);

	/* Take one snapshot of the flags and test only that copy. */
	pflags = atomic_load_64(&zp->z_pflags);
	if ((pflags & (ZFS_AV_QUARANTINED | ZFS_XATTR)) != 0 ||
	    (pflags & ZFS_NO_EXECS_DENIED) == 0)
		return (EAGAIN);

	return (0);
}
#endif
4507
#if __FreeBSD_version >= 1300139
/*
 * Resolve a symlink from the target string cached on the znode.
 * Returns EAGAIN when the znode or its cached target is not available;
 * otherwise returns the result of cache_symlink_resolve().
 */
static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	znode_t *zp = VTOZ_SMR(v->a_vp);
	char *cached;

	if (__predict_false(zp == NULL))
		return (EAGAIN);

	cached = atomic_load_consume_ptr(&zp->z_cached_symlink);
	if (cached == NULL)
		return (EAGAIN);

	return (cache_symlink_resolve(v->a_fpl, cached, strlen(cached)));
}
#endif
4529
4530 #ifndef _SYS_SYSPROTO_H_
4531 struct vop_access_args {
4532 struct vnode *a_vp;
4533 accmode_t a_accmode;
4534 struct ucred *a_cred;
4535 struct thread *a_td;
4536 };
4537 #endif
4538
4539 static int
zfs_freebsd_access(struct vop_access_args * ap)4540 zfs_freebsd_access(struct vop_access_args *ap)
4541 {
4542 vnode_t *vp = ap->a_vp;
4543 znode_t *zp = VTOZ(vp);
4544 accmode_t accmode;
4545 int error = 0;
4546
4547
4548 if (ap->a_accmode == VEXEC) {
4549 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4550 return (0);
4551 }
4552
4553 /*
4554 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4555 */
4556 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4557 if (accmode != 0)
4558 error = zfs_access(zp, accmode, 0, ap->a_cred);
4559
4560 /*
4561 * VADMIN has to be handled by vaccess().
4562 */
4563 if (error == 0) {
4564 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4565 if (accmode != 0) {
4566 #if __FreeBSD_version >= 1300105
4567 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4568 zp->z_gid, accmode, ap->a_cred);
4569 #else
4570 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4571 zp->z_gid, accmode, ap->a_cred, NULL);
4572 #endif
4573 }
4574 }
4575
4576 /*
4577 * For VEXEC, ensure that at least one execute bit is set for
4578 * non-directories.
4579 */
4580 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4581 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4582 error = EACCES;
4583 }
4584
4585 return (error);
4586 }
4587
4588 #ifndef _SYS_SYSPROTO_H_
4589 struct vop_lookup_args {
4590 struct vnode *a_dvp;
4591 struct vnode **a_vpp;
4592 struct componentname *a_cnp;
4593 };
4594 #endif
4595
4596 static int
zfs_freebsd_lookup(struct vop_lookup_args * ap,boolean_t cached)4597 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4598 {
4599 struct componentname *cnp = ap->a_cnp;
4600 char nm[NAME_MAX + 1];
4601
4602 ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4603 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4604
4605 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4606 cnp->cn_cred, 0, cached));
4607 }
4608
4609 static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args * ap)4610 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4611 {
4612
4613 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4614 }
4615
4616 #ifndef _SYS_SYSPROTO_H_
4617 struct vop_lookup_args {
4618 struct vnode *a_dvp;
4619 struct vnode **a_vpp;
4620 struct componentname *a_cnp;
4621 };
4622 #endif
4623
4624 static int
zfs_cache_lookup(struct vop_lookup_args * ap)4625 zfs_cache_lookup(struct vop_lookup_args *ap)
4626 {
4627 zfsvfs_t *zfsvfs;
4628
4629 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4630 if (zfsvfs->z_use_namecache)
4631 return (vfs_cache_lookup(ap));
4632 else
4633 return (zfs_freebsd_lookup(ap, B_FALSE));
4634 }
4635
4636 #ifndef _SYS_SYSPROTO_H_
4637 struct vop_create_args {
4638 struct vnode *a_dvp;
4639 struct vnode **a_vpp;
4640 struct componentname *a_cnp;
4641 struct vattr *a_vap;
4642 };
4643 #endif
4644
4645 static int
zfs_freebsd_create(struct vop_create_args * ap)4646 zfs_freebsd_create(struct vop_create_args *ap)
4647 {
4648 zfsvfs_t *zfsvfs;
4649 struct componentname *cnp = ap->a_cnp;
4650 vattr_t *vap = ap->a_vap;
4651 znode_t *zp = NULL;
4652 int rc, mode;
4653
4654 #if __FreeBSD_version < 1400068
4655 ASSERT(cnp->cn_flags & SAVENAME);
4656 #endif
4657
4658 vattr_init_mask(vap);
4659 mode = vap->va_mode & ALLPERMS;
4660 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4661 *ap->a_vpp = NULL;
4662
4663 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode,
4664 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
4665 if (rc == 0)
4666 *ap->a_vpp = ZTOV(zp);
4667 if (zfsvfs->z_use_namecache &&
4668 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4669 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4670
4671 return (rc);
4672 }
4673
4674 #ifndef _SYS_SYSPROTO_H_
4675 struct vop_remove_args {
4676 struct vnode *a_dvp;
4677 struct vnode *a_vp;
4678 struct componentname *a_cnp;
4679 };
4680 #endif
4681
4682 static int
zfs_freebsd_remove(struct vop_remove_args * ap)4683 zfs_freebsd_remove(struct vop_remove_args *ap)
4684 {
4685
4686 #if __FreeBSD_version < 1400068
4687 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4688 #endif
4689
4690 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4691 ap->a_cnp->cn_cred));
4692 }
4693
4694 #ifndef _SYS_SYSPROTO_H_
4695 struct vop_mkdir_args {
4696 struct vnode *a_dvp;
4697 struct vnode **a_vpp;
4698 struct componentname *a_cnp;
4699 struct vattr *a_vap;
4700 };
4701 #endif
4702
4703 static int
zfs_freebsd_mkdir(struct vop_mkdir_args * ap)4704 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4705 {
4706 vattr_t *vap = ap->a_vap;
4707 znode_t *zp = NULL;
4708 int rc;
4709
4710 #if __FreeBSD_version < 1400068
4711 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4712 #endif
4713
4714 vattr_init_mask(vap);
4715 *ap->a_vpp = NULL;
4716
4717 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4718 ap->a_cnp->cn_cred, 0, NULL);
4719
4720 if (rc == 0)
4721 *ap->a_vpp = ZTOV(zp);
4722 return (rc);
4723 }
4724
4725 #ifndef _SYS_SYSPROTO_H_
4726 struct vop_rmdir_args {
4727 struct vnode *a_dvp;
4728 struct vnode *a_vp;
4729 struct componentname *a_cnp;
4730 };
4731 #endif
4732
4733 static int
zfs_freebsd_rmdir(struct vop_rmdir_args * ap)4734 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4735 {
4736 struct componentname *cnp = ap->a_cnp;
4737
4738 #if __FreeBSD_version < 1400068
4739 ASSERT(cnp->cn_flags & SAVENAME);
4740 #endif
4741
4742 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4743 }
4744
4745 #ifndef _SYS_SYSPROTO_H_
4746 struct vop_readdir_args {
4747 struct vnode *a_vp;
4748 struct uio *a_uio;
4749 struct ucred *a_cred;
4750 int *a_eofflag;
4751 int *a_ncookies;
4752 cookie_t **a_cookies;
4753 };
4754 #endif
4755
4756 static int
zfs_freebsd_readdir(struct vop_readdir_args * ap)4757 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4758 {
4759 zfs_uio_t uio;
4760 zfs_uio_init(&uio, ap->a_uio);
4761 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4762 ap->a_ncookies, ap->a_cookies));
4763 }
4764
4765 #ifndef _SYS_SYSPROTO_H_
4766 struct vop_fsync_args {
4767 struct vnode *a_vp;
4768 int a_waitfor;
4769 struct thread *a_td;
4770 };
4771 #endif
4772
4773 static int
zfs_freebsd_fsync(struct vop_fsync_args * ap)4774 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4775 {
4776
4777 vop_stdfsync(ap);
4778 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4779 }
4780
4781 #ifndef _SYS_SYSPROTO_H_
4782 struct vop_getattr_args {
4783 struct vnode *a_vp;
4784 struct vattr *a_vap;
4785 struct ucred *a_cred;
4786 };
4787 #endif
4788
4789 static int
zfs_freebsd_getattr(struct vop_getattr_args * ap)4790 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4791 {
4792 vattr_t *vap = ap->a_vap;
4793 xvattr_t xvap;
4794 ulong_t fflags = 0;
4795 int error;
4796
4797 xva_init(&xvap);
4798 xvap.xva_vattr = *vap;
4799 xvap.xva_vattr.va_mask |= AT_XVATTR;
4800
4801 /* Convert chflags into ZFS-type flags. */
4802 /* XXX: what about SF_SETTABLE?. */
4803 XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4804 XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4805 XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4806 XVA_SET_REQ(&xvap, XAT_NODUMP);
4807 XVA_SET_REQ(&xvap, XAT_READONLY);
4808 XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4809 XVA_SET_REQ(&xvap, XAT_SYSTEM);
4810 XVA_SET_REQ(&xvap, XAT_HIDDEN);
4811 XVA_SET_REQ(&xvap, XAT_REPARSE);
4812 XVA_SET_REQ(&xvap, XAT_OFFLINE);
4813 XVA_SET_REQ(&xvap, XAT_SPARSE);
4814
4815 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4816 if (error != 0)
4817 return (error);
4818
4819 /* Convert ZFS xattr into chflags. */
4820 #define FLAG_CHECK(fflag, xflag, xfield) do { \
4821 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \
4822 fflags |= (fflag); \
4823 } while (0)
4824 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4825 xvap.xva_xoptattrs.xoa_immutable);
4826 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4827 xvap.xva_xoptattrs.xoa_appendonly);
4828 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4829 xvap.xva_xoptattrs.xoa_nounlink);
4830 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4831 xvap.xva_xoptattrs.xoa_archive);
4832 FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4833 xvap.xva_xoptattrs.xoa_nodump);
4834 FLAG_CHECK(UF_READONLY, XAT_READONLY,
4835 xvap.xva_xoptattrs.xoa_readonly);
4836 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4837 xvap.xva_xoptattrs.xoa_system);
4838 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4839 xvap.xva_xoptattrs.xoa_hidden);
4840 FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4841 xvap.xva_xoptattrs.xoa_reparse);
4842 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4843 xvap.xva_xoptattrs.xoa_offline);
4844 FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4845 xvap.xva_xoptattrs.xoa_sparse);
4846
4847 #undef FLAG_CHECK
4848 *vap = xvap.xva_vattr;
4849 vap->va_flags = fflags;
4850 return (0);
4851 }
4852
4853 #ifndef _SYS_SYSPROTO_H_
4854 struct vop_setattr_args {
4855 struct vnode *a_vp;
4856 struct vattr *a_vap;
4857 struct ucred *a_cred;
4858 };
4859 #endif
4860
4861 static int
zfs_freebsd_setattr(struct vop_setattr_args * ap)4862 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4863 {
4864 vnode_t *vp = ap->a_vp;
4865 vattr_t *vap = ap->a_vap;
4866 cred_t *cred = ap->a_cred;
4867 xvattr_t xvap;
4868 ulong_t fflags;
4869 uint64_t zflags;
4870
4871 vattr_init_mask(vap);
4872 vap->va_mask &= ~AT_NOSET;
4873
4874 xva_init(&xvap);
4875 xvap.xva_vattr = *vap;
4876
4877 zflags = VTOZ(vp)->z_pflags;
4878
4879 if (vap->va_flags != VNOVAL) {
4880 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4881 int error;
4882
4883 if (zfsvfs->z_use_fuids == B_FALSE)
4884 return (EOPNOTSUPP);
4885
4886 fflags = vap->va_flags;
4887 /*
4888 * XXX KDM
4889 * We need to figure out whether it makes sense to allow
4890 * UF_REPARSE through, since we don't really have other
4891 * facilities to handle reparse points and zfs_setattr()
4892 * doesn't currently allow setting that attribute anyway.
4893 */
4894 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4895 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4896 UF_OFFLINE|UF_SPARSE)) != 0)
4897 return (EOPNOTSUPP);
4898 /*
4899 * Unprivileged processes are not permitted to unset system
4900 * flags, or modify flags if any system flags are set.
4901 * Privileged non-jail processes may not modify system flags
4902 * if securelevel > 0 and any existing system flags are set.
4903 * Privileged jail processes behave like privileged non-jail
4904 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4905 * otherwise, they behave like unprivileged processes.
4906 */
4907 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4908 spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4909 if (zflags &
4910 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4911 error = securelevel_gt(cred, 0);
4912 if (error != 0)
4913 return (error);
4914 }
4915 } else {
4916 /*
4917 * Callers may only modify the file flags on
4918 * objects they have VADMIN rights for.
4919 */
4920 if ((error = VOP_ACCESS(vp, VADMIN, cred,
4921 curthread)) != 0)
4922 return (error);
4923 if (zflags &
4924 (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4925 ZFS_NOUNLINK)) {
4926 return (EPERM);
4927 }
4928 if (fflags &
4929 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4930 return (EPERM);
4931 }
4932 }
4933
4934 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \
4935 if (((fflags & (fflag)) && !(zflags & (zflag))) || \
4936 ((zflags & (zflag)) && !(fflags & (fflag)))) { \
4937 XVA_SET_REQ(&xvap, (xflag)); \
4938 (xfield) = ((fflags & (fflag)) != 0); \
4939 } \
4940 } while (0)
4941 /* Convert chflags into ZFS-type flags. */
4942 /* XXX: what about SF_SETTABLE?. */
4943 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4944 xvap.xva_xoptattrs.xoa_immutable);
4945 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4946 xvap.xva_xoptattrs.xoa_appendonly);
4947 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4948 xvap.xva_xoptattrs.xoa_nounlink);
4949 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4950 xvap.xva_xoptattrs.xoa_archive);
4951 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4952 xvap.xva_xoptattrs.xoa_nodump);
4953 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4954 xvap.xva_xoptattrs.xoa_readonly);
4955 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4956 xvap.xva_xoptattrs.xoa_system);
4957 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4958 xvap.xva_xoptattrs.xoa_hidden);
4959 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4960 xvap.xva_xoptattrs.xoa_reparse);
4961 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4962 xvap.xva_xoptattrs.xoa_offline);
4963 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4964 xvap.xva_xoptattrs.xoa_sparse);
4965 #undef FLAG_CHANGE
4966 }
4967 if (vap->va_birthtime.tv_sec != VNOVAL) {
4968 xvap.xva_vattr.va_mask |= AT_XVATTR;
4969 XVA_SET_REQ(&xvap, XAT_CREATETIME);
4970 }
4971 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
4972 }
4973
4974 #ifndef _SYS_SYSPROTO_H_
4975 struct vop_rename_args {
4976 struct vnode *a_fdvp;
4977 struct vnode *a_fvp;
4978 struct componentname *a_fcnp;
4979 struct vnode *a_tdvp;
4980 struct vnode *a_tvp;
4981 struct componentname *a_tcnp;
4982 };
4983 #endif
4984
4985 static int
zfs_freebsd_rename(struct vop_rename_args * ap)4986 zfs_freebsd_rename(struct vop_rename_args *ap)
4987 {
4988 vnode_t *fdvp = ap->a_fdvp;
4989 vnode_t *fvp = ap->a_fvp;
4990 vnode_t *tdvp = ap->a_tdvp;
4991 vnode_t *tvp = ap->a_tvp;
4992 int error;
4993
4994 #if __FreeBSD_version < 1400068
4995 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4996 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4997 #endif
4998
4999 error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
5000 ap->a_tcnp, ap->a_fcnp->cn_cred);
5001
5002 vrele(fdvp);
5003 vrele(fvp);
5004 vrele(tdvp);
5005 if (tvp != NULL)
5006 vrele(tvp);
5007
5008 return (error);
5009 }
5010
5011 #ifndef _SYS_SYSPROTO_H_
5012 struct vop_symlink_args {
5013 struct vnode *a_dvp;
5014 struct vnode **a_vpp;
5015 struct componentname *a_cnp;
5016 struct vattr *a_vap;
5017 char *a_target;
5018 };
5019 #endif
5020
5021 static int
zfs_freebsd_symlink(struct vop_symlink_args * ap)5022 zfs_freebsd_symlink(struct vop_symlink_args *ap)
5023 {
5024 struct componentname *cnp = ap->a_cnp;
5025 vattr_t *vap = ap->a_vap;
5026 znode_t *zp = NULL;
5027 #if __FreeBSD_version >= 1300139
5028 char *symlink;
5029 size_t symlink_len;
5030 #endif
5031 int rc;
5032
5033 #if __FreeBSD_version < 1400068
5034 ASSERT(cnp->cn_flags & SAVENAME);
5035 #endif
5036
5037 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
5038 vattr_init_mask(vap);
5039 *ap->a_vpp = NULL;
5040
5041 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
5042 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
5043 if (rc == 0) {
5044 *ap->a_vpp = ZTOV(zp);
5045 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
5046 #if __FreeBSD_version >= 1300139
5047 MPASS(zp->z_cached_symlink == NULL);
5048 symlink_len = strlen(ap->a_target);
5049 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5050 if (symlink != NULL) {
5051 memcpy(symlink, ap->a_target, symlink_len);
5052 symlink[symlink_len] = '\0';
5053 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5054 (uintptr_t)symlink);
5055 }
5056 #endif
5057 }
5058 return (rc);
5059 }
5060
5061 #ifndef _SYS_SYSPROTO_H_
5062 struct vop_readlink_args {
5063 struct vnode *a_vp;
5064 struct uio *a_uio;
5065 struct ucred *a_cred;
5066 };
5067 #endif
5068
5069 static int
zfs_freebsd_readlink(struct vop_readlink_args * ap)5070 zfs_freebsd_readlink(struct vop_readlink_args *ap)
5071 {
5072 zfs_uio_t uio;
5073 int error;
5074 #if __FreeBSD_version >= 1300139
5075 znode_t *zp = VTOZ(ap->a_vp);
5076 char *symlink, *base;
5077 size_t symlink_len;
5078 bool trycache;
5079 #endif
5080
5081 zfs_uio_init(&uio, ap->a_uio);
5082 #if __FreeBSD_version >= 1300139
5083 trycache = false;
5084 if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
5085 zfs_uio_iovcnt(&uio) == 1) {
5086 base = zfs_uio_iovbase(&uio, 0);
5087 symlink_len = zfs_uio_iovlen(&uio, 0);
5088 trycache = true;
5089 }
5090 #endif
5091 error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
5092 #if __FreeBSD_version >= 1300139
5093 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
5094 error != 0 || !trycache) {
5095 return (error);
5096 }
5097 symlink_len -= zfs_uio_resid(&uio);
5098 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5099 if (symlink != NULL) {
5100 memcpy(symlink, base, symlink_len);
5101 symlink[symlink_len] = '\0';
5102 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5103 (uintptr_t)NULL, (uintptr_t)symlink)) {
5104 cache_symlink_free(symlink, symlink_len + 1);
5105 }
5106 }
5107 #endif
5108 return (error);
5109 }
5110
5111 #ifndef _SYS_SYSPROTO_H_
5112 struct vop_link_args {
5113 struct vnode *a_tdvp;
5114 struct vnode *a_vp;
5115 struct componentname *a_cnp;
5116 };
5117 #endif
5118
5119 static int
zfs_freebsd_link(struct vop_link_args * ap)5120 zfs_freebsd_link(struct vop_link_args *ap)
5121 {
5122 struct componentname *cnp = ap->a_cnp;
5123 vnode_t *vp = ap->a_vp;
5124 vnode_t *tdvp = ap->a_tdvp;
5125
5126 if (tdvp->v_mount != vp->v_mount)
5127 return (EXDEV);
5128
5129 #if __FreeBSD_version < 1400068
5130 ASSERT(cnp->cn_flags & SAVENAME);
5131 #endif
5132
5133 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5134 cnp->cn_nameptr, cnp->cn_cred, 0));
5135 }
5136
5137 #ifndef _SYS_SYSPROTO_H_
5138 struct vop_inactive_args {
5139 struct vnode *a_vp;
5140 struct thread *a_td;
5141 };
5142 #endif
5143
5144 static int
zfs_freebsd_inactive(struct vop_inactive_args * ap)5145 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5146 {
5147 vnode_t *vp = ap->a_vp;
5148
5149 #if __FreeBSD_version >= 1300123
5150 zfs_inactive(vp, curthread->td_ucred, NULL);
5151 #else
5152 zfs_inactive(vp, ap->a_td->td_ucred, NULL);
5153 #endif
5154 return (0);
5155 }
5156
#if __FreeBSD_version >= 1300042
#ifndef _SYS_SYSPROTO_H_
struct vop_need_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_NEED_INACTIVE entry point.
 *
 * Returns 1 when vn_need_pageq_flush() says so, when the teardown-inactive
 * lock cannot be taken for reading without blocking, or when the znode has
 * no SA handle, is unlinked, or has a dirty atime.  Returns 0 otherwise.
 */
static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int need = 0;

	if (vn_need_pageq_flush(vp))
		return (1);

	if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
		return (1);
	if (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty)
		need = 1;
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	return (need);
}
#endif
5184
5185 #ifndef _SYS_SYSPROTO_H_
5186 struct vop_reclaim_args {
5187 struct vnode *a_vp;
5188 struct thread *a_td;
5189 };
5190 #endif
5191
5192 static int
zfs_freebsd_reclaim(struct vop_reclaim_args * ap)5193 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
5194 {
5195 vnode_t *vp = ap->a_vp;
5196 znode_t *zp = VTOZ(vp);
5197 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5198
5199 ASSERT3P(zp, !=, NULL);
5200
5201 #if __FreeBSD_version < 1300042
5202 /* Destroy the vm object and flush associated pages. */
5203 vnode_destroy_vobject(vp);
5204 #endif
5205 /*
5206 * z_teardown_inactive_lock protects from a race with
5207 * zfs_znode_dmu_fini in zfsvfs_teardown during
5208 * force unmount.
5209 */
5210 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5211 if (zp->z_sa_hdl == NULL)
5212 zfs_znode_free(zp);
5213 else
5214 zfs_zinactive(zp);
5215 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5216
5217 vp->v_data = NULL;
5218 return (0);
5219 }
5220
5221 #ifndef _SYS_SYSPROTO_H_
5222 struct vop_fid_args {
5223 struct vnode *a_vp;
5224 struct fid *a_fid;
5225 };
5226 #endif
5227
5228 static int
zfs_freebsd_fid(struct vop_fid_args * ap)5229 zfs_freebsd_fid(struct vop_fid_args *ap)
5230 {
5231
5232 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5233 }
5234
5235
5236 #ifndef _SYS_SYSPROTO_H_
5237 struct vop_pathconf_args {
5238 struct vnode *a_vp;
5239 int a_name;
5240 register_t *a_retval;
5241 } *ap;
5242 #endif
5243
5244 static int
zfs_freebsd_pathconf(struct vop_pathconf_args * ap)5245 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5246 {
5247 ulong_t val;
5248 int error;
5249
5250 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5251 curthread->td_ucred, NULL);
5252 if (error == 0) {
5253 *ap->a_retval = val;
5254 return (error);
5255 }
5256 if (error != EOPNOTSUPP)
5257 return (error);
5258
5259 switch (ap->a_name) {
5260 case _PC_NAME_MAX:
5261 *ap->a_retval = NAME_MAX;
5262 return (0);
5263 #if __FreeBSD_version >= 1400032
5264 case _PC_DEALLOC_PRESENT:
5265 *ap->a_retval = 1;
5266 return (0);
5267 #endif
5268 case _PC_PIPE_BUF:
5269 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5270 *ap->a_retval = PIPE_BUF;
5271 return (0);
5272 }
5273 return (EINVAL);
5274 default:
5275 return (vop_stdpathconf(ap));
5276 }
5277 }
5278
5279 /*
5280 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5281 * extended attribute name:
5282 *
5283 * NAMESPACE PREFIX
5284 * system freebsd:system:
5285 * user (none, can be used to access ZFS fsattr(5) attributes
5286 * created on Solaris)
5287 */
5288 static int
zfs_create_attrname(int attrnamespace,const char * name,char * attrname,size_t size)5289 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5290 size_t size)
5291 {
5292 const char *namespace, *prefix, *suffix;
5293
5294 /* We don't allow '/' character in attribute name. */
5295 if (strchr(name, '/') != NULL)
5296 return (SET_ERROR(EINVAL));
5297 /* We don't allow attribute names that start with "freebsd:" string. */
5298 if (strncmp(name, "freebsd:", 8) == 0)
5299 return (SET_ERROR(EINVAL));
5300
5301 bzero(attrname, size);
5302
5303 switch (attrnamespace) {
5304 case EXTATTR_NAMESPACE_USER:
5305 #if 0
5306 prefix = "freebsd:";
5307 namespace = EXTATTR_NAMESPACE_USER_STRING;
5308 suffix = ":";
5309 #else
5310 /*
5311 * This is the default namespace by which we can access all
5312 * attributes created on Solaris.
5313 */
5314 prefix = namespace = suffix = "";
5315 #endif
5316 break;
5317 case EXTATTR_NAMESPACE_SYSTEM:
5318 prefix = "freebsd:";
5319 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5320 suffix = ":";
5321 break;
5322 case EXTATTR_NAMESPACE_EMPTY:
5323 default:
5324 return (SET_ERROR(EINVAL));
5325 }
5326 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5327 name) >= size) {
5328 return (SET_ERROR(ENAMETOOLONG));
5329 }
5330 return (0);
5331 }
5332
5333 static int
zfs_ensure_xattr_cached(znode_t * zp)5334 zfs_ensure_xattr_cached(znode_t *zp)
5335 {
5336 int error = 0;
5337
5338 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5339
5340 if (zp->z_xattr_cached != NULL)
5341 return (0);
5342
5343 if (rw_write_held(&zp->z_xattr_lock))
5344 return (zfs_sa_get_xattr(zp));
5345
5346 if (!rw_tryupgrade(&zp->z_xattr_lock)) {
5347 rw_exit(&zp->z_xattr_lock);
5348 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5349 }
5350 if (zp->z_xattr_cached == NULL)
5351 error = zfs_sa_get_xattr(zp);
5352 rw_downgrade(&zp->z_xattr_lock);
5353 return (error);
5354 }
5355
5356 #ifndef _SYS_SYSPROTO_H_
5357 struct vop_getextattr {
5358 IN struct vnode *a_vp;
5359 IN int a_attrnamespace;
5360 IN const char *a_name;
5361 INOUT struct uio *a_uio;
5362 OUT size_t *a_size;
5363 IN struct ucred *a_cred;
5364 IN struct thread *a_td;
5365 };
5366 #endif
5367
5368 static int
zfs_getextattr_dir(struct vop_getextattr_args * ap,const char * attrname)5369 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
5370 {
5371 struct thread *td = ap->a_td;
5372 struct nameidata nd;
5373 struct vattr va;
5374 vnode_t *xvp = NULL, *vp;
5375 int error, flags;
5376
5377 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5378 LOOKUP_XATTR, B_FALSE);
5379 if (error != 0)
5380 return (error);
5381
5382 flags = FREAD;
5383 #if __FreeBSD_version < 1400043
5384 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5385 xvp, td);
5386 #else
5387 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5388 #endif
5389 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5390 if (error != 0)
5391 return (error);
5392 vp = nd.ni_vp;
5393 NDFREE_PNBUF(&nd);
5394
5395 if (ap->a_size != NULL) {
5396 error = VOP_GETATTR(vp, &va, ap->a_cred);
5397 if (error == 0)
5398 *ap->a_size = (size_t)va.va_size;
5399 } else if (ap->a_uio != NULL)
5400 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5401
5402 VOP_UNLOCK1(vp);
5403 vn_close(vp, flags, ap->a_cred, td);
5404 return (error);
5405 }
5406
5407 static int
zfs_getextattr_sa(struct vop_getextattr_args * ap,const char * attrname)5408 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5409 {
5410 znode_t *zp = VTOZ(ap->a_vp);
5411 uchar_t *nv_value;
5412 uint_t nv_size;
5413 int error;
5414
5415 error = zfs_ensure_xattr_cached(zp);
5416 if (error != 0)
5417 return (error);
5418
5419 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5420 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5421
5422 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5423 &nv_value, &nv_size);
5424 if (error)
5425 return (error);
5426
5427 if (ap->a_size != NULL)
5428 *ap->a_size = nv_size;
5429 else if (ap->a_uio != NULL)
5430 error = uiomove(nv_value, nv_size, ap->a_uio);
5431
5432 return (error);
5433 }
5434
5435 /*
5436 * Vnode operation to retrieve a named extended attribute.
5437 */
5438 static int
zfs_getextattr(struct vop_getextattr_args * ap)5439 zfs_getextattr(struct vop_getextattr_args *ap)
5440 {
5441 znode_t *zp = VTOZ(ap->a_vp);
5442 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5443 char attrname[EXTATTR_MAXNAMELEN+1];
5444 int error;
5445
5446 /*
5447 * If the xattr property is off, refuse the request.
5448 */
5449 if (!(zfsvfs->z_flags & ZSB_XATTR))
5450 return (SET_ERROR(EOPNOTSUPP));
5451
5452 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5453 ap->a_cred, ap->a_td, VREAD);
5454 if (error != 0)
5455 return (error);
5456
5457 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5458 sizeof (attrname));
5459 if (error != 0)
5460 return (error);
5461
5462 error = ENOENT;
5463 ZFS_ENTER(zfsvfs);
5464 ZFS_VERIFY_ZP(zp);
5465 rw_enter(&zp->z_xattr_lock, RW_READER);
5466 if (zfsvfs->z_use_sa && zp->z_is_sa)
5467 error = zfs_getextattr_sa(ap, attrname);
5468 if (error == ENOENT)
5469 error = zfs_getextattr_dir(ap, attrname);
5470 rw_exit(&zp->z_xattr_lock);
5471 ZFS_EXIT(zfsvfs);
5472 if (error == ENOENT)
5473 error = SET_ERROR(ENOATTR);
5474 return (error);
5475 }
5476
5477 #ifndef _SYS_SYSPROTO_H_
5478 struct vop_deleteextattr {
5479 IN struct vnode *a_vp;
5480 IN int a_attrnamespace;
5481 IN const char *a_name;
5482 IN struct ucred *a_cred;
5483 IN struct thread *a_td;
5484 };
5485 #endif
5486
5487 static int
zfs_deleteextattr_dir(struct vop_deleteextattr_args * ap,const char * attrname)5488 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
5489 {
5490 struct nameidata nd;
5491 vnode_t *xvp = NULL, *vp;
5492 int error;
5493
5494 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5495 LOOKUP_XATTR, B_FALSE);
5496 if (error != 0)
5497 return (error);
5498
5499 #if __FreeBSD_version < 1400043
5500 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5501 UIO_SYSSPACE, attrname, xvp, ap->a_td);
5502 #else
5503 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5504 UIO_SYSSPACE, attrname, xvp);
5505 #endif
5506 error = namei(&nd);
5507 if (error != 0)
5508 return (error);
5509
5510 vp = nd.ni_vp;
5511 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5512 NDFREE_PNBUF(&nd);
5513
5514 vput(nd.ni_dvp);
5515 if (vp == nd.ni_dvp)
5516 vrele(vp);
5517 else
5518 vput(vp);
5519
5520 return (error);
5521 }
5522
5523 static int
zfs_deleteextattr_sa(struct vop_deleteextattr_args * ap,const char * attrname)5524 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5525 {
5526 znode_t *zp = VTOZ(ap->a_vp);
5527 nvlist_t *nvl;
5528 int error;
5529
5530 error = zfs_ensure_xattr_cached(zp);
5531 if (error != 0)
5532 return (error);
5533
5534 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5535 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5536
5537 nvl = zp->z_xattr_cached;
5538 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5539 if (error == 0)
5540 error = zfs_sa_set_xattr(zp);
5541 if (error != 0) {
5542 zp->z_xattr_cached = NULL;
5543 nvlist_free(nvl);
5544 }
5545 return (error);
5546 }
5547
5548 /*
5549 * Vnode operation to remove a named attribute.
5550 */
5551 static int
zfs_deleteextattr(struct vop_deleteextattr_args * ap)5552 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5553 {
5554 znode_t *zp = VTOZ(ap->a_vp);
5555 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5556 char attrname[EXTATTR_MAXNAMELEN+1];
5557 int error;
5558
5559 /*
5560 * If the xattr property is off, refuse the request.
5561 */
5562 if (!(zfsvfs->z_flags & ZSB_XATTR))
5563 return (SET_ERROR(EOPNOTSUPP));
5564
5565 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5566 ap->a_cred, ap->a_td, VWRITE);
5567 if (error != 0)
5568 return (error);
5569
5570 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5571 sizeof (attrname));
5572 if (error != 0)
5573 return (error);
5574
5575 size_t size = 0;
5576 struct vop_getextattr_args vga = {
5577 .a_vp = ap->a_vp,
5578 .a_size = &size,
5579 .a_cred = ap->a_cred,
5580 .a_td = ap->a_td,
5581 };
5582 error = ENOENT;
5583 ZFS_ENTER(zfsvfs);
5584 ZFS_VERIFY_ZP(zp);
5585 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5586 if (zfsvfs->z_use_sa && zp->z_is_sa) {
5587 error = zfs_getextattr_sa(&vga, attrname);
5588 if (error == 0)
5589 error = zfs_deleteextattr_sa(ap, attrname);
5590 }
5591 if (error == ENOENT) {
5592 error = zfs_getextattr_dir(&vga, attrname);
5593 if (error == 0)
5594 error = zfs_deleteextattr_dir(ap, attrname);
5595 }
5596 rw_exit(&zp->z_xattr_lock);
5597 ZFS_EXIT(zfsvfs);
5598 if (error == ENOENT)
5599 error = SET_ERROR(ENOATTR);
5600 return (error);
5601 }
5602
5603 #ifndef _SYS_SYSPROTO_H_
5604 struct vop_setextattr {
5605 IN struct vnode *a_vp;
5606 IN int a_attrnamespace;
5607 IN const char *a_name;
5608 INOUT struct uio *a_uio;
5609 IN struct ucred *a_cred;
5610 IN struct thread *a_td;
5611 };
5612 #endif
5613
5614 static int
zfs_setextattr_dir(struct vop_setextattr_args * ap,const char * attrname)5615 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5616 {
5617 struct thread *td = ap->a_td;
5618 struct nameidata nd;
5619 struct vattr va;
5620 vnode_t *xvp = NULL, *vp;
5621 int error, flags;
5622
5623 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5624 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5625 if (error != 0)
5626 return (error);
5627
5628 flags = FFLAGS(O_WRONLY | O_CREAT);
5629 #if __FreeBSD_version < 1400043
5630 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5631 #else
5632 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5633 #endif
5634 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5635 NULL);
5636 if (error != 0)
5637 return (error);
5638 vp = nd.ni_vp;
5639 NDFREE_PNBUF(&nd);
5640
5641 VATTR_NULL(&va);
5642 va.va_size = 0;
5643 error = VOP_SETATTR(vp, &va, ap->a_cred);
5644 if (error == 0)
5645 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5646
5647 VOP_UNLOCK1(vp);
5648 vn_close(vp, flags, ap->a_cred, td);
5649 return (error);
5650 }
5651
5652 static int
zfs_setextattr_sa(struct vop_setextattr_args * ap,const char * attrname)5653 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5654 {
5655 znode_t *zp = VTOZ(ap->a_vp);
5656 nvlist_t *nvl;
5657 size_t sa_size;
5658 int error;
5659
5660 error = zfs_ensure_xattr_cached(zp);
5661 if (error != 0)
5662 return (error);
5663
5664 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5665 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5666
5667 nvl = zp->z_xattr_cached;
5668 size_t entry_size = ap->a_uio->uio_resid;
5669 if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5670 return (SET_ERROR(EFBIG));
5671 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5672 if (error != 0)
5673 return (error);
5674 if (sa_size > DXATTR_MAX_SA_SIZE)
5675 return (SET_ERROR(EFBIG));
5676 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5677 error = uiomove(buf, entry_size, ap->a_uio);
5678 if (error == 0)
5679 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5680 kmem_free(buf, entry_size);
5681 if (error == 0)
5682 error = zfs_sa_set_xattr(zp);
5683 if (error != 0) {
5684 zp->z_xattr_cached = NULL;
5685 nvlist_free(nvl);
5686 }
5687 return (error);
5688 }
5689
5690 /*
5691 * Vnode operation to set a named attribute.
5692 */
5693 static int
zfs_setextattr(struct vop_setextattr_args * ap)5694 zfs_setextattr(struct vop_setextattr_args *ap)
5695 {
5696 znode_t *zp = VTOZ(ap->a_vp);
5697 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5698 char attrname[EXTATTR_MAXNAMELEN+1];
5699 int error;
5700
5701 /*
5702 * If the xattr property is off, refuse the request.
5703 */
5704 if (!(zfsvfs->z_flags & ZSB_XATTR))
5705 return (SET_ERROR(EOPNOTSUPP));
5706
5707 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5708 ap->a_cred, ap->a_td, VWRITE);
5709 if (error != 0)
5710 return (error);
5711
5712 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5713 sizeof (attrname));
5714 if (error != 0)
5715 return (error);
5716
5717 struct vop_deleteextattr_args vda = {
5718 .a_vp = ap->a_vp,
5719 .a_cred = ap->a_cred,
5720 .a_td = ap->a_td,
5721 };
5722 error = ENOENT;
5723 ZFS_ENTER(zfsvfs);
5724 ZFS_VERIFY_ZP(zp);
5725 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5726 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5727 error = zfs_setextattr_sa(ap, attrname);
5728 if (error == 0)
5729 /*
5730 * Successfully put into SA, we need to clear the one
5731 * in dir if present.
5732 */
5733 zfs_deleteextattr_dir(&vda, attrname);
5734 }
5735 if (error) {
5736 error = zfs_setextattr_dir(ap, attrname);
5737 if (error == 0 && zp->z_is_sa)
5738 /*
5739 * Successfully put into dir, we need to clear the one
5740 * in SA if present.
5741 */
5742 zfs_deleteextattr_sa(&vda, attrname);
5743 }
5744 rw_exit(&zp->z_xattr_lock);
5745 ZFS_EXIT(zfsvfs);
5746 return (error);
5747 }
5748
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument layout of the listextattr vnode operation, spelled out for
 * reference.  Only compiled when _SYS_SYSPROTO_H_ is not defined.
 *
 * NOTE(review): the handlers below take struct vop_listextattr_args; the
 * tag used here lacks the _args suffix -- confirm which one is intended.
 */
struct vop_listextattr {
	IN struct vnode *a_vp;		/* vnode whose attributes are listed */
	IN int a_attrnamespace;		/* attribute namespace to list */
	INOUT struct uio *a_uio;	/* receives the entries; may be NULL */
	OUT size_t *a_size;		/* receives the total size; may be NULL */
	IN struct ucred *a_cred;	/* credentials used for the access check */
	IN struct thread *a_td;		/* calling thread */
};
#endif
5759
5760 static int
zfs_listextattr_dir(struct vop_listextattr_args * ap,const char * attrprefix)5761 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5762 {
5763 struct thread *td = ap->a_td;
5764 struct nameidata nd;
5765 uint8_t dirbuf[sizeof (struct dirent)];
5766 struct iovec aiov;
5767 struct uio auio;
5768 vnode_t *xvp = NULL, *vp;
5769 int error, eof;
5770
5771 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5772 LOOKUP_XATTR, B_FALSE);
5773 if (error != 0) {
5774 /*
5775 * ENOATTR means that the EA directory does not yet exist,
5776 * i.e. there are no extended attributes there.
5777 */
5778 if (error == ENOATTR)
5779 error = 0;
5780 return (error);
5781 }
5782
5783 #if __FreeBSD_version < 1400043
5784 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5785 UIO_SYSSPACE, ".", xvp, td);
5786 #else
5787 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5788 UIO_SYSSPACE, ".", xvp);
5789 #endif
5790 error = namei(&nd);
5791 if (error != 0)
5792 return (error);
5793 vp = nd.ni_vp;
5794 NDFREE_PNBUF(&nd);
5795
5796 auio.uio_iov = &aiov;
5797 auio.uio_iovcnt = 1;
5798 auio.uio_segflg = UIO_SYSSPACE;
5799 auio.uio_td = td;
5800 auio.uio_rw = UIO_READ;
5801 auio.uio_offset = 0;
5802
5803 size_t plen = strlen(attrprefix);
5804
5805 do {
5806 aiov.iov_base = (void *)dirbuf;
5807 aiov.iov_len = sizeof (dirbuf);
5808 auio.uio_resid = sizeof (dirbuf);
5809 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5810 if (error != 0)
5811 break;
5812 int done = sizeof (dirbuf) - auio.uio_resid;
5813 for (int pos = 0; pos < done; ) {
5814 struct dirent *dp = (struct dirent *)(dirbuf + pos);
5815 pos += dp->d_reclen;
5816 /*
5817 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5818 * is what we get when attribute was created on Solaris.
5819 */
5820 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5821 continue;
5822 else if (plen == 0 &&
5823 strncmp(dp->d_name, "freebsd:", 8) == 0)
5824 continue;
5825 else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5826 continue;
5827 uint8_t nlen = dp->d_namlen - plen;
5828 if (ap->a_size != NULL) {
5829 *ap->a_size += 1 + nlen;
5830 } else if (ap->a_uio != NULL) {
5831 /*
5832 * Format of extattr name entry is one byte for
5833 * length and the rest for name.
5834 */
5835 error = uiomove(&nlen, 1, ap->a_uio);
5836 if (error == 0) {
5837 char *namep = dp->d_name + plen;
5838 error = uiomove(namep, nlen, ap->a_uio);
5839 }
5840 if (error != 0)
5841 break;
5842 }
5843 }
5844 } while (!eof && error == 0);
5845
5846 vput(vp);
5847 return (error);
5848 }
5849
5850 static int
zfs_listextattr_sa(struct vop_listextattr_args * ap,const char * attrprefix)5851 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5852 {
5853 znode_t *zp = VTOZ(ap->a_vp);
5854 int error;
5855
5856 error = zfs_ensure_xattr_cached(zp);
5857 if (error != 0)
5858 return (error);
5859
5860 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5861 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5862
5863 size_t plen = strlen(attrprefix);
5864 nvpair_t *nvp = NULL;
5865 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5866 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5867
5868 const char *name = nvpair_name(nvp);
5869 if (plen == 0 && strncmp(name, "freebsd:", 8) == 0)
5870 continue;
5871 else if (strncmp(name, attrprefix, plen) != 0)
5872 continue;
5873 uint8_t nlen = strlen(name) - plen;
5874 if (ap->a_size != NULL) {
5875 *ap->a_size += 1 + nlen;
5876 } else if (ap->a_uio != NULL) {
5877 /*
5878 * Format of extattr name entry is one byte for
5879 * length and the rest for name.
5880 */
5881 error = uiomove(&nlen, 1, ap->a_uio);
5882 if (error == 0) {
5883 char *namep = __DECONST(char *, name) + plen;
5884 error = uiomove(namep, nlen, ap->a_uio);
5885 }
5886 if (error != 0)
5887 break;
5888 }
5889 }
5890
5891 return (error);
5892 }
5893
5894 /*
5895 * Vnode operation to retrieve extended attributes on a vnode.
5896 */
5897 static int
zfs_listextattr(struct vop_listextattr_args * ap)5898 zfs_listextattr(struct vop_listextattr_args *ap)
5899 {
5900 znode_t *zp = VTOZ(ap->a_vp);
5901 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5902 char attrprefix[16];
5903 int error;
5904
5905 if (ap->a_size != NULL)
5906 *ap->a_size = 0;
5907
5908 /*
5909 * If the xattr property is off, refuse the request.
5910 */
5911 if (!(zfsvfs->z_flags & ZSB_XATTR))
5912 return (SET_ERROR(EOPNOTSUPP));
5913
5914 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5915 ap->a_cred, ap->a_td, VREAD);
5916 if (error != 0)
5917 return (error);
5918
5919 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5920 sizeof (attrprefix));
5921 if (error != 0)
5922 return (error);
5923
5924 ZFS_ENTER(zfsvfs);
5925 ZFS_VERIFY_ZP(zp);
5926 rw_enter(&zp->z_xattr_lock, RW_READER);
5927 if (zfsvfs->z_use_sa && zp->z_is_sa)
5928 error = zfs_listextattr_sa(ap, attrprefix);
5929 if (error == 0)
5930 error = zfs_listextattr_dir(ap, attrprefix);
5931 rw_exit(&zp->z_xattr_lock);
5932 ZFS_EXIT(zfsvfs);
5933 return (error);
5934 }
5935
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument layout of the getacl vnode operation, spelled out for
 * reference.  Only compiled when _SYS_SYSPROTO_H_ is not defined.
 *
 * NOTE(review): zfs_freebsd_getacl() accesses these members as a_vp,
 * a_type, a_aclp and a_cred; the names here lack the a_ prefix -- confirm
 * against the real definition.
 */
struct vop_getacl_args {
	struct vnode *vp;	/* vnode whose ACL is read */
	acl_type_t type;	/* requested ACL type */
	struct acl *aclp;	/* ACL to fill in */
	struct ucred *cred;	/* caller's credentials */
	struct thread *td;	/* calling thread */
};
#endif
5945
5946 static int
zfs_freebsd_getacl(struct vop_getacl_args * ap)5947 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5948 {
5949 int error;
5950 vsecattr_t vsecattr;
5951
5952 if (ap->a_type != ACL_TYPE_NFS4)
5953 return (EINVAL);
5954
5955 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5956 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5957 &vsecattr, 0, ap->a_cred)))
5958 return (error);
5959
5960 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5961 vsecattr.vsa_aclcnt);
5962 if (vsecattr.vsa_aclentp != NULL)
5963 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5964
5965 return (error);
5966 }
5967
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument layout of the setacl vnode operation, spelled out for
 * reference.  Only compiled when _SYS_SYSPROTO_H_ is not defined.
 *
 * NOTE(review): zfs_freebsd_setacl() accesses these members as a_vp,
 * a_type, a_aclp and a_cred; the names here lack the a_ prefix -- confirm
 * against the real definition.
 */
struct vop_setacl_args {
	struct vnode *vp;	/* vnode whose ACL is replaced */
	acl_type_t type;	/* ACL type being set */
	struct acl *aclp;	/* new ACL */
	struct ucred *cred;	/* caller's credentials */
	struct thread *td;	/* calling thread */
};
#endif
5977
5978 static int
zfs_freebsd_setacl(struct vop_setacl_args * ap)5979 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5980 {
5981 int error;
5982 vsecattr_t vsecattr;
5983 int aclbsize; /* size of acl list in bytes */
5984 aclent_t *aaclp;
5985
5986 if (ap->a_type != ACL_TYPE_NFS4)
5987 return (EINVAL);
5988
5989 if (ap->a_aclp == NULL)
5990 return (EINVAL);
5991
5992 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5993 return (EINVAL);
5994
5995 /*
5996 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
5997 * splitting every entry into two and appending "canonical six"
5998 * entries at the end. Don't allow for setting an ACL that would
5999 * cause chmod(2) to run out of ACL entries.
6000 */
6001 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
6002 return (ENOSPC);
6003
6004 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
6005 if (error != 0)
6006 return (error);
6007
6008 vsecattr.vsa_mask = VSA_ACE;
6009 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
6010 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
6011 aaclp = vsecattr.vsa_aclentp;
6012 vsecattr.vsa_aclentsz = aclbsize;
6013
6014 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
6015 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
6016 kmem_free(aaclp, aclbsize);
6017
6018 return (error);
6019 }
6020
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument layout of the aclcheck vnode operation, spelled out for
 * reference.  Only compiled when _SYS_SYSPROTO_H_ is not defined.
 */
struct vop_aclcheck_args {
	struct vnode *vp;	/* vnode the ACL would apply to */
	acl_type_t type;	/* ACL type */
	struct acl *aclp;	/* ACL to check */
	struct ucred *cred;	/* caller's credentials */
	struct thread *td;	/* calling thread */
};
#endif
6030
6031 static int
zfs_freebsd_aclcheck(struct vop_aclcheck_args * ap)6032 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
6033 {
6034
6035 return (EOPNOTSUPP);
6036 }
6037
/*
 * VOP_VPTOCNP handler: translate a vnode into its parent vnode plus the
 * name of the vnode within that parent.
 *
 * On success the name is copied in front of the current contents at the
 * tail of ap->a_buf (*ap->a_buflen is decreased by the name length) and the
 * parent vnode is stored in *ap->a_vpp.  Returns ENOMEM when the name does
 * not fit in the remaining buffer, and ENOENT when the vnode turns out to
 * be doomed after it has been relocked.
 */
static int
zfs_vptocnp(struct vop_vptocnp_args *ap)
{
	vnode_t *covered_vp;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *zp = VTOZ(vp);
	int ltype;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If we are a snapshot mounted under .zfs, run the operation
	 * on the covered vnode.
	 *
	 * The branch below handles the opposite case: the znode is not the
	 * filesystem root, or the filesystem is its own z_parent, so the
	 * parent and name are resolved within this filesystem.
	 */
	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
		char name[MAXNAMLEN + 1];
		znode_t *dzp;
		size_t len;

		error = zfs_znode_parent_and_name(zp, &dzp, name);
		if (error == 0) {
			len = strlen(name);
			/*
			 * NOTE(review): if zfs_znode_parent_and_name() returns
			 * dzp with a reference held, this ENOMEM path does not
			 * appear to release it -- confirm.
			 */
			if (*ap->a_buflen < len)
				error = SET_ERROR(ENOMEM);
		}
		if (error == 0) {
			/* Prepend the name at the tail of the buffer. */
			*ap->a_buflen -= len;
			bcopy(name, ap->a_buf + *ap->a_buflen, len);
			*ap->a_vpp = ZTOV(dzp);
		}
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ZFS_EXIT(zfsvfs);

	/*
	 * Take a hold on the covered vnode before vp is unlocked; vp stays
	 * unlocked while the covered vnode is locked and queried.
	 */
	covered_vp = vp->v_mount->mnt_vnodecovered;
#if __FreeBSD_version >= 1300045
	enum vgetstate vs = vget_prep(covered_vp);
#else
	vhold(covered_vp);
#endif
	/* Remember how vp was locked so the same lock type can be retaken. */
	ltype = VOP_ISLOCKED(vp);
	VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300045
	error = vget_finish(covered_vp, LK_SHARED, vs);
#else
	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
#endif
	if (error == 0) {
#if __FreeBSD_version >= 1300123
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
		    ap->a_buflen);
#else
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
		    ap->a_buf, ap->a_buflen);
#endif
		vput(covered_vp);
	}
	/* Relock vp; report ENOENT if it was doomed while unlocked. */
	vn_lock(vp, ltype | LK_RETRY);
	if (VN_IS_DOOMED(vp))
		error = SET_ERROR(ENOENT);
	return (error);
}
6104
#if __FreeBSD_version >= 1400032
/*
 * VOP_DEALLOCATE handler.
 *
 * The requested range (*ap->a_offset, *ap->a_len) is clipped to the current
 * file size and handed to zfs_freesp().  A range that is empty after
 * clipping succeeds immediately with *ap->a_len set to 0.  On success
 * *ap->a_offset is advanced past the processed range and *ap->a_len is set
 * to 0; the ZIL is committed when the dataset is sync=always or IO_SYNC was
 * requested.
 *
 * Returns EROFS on a read-only filesystem, otherwise the result of
 * zfs_freesp().
 */
static int
zfs_deallocate(struct vop_deallocate_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog;
	off_t start, span, size;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EROFS));
	}

	zilog = zfsvfs->z_log;
	start = *ap->a_offset;
	span = *ap->a_len;
	size = zp->z_size;

	/* Clip the request to the end of the file. */
	if (start + span > size)
		span = size - start;

	/* Fast path for out-of-range request. */
	if (span <= 0) {
		*ap->a_len = 0;
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = zfs_freesp(zp, start, span, O_RDWR, TRUE);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
	    (ap->a_ioflag & IO_SYNC) != 0)
		zil_commit(zilog, zp->z_id);
	*ap->a_offset = start + span;
	*ap->a_len = 0;

	ZFS_EXIT(zfsvfs);
	return (error);
}
#endif
6153
/*
 * Vnode operation vectors for regular ZFS vnodes, FIFOs and the special
 * share files.  Each one is defined with its initializer further down.
 */
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
6157
6158 struct vop_vector zfs_vnodeops = {
6159 .vop_default = &default_vnodeops,
6160 .vop_inactive = zfs_freebsd_inactive,
6161 #if __FreeBSD_version >= 1300042
6162 .vop_need_inactive = zfs_freebsd_need_inactive,
6163 #endif
6164 .vop_reclaim = zfs_freebsd_reclaim,
6165 #if __FreeBSD_version >= 1300102
6166 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6167 #endif
6168 #if __FreeBSD_version >= 1300139
6169 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6170 #endif
6171 .vop_access = zfs_freebsd_access,
6172 .vop_allocate = VOP_EINVAL,
6173 #if __FreeBSD_version >= 1400032
6174 .vop_deallocate = zfs_deallocate,
6175 #endif
6176 .vop_lookup = zfs_cache_lookup,
6177 .vop_cachedlookup = zfs_freebsd_cachedlookup,
6178 .vop_getattr = zfs_freebsd_getattr,
6179 .vop_setattr = zfs_freebsd_setattr,
6180 .vop_create = zfs_freebsd_create,
6181 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
6182 .vop_mkdir = zfs_freebsd_mkdir,
6183 .vop_readdir = zfs_freebsd_readdir,
6184 .vop_fsync = zfs_freebsd_fsync,
6185 .vop_open = zfs_freebsd_open,
6186 .vop_close = zfs_freebsd_close,
6187 .vop_rmdir = zfs_freebsd_rmdir,
6188 .vop_ioctl = zfs_freebsd_ioctl,
6189 .vop_link = zfs_freebsd_link,
6190 .vop_symlink = zfs_freebsd_symlink,
6191 .vop_readlink = zfs_freebsd_readlink,
6192 .vop_read = zfs_freebsd_read,
6193 .vop_write = zfs_freebsd_write,
6194 .vop_remove = zfs_freebsd_remove,
6195 .vop_rename = zfs_freebsd_rename,
6196 .vop_pathconf = zfs_freebsd_pathconf,
6197 .vop_bmap = zfs_freebsd_bmap,
6198 .vop_fid = zfs_freebsd_fid,
6199 .vop_getextattr = zfs_getextattr,
6200 .vop_deleteextattr = zfs_deleteextattr,
6201 .vop_setextattr = zfs_setextattr,
6202 .vop_listextattr = zfs_listextattr,
6203 .vop_getacl = zfs_freebsd_getacl,
6204 .vop_setacl = zfs_freebsd_setacl,
6205 .vop_aclcheck = zfs_freebsd_aclcheck,
6206 .vop_getpages = zfs_freebsd_getpages,
6207 .vop_putpages = zfs_freebsd_putpages,
6208 .vop_vptocnp = zfs_vptocnp,
6209 #if __FreeBSD_version >= 1300064
6210 .vop_lock1 = vop_lock,
6211 .vop_unlock = vop_unlock,
6212 .vop_islocked = vop_islocked,
6213 #endif
6214 #if __FreeBSD_version >= 1400043
6215 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6216 #endif
6217 };
6218 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6219
6220 struct vop_vector zfs_fifoops = {
6221 .vop_default = &fifo_specops,
6222 .vop_fsync = zfs_freebsd_fsync,
6223 #if __FreeBSD_version >= 1300102
6224 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6225 #endif
6226 #if __FreeBSD_version >= 1300139
6227 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6228 #endif
6229 .vop_access = zfs_freebsd_access,
6230 .vop_getattr = zfs_freebsd_getattr,
6231 .vop_inactive = zfs_freebsd_inactive,
6232 .vop_read = VOP_PANIC,
6233 .vop_reclaim = zfs_freebsd_reclaim,
6234 .vop_setattr = zfs_freebsd_setattr,
6235 .vop_write = VOP_PANIC,
6236 .vop_pathconf = zfs_freebsd_pathconf,
6237 .vop_fid = zfs_freebsd_fid,
6238 .vop_getacl = zfs_freebsd_getacl,
6239 .vop_setacl = zfs_freebsd_setacl,
6240 .vop_aclcheck = zfs_freebsd_aclcheck,
6241 #if __FreeBSD_version >= 1400043
6242 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6243 #endif
6244 };
6245 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6246
6247 /*
6248 * special share hidden files vnode operations template
6249 */
6250 struct vop_vector zfs_shareops = {
6251 .vop_default = &default_vnodeops,
6252 #if __FreeBSD_version >= 1300121
6253 .vop_fplookup_vexec = VOP_EAGAIN,
6254 #endif
6255 #if __FreeBSD_version >= 1300139
6256 .vop_fplookup_symlink = VOP_EAGAIN,
6257 #endif
6258 .vop_access = zfs_freebsd_access,
6259 .vop_inactive = zfs_freebsd_inactive,
6260 .vop_reclaim = zfs_freebsd_reclaim,
6261 .vop_fid = zfs_freebsd_fid,
6262 .vop_pathconf = zfs_freebsd_pathconf,
6263 #if __FreeBSD_version >= 1400043
6264 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6265 #endif
6266 };
6267 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6268