1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/time.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
31 #include <sys/resource.h>
32 #include <sys/vfs.h>
33 #include <sys/vnode.h>
34 #include <sys/file.h>
35 #include <sys/kmem.h>
36 #include <sys/uio.h>
37 #include <sys/cmn_err.h>
38 #include <sys/errno.h>
39 #include <sys/stat.h>
40 #include <sys/unistd.h>
41 #include <sys/sunddi.h>
42 #include <sys/random.h>
43 #include <sys/policy.h>
44 #ifdef __FreeBSD__
45 #include <sys/kcondvar.h>
46 #include <sys/callb.h>
47 #include <sys/smp.h>
48 #endif
49 #include <sys/zfs_dir.h>
50 #include <sys/zfs_acl.h>
51 #include <sys/fs/zfs.h>
52 #include <sys/zap.h>
53 #include <sys/dmu.h>
54 #include <sys/atomic.h>
55 #include <sys/zfs_ctldir.h>
56 #include <sys/zfs_fuid.h>
57 #include <sys/sa.h>
58 #include <sys/zfs_sa.h>
59 #include <sys/dnlc.h>
60 #include <sys/extdirent.h>
61 
62 /*
63  * zfs_match_find() is used by zfs_dirent_lookup() to peform zap lookups
64  * of names after deciding which is the appropriate lookup interface.
65  */
66 static int
zfs_match_find(zfsvfs_t * zfsvfs,znode_t * dzp,const char * name,boolean_t exact,uint64_t * zoid)67 zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
68     boolean_t exact, uint64_t *zoid)
69 {
70           int error;
71 
72           if (zfsvfs->z_norm) {
73                     matchtype_t mt = exact? MT_EXACT : MT_FIRST;
74 
75                     /*
76                      * In the non-mixed case we only expect there would ever
77                      * be one match, but we need to use the normalizing lookup.
78                      */
79                     error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
80                         zoid, mt, NULL, 0, NULL);
81           } else {
82                     error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
83           }
84           *zoid = ZFS_DIRENT_OBJ(*zoid);
85 
86           return (error);
87 }
88 
89 /*
90  * Look up a directory entry under a locked vnode.
91  * dvp being locked gives us a guarantee that there are no concurrent
92  * modification of the directory and, thus, if a node can be found in
93  * the directory, then it must not be unlinked.
94  *
95  * Input arguments:
96  *        dzp       - znode for directory
97  *        name      - name of entry to lock
98  *        flag      - ZNEW: if the entry already exists, fail with EEXIST.
99  *                    ZEXISTS: if the entry does not exist, fail with ENOENT.
100  *                    ZXATTR: we want dzp's xattr directory
101  *
102  * Output arguments:
103  *        zpp       - pointer to the znode for the entry (NULL if there isn't one)
104  *
105  * Return value: 0 on success or errno on failure.
106  *
107  * NOTE: Always checks for, and rejects, '.' and '..'.
108  */
109 int
zfs_dirent_lookup(znode_t * dzp,const char * name,znode_t ** zpp,int flag)110 zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
111 {
112           zfsvfs_t  *zfsvfs = dzp->z_zfsvfs;
113           boolean_t exact;
114           uint64_t  zoid;
115           vnode_t             *vp = NULL;
116           int                 error = 0;
117 
118           ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
119 
120           *zpp = NULL;
121 
122           /*
123            * Verify that we are not trying to lock '.', '..', or '.zfs'
124            */
125           if (name[0] == '.' &&
126               (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
127               zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
128                     return (SET_ERROR(EEXIST));
129 
130           /*
131            * Case sensitivity and normalization preferences are set when
132            * the file system is created.  These are stored in the
133            * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
134            * affect how we perform zap lookups.
135            *
136            * Decide if exact matches should be requested when performing
137            * a zap lookup on file systems supporting case-insensitive
138            * access.
139            *
140            * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
141            * because in that case MT_EXACT and MT_FIRST should produce exactly
142            * the same result.
143            */
144           exact = zfsvfs->z_case == ZFS_CASE_MIXED;
145 
146           if (dzp->z_unlinked && !(flag & ZXATTR))
147                     return (ENOENT);
148           if (flag & ZXATTR) {
149                     error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
150                         sizeof (zoid));
151                     if (error == 0)
152                               error = (zoid == 0 ? ENOENT : 0);
153           } else {
154                     error = zfs_match_find(zfsvfs, dzp, name, exact, &zoid);
155           }
156           if (error) {
157                     if (error != ENOENT || (flag & ZEXISTS)) {
158                               return (error);
159                     }
160           } else {
161                     if (flag & ZNEW) {
162                               return (SET_ERROR(EEXIST));
163                     }
164                     error = zfs_zget(zfsvfs, zoid, zpp);
165                     if (error)
166                               return (error);
167                     ASSERT(!(*zpp)->z_unlinked);
168           }
169 
170           return (0);
171 }
172 
173 static int
zfs_dd_lookup(znode_t * dzp,znode_t ** zpp)174 zfs_dd_lookup(znode_t *dzp, znode_t **zpp)
175 {
176           zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
177           znode_t *zp;
178           uint64_t parent;
179           int error;
180 
181           ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
182           ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
183 
184           if (dzp->z_unlinked)
185                     return (ENOENT);
186 
187           if ((error = sa_lookup(dzp->z_sa_hdl,
188               SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
189                     return (error);
190 
191           error = zfs_zget(zfsvfs, parent, &zp);
192           if (error == 0)
193                     *zpp = zp;
194           return (error);
195 }
196 
197 int
zfs_dirlook(znode_t * dzp,const char * name,znode_t ** zpp)198 zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp)
199 {
200           zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
201           znode_t *zp;
202           int error = 0;
203 
204           ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
205           ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
206 
207           if (dzp->z_unlinked)
208                     return (SET_ERROR(ENOENT));
209 
210           if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
211                     *zpp = dzp;
212           } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
213                     error = zfs_dd_lookup(dzp, zpp);
214           } else {
215                     error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS);
216                     if (error == 0) {
217                               dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
218                               *zpp = zp;
219                     }
220           }
221           return (error);
222 }
223 
/*
 * Unlinked Set (formerly known as the "delete queue") Error Handling
 *
 * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
 * don't specify the name of the entry that we will be manipulating.  We
 * also fib and say that we won't be adding any new entries to the
 * unlinked set, even though we might (this is to lower the minimum file
 * size that can be deleted in a full filesystem).  So on the small
 * chance that the nlink list is using a fat zap (i.e., has more than
 * 2000 entries), we *may* not pre-read a block that's needed.
 * Therefore it is remotely possible for some of the assertions
 * regarding the unlinked set below to fail due to I/O error.  On a
 * nondebug system, this will result in the space being leaked.
 */
238 void
zfs_unlinked_add(znode_t * zp,dmu_tx_t * tx)239 zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
240 {
241           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
242 
243           ASSERT(zp->z_unlinked);
244           ASSERT(zp->z_links == 0);
245 
246           VERIFY3U(0, ==,
247               zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
248 }
249 
250 /*
251  * Clean up any znodes that had no links when we either crashed or
252  * (force) umounted the file system.
253  */
254 void
zfs_unlinked_drain(zfsvfs_t * zfsvfs)255 zfs_unlinked_drain(zfsvfs_t *zfsvfs)
256 {
257           zap_cursor_t        zc;
258           zap_attribute_t zap;
259           dmu_object_info_t doi;
260           znode_t             *zp;
261           int                 error;
262 
263           /*
264            * Interate over the contents of the unlinked set.
265            */
266           for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
267               zap_cursor_retrieve(&zc, &zap) == 0;
268               zap_cursor_advance(&zc)) {
269 
270                     /*
271                      * See what kind of object we have in list
272                      */
273 
274                     error = dmu_object_info(zfsvfs->z_os,
275                         zap.za_first_integer, &doi);
276                     if (error != 0)
277                               continue;
278 
279                     ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
280                         (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
281                     /*
282                      * We need to re-mark these list entries for deletion,
283                      * so we pull them back into core and set zp->z_unlinked.
284                      */
285                     error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
286 
287                     /*
288                      * We may pick up znodes that are already marked for deletion.
289                      * This could happen during the purge of an extended attribute
290                      * directory.  All we need to do is skip over them, since they
291                      * are already in the system marked z_unlinked.
292                      */
293                     if (error != 0)
294                               continue;
295 
296                     vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
297                     zp->z_unlinked = B_TRUE;
298                     vput(ZTOV(zp));
299           }
300           zap_cursor_fini(&zc);
301 }
302 
303 /*
304  * Delete the entire contents of a directory.  Return a count
305  * of the number of entries that could not be deleted. If we encounter
306  * an error, return a count of at least one so that the directory stays
307  * in the unlinked set.
308  *
309  * NOTE: this function assumes that the directory is inactive,
310  *        so there is no need to lock its entries before deletion.
311  *        Also, it assumes the directory contents is *only* regular
312  *        files.
313  */
314 static int
zfs_purgedir(znode_t * dzp)315 zfs_purgedir(znode_t *dzp)
316 {
317           zap_cursor_t        zc;
318           zap_attribute_t     zap;
319           znode_t             *xzp;
320           dmu_tx_t  *tx;
321           zfsvfs_t  *zfsvfs = dzp->z_zfsvfs;
322           int skipped = 0;
323           int error;
324 
325           for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
326               (error = zap_cursor_retrieve(&zc, &zap)) == 0;
327               zap_cursor_advance(&zc)) {
328                     error = zfs_zget(zfsvfs,
329                         ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
330                     if (error) {
331                               skipped += 1;
332                               continue;
333                     }
334 
335                     vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
336                     ASSERT((ZTOV(xzp)->v_type == VREG) ||
337                         (ZTOV(xzp)->v_type == VLNK));
338 
339                     tx = dmu_tx_create(zfsvfs->z_os);
340                     dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
341                     dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
342                     dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
343                     dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
344                     /* Is this really needed ? */
345                     zfs_sa_upgrade_txholds(tx, xzp);
346                     dmu_tx_mark_netfree(tx);
347                     error = dmu_tx_assign(tx, TXG_WAIT);
348                     if (error) {
349                               dmu_tx_abort(tx);
350                               vput(ZTOV(xzp));
351                               skipped += 1;
352                               continue;
353                     }
354 
355                     error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
356                     if (error)
357                               skipped += 1;
358                     dmu_tx_commit(tx);
359 
360                     vput(ZTOV(xzp));
361           }
362           zap_cursor_fini(&zc);
363           if (error != ENOENT)
364                     skipped += 1;
365           return (skipped);
366 }
367 
368 void
zfs_rmnode(znode_t * zp)369 zfs_rmnode(znode_t *zp)
370 {
371           zfsvfs_t  *zfsvfs = zp->z_zfsvfs;
372           objset_t  *os = zfsvfs->z_os;
373           znode_t             *xzp = NULL;
374           dmu_tx_t  *tx;
375           uint64_t  acl_obj;
376           uint64_t  xattr_obj;
377           int                 error;
378 
379           ASSERT(zp->z_links == 0);
380 #ifndef __NetBSD__
381           ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
382 #endif
383 
384           /*
385            * If this is an attribute directory, purge its contents.
386            */
387           if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
388               (zp->z_pflags & ZFS_XATTR)) {
389                     if (zfs_purgedir(zp) != 0) {
390                               /*
391                                * Not enough space to delete some xattrs.
392                                * Leave it in the unlinked set.
393                                */
394                               zfs_znode_dmu_fini(zp);
395                               zfs_znode_free(zp);
396                               return;
397                     }
398           } else {
399                     /*
400                      * Free up all the data in the file.  We don't do this for
401                      * XATTR directories because we need truncate and remove to be
402                      * in the same tx, like in zfs_znode_delete(). Otherwise, if
403                      * we crash here we'll end up with an inconsistent truncated
404                      * zap object in the delete queue.  Note a truncated file is
405                      * harmless since it only contains user data.
406                      */
407                     error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
408                     if (error) {
409                               /*
410                                * Not enough space.  Leave the file in the unlinked
411                                * set.
412                                */
413                               zfs_znode_dmu_fini(zp);
414                               zfs_znode_free(zp);
415                               return;
416                     }
417           }
418 
419           /*
420            * If the file has extended attributes, we're going to unlink
421            * the xattr dir.
422            */
423           error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
424               &xattr_obj, sizeof (xattr_obj));
425           if (error == 0 && xattr_obj) {
426                     error = zfs_zget(zfsvfs, xattr_obj, &xzp);
427                     ASSERT3S(error, ==, 0);
428                     vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
429           }
430 
431           acl_obj = zfs_external_acl(zp);
432 
433           /*
434            * Set up the final transaction.
435            */
436           tx = dmu_tx_create(os);
437           dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
438           dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
439           if (xzp) {
440                     dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
441                     dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
442           }
443           if (acl_obj)
444                     dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
445 
446           zfs_sa_upgrade_txholds(tx, zp);
447           error = dmu_tx_assign(tx, TXG_WAIT);
448           if (error) {
449                     /*
450                      * Not enough space to delete the file.  Leave it in the
451                      * unlinked set, leaking it until the fs is remounted (at
452                      * which point we'll call zfs_unlinked_drain() to process it).
453                      */
454                     dmu_tx_abort(tx);
455                     zfs_znode_dmu_fini(zp);
456                     zfs_znode_free(zp);
457                     goto out;
458           }
459 
460           if (xzp) {
461                     ASSERT(error == 0);
462                     xzp->z_unlinked = B_TRUE;     /* mark xzp for deletion */
463                     xzp->z_links = 0;   /* no more links to it */
464                     VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
465                         &xzp->z_links, sizeof (xzp->z_links), tx));
466                     zfs_unlinked_add(xzp, tx);
467           }
468 
469           /* Remove this znode from the unlinked set */
470           VERIFY3U(0, ==,
471               zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
472 
473           zfs_znode_delete(zp, tx);
474 
475           dmu_tx_commit(tx);
476 out:
477           if (xzp)
478                     vput(ZTOV(xzp));
479 }
480 
481 static uint64_t
zfs_dirent(znode_t * zp,uint64_t mode)482 zfs_dirent(znode_t *zp, uint64_t mode)
483 {
484           uint64_t de = zp->z_id;
485 
486           if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
487                     de |= IFTODT(mode) << 60;
488           return (de);
489 }
490 
491 /*
492  * Link zp into dzp.  Can only fail if zp has been unlinked.
493  */
494 int
zfs_link_create(znode_t * dzp,const char * name,znode_t * zp,dmu_tx_t * tx,int flag)495 zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
496     int flag)
497 {
498           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
499           vnode_t *vp = ZTOV(zp);
500           uint64_t value;
501           int zp_is_dir = (vp->v_type == VDIR);
502           sa_bulk_attr_t bulk[5];
503           uint64_t mtime[2], ctime[2];
504           int count = 0;
505           int error;
506 
507           ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
508           ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
509 #if 0
510           if (zp_is_dir) {
511                     error = 0;
512                     if (dzp->z_links >= LINK_MAX)
513                               error = SET_ERROR(EMLINK);
514                     return (error);
515           }
516 #endif
517           if (!(flag & ZRENAMING)) {
518                     if (zp->z_unlinked) {         /* no new links to unlinked zp */
519                               ASSERT(!(flag & (ZNEW | ZEXISTS)));
520                               return (SET_ERROR(ENOENT));
521                     }
522 #if 0
523                     if (zp->z_links >= LINK_MAX) {
524                               return (SET_ERROR(EMLINK));
525                     }
526 #endif
527                     zp->z_links++;
528                     SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
529                         &zp->z_links, sizeof (zp->z_links));
530 
531           } else {
532                     ASSERT(zp->z_unlinked == 0);
533           }
534           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
535               &dzp->z_id, sizeof (dzp->z_id));
536           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
537               &zp->z_pflags, sizeof (zp->z_pflags));
538 
539           if (!(flag & ZNEW)) {
540                     SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
541                         ctime, sizeof (ctime));
542                     zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
543                         ctime, B_TRUE);
544           }
545           error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
546           ASSERT0(error);
547 
548           dzp->z_size++;
549           dzp->z_links += zp_is_dir;
550           count = 0;
551           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
552               &dzp->z_size, sizeof (dzp->z_size));
553           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
554               &dzp->z_links, sizeof (dzp->z_links));
555           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
556               mtime, sizeof (mtime));
557           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
558               ctime, sizeof (ctime));
559           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
560               &dzp->z_pflags, sizeof (dzp->z_pflags));
561           zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
562           error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
563           ASSERT0(error);
564 
565           value = zfs_dirent(zp, zp->z_mode);
566           error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name,
567               8, 1, &value, tx);
568           VERIFY0(error);
569 
570           return (0);
571 }
572 
573 static int
zfs_dropname(znode_t * dzp,const char * name,znode_t * zp,dmu_tx_t * tx,int flag)574 zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
575     int flag)
576 {
577           int error;
578 
579           if (zp->z_zfsvfs->z_norm) {
580                     if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED)
581                               error = zap_remove_norm(zp->z_zfsvfs->z_os,
582                                   dzp->z_id, name, MT_EXACT, tx);
583                     else
584                               error = zap_remove_norm(zp->z_zfsvfs->z_os,
585                                   dzp->z_id, name, MT_FIRST, tx);
586           } else {
587                     error = zap_remove(zp->z_zfsvfs->z_os,
588                         dzp->z_id, name, tx);
589           }
590 
591           return (error);
592 }
593 
594 /*
595  * Unlink zp from dzp, and mark zp for deletion if this was the last link.
596  * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
597  * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
598  * If it's non-NULL, we use it to indicate whether the znode needs deletion,
599  * and it's the caller's job to do it.
600  */
601 int
zfs_link_destroy(znode_t * dzp,const char * name,znode_t * zp,dmu_tx_t * tx,int flag,boolean_t * unlinkedp)602 zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
603     int flag, boolean_t *unlinkedp)
604 {
605           zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
606           vnode_t *vp = ZTOV(zp);
607           int zp_is_dir = (vp->v_type == VDIR);
608           boolean_t unlinked = B_FALSE;
609           sa_bulk_attr_t bulk[5];
610           uint64_t mtime[2], ctime[2];
611           int count = 0;
612           int error;
613 
614           ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
615           ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
616 
617           if (!(flag & ZRENAMING)) {
618 
619                     if (zp_is_dir && !zfs_dirempty(zp)) {
620 #ifdef illumos
621                               return (SET_ERROR(EEXIST));
622 #else
623                               return (SET_ERROR(ENOTEMPTY));
624 #endif
625                     }
626 
627                     /*
628                      * If we get here, we are going to try to remove the object.
629                      * First try removing the name from the directory; if that
630                      * fails, return the error.
631                      */
632                     error = zfs_dropname(dzp, name, zp, tx, flag);
633                     if (error != 0) {
634                               return (error);
635                     }
636 
637                     if (zp->z_links <= zp_is_dir) {
638                               zfs_panic_recover("zfs: link count on vnode %p is %u, "
639                                   "should be at least %u", zp->z_vnode,
640                                   (int)zp->z_links,
641                                   zp_is_dir + 1);
642                               zp->z_links = zp_is_dir + 1;
643                     }
644                     if (--zp->z_links == zp_is_dir) {
645                               zp->z_unlinked = B_TRUE;
646                               zp->z_links = 0;
647                               unlinked = B_TRUE;
648                     } else {
649                               SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
650                                   NULL, &ctime, sizeof (ctime));
651                               SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
652                                   NULL, &zp->z_pflags, sizeof (zp->z_pflags));
653                               zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
654                                   B_TRUE);
655                     }
656                     SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
657                         NULL, &zp->z_links, sizeof (zp->z_links));
658                     error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
659                     count = 0;
660                     ASSERT0(error);
661           } else {
662                     ASSERT(zp->z_unlinked == 0);
663                     error = zfs_dropname(dzp, name, zp, tx, flag);
664                     if (error != 0)
665                               return (error);
666           }
667 
668           dzp->z_size--;                /* one dirent removed */
669           dzp->z_links -= zp_is_dir;    /* ".." link from zp */
670           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
671               NULL, &dzp->z_links, sizeof (dzp->z_links));
672           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
673               NULL, &dzp->z_size, sizeof (dzp->z_size));
674           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
675               NULL, ctime, sizeof (ctime));
676           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
677               NULL, mtime, sizeof (mtime));
678           SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
679               NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
680           zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
681           error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
682           ASSERT0(error);
683 
684           if (unlinkedp != NULL)
685                     *unlinkedp = unlinked;
686           else if (unlinked)
687                     zfs_unlinked_add(zp, tx);
688 
689           return (0);
690 }
691 
692 /*
693  * Indicate whether the directory is empty.
694  */
695 boolean_t
zfs_dirempty(znode_t * dzp)696 zfs_dirempty(znode_t *dzp)
697 {
698           return (dzp->z_size == 2);
699 }
700 
701 int
zfs_make_xattrdir(znode_t * zp,vattr_t * vap,vnode_t ** xvpp,cred_t * cr)702 zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
703 {
704           zfsvfs_t *zfsvfs = zp->z_zfsvfs;
705           znode_t *xzp;
706           dmu_tx_t *tx;
707           int error;
708           zfs_acl_ids_t acl_ids;
709           boolean_t fuid_dirtied;
710           uint64_t parent;
711 
712           *xvpp = NULL;
713 
714           /*
715            * In FreeBSD, access checking for creating an EA is being done
716            * in zfs_setextattr(),
717            */
718 #ifndef __FreeBSD_kernel__
719           if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
720                     return (error);
721 #endif
722 
723           if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
724               &acl_ids)) != 0)
725                     return (error);
726           if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
727                     zfs_acl_ids_free(&acl_ids);
728                     return (SET_ERROR(EDQUOT));
729           }
730 
731           getnewvnode_reserve(1);
732 
733           tx = dmu_tx_create(zfsvfs->z_os);
734           dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
735               ZFS_SA_BASE_ATTR_SIZE);
736           dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
737           dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
738           fuid_dirtied = zfsvfs->z_fuid_dirty;
739           if (fuid_dirtied)
740                     zfs_fuid_txhold(zfsvfs, tx);
741           error = dmu_tx_assign(tx, TXG_WAIT);
742           if (error) {
743                     zfs_acl_ids_free(&acl_ids);
744                     dmu_tx_abort(tx);
745                     return (error);
746           }
747           zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
748 
749           if (fuid_dirtied)
750                     zfs_fuid_sync(zfsvfs, tx);
751 
752 #ifdef DEBUG
753           error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
754               &parent, sizeof (parent));
755           ASSERT(error == 0 && parent == zp->z_id);
756 #endif
757 
758           VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
759               sizeof (xzp->z_id), tx));
760 
761           (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
762               xzp, "", NULL, acl_ids.z_fuidp, vap);
763 
764           zfs_acl_ids_free(&acl_ids);
765           dmu_tx_commit(tx);
766 
767           getnewvnode_drop_reserve();
768 
769           *xvpp = ZTOV(xzp);
770 
771           return (0);
772 }
773 
774 /*
775  * Return a znode for the extended attribute directory for zp.
776  * ** If the directory does not already exist, it is created **
777  *
778  *        IN:       zp        - znode to obtain attribute directory from
779  *                  cr        - credentials of caller
780  *                  flags     - flags from the VOP_LOOKUP call
781  *
782  *        OUT:      xzpp      - pointer to extended attribute znode
783  *
784  *        RETURN:   0 on success
785  *                  error number on failure
786  */
787 int
zfs_get_xattrdir(znode_t * zp,vnode_t ** xvpp,cred_t * cr,int flags)788 zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
789 {
790           zfsvfs_t  *zfsvfs = zp->z_zfsvfs;
791           znode_t             *xzp;
792           vattr_t             va;
793           int                 error;
794 top:
795           error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR);
796           if (error)
797                     return (error);
798 
799           if (xzp != NULL) {
800                     *xvpp = ZTOV(xzp);
801                     return (0);
802           }
803 
804 
805           if (!(flags & CREATE_XATTR_DIR)) {
806 #ifdef illumos
807                     return (SET_ERROR(ENOENT));
808 #else
809                     return (SET_ERROR(ENOATTR));
810 #endif
811           }
812 
813           if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
814                     return (SET_ERROR(EROFS));
815           }
816 
817           /*
818            * The ability to 'create' files in an attribute
819            * directory comes from the write_xattr permission on the base file.
820            *
821            * The ability to 'search' an attribute directory requires
822            * read_xattr permission on the base file.
823            *
824            * Once in a directory the ability to read/write attributes
825            * is controlled by the permissions on the attribute file.
826            */
827           va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
828           va.va_type = VDIR;
829           va.va_mode = S_IFDIR | S_ISVTX | 0777;
830           zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
831 
832           error = zfs_make_xattrdir(zp, &va, xvpp, cr);
833 
834           if (error == ERESTART) {
835                     /* NB: we already did dmu_tx_wait() if necessary */
836                     goto top;
837           }
838           if (error == 0)
839                     VOP_UNLOCK(*xvpp, 0);
840 
841           return (error);
842 }
843 
844 /*
845  * Decide whether it is okay to remove within a sticky directory.
846  *
847  * In sticky directories, write access is not sufficient;
848  * you can remove entries from a directory only if:
849  *
850  *        you own the directory,
851  *        you own the entry,
852  *        the entry is a plain file and you have write access,
853  *        or you are privileged (checked in secpolicy...).
854  *
855  * The function returns 0 if remove access is granted.
856  */
857 int
zfs_sticky_remove_access(znode_t * zdp,znode_t * zp,cred_t * cr)858 zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
859 {
860           uid_t               uid;
861           uid_t               downer;
862           uid_t               fowner;
863           zfsvfs_t  *zfsvfs = zdp->z_zfsvfs;
864 
865           if (zdp->z_zfsvfs->z_replay)
866                     return (0);
867 
868           if ((zdp->z_mode & S_ISVTX) == 0)
869                     return (0);
870 
871           downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
872           fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
873 
874           if ((uid = crgetuid(cr)) == downer || uid == fowner ||
875               (ZTOV(zp)->v_type == VREG &&
876               zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
877                     return (0);
878           else
879                     return (secpolicy_vnode_remove(ZTOV(zp), cr));
880 }
881