1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright 2015 RackTop Systems.
26  * Copyright 2016 Nexenta Systems, Inc.
27  */
28 
29 /*
30  * Pool import support functions.
31  *
32  * To import a pool, we rely on reading the configuration information from the
33  * ZFS label of each device.  If we successfully read the label, then we
34  * organize the configuration information in the following hierarchy:
35  *
36  *        pool guid -> toplevel vdev guid -> label txg
37  *
38  * Duplicate entries matching this same tuple will be discarded.  Once we have
39  * examined every device, we pick the best label txg config for each toplevel
40  * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
41  * update any paths that have changed.  Finally, we attempt to import the pool
42  * using our derived config, and record the results.
43  */
44 
45 #include <ctype.h>
46 #include <devid.h>
47 #include <dirent.h>
48 #include <errno.h>
49 #include <libintl.h>
50 #include <stddef.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <sys/stat.h>
54 #include <unistd.h>
55 #include <fcntl.h>
56 #include <thread_pool.h>
57 #ifdef __FreeBSD__
58 #include <libgeom.h>
59 #endif
60 #ifdef __NetBSD__
61 #include <util.h>
62 static int native_ioctl(int fd, unsigned long cmd, void *arg);
63 #endif
64 
65 #include <sys/vdev_impl.h>
66 
67 #include "libzfs.h"
68 #include "libzfs_impl.h"
69 
70 /*
71  * Intermediate structures used to gather configuration information.
72  */
73 typedef struct config_entry {
74           uint64_t            ce_txg;
75           nvlist_t            *ce_config;
76           struct config_entry *ce_next;
77 } config_entry_t;
78 
79 typedef struct vdev_entry {
80           uint64_t            ve_guid;
81           config_entry_t                *ve_configs;
82           struct vdev_entry   *ve_next;
83 } vdev_entry_t;
84 
85 typedef struct pool_entry {
86           uint64_t            pe_guid;
87           vdev_entry_t                  *pe_vdevs;
88           struct pool_entry   *pe_next;
89 } pool_entry_t;
90 
/*
 * A vdev guid -> device path mapping, used to fix up paths in a config.
 *
 *	ne_name		path the label was read from
 *	ne_guid		guid of the vdev found at that path
 *	ne_next		next mapping in the list
 */
typedef struct name_entry {
	char			*ne_name;
	uint64_t		ne_guid;
	struct name_entry	*ne_next;
} name_entry_t;
96 
97 typedef struct pool_list {
98           pool_entry_t                  *pools;
99           name_entry_t                  *names;
100 } pool_list_t;
101 
/*
 * Return the encoded devid string for the device at 'path', or NULL if the
 * device cannot be opened or no devid/minor name can be obtained for it.
 * A non-NULL result is produced by devid_str_encode().
 *
 * When have_devid is not defined this always returns NULL.
 */
static char *
get_devid(const char *path)
{
#ifdef have_devid
	ddi_devid_t id;
	char *minor_name = NULL;
	char *encoded = NULL;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return (NULL);

	if (devid_get(fd, &id) == 0) {
		if (devid_get_minor_name(fd, &minor_name) == 0)
			encoded = devid_str_encode(id, minor_name);
		if (minor_name != NULL)
			devid_str_free(minor_name);
		devid_free(id);
	}

	(void) close(fd);
	return (encoded);
#else
	return (NULL);
#endif
}
129 
130 
131 /*
132  * Go through and fix up any path and/or devid information for the given vdev
133  * configuration.
134  */
135 static int
fix_paths(nvlist_t * nv,name_entry_t * names)136 fix_paths(nvlist_t *nv, name_entry_t *names)
137 {
138           nvlist_t **child;
139           uint_t c, children;
140           uint64_t guid;
141           name_entry_t *ne, *best;
142           char *path, *devid;
143           int matched;
144 
145           if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
146               &child, &children) == 0) {
147                     for (c = 0; c < children; c++)
148                               if (fix_paths(child[c], names) != 0)
149                                         return (-1);
150                     return (0);
151           }
152 
153           /*
154            * This is a leaf (file or disk) vdev.  In either case, go through
155            * the name list and see if we find a matching guid.  If so, replace
156            * the path and see if we can calculate a new devid.
157            *
158            * There may be multiple names associated with a particular guid, in
159            * which case we have overlapping slices or multiple paths to the same
160            * disk.  If this is the case, then we want to pick the path that is
161            * the most similar to the original, where "most similar" is the number
162            * of matching characters starting from the end of the path.  This will
163            * preserve slice numbers even if the disks have been reorganized, and
164            * will also catch preferred disk names if multiple paths exist.
165            */
166           verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
167           if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
168                     path = NULL;
169 
170           matched = 0;
171           best = NULL;
172           for (ne = names; ne != NULL; ne = ne->ne_next) {
173                     if (ne->ne_guid == guid) {
174                               const char *src, *dst;
175                               int count;
176 
177                               if (path == NULL) {
178                                         best = ne;
179                                         break;
180                               }
181 
182                               src = ne->ne_name + strlen(ne->ne_name) - 1;
183                               dst = path + strlen(path) - 1;
184                               for (count = 0; src >= ne->ne_name && dst >= path;
185                                   src--, dst--, count++)
186                                         if (*src != *dst)
187                                                   break;
188 
189                               /*
190                                * At this point, 'count' is the number of characters
191                                * matched from the end.
192                                */
193                               if (count > matched || best == NULL) {
194                                         best = ne;
195                                         matched = count;
196                               }
197                     }
198           }
199 
200           if (best == NULL)
201                     return (0);
202 
203           if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
204                     return (-1);
205 
206           if ((devid = get_devid(best->ne_name)) == NULL) {
207                     (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
208           } else {
209                     if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
210                               devid_str_free(devid);
211                               return (-1);
212                     }
213                     devid_str_free(devid);
214           }
215 
216           return (0);
217 }
218 
219 /*
220  * Add the given configuration to the list of known devices.
221  */
222 static int
add_config(libzfs_handle_t * hdl,pool_list_t * pl,const char * path,nvlist_t * config)223 add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
224     nvlist_t *config)
225 {
226           uint64_t pool_guid, vdev_guid, top_guid, txg, state;
227           pool_entry_t *pe;
228           vdev_entry_t *ve;
229           config_entry_t *ce;
230           name_entry_t *ne;
231 
232           /*
233            * If this is a hot spare not currently in use or level 2 cache
234            * device, add it to the list of names to translate, but don't do
235            * anything else.
236            */
237           if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
238               &state) == 0 &&
239               (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
240               nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
241                     if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
242                               return (-1);
243 
244                     if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
245                               free(ne);
246                               return (-1);
247                     }
248                     ne->ne_guid = vdev_guid;
249                     ne->ne_next = pl->names;
250                     pl->names = ne;
251                     return (0);
252           }
253 
254           /*
255            * If we have a valid config but cannot read any of these fields, then
256            * it means we have a half-initialized label.  In vdev_label_init()
257            * we write a label with txg == 0 so that we can identify the device
258            * in case the user refers to the same disk later on.  If we fail to
259            * create the pool, we'll be left with a label in this state
260            * which should not be considered part of a valid pool.
261            */
262           if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
263               &pool_guid) != 0 ||
264               nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
265               &vdev_guid) != 0 ||
266               nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
267               &top_guid) != 0 ||
268               nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
269               &txg) != 0 || txg == 0) {
270                     nvlist_free(config);
271                     return (0);
272           }
273 
274           /*
275            * First, see if we know about this pool.  If not, then add it to the
276            * list of known pools.
277            */
278           for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
279                     if (pe->pe_guid == pool_guid)
280                               break;
281           }
282 
283           if (pe == NULL) {
284                     if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
285                               nvlist_free(config);
286                               return (-1);
287                     }
288                     pe->pe_guid = pool_guid;
289                     pe->pe_next = pl->pools;
290                     pl->pools = pe;
291           }
292 
293           /*
294            * Second, see if we know about this toplevel vdev.  Add it if its
295            * missing.
296            */
297           for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
298                     if (ve->ve_guid == top_guid)
299                               break;
300           }
301 
302           if (ve == NULL) {
303                     if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
304                               nvlist_free(config);
305                               return (-1);
306                     }
307                     ve->ve_guid = top_guid;
308                     ve->ve_next = pe->pe_vdevs;
309                     pe->pe_vdevs = ve;
310           }
311 
312           /*
313            * Third, see if we have a config with a matching transaction group.  If
314            * so, then we do nothing.  Otherwise, add it to the list of known
315            * configs.
316            */
317           for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
318                     if (ce->ce_txg == txg)
319                               break;
320           }
321 
322           if (ce == NULL) {
323                     if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
324                               nvlist_free(config);
325                               return (-1);
326                     }
327                     ce->ce_txg = txg;
328                     ce->ce_config = config;
329                     ce->ce_next = ve->ve_configs;
330                     ve->ve_configs = ce;
331           } else {
332                     nvlist_free(config);
333           }
334 
335           /*
336            * At this point we've successfully added our config to the list of
337            * known configs.  The last thing to do is add the vdev guid -> path
338            * mappings so that we can fix up the configuration as necessary before
339            * doing the import.
340            */
341           if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
342                     return (-1);
343 
344           if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
345                     free(ne);
346                     return (-1);
347           }
348 
349           ne->ne_guid = vdev_guid;
350           ne->ne_next = pl->names;
351           pl->names = ne;
352 
353           return (0);
354 }
355 
356 /*
357  * Returns true if the named pool matches the given GUID.
358  */
359 static int
pool_active(libzfs_handle_t * hdl,const char * name,uint64_t guid,boolean_t * isactive)360 pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
361     boolean_t *isactive)
362 {
363           zpool_handle_t *zhp;
364           uint64_t theguid;
365 
366           if (zpool_open_silent(hdl, name, &zhp) != 0)
367                     return (-1);
368 
369           if (zhp == NULL) {
370                     *isactive = B_FALSE;
371                     return (0);
372           }
373 
374           verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
375               &theguid) == 0);
376 
377           zpool_close(zhp);
378 
379           *isactive = (theguid == guid);
380           return (0);
381 }
382 
383 static nvlist_t *
refresh_config(libzfs_handle_t * hdl,nvlist_t * config)384 refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
385 {
386           nvlist_t *nvl;
387           zfs_cmd_t zc = { 0 };
388           int err;
389 
390           if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
391                     return (NULL);
392 
393           if (zcmd_alloc_dst_nvlist(hdl, &zc,
394               zc.zc_nvlist_conf_size * 2) != 0) {
395                     zcmd_free_nvlists(&zc);
396                     return (NULL);
397           }
398 
399           while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
400               &zc)) != 0 && errno == ENOMEM) {
401                     if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
402                               zcmd_free_nvlists(&zc);
403                               return (NULL);
404                     }
405           }
406 
407           if (err) {
408                     zcmd_free_nvlists(&zc);
409                     return (NULL);
410           }
411 
412           if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
413                     zcmd_free_nvlists(&zc);
414                     return (NULL);
415           }
416 
417           zcmd_free_nvlists(&zc);
418           return (nvl);
419 }
420 
421 /*
422  * Determine if the vdev id is a hole in the namespace.
423  */
424 boolean_t
vdev_is_hole(uint64_t * hole_array,uint_t holes,uint_t id)425 vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
426 {
427           for (int c = 0; c < holes; c++) {
428 
429                     /* Top-level is a hole */
430                     if (hole_array[c] == id)
431                               return (B_TRUE);
432           }
433           return (B_FALSE);
434 }
435 
436 /*
437  * Convert our list of pools into the definitive set of configurations.  We
438  * start by picking the best config for each toplevel vdev.  Once that's done,
439  * we assemble the toplevel vdevs into a full config for the pool.  We make a
440  * pass to fix up any incorrect paths, and then add it to the main list to
441  * return to the user.
442  */
443 static nvlist_t *
get_configs(libzfs_handle_t * hdl,pool_list_t * pl,boolean_t active_ok)444 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
445 {
446           pool_entry_t *pe;
447           vdev_entry_t *ve;
448           config_entry_t *ce;
449           nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
450           nvlist_t **spares, **l2cache;
451           uint_t i, nspares, nl2cache;
452           boolean_t config_seen;
453           uint64_t best_txg;
454           char *name, *hostname = NULL;
455           uint64_t guid;
456           uint_t children = 0;
457           nvlist_t **child = NULL;
458           uint_t holes;
459           uint64_t *hole_array, max_id;
460           uint_t c;
461           boolean_t isactive;
462           uint64_t hostid;
463           nvlist_t *nvl;
464           boolean_t found_one = B_FALSE;
465           boolean_t valid_top_config = B_FALSE;
466 
467           if (nvlist_alloc(&ret, 0, 0) != 0)
468                     goto nomem;
469 
470           for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
471                     uint64_t id, max_txg = 0;
472 
473                     if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
474                               goto nomem;
475                     config_seen = B_FALSE;
476 
477                     /*
478                      * Iterate over all toplevel vdevs.  Grab the pool configuration
479                      * from the first one we find, and then go through the rest and
480                      * add them as necessary to the 'vdevs' member of the config.
481                      */
482                     for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
483 
484                               /*
485                                * Determine the best configuration for this vdev by
486                                * selecting the config with the latest transaction
487                                * group.
488                                */
489                               best_txg = 0;
490                               for (ce = ve->ve_configs; ce != NULL;
491                                   ce = ce->ce_next) {
492 
493                                         if (ce->ce_txg > best_txg) {
494                                                   tmp = ce->ce_config;
495                                                   best_txg = ce->ce_txg;
496                                         }
497                               }
498 
499                               /*
500                                * We rely on the fact that the max txg for the
501                                * pool will contain the most up-to-date information
502                                * about the valid top-levels in the vdev namespace.
503                                */
504                               if (best_txg > max_txg) {
505                                         (void) nvlist_remove(config,
506                                             ZPOOL_CONFIG_VDEV_CHILDREN,
507                                             DATA_TYPE_UINT64);
508                                         (void) nvlist_remove(config,
509                                             ZPOOL_CONFIG_HOLE_ARRAY,
510                                             DATA_TYPE_UINT64_ARRAY);
511 
512                                         max_txg = best_txg;
513                                         hole_array = NULL;
514                                         holes = 0;
515                                         max_id = 0;
516                                         valid_top_config = B_FALSE;
517 
518                                         if (nvlist_lookup_uint64(tmp,
519                                             ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
520                                                   verify(nvlist_add_uint64(config,
521                                                       ZPOOL_CONFIG_VDEV_CHILDREN,
522                                                       max_id) == 0);
523                                                   valid_top_config = B_TRUE;
524                                         }
525 
526                                         if (nvlist_lookup_uint64_array(tmp,
527                                             ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
528                                             &holes) == 0) {
529                                                   verify(nvlist_add_uint64_array(config,
530                                                       ZPOOL_CONFIG_HOLE_ARRAY,
531                                                       hole_array, holes) == 0);
532                                         }
533                               }
534 
535                               if (!config_seen) {
536                                         /*
537                                          * Copy the relevant pieces of data to the pool
538                                          * configuration:
539                                          *
540                                          *        version
541                                          *        pool guid
542                                          *        name
543                                          *        comment (if available)
544                                          *        pool state
545                                          *        hostid (if available)
546                                          *        hostname (if available)
547                                          */
548                                         uint64_t state, version;
549                                         char *comment = NULL;
550 
551                                         version = fnvlist_lookup_uint64(tmp,
552                                             ZPOOL_CONFIG_VERSION);
553                                         fnvlist_add_uint64(config,
554                                             ZPOOL_CONFIG_VERSION, version);
555                                         guid = fnvlist_lookup_uint64(tmp,
556                                             ZPOOL_CONFIG_POOL_GUID);
557                                         fnvlist_add_uint64(config,
558                                             ZPOOL_CONFIG_POOL_GUID, guid);
559                                         name = fnvlist_lookup_string(tmp,
560                                             ZPOOL_CONFIG_POOL_NAME);
561                                         fnvlist_add_string(config,
562                                             ZPOOL_CONFIG_POOL_NAME, name);
563 
564                                         if (nvlist_lookup_string(tmp,
565                                             ZPOOL_CONFIG_COMMENT, &comment) == 0)
566                                                   fnvlist_add_string(config,
567                                                       ZPOOL_CONFIG_COMMENT, comment);
568 
569                                         state = fnvlist_lookup_uint64(tmp,
570                                             ZPOOL_CONFIG_POOL_STATE);
571                                         fnvlist_add_uint64(config,
572                                             ZPOOL_CONFIG_POOL_STATE, state);
573 
574                                         hostid = 0;
575                                         if (nvlist_lookup_uint64(tmp,
576                                             ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
577                                                   fnvlist_add_uint64(config,
578                                                       ZPOOL_CONFIG_HOSTID, hostid);
579                                                   hostname = fnvlist_lookup_string(tmp,
580                                                       ZPOOL_CONFIG_HOSTNAME);
581                                                   fnvlist_add_string(config,
582                                                       ZPOOL_CONFIG_HOSTNAME, hostname);
583                                         }
584 
585                                         config_seen = B_TRUE;
586                               }
587 
588                               /*
589                                * Add this top-level vdev to the child array.
590                                */
591                               verify(nvlist_lookup_nvlist(tmp,
592                                   ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
593                               verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
594                                   &id) == 0);
595 
596                               if (id >= children) {
597                                         nvlist_t **newchild;
598 
599                                         newchild = zfs_alloc(hdl, (id + 1) *
600                                             sizeof (nvlist_t *));
601                                         if (newchild == NULL)
602                                                   goto nomem;
603 
604                                         for (c = 0; c < children; c++)
605                                                   newchild[c] = child[c];
606 
607                                         free(child);
608                                         child = newchild;
609                                         children = id + 1;
610                               }
611                               if (nvlist_dup(nvtop, &child[id], 0) != 0)
612                                         goto nomem;
613 
614                     }
615 
616                     /*
617                      * If we have information about all the top-levels then
618                      * clean up the nvlist which we've constructed. This
619                      * means removing any extraneous devices that are
620                      * beyond the valid range or adding devices to the end
621                      * of our array which appear to be missing.
622                      */
623                     if (valid_top_config) {
624                               if (max_id < children) {
625                                         for (c = max_id; c < children; c++)
626                                                   nvlist_free(child[c]);
627                                         children = max_id;
628                               } else if (max_id > children) {
629                                         nvlist_t **newchild;
630 
631                                         newchild = zfs_alloc(hdl, (max_id) *
632                                             sizeof (nvlist_t *));
633                                         if (newchild == NULL)
634                                                   goto nomem;
635 
636                                         for (c = 0; c < children; c++)
637                                                   newchild[c] = child[c];
638 
639                                         free(child);
640                                         child = newchild;
641                                         children = max_id;
642                               }
643                     }
644 
645                     verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
646                         &guid) == 0);
647 
648                     /*
649                      * The vdev namespace may contain holes as a result of
650                      * device removal. We must add them back into the vdev
651                      * tree before we process any missing devices.
652                      */
653                     if (holes > 0) {
654                               ASSERT(valid_top_config);
655 
656                               for (c = 0; c < children; c++) {
657                                         nvlist_t *holey;
658 
659                                         if (child[c] != NULL ||
660                                             !vdev_is_hole(hole_array, holes, c))
661                                                   continue;
662 
663                                         if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
664                                             0) != 0)
665                                                   goto nomem;
666 
667                                         /*
668                                          * Holes in the namespace are treated as
669                                          * "hole" top-level vdevs and have a
670                                          * special flag set on them.
671                                          */
672                                         if (nvlist_add_string(holey,
673                                             ZPOOL_CONFIG_TYPE,
674                                             VDEV_TYPE_HOLE) != 0 ||
675                                             nvlist_add_uint64(holey,
676                                             ZPOOL_CONFIG_ID, c) != 0 ||
677                                             nvlist_add_uint64(holey,
678                                             ZPOOL_CONFIG_GUID, 0ULL) != 0) {
679                                                   nvlist_free(holey);
680                                                   goto nomem;
681                                         }
682                                         child[c] = holey;
683                               }
684                     }
685 
686                     /*
687                      * Look for any missing top-level vdevs.  If this is the case,
688                      * create a faked up 'missing' vdev as a placeholder.  We cannot
689                      * simply compress the child array, because the kernel performs
690                      * certain checks to make sure the vdev IDs match their location
691                      * in the configuration.
692                      */
693                     for (c = 0; c < children; c++) {
694                               if (child[c] == NULL) {
695                                         nvlist_t *missing;
696                                         if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
697                                             0) != 0)
698                                                   goto nomem;
699                                         if (nvlist_add_string(missing,
700                                             ZPOOL_CONFIG_TYPE,
701                                             VDEV_TYPE_MISSING) != 0 ||
702                                             nvlist_add_uint64(missing,
703                                             ZPOOL_CONFIG_ID, c) != 0 ||
704                                             nvlist_add_uint64(missing,
705                                             ZPOOL_CONFIG_GUID, 0ULL) != 0) {
706                                                   nvlist_free(missing);
707                                                   goto nomem;
708                                         }
709                                         child[c] = missing;
710                               }
711                     }
712 
713                     /*
714                      * Put all of this pool's top-level vdevs into a root vdev.
715                      */
716                     if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
717                               goto nomem;
718                     if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
719                         VDEV_TYPE_ROOT) != 0 ||
720                         nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
721                         nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
722                         nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
723                         child, children) != 0) {
724                               nvlist_free(nvroot);
725                               goto nomem;
726                     }
727 
728                     for (c = 0; c < children; c++)
729                               nvlist_free(child[c]);
730                     free(child);
731                     children = 0;
732                     child = NULL;
733 
734                     /*
735                      * Go through and fix up any paths and/or devids based on our
736                      * known list of vdev GUID -> path mappings.
737                      */
738                     if (fix_paths(nvroot, pl->names) != 0) {
739                               nvlist_free(nvroot);
740                               goto nomem;
741                     }
742 
743                     /*
744                      * Add the root vdev to this pool's configuration.
745                      */
746                     if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
747                         nvroot) != 0) {
748                               nvlist_free(nvroot);
749                               goto nomem;
750                     }
751                     nvlist_free(nvroot);
752 
753                     /*
754                      * zdb uses this path to report on active pools that were
755                      * imported or created using -R.
756                      */
757                     if (active_ok)
758                               goto add_pool;
759 
760                     /*
761                      * Determine if this pool is currently active, in which case we
762                      * can't actually import it.
763                      */
764                     verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
765                         &name) == 0);
766                     verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
767                         &guid) == 0);
768 
769                     if (pool_active(hdl, name, guid, &isactive) != 0)
770                               goto error;
771 
772                     if (isactive) {
773                               nvlist_free(config);
774                               config = NULL;
775                               continue;
776                     }
777 
778                     if ((nvl = refresh_config(hdl, config)) == NULL) {
779                               nvlist_free(config);
780                               config = NULL;
781                               continue;
782                     }
783 
784                     nvlist_free(config);
785                     config = nvl;
786 
787                     /*
788                      * Go through and update the paths for spares, now that we have
789                      * them.
790                      */
791                     verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
792                         &nvroot) == 0);
793                     if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
794                         &spares, &nspares) == 0) {
795                               for (i = 0; i < nspares; i++) {
796                                         if (fix_paths(spares[i], pl->names) != 0)
797                                                   goto nomem;
798                               }
799                     }
800 
801                     /*
802                      * Update the paths for l2cache devices.
803                      */
804                     if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
805                         &l2cache, &nl2cache) == 0) {
806                               for (i = 0; i < nl2cache; i++) {
807                                         if (fix_paths(l2cache[i], pl->names) != 0)
808                                                   goto nomem;
809                               }
810                     }
811 
812                     /*
813                      * Restore the original information read from the actual label.
814                      */
815                     (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
816                         DATA_TYPE_UINT64);
817                     (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
818                         DATA_TYPE_STRING);
819                     if (hostid != 0) {
820                               verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
821                                   hostid) == 0);
822                               verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
823                                   hostname) == 0);
824                     }
825 
826 add_pool:
827                     /*
828                      * Add this pool to the list of configs.
829                      */
830                     verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
831                         &name) == 0);
832                     if (nvlist_add_nvlist(ret, name, config) != 0)
833                               goto nomem;
834 
835                     found_one = B_TRUE;
836                     nvlist_free(config);
837                     config = NULL;
838           }
839 
840           if (!found_one) {
841                     nvlist_free(ret);
842                     ret = NULL;
843           }
844 
845           return (ret);
846 
847 nomem:
848           (void) no_memory(hdl);
849 error:
850           nvlist_free(config);
851           nvlist_free(ret);
852           for (c = 0; c < children; c++)
853                     nvlist_free(child[c]);
854           free(child);
855 
856           return (NULL);
857 }
858 
859 /*
860  * Return the offset of the given label.
861  */
862 static uint64_t
label_offset(uint64_t size,int l)863 label_offset(uint64_t size, int l)
864 {
865           ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
866           return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
867               0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
868 }
869 
870 /*
871  * Given a file descriptor, read the label information and return an nvlist
872  * describing the configuration, if there is one.
873  */
874 int
zpool_read_label(int fd,nvlist_t ** config)875 zpool_read_label(int fd, nvlist_t **config)
876 {
877           struct stat64 statbuf;
878           int l;
879           vdev_label_t *label;
880           uint64_t state, txg, size;
881 
882           *config = NULL;
883 
884           if (fstat64(fd, &statbuf) == -1)
885                     return (0);
886           size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
887 
888           if ((label = malloc(sizeof (vdev_label_t))) == NULL)
889                     return (-1);
890 
891           for (l = 0; l < VDEV_LABELS; l++) {
892                     if (pread64(fd, label, sizeof (vdev_label_t),
893                         label_offset(size, l)) != sizeof (vdev_label_t))
894                               continue;
895 
896                     if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
897                         sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
898                               continue;
899 
900                     if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
901                         &state) != 0 || state > POOL_STATE_L2CACHE) {
902                               nvlist_free(*config);
903                               continue;
904                     }
905 
906                     if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
907                         (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
908                         &txg) != 0 || txg == 0)) {
909                               nvlist_free(*config);
910                               continue;
911                     }
912 
913                     free(label);
914                     return (0);
915           }
916 
917           free(label);
918           *config = NULL;
919           return (0);
920 }
921 
922 typedef struct rdsk_node {
923           char *rn_name;
924           int rn_dfd;
925           libzfs_handle_t *rn_hdl;
926           nvlist_t *rn_config;
927           avl_tree_t *rn_avl;
928           avl_node_t rn_node;
929           boolean_t rn_nozpool;
930 } rdsk_node_t;
931 
932 static int
slice_cache_compare(const void * arg1,const void * arg2)933 slice_cache_compare(const void *arg1, const void *arg2)
934 {
935           const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
936           const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
937           char *nm1slice, *nm2slice;
938           int rv;
939 
940           /*
941            * slices zero and two are the most likely to provide results,
942            * so put those first
943            */
944           nm1slice = strstr(nm1, "s0");
945           nm2slice = strstr(nm2, "s0");
946           if (nm1slice && !nm2slice) {
947                     return (-1);
948           }
949           if (!nm1slice && nm2slice) {
950                     return (1);
951           }
952           nm1slice = strstr(nm1, "s2");
953           nm2slice = strstr(nm2, "s2");
954           if (nm1slice && !nm2slice) {
955                     return (-1);
956           }
957           if (!nm1slice && nm2slice) {
958                     return (1);
959           }
960 
961           rv = strcmp(nm1, nm2);
962           if (rv == 0)
963                     return (0);
964           return (rv > 0 ? 1 : -1);
965 }
966 
#ifdef illumos
/*
 * Mark the cached node named "<diskname><partno>" as unable to hold a pool
 * when the slice is too small.
 *
 * 'size' is compared against SPA_MINDEVSIZE / blksz, so it is presumably
 * expressed in units of 'blksz' bytes.  Nothing happens if the slice is
 * large enough or no node with that name is in the tree.
 */
static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    diskaddr_t size, uint_t blksz)
{
	char slice_name[MAXNAMELEN];
	rdsk_node_t key;
	rdsk_node_t *found;

	/* only rn_name is filled in; it is all the comparator looks at */
	(void) snprintf(slice_name, MAXNAMELEN, "%s%u", diskname, partno);
	key.rn_name = slice_name;

	/*
	 * A disk label with a bogus (zero) sector size would otherwise make
	 * the division below fault.
	 */
	if (blksz == 0)
		blksz = DEV_BSIZE;

	/* big enough to contain a zpool: leave the node untouched */
	if (size >= (SPA_MINDEVSIZE / blksz))
		return;

	found = avl_find(r, &key, NULL);
	if (found != NULL)
		found->rn_nozpool = B_TRUE;
}
#endif	/* illumos */
991 
992 static void
nozpool_all_slices(avl_tree_t * r,const char * sname)993 nozpool_all_slices(avl_tree_t *r, const char *sname)
994 {
995 #ifdef illumos
996           char diskname[MAXNAMELEN];
997           char *ptr;
998           int i;
999 
1000           (void) strncpy(diskname, sname, MAXNAMELEN);
1001           if (((ptr = strrchr(diskname, 's')) == NULL) &&
1002               ((ptr = strrchr(diskname, 'p')) == NULL))
1003                     return;
1004           ptr[0] = 's';
1005           ptr[1] = '\0';
1006           for (i = 0; i < NDKMAP; i++)
1007                     check_one_slice(r, diskname, i, 0, 1);
1008           ptr[0] = 'p';
1009           for (i = 0; i <= FD_NUMPART; i++)
1010                     check_one_slice(r, diskname, i, 0, 1);
1011 #endif    /* illumos */
1012 }
1013 
#ifdef illumos
/*
 * Read the disk label through 'fd' and flag sibling slices of 'sname' that
 * cannot hold a pool, so that they need not be opened at all.
 *
 * 'sname' must end in "s<digit>..." after its last 's'; other names are
 * ignored.  A VTOC label is tried first, then an EFI label.  If neither can
 * be read, nothing is flagged.
 */
static void
check_slices(avl_tree_t *r, int fd, const char *sname)
{
	struct extvtoc vtoc;
	struct dk_gpt *gpt;
	char diskname[MAXNAMELEN];
	char *ptr;
	int i;

	/*
	 * strlcpy() always NUL-terminates, keeping strrchr() and the ptr[1]
	 * accesses below inside the buffer.
	 */
	(void) strlcpy(diskname, sname, sizeof (diskname));

	/* isdigit() is only defined for unsigned char values and EOF */
	if ((ptr = strrchr(diskname, 's')) == NULL ||
	    !isdigit((unsigned char)ptr[1]))
		return;
	ptr[1] = '\0';

	if (read_extvtoc(fd, &vtoc) >= 0) {
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
		/*
		 * on x86 we'll still have leftover links that point
		 * to slices s[9-15], so use NDKMAP instead
		 */
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
		/* nodes p[1-4] are never used with EFI labels */
		ptr[0] = 'p';
		for (i = 1; i <= FD_NUMPART; i++)
			check_one_slice(r, diskname, i, 0, 1);
		efi_free(gpt);
	}
}
#endif	/* illumos */
1049 
1050 static void
zpool_open_func(void * arg)1051 zpool_open_func(void *arg)
1052 {
1053           rdsk_node_t *rn = arg;
1054           struct stat64 statbuf;
1055           nvlist_t *config;
1056           int fd;
1057 
1058           if (rn->rn_nozpool)
1059                     return;
1060           if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
1061                     /* symlink to a device that's no longer there */
1062                     if (errno == ENOENT)
1063                               nozpool_all_slices(rn->rn_avl, rn->rn_name);
1064                     return;
1065           }
1066           /*
1067            * Ignore failed stats.  We only want regular
1068            * files, character devs and block devs.
1069            */
1070           if (fstat64(fd, &statbuf) != 0 ||
1071               (!S_ISREG(statbuf.st_mode) &&
1072               !S_ISCHR(statbuf.st_mode) &&
1073               !S_ISBLK(statbuf.st_mode))) {
1074                     (void) close(fd);
1075                     return;
1076           }
1077           /* this file is too small to hold a zpool */
1078 #ifdef illumos
1079           if (S_ISREG(statbuf.st_mode) &&
1080               statbuf.st_size < SPA_MINDEVSIZE) {
1081                     (void) close(fd);
1082                     return;
1083           } else if (!S_ISREG(statbuf.st_mode)) {
1084                     /*
1085                      * Try to read the disk label first so we don't have to
1086                      * open a bunch of minor nodes that can't have a zpool.
1087                      */
1088                     check_slices(rn->rn_avl, fd, rn->rn_name);
1089           }
1090 #endif /* illumos */
1091 #ifdef __FreeBSD__
1092           if (statbuf.st_size < SPA_MINDEVSIZE) {
1093                     (void) close(fd);
1094                     return;
1095           }
1096 #endif /* __FreeBSD__ */
1097 #ifdef __NetBSD__
1098           struct dkwedge_list dkwl;
1099           off_t size;
1100 
1101           /* skip devices with wedges */
1102           memset(&dkwl, 0, sizeof(dkwl));
1103           if (native_ioctl(fd, DIOCLWEDGES, &dkwl) == 0 &&
1104               dkwl.dkwl_nwedges > 0) {
1105                     (void) close(fd);
1106                     return;
1107           }
1108 
1109           if (native_ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
1110               size < SPA_MINDEVSIZE) {
1111                     (void) close(fd);
1112                     return;
1113           }
1114 #endif
1115 
1116           if ((zpool_read_label(fd, &config)) != 0) {
1117                     (void) close(fd);
1118                     (void) no_memory(rn->rn_hdl);
1119                     return;
1120           }
1121           (void) close(fd);
1122 
1123           rn->rn_config = config;
1124 }
1125 
1126 /*
1127  * Given a file descriptor, clear (zero) the label information.
1128  */
1129 int
zpool_clear_label(int fd)1130 zpool_clear_label(int fd)
1131 {
1132           struct stat64 statbuf;
1133           int l;
1134           vdev_label_t *label;
1135           uint64_t size;
1136 
1137           if (fstat64(fd, &statbuf) == -1)
1138                     return (0);
1139           size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1140 
1141           if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
1142                     return (-1);
1143 
1144           for (l = 0; l < VDEV_LABELS; l++) {
1145                     if (pwrite64(fd, label, sizeof (vdev_label_t),
1146                         label_offset(size, l)) != sizeof (vdev_label_t)) {
1147                               free(label);
1148                               return (-1);
1149                     }
1150           }
1151 
1152           free(label);
1153           return (0);
1154 }
1155 
1156 /*
1157  * Given a list of directories to search, find all pools stored on disk.  This
1158  * includes partial pools which are not available to import.  If no args are
1159  * given (argc is 0), then the default directory (/dev/dsk) is searched.
1160  * poolname or guid (but not both) are provided by the caller when trying
1161  * to import a specific pool.
1162  */
1163 static nvlist_t *
zpool_find_import_impl(libzfs_handle_t * hdl,importargs_t * iarg)1164 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1165 {
1166           int i, dirs = iarg->paths;
1167           struct dirent64 *dp;
1168           char path[MAXPATHLEN];
1169           char *end, **dir = iarg->path;
1170           size_t pathleft;
1171           nvlist_t *ret = NULL;
1172           static char *default_dir = "/dev";
1173           pool_list_t pools = { 0 };
1174           pool_entry_t *pe, *penext;
1175           vdev_entry_t *ve, *venext;
1176           config_entry_t *ce, *cenext;
1177           name_entry_t *ne, *nenext;
1178           avl_tree_t slice_cache;
1179           rdsk_node_t *slice;
1180           void *cookie;
1181 
1182           if (dirs == 0) {
1183                     dirs = 1;
1184                     dir = &default_dir;
1185           }
1186 
1187           /*
1188            * Go through and read the label configuration information from every
1189            * possible device, organizing the information according to pool GUID
1190            * and toplevel GUID.
1191            */
1192           for (i = 0; i < dirs; i++) {
1193                     tpool_t *t;
1194                     char rdsk[MAXPATHLEN];
1195                     int dfd;
1196                     boolean_t config_failed = B_FALSE;
1197                     DIR *dirp;
1198 
1199                     /* use realpath to normalize the path */
1200                     if (realpath(dir[i], path) == 0) {
1201                               (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1202                                   dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1203                               goto error;
1204                     }
1205                     end = &path[strlen(path)];
1206                     *end++ = '/';
1207                     *end = 0;
1208                     pathleft = &path[sizeof (path)] - end;
1209 
1210 #ifdef illumos
1211                     /*
1212                      * Using raw devices instead of block devices when we're
1213                      * reading the labels skips a bunch of slow operations during
1214                      * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1215                      */
1216                     if (strcmp(path, ZFS_DISK_ROOTD) == 0)
1217                               (void) strlcpy(rdsk, ZFS_RDISK_ROOTD, sizeof (rdsk));
1218                     else
1219 #endif
1220                               (void) strlcpy(rdsk, path, sizeof (rdsk));
1221 
1222                     if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1223                         (dirp = fdopendir(dfd)) == NULL) {
1224                               if (dfd >= 0)
1225                                         (void) close(dfd);
1226                               zfs_error_aux(hdl, strerror(errno));
1227                               (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1228                                   dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1229                                   rdsk);
1230                               goto error;
1231                     }
1232 
1233                     avl_create(&slice_cache, slice_cache_compare,
1234                         sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
1235 
1236 #ifdef __FreeBSD__
1237                     if (strcmp(rdsk, "/dev/") == 0) {
1238                               struct gmesh mesh;
1239                               struct gclass *mp;
1240                               struct ggeom *gp;
1241                               struct gprovider *pp;
1242 
1243                               errno = geom_gettree(&mesh);
1244                               if (errno != 0) {
1245                                         zfs_error_aux(hdl, strerror(errno));
1246                                         (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1247                                             dgettext(TEXT_DOMAIN, "cannot get GEOM tree"));
1248                                         goto error;
1249                               }
1250 
1251                               LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
1252                               LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
1253                                                   LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
1254                                                             slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1255                                                             slice->rn_name = zfs_strdup(hdl, pp->lg_name);
1256                                                             slice->rn_avl = &slice_cache;
1257                                                             slice->rn_dfd = dfd;
1258                                                             slice->rn_hdl = hdl;
1259                                                             slice->rn_nozpool = B_FALSE;
1260                                                             avl_add(&slice_cache, slice);
1261                                                   }
1262                                         }
1263                               }
1264 
1265                               geom_deletetree(&mesh);
1266                               goto skipdir;
1267                     }
1268 #endif
1269 #ifdef __NetBSD__
1270                     if (strcmp(rdsk, "/dev/") == 0) {
1271                               static const char mib_name[] = "hw.disknames";
1272                               size_t len;
1273                               char *disknames, *last, *name;
1274 
1275                               if (sysctlbyname(mib_name, NULL, &len, NULL, 0) == -1) {
1276                                         zfs_error_aux(hdl, strerror(errno));
1277                                         (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1278                                             dgettext(TEXT_DOMAIN, "cannot get hw.disknames list"));
1279 
1280                                         avl_destroy(&slice_cache);
1281                                         (void) closedir(dirp);
1282                                         goto error;
1283                               }
1284                               disknames = zfs_alloc(hdl, len + 2);
1285                               (void)sysctlbyname(mib_name, disknames, &len, NULL, 0);
1286 
1287                               for ((name = strtok_r(disknames, " ", &last)); name;
1288                                   (name = strtok_r(NULL, " ", &last))) {
1289                                         slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1290                                         slice->rn_name = zfs_strdup(hdl, name);
1291                                         slice->rn_avl = &slice_cache;
1292                                         slice->rn_dfd = dfd;
1293                                         slice->rn_hdl = hdl;
1294                                         slice->rn_nozpool = B_FALSE;
1295                                         avl_add(&slice_cache, slice);
1296                               }
1297                               free(disknames);
1298 
1299                               goto skipdir;
1300                     }
1301 #endif
1302 
1303                     /*
1304                      * This is not MT-safe, but we have no MT consumers of libzfs
1305                      */
1306                     while ((dp = readdir64(dirp)) != NULL) {
1307                               const char *name = dp->d_name;
1308                               if (name[0] == '.' &&
1309                                   (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1310                                         continue;
1311 
1312                               slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1313                               slice->rn_name = zfs_strdup(hdl, name);
1314                               slice->rn_avl = &slice_cache;
1315                               slice->rn_dfd = dfd;
1316                               slice->rn_hdl = hdl;
1317                               slice->rn_nozpool = B_FALSE;
1318                               avl_add(&slice_cache, slice);
1319                     }
1320 skipdir:
1321                     /*
1322                      * create a thread pool to do all of this in parallel;
1323                      * rn_nozpool is not protected, so this is racy in that
1324                      * multiple tasks could decide that the same slice can
1325                      * not hold a zpool, which is benign.  Also choose
1326                      * double the number of processors; we hold a lot of
1327                      * locks in the kernel, so going beyond this doesn't
1328                      * buy us much.
1329                      */
1330                     t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1331                         0, NULL);
1332                     for (slice = avl_first(&slice_cache); slice;
1333                         (slice = avl_walk(&slice_cache, slice,
1334                         AVL_AFTER)))
1335                               (void) tpool_dispatch(t, zpool_open_func, slice);
1336                     tpool_wait(t);
1337                     tpool_destroy(t);
1338 
1339                     cookie = NULL;
1340                     while ((slice = avl_destroy_nodes(&slice_cache,
1341                         &cookie)) != NULL) {
1342                               if (slice->rn_config != NULL && !config_failed) {
1343                                         nvlist_t *config = slice->rn_config;
1344                                         boolean_t matched = B_TRUE;
1345 
1346                                         if (iarg->poolname != NULL) {
1347                                                   char *pname;
1348 
1349                                                   matched = nvlist_lookup_string(config,
1350                                                       ZPOOL_CONFIG_POOL_NAME,
1351                                                       &pname) == 0 &&
1352                                                       strcmp(iarg->poolname, pname) == 0;
1353                                         } else if (iarg->guid != 0) {
1354                                                   uint64_t this_guid;
1355 
1356                                                   matched = nvlist_lookup_uint64(config,
1357                                                       ZPOOL_CONFIG_POOL_GUID,
1358                                                       &this_guid) == 0 &&
1359                                                       iarg->guid == this_guid;
1360                                         }
1361                                         if (!matched) {
1362                                                   nvlist_free(config);
1363                                         } else {
1364                                                   /*
1365                                                    * use the non-raw path for the config
1366                                                    */
1367                                                   (void) strlcpy(end, slice->rn_name,
1368                                                       pathleft);
1369                                                   if (add_config(hdl, &pools, path,
1370                                                       config) != 0)
1371                                                             config_failed = B_TRUE;
1372                                         }
1373                               }
1374                               free(slice->rn_name);
1375                               free(slice);
1376                     }
1377                     avl_destroy(&slice_cache);
1378 
1379                     (void) closedir(dirp);
1380 
1381                     if (config_failed)
1382                               goto error;
1383           }
1384 
1385           ret = get_configs(hdl, &pools, iarg->can_be_active);
1386 
1387 error:
1388           for (pe = pools.pools; pe != NULL; pe = penext) {
1389                     penext = pe->pe_next;
1390                     for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1391                               venext = ve->ve_next;
1392                               for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1393                                         cenext = ce->ce_next;
1394                                         nvlist_free(ce->ce_config);
1395                                         free(ce);
1396                               }
1397                               free(ve);
1398                     }
1399                     free(pe);
1400           }
1401 
1402           for (ne = pools.names; ne != NULL; ne = nenext) {
1403                     nenext = ne->ne_next;
1404                     free(ne->ne_name);
1405                     free(ne);
1406           }
1407 
1408           return (ret);
1409 }
1410 
1411 nvlist_t *
zpool_find_import(libzfs_handle_t * hdl,int argc,char ** argv)1412 zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
1413 {
1414           importargs_t iarg = { 0 };
1415 
1416           iarg.paths = argc;
1417           iarg.path = argv;
1418 
1419           return (zpool_find_import_impl(hdl, &iarg));
1420 }
1421 
1422 /*
1423  * Given a cache file, return the contents as a list of importable pools.
1424  * poolname or guid (but not both) are provided by the caller when trying
1425  * to import a specific pool.
1426  */
1427 nvlist_t *
zpool_find_import_cached(libzfs_handle_t * hdl,const char * cachefile,char * poolname,uint64_t guid)1428 zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
1429     char *poolname, uint64_t guid)
1430 {
1431           char *buf;
1432           int fd;
1433           struct stat64 statbuf;
1434           nvlist_t *raw, *src, *dst;
1435           nvlist_t *pools;
1436           nvpair_t *elem;
1437           char *name;
1438           uint64_t this_guid;
1439           boolean_t active;
1440 
1441           verify(poolname == NULL || guid == 0);
1442 
1443           if ((fd = open(cachefile, O_RDONLY)) < 0) {
1444                     zfs_error_aux(hdl, "%s", strerror(errno));
1445                     (void) zfs_error(hdl, EZFS_BADCACHE,
1446                         dgettext(TEXT_DOMAIN, "failed to open cache file"));
1447                     return (NULL);
1448           }
1449 
1450           if (fstat64(fd, &statbuf) != 0) {
1451                     zfs_error_aux(hdl, "%s", strerror(errno));
1452                     (void) close(fd);
1453                     (void) zfs_error(hdl, EZFS_BADCACHE,
1454                         dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
1455                     return (NULL);
1456           }
1457 
1458           if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
1459                     (void) close(fd);
1460                     return (NULL);
1461           }
1462 
1463           if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1464                     (void) close(fd);
1465                     free(buf);
1466                     (void) zfs_error(hdl, EZFS_BADCACHE,
1467                         dgettext(TEXT_DOMAIN,
1468                         "failed to read cache file contents"));
1469                     return (NULL);
1470           }
1471 
1472           (void) close(fd);
1473 
1474           if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
1475                     free(buf);
1476                     (void) zfs_error(hdl, EZFS_BADCACHE,
1477                         dgettext(TEXT_DOMAIN,
1478                         "invalid or corrupt cache file contents"));
1479                     return (NULL);
1480           }
1481 
1482           free(buf);
1483 
1484           /*
1485            * Go through and get the current state of the pools and refresh their
1486            * state.
1487            */
1488           if (nvlist_alloc(&pools, 0, 0) != 0) {
1489                     (void) no_memory(hdl);
1490                     nvlist_free(raw);
1491                     return (NULL);
1492           }
1493 
1494           elem = NULL;
1495           while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
1496                     src = fnvpair_value_nvlist(elem);
1497 
1498                     name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
1499                     if (poolname != NULL && strcmp(poolname, name) != 0)
1500                               continue;
1501 
1502                     this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
1503                     if (guid != 0 && guid != this_guid)
1504                               continue;
1505 
1506                     if (pool_active(hdl, name, this_guid, &active) != 0) {
1507                               nvlist_free(raw);
1508                               nvlist_free(pools);
1509                               return (NULL);
1510                     }
1511 
1512                     if (active)
1513                               continue;
1514 
1515                     if ((dst = refresh_config(hdl, src)) == NULL) {
1516                               nvlist_free(raw);
1517                               nvlist_free(pools);
1518                               return (NULL);
1519                     }
1520 
1521                     if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
1522                               (void) no_memory(hdl);
1523                               nvlist_free(dst);
1524                               nvlist_free(raw);
1525                               nvlist_free(pools);
1526                               return (NULL);
1527                     }
1528                     nvlist_free(dst);
1529           }
1530 
1531           nvlist_free(raw);
1532           return (pools);
1533 }
1534 
1535 static int
name_or_guid_exists(zpool_handle_t * zhp,void * data)1536 name_or_guid_exists(zpool_handle_t *zhp, void *data)
1537 {
1538           importargs_t *import = data;
1539           int found = 0;
1540 
1541           if (import->poolname != NULL) {
1542                     char *pool_name;
1543 
1544                     verify(nvlist_lookup_string(zhp->zpool_config,
1545                         ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
1546                     if (strcmp(pool_name, import->poolname) == 0)
1547                               found = 1;
1548           } else {
1549                     uint64_t pool_guid;
1550 
1551                     verify(nvlist_lookup_uint64(zhp->zpool_config,
1552                         ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
1553                     if (pool_guid == import->guid)
1554                               found = 1;
1555           }
1556 
1557           zpool_close(zhp);
1558           return (found);
1559 }
1560 
1561 nvlist_t *
zpool_search_import(libzfs_handle_t * hdl,importargs_t * import)1562 zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
1563 {
1564           verify(import->poolname == NULL || import->guid == 0);
1565 
1566           if (import->unique)
1567                     import->exists = zpool_iter(hdl, name_or_guid_exists, import);
1568 
1569           if (import->cachefile != NULL)
1570                     return (zpool_find_import_cached(hdl, import->cachefile,
1571                         import->poolname, import->guid));
1572 
1573           return (zpool_find_import_impl(hdl, import));
1574 }
1575 
1576 boolean_t
find_guid(nvlist_t * nv,uint64_t guid)1577 find_guid(nvlist_t *nv, uint64_t guid)
1578 {
1579           uint64_t tmp;
1580           nvlist_t **child;
1581           uint_t c, children;
1582 
1583           verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
1584           if (tmp == guid)
1585                     return (B_TRUE);
1586 
1587           if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1588               &child, &children) == 0) {
1589                     for (c = 0; c < children; c++)
1590                               if (find_guid(child[c], guid))
1591                                         return (B_TRUE);
1592           }
1593 
1594           return (B_FALSE);
1595 }
1596 
1597 typedef struct aux_cbdata {
1598           const char          *cb_type;
1599           uint64_t  cb_guid;
1600           zpool_handle_t      *cb_zhp;
1601 } aux_cbdata_t;
1602 
1603 static int
find_aux(zpool_handle_t * zhp,void * data)1604 find_aux(zpool_handle_t *zhp, void *data)
1605 {
1606           aux_cbdata_t *cbp = data;
1607           nvlist_t **list;
1608           uint_t i, count;
1609           uint64_t guid;
1610           nvlist_t *nvroot;
1611 
1612           verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1613               &nvroot) == 0);
1614 
1615           if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
1616               &list, &count) == 0) {
1617                     for (i = 0; i < count; i++) {
1618                               verify(nvlist_lookup_uint64(list[i],
1619                                   ZPOOL_CONFIG_GUID, &guid) == 0);
1620                               if (guid == cbp->cb_guid) {
1621                                         cbp->cb_zhp = zhp;
1622                                         return (1);
1623                               }
1624                     }
1625           }
1626 
1627           zpool_close(zhp);
1628           return (0);
1629 }
1630 
1631 /*
1632  * Determines if the pool is in use.  If so, it returns true and the state of
1633  * the pool as well as the name of the pool.  Both strings are allocated and
1634  * must be freed by the caller.
1635  */
1636 int
zpool_in_use(libzfs_handle_t * hdl,int fd,pool_state_t * state,char ** namestr,boolean_t * inuse)1637 zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
1638     boolean_t *inuse)
1639 {
1640           nvlist_t *config;
1641           char *name;
1642           boolean_t ret;
1643           uint64_t guid, vdev_guid;
1644           zpool_handle_t *zhp;
1645           nvlist_t *pool_config;
1646           uint64_t stateval, isspare;
1647           aux_cbdata_t cb = { 0 };
1648           boolean_t isactive;
1649 
1650           *inuse = B_FALSE;
1651 
1652           if (zpool_read_label(fd, &config) != 0) {
1653                     (void) no_memory(hdl);
1654                     return (-1);
1655           }
1656 
1657           if (config == NULL)
1658                     return (0);
1659 
1660           verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1661               &stateval) == 0);
1662           verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
1663               &vdev_guid) == 0);
1664 
1665           if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
1666                     verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1667                         &name) == 0);
1668                     verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1669                         &guid) == 0);
1670           }
1671 
1672           switch (stateval) {
1673           case POOL_STATE_EXPORTED:
1674                     /*
1675                      * A pool with an exported state may in fact be imported
1676                      * read-only, so check the in-core state to see if it's
1677                      * active and imported read-only.  If it is, set
1678                      * its state to active.
1679                      */
1680                     if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
1681                         (zhp = zpool_open_canfail(hdl, name)) != NULL) {
1682                               if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
1683                                         stateval = POOL_STATE_ACTIVE;
1684 
1685                               /*
1686                                * All we needed the zpool handle for is the
1687                                * readonly prop check.
1688                                */
1689                               zpool_close(zhp);
1690                     }
1691 
1692                     ret = B_TRUE;
1693                     break;
1694 
1695           case POOL_STATE_ACTIVE:
1696                     /*
1697                      * For an active pool, we have to determine if it's really part
1698                      * of a currently active pool (in which case the pool will exist
1699                      * and the guid will be the same), or whether it's part of an
1700                      * active pool that was disconnected without being explicitly
1701                      * exported.
1702                      */
1703                     if (pool_active(hdl, name, guid, &isactive) != 0) {
1704                               nvlist_free(config);
1705                               return (-1);
1706                     }
1707 
1708                     if (isactive) {
1709                               /*
1710                                * Because the device may have been removed while
1711                                * offlined, we only report it as active if the vdev is
1712                                * still present in the config.  Otherwise, pretend like
1713                                * it's not in use.
1714                                */
1715                               if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
1716                                   (pool_config = zpool_get_config(zhp, NULL))
1717                                   != NULL) {
1718                                         nvlist_t *nvroot;
1719 
1720                                         verify(nvlist_lookup_nvlist(pool_config,
1721                                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1722                                         ret = find_guid(nvroot, vdev_guid);
1723                               } else {
1724                                         ret = B_FALSE;
1725                               }
1726 
1727                               /*
1728                                * If this is an active spare within another pool, we
1729                                * treat it like an unused hot spare.  This allows the
1730                                * user to create a pool with a hot spare that currently
1731                                * in use within another pool.  Since we return B_TRUE,
1732                                * libdiskmgt will continue to prevent generic consumers
1733                                * from using the device.
1734                                */
1735                               if (ret && nvlist_lookup_uint64(config,
1736                                   ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
1737                                         stateval = POOL_STATE_SPARE;
1738 
1739                               if (zhp != NULL)
1740                                         zpool_close(zhp);
1741                     } else {
1742                               stateval = POOL_STATE_POTENTIALLY_ACTIVE;
1743                               ret = B_TRUE;
1744                     }
1745                     break;
1746 
1747           case POOL_STATE_SPARE:
1748                     /*
1749                      * For a hot spare, it can be either definitively in use, or
1750                      * potentially active.  To determine if it's in use, we iterate
1751                      * over all pools in the system and search for one with a spare
1752                      * with a matching guid.
1753                      *
1754                      * Due to the shared nature of spares, we don't actually report
1755                      * the potentially active case as in use.  This means the user
1756                      * can freely create pools on the hot spares of exported pools,
1757                      * but to do otherwise makes the resulting code complicated, and
1758                      * we end up having to deal with this case anyway.
1759                      */
1760                     cb.cb_zhp = NULL;
1761                     cb.cb_guid = vdev_guid;
1762                     cb.cb_type = ZPOOL_CONFIG_SPARES;
1763                     if (zpool_iter(hdl, find_aux, &cb) == 1) {
1764                               name = (char *)zpool_get_name(cb.cb_zhp);
1765                               ret = B_TRUE;
1766                     } else {
1767                               ret = B_FALSE;
1768                     }
1769                     break;
1770 
1771           case POOL_STATE_L2CACHE:
1772 
1773                     /*
1774                      * Check if any pool is currently using this l2cache device.
1775                      */
1776                     cb.cb_zhp = NULL;
1777                     cb.cb_guid = vdev_guid;
1778                     cb.cb_type = ZPOOL_CONFIG_L2CACHE;
1779                     if (zpool_iter(hdl, find_aux, &cb) == 1) {
1780                               name = (char *)zpool_get_name(cb.cb_zhp);
1781                               ret = B_TRUE;
1782                     } else {
1783                               ret = B_FALSE;
1784                     }
1785                     break;
1786 
1787           default:
1788                     ret = B_FALSE;
1789           }
1790 
1791 
1792           if (ret) {
1793                     if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
1794                               if (cb.cb_zhp)
1795                                         zpool_close(cb.cb_zhp);
1796                               nvlist_free(config);
1797                               return (-1);
1798                     }
1799                     *state = (pool_state_t)stateval;
1800           }
1801 
1802           if (cb.cb_zhp)
1803                     zpool_close(cb.cb_zhp);
1804 
1805           nvlist_free(config);
1806           *inuse = ret;
1807           return (0);
1808 }
1809 
#ifdef __NetBSD__
/*
 * Keep this block at the very end of the file: ioctl is #undef'd here so
 * that native_ioctl() can call it directly, and placing it last ensures no
 * other code in the file is affected by the #undef.
 */
#undef ioctl

static int
native_ioctl(int fd, unsigned long cmd, void *arg)
{
	return (ioctl(fd, cmd, arg));
}
#endif
1824