1 /*        $NetBSD: metadata.c,v 1.1.1.3 2009/12/02 00:26:39 haad Exp $          */
2 
3 /*
4  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 #include "lib.h"
19 #include "device.h"
20 #include "metadata.h"
21 #include "toolcontext.h"
22 #include "lvm-string.h"
23 #include "lvm-file.h"
24 #include "lvmcache.h"
25 #include "memlock.h"
26 #include "str_list.h"
27 #include "pv_alloc.h"
28 #include "segtype.h"
29 #include "activate.h"
30 #include "display.h"
31 #include "locking.h"
32 #include "archiver.h"
33 #include "defaults.h"
34 #include "filter-persistent.h"
35 
36 #include <sys/param.h>
37 
/*
 * Read one member of a physical volume through a generic handle.
 * The handle is cast to (const struct physical_volume *) and dereferenced
 * as-is; nothing here validates it.
 *
 * FIXME: Check for valid handle before dereferencing field or log error?
 */
#define pv_field(handle, field) \
	(((const struct physical_volume *)(handle))->field)

/*
 * Forward declarations of file-local helpers (their definitions are not
 * part of the section of the file reviewed here).
 */
static struct physical_volume *_pv_read(struct cmd_context *cmd,
					struct dm_pool *pvmem,
					const char *pv_name,
					struct dm_list *mdas,
					uint64_t *label_sector,
					int warnings,
					int scan_label_only);

static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
						const char *pv_name);

static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
				      const char *pv_name);

static struct physical_volume *_find_pv_in_vg_by_uuid(
					const struct volume_group *vg,
					const struct id *id);

static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
				    uint32_t status);

/*
 * Confirmation prompt format string; takes two string arguments:
 * the physical volume name, then the volume group name.
 */
const char _really_init[] =
	"Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
65 
set_pe_align(struct physical_volume * pv,unsigned long data_alignment)66 unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
67 {
68           if (pv->pe_align)
69                     goto out;
70 
71           if (data_alignment)
72                     pv->pe_align = data_alignment;
73           else
74                     pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
75 
76           if (!pv->dev)
77                     goto out;
78 
79           /*
80            * Align to stripe-width of underlying md device if present
81            */
82           if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
83                                           DEFAULT_MD_CHUNK_ALIGNMENT))
84                     pv->pe_align = MAX(pv->pe_align,
85                                            dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
86                                                                    pv->dev));
87 
88           /*
89            * Align to topology's minimum_io_size or optimal_io_size if present
90            * - minimum_io_size - the smallest request the device can perform
91            *   w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
92            * - optimal_io_size - the device's preferred unit of receiving I/O
93            *   (e.g. MD's stripe width)
94            */
95           if (find_config_tree_bool(pv->fmt->cmd,
96                                           "devices/data_alignment_detection",
97                                           DEFAULT_DATA_ALIGNMENT_DETECTION)) {
98                     pv->pe_align = MAX(pv->pe_align,
99                                            dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
100                                                                    pv->dev));
101 
102                     pv->pe_align = MAX(pv->pe_align,
103                                            dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
104                                                                    pv->dev));
105           }
106 
107           log_very_verbose("%s: Setting PE alignment to %lu sectors.",
108                                dev_name(pv->dev), pv->pe_align);
109 
110 out:
111           return pv->pe_align;
112 }
113 
set_pe_align_offset(struct physical_volume * pv,unsigned long data_alignment_offset)114 unsigned long set_pe_align_offset(struct physical_volume *pv,
115                                           unsigned long data_alignment_offset)
116 {
117           if (pv->pe_align_offset)
118                     goto out;
119 
120           if (data_alignment_offset)
121                     pv->pe_align_offset = data_alignment_offset;
122 
123           if (!pv->dev)
124                     goto out;
125 
126           if (find_config_tree_bool(pv->fmt->cmd,
127                                           "devices/data_alignment_offset_detection",
128                                           DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION))
129                     pv->pe_align_offset =
130                               MAX(pv->pe_align_offset,
131                                   dev_alignment_offset(pv->fmt->cmd->sysfs_dir,
132                                                              pv->dev));
133 
134           log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
135                                dev_name(pv->dev), pv->pe_align_offset);
136 
137 out:
138           return pv->pe_align_offset;
139 }
140 
141 /**
142  * add_pv_to_vg - Add a physical volume to a volume group
143  * @vg - volume group to add to
144  * @pv_name - name of the pv (to be removed)
145  * @pv - physical volume to add to volume group
146  *
147  * Returns:
148  *  0 - failure
149  *  1 - success
150  * FIXME: remove pv_name - obtain safely from pv
151  */
add_pv_to_vg(struct volume_group * vg,const char * pv_name,struct physical_volume * pv)152 int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
153                      struct physical_volume *pv)
154 {
155           struct pv_list *pvl;
156           struct format_instance *fid = vg->fid;
157           struct dm_pool *mem = vg->vgmem;
158 
159           log_verbose("Adding physical volume '%s' to volume group '%s'",
160                         pv_name, vg->name);
161 
162           if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
163                     log_error("pv_list allocation for '%s' failed", pv_name);
164                     return 0;
165           }
166 
167           if (!is_orphan_vg(pv->vg_name)) {
168                     log_error("Physical volume '%s' is already in volume group "
169                                 "'%s'", pv_name, pv->vg_name);
170                     return 0;
171           }
172 
173           if (pv->fmt != fid->fmt) {
174                     log_error("Physical volume %s is of different format type (%s)",
175                                 pv_name, pv->fmt->name);
176                     return 0;
177           }
178 
179           /* Ensure PV doesn't depend on another PV already in the VG */
180           if (pv_uses_vg(pv, vg)) {
181                     log_error("Physical volume %s might be constructed from same "
182                                 "volume group %s", pv_name, vg->name);
183                     return 0;
184           }
185 
186           if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) {
187                     log_error("vg->name allocation failed for '%s'", pv_name);
188                     return 0;
189           }
190 
191           memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
192 
193           /* Units of 512-byte sectors */
194           pv->pe_size = vg->extent_size;
195 
196           /*
197            * pe_count must always be calculated by pv_setup
198            */
199           pv->pe_alloc_count = 0;
200 
201           if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
202                                              vg->extent_size, 0, 0, 0UL, UINT64_C(0),
203                                              &fid->metadata_areas, pv, vg)) {
204                     log_error("Format-specific setup of physical volume '%s' "
205                                 "failed.", pv_name);
206                     return 0;
207           }
208 
209           if (_find_pv_in_vg(vg, pv_name)) {
210                     log_error("Physical volume '%s' listed more than once.",
211                                 pv_name);
212                     return 0;
213           }
214 
215           if (vg->pv_count && (vg->pv_count == vg->max_pv)) {
216                     log_error("No space for '%s' - volume group '%s' "
217                                 "holds max %d physical volume(s).", pv_name,
218                                 vg->name, vg->max_pv);
219                     return 0;
220           }
221 
222           if (!alloc_pv_segment_whole_pv(mem, pv))
223                     return_0;
224 
225           pvl->pv = pv;
226           dm_list_add(&vg->pvs, &pvl->list);
227 
228           if ((uint64_t) vg->extent_count + pv->pe_count > UINT32_MAX) {
229                     log_error("Unable to add %s to %s: new extent count (%"
230                                 PRIu64 ") exceeds limit (%" PRIu32 ").",
231                                 pv_name, vg->name,
232                                 (uint64_t) vg->extent_count + pv->pe_count,
233                                 UINT32_MAX);
234                     return 0;
235           }
236 
237           vg->pv_count++;
238           vg->extent_count += pv->pe_count;
239           vg->free_count += pv->pe_count;
240 
241           return 1;
242 }
243 
_copy_pv(struct dm_pool * pvmem,struct physical_volume * pv_to,struct physical_volume * pv_from)244 static int _copy_pv(struct dm_pool *pvmem,
245                         struct physical_volume *pv_to,
246                         struct physical_volume *pv_from)
247 {
248           memcpy(pv_to, pv_from, sizeof(*pv_to));
249 
250           if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name)))
251                     return_0;
252 
253           if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags))
254                     return_0;
255 
256           if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments))
257                     return_0;
258 
259           return 1;
260 }
261 
_copy_pvl(struct dm_pool * pvmem,struct pv_list * pvl_from)262 static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from)
263 {
264           struct pv_list *pvl_to = NULL;
265 
266           if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to))))
267                     return_NULL;
268 
269           if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv))))
270                     goto_bad;
271 
272           if(!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv))
273                     goto_bad;
274 
275           return pvl_to;
276 bad:
277           dm_pool_free(pvmem, pvl_to);
278           return NULL;
279 }
280 
get_pv_from_vg_by_id(const struct format_type * fmt,const char * vg_name,const char * vgid,const char * pvid,struct physical_volume * pv)281 int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
282                                const char *vgid, const char *pvid,
283                                struct physical_volume *pv)
284 {
285           struct volume_group *vg;
286           struct pv_list *pvl;
287           int r = 0, consistent = 0;
288 
289           if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, &consistent))) {
290                     log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s",
291                                 vg_name);
292                     return 0;
293           }
294 
295           if (!consistent)
296                     log_warn("WARNING: Volume group %s is not consistent",
297                                vg_name);
298 
299           dm_list_iterate_items(pvl, &vg->pvs) {
300                     if (id_equal(&pvl->pv->id, (const struct id *) pvid)) {
301                               if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) {
302                                         log_error("internal PV duplication failed");
303                                         r = 0;
304                                         goto out;
305                               }
306                               r = 1;
307                               goto out;
308                     }
309           }
310 out:
311           vg_release(vg);
312           return r;
313 }
314 
move_pv(struct volume_group * vg_from,struct volume_group * vg_to,const char * pv_name)315 int move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
316               const char *pv_name)
317 {
318           struct physical_volume *pv;
319           struct pv_list *pvl;
320 
321           /* FIXME: handle tags */
322           if (!(pvl = find_pv_in_vg(vg_from, pv_name))) {
323                     log_error("Physical volume %s not in volume group %s",
324                                 pv_name, vg_from->name);
325                     return 0;
326           }
327 
328           if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
329               _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
330                     return 0;
331 
332           dm_list_move(&vg_to->pvs, &pvl->list);
333 
334           vg_from->pv_count--;
335           vg_to->pv_count++;
336 
337           pv = pvl->pv;
338 
339           vg_from->extent_count -= pv_pe_count(pv);
340           vg_to->extent_count += pv_pe_count(pv);
341 
342           vg_from->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
343           vg_to->free_count += pv_pe_count(pv) - pv_pe_alloc_count(pv);
344 
345           return 1;
346 }
347 
move_pvs_used_by_lv(struct volume_group * vg_from,struct volume_group * vg_to,const char * lv_name)348 int move_pvs_used_by_lv(struct volume_group *vg_from,
349                               struct volume_group *vg_to,
350                               const char *lv_name)
351 {
352           struct lv_segment *lvseg;
353           unsigned s;
354           struct lv_list *lvl;
355           struct logical_volume *lv;
356 
357           /* FIXME: handle tags */
358           if (!(lvl = find_lv_in_vg(vg_from, lv_name))) {
359                     log_error("Logical volume %s not in volume group %s",
360                                 lv_name, vg_from->name);
361                     return 0;
362           }
363 
364           if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
365               _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
366                     return 0;
367 
368           dm_list_iterate_items(lvseg, &lvl->lv->segments) {
369                     if (lvseg->log_lv)
370                               if (!move_pvs_used_by_lv(vg_from, vg_to,
371                                                                  lvseg->log_lv->name))
372                                         return_0;
373                     for (s = 0; s < lvseg->area_count; s++) {
374                               if (seg_type(lvseg, s) == AREA_PV) {
375                                         if (!move_pv(vg_from, vg_to,
376                                                         pv_dev_name(seg_pv(lvseg, s))))
377                                                   return_0;
378                               } else if (seg_type(lvseg, s) == AREA_LV) {
379                                         lv = seg_lv(lvseg, s);
380                                         if (!move_pvs_used_by_lv(vg_from, vg_to,
381                                                                            lv->name))
382                                             return_0;
383                               }
384                     }
385           }
386           return 1;
387 }
388 
validate_new_vg_name(struct cmd_context * cmd,const char * vg_name)389 static int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name)
390 {
391           char vg_path[PATH_MAX];
392 
393           if (!validate_name(vg_name))
394                     return_0;
395 
396           snprintf(vg_path, PATH_MAX, "%s%s", cmd->dev_dir, vg_name);
397           if (path_exists(vg_path)) {
398                     log_error("%s: already exists in filesystem", vg_path);
399                     return 0;
400           }
401 
402           return 1;
403 }
404 
validate_vg_rename_params(struct cmd_context * cmd,const char * vg_name_old,const char * vg_name_new)405 int validate_vg_rename_params(struct cmd_context *cmd,
406                                     const char *vg_name_old,
407                                     const char *vg_name_new)
408 {
409           unsigned length;
410           char *dev_dir;
411 
412           dev_dir = cmd->dev_dir;
413           length = strlen(dev_dir);
414 
415           /* Check sanity of new name */
416           if (strlen(vg_name_new) > NAME_LEN - length - 2) {
417                     log_error("New volume group path exceeds maximum length "
418                                 "of %d!", NAME_LEN - length - 2);
419                     return 0;
420           }
421 
422           if (!validate_new_vg_name(cmd, vg_name_new)) {
423                     log_error("New volume group name \"%s\" is invalid",
424                                 vg_name_new);
425                     return 0;
426           }
427 
428           if (!strcmp(vg_name_old, vg_name_new)) {
429                     log_error("Old and new volume group names must differ");
430                     return 0;
431           }
432 
433           return 1;
434 }
435 
vg_rename(struct cmd_context * cmd,struct volume_group * vg,const char * new_name)436 int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
437                 const char *new_name)
438 {
439           struct dm_pool *mem = vg->vgmem;
440           struct pv_list *pvl;
441 
442           if (!(vg->name = dm_pool_strdup(mem, new_name))) {
443                     log_error("vg->name allocation failed for '%s'", new_name);
444                     return 0;
445           }
446 
447           dm_list_iterate_items(pvl, &vg->pvs) {
448                     if (!(pvl->pv->vg_name = dm_pool_strdup(mem, new_name))) {
449                               log_error("pv->vg_name allocation failed for '%s'",
450                                           pv_dev_name(pvl->pv));
451                               return 0;
452                     }
453           }
454 
455           return 1;
456 }
457 
/*
 * Remove all logical volumes of 'vg'.
 *
 * Repeatedly takes the first entry of vg->lvs and removes that LV with
 * lv_remove_with_dependencies(), until dm_list_first() reports no entry.
 *
 * Returns 1 when the list has been emptied, 0 as soon as a removal fails.
 */
int remove_lvs_in_vg(struct cmd_context *cmd,
		     struct volume_group *vg,
		     force_t force)
{
	struct dm_list *first;
	struct lv_list *lvl;

	for (first = dm_list_first(&vg->lvs); first;
	     first = dm_list_first(&vg->lvs)) {
		lvl = dm_list_item(first, struct lv_list);

		if (!lv_remove_with_dependencies(cmd, lvl->lv, force))
			return 0;
	}

	return 1;
}
473 
vg_remove_check(struct volume_group * vg)474 int vg_remove_check(struct volume_group *vg)
475 {
476           unsigned lv_count;
477           struct pv_list *pvl, *tpvl;
478 
479           if (vg_read_error(vg) || vg_missing_pv_count(vg)) {
480                     log_error("Volume group \"%s\" not found, is inconsistent "
481                                 "or has PVs missing.", vg ? vg->name : "");
482                     log_error("Consider vgreduce --removemissing if metadata "
483                                 "is inconsistent.");
484                     return 0;
485           }
486 
487           if (!vg_check_status(vg, EXPORTED_VG))
488                     return 0;
489 
490           lv_count = vg_visible_lvs(vg);
491 
492           if (lv_count) {
493                     log_error("Volume group \"%s\" still contains %u "
494                                 "logical volume(s)", vg->name, lv_count);
495                     return 0;
496           }
497 
498           if (!archive(vg))
499                     return 0;
500 
501           dm_list_iterate_items_safe(pvl, tpvl, &vg->pvs) {
502                     dm_list_del(&pvl->list);
503                     dm_list_add(&vg->removed_pvs, &pvl->list);
504           }
505           return 1;
506 }
507 
vg_remove(struct volume_group * vg)508 int vg_remove(struct volume_group *vg)
509 {
510           struct physical_volume *pv;
511           struct pv_list *pvl;
512           int ret = 1;
513 
514           if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE)) {
515                     log_error("Can't get lock for orphan PVs");
516                     return 0;
517           }
518 
519           if (!vg_remove_mdas(vg)) {
520                     log_error("vg_remove_mdas %s failed", vg->name);
521                     unlock_vg(vg->cmd, VG_ORPHANS);
522                     return 0;
523           }
524 
525           /* init physical volumes */
526           dm_list_iterate_items(pvl, &vg->removed_pvs) {
527                     pv = pvl->pv;
528                     log_verbose("Removing physical volume \"%s\" from "
529                                   "volume group \"%s\"", pv_dev_name(pv), vg->name);
530                     pv->vg_name = vg->fid->fmt->orphan_vg_name;
531                     pv->status = ALLOCATABLE_PV;
532 
533                     if (!dev_get_size(pv_dev(pv), &pv->size)) {
534                               log_error("%s: Couldn't get size.", pv_dev_name(pv));
535                               ret = 0;
536                               continue;
537                     }
538 
539                     /* FIXME Write to same sector label was read from */
540                     if (!pv_write(vg->cmd, pv, NULL, INT64_C(-1))) {
541                               log_error("Failed to remove physical volume \"%s\""
542                                           " from volume group \"%s\"",
543                                           pv_dev_name(pv), vg->name);
544                               ret = 0;
545                     }
546           }
547 
548           backup_remove(vg->cmd, vg->name);
549 
550           if (ret)
551                     log_print("Volume group \"%s\" successfully removed", vg->name);
552           else
553                     log_error("Volume group \"%s\" not properly removed", vg->name);
554 
555           unlock_vg(vg->cmd, VG_ORPHANS);
556           return ret;
557 }
558 
559 /*
560  * Extend a VG by a single PV / device path
561  *
562  * Parameters:
563  * - vg: handle of volume group to extend by 'pv_name'
564  * - pv_name: device path of PV to add to VG
565  * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
566  *
567  */
vg_extend_single_pv(struct volume_group * vg,char * pv_name,struct pvcreate_params * pp)568 static int vg_extend_single_pv(struct volume_group *vg, char *pv_name,
569                                      struct pvcreate_params *pp)
570 {
571           struct physical_volume *pv;
572 
573           pv = pv_by_path(vg->fid->fmt->cmd, pv_name);
574           if (!pv && !pp) {
575                     log_error("%s not identified as an existing "
576                                 "physical volume", pv_name);
577                     return 0;
578           } else if (!pv && pp) {
579                     pv = pvcreate_single(vg->cmd, pv_name, pp);
580                     if (!pv)
581                               return 0;
582           }
583           if (!add_pv_to_vg(vg, pv_name, pv))
584                     return 0;
585           return 1;
586 }
587 
588 /*
589  * Extend a VG by a single PV / device path
590  *
591  * Parameters:
592  * - vg: handle of volume group to extend by 'pv_name'
593  * - pv_count: count of device paths of PVs
594  * - pv_names: device paths of PVs to add to VG
595  * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
596  *
597  */
vg_extend(struct volume_group * vg,int pv_count,char ** pv_names,struct pvcreate_params * pp)598 int vg_extend(struct volume_group *vg, int pv_count, char **pv_names,
599                 struct pvcreate_params *pp)
600 {
601           int i;
602 
603           if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
604                     return 0;
605 
606           /* attach each pv */
607           for (i = 0; i < pv_count; i++) {
608                     if (!vg_extend_single_pv(vg, pv_names[i], pp))
609                               goto bad;
610           }
611 
612 /* FIXME Decide whether to initialise and add new mdahs to format instance */
613 
614           return 1;
615 
616       bad:
617           log_error("Unable to add physical volume '%s' to "
618                       "volume group '%s'.", pv_names[i], vg->name);
619           return 0;
620 }
621 
622 /* FIXME: use this inside vgreduce_single? */
vg_reduce(struct volume_group * vg,char * pv_name)623 int vg_reduce(struct volume_group *vg, char *pv_name)
624 {
625           struct physical_volume *pv;
626           struct pv_list *pvl;
627 
628           if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
629                     return 0;
630 
631           if (!archive(vg))
632                     goto bad;
633 
634           /* remove each pv */
635           if (!(pvl = find_pv_in_vg(vg, pv_name))) {
636                     log_error("Physical volume %s not in volume group %s.",
637                                 pv_name, vg->name);
638                     goto bad;
639           }
640 
641           pv = pvl->pv;
642 
643           if (pv_pe_alloc_count(pv)) {
644                     log_error("Physical volume %s still in use.",
645                                 pv_name);
646                     goto bad;
647           }
648 
649           if (!dev_get_size(pv_dev(pv), &pv->size)) {
650                     log_error("%s: Couldn't get size.", pv_name);
651                     goto bad;
652           }
653 
654           vg->pv_count--;
655           vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
656           vg->extent_count -= pv_pe_count(pv);
657 
658           /* add pv to the remove_pvs list */
659           dm_list_del(&pvl->list);
660           dm_list_add(&vg->removed_pvs, &pvl->list);
661 
662           return 1;
663 
664       bad:
665           log_error("Unable to remove physical volume '%s' from "
666                       "volume group '%s'.", pv_name, vg->name);
667           return 0;
668 }
669 
/*
 * Skip a leading 'dev_dir' prefix in 'vg_name', if present.
 *
 * Returns a pointer into 'vg_name': just past the prefix when 'vg_name'
 * starts with 'dev_dir', otherwise 'vg_name' itself.  Nothing is copied.
 */
const char *strip_dir(const char *vg_name, const char *dev_dir)
{
	const size_t prefix_len = strlen(dev_dir);

	return strncmp(vg_name, dev_dir, prefix_len) ? vg_name
						     : vg_name + prefix_len;
}
678 
679 /*
680  * Validate parameters to vg_create() before calling.
681  * FIXME: Move inside vg_create library function.
682  * FIXME: Change vgcreate_params struct to individual gets/sets
683  */
vgcreate_params_validate(struct cmd_context * cmd,struct vgcreate_params * vp)684 int vgcreate_params_validate(struct cmd_context *cmd,
685                                    struct vgcreate_params *vp)
686 {
687           if (!validate_new_vg_name(cmd, vp->vg_name)) {
688                     log_error("New volume group name \"%s\" is invalid",
689                                 vp->vg_name);
690                     return 1;
691           }
692 
693           if (vp->alloc == ALLOC_INHERIT) {
694                     log_error("Volume Group allocation policy cannot inherit "
695                                 "from anything");
696                     return 1;
697           }
698 
699           if (!vp->extent_size) {
700                     log_error("Physical extent size may not be zero");
701                     return 1;
702           }
703 
704           if (!(cmd->fmt->features & FMT_UNLIMITED_VOLS)) {
705                     if (!vp->max_lv)
706                               vp->max_lv = 255;
707                     if (!vp->max_pv)
708                               vp->max_pv = 255;
709                     if (vp->max_lv > 255 || vp->max_pv > 255) {
710                               log_error("Number of volumes may not exceed 255");
711                               return 1;
712                     }
713           }
714 
715           return 0;
716 }
717 
718 /*
719  * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a
720  * possible failure code or zero for success.
721  */
_vg_make_handle(struct cmd_context * cmd,struct volume_group * vg,uint32_t failure)722 static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
723                                    struct volume_group *vg,
724                                    uint32_t failure)
725 {
726           struct dm_pool *vgmem;
727 
728           if (!vg) {
729                     if (!(vgmem = dm_pool_create("lvm2 vg_handle", VG_MEMPOOL_CHUNK)) ||
730                         !(vg = dm_pool_zalloc(vgmem, sizeof(*vg)))) {
731                               log_error("Error allocating vg handle.");
732                               if (vgmem)
733                                         dm_pool_destroy(vgmem);
734                               return_NULL;
735                     }
736                     vg->vgmem = vgmem;
737           }
738 
739           vg->read_status = failure;
740 
741           return (struct volume_group *)vg;
742 }
743 
lv_has_unknown_segments(const struct logical_volume * lv)744 int lv_has_unknown_segments(const struct logical_volume *lv)
745 {
746           struct lv_segment *seg;
747           /* foreach segment */
748           dm_list_iterate_items(seg, &lv->segments)
749                     if (seg_unknown(seg))
750                               return 1;
751           return 0;
752 }
753 
vg_has_unknown_segments(const struct volume_group * vg)754 int vg_has_unknown_segments(const struct volume_group *vg)
755 {
756           struct lv_list *lvl;
757 
758           /* foreach LV */
759           dm_list_iterate_items(lvl, &vg->lvs)
760                     if (lv_has_unknown_segments(lvl->lv))
761                               return 1;
762           return 0;
763 }
764 
765 /*
766  * Create a VG with default parameters.
767  * Returns:
768  * - struct volume_group* with SUCCESS code: VG structure created
769  * - NULL or struct volume_group* with FAILED_* code: error creating VG structure
770  * Use vg_read_error() to determine success or failure.
771  * FIXME: cleanup usage of _vg_make_handle()
772  */
vg_create(struct cmd_context * cmd,const char * vg_name)773 struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
774 {
775           struct volume_group *vg;
776           int consistent = 0;
777           struct dm_pool *mem;
778           uint32_t rc;
779 
780           if (!validate_name(vg_name)) {
781                     log_error("Invalid vg name %s", vg_name);
782                     /* FIXME: use _vg_make_handle() w/proper error code */
783                     return NULL;
784           }
785 
786           rc = vg_lock_newname(cmd, vg_name);
787           if (rc != SUCCESS)
788                     /* NOTE: let caller decide - this may be check for existence */
789                     return _vg_make_handle(cmd, NULL, rc);
790 
791           /* FIXME: Is this vg_read_internal necessary? Move it inside
792              vg_lock_newname? */
793           /* is this vg name already in use ? */
794           if ((vg = vg_read_internal(cmd, vg_name, NULL, &consistent))) {
795                     log_error("A volume group called '%s' already exists.", vg_name);
796                     unlock_and_release_vg(cmd, vg, vg_name);
797                     return _vg_make_handle(cmd, NULL, FAILED_EXIST);
798           }
799 
800           if (!(mem = dm_pool_create("lvm2 vg_create", VG_MEMPOOL_CHUNK)))
801                     goto_bad;
802 
803           if (!(vg = dm_pool_zalloc(mem, sizeof(*vg))))
804                     goto_bad;
805 
806           if (!id_create(&vg->id)) {
807                     log_error("Couldn't create uuid for volume group '%s'.",
808                                 vg_name);
809                     goto bad;
810           }
811 
812           /* Strip dev_dir if present */
813           vg_name = strip_dir(vg_name, cmd->dev_dir);
814 
815           vg->vgmem = mem;
816           vg->cmd = cmd;
817 
818           if (!(vg->name = dm_pool_strdup(mem, vg_name)))
819                     goto_bad;
820 
821           vg->seqno = 0;
822 
823           vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
824           if (!(vg->system_id = dm_pool_alloc(mem, NAME_LEN)))
825                     goto_bad;
826 
827           *vg->system_id = '\0';
828 
829           vg->extent_size = DEFAULT_EXTENT_SIZE * 2;
830           vg->extent_count = 0;
831           vg->free_count = 0;
832 
833           vg->max_lv = DEFAULT_MAX_LV;
834           vg->max_pv = DEFAULT_MAX_PV;
835 
836           vg->alloc = DEFAULT_ALLOC_POLICY;
837 
838           vg->pv_count = 0;
839           dm_list_init(&vg->pvs);
840 
841           dm_list_init(&vg->lvs);
842 
843           dm_list_init(&vg->tags);
844 
845           /* initialize removed_pvs list */
846           dm_list_init(&vg->removed_pvs);
847 
848           if (!(vg->fid = cmd->fmt->ops->create_instance(cmd->fmt, vg_name,
849                                                                    NULL, NULL))) {
850                     log_error("Failed to create format instance");
851                     goto bad;
852           }
853 
854           if (vg->fid->fmt->ops->vg_setup &&
855               !vg->fid->fmt->ops->vg_setup(vg->fid, vg)) {
856                     log_error("Format specific setup of volume group '%s' failed.",
857                                 vg_name);
858                     goto bad;
859           }
860           return _vg_make_handle(cmd, vg, SUCCESS);
861 
862 bad:
863           unlock_and_release_vg(cmd, vg, vg_name);
864           /* FIXME: use _vg_make_handle() w/proper error code */
865           return NULL;
866 }
867 
/*
 * Convert a size into a whole number of extents of extent_size.
 *
 * A size that is not a multiple of extent_size is rounded up to the next
 * multiple, and the rounded size is reported with log_print().
 *
 * Returns the number of extents, or 0 (after logging an error) when the
 * rounded size needs more than UINT32_MAX extents.
 */
uint64_t extents_from_size(struct cmd_context *cmd, uint64_t size,
			   uint32_t extent_size)
{
	const uint64_t remainder = size % extent_size;
	const uint64_t limit = (uint64_t) UINT32_MAX * extent_size;

	if (remainder) {
		size += extent_size - remainder;
		log_print("Rounding up size to full physical extent %s",
			  display_size(cmd, size));
	}

	if (size <= limit)
		return (uint64_t) size / extent_size;

	log_error("Volume too large (%s) for extent size %s. "
		  "Upper limit is %s.",
		  display_size(cmd, size),
		  display_size(cmd, (uint64_t) extent_size),
		  display_size(cmd, limit));

	return 0;
}
889 
/*
 * Rescale an extent count from extents of old_size to extents of new_size.
 *
 * *extents is read as a count of old_size units and, on success, rewritten
 * as the equivalent count of new_size units.  desc1 and desc2 are only used
 * to name the quantity in error messages.
 *
 * Returns 1 on success.  Returns 0, leaving *extents untouched, if the
 * total is not an exact multiple of new_size or the new count does not fit
 * in 32 bits.
 */
static int _recalc_extents(uint32_t *extents, const char *desc1,
			   const char *desc2, uint32_t old_size,
			   uint32_t new_size)
{
	/* Widen before multiplying so the product cannot wrap at 32 bits */
	const uint64_t total = (uint64_t) old_size * (*extents);
	uint64_t count;

	if (total % new_size) {
		log_error("New size %" PRIu64 " for %s%s not an exact number "
			  "of new extents.", total, desc1, desc2);
		return 0;
	}

	count = total / new_size;
	if (count > UINT32_MAX) {
		log_error("New extent count %" PRIu64 " for %s%s exceeds "
			  "32 bits.", count, desc1, desc2);
		return 0;
	}

	*extents = (uint32_t) count;

	return 1;
}
914 
vg_set_extent_size(struct volume_group * vg,uint32_t new_size)915 int vg_set_extent_size(struct volume_group *vg, uint32_t new_size)
916 {
917           uint32_t old_size = vg->extent_size;
918           struct pv_list *pvl;
919           struct lv_list *lvl;
920           struct physical_volume *pv;
921           struct logical_volume *lv;
922           struct lv_segment *seg;
923           struct pv_segment *pvseg;
924           uint32_t s;
925 
926           if (!vg_is_resizeable(vg)) {
927                     log_error("Volume group \"%s\" must be resizeable "
928                                 "to change PE size", vg->name);
929                     return 0;
930           }
931 
932           if (!new_size) {
933                     log_error("Physical extent size may not be zero");
934                     return 0;
935           }
936 
937           if (new_size == vg->extent_size)
938                     return 1;
939 
940           if (new_size & (new_size - 1)) {
941                     log_error("Physical extent size must be a power of 2.");
942                     return 0;
943           }
944 
945           if (new_size > vg->extent_size) {
946                     if ((uint64_t) vg_size(vg) % new_size) {
947                               /* FIXME Adjust used PV sizes instead */
948                               log_error("New extent size is not a perfect fit");
949                               return 0;
950                     }
951           }
952 
953           vg->extent_size = new_size;
954 
955           if (vg->fid->fmt->ops->vg_setup &&
956               !vg->fid->fmt->ops->vg_setup(vg->fid, vg))
957                     return_0;
958 
959           if (!_recalc_extents(&vg->extent_count, vg->name, "", old_size,
960                                    new_size))
961                     return_0;
962 
963           if (!_recalc_extents(&vg->free_count, vg->name, " free space",
964                                    old_size, new_size))
965                     return_0;
966 
967           /* foreach PV */
968           dm_list_iterate_items(pvl, &vg->pvs) {
969                     pv = pvl->pv;
970 
971                     pv->pe_size = new_size;
972                     if (!_recalc_extents(&pv->pe_count, pv_dev_name(pv), "",
973                                              old_size, new_size))
974                               return_0;
975 
976                     if (!_recalc_extents(&pv->pe_alloc_count, pv_dev_name(pv),
977                                              " allocated space", old_size, new_size))
978                               return_0;
979 
980                     /* foreach free PV Segment */
981                     dm_list_iterate_items(pvseg, &pv->segments) {
982                               if (pvseg_is_allocated(pvseg))
983                                         continue;
984 
985                               if (!_recalc_extents(&pvseg->pe, pv_dev_name(pv),
986                                                        " PV segment start", old_size,
987                                                        new_size))
988                                         return_0;
989                               if (!_recalc_extents(&pvseg->len, pv_dev_name(pv),
990                                                        " PV segment length", old_size,
991                                                        new_size))
992                                         return_0;
993                     }
994           }
995 
996           /* foreach LV */
997           dm_list_iterate_items(lvl, &vg->lvs) {
998                     lv = lvl->lv;
999 
1000                     if (!_recalc_extents(&lv->le_count, lv->name, "", old_size,
1001                                              new_size))
1002                               return_0;
1003 
1004                     dm_list_iterate_items(seg, &lv->segments) {
1005                               if (!_recalc_extents(&seg->le, lv->name,
1006                                                        " segment start", old_size,
1007                                                        new_size))
1008                                         return_0;
1009 
1010                               if (!_recalc_extents(&seg->len, lv->name,
1011                                                        " segment length", old_size,
1012                                                        new_size))
1013                                         return_0;
1014 
1015                               if (!_recalc_extents(&seg->area_len, lv->name,
1016                                                        " area length", old_size,
1017                                                        new_size))
1018                                         return_0;
1019 
1020                               if (!_recalc_extents(&seg->extents_copied, lv->name,
1021                                                        " extents moved", old_size,
1022                                                        new_size))
1023                                         return_0;
1024 
1025                               /* foreach area */
1026                               for (s = 0; s < seg->area_count; s++) {
1027                                         switch (seg_type(seg, s)) {
1028                                         case AREA_PV:
1029                                                   if (!_recalc_extents
1030                                                       (&seg_pe(seg, s),
1031                                                        lv->name,
1032                                                        " pvseg start", old_size,
1033                                                        new_size))
1034                                                             return_0;
1035                                                   if (!_recalc_extents
1036                                                       (&seg_pvseg(seg, s)->len,
1037                                                        lv->name,
1038                                                        " pvseg length", old_size,
1039                                                        new_size))
1040                                                             return_0;
1041                                                   break;
1042                                         case AREA_LV:
1043                                                   if (!_recalc_extents
1044                                                       (&seg_le(seg, s), lv->name,
1045                                                        " area start", old_size,
1046                                                        new_size))
1047                                                             return_0;
1048                                                   break;
1049                                         case AREA_UNASSIGNED:
1050                                                   log_error("Unassigned area %u found in "
1051                                                               "segment", s);
1052                                                   return 0;
1053                                         }
1054                               }
1055                     }
1056 
1057           }
1058 
1059           return 1;
1060 }
1061 
vg_set_max_lv(struct volume_group * vg,uint32_t max_lv)1062 int vg_set_max_lv(struct volume_group *vg, uint32_t max_lv)
1063 {
1064           if (!vg_is_resizeable(vg)) {
1065                     log_error("Volume group \"%s\" must be resizeable "
1066                                 "to change MaxLogicalVolume", vg->name);
1067                     return 0;
1068           }
1069 
1070           if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
1071                     if (!max_lv)
1072                               max_lv = 255;
1073                     else if (max_lv > 255) {
1074                               log_error("MaxLogicalVolume limit is 255");
1075                               return 0;
1076                     }
1077           }
1078 
1079           if (max_lv && max_lv < vg_visible_lvs(vg)) {
1080                     log_error("MaxLogicalVolume is less than the current number "
1081                                 "%d of LVs for %s", vg_visible_lvs(vg),
1082                                 vg->name);
1083                     return 0;
1084           }
1085           vg->max_lv = max_lv;
1086 
1087           return 1;
1088 }
1089 
vg_set_max_pv(struct volume_group * vg,uint32_t max_pv)1090 int vg_set_max_pv(struct volume_group *vg, uint32_t max_pv)
1091 {
1092           if (!vg_is_resizeable(vg)) {
1093                     log_error("Volume group \"%s\" must be resizeable "
1094                                 "to change MaxPhysicalVolumes", vg->name);
1095                     return 0;
1096           }
1097 
1098           if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
1099                     if (!max_pv)
1100                               max_pv = 255;
1101                     else if (max_pv > 255) {
1102                               log_error("MaxPhysicalVolume limit is 255");
1103                               return 0;
1104                     }
1105           }
1106 
1107           if (max_pv && max_pv < vg->pv_count) {
1108                     log_error("MaxPhysicalVolumes is less than the current number "
1109                                 "%d of PVs for \"%s\"", vg->pv_count,
1110                                 vg->name);
1111                     return 0;
1112           }
1113           vg->max_pv = max_pv;
1114           return 1;
1115 }
1116 
/*
 * Set the allocation policy of a VG.
 *
 * ALLOC_INHERIT is rejected with an error.
 *
 * Returns 1 on success (also when the policy is already set), 0 on error.
 */
int vg_set_alloc_policy(struct volume_group *vg, alloc_policy_t alloc)
{
	if (alloc == ALLOC_INHERIT) {
		log_error("Volume Group allocation policy cannot inherit "
			  "from anything");
		return 0;
	}

	if (vg->alloc != alloc)
		vg->alloc = alloc;

	return 1;
}
1131 
vg_set_clustered(struct volume_group * vg,int clustered)1132 int vg_set_clustered(struct volume_group *vg, int clustered)
1133 {
1134           struct lv_list *lvl;
1135           if (clustered) {
1136                     dm_list_iterate_items(lvl, &vg->lvs) {
1137                               if (lv_is_origin(lvl->lv) || lv_is_cow(lvl->lv)) {
1138                                         log_error("Volume group %s contains snapshots "
1139                                                     "that are not yet supported.",
1140                                                     vg->name);
1141                                         return 0;
1142                               }
1143                     }
1144           }
1145 
1146           if (clustered)
1147                     vg->status |= CLUSTERED;
1148           else
1149                     vg->status &= ~CLUSTERED;
1150           return 1;
1151 }
1152 
1153 /*
1154  * Separate metadata areas after splitting a VG.
1155  * Also accepts orphan VG as destination (for vgreduce).
1156  */
vg_split_mdas(struct cmd_context * cmd __attribute ((unused)),struct volume_group * vg_from,struct volume_group * vg_to)1157 int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
1158                       struct volume_group *vg_from, struct volume_group *vg_to)
1159 {
1160           struct metadata_area *mda, *mda2;
1161           struct dm_list *mdas_from, *mdas_to;
1162           int common_mda = 0;
1163 
1164           mdas_from = &vg_from->fid->metadata_areas;
1165           mdas_to = &vg_to->fid->metadata_areas;
1166 
1167           dm_list_iterate_items_safe(mda, mda2, mdas_from) {
1168                     if (!mda->ops->mda_in_vg) {
1169                               common_mda = 1;
1170                               continue;
1171                     }
1172 
1173                     if (!mda->ops->mda_in_vg(vg_from->fid, vg_from, mda)) {
1174                               if (is_orphan_vg(vg_to->name))
1175                                         dm_list_del(&mda->list);
1176                               else
1177                                         dm_list_move(mdas_to, &mda->list);
1178                     }
1179           }
1180 
1181           if (dm_list_empty(mdas_from) ||
1182               (!is_orphan_vg(vg_to->name) && dm_list_empty(mdas_to)))
1183                     return common_mda;
1184 
1185           return 1;
1186 }
1187 
1188 /*
1189  * See if we may pvcreate on this device.
1190  * 0 indicates we may not.
1191  */
pvcreate_check(struct cmd_context * cmd,const char * name,struct pvcreate_params * pp)1192 static int pvcreate_check(struct cmd_context *cmd, const char *name,
1193                                 struct pvcreate_params *pp)
1194 {
1195           struct physical_volume *pv;
1196           struct device *dev;
1197           uint64_t md_superblock, swap_signature;
1198           int wipe_md, wipe_swap;
1199 
1200           /* FIXME Check partition type is LVM unless --force is given */
1201 
1202           /* Is there a pv here already? */
1203           pv = pv_read(cmd, name, NULL, NULL, 0, 0);
1204 
1205           /*
1206            * If a PV has no MDAs it may appear to be an orphan until the
1207            * metadata is read off another PV in the same VG.  Detecting
1208            * this means checking every VG by scanning every PV on the
1209            * system.
1210            */
1211           if (pv && is_orphan(pv)) {
1212                     if (!scan_vgs_for_pvs(cmd))
1213                               return_0;
1214                     pv = pv_read(cmd, name, NULL, NULL, 0, 0);
1215           }
1216 
1217           /* Allow partial & exported VGs to be destroyed. */
1218           /* We must have -ff to overwrite a non orphan */
1219           if (pv && !is_orphan(pv) && pp->force != DONT_PROMPT_OVERRIDE) {
1220                     log_error("Can't initialize physical volume \"%s\" of "
1221                                 "volume group \"%s\" without -ff", name, pv_vg_name(pv));
1222                     return 0;
1223           }
1224 
1225           /* prompt */
1226           if (pv && !is_orphan(pv) && !pp->yes &&
1227               yes_no_prompt(_really_init, name, pv_vg_name(pv)) == 'n') {
1228                     log_print("%s: physical volume not initialized", name);
1229                     return 0;
1230           }
1231 
1232           if (sigint_caught())
1233                     return 0;
1234 
1235           dev = dev_cache_get(name, cmd->filter);
1236 
1237           /* Is there an md superblock here? */
1238           if (!dev && md_filtering()) {
1239                     unlock_vg(cmd, VG_ORPHANS);
1240 
1241                     persistent_filter_wipe(cmd->filter);
1242                     lvmcache_destroy(cmd, 1);
1243 
1244                     init_md_filtering(0);
1245                     if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE)) {
1246                               log_error("Can't get lock for orphan PVs");
1247                               init_md_filtering(1);
1248                               return 0;
1249                     }
1250                     dev = dev_cache_get(name, cmd->filter);
1251                     init_md_filtering(1);
1252           }
1253 
1254           if (!dev) {
1255                     log_error("Device %s not found (or ignored by filtering).", name);
1256                     return 0;
1257           }
1258 
1259           /*
1260            * This test will fail if the device belongs to an MD array.
1261            */
1262           if (!dev_test_excl(dev)) {
1263                     /* FIXME Detect whether device-mapper itself is still using it */
1264                     log_error("Can't open %s exclusively.  Mounted filesystem?",
1265                                 name);
1266                     return 0;
1267           }
1268 
1269           /* Wipe superblock? */
1270           if ((wipe_md = dev_is_md(dev, &md_superblock)) == 1 &&
1271               ((!pp->idp && !pp->restorefile) || pp->yes ||
1272                (yes_no_prompt("Software RAID md superblock "
1273                                   "detected on %s. Wipe it? [y/n] ", name) == 'y'))) {
1274                     log_print("Wiping software RAID md superblock on %s", name);
1275                     if (!dev_set(dev, md_superblock, 4, 0)) {
1276                               log_error("Failed to wipe RAID md superblock on %s",
1277                                           name);
1278                               return 0;
1279                     }
1280           }
1281 
1282           if (wipe_md == -1) {
1283                     log_error("Fatal error while trying to detect software "
1284                                 "RAID md superblock on %s", name);
1285                     return 0;
1286           }
1287 
1288           if ((wipe_swap = dev_is_swap(dev, &swap_signature)) == 1 &&
1289               ((!pp->idp && !pp->restorefile) || pp->yes ||
1290                (yes_no_prompt("Swap signature detected on %s. Wipe it? [y/n] ",
1291                                   name) == 'y'))) {
1292                     log_print("Wiping swap signature on %s", name);
1293                     if (!dev_set(dev, swap_signature, 10, 0)) {
1294                               log_error("Failed to wipe swap signature on %s", name);
1295                               return 0;
1296                     }
1297           }
1298 
1299           if (wipe_swap == -1) {
1300                     log_error("Fatal error while trying to detect swap "
1301                                 "signature on %s", name);
1302                     return 0;
1303           }
1304 
1305           if (sigint_caught())
1306                     return 0;
1307 
1308           if (pv && !is_orphan(pv) && pp->force) {
1309                     log_warn("WARNING: Forcing physical volume creation on "
1310                                 "%s%s%s%s", name,
1311                                 !is_orphan(pv) ? " of volume group \"" : "",
1312                                 !is_orphan(pv) ? pv_vg_name(pv) : "",
1313                                 !is_orphan(pv) ? "\"" : "");
1314           }
1315 
1316           return 1;
1317 }
1318 
pvcreate_params_set_defaults(struct pvcreate_params * pp)1319 void pvcreate_params_set_defaults(struct pvcreate_params *pp)
1320 {
1321           memset(pp, 0, sizeof(*pp));
1322           pp->zero = 1;
1323           pp->size = 0;
1324           pp->data_alignment = UINT64_C(0);
1325           pp->data_alignment_offset = UINT64_C(0);
1326           pp->pvmetadatacopies = DEFAULT_PVMETADATACOPIES;
1327           pp->pvmetadatasize = DEFAULT_PVMETADATASIZE;
1328           pp->labelsector = DEFAULT_LABELSECTOR;
1329           pp->idp = 0;
1330           pp->pe_start = 0;
1331           pp->extent_count = 0;
1332           pp->extent_size = 0;
1333           pp->restorefile = 0;
1334           pp->force = PROMPT;
1335           pp->yes = 0;
1336 }
1337 
1338 /*
1339  * pvcreate_single() - initialize a device with PV label and metadata area
1340  *
1341  * Parameters:
1342  * - pv_name: device path to initialize
1343  * - pp: parameters to pass to pv_create; if NULL, use default values
1344  *
1345  * Returns:
1346  * NULL: error
1347  * struct physical_volume * (non-NULL): handle to physical volume created
1348  */
pvcreate_single(struct cmd_context * cmd,const char * pv_name,struct pvcreate_params * pp)1349 struct physical_volume * pvcreate_single(struct cmd_context *cmd,
1350                                                    const char *pv_name,
1351                                                    struct pvcreate_params *pp)
1352 {
1353           void *pv;
1354           struct device *dev;
1355           struct dm_list mdas;
1356           struct pvcreate_params default_pp;
1357           char buffer[64] __attribute((aligned(8)));
1358 
1359           pvcreate_params_set_defaults(&default_pp);
1360           if (!pp)
1361                     pp = &default_pp;
1362 
1363           if (pp->idp) {
1364                     if ((dev = device_from_pvid(cmd, pp->idp)) &&
1365                         (dev != dev_cache_get(pv_name, cmd->filter))) {
1366                               if (!id_write_format((const struct id*)&pp->idp->uuid,
1367                                   buffer, sizeof(buffer)))
1368                                         return_NULL;
1369                               log_error("uuid %s already in use on \"%s\"", buffer,
1370                                           dev_name(dev));
1371                               return NULL;
1372                     }
1373           }
1374 
1375           if (!pvcreate_check(cmd, pv_name, pp))
1376                     goto error;
1377 
1378           if (sigint_caught())
1379                     goto error;
1380 
1381           if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
1382                     log_error("%s: Couldn't find device.  Check your filters?",
1383                                 pv_name);
1384                     goto error;
1385           }
1386 
1387           dm_list_init(&mdas);
1388           if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
1389                                    pp->data_alignment, pp->data_alignment_offset,
1390                                    pp->pe_start, pp->extent_count, pp->extent_size,
1391                                    pp->pvmetadatacopies,
1392                                    pp->pvmetadatasize,&mdas))) {
1393                     log_error("Failed to setup physical volume \"%s\"", pv_name);
1394                     goto error;
1395           }
1396 
1397           log_verbose("Set up physical volume for \"%s\" with %" PRIu64
1398                         " available sectors", pv_name, pv_size(pv));
1399 
1400           /* Wipe existing label first */
1401           if (!label_remove(pv_dev(pv))) {
1402                     log_error("Failed to wipe existing label on %s", pv_name);
1403                     goto error;
1404           }
1405 
1406           if (pp->zero) {
1407                     log_verbose("Zeroing start of device %s", pv_name);
1408                     if (!dev_open_quiet(dev)) {
1409                               log_error("%s not opened: device not zeroed", pv_name);
1410                               goto error;
1411                     }
1412 
1413                     if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
1414                               log_error("%s not wiped: aborting", pv_name);
1415                               dev_close(dev);
1416                               goto error;
1417                     }
1418                     dev_close(dev);
1419           }
1420 
1421           log_very_verbose("Writing physical volume data to disk \"%s\"",
1422                                pv_name);
1423           if (!(pv_write(cmd, (struct physical_volume *)pv, &mdas,
1424                            pp->labelsector))) {
1425                     log_error("Failed to write physical volume \"%s\"", pv_name);
1426                     goto error;
1427           }
1428 
1429           log_print("Physical volume \"%s\" successfully created", pv_name);
1430 
1431           return pv;
1432 
1433       error:
1434           return NULL;
1435 }
1436 
/* Hand a PV structure back to the memory pool mem with dm_pool_free(). */
static void _free_pv(struct dm_pool *mem, struct physical_volume *pv)
{
	dm_pool_free(mem, pv);
}
1441 
_alloc_pv(struct dm_pool * mem,struct device * dev)1442 static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
1443 {
1444           struct physical_volume *pv = dm_pool_zalloc(mem, sizeof(*pv));
1445 
1446           if (!pv)
1447                     return_NULL;
1448 
1449           if (!(pv->vg_name = dm_pool_zalloc(mem, NAME_LEN))) {
1450                     dm_pool_free(mem, pv);
1451                     return NULL;
1452           }
1453 
1454           pv->pe_size = 0;
1455           pv->pe_start = 0;
1456           pv->pe_count = 0;
1457           pv->pe_alloc_count = 0;
1458           pv->pe_align = 0;
1459           pv->pe_align_offset = 0;
1460           pv->fmt = NULL;
1461           pv->dev = dev;
1462 
1463           pv->status = ALLOCATABLE_PV;
1464 
1465           dm_list_init(&pv->tags);
1466           dm_list_init(&pv->segments);
1467 
1468           return pv;
1469 }
1470 
1471 /**
1472  * pv_create - initialize a physical volume for use with a volume group
1473  *
1474  * @fmt: format type
1475  * @dev: PV device to initialize
1476  * @size: size of the PV in sectors
1477  * @data_alignment: requested alignment of data
1478  * @data_alignment_offset: requested offset to aligned data
1479  * @pe_start: physical extent start
1480  * @existing_extent_count
1481  * @existing_extent_size
1482  * @pvmetadatacopies
1483  * @pvmetadatasize
1484  * @mdas
1485  *
1486  * Returns:
1487  *   PV handle - physical volume initialized successfully
1488  *   NULL - invalid parameter or problem initializing the physical volume
1489  *
1490  * Note:
1491  *   FIXME: shorten argument list and replace with explict 'set' functions
1492  */
pv_create(const struct cmd_context * cmd,struct device * dev,struct id * id,uint64_t size,unsigned long data_alignment,unsigned long data_alignment_offset,uint64_t pe_start,uint32_t existing_extent_count,uint32_t existing_extent_size,int pvmetadatacopies,uint64_t pvmetadatasize,struct dm_list * mdas)1493 struct physical_volume *pv_create(const struct cmd_context *cmd,
1494                                           struct device *dev,
1495                                           struct id *id, uint64_t size,
1496                                           unsigned long data_alignment,
1497                                           unsigned long data_alignment_offset,
1498                                           uint64_t pe_start,
1499                                           uint32_t existing_extent_count,
1500                                           uint32_t existing_extent_size,
1501                                           int pvmetadatacopies,
1502                                           uint64_t pvmetadatasize, struct dm_list *mdas)
1503 {
1504           const struct format_type *fmt = cmd->fmt;
1505           struct dm_pool *mem = fmt->cmd->mem;
1506           struct physical_volume *pv = _alloc_pv(mem, dev);
1507 
1508           if (!pv)
1509                     return NULL;
1510 
1511           if (id)
1512                     memcpy(&pv->id, id, sizeof(*id));
1513           else if (!id_create(&pv->id)) {
1514                     log_error("Failed to create random uuid for %s.",
1515                                 dev_name(dev));
1516                     goto bad;
1517           }
1518 
1519           if (!dev_get_size(pv->dev, &pv->size)) {
1520                     log_error("%s: Couldn't get size.", pv_dev_name(pv));
1521                     goto bad;
1522           }
1523 
1524           if (size) {
1525                     if (size > pv->size)
1526                               log_warn("WARNING: %s: Overriding real size. "
1527                                           "You could lose data.", pv_dev_name(pv));
1528                     log_verbose("%s: Pretending size is %" PRIu64 " sectors.",
1529                                   pv_dev_name(pv), size);
1530                     pv->size = size;
1531           }
1532 
1533           if (pv->size < PV_MIN_SIZE) {
1534                     log_error("%s: Size must exceed minimum of %ld sectors.",
1535                                 pv_dev_name(pv), PV_MIN_SIZE);
1536                     goto bad;
1537           }
1538 
1539           if (pv->size < data_alignment) {
1540                     log_error("%s: Data alignment must not exceed device size.",
1541                                 pv_dev_name(pv));
1542                     goto bad;
1543           }
1544 
1545           pv->fmt = fmt;
1546           pv->vg_name = fmt->orphan_vg_name;
1547 
1548           if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
1549                                         existing_extent_size, data_alignment,
1550                                         data_alignment_offset,
1551                                         pvmetadatacopies, pvmetadatasize, mdas,
1552                                         pv, NULL)) {
1553                     log_error("%s: Format-specific setup of physical volume "
1554                                 "failed.", pv_dev_name(pv));
1555                     goto bad;
1556           }
1557 
1558           return pv;
1559 
1560       bad:
1561           _free_pv(mem, pv);
1562           return NULL;
1563 }
1564 
/* FIXME: liblvm todo - make into function that returns handle */
/* Public wrapper: forwards the lookup to _find_pv_in_vg() unchanged. */
struct pv_list *find_pv_in_vg(const struct volume_group *vg,
			      const char *pv_name)
{
	struct pv_list *match = _find_pv_in_vg(vg, pv_name);

	return match;
}
1571 
_find_pv_in_vg(const struct volume_group * vg,const char * pv_name)1572 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
1573                                               const char *pv_name)
1574 {
1575           struct pv_list *pvl;
1576 
1577           dm_list_iterate_items(pvl, &vg->pvs)
1578                     if (pvl->pv->dev == dev_cache_get(pv_name, vg->cmd->filter))
1579                               return pvl;
1580 
1581           return NULL;
1582 }
1583 
find_pv_in_pv_list(const struct dm_list * pl,const struct physical_volume * pv)1584 struct pv_list *find_pv_in_pv_list(const struct dm_list *pl,
1585                                            const struct physical_volume *pv)
1586 {
1587           struct pv_list *pvl;
1588 
1589           dm_list_iterate_items(pvl, pl)
1590                     if (pvl->pv == pv)
1591                               return pvl;
1592 
1593           return NULL;
1594 }
1595 
pv_is_in_vg(struct volume_group * vg,struct physical_volume * pv)1596 int pv_is_in_vg(struct volume_group *vg, struct physical_volume *pv)
1597 {
1598           struct pv_list *pvl;
1599 
1600           dm_list_iterate_items(pvl, &vg->pvs)
1601                     if (pv == pvl->pv)
1602                                return 1;
1603 
1604           return 0;
1605 }
1606 
/**
 * find_pv_in_vg_by_uuid - Look up a PV inside a VG by its UUID
 * @vg: volume group whose PV list is searched
 * @id: UUID to look for
 *
 * Public wrapper around _find_pv_in_vg_by_uuid().
 *
 * Returns:
 *   PV handle - a PV with this UUID is present in the VG
 *   NULL - no PV in the VG carries this UUID
 *
 * Note
 *   FIXME - liblvm todo - make into function that takes VG handle
 */
struct physical_volume *find_pv_in_vg_by_uuid(const struct volume_group *vg,
					      const struct id *id)
{
	struct physical_volume *match = _find_pv_in_vg_by_uuid(vg, id);

	return match;
}
1624 
1625 
_find_pv_in_vg_by_uuid(const struct volume_group * vg,const struct id * id)1626 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
1627                                                                   const struct id *id)
1628 {
1629           struct pv_list *pvl;
1630 
1631           dm_list_iterate_items(pvl, &vg->pvs)
1632                     if (id_equal(&pvl->pv->id, id))
1633                               return pvl->pv;
1634 
1635           return NULL;
1636 }
1637 
find_lv_in_vg(const struct volume_group * vg,const char * lv_name)1638 struct lv_list *find_lv_in_vg(const struct volume_group *vg,
1639                                     const char *lv_name)
1640 {
1641           struct lv_list *lvl;
1642           const char *ptr;
1643 
1644           /* Use last component */
1645           if ((ptr = strrchr(lv_name, '/')))
1646                     ptr++;
1647           else
1648                     ptr = lv_name;
1649 
1650           dm_list_iterate_items(lvl, &vg->lvs)
1651                     if (!strcmp(lvl->lv->name, ptr))
1652                               return lvl;
1653 
1654           return NULL;
1655 }
1656 
find_lv_in_lv_list(const struct dm_list * ll,const struct logical_volume * lv)1657 struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
1658                                            const struct logical_volume *lv)
1659 {
1660           struct lv_list *lvl;
1661 
1662           dm_list_iterate_items(lvl, ll)
1663                     if (lvl->lv == lv)
1664                               return lvl;
1665 
1666           return NULL;
1667 }
1668 
find_lv_in_vg_by_lvid(struct volume_group * vg,const union lvid * lvid)1669 struct lv_list *find_lv_in_vg_by_lvid(struct volume_group *vg,
1670                                               const union lvid *lvid)
1671 {
1672           struct lv_list *lvl;
1673 
1674           dm_list_iterate_items(lvl, &vg->lvs)
1675                     if (!strncmp(lvl->lv->lvid.s, lvid->s, sizeof(*lvid)))
1676                               return lvl;
1677 
1678           return NULL;
1679 }
1680 
find_lv(const struct volume_group * vg,const char * lv_name)1681 struct logical_volume *find_lv(const struct volume_group *vg,
1682                                      const char *lv_name)
1683 {
1684           struct lv_list *lvl = find_lv_in_vg(vg, lv_name);
1685           return lvl ? lvl->lv : NULL;
1686 }
1687 
find_pv(struct volume_group * vg,struct device * dev)1688 struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
1689 {
1690           struct pv_list *pvl;
1691 
1692           dm_list_iterate_items(pvl, &vg->pvs)
1693                     if (dev == pvl->pv->dev)
1694                               return pvl->pv;
1695 
1696           return NULL;
1697 }
1698 
/* FIXME: liblvm todo - make into function that returns handle */
/* Public wrapper: forwards the lookup to _find_pv_by_name() unchanged. */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
					const char *pv_name)
{
	struct physical_volume *pv = _find_pv_by_name(cmd, pv_name);

	return pv;
}
1705 
1706 
_find_pv_by_name(struct cmd_context * cmd,const char * pv_name)1707 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
1708                                                             const char *pv_name)
1709 {
1710           struct physical_volume *pv;
1711 
1712           if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
1713                     log_error("Physical volume %s not found", pv_name);
1714                     return NULL;
1715           }
1716 
1717           if (is_orphan_vg(pv->vg_name)) {
1718                     /* If a PV has no MDAs - need to search all VGs for it */
1719                     if (!scan_vgs_for_pvs(cmd))
1720                               return_NULL;
1721                     if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
1722                               log_error("Physical volume %s not found", pv_name);
1723                               return NULL;
1724                     }
1725           }
1726 
1727           if (is_orphan_vg(pv->vg_name)) {
1728                     log_error("Physical volume %s not in a volume group", pv_name);
1729                     return NULL;
1730           }
1731 
1732           return pv;
1733 }
1734 
1735 /* Find segment at a given logical extent in an LV */
find_seg_by_le(const struct logical_volume * lv,uint32_t le)1736 struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
1737 {
1738           struct lv_segment *seg;
1739 
1740           dm_list_iterate_items(seg, &lv->segments)
1741                     if (le >= seg->le && le < seg->le + seg->len)
1742                               return seg;
1743 
1744           return NULL;
1745 }
1746 
first_seg(const struct logical_volume * lv)1747 struct lv_segment *first_seg(const struct logical_volume *lv)
1748 {
1749           struct lv_segment *seg;
1750 
1751           dm_list_iterate_items(seg, &lv->segments)
1752                     return seg;
1753 
1754           return NULL;
1755 }
1756 
1757 /* Find segment at a given physical extent in a PV */
find_peg_by_pe(const struct physical_volume * pv,uint32_t pe)1758 struct pv_segment *find_peg_by_pe(const struct physical_volume *pv, uint32_t pe)
1759 {
1760           struct pv_segment *peg;
1761 
1762           dm_list_iterate_items(peg, &pv->segments)
1763                     if (pe >= peg->pe && pe < peg->pe + peg->len)
1764                               return peg;
1765 
1766           return NULL;
1767 }
1768 
vg_remove_mdas(struct volume_group * vg)1769 int vg_remove_mdas(struct volume_group *vg)
1770 {
1771           struct metadata_area *mda;
1772 
1773           /* FIXME Improve recovery situation? */
1774           /* Remove each copy of the metadata */
1775           dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1776                     if (mda->ops->vg_remove &&
1777                         !mda->ops->vg_remove(vg->fid, vg, mda))
1778                               return_0;
1779           }
1780 
1781           return 1;
1782 }
1783 
snapshot_count(const struct volume_group * vg)1784 unsigned snapshot_count(const struct volume_group *vg)
1785 {
1786           struct lv_list *lvl;
1787           unsigned num_snapshots = 0;
1788 
1789           dm_list_iterate_items(lvl, &vg->lvs)
1790                     if (lv_is_cow(lvl->lv))
1791                               num_snapshots++;
1792 
1793           return num_snapshots;
1794 }
1795 
vg_visible_lvs(const struct volume_group * vg)1796 unsigned vg_visible_lvs(const struct volume_group *vg)
1797 {
1798           struct lv_list *lvl;
1799           unsigned lv_count = 0;
1800 
1801           dm_list_iterate_items(lvl, &vg->lvs) {
1802                     if (lv_is_visible(lvl->lv))
1803                               lv_count++;
1804           }
1805 
1806           return lv_count;
1807 }
1808 
1809 /*
1810  * Determine whether two vgs are compatible for merging.
1811  */
vgs_are_compatible(struct cmd_context * cmd __attribute ((unused)),struct volume_group * vg_from,struct volume_group * vg_to)1812 int vgs_are_compatible(struct cmd_context *cmd __attribute((unused)),
1813                            struct volume_group *vg_from,
1814                            struct volume_group *vg_to)
1815 {
1816           struct lv_list *lvl1, *lvl2;
1817           struct pv_list *pvl;
1818           char *name1, *name2;
1819 
1820           if (lvs_in_vg_activated(vg_from)) {
1821                     log_error("Logical volumes in \"%s\" must be inactive",
1822                                 vg_from->name);
1823                     return 0;
1824           }
1825 
1826           /* Check compatibility */
1827           if (vg_to->extent_size != vg_from->extent_size) {
1828                     log_error("Extent sizes differ: %d (%s) and %d (%s)",
1829                                 vg_to->extent_size, vg_to->name,
1830                                 vg_from->extent_size, vg_from->name);
1831                     return 0;
1832           }
1833 
1834           if (vg_to->max_pv &&
1835               (vg_to->max_pv < vg_to->pv_count + vg_from->pv_count)) {
1836                     log_error("Maximum number of physical volumes (%d) exceeded "
1837                                 " for \"%s\" and \"%s\"", vg_to->max_pv, vg_to->name,
1838                                 vg_from->name);
1839                     return 0;
1840           }
1841 
1842           if (vg_to->max_lv &&
1843               (vg_to->max_lv < vg_visible_lvs(vg_to) + vg_visible_lvs(vg_from))) {
1844                     log_error("Maximum number of logical volumes (%d) exceeded "
1845                                 " for \"%s\" and \"%s\"", vg_to->max_lv, vg_to->name,
1846                                 vg_from->name);
1847                     return 0;
1848           }
1849 
1850           /* Metadata types must be the same */
1851           if (vg_to->fid->fmt != vg_from->fid->fmt) {
1852                     log_error("Metadata types differ for \"%s\" and \"%s\"",
1853                                 vg_to->name, vg_from->name);
1854                     return 0;
1855           }
1856 
1857           /* Clustering attribute must be the same */
1858           if (vg_is_clustered(vg_to) != vg_is_clustered(vg_from)) {
1859                     log_error("Clustered attribute differs for \"%s\" and \"%s\"",
1860                                 vg_to->name, vg_from->name);
1861                     return 0;
1862           }
1863 
1864           /* Check no conflicts with LV names */
1865           dm_list_iterate_items(lvl1, &vg_to->lvs) {
1866                     name1 = lvl1->lv->name;
1867 
1868                     dm_list_iterate_items(lvl2, &vg_from->lvs) {
1869                               name2 = lvl2->lv->name;
1870 
1871                               if (!strcmp(name1, name2)) {
1872                                         log_error("Duplicate logical volume "
1873                                                     "name \"%s\" "
1874                                                     "in \"%s\" and \"%s\"",
1875                                                     name1, vg_to->name, vg_from->name);
1876                                         return 0;
1877                               }
1878                     }
1879           }
1880 
1881           /* Check no PVs are constructed from either VG */
1882           dm_list_iterate_items(pvl, &vg_to->pvs) {
1883                     if (pv_uses_vg(pvl->pv, vg_from)) {
1884                               log_error("Physical volume %s might be constructed "
1885                                           "from same volume group %s.",
1886                                           pv_dev_name(pvl->pv), vg_from->name);
1887                               return 0;
1888                     }
1889           }
1890 
1891           dm_list_iterate_items(pvl, &vg_from->pvs) {
1892                     if (pv_uses_vg(pvl->pv, vg_to)) {
1893                               log_error("Physical volume %s might be constructed "
1894                                           "from same volume group %s.",
1895                                           pv_dev_name(pvl->pv), vg_to->name);
1896                               return 0;
1897                     }
1898           }
1899 
1900           return 1;
1901 }
1902 
/*
 * Carries the user's callback and its argument through
 * _lv_each_dependency(), whose own callback slot is occupied by
 * _lv_postorder_level() during a postorder walk.
 */
struct _lv_postorder_baton {
	/* callback to run on each LV */
	int (*fn)(struct logical_volume *lv, void *data);
	/* opaque argument passed to fn */
	void *data;
};
1907 
1908 static int _lv_postorder_visit(struct logical_volume *,
1909                                      int (*fn)(struct logical_volume *lv, void *data),
1910                                      void *data);
1911 
_lv_postorder_level(struct logical_volume * lv,void * data)1912 static int _lv_postorder_level(struct logical_volume *lv, void *data)
1913 {
1914           struct _lv_postorder_baton *baton = data;
1915           if (lv->status & POSTORDER_OPEN_FLAG)
1916                     return 1; // a data structure loop has closed...
1917           lv->status |= POSTORDER_OPEN_FLAG;
1918           int r =_lv_postorder_visit(lv, baton->fn, baton->data);
1919           lv->status &= ~POSTORDER_OPEN_FLAG;
1920           lv->status |= POSTORDER_FLAG;
1921           return r;
1922 };
1923 
_lv_each_dependency(struct logical_volume * lv,int (* fn)(struct logical_volume * lv,void * data),void * data)1924 static int _lv_each_dependency(struct logical_volume *lv,
1925                                      int (*fn)(struct logical_volume *lv, void *data),
1926                                      void *data)
1927 {
1928           int i, s;
1929           struct lv_segment *lvseg;
1930 
1931           struct logical_volume *deps[] = {
1932                     lv->snapshot ? lv->snapshot->origin : 0,
1933                     lv->snapshot ? lv->snapshot->cow : 0 };
1934           for (i = 0; i < sizeof(deps) / sizeof(*deps); ++i) {
1935                     if (deps[i] && !fn(deps[i], data))
1936                               return_0;
1937           }
1938 
1939           dm_list_iterate_items(lvseg, &lv->segments) {
1940                     if (lvseg->log_lv && !fn(lvseg->log_lv, data))
1941                               return_0;
1942                     for (s = 0; s < lvseg->area_count; ++s) {
1943                               if (seg_type(lvseg, s) == AREA_LV && !fn(seg_lv(lvseg,s), data))
1944                                         return_0;
1945                     }
1946           }
1947           return 1;
1948 }
1949 
_lv_postorder_cleanup(struct logical_volume * lv,void * data)1950 static int _lv_postorder_cleanup(struct logical_volume *lv, void *data)
1951 {
1952           if (!(lv->status & POSTORDER_FLAG))
1953                     return 1;
1954           lv->status &= ~POSTORDER_FLAG;
1955 
1956           if (!_lv_each_dependency(lv, _lv_postorder_cleanup, data))
1957                     return_0;
1958           return 1;
1959 }
1960 
_lv_postorder_visit(struct logical_volume * lv,int (* fn)(struct logical_volume * lv,void * data),void * data)1961 static int _lv_postorder_visit(struct logical_volume *lv,
1962                                      int (*fn)(struct logical_volume *lv, void *data),
1963                                      void *data)
1964 {
1965           struct _lv_postorder_baton baton;
1966           int r;
1967 
1968           if (lv->status & POSTORDER_FLAG)
1969                     return 1;
1970 
1971           baton.fn = fn;
1972           baton.data = data;
1973           r = _lv_each_dependency(lv, _lv_postorder_level, &baton);
1974           if (r)
1975                     r = fn(lv, data);
1976 
1977           return r;
1978 }
1979 
/*
 * Depth-first walk of the LV dependency graph rooted at 'lv', calling the
 * callback "fn" on each LV in postorder.  The void *data is handed to
 * every call unchanged.  A callback that returns zero signals an error and
 * stops the walk; that error becomes the return value of _lv_postorder.
 *
 * The marker flags left on the LVs by the walk are cleared again before
 * returning, whether or not the walk succeeded.
 */
static int _lv_postorder(struct logical_volume *lv,
			 int (*fn)(struct logical_volume *lv, void *data),
			 void *data)
{
	int ret = _lv_postorder_visit(lv, fn, data);

	_lv_postorder_cleanup(lv, 0);

	return ret;
}
1996 
/* Accumulator handed to _lv_mark_if_partial_collect(). */
struct _lv_mark_if_partial_baton {
	/* set to 1 once any visited LV has PARTIAL_LV set */
	int partial;
};
2000 
_lv_mark_if_partial_collect(struct logical_volume * lv,void * data)2001 static int _lv_mark_if_partial_collect(struct logical_volume *lv, void *data)
2002 {
2003           struct _lv_mark_if_partial_baton *baton = data;
2004           if (lv->status & PARTIAL_LV)
2005                     baton->partial = 1;
2006 
2007           return 1;
2008 }
2009 
_lv_mark_if_partial_single(struct logical_volume * lv,void * data)2010 static int _lv_mark_if_partial_single(struct logical_volume *lv, void *data)
2011 {
2012           int s;
2013           struct _lv_mark_if_partial_baton baton;
2014           struct lv_segment *lvseg;
2015 
2016           dm_list_iterate_items(lvseg, &lv->segments) {
2017                     for (s = 0; s < lvseg->area_count; ++s) {
2018                               if (seg_type(lvseg, s) == AREA_PV) {
2019                                         if (seg_pv(lvseg, s)->status & MISSING_PV)
2020                                                   lv->status |= PARTIAL_LV;
2021                               }
2022                     }
2023           }
2024 
2025           baton.partial = 0;
2026           _lv_each_dependency(lv, _lv_mark_if_partial_collect, &baton);
2027 
2028           if (baton.partial)
2029                     lv->status |= PARTIAL_LV;
2030 
2031           return 1;
2032 }
2033 
_lv_mark_if_partial(struct logical_volume * lv)2034 static int _lv_mark_if_partial(struct logical_volume *lv)
2035 {
2036           return _lv_postorder(lv, _lv_mark_if_partial_single, NULL);
2037 }
2038 
2039 /*
2040  * Mark LVs with missing PVs using PARTIAL_LV status flag. The flag is
2041  * propagated transitively, so LVs referencing other LVs are marked
2042  * partial as well, if any of their referenced LVs are marked partial.
2043  */
_vg_mark_partial_lvs(struct volume_group * vg)2044 static int _vg_mark_partial_lvs(struct volume_group *vg)
2045 {
2046           struct logical_volume *lv;
2047           struct lv_list *lvl;
2048 
2049           dm_list_iterate_items(lvl, &vg->lvs) {
2050                     lv = lvl->lv;
2051                     if (!_lv_mark_if_partial(lv))
2052                               return_0;
2053           }
2054           return 1;
2055 }
2056 
2057 /*
2058  * Be sure that all PV devices have cached read ahead in dev-cache
2059  * Currently it takes read_ahead from first PV segment only
2060  */
_lv_read_ahead_single(struct logical_volume * lv,void * data)2061 static int _lv_read_ahead_single(struct logical_volume *lv, void *data)
2062 {
2063           struct lv_segment *seg = first_seg(lv);
2064           uint32_t seg_read_ahead = 0, *read_ahead = data;
2065 
2066           if (seg && seg->area_count && seg_type(seg, 0) == AREA_PV)
2067                     dev_get_read_ahead(seg_pv(seg, 0)->dev, &seg_read_ahead);
2068 
2069           if (seg_read_ahead > *read_ahead)
2070                     *read_ahead = seg_read_ahead;
2071 
2072           return 1;
2073 }
2074 
2075 /*
2076  * Calculate readahead for logical volume from underlying PV devices.
2077  * If read_ahead is NULL, only ensure that readahead of PVs are preloaded
2078  * into PV struct device in dev cache.
2079  */
lv_calculate_readahead(const struct logical_volume * lv,uint32_t * read_ahead)2080 void lv_calculate_readahead(const struct logical_volume *lv, uint32_t *read_ahead)
2081 {
2082           uint32_t _read_ahead = 0;
2083 
2084           if (lv->read_ahead == DM_READ_AHEAD_AUTO)
2085                     _lv_postorder((struct logical_volume *)lv, _lv_read_ahead_single, &_read_ahead);
2086 
2087           if (read_ahead) {
2088                     log_debug("Calculated readahead of LV %s is %u", lv->name, _read_ahead);
2089                     *read_ahead = _read_ahead;
2090           }
2091 }
2092 
vg_validate(struct volume_group * vg)2093 int vg_validate(struct volume_group *vg)
2094 {
2095           struct pv_list *pvl, *pvl2;
2096           struct lv_list *lvl, *lvl2;
2097           char uuid[64] __attribute((aligned(8)));
2098           int r = 1;
2099           uint32_t hidden_lv_count = 0;
2100 
2101           /* FIXME Also check there's no data/metadata overlap */
2102 
2103           dm_list_iterate_items(pvl, &vg->pvs) {
2104                     dm_list_iterate_items(pvl2, &vg->pvs) {
2105                               if (pvl == pvl2)
2106                                         break;
2107                               if (id_equal(&pvl->pv->id,
2108                                              &pvl2->pv->id)) {
2109                                         if (!id_write_format(&pvl->pv->id, uuid,
2110                                                                  sizeof(uuid)))
2111                                                    stack;
2112                                         log_error("Internal error: Duplicate PV id "
2113                                                     "%s detected for %s in %s.",
2114                                                     uuid, pv_dev_name(pvl->pv),
2115                                                     vg->name);
2116                                         r = 0;
2117                               }
2118                     }
2119 
2120                     if (strcmp(pvl->pv->vg_name, vg->name)) {
2121                               log_error("Internal error: VG name for PV %s is corrupted",
2122                                           pv_dev_name(pvl->pv));
2123                               r = 0;
2124                     }
2125           }
2126 
2127           if (!check_pv_segments(vg)) {
2128                     log_error("Internal error: PV segments corrupted in %s.",
2129                                 vg->name);
2130                     r = 0;
2131           }
2132 
2133           /*
2134            * Count all non-snapshot invisible LVs
2135            */
2136           dm_list_iterate_items(lvl, &vg->lvs) {
2137                     if (lvl->lv->status & VISIBLE_LV)
2138                               continue;
2139 
2140                     /* snapshots */
2141                     if (lv_is_cow(lvl->lv))
2142                               continue;
2143 
2144                     /* virtual origins are always hidden */
2145                     if (lv_is_origin(lvl->lv) && !lv_is_virtual_origin(lvl->lv))
2146                               continue;
2147 
2148                     /* count other non-snapshot invisible volumes */
2149                     hidden_lv_count++;
2150 
2151                     /*
2152                      *  FIXME: add check for unreferenced invisible LVs
2153                      *   - snapshot cow & origin
2154                      *   - mirror log & images
2155                      *   - mirror conversion volumes (_mimagetmp*)
2156                      */
2157           }
2158 
2159           /*
2160            * all volumes = visible LVs + snapshot_cows + invisible LVs
2161            */
2162           if (((uint32_t) dm_list_size(&vg->lvs)) !=
2163               vg_visible_lvs(vg) + snapshot_count(vg) + hidden_lv_count) {
2164                     log_error("Internal error: #internal LVs (%u) != #LVs (%"
2165                                 PRIu32 ") + #snapshots (%" PRIu32 ") + #internal LVs %u in VG %s",
2166                                 dm_list_size(&vg->lvs), vg_visible_lvs(vg),
2167                                 snapshot_count(vg), hidden_lv_count, vg->name);
2168                     r = 0;
2169           }
2170 
2171           dm_list_iterate_items(lvl, &vg->lvs) {
2172                     dm_list_iterate_items(lvl2, &vg->lvs) {
2173                               if (lvl == lvl2)
2174                                         break;
2175                               if (!strcmp(lvl->lv->name, lvl2->lv->name)) {
2176                                         log_error("Internal error: Duplicate LV name "
2177                                                     "%s detected in %s.", lvl->lv->name,
2178                                                     vg->name);
2179                                         r = 0;
2180                               }
2181                               if (id_equal(&lvl->lv->lvid.id[1],
2182                                              &lvl2->lv->lvid.id[1])) {
2183                                         if (!id_write_format(&lvl->lv->lvid.id[1], uuid,
2184                                                                  sizeof(uuid)))
2185                                                    stack;
2186                                         log_error("Internal error: Duplicate LV id "
2187                                                     "%s detected for %s and %s in %s.",
2188                                                     uuid, lvl->lv->name, lvl2->lv->name,
2189                                                     vg->name);
2190                                         r = 0;
2191                               }
2192                     }
2193           }
2194 
2195           dm_list_iterate_items(lvl, &vg->lvs) {
2196                     if (!check_lv_segments(lvl->lv, 1)) {
2197                               log_error("Internal error: LV segments corrupted in %s.",
2198                                           lvl->lv->name);
2199                               r = 0;
2200                     }
2201           }
2202 
2203           if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS) &&
2204               (!vg->max_lv || !vg->max_pv)) {
2205                     log_error("Internal error: Volume group %s has limited PV/LV count"
2206                                 " but limit is not set.", vg->name);
2207                     r = 0;
2208           }
2209 
2210           if (vg_max_lv_reached(vg))
2211                     stack;
2212 
2213           return r;
2214 }
2215 
2216 /*
2217  * After vg_write() returns success,
2218  * caller MUST call either vg_commit() or vg_revert()
2219  */
vg_write(struct volume_group * vg)2220 int vg_write(struct volume_group *vg)
2221 {
2222           struct dm_list *mdah;
2223           struct metadata_area *mda;
2224 
2225           if (!vg_validate(vg))
2226                     return_0;
2227 
2228           if (vg->status & PARTIAL_VG) {
2229                     log_error("Cannot update partial volume group %s.", vg->name);
2230                     return 0;
2231           }
2232 
2233           if (vg_missing_pv_count(vg) && !vg->cmd->handles_missing_pvs) {
2234                     log_error("Cannot update volume group %s while physical "
2235                                 "volumes are missing.", vg->name);
2236                     return 0;
2237           }
2238 
2239           if (vg_has_unknown_segments(vg) && !vg->cmd->handles_unknown_segments) {
2240                     log_error("Cannot update volume group %s with unknown segments in it!",
2241                                 vg->name);
2242                     return 0;
2243           }
2244 
2245 
2246           if (dm_list_empty(&vg->fid->metadata_areas)) {
2247                     log_error("Aborting vg_write: No metadata areas to write to!");
2248                     return 0;
2249           }
2250 
2251           if (!drop_cached_metadata(vg)) {
2252                     log_error("Unable to drop cached metadata for VG %s.", vg->name);
2253                     return 0;
2254           }
2255 
2256           vg->seqno++;
2257 
2258           /* Write to each copy of the metadata area */
2259           dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2260                     if (!mda->ops->vg_write) {
2261                               log_error("Format does not support writing volume"
2262                                           "group metadata areas");
2263                               /* Revert */
2264                               dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
2265                                         mda = dm_list_item(mdah, struct metadata_area);
2266 
2267                                         if (mda->ops->vg_revert &&
2268                                             !mda->ops->vg_revert(vg->fid, vg, mda)) {
2269                                                   stack;
2270                                         }
2271                               }
2272                               return 0;
2273                     }
2274                     if (!mda->ops->vg_write(vg->fid, vg, mda)) {
2275                               stack;
2276                               /* Revert */
2277                               dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
2278                                         mda = dm_list_item(mdah, struct metadata_area);
2279 
2280                                         if (mda->ops->vg_revert &&
2281                                             !mda->ops->vg_revert(vg->fid, vg, mda)) {
2282                                                   stack;
2283                                         }
2284                               }
2285                               return 0;
2286                     }
2287           }
2288 
2289           /* Now pre-commit each copy of the new metadata */
2290           dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2291                     if (mda->ops->vg_precommit &&
2292                         !mda->ops->vg_precommit(vg->fid, vg, mda)) {
2293                               stack;
2294                               /* Revert */
2295                               dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2296                                         if (mda->ops->vg_revert &&
2297                                             !mda->ops->vg_revert(vg->fid, vg, mda)) {
2298                                                   stack;
2299                                         }
2300                               }
2301                               return 0;
2302                     }
2303           }
2304 
2305           return 1;
2306 }
2307 
2308 /* Commit pending changes */
vg_commit(struct volume_group * vg)2309 int vg_commit(struct volume_group *vg)
2310 {
2311           struct metadata_area *mda;
2312           int cache_updated = 0;
2313           int failed = 0;
2314 
2315           if (!vgname_is_locked(vg->name)) {
2316                     log_error("Internal error: Attempt to write new VG metadata "
2317                                 "without locking %s", vg->name);
2318                     return cache_updated;
2319           }
2320 
2321           /* Commit to each copy of the metadata area */
2322           dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2323                     failed = 0;
2324                     if (mda->ops->vg_commit &&
2325                         !mda->ops->vg_commit(vg->fid, vg, mda)) {
2326                               stack;
2327                               failed = 1;
2328                     }
2329                     /* Update cache first time we succeed */
2330                     if (!failed && !cache_updated) {
2331                               lvmcache_update_vg(vg, 0);
2332                               cache_updated = 1;
2333                     }
2334           }
2335 
2336           /* If update failed, remove any cached precommitted metadata. */
2337           if (!cache_updated && !drop_cached_metadata(vg))
2338                     log_error("Attempt to drop cached metadata failed "
2339                                 "after commit for VG %s.", vg->name);
2340 
2341           /* If at least one mda commit succeeded, it was committed */
2342           return cache_updated;
2343 }
2344 
2345 /* Don't commit any pending changes */
vg_revert(struct volume_group * vg)2346 int vg_revert(struct volume_group *vg)
2347 {
2348           struct metadata_area *mda;
2349 
2350           dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2351                     if (mda->ops->vg_revert &&
2352                         !mda->ops->vg_revert(vg->fid, vg, mda)) {
2353                               stack;
2354                     }
2355           }
2356 
2357           if (!drop_cached_metadata(vg))
2358                     log_error("Attempt to drop cached metadata failed "
2359                                 "after reverted update for VG %s.", vg->name);
2360 
2361           return 1;
2362 }
2363 
2364 /* Make orphan PVs look like a VG */
_vg_read_orphans(struct cmd_context * cmd,const char * orphan_vgname)2365 static struct volume_group *_vg_read_orphans(struct cmd_context *cmd,
2366                                                        const char *orphan_vgname)
2367 {
2368           struct lvmcache_vginfo *vginfo;
2369           struct lvmcache_info *info;
2370           struct pv_list *pvl;
2371           struct volume_group *vg;
2372           struct physical_volume *pv;
2373           struct dm_pool *mem;
2374 
2375           lvmcache_label_scan(cmd, 0);
2376 
2377           if (!(vginfo = vginfo_from_vgname(orphan_vgname, NULL)))
2378                     return_NULL;
2379 
2380           if (!(mem = dm_pool_create("vg_read orphan", VG_MEMPOOL_CHUNK)))
2381                     return_NULL;
2382 
2383           if (!(vg = dm_pool_zalloc(mem, sizeof(*vg)))) {
2384                     log_error("vg allocation failed");
2385                     return NULL;
2386           }
2387           dm_list_init(&vg->pvs);
2388           dm_list_init(&vg->lvs);
2389           dm_list_init(&vg->tags);
2390           dm_list_init(&vg->removed_pvs);
2391           vg->vgmem = mem;
2392           vg->cmd = cmd;
2393           if (!(vg->name = dm_pool_strdup(mem, orphan_vgname))) {
2394                     log_error("vg name allocation failed");
2395                     goto bad;
2396           }
2397 
2398           /* create format instance with appropriate metadata area */
2399           if (!(vg->fid = vginfo->fmt->ops->create_instance(vginfo->fmt,
2400                                                                         orphan_vgname, NULL,
2401                                                                         NULL))) {
2402                     log_error("Failed to create format instance");
2403                     goto bad;
2404           }
2405 
2406           dm_list_iterate_items(info, &vginfo->infos) {
2407                     if (!(pv = _pv_read(cmd, mem, dev_name(info->dev), NULL, NULL, 1, 0))) {
2408                               continue;
2409                     }
2410                     if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
2411                               log_error("pv_list allocation failed");
2412                               goto bad;
2413                     }
2414                     pvl->pv = pv;
2415                     dm_list_add(&vg->pvs, &pvl->list);
2416                     vg->pv_count++;
2417           }
2418 
2419           return vg;
2420 bad:
2421           dm_pool_destroy(mem);
2422           return NULL;
2423 }
2424 
_update_pv_list(struct dm_pool * pvmem,struct dm_list * all_pvs,struct volume_group * vg)2425 static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg)
2426 {
2427           struct pv_list *pvl, *pvl2;
2428 
2429           dm_list_iterate_items(pvl, &vg->pvs) {
2430                     dm_list_iterate_items(pvl2, all_pvs) {
2431                               if (pvl->pv->dev == pvl2->pv->dev)
2432                                         goto next_pv;
2433                     }
2434 
2435                     /*
2436                      * PV is not on list so add it.
2437                      */
2438                     if (!(pvl2 = _copy_pvl(pvmem, pvl))) {
2439                               log_error("pv_list allocation for '%s' failed",
2440                                           pv_dev_name(pvl->pv));
2441                               return 0;
2442                     }
2443                     dm_list_add(all_pvs, &pvl2->list);
2444   next_pv:
2445                     ;
2446           }
2447 
2448           return 1;
2449 }
2450 
vg_missing_pv_count(const struct volume_group * vg)2451 int vg_missing_pv_count(const struct volume_group *vg)
2452 {
2453           int ret = 0;
2454           struct pv_list *pvl;
2455           dm_list_iterate_items(pvl, &vg->pvs) {
2456                     if (pvl->pv->status & MISSING_PV)
2457                               ++ ret;
2458           }
2459           return ret;
2460 }
2461 
2462 /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
2463  * inconsistent metadata on disk (i.e. the VG write lock is held).
2464  * This guarantees only consistent metadata is returned.
2465  * If consistent is 0, caller must check whether consistent == 1 on return
2466  * and take appropriate action if it isn't (e.g. abort; get write lock
2467  * and call vg_read_internal again).
2468  *
2469  * If precommitted is set, use precommitted metadata if present.
2470  *
2471  * Either of vgname or vgid may be NULL.
2472  */
_vg_read(struct cmd_context * cmd,const char * vgname,const char * vgid,int * consistent,unsigned precommitted)2473 static struct volume_group *_vg_read(struct cmd_context *cmd,
2474                                              const char *vgname,
2475                                              const char *vgid,
2476                                              int *consistent, unsigned precommitted)
2477 {
2478           struct format_instance *fid;
2479           const struct format_type *fmt;
2480           struct volume_group *vg, *correct_vg = NULL;
2481           struct metadata_area *mda;
2482           struct lvmcache_info *info;
2483           int inconsistent = 0;
2484           int inconsistent_vgid = 0;
2485           int inconsistent_pvs = 0;
2486           unsigned use_precommitted = precommitted;
2487           unsigned saved_handles_missing_pvs = cmd->handles_missing_pvs;
2488           struct dm_list *pvids;
2489           struct pv_list *pvl, *pvl2;
2490           struct dm_list all_pvs;
2491           char uuid[64] __attribute((aligned(8)));
2492 
2493           if (is_orphan_vg(vgname)) {
2494                     if (use_precommitted) {
2495                               log_error("Internal error: vg_read_internal requires vgname "
2496                                           "with pre-commit.");
2497                               return NULL;
2498                     }
2499                     *consistent = 1;
2500                     return _vg_read_orphans(cmd, vgname);
2501           }
2502 
2503           if ((correct_vg = lvmcache_get_vg(vgid, precommitted))) {
2504                     if (vg_missing_pv_count(correct_vg)) {
2505                               log_verbose("There are %d physical volumes missing.",
2506                                             vg_missing_pv_count(correct_vg));
2507                               _vg_mark_partial_lvs(correct_vg);
2508                     }
2509                     *consistent = 1;
2510                     return correct_vg;
2511           }
2512 
2513           /* Find the vgname in the cache */
2514           /* If it's not there we must do full scan to be completely sure */
2515           if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2516                     lvmcache_label_scan(cmd, 0);
2517                     if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2518                               if (memlock())
2519                                         return_NULL;
2520                               lvmcache_label_scan(cmd, 2);
2521                               if (!(fmt = fmt_from_vgname(vgname, vgid)))
2522                                         return_NULL;
2523                     }
2524           }
2525 
2526           /* Now determine the correct vgname if none was supplied */
2527           if (!vgname && !(vgname = vgname_from_vgid(cmd->mem, vgid)))
2528                     return_NULL;
2529 
2530           if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
2531                     use_precommitted = 0;
2532 
2533           /* create format instance with appropriate metadata area */
2534           if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
2535                     log_error("Failed to create format instance");
2536                     return NULL;
2537           }
2538 
2539           /* Store pvids for later so we can check if any are missing */
2540           if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
2541                     return_NULL;
2542 
2543           /* Ensure contents of all metadata areas match - else do recovery */
2544           dm_list_iterate_items(mda, &fid->metadata_areas) {
2545                     if ((use_precommitted &&
2546                          !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
2547                         (!use_precommitted &&
2548                          !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2549                               inconsistent = 1;
2550                               vg_release(vg);
2551                               continue;
2552                     }
2553                     if (!correct_vg) {
2554                               correct_vg = vg;
2555                               continue;
2556                     }
2557 
2558                     /* FIXME Also ensure contents same - checksum compare? */
2559                     if (correct_vg->seqno != vg->seqno) {
2560                               inconsistent = 1;
2561                               if (vg->seqno > correct_vg->seqno) {
2562                                         vg_release(correct_vg);
2563                                         correct_vg = vg;
2564                               }
2565                     }
2566 
2567                     if (vg != correct_vg)
2568                               vg_release(vg);
2569           }
2570 
2571           /* Ensure every PV in the VG was in the cache */
2572           if (correct_vg) {
2573                     /*
2574                      * If the VG has PVs without mdas, they may still be
2575                      * orphans in the cache: update the cache state here.
2576                      */
2577                     if (!inconsistent &&
2578                         dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
2579                               dm_list_iterate_items(pvl, &correct_vg->pvs) {
2580                                         if (!pvl->pv->dev) {
2581                                                   inconsistent_pvs = 1;
2582                                                   break;
2583                                         }
2584 
2585                                         if (str_list_match_item(pvids, pvl->pv->dev->pvid))
2586                                                   continue;
2587 
2588                                         /*
2589                                          * PV not marked as belonging to this VG in cache.
2590                                          * Check it's an orphan without metadata area.
2591                                          */
2592                                         if (!(info = info_from_pvid(pvl->pv->dev->pvid, 1)) ||
2593                                            !info->vginfo || !is_orphan_vg(info->vginfo->vgname) ||
2594                                            dm_list_size(&info->mdas)) {
2595                                                   inconsistent_pvs = 1;
2596                                                   break;
2597                                         }
2598                               }
2599 
2600                               /* If the check passed, let's update VG and recalculate pvids */
2601                               if (!inconsistent_pvs) {
2602                                         log_debug("Updating cache for PVs without mdas "
2603                                                     "in VG %s.", vgname);
2604                                         lvmcache_update_vg(correct_vg, use_precommitted);
2605 
2606                                         if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
2607                                                   return_NULL;
2608                               }
2609                     }
2610 
2611                     if (dm_list_size(&correct_vg->pvs) != dm_list_size(pvids)
2612                         + vg_missing_pv_count(correct_vg)) {
2613                               log_debug("Cached VG %s had incorrect PV list",
2614                                           vgname);
2615 
2616                               if (memlock())
2617                                         inconsistent = 1;
2618                               else {
2619                                         vg_release(correct_vg);
2620                                         correct_vg = NULL;
2621                               }
2622                     } else dm_list_iterate_items(pvl, &correct_vg->pvs) {
2623                               if (pvl->pv->status & MISSING_PV)
2624                                         continue;
2625                               if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
2626                                         log_debug("Cached VG %s had incorrect PV list",
2627                                                     vgname);
2628                                         vg_release(correct_vg);
2629                                         correct_vg = NULL;
2630                                         break;
2631                               }
2632                     }
2633           }
2634 
2635           dm_list_init(&all_pvs);
2636 
2637           /* Failed to find VG where we expected it - full scan and retry */
2638           if (!correct_vg) {
2639                     inconsistent = 0;
2640 
2641                     if (memlock())
2642                               return_NULL;
2643                     lvmcache_label_scan(cmd, 2);
2644                     if (!(fmt = fmt_from_vgname(vgname, vgid)))
2645                               return_NULL;
2646 
2647                     if (precommitted && !(fmt->features & FMT_PRECOMMIT))
2648                               use_precommitted = 0;
2649 
2650                     /* create format instance with appropriate metadata area */
2651                     if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
2652                               log_error("Failed to create format instance");
2653                               return NULL;
2654                     }
2655 
2656                     /* Ensure contents of all metadata areas match - else recover */
2657                     dm_list_iterate_items(mda, &fid->metadata_areas) {
2658                               if ((use_precommitted &&
2659                                    !(vg = mda->ops->vg_read_precommit(fid, vgname,
2660                                                                                 mda))) ||
2661                                   (!use_precommitted &&
2662                                    !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2663                                         inconsistent = 1;
2664                                         continue;
2665                               }
2666                               if (!correct_vg) {
2667                                         correct_vg = vg;
2668                                         if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
2669                                                   vg_release(vg);
2670                                                   return_NULL;
2671                                         }
2672                                         continue;
2673                               }
2674 
2675                               if (strncmp((char *)vg->id.uuid,
2676                                   (char *)correct_vg->id.uuid, ID_LEN)) {
2677                                         inconsistent = 1;
2678                                         inconsistent_vgid = 1;
2679                               }
2680 
2681                               /* FIXME Also ensure contents same - checksums same? */
2682                               if (correct_vg->seqno != vg->seqno) {
2683                                         inconsistent = 1;
2684                                         if (!_update_pv_list(cmd->mem, &all_pvs, vg)) {
2685                                                   vg_release(vg);
2686                                                   vg_release(correct_vg);
2687                                                   return_NULL;
2688                                         }
2689                                         if (vg->seqno > correct_vg->seqno) {
2690                                                   vg_release(correct_vg);
2691                                                   correct_vg = vg;
2692                                         }
2693                               }
2694 
2695                               if (vg != correct_vg)
2696                                         vg_release(vg);
2697                     }
2698 
2699                     /* Give up looking */
2700                     if (!correct_vg)
2701                               return_NULL;
2702           }
2703 
2704           lvmcache_update_vg(correct_vg, use_precommitted);
2705 
2706           if (inconsistent) {
2707                     /* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
2708                     if (use_precommitted) {
2709                               log_error("Inconsistent pre-commit metadata copies "
2710                                           "for volume group %s", vgname);
2711                               vg_release(correct_vg);
2712                               return NULL;
2713                     }
2714 
2715                     if (!*consistent)
2716                               return correct_vg;
2717 
2718                     /* Don't touch if vgids didn't match */
2719                     if (inconsistent_vgid) {
2720                               log_error("Inconsistent metadata UUIDs found for "
2721                                           "volume group %s", vgname);
2722                               *consistent = 0;
2723                               return correct_vg;
2724                     }
2725 
2726                     log_warn("WARNING: Inconsistent metadata found for VG %s - updating "
2727                                "to use version %u", vgname, correct_vg->seqno);
2728 
2729                     cmd->handles_missing_pvs = 1;
2730                     if (!vg_write(correct_vg)) {
2731                               log_error("Automatic metadata correction failed");
2732                               vg_release(correct_vg);
2733                               cmd->handles_missing_pvs = saved_handles_missing_pvs;
2734                               return NULL;
2735                     }
2736                     cmd->handles_missing_pvs = saved_handles_missing_pvs;
2737 
2738                     if (!vg_commit(correct_vg)) {
2739                               log_error("Automatic metadata correction commit "
2740                                           "failed");
2741                               vg_release(correct_vg);
2742                               return NULL;
2743                     }
2744 
2745                     dm_list_iterate_items(pvl, &all_pvs) {
2746                               dm_list_iterate_items(pvl2, &correct_vg->pvs) {
2747                                         if (pvl->pv->dev == pvl2->pv->dev)
2748                                                   goto next_pv;
2749                               }
2750                               if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) {
2751                                         vg_release(correct_vg);
2752                                         return_NULL;
2753                               }
2754                               log_error("Removing PV %s (%s) that no longer belongs to VG %s",
2755                                           pv_dev_name(pvl->pv), uuid, correct_vg->name);
2756                               if (!pv_write_orphan(cmd, pvl->pv)) {
2757                                         vg_release(correct_vg);
2758                                         return_NULL;
2759                               }
2760       next_pv:
2761                               ;
2762                     }
2763           }
2764 
2765           if (vg_missing_pv_count(correct_vg)) {
2766                     log_verbose("There are %d physical volumes missing.",
2767                                   vg_missing_pv_count(correct_vg));
2768                     _vg_mark_partial_lvs(correct_vg);
2769           }
2770 
2771           if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
2772                     log_error("WARNING: Interrupted pvmove detected in "
2773                                 "volume group %s", correct_vg->name);
2774                     log_error("Please restore the metadata by running "
2775                                 "vgcfgrestore.");
2776                     vg_release(correct_vg);
2777                     return NULL;
2778           }
2779 
2780           *consistent = 1;
2781           return correct_vg;
2782 }
2783 
vg_read_internal(struct cmd_context * cmd,const char * vgname,const char * vgid,int * consistent)2784 struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname,
2785                                    const char *vgid, int *consistent)
2786 {
2787           struct volume_group *vg;
2788           struct lv_list *lvl;
2789 
2790           if (!(vg = _vg_read(cmd, vgname, vgid, consistent, 0)))
2791                     return NULL;
2792 
2793           if (!check_pv_segments(vg)) {
2794                     log_error("Internal error: PV segments corrupted in %s.",
2795                                 vg->name);
2796                     vg_release(vg);
2797                     return NULL;
2798           }
2799 
2800           dm_list_iterate_items(lvl, &vg->lvs) {
2801                     if (!check_lv_segments(lvl->lv, 1)) {
2802                               log_error("Internal error: LV segments corrupted in %s.",
2803                                           lvl->lv->name);
2804                               vg_release(vg);
2805                               return NULL;
2806                     }
2807           }
2808 
2809           return vg;
2810 }
2811 
vg_release(struct volume_group * vg)2812 void vg_release(struct volume_group *vg)
2813 {
2814           if (!vg || !vg->vgmem)
2815                     return;
2816 
2817           if (vg->cmd && vg->vgmem == vg->cmd->mem)
2818                     log_error("Internal error: global memory pool used for VG %s",
2819                                 vg->name);
2820 
2821           dm_pool_destroy(vg->vgmem);
2822 }
2823 
2824 /* This is only called by lv_from_lvid, which is only called from
2825  * activate.c so we know the appropriate VG lock is already held and
2826  * the vg_read_internal is therefore safe.
2827  */
_vg_read_by_vgid(struct cmd_context * cmd,const char * vgid,unsigned precommitted)2828 static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
2829                                                       const char *vgid,
2830                                                       unsigned precommitted)
2831 {
2832           const char *vgname;
2833           struct dm_list *vgnames;
2834           struct volume_group *vg = NULL;
2835           struct lvmcache_vginfo *vginfo;
2836           struct str_list *strl;
2837           int consistent = 0;
2838 
2839           /* Is corresponding vgname already cached? */
2840           if ((vginfo = vginfo_from_vgid(vgid)) &&
2841               vginfo->vgname && !is_orphan_vg(vginfo->vgname)) {
2842                     if ((vg = _vg_read(cmd, NULL, vgid,
2843                                            &consistent, precommitted)) &&
2844                         !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2845 
2846                               if (!consistent) {
2847                                         log_error("Volume group %s metadata is "
2848                                                     "inconsistent", vg->name);
2849                               }
2850                               return vg;
2851                     }
2852                     vg_release(vg);
2853           }
2854 
2855           /* Mustn't scan if memory locked: ensure cache gets pre-populated! */
2856           if (memlock())
2857                     goto out;
2858 
2859           /* FIXME Need a genuine read by ID here - don't vg_read_internal by name! */
2860           /* FIXME Disabled vgrenames while active for now because we aren't
2861            *       allowed to do a full scan here any more. */
2862 
2863           // The slow way - full scan required to cope with vgrename
2864           if (!(vgnames = get_vgnames(cmd, 2))) {
2865                     log_error("vg_read_by_vgid: get_vgnames failed");
2866                     goto out;
2867           }
2868 
2869           dm_list_iterate_items(strl, vgnames) {
2870                     vgname = strl->str;
2871                     if (!vgname || is_orphan_vg(vgname))
2872                               continue; // FIXME Unnecessary?
2873                     consistent = 0;
2874                     if ((vg = _vg_read(cmd, vgname, vgid, &consistent,
2875                                            precommitted)) &&
2876                         !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2877 
2878                               if (!consistent) {
2879                                         log_error("Volume group %s metadata is "
2880                                                     "inconsistent", vgname);
2881                                         goto out;
2882                               }
2883                               return vg;
2884                     }
2885           }
2886 
2887 out:
2888           vg_release(vg);
2889           return NULL;
2890 }
2891 
2892 /* Only called by activate.c */
lv_from_lvid(struct cmd_context * cmd,const char * lvid_s,unsigned precommitted)2893 struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s,
2894                                             unsigned precommitted)
2895 {
2896           struct lv_list *lvl;
2897           struct volume_group *vg;
2898           const union lvid *lvid;
2899 
2900           lvid = (const union lvid *) lvid_s;
2901 
2902           log_very_verbose("Finding volume group for uuid %s", lvid_s);
2903           if (!(vg = _vg_read_by_vgid(cmd, (char *)lvid->id[0].uuid, precommitted))) {
2904                     log_error("Volume group for uuid not found: %s", lvid_s);
2905                     return NULL;
2906           }
2907 
2908           log_verbose("Found volume group \"%s\"", vg->name);
2909           if (vg->status & EXPORTED_VG) {
2910                     log_error("Volume group \"%s\" is exported", vg->name);
2911                     goto out;
2912           }
2913           if (!(lvl = find_lv_in_vg_by_lvid(vg, lvid))) {
2914                     log_very_verbose("Can't find logical volume id %s", lvid_s);
2915                     goto out;
2916           }
2917 
2918           return lvl->lv;
2919 out:
2920           vg_release(vg);
2921           return NULL;
2922 }
2923 
2924 /**
2925  * pv_read - read and return a handle to a physical volume
2926  * @cmd: LVM command initiating the pv_read
2927  * @pv_name: full device name of the PV, including the path
2928  * @mdas: list of metadata areas of the PV
2929  * @label_sector: sector number where the PV label is stored on @pv_name
2930  * @warnings:
2931  *
2932  * Returns:
2933  *   PV handle - valid pv_name and successful read of the PV, or
2934  *   NULL - invalid parameter or error in reading the PV
2935  *
2936  * Note:
2937  *   FIXME - liblvm todo - make into function that returns handle
2938  */
pv_read(struct cmd_context * cmd,const char * pv_name,struct dm_list * mdas,uint64_t * label_sector,int warnings,int scan_label_only)2939 struct physical_volume *pv_read(struct cmd_context *cmd, const char *pv_name,
2940                                         struct dm_list *mdas, uint64_t *label_sector,
2941                                         int warnings, int scan_label_only)
2942 {
2943           return _pv_read(cmd, cmd->mem, pv_name, mdas, label_sector, warnings, scan_label_only);
2944 }
2945 
2946 /* FIXME Use label functions instead of PV functions */
_pv_read(struct cmd_context * cmd,struct dm_pool * pvmem,const char * pv_name,struct dm_list * mdas,uint64_t * label_sector,int warnings,int scan_label_only)2947 static struct physical_volume *_pv_read(struct cmd_context *cmd,
2948                                                   struct dm_pool *pvmem,
2949                                                   const char *pv_name,
2950                                                   struct dm_list *mdas,
2951                                                   uint64_t *label_sector,
2952                                                   int warnings, int scan_label_only)
2953 {
2954           struct physical_volume *pv;
2955           struct label *label;
2956           struct lvmcache_info *info;
2957           struct device *dev;
2958 
2959           if (!(dev = dev_cache_get(pv_name, cmd->filter)))
2960                     return_NULL;
2961 
2962           if (!(label_read(dev, &label, UINT64_C(0)))) {
2963                     if (warnings)
2964                               log_error("No physical volume label read from %s",
2965                                           pv_name);
2966                     return NULL;
2967           }
2968 
2969           info = (struct lvmcache_info *) label->info;
2970           if (label_sector && *label_sector)
2971                     *label_sector = label->sector;
2972 
2973           if (!(pv = dm_pool_zalloc(pvmem, sizeof(*pv)))) {
2974                     log_error("pv allocation for '%s' failed", pv_name);
2975                     return NULL;
2976           }
2977 
2978           dm_list_init(&pv->tags);
2979           dm_list_init(&pv->segments);
2980 
2981           /* FIXME Move more common code up here */
2982           if (!(info->fmt->ops->pv_read(info->fmt, pv_name, pv, mdas,
2983                 scan_label_only))) {
2984                     log_error("Failed to read existing physical volume '%s'",
2985                                 pv_name);
2986                     return NULL;
2987           }
2988 
2989           if (!pv->size)
2990                     return NULL;
2991 
2992           if (!alloc_pv_segment_whole_pv(pvmem, pv))
2993                     return_NULL;
2994 
2995           return pv;
2996 }
2997 
/*
 * Return the list produced by lvmcache_get_vgnames() for this command.
 * May return empty list.
 */
struct dm_list *get_vgnames(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *names = lvmcache_get_vgnames(cmd, full_scan);

	return names;
}
3003 
/* Return the list produced by lvmcache_get_vgids() for this command. */
struct dm_list *get_vgids(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *ids = lvmcache_get_vgids(cmd, full_scan);

	return ids;
}
3008 
_get_pvs(struct cmd_context * cmd,struct dm_list ** pvslist)3009 static int _get_pvs(struct cmd_context *cmd, struct dm_list **pvslist)
3010 {
3011           struct str_list *strl;
3012           struct dm_list * uninitialized_var(results);
3013           const char *vgname, *vgid;
3014           struct pv_list *pvl, *pvl_copy;
3015           struct dm_list *vgids;
3016           struct volume_group *vg;
3017           int consistent = 0;
3018           int old_pvmove;
3019 
3020           lvmcache_label_scan(cmd, 0);
3021 
3022           if (pvslist) {
3023                     if (!(results = dm_pool_alloc(cmd->mem, sizeof(*results)))) {
3024                               log_error("PV list allocation failed");
3025                               return 0;
3026                     }
3027 
3028                     dm_list_init(results);
3029           }
3030 
3031           /* Get list of VGs */
3032           if (!(vgids = get_vgids(cmd, 0))) {
3033                     log_error("get_pvs: get_vgids failed");
3034                     return 0;
3035           }
3036 
3037           /* Read every VG to ensure cache consistency */
3038           /* Orphan VG is last on list */
3039           old_pvmove = pvmove_mode();
3040           init_pvmove(1);
3041           dm_list_iterate_items(strl, vgids) {
3042                     vgid = strl->str;
3043                     if (!vgid)
3044                               continue; /* FIXME Unnecessary? */
3045                     consistent = 0;
3046                     if (!(vgname = vgname_from_vgid(NULL, vgid))) {
3047                               stack;
3048                               continue;
3049                     }
3050                     if (!(vg = vg_read_internal(cmd, vgname, vgid, &consistent))) {
3051                               stack;
3052                               continue;
3053                     }
3054                     if (!consistent)
3055                               log_warn("WARNING: Volume Group %s is not consistent",
3056                                          vgname);
3057 
3058                     /* Move PVs onto results list */
3059                     if (pvslist)
3060                               dm_list_iterate_items(pvl, &vg->pvs) {
3061                                         if (!(pvl_copy = _copy_pvl(cmd->mem, pvl))) {
3062                                                   log_error("PV list allocation failed");
3063                                                   vg_release(vg);
3064                                                   return 0;
3065                                         }
3066                                         dm_list_add(results, &pvl_copy->list);
3067                               }
3068                     vg_release(vg);
3069           }
3070           init_pvmove(old_pvmove);
3071 
3072           if (pvslist)
3073                     *pvslist = results;
3074           else
3075                     dm_pool_free(cmd->mem, vgids);
3076 
3077           return 1;
3078 }
3079 
/*
 * Return the list of PV entries gathered by _get_pvs(), or NULL if
 * _get_pvs() reports failure.
 */
struct dm_list *get_pvs(struct cmd_context *cmd)
{
	struct dm_list *pvs;

	return _get_pvs(cmd, &pvs) ? pvs : NULL;
}
3089 
/*
 * Run _get_pvs() without asking for a result list, i.e. only for the VG
 * reads it performs.  Returns the 1/0 status of _get_pvs().
 */
int scan_vgs_for_pvs(struct cmd_context *cmd)
{
	struct dm_list **no_list = NULL;

	return _get_pvs(cmd, no_list);
}
3094 
pv_write(struct cmd_context * cmd __attribute ((unused)),struct physical_volume * pv,struct dm_list * mdas,int64_t label_sector)3095 int pv_write(struct cmd_context *cmd __attribute((unused)),
3096                struct physical_volume *pv,
3097                struct dm_list *mdas, int64_t label_sector)
3098 {
3099           if (!pv->fmt->ops->pv_write) {
3100                     log_error("Format does not support writing physical volumes");
3101                     return 0;
3102           }
3103 
3104           if (!is_orphan_vg(pv->vg_name) || pv->pe_alloc_count) {
3105                     log_error("Assertion failed: can't _pv_write non-orphan PV "
3106                                 "(in VG %s)", pv->vg_name);
3107                     return 0;
3108           }
3109 
3110           if (!pv->fmt->ops->pv_write(pv->fmt, pv, mdas, label_sector))
3111                     return_0;
3112 
3113           return 1;
3114 }
3115 
pv_write_orphan(struct cmd_context * cmd,struct physical_volume * pv)3116 int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv)
3117 {
3118           const char *old_vg_name = pv->vg_name;
3119 
3120           pv->vg_name = cmd->fmt->orphan_vg_name;
3121           pv->status = ALLOCATABLE_PV;
3122           pv->pe_alloc_count = 0;
3123 
3124           if (!dev_get_size(pv->dev, &pv->size)) {
3125                     log_error("%s: Couldn't get size.", pv_dev_name(pv));
3126                     return 0;
3127           }
3128 
3129           if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
3130                     log_error("Failed to clear metadata from physical "
3131                                 "volume \"%s\" after removal from \"%s\"",
3132                                 pv_dev_name(pv), old_vg_name);
3133                     return 0;
3134           }
3135 
3136           return 1;
3137 }
3138 
3139 /**
3140  * is_orphan_vg - Determine whether a vg_name is an orphan
3141  * @vg_name: pointer to the vg_name
3142  */
is_orphan_vg(const char * vg_name)3143 int is_orphan_vg(const char *vg_name)
3144 {
3145           return (vg_name && vg_name[0] == ORPHAN_PREFIX[0]) ? 1 : 0;
3146 }
3147 
3148 /**
3149  * is_orphan - Determine whether a pv is an orphan based on its vg_name
3150  * @pv: handle to the physical volume
3151  */
is_orphan(const struct physical_volume * pv)3152 int is_orphan(const struct physical_volume *pv)
3153 {
3154           return is_orphan_vg(pv_field(pv, vg_name));
3155 }
3156 
3157 /**
3158  * is_pv - Determine whether a pv is a real pv or dummy one
3159  * @pv: handle to device
3160  */
is_pv(struct physical_volume * pv)3161 int is_pv(struct physical_volume *pv)
3162 {
3163           return (pv_field(pv, vg_name) ? 1 : 0);
3164 }
3165 
3166 /*
3167  * Returns:
3168  *  0 - fail
3169  *  1 - success
3170  */
pv_analyze(struct cmd_context * cmd,const char * pv_name,uint64_t label_sector)3171 int pv_analyze(struct cmd_context *cmd, const char *pv_name,
3172                  uint64_t label_sector)
3173 {
3174           struct label *label;
3175           struct device *dev;
3176           struct metadata_area *mda;
3177           struct lvmcache_info *info;
3178 
3179           dev = dev_cache_get(pv_name, cmd->filter);
3180           if (!dev) {
3181                     log_error("Device %s not found (or ignored by filtering).",
3182                                 pv_name);
3183                     return 0;
3184           }
3185 
3186           /*
3187            * First, scan for LVM labels.
3188            */
3189           if (!label_read(dev, &label, label_sector)) {
3190                     log_error("Could not find LVM label on %s",
3191                                 pv_name);
3192                     return 0;
3193           }
3194 
3195           log_print("Found label on %s, sector %"PRIu64", type=%s",
3196                       pv_name, label->sector, label->type);
3197 
3198           /*
3199            * Next, loop through metadata areas
3200            */
3201           info = label->info;
3202           dm_list_iterate_items(mda, &info->mdas)
3203                     mda->ops->pv_analyze_mda(info->fmt, mda);
3204 
3205           return 1;
3206 }
3207 
3208 /* FIXME: remove / combine this with locking? */
vg_check_write_mode(struct volume_group * vg)3209 int vg_check_write_mode(struct volume_group *vg)
3210 {
3211           if (vg->open_mode != 'w') {
3212                     log_errno(EPERM, "Attempt to modify a read-only VG");
3213                     return 0;
3214           }
3215           return 1;
3216 }
3217 
3218 /*
3219  * Performs a set of checks against a VG according to bits set in status
3220  * and returns FAILED_* bits for those that aren't acceptable.
3221  *
3222  * FIXME Remove the unnecessary duplicate definitions and return bits directly.
3223  */
_vg_bad_status_bits(const struct volume_group * vg,uint32_t status)3224 static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
3225                                             uint32_t status)
3226 {
3227           uint32_t failure = 0;
3228 
3229           if ((status & CLUSTERED) &&
3230               (vg_is_clustered(vg)) && !locking_is_clustered()) {
3231                     log_error("Skipping clustered volume group %s", vg->name);
3232                     /* Return because other flags are considered undefined. */
3233                     return FAILED_CLUSTERED;
3234           }
3235 
3236           if ((status & EXPORTED_VG) &&
3237               vg_is_exported(vg)) {
3238                     log_error("Volume group %s is exported", vg->name);
3239                     failure |= FAILED_EXPORTED;
3240           }
3241 
3242           if ((status & LVM_WRITE) &&
3243               !(vg->status & LVM_WRITE)) {
3244                     log_error("Volume group %s is read-only", vg->name);
3245                     failure |= FAILED_READ_ONLY;
3246           }
3247 
3248           if ((status & RESIZEABLE_VG) &&
3249               !vg_is_resizeable(vg)) {
3250                     log_error("Volume group %s is not resizeable.", vg->name);
3251                     failure |= FAILED_RESIZEABLE;
3252           }
3253 
3254           return failure;
3255 }
3256 
/**
 * vg_check_status - check volume group status flags and log error
 * @vg - volume group to check status flags
 * @status - specific status flags to check (e.g. EXPORTED_VG)
 *
 * Returns 1 when none of the requested checks fails, 0 otherwise.
 */
int vg_check_status(const struct volume_group *vg, uint32_t status)
{
	return _vg_bad_status_bits(vg, status) == 0;
}
3266 
_recover_vg(struct cmd_context * cmd,const char * lock_name,const char * vg_name,const char * vgid,uint32_t lock_flags)3267 static struct volume_group *_recover_vg(struct cmd_context *cmd, const char *lock_name,
3268                                const char *vg_name, const char *vgid,
3269                                uint32_t lock_flags)
3270 {
3271           int consistent = 1;
3272           struct volume_group *vg;
3273 
3274           lock_flags &= ~LCK_TYPE_MASK;
3275           lock_flags |= LCK_WRITE;
3276 
3277           unlock_vg(cmd, lock_name);
3278 
3279           dev_close_all();
3280 
3281           if (!lock_vol(cmd, lock_name, lock_flags))
3282                     return_NULL;
3283 
3284           if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent)))
3285                     return_NULL;
3286 
3287           if (!consistent) {
3288                     vg_release(vg);
3289                     return_NULL;
3290           }
3291 
3292           return (struct volume_group *)vg;
3293 }
3294 
3295 /*
3296  * Consolidated locking, reading, and status flag checking.
3297  *
3298  * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in
3299  * misc_flags will return it with FAILED_INCONSISTENT set instead of
3300  * giving you nothing.
3301  *
3302  * Use vg_read_error(vg) to determine the result.  Nonzero means there were
3303  * problems reading the volume group.
3304  * Zero value means that the VG is open and appropriate locks are held.
3305  */
_vg_lock_and_read(struct cmd_context * cmd,const char * vg_name,const char * vgid,uint32_t lock_flags,uint32_t status_flags,uint32_t misc_flags)3306 static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
3307                                      const char *vgid, uint32_t lock_flags,
3308                                      uint32_t status_flags, uint32_t misc_flags)
3309 {
3310           struct volume_group *vg = NULL;
3311           const char *lock_name;
3312           int consistent = 1;
3313           int consistent_in;
3314           uint32_t failure = 0;
3315           int already_locked;
3316 
3317           if (misc_flags & READ_ALLOW_INCONSISTENT || !(lock_flags & LCK_WRITE))
3318                     consistent = 0;
3319 
3320           if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) {
3321                     log_error("Volume group name %s has invalid characters",
3322                                 vg_name);
3323                     return NULL;
3324           }
3325 
3326           lock_name = is_orphan_vg(vg_name) ? VG_ORPHANS : vg_name;
3327           already_locked = vgname_is_locked(lock_name);
3328 
3329           if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK) &&
3330               !lock_vol(cmd, lock_name, lock_flags)) {
3331                     log_error("Can't get lock for %s", vg_name);
3332                     return _vg_make_handle(cmd, vg, FAILED_LOCKING);
3333           }
3334 
3335           if (is_orphan_vg(vg_name))
3336                     status_flags &= ~LVM_WRITE;
3337 
3338           consistent_in = consistent;
3339 
3340           /* If consistent == 1, we get NULL here if correction fails. */
3341           if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent))) {
3342                     if (consistent_in && !consistent) {
3343                               log_error("Volume group \"%s\" inconsistent.", vg_name);
3344                               failure |= FAILED_INCONSISTENT;
3345                               goto_bad;
3346                     }
3347 
3348                     log_error("Volume group \"%s\" not found", vg_name);
3349 
3350                     failure |= FAILED_NOTFOUND;
3351                     goto_bad;
3352           }
3353 
3354           if (vg_is_clustered(vg) && !locking_is_clustered()) {
3355                     log_error("Skipping clustered volume group %s", vg->name);
3356                     failure |= FAILED_CLUSTERED;
3357                     goto_bad;
3358           }
3359 
3360           /* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */
3361           if (!consistent && !failure) {
3362                     vg_release(vg);
3363                     if (!(vg = _recover_vg(cmd, lock_name, vg_name, vgid, lock_flags))) {
3364                               log_error("Recovery of volume group \"%s\" failed.",
3365                                           vg_name);
3366                               failure |= FAILED_INCONSISTENT;
3367                               goto_bad;
3368                     }
3369           }
3370 
3371           /*
3372            * Check that the tool can handle tricky cases -- missing PVs and
3373            * unknown segment types.
3374            */
3375 
3376           if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) &&
3377               (lock_flags & LCK_WRITE)) {
3378                     log_error("Cannot change VG %s while PVs are missing.", vg->name);
3379                     log_error("Consider vgreduce --removemissing.");
3380                     failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
3381                     goto_bad;
3382           }
3383 
3384           if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) &&
3385               (lock_flags & LCK_WRITE)) {
3386                     log_error("Cannot change VG %s with unknown segments in it!",
3387                                 vg->name);
3388                     failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
3389                     goto_bad;
3390           }
3391 
3392           failure |= _vg_bad_status_bits(vg, status_flags);
3393           if (failure)
3394                     goto_bad;
3395 
3396           return _vg_make_handle(cmd, vg, failure);
3397 
3398 bad:
3399           if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK))
3400                     unlock_vg(cmd, lock_name);
3401 
3402           return _vg_make_handle(cmd, vg, failure);
3403 }
3404 
3405 /*
3406  * vg_read: High-level volume group metadata read function.
3407  *
3408  * vg_read_error() must be used on any handle returned to check for errors.
3409  *
3410  *  - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT
3411  *  - VG is read-only: FAILED_READ_ONLY
3412  *  - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED
3413  *  - VG is not RESIZEABLE: FAILED_RESIZEABLE
3414  *  - locking failed: FAILED_LOCKING
3415  *
3416  * On failures, all locks are released, unless one of the following applies:
3417  *  - vgname_is_locked(lock_name) is true
3418  * FIXME: remove the above 2 conditions if possible and make an error always
3419  * release the lock.
3420  *
3421  * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE.
3422  *
3423  * Checking for VG existence:
3424  *
3425  * FIXME: We want vg_read to attempt automatic recovery after acquiring a
3426  * temporary write lock: if that fails, we bail out as usual, with failed &
3427  * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in
3428  * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with
3429  * *consistent = 1.
3430  */
vg_read(struct cmd_context * cmd,const char * vg_name,const char * vgid,uint32_t flags)3431 struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
3432                 const char *vgid, uint32_t flags)
3433 {
3434           uint32_t status = 0;
3435           uint32_t lock_flags = LCK_VG_READ;
3436 
3437           if (flags & READ_FOR_UPDATE) {
3438                     status |= EXPORTED_VG | LVM_WRITE;
3439                     lock_flags = LCK_VG_WRITE;
3440           }
3441 
3442           if (flags & READ_ALLOW_EXPORTED)
3443                     status &= ~EXPORTED_VG;
3444 
3445           return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags);
3446 }
3447 
3448 /*
3449  * A high-level volume group metadata reading function. Open a volume group for
3450  * later update (this means the user code can change the metadata and later
3451  * request the new metadata to be written and committed).
3452  */
vg_read_for_update(struct cmd_context * cmd,const char * vg_name,const char * vgid,uint32_t flags)3453 struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
3454                                const char *vgid, uint32_t flags)
3455 {
3456           return vg_read(cmd, vg_name, vgid, flags | READ_FOR_UPDATE);
3457 }
3458 
3459 /*
3460  * Test the validity of a VG handle returned by vg_read() or vg_read_for_update().
3461  */
vg_read_error(struct volume_group * vg_handle)3462 uint32_t vg_read_error(struct volume_group *vg_handle)
3463 {
3464           if (!vg_handle)
3465                     return FAILED_ALLOCATION;
3466 
3467           return vg_handle->read_status;
3468 }
3469 
3470 /*
3471  * Lock a vgname and/or check for existence.
3472  * Takes a WRITE lock on the vgname before scanning.
3473  * If scanning fails or vgname found, release the lock.
3474  * NOTE: If you find the return codes confusing, you might think of this
3475  * function as similar to an open() call with O_CREAT and O_EXCL flags
3476  * (open returns fail with -EEXIST if file already exists).
3477  *
3478  * Returns:
3479  * FAILED_LOCKING - Cannot lock name
3480  * FAILED_EXIST - VG name already exists - cannot reserve
3481  * SUCCESS - VG name does not exist in system and WRITE lock held
3482  */
vg_lock_newname(struct cmd_context * cmd,const char * vgname)3483 uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname)
3484 {
3485           if (!lock_vol(cmd, vgname, LCK_VG_WRITE)) {
3486                     return FAILED_LOCKING;
3487           }
3488 
3489           /* Find the vgname in the cache */
3490           /* If it's not there we must do full scan to be completely sure */
3491           if (!fmt_from_vgname(vgname, NULL)) {
3492                     lvmcache_label_scan(cmd, 0);
3493                     if (!fmt_from_vgname(vgname, NULL)) {
3494                               if (memlock()) {
3495                                         /*
3496                                          * FIXME: Disallow calling this function if
3497                                          * memlock() is true.
3498                                          */
3499                                         unlock_vg(cmd, vgname);
3500                                         return FAILED_LOCKING;
3501                               }
3502                               lvmcache_label_scan(cmd, 2);
3503                               if (!fmt_from_vgname(vgname, NULL)) {
3504                                         /* vgname not found after scanning */
3505                                         return SUCCESS;
3506                               }
3507                     }
3508           }
3509 
3510           /* Found vgname so cannot reserve. */
3511           unlock_vg(cmd, vgname);
3512           return FAILED_EXIST;
3513 }
3514 
3515 /*
3516  * Gets/Sets for external LVM library
3517  */
pv_id(const struct physical_volume * pv)3518 struct id pv_id(const struct physical_volume *pv)
3519 {
3520           return pv_field(pv, id);
3521 }
3522 
pv_format_type(const struct physical_volume * pv)3523 const struct format_type *pv_format_type(const struct physical_volume *pv)
3524 {
3525           return pv_field(pv, fmt);
3526 }
3527 
pv_vgid(const struct physical_volume * pv)3528 struct id pv_vgid(const struct physical_volume *pv)
3529 {
3530           return pv_field(pv, vgid);
3531 }
3532 
pv_dev(const struct physical_volume * pv)3533 struct device *pv_dev(const struct physical_volume *pv)
3534 {
3535           return pv_field(pv, dev);
3536 }
3537 
pv_vg_name(const struct physical_volume * pv)3538 const char *pv_vg_name(const struct physical_volume *pv)
3539 {
3540           return pv_field(pv, vg_name);
3541 }
3542 
/* Return dev_name() of the PV's device. */
const char *pv_dev_name(const struct physical_volume *pv)
{
	struct device *device = pv_dev(pv);

	return dev_name(device);
}
3547 
/* Return the size field of the PV. */
uint64_t pv_size(const struct physical_volume *pv)
{
	const uint64_t size = pv_field(pv, size);

	return size;
}
3552 
pv_status(const struct physical_volume * pv)3553 uint32_t pv_status(const struct physical_volume *pv)
3554 {
3555           return pv_field(pv, status);
3556 }
3557 
pv_pe_size(const struct physical_volume * pv)3558 uint32_t pv_pe_size(const struct physical_volume *pv)
3559 {
3560           return pv_field(pv, pe_size);
3561 }
3562 
pv_pe_start(const struct physical_volume * pv)3563 uint64_t pv_pe_start(const struct physical_volume *pv)
3564 {
3565           return pv_field(pv, pe_start);
3566 }
3567 
pv_pe_count(const struct physical_volume * pv)3568 uint32_t pv_pe_count(const struct physical_volume *pv)
3569 {
3570           return pv_field(pv, pe_count);
3571 }
3572 
pv_pe_alloc_count(const struct physical_volume * pv)3573 uint32_t pv_pe_alloc_count(const struct physical_volume *pv)
3574 {
3575           return pv_field(pv, pe_alloc_count);
3576 }
3577 
pv_mda_count(const struct physical_volume * pv)3578 uint32_t pv_mda_count(const struct physical_volume *pv)
3579 {
3580           struct lvmcache_info *info;
3581 
3582           info = info_from_pvid((const char *)&pv->id.uuid, 0);
3583           return info ? dm_list_size(&info->mdas) : UINT64_C(0);
3584 }
3585 
vg_seqno(const struct volume_group * vg)3586 uint32_t vg_seqno(const struct volume_group *vg)
3587 {
3588           return vg->seqno;
3589 }
3590 
vg_status(const struct volume_group * vg)3591 uint32_t vg_status(const struct volume_group *vg)
3592 {
3593           return vg->status;
3594 }
3595 
vg_size(const struct volume_group * vg)3596 uint64_t vg_size(const struct volume_group *vg)
3597 {
3598           return (uint64_t) vg->extent_count * vg->extent_size;
3599 }
3600 
vg_free(const struct volume_group * vg)3601 uint64_t vg_free(const struct volume_group *vg)
3602 {
3603           return (uint64_t) vg->free_count * vg->extent_size;
3604 }
3605 
vg_extent_size(const struct volume_group * vg)3606 uint64_t vg_extent_size(const struct volume_group *vg)
3607 {
3608           return (uint64_t) vg->extent_size;
3609 }
3610 
vg_extent_count(const struct volume_group * vg)3611 uint64_t vg_extent_count(const struct volume_group *vg)
3612 {
3613           return (uint64_t) vg->extent_count;
3614 }
3615 
vg_free_count(const struct volume_group * vg)3616 uint64_t vg_free_count(const struct volume_group *vg)
3617 {
3618           return (uint64_t) vg->free_count;
3619 }
3620 
vg_pv_count(const struct volume_group * vg)3621 uint64_t vg_pv_count(const struct volume_group *vg)
3622 {
3623           return (uint64_t) vg->pv_count;
3624 }
3625 
vg_max_pv(const struct volume_group * vg)3626 uint64_t vg_max_pv(const struct volume_group *vg)
3627 {
3628           return (uint64_t) vg->max_pv;
3629 }
3630 
vg_max_lv(const struct volume_group * vg)3631 uint64_t vg_max_lv(const struct volume_group *vg)
3632 {
3633           return (uint64_t) vg->max_lv;
3634 }
3635 
vg_mda_count(const struct volume_group * vg)3636 uint32_t vg_mda_count(const struct volume_group *vg)
3637 {
3638           return dm_list_size(&vg->fid->metadata_areas);
3639 }
3640 
lv_size(const struct logical_volume * lv)3641 uint64_t lv_size(const struct logical_volume *lv)
3642 {
3643           return lv->size;
3644 }
3645 
3646 /**
3647  * pv_by_path - Given a device path return a PV handle if it is a PV
3648  * @cmd - handle to the LVM command instance
3649  * @pv_name - device path to read for the PV
3650  *
3651  * Returns:
3652  *  NULL - device path does not contain a valid PV
3653  *  non-NULL - PV handle corresponding to device path
3654  *
3655  * FIXME: merge with find_pv_by_name ?
3656  */
pv_by_path(struct cmd_context * cmd,const char * pv_name)3657 struct physical_volume *pv_by_path(struct cmd_context *cmd, const char *pv_name)
3658 {
3659           struct dm_list mdas;
3660 
3661           dm_list_init(&mdas);
3662           return _pv_read(cmd, cmd->mem, pv_name, &mdas, NULL, 1, 0);
3663 }
3664