xref: /NextBSD/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c (revision 4557fabb34e865d7f40be64b39c9e34fa41dbb60)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23  * All rights reserved.
24  *
25  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26  */
27 
28 #include <sys/zfs_context.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/bio.h>
32 #include <sys/disk.h>
33 #include <sys/spa.h>
34 #include <sys/spa_impl.h>
35 #include <sys/vdev_impl.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/zio.h>
38 #include <geom/geom.h>
39 #include <geom/geom_int.h>
40 
41 /*
42  * Virtual device vector for GEOM.
43  */
44 
45 static g_attrchanged_t vdev_geom_attrchanged;
46 struct g_class zfs_vdev_class = {
47 	.name = "ZFS::VDEV",
48 	.version = G_VERSION,
49 	.attrchanged = vdev_geom_attrchanged,
50 };
51 
52 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
53 
54 SYSCTL_DECL(_vfs_zfs_vdev);
55 /* Don't send BIO_FLUSH. */
56 static int vdev_geom_bio_flush_disable;
57 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
58     &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
59 /* Don't send BIO_DELETE. */
60 static int vdev_geom_bio_delete_disable;
61 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
62     &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
63 
64 static void
vdev_geom_set_rotation_rate(vdev_t * vd,struct g_consumer * cp)65 vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
66 {
67 	int error;
68 	uint16_t rate;
69 
70 	error = g_getattr("GEOM::rotation_rate", cp, &rate);
71 	if (error == 0)
72 		vd->vdev_rotation_rate = rate;
73 	else
74 		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
75 }
76 
77 static void
vdev_geom_attrchanged(struct g_consumer * cp,const char * attr)78 vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
79 {
80 	vdev_t *vd;
81 
82 	vd = cp->private;
83 	if (vd == NULL)
84 		return;
85 
86 	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
87 		vdev_geom_set_rotation_rate(vd, cp);
88 		return;
89 	}
90 }
91 
92 static void
vdev_geom_orphan(struct g_consumer * cp)93 vdev_geom_orphan(struct g_consumer *cp)
94 {
95 	vdev_t *vd;
96 
97 	g_topology_assert();
98 
99 	vd = cp->private;
100 	if (vd == NULL)
101 		return;
102 
103 	/*
104 	 * Orphan callbacks occur from the GEOM event thread.
105 	 * Concurrent with this call, new I/O requests may be
106 	 * working their way through GEOM about to find out
107 	 * (only once executed by the g_down thread) that we've
108 	 * been orphaned from our disk provider.  These I/Os
109 	 * must be retired before we can detach our consumer.
110 	 * This is most easily achieved by acquiring the
111 	 * SPA ZIO configuration lock as a writer, but doing
112 	 * so with the GEOM topology lock held would cause
113 	 * a lock order reversal.  Instead, rely on the SPA's
114 	 * async removal support to invoke a close on this
115 	 * vdev once it is safe to do so.
116 	 */
117 	zfs_post_remove(vd->vdev_spa, vd);
118 	vd->vdev_remove_wanted = B_TRUE;
119 	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
120 }
121 
122 static struct g_consumer *
vdev_geom_attach(struct g_provider * pp)123 vdev_geom_attach(struct g_provider *pp)
124 {
125 	struct g_geom *gp;
126 	struct g_consumer *cp;
127 
128 	g_topology_assert();
129 
130 	ZFS_LOG(1, "Attaching to %s.", pp->name);
131 	/* Do we have geom already? No? Create one. */
132 	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
133 		if (gp->flags & G_GEOM_WITHER)
134 			continue;
135 		if (strcmp(gp->name, "zfs::vdev") != 0)
136 			continue;
137 		break;
138 	}
139 	if (gp == NULL) {
140 		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
141 		gp->orphan = vdev_geom_orphan;
142 		cp = g_new_consumer(gp);
143 		if (g_attach(cp, pp) != 0) {
144 			g_wither_geom(gp, ENXIO);
145 			return (NULL);
146 		}
147 		if (g_access(cp, 1, 0, 1) != 0) {
148 			g_wither_geom(gp, ENXIO);
149 			return (NULL);
150 		}
151 		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
152 	} else {
153 		/* Check if we are already connected to this provider. */
154 		LIST_FOREACH(cp, &gp->consumer, consumer) {
155 			if (cp->provider == pp) {
156 				ZFS_LOG(1, "Found consumer for %s.", pp->name);
157 				break;
158 			}
159 		}
160 		if (cp == NULL) {
161 			cp = g_new_consumer(gp);
162 			if (g_attach(cp, pp) != 0) {
163 				g_destroy_consumer(cp);
164 				return (NULL);
165 			}
166 			if (g_access(cp, 1, 0, 1) != 0) {
167 				g_detach(cp);
168 				g_destroy_consumer(cp);
169 				return (NULL);
170 			}
171 			ZFS_LOG(1, "Created consumer for %s.", pp->name);
172 		} else {
173 			if (g_access(cp, 1, 0, 1) != 0)
174 				return (NULL);
175 			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
176 		}
177 	}
178 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
179 	return (cp);
180 }
181 
182 static void
vdev_geom_detach(void * arg,int flag __unused)183 vdev_geom_detach(void *arg, int flag __unused)
184 {
185 	struct g_geom *gp;
186 	struct g_consumer *cp;
187 
188 	g_topology_assert();
189 	cp = arg;
190 	gp = cp->geom;
191 
192 	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
193 	g_access(cp, -1, 0, -1);
194 	/* Destroy consumer on last close. */
195 	if (cp->acr == 0 && cp->ace == 0) {
196 		ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
197 		if (cp->acw > 0)
198 			g_access(cp, 0, -cp->acw, 0);
199 		g_detach(cp);
200 		g_destroy_consumer(cp);
201 	}
202 	/* Destroy geom if there are no consumers left. */
203 	if (LIST_EMPTY(&gp->consumer)) {
204 		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
205 		g_wither_geom(gp, ENXIO);
206 	}
207 }
208 
209 static void
nvlist_get_guids(nvlist_t * list,uint64_t * pguid,uint64_t * vguid)210 nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
211 {
212 
213 	nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
214 	nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
215 }
216 
217 static int
vdev_geom_io(struct g_consumer * cp,int cmd,void * data,off_t offset,off_t size)218 vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
219 {
220 	struct bio *bp;
221 	u_char *p;
222 	off_t off, maxio;
223 	int error;
224 
225 	ASSERT((offset % cp->provider->sectorsize) == 0);
226 	ASSERT((size % cp->provider->sectorsize) == 0);
227 
228 	bp = g_alloc_bio();
229 	off = offset;
230 	offset += size;
231 	p = data;
232 	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
233 	error = 0;
234 
235 	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
236 		bzero(bp, sizeof(*bp));
237 		bp->bio_cmd = cmd;
238 		bp->bio_done = NULL;
239 		bp->bio_offset = off;
240 		bp->bio_length = MIN(size, maxio);
241 		bp->bio_data = p;
242 		g_io_request(bp, cp);
243 		error = biowait(bp, "vdev_geom_io");
244 		if (error != 0)
245 			break;
246 	}
247 
248 	g_destroy_bio(bp);
249 	return (error);
250 }
251 
252 static void
vdev_geom_taste_orphan(struct g_consumer * cp)253 vdev_geom_taste_orphan(struct g_consumer *cp)
254 {
255 
256 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
257 	    cp->provider->name));
258 }
259 
260 static int
vdev_geom_read_config(struct g_consumer * cp,nvlist_t ** config)261 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
262 {
263 	struct g_provider *pp;
264 	vdev_label_t *label;
265 	char *p, *buf;
266 	size_t buflen;
267 	uint64_t psize;
268 	off_t offset, size;
269 	uint64_t state, txg;
270 	int error, l, len;
271 
272 	g_topology_assert_not();
273 
274 	pp = cp->provider;
275 	ZFS_LOG(1, "Reading config from %s...", pp->name);
276 
277 	psize = pp->mediasize;
278 	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
279 
280 	size = sizeof(*label) + pp->sectorsize -
281 	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;
282 
283 	label = kmem_alloc(size, KM_SLEEP);
284 	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
285 
286 	*config = NULL;
287 	for (l = 0; l < VDEV_LABELS; l++) {
288 
289 		offset = vdev_label_offset(psize, l, 0);
290 		if ((offset % pp->sectorsize) != 0)
291 			continue;
292 
293 		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
294 			continue;
295 		buf = label->vl_vdev_phys.vp_nvlist;
296 
297 		if (nvlist_unpack(buf, buflen, config, 0) != 0)
298 			continue;
299 
300 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
301 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
302 			nvlist_free(*config);
303 			*config = NULL;
304 			continue;
305 		}
306 
307 		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
308 		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
309 		    &txg) != 0 || txg == 0)) {
310 			nvlist_free(*config);
311 			*config = NULL;
312 			continue;
313 		}
314 
315 		break;
316 	}
317 
318 	kmem_free(label, size);
319 	return (*config == NULL ? ENOENT : 0);
320 }
321 
322 static void
resize_configs(nvlist_t *** configs,uint64_t * count,uint64_t id)323 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
324 {
325 	nvlist_t **new_configs;
326 	uint64_t i;
327 
328 	if (id < *count)
329 		return;
330 	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
331 	    KM_SLEEP);
332 	for (i = 0; i < *count; i++)
333 		new_configs[i] = (*configs)[i];
334 	if (*configs != NULL)
335 		kmem_free(*configs, *count * sizeof(void *));
336 	*configs = new_configs;
337 	*count = id + 1;
338 }
339 
340 static void
process_vdev_config(nvlist_t *** configs,uint64_t * count,nvlist_t * cfg,const char * name,uint64_t * known_pool_guid)341 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
342     const char *name, uint64_t* known_pool_guid)
343 {
344 	nvlist_t *vdev_tree;
345 	uint64_t pool_guid;
346 	uint64_t vdev_guid, known_guid;
347 	uint64_t id, txg, known_txg;
348 	char *pname;
349 	int i;
350 
351 	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
352 	    strcmp(pname, name) != 0)
353 		goto ignore;
354 
355 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
356 		goto ignore;
357 
358 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
359 		goto ignore;
360 
361 	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
362 		goto ignore;
363 
364 	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
365 		goto ignore;
366 
367 	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
368 
369 	if (*known_pool_guid != 0) {
370 		if (pool_guid != *known_pool_guid)
371 			goto ignore;
372 	} else
373 		*known_pool_guid = pool_guid;
374 
375 	resize_configs(configs, count, id);
376 
377 	if ((*configs)[id] != NULL) {
378 		VERIFY(nvlist_lookup_uint64((*configs)[id],
379 		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
380 		if (txg <= known_txg)
381 			goto ignore;
382 		nvlist_free((*configs)[id]);
383 	}
384 
385 	(*configs)[id] = cfg;
386 	return;
387 
388 ignore:
389 	nvlist_free(cfg);
390 }
391 
392 static int
vdev_geom_attach_taster(struct g_consumer * cp,struct g_provider * pp)393 vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
394 {
395 	int error;
396 
397 	if (pp->flags & G_PF_WITHER)
398 		return (EINVAL);
399 	g_attach(cp, pp);
400 	error = g_access(cp, 1, 0, 0);
401 	if (error == 0) {
402 		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
403 			error = EINVAL;
404 		else if (pp->mediasize < SPA_MINDEVSIZE)
405 			error = EINVAL;
406 		if (error != 0)
407 			g_access(cp, -1, 0, 0);
408 	}
409 	if (error != 0)
410 		g_detach(cp);
411 	return (error);
412 }
413 
/*
 * Counterpart of vdev_geom_attach_taster(): give back the read reference
 * and detach the tasting consumer from its provider.
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{

	(void) g_access(cp, -1, 0, 0);
	g_detach(cp);
}
420 
421 int
vdev_geom_read_pool_label(const char * name,nvlist_t *** configs,uint64_t * count)422 vdev_geom_read_pool_label(const char *name,
423     nvlist_t ***configs, uint64_t *count)
424 {
425 	struct g_class *mp;
426 	struct g_geom *gp, *zgp;
427 	struct g_provider *pp;
428 	struct g_consumer *zcp;
429 	nvlist_t *vdev_cfg;
430 	uint64_t pool_guid;
431 	int error;
432 
433 	DROP_GIANT();
434 	g_topology_lock();
435 
436 	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
437 	/* This orphan function should be never called. */
438 	zgp->orphan = vdev_geom_taste_orphan;
439 	zcp = g_new_consumer(zgp);
440 
441 	*configs = NULL;
442 	*count = 0;
443 	pool_guid = 0;
444 	LIST_FOREACH(mp, &g_classes, class) {
445 		if (mp == &zfs_vdev_class)
446 			continue;
447 		LIST_FOREACH(gp, &mp->geom, geom) {
448 			if (gp->flags & G_GEOM_WITHER)
449 				continue;
450 			LIST_FOREACH(pp, &gp->provider, provider) {
451 				if (pp->flags & G_PF_WITHER)
452 					continue;
453 				if (vdev_geom_attach_taster(zcp, pp) != 0)
454 					continue;
455 				g_topology_unlock();
456 				error = vdev_geom_read_config(zcp, &vdev_cfg);
457 				g_topology_lock();
458 				vdev_geom_detach_taster(zcp);
459 				if (error)
460 					continue;
461 				ZFS_LOG(1, "successfully read vdev config");
462 
463 				process_vdev_config(configs, count,
464 				    vdev_cfg, name, &pool_guid);
465 			}
466 		}
467 	}
468 
469 	g_destroy_consumer(zcp);
470 	g_destroy_geom(zgp);
471 	g_topology_unlock();
472 	PICKUP_GIANT();
473 
474 	return (*count > 0 ? 0 : ENOENT);
475 }
476 
477 static void
vdev_geom_read_guids(struct g_consumer * cp,uint64_t * pguid,uint64_t * vguid)478 vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
479 {
480 	nvlist_t *config;
481 
482 	g_topology_assert_not();
483 
484 	*pguid = 0;
485 	*vguid = 0;
486 	if (vdev_geom_read_config(cp, &config) == 0) {
487 		nvlist_get_guids(config, pguid, vguid);
488 		nvlist_free(config);
489 	}
490 }
491 
492 static struct g_consumer *
vdev_geom_attach_by_guids(uint64_t pool_guid,uint64_t vdev_guid)493 vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid)
494 {
495 	struct g_class *mp;
496 	struct g_geom *gp, *zgp;
497 	struct g_provider *pp;
498 	struct g_consumer *cp, *zcp;
499 	uint64_t pguid, vguid;
500 
501 	g_topology_assert();
502 
503 	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
504 	/* This orphan function should be never called. */
505 	zgp->orphan = vdev_geom_taste_orphan;
506 	zcp = g_new_consumer(zgp);
507 
508 	cp = NULL;
509 	LIST_FOREACH(mp, &g_classes, class) {
510 		if (mp == &zfs_vdev_class)
511 			continue;
512 		LIST_FOREACH(gp, &mp->geom, geom) {
513 			if (gp->flags & G_GEOM_WITHER)
514 				continue;
515 			LIST_FOREACH(pp, &gp->provider, provider) {
516 				if (vdev_geom_attach_taster(zcp, pp) != 0)
517 					continue;
518 				g_topology_unlock();
519 				vdev_geom_read_guids(zcp, &pguid, &vguid);
520 				g_topology_lock();
521 				vdev_geom_detach_taster(zcp);
522 				if (pguid != pool_guid || vguid != vdev_guid)
523 					continue;
524 				cp = vdev_geom_attach(pp);
525 				if (cp == NULL) {
526 					printf("ZFS WARNING: Unable to "
527 					    "attach to %s.\n", pp->name);
528 					continue;
529 				}
530 				break;
531 			}
532 			if (cp != NULL)
533 				break;
534 		}
535 		if (cp != NULL)
536 			break;
537 	}
538 end:
539 	g_destroy_consumer(zcp);
540 	g_destroy_geom(zgp);
541 	return (cp);
542 }
543 
544 static struct g_consumer *
vdev_geom_open_by_guids(vdev_t * vd)545 vdev_geom_open_by_guids(vdev_t *vd)
546 {
547 	struct g_consumer *cp;
548 	char *buf;
549 	size_t len;
550 
551 	g_topology_assert();
552 
553 	ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
554 	cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid);
555 	if (cp != NULL) {
556 		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
557 		buf = kmem_alloc(len, KM_SLEEP);
558 
559 		snprintf(buf, len, "/dev/%s", cp->provider->name);
560 		spa_strfree(vd->vdev_path);
561 		vd->vdev_path = buf;
562 
563 		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
564 		    (uintmax_t)spa_guid(vd->vdev_spa),
565 		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
566 	} else {
567 		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
568 		    (uintmax_t)spa_guid(vd->vdev_spa),
569 		    (uintmax_t)vd->vdev_guid);
570 	}
571 
572 	return (cp);
573 }
574 
575 static struct g_consumer *
vdev_geom_open_by_path(vdev_t * vd,int check_guid)576 vdev_geom_open_by_path(vdev_t *vd, int check_guid)
577 {
578 	struct g_provider *pp;
579 	struct g_consumer *cp;
580 	uint64_t pguid, vguid;
581 
582 	g_topology_assert();
583 
584 	cp = NULL;
585 	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
586 	if (pp != NULL) {
587 		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
588 		cp = vdev_geom_attach(pp);
589 		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
590 		    pp->sectorsize <= VDEV_PAD_SIZE) {
591 			g_topology_unlock();
592 			vdev_geom_read_guids(cp, &pguid, &vguid);
593 			g_topology_lock();
594 			if (pguid != spa_guid(vd->vdev_spa) ||
595 			    vguid != vd->vdev_guid) {
596 				vdev_geom_detach(cp, 0);
597 				cp = NULL;
598 				ZFS_LOG(1, "guid mismatch for provider %s: "
599 				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
600 				    (uintmax_t)spa_guid(vd->vdev_spa),
601 				    (uintmax_t)vd->vdev_guid,
602 				    (uintmax_t)pguid, (uintmax_t)vguid);
603 			} else {
604 				ZFS_LOG(1, "guid match for provider %s.",
605 				    vd->vdev_path);
606 			}
607 		}
608 	}
609 
610 	return (cp);
611 }
612 
613 static int
vdev_geom_open(vdev_t * vd,uint64_t * psize,uint64_t * max_psize,uint64_t * logical_ashift,uint64_t * physical_ashift)614 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
615     uint64_t *logical_ashift, uint64_t *physical_ashift)
616 {
617 	struct g_provider *pp;
618 	struct g_consumer *cp;
619 	size_t bufsize;
620 	int error;
621 
622 	/*
623 	 * We must have a pathname, and it must be absolute.
624 	 */
625 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
626 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
627 		return (EINVAL);
628 	}
629 
630 	vd->vdev_tsd = NULL;
631 
632 	DROP_GIANT();
633 	g_topology_lock();
634 	error = 0;
635 
636 	if (vd->vdev_spa->spa_splitting_newspa ||
637 	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
638 	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
639 		/*
640 		 * We are dealing with a vdev that hasn't been previously
641 		 * opened (since boot), and we are not loading an
642 		 * existing pool configuration.  This looks like a
643 		 * vdev add operation to a new or existing pool.
644 		 * Assume the user knows what he/she is doing and find
645 		 * GEOM provider by its name, ignoring GUID mismatches.
646 		 *
647 		 * XXPOLICY: It would be safer to only allow a device
648 		 *           that is unlabeled or labeled but missing
649 		 *           GUID information to be opened in this fashion,
650 		 *           unless we are doing a split, in which case we
651 		 *           should allow any guid.
652 		 */
653 		cp = vdev_geom_open_by_path(vd, 0);
654 	} else {
655 		/*
656 		 * Try using the recorded path for this device, but only
657 		 * accept it if its label data contains the expected GUIDs.
658 		 */
659 		cp = vdev_geom_open_by_path(vd, 1);
660 		if (cp == NULL) {
661 			/*
662 			 * The device at vd->vdev_path doesn't have the
663 			 * expected GUIDs. The disks might have merely
664 			 * moved around so try all other GEOM providers
665 			 * to find one with the right GUIDs.
666 			 */
667 			cp = vdev_geom_open_by_guids(vd);
668 		}
669 	}
670 
671 	if (cp == NULL) {
672 		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
673 		error = ENOENT;
674 	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
675 	    !ISP2(cp->provider->sectorsize)) {
676 		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
677 		    vd->vdev_path);
678 		vdev_geom_detach(cp, 0);
679 		error = EINVAL;
680 		cp = NULL;
681 	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
682 		int i;
683 
684 		for (i = 0; i < 5; i++) {
685 			error = g_access(cp, 0, 1, 0);
686 			if (error == 0)
687 				break;
688 			g_topology_unlock();
689 			tsleep(vd, 0, "vdev", hz / 2);
690 			g_topology_lock();
691 		}
692 		if (error != 0) {
693 			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
694 			    vd->vdev_path, error);
695 			vdev_geom_detach(cp, 0);
696 			cp = NULL;
697 		}
698 	}
699 	g_topology_unlock();
700 	PICKUP_GIANT();
701 	if (cp == NULL) {
702 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
703 		return (error);
704 	}
705 
706 	cp->private = vd;
707 	vd->vdev_tsd = cp;
708 	pp = cp->provider;
709 
710 	/*
711 	 * Determine the actual size of the device.
712 	 */
713 	*max_psize = *psize = pp->mediasize;
714 
715 	/*
716 	 * Determine the device's minimum transfer size and preferred
717 	 * transfer size.
718 	 */
719 	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
720 	*physical_ashift = 0;
721 	if (pp->stripesize)
722 		*physical_ashift = highbit(pp->stripesize) - 1;
723 
724 	/*
725 	 * Clear the nowritecache settings, so that on a vdev_reopen()
726 	 * we will try again.
727 	 */
728 	vd->vdev_nowritecache = B_FALSE;
729 
730 	if (vd->vdev_physpath != NULL)
731 		spa_strfree(vd->vdev_physpath);
732 	bufsize = sizeof("/dev/") + strlen(pp->name);
733 	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
734 	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);
735 
736 	/*
737 	 * Determine the device's rotation rate.
738 	 */
739 	vdev_geom_set_rotation_rate(vd, cp);
740 
741 	return (0);
742 }
743 
744 static void
vdev_geom_close(vdev_t * vd)745 vdev_geom_close(vdev_t *vd)
746 {
747 	struct g_consumer *cp;
748 
749 	cp = vd->vdev_tsd;
750 	if (cp == NULL)
751 		return;
752 	vd->vdev_tsd = NULL;
753 	vd->vdev_delayed_close = B_FALSE;
754 	cp->private = NULL;	/* XXX locking */
755 	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
756 }
757 
758 static void
vdev_geom_io_intr(struct bio * bp)759 vdev_geom_io_intr(struct bio *bp)
760 {
761 	vdev_t *vd;
762 	zio_t *zio;
763 
764 	zio = bp->bio_caller1;
765 	vd = zio->io_vd;
766 	zio->io_error = bp->bio_error;
767 	if (zio->io_error == 0 && bp->bio_resid != 0)
768 		zio->io_error = SET_ERROR(EIO);
769 
770 	switch(zio->io_error) {
771 	case ENOTSUP:
772 		/*
773 		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
774 		 * that future attempts will never succeed. In this case
775 		 * we set a persistent flag so that we don't bother with
776 		 * requests in the future.
777 		 */
778 		switch(bp->bio_cmd) {
779 		case BIO_FLUSH:
780 			vd->vdev_nowritecache = B_TRUE;
781 			break;
782 		case BIO_DELETE:
783 			vd->vdev_notrim = B_TRUE;
784 			break;
785 		}
786 		break;
787 	case ENXIO:
788 		if (!vd->vdev_remove_wanted) {
789 			/*
790 			 * If provider's error is set we assume it is being
791 			 * removed.
792 			 */
793 			if (bp->bio_to->error != 0) {
794 				vd->vdev_remove_wanted = B_TRUE;
795 				spa_async_request(zio->io_spa,
796 				    SPA_ASYNC_REMOVE);
797 			} else if (!vd->vdev_delayed_close) {
798 				vd->vdev_delayed_close = B_TRUE;
799 			}
800 		}
801 		break;
802 	}
803 	g_destroy_bio(bp);
804 	zio_interrupt(zio);
805 }
806 
807 static void
vdev_geom_io_start(zio_t * zio)808 vdev_geom_io_start(zio_t *zio)
809 {
810 	vdev_t *vd;
811 	struct g_consumer *cp;
812 	struct bio *bp;
813 	int error;
814 
815 	vd = zio->io_vd;
816 
817 	switch (zio->io_type) {
818 	case ZIO_TYPE_IOCTL:
819 		/* XXPOLICY */
820 		if (!vdev_readable(vd)) {
821 			zio->io_error = SET_ERROR(ENXIO);
822 			zio_interrupt(zio);
823 			return;
824 		} else {
825 			switch (zio->io_cmd) {
826 			case DKIOCFLUSHWRITECACHE:
827 				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
828 					break;
829 				if (vd->vdev_nowritecache) {
830 					zio->io_error = SET_ERROR(ENOTSUP);
831 					break;
832 				}
833 				goto sendreq;
834 			default:
835 				zio->io_error = SET_ERROR(ENOTSUP);
836 			}
837 		}
838 
839 		zio_execute(zio);
840 		return;
841 	case ZIO_TYPE_FREE:
842 		if (vd->vdev_notrim) {
843 			zio->io_error = SET_ERROR(ENOTSUP);
844 		} else if (!vdev_geom_bio_delete_disable) {
845 			goto sendreq;
846 		}
847 		zio_execute(zio);
848 		return;
849 	}
850 sendreq:
851 	ASSERT(zio->io_type == ZIO_TYPE_READ ||
852 	    zio->io_type == ZIO_TYPE_WRITE ||
853 	    zio->io_type == ZIO_TYPE_FREE ||
854 	    zio->io_type == ZIO_TYPE_IOCTL);
855 
856 	cp = vd->vdev_tsd;
857 	if (cp == NULL) {
858 		zio->io_error = SET_ERROR(ENXIO);
859 		zio_interrupt(zio);
860 		return;
861 	}
862 	bp = g_alloc_bio();
863 	bp->bio_caller1 = zio;
864 	switch (zio->io_type) {
865 	case ZIO_TYPE_READ:
866 	case ZIO_TYPE_WRITE:
867 		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
868 		bp->bio_data = zio->io_data;
869 		bp->bio_offset = zio->io_offset;
870 		bp->bio_length = zio->io_size;
871 		break;
872 	case ZIO_TYPE_FREE:
873 		bp->bio_cmd = BIO_DELETE;
874 		bp->bio_data = NULL;
875 		bp->bio_offset = zio->io_offset;
876 		bp->bio_length = zio->io_size;
877 		break;
878 	case ZIO_TYPE_IOCTL:
879 		bp->bio_cmd = BIO_FLUSH;
880 		bp->bio_flags |= BIO_ORDERED;
881 		bp->bio_data = NULL;
882 		bp->bio_offset = cp->provider->mediasize;
883 		bp->bio_length = 0;
884 		break;
885 	}
886 	bp->bio_done = vdev_geom_io_intr;
887 
888 	g_io_request(bp, cp);
889 }
890 
891 static void
vdev_geom_io_done(zio_t * zio)892 vdev_geom_io_done(zio_t *zio)
893 {
894 }
895 
896 static void
vdev_geom_hold(vdev_t * vd)897 vdev_geom_hold(vdev_t *vd)
898 {
899 }
900 
901 static void
vdev_geom_rele(vdev_t * vd)902 vdev_geom_rele(vdev_t *vd)
903 {
904 }
905 
906 vdev_ops_t vdev_geom_ops = {
907 	vdev_geom_open,
908 	vdev_geom_close,
909 	vdev_default_asize,
910 	vdev_geom_io_start,
911 	vdev_geom_io_done,
912 	NULL,
913 	vdev_geom_hold,
914 	vdev_geom_rele,
915 	VDEV_TYPE_DISK,		/* name of this vdev type */
916 	B_TRUE			/* leaf vdev */
917 };
918