1 /**	$MirOS: src/sys/dev/raidframe/rf_disks.c,v 1.2 2005/03/06 21:27:56 tg Exp $	*/
2 /*	$OpenBSD: rf_disks.c,v 1.10 2003/11/27 20:13:27 henning Exp $	*/
3 /*	$NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $	*/
4 
5 /*
6  * Copyright (c) 1999 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Greg Oster
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the NetBSD
23  *	Foundation, Inc. and its contributors.
24  * 4. Neither the name of The NetBSD Foundation nor the names of its
25  *    contributors may be used to endorse or promote products derived
26  *    from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40 /*
41  * Copyright (c) 1995 Carnegie-Mellon University.
42  * All rights reserved.
43  *
44  * Author: Mark Holland
45  *
46  * Permission to use, copy, modify and distribute this software and
47  * its documentation is hereby granted, provided that both the copyright
48  * notice and this permission notice appear in all copies of the
49  * software, derivative works or modified versions, and any portions
50  * thereof, and that both notices appear in supporting documentation.
51  *
52  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55  *
56  * Carnegie Mellon requests users of this software to return to
57  *
58  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
59  *  School of Computer Science
60  *  Carnegie Mellon University
61  *  Pittsburgh PA 15213-3890
62  *
63  * any improvements or extensions that they make and grant Carnegie the
64  * rights to redistribute these changes.
65  */
66 
67 /***************************************************************
68  * rf_disks.c -- Code to perform operations on the actual disks.
69  ***************************************************************/
70 
71 #include "rf_types.h"
72 #include "rf_raid.h"
73 #include "rf_alloclist.h"
74 #include "rf_utils.h"
75 #include "rf_configure.h"
76 #include "rf_general.h"
77 #include "rf_options.h"
78 #include "rf_kintf.h"
79 
80 #if defined(__NetBSD__)
81 #include "rf_netbsd.h"
82 #elif defined(__OpenBSD__)
83 #include "rf_openbsd.h"
84 #endif
85 
86 #include <sys/types.h>
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/proc.h>
90 #include <sys/ioctl.h>
91 #include <sys/fcntl.h>
92 #ifdef	__NETBSD__
93 #include <sys/vnode.h>
94 #endif	/* __NETBSD__ */
95 
96 int  rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
97 void rf_print_label_status(RF_Raid_t *, int, int, char *,
98 	RF_ComponentLabel_t *);
99 int  rf_check_label_vitals(RF_Raid_t *, int, int, char *,
100 	RF_ComponentLabel_t *, int, int);
101 
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement. A bare `if (rf_diskDebug) printf(...)' would silently
 * capture an `else' that follows an unbraced use of the macro.
 */
#define	DPRINTF6(a,b,c,d,e,f)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f);				\
	} while (0)
#define	DPRINTF7(a,b,c,d,e,f,g)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f,g);				\
	} while (0)
104 
105 /****************************************************************************
106  *
107  * Initialize the disks comprising the array.
108  *
109  * We want the spare disks to have regular row,col numbers so that we can
110  * easily substitue a spare for a failed disk. But, the driver code assumes
111  * throughout that the array contains numRow by numCol _non-spare_ disks, so
112  * it's not clear how to fit in the spares. This is an unfortunate holdover
113  * from raidSim. The quick and dirty fix is to make row zero bigger than the
114  * rest, and put all the spares in it. This probably needs to get changed
115  * eventually.
116  *
117  ****************************************************************************/
118 int
rf_ConfigureDisks(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)119 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
120     RF_Config_t *cfgPtr)
121 {
122 	RF_RaidDisk_t **disks;
123 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
124 	RF_RowCol_t r, c;
125 	int bs, ret;
126 	unsigned i, count, foundone = 0, numFailuresThisRow;
127 	int force;
128 
129 	force = cfgPtr->force;
130 
131  	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
132  	if (ret)
133 		goto fail;
134 
135  	disks = raidPtr->Disks;
136 
137 	for (r = 0; r < raidPtr->numRow; r++) {
138 		numFailuresThisRow = 0;
139 		for (c = 0; c < raidPtr->numCol; c++) {
140 			ret = rf_ConfigureDisk(raidPtr,
141 			    &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
142 
143 			if (ret)
144 				goto fail;
145 
146 			if (disks[r][c].status == rf_ds_optimal) {
147 				raidread_component_label(
148 					 raidPtr->raid_cinfo[r][c].ci_dev,
149 					 raidPtr->raid_cinfo[r][c].ci_vp,
150 					 &raidPtr->raid_cinfo[r][c].ci_label);
151 			}
152 
153 			if (disks[r][c].status != rf_ds_optimal) {
154 				numFailuresThisRow++;
155 			} else {
156 				if (disks[r][c].numBlocks < min_numblks)
157 					min_numblks = disks[r][c].numBlocks;
158 				DPRINTF7("Disk at row %d col %d: dev %s"
159 				    " numBlocks %ld blockSize %d (%ld MB)\n",
160 				    r, c, disks[r][c].devname,
161 				    (long int) disks[r][c].numBlocks,
162 				    disks[r][c].blockSize,
163 				    (long int) disks[r][c].numBlocks *
164 				     disks[r][c].blockSize / 1024 / 1024);
165 			}
166 		}
167 		/* XXX Fix for n-fault tolerant. */
168 		/*
169 		 * XXX This should probably check to see how many failures
170 		 * we can handle for this configuration !
171 		 */
172 		if (numFailuresThisRow > 0)
173 			raidPtr->status[r] = rf_rs_degraded;
174 	}
175 	/*
176 	 * All disks must be the same size & have the same block size, bs must
177 	 * be a power of 2.
178 	 */
179 	bs = 0;
180 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
181 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
182 			if (disks[r][c].status == rf_ds_optimal) {
183 				bs = disks[r][c].blockSize;
184 				foundone = 1;
185 			}
186 		}
187 	}
188 	if (!foundone) {
189 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
190 		    " the array.\n");
191 		ret = EINVAL;
192 		goto fail;
193 	}
194 	for (count = 0, i = 1; i; i <<= 1)
195 		if (bs & i)
196 			count++;
197 	if (count != 1) {
198 		RF_ERRORMSG1("Error: block size on disks (%d) must be a"
199 		    " power of 2.\n", bs);
200 		ret = EINVAL;
201 		goto fail;
202 	}
203 
204 	if (rf_CheckLabels(raidPtr, cfgPtr)) {
205 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
206 		if (force != 0) {
207 			printf("raid%d: Fatal errors being ignored.\n",
208 			    raidPtr->raidid);
209 		} else {
210 			ret = EINVAL;
211 			goto fail;
212 		}
213 	}
214 
215 	for (r = 0; r < raidPtr->numRow; r++) {
216 		for (c = 0; c < raidPtr->numCol; c++) {
217 			if (disks[r][c].status == rf_ds_optimal) {
218 				if (disks[r][c].blockSize != bs) {
219 					RF_ERRORMSG2("Error: block size of"
220 					    " disk at r %d c %d different from"
221 					    " disk at r 0 c 0.\n", r, c);
222 					ret = EINVAL;
223 					goto fail;
224 				}
225 				if (disks[r][c].numBlocks != min_numblks) {
226 					RF_ERRORMSG3("WARNING: truncating disk"
227 					    " at r %d c %d to %d blocks.\n",
228 					    r, c, (int) min_numblks);
229 					disks[r][c].numBlocks = min_numblks;
230 				}
231 			}
232 		}
233 	}
234 
235 	raidPtr->sectorsPerDisk = min_numblks;
236 	raidPtr->logBytesPerSector = ffs(bs) - 1;
237 	raidPtr->bytesPerSector = bs;
238 	raidPtr->sectorMask = bs - 1;
239 	return (0);
240 
241 fail:
242 	rf_UnconfigureVnodes(raidPtr);
243 
244 	return (ret);
245 }
246 
247 
248 /****************************************************************************
249  * Set up the data structures describing the spare disks in the array.
250  * Recall from the above comment that the spare disk descriptors are stored
251  * in row zero, which is specially expanded to hold them.
252  ****************************************************************************/
253 int
rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)254 rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
255     RF_Config_t * cfgPtr)
256 {
257 	int i, ret;
258 	unsigned int bs;
259 	RF_RaidDisk_t *disks;
260 	int num_spares_done;
261 
262 	num_spares_done = 0;
263 
264 	/*
265 	 * The space for the spares should have already been allocated by
266 	 * ConfigureDisks().
267 	 */
268 
269 	disks = &raidPtr->Disks[0][raidPtr->numCol];
270 	for (i = 0; i < raidPtr->numSpare; i++) {
271 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
272 		    &disks[i], 0, raidPtr->numCol + i);
273 		if (ret)
274 			goto fail;
275 		if (disks[i].status != rf_ds_optimal) {
276 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
277 			    &cfgPtr->spare_names[i][0]);
278 		} else {
279 			/* Change status to spare. */
280 			disks[i].status = rf_ds_spare;
281 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
282 			    " blockSize %d (%ld MB).\n", i, disks[i].devname,
283 			    (long int) disks[i].numBlocks, disks[i].blockSize,
284 			    (long int) disks[i].numBlocks *
285 			    disks[i].blockSize / 1024 / 1024);
286 		}
287 		num_spares_done++;
288 	}
289 
290 	/* Check sizes and block sizes on spare disks. */
291 	bs = 1 << raidPtr->logBytesPerSector;
292 	for (i = 0; i < raidPtr->numSpare; i++) {
293 		if (disks[i].blockSize != bs) {
294 			RF_ERRORMSG3("Block size of %d on spare disk %s is"
295 			    " not the same as on other disks (%d).\n",
296 			    disks[i].blockSize, disks[i].devname, bs);
297 			ret = EINVAL;
298 			goto fail;
299 		}
300 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
301 			RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
302 			    " to serve as a spare (need %llu blocks).\n",
303 			    disks[i].devname, disks[i].numBlocks,
304 			    raidPtr->sectorsPerDisk);
305 			ret = EINVAL;
306 			goto fail;
307 		} else
308 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
309 				RF_ERRORMSG2("Warning: truncating spare disk"
310 				    " %s to %llu blocks.\n", disks[i].devname,
311 				    raidPtr->sectorsPerDisk);
312 
313 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
314 			}
315 	}
316 
317 	return (0);
318 
319 fail:
320 
321 	/*
322 	 * Release the hold on the main components. We've failed to allocate
323 	 * a spare, and since we're failing, we need to free things...
324 	 *
325 	 * XXX Failing to allocate a spare is *not* that big of a deal...
326 	 * We *can* survive without it, if need be, esp. if we get hot
327 	 * adding working.
328 	 * If we don't fail out here, then we need a way to remove this spare...
329 	 * That should be easier to do here than if we are "live"...
330 	 */
331 
332 	rf_UnconfigureVnodes(raidPtr);
333 
334 	return (ret);
335 }
336 
337 int
rf_AllocDiskStructures(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)338 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
339 {
340 	RF_RaidDisk_t **disks;
341 	int ret;
342 	int r;
343 
344 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
345 	    (RF_RaidDisk_t **), raidPtr->cleanupList);
346 	if (disks == NULL) {
347 		ret = ENOMEM;
348 		goto fail;
349 	}
350 	raidPtr->Disks = disks;
351 	/* Get space for the device-specific stuff... */
352 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
353 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
354 	    raidPtr->cleanupList);
355 	if (raidPtr->raid_cinfo == NULL) {
356 		ret = ENOMEM;
357 		goto fail;
358 	}
359 
360 	for (r = 0; r < raidPtr->numRow; r++) {
361 		/*
362 		 * We allocate RF_MAXSPARE on the first row so that we
363 		 * have room to do hot-swapping of spares.
364 		 */
365 		RF_CallocAndAdd(disks[r], raidPtr->numCol +
366 		    ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t),
367 		    (RF_RaidDisk_t *), raidPtr->cleanupList);
368 		if (disks[r] == NULL) {
369 			ret = ENOMEM;
370 			goto fail;
371 		}
372 		/* Get more space for device specific stuff... */
373 		RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol +
374 		    ((r == 0) ? raidPtr->numSpare : 0),
375 		    sizeof(struct raidcinfo), (struct raidcinfo *),
376 		    raidPtr->cleanupList);
377 		if (raidPtr->raid_cinfo[r] == NULL) {
378 			ret = ENOMEM;
379 			goto fail;
380 		}
381 	}
382 	return(0);
383 fail:
384 	rf_UnconfigureVnodes(raidPtr);
385 
386 	return(ret);
387 }
388 
389 
390 /* Configure a single disk during auto-configuration at boot. */
391 int
rf_AutoConfigureDisks(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr,RF_AutoConfig_t * auto_config)392 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
393     RF_AutoConfig_t *auto_config)
394 {
395 	RF_RaidDisk_t **disks;
396 	RF_RaidDisk_t *diskPtr;
397 	RF_RowCol_t r, c;
398 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
399 	int bs, ret;
400 	int numFailuresThisRow;
401 	int force;
402 	RF_AutoConfig_t *ac;
403 	int parity_good;
404 	int mod_counter;
405 	int mod_counter_found;
406 
407 #if	DEBUG
408 	printf("Starting autoconfiguration of RAID set...\n");
409 #endif	/* DEBUG */
410 	force = cfgPtr->force;
411 
412 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
413 	if (ret)
414 		goto fail;
415 
416 	disks = raidPtr->Disks;
417 
418 	/* Assume the parity will be fine... */
419 	parity_good = RF_RAID_CLEAN;
420 
421 	/* Check for mod_counters that are too low. */
422 	mod_counter_found = 0;
423 	ac = auto_config;
424 	while(ac!=NULL) {
425 		if (mod_counter_found == 0) {
426 			mod_counter = ac->clabel->mod_counter;
427 			mod_counter_found = 1;
428 		} else {
429 			if (ac->clabel->mod_counter > mod_counter) {
430 				mod_counter = ac->clabel->mod_counter;
431 			}
432 		}
433 		ac->flag = 0; /* Clear the general purpose flag. */
434 		ac = ac->next;
435 	}
436 
437 	for (r = 0; r < raidPtr->numRow; r++) {
438 		numFailuresThisRow = 0;
439 		for (c = 0; c < raidPtr->numCol; c++) {
440 			diskPtr = &disks[r][c];
441 
442 			/* Find this row/col in the autoconfig. */
443 #if	DEBUG
444 			printf("Looking for %d,%d in autoconfig.\n", r, c);
445 #endif	/* DEBUG */
446 			ac = auto_config;
447 			while(ac!=NULL) {
448 				if (ac->clabel == NULL) {
449 					/* Big-time bad news. */
450 					goto fail;
451 				}
452 				if ((ac->clabel->row == r) &&
453 				    (ac->clabel->column == c) &&
454 				    (ac->clabel->mod_counter == mod_counter)) {
455 					/* It's this one... */
456 					/*
457 					 * Flag it as 'used', so we don't
458 					 * free it later.
459 					 */
460 					ac->flag = 1;
461 #if	DEBUG
462 					printf("Found: %s at %d,%d.\n",
463 					    ac->devname, r, c);
464 #endif	/* DEBUG */
465 
466 					break;
467 				}
468 				ac = ac->next;
469 			}
470 
471 			if (ac == NULL) {
472 				/*
473 				 * We didn't find an exact match with a
474 				 * correct mod_counter above...  Can we
475 				 * find one with an incorrect mod_counter
476 				 * to use instead ?  (This one, if we find
477 				 * it, will be marked as failed once the
478 				 * set configures)
479 				 */
480 
481 				ac = auto_config;
482 				while(ac!=NULL) {
483 					if (ac->clabel == NULL) {
484 						/* Big-time bad news. */
485 						goto fail;
486 					}
487 					if ((ac->clabel->row == r) &&
488 					    (ac->clabel->column == c)) {
489 						/*
490 						 * It's this one...
491 						 * Flag it as 'used', so we
492 						 * don't free it later.
493 						 */
494 						ac->flag = 1;
495 #if	DEBUG
496 						printf("Found(low mod_counter)"
497 						    ": %s at %d,%d.\n",
498 						    ac->devname, r, c);
499 #endif	/* DEBUG */
500 
501 						break;
502 					}
503 					ac = ac->next;
504 				}
505 			}
506 
507 
508 
509 			if (ac!=NULL) {
510 				/* Found it. Configure it... */
511 				diskPtr->blockSize = ac->clabel->blockSize;
512 				diskPtr->numBlocks = ac->clabel->numBlocks;
513 				/*
514 				 * Note: rf_protectedSectors is already
515 				 * factored into numBlocks here.
516 				 */
517 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
518 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
519 
520 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
521 				    ac->clabel, sizeof(*ac->clabel));
522 				snprintf(diskPtr->devname,
523 				    sizeof diskPtr->devname, "/dev/%s",
524 				    ac->devname);
525 
526 				/*
527 				 * Note the fact that this component was
528 				 * autoconfigured. You'll need this info
529 				 * later. Trust me :)
530 				 */
531 				diskPtr->auto_configured = 1;
532 				diskPtr->dev = ac->dev;
533 
534 				/*
535 				 * We allow the user to specify that
536 				 * only a fraction of the disks should
537 				 * be used. This is just for debug: it
538 				 * speeds up the parity scan.
539 				 */
540 
541 				diskPtr->numBlocks = diskPtr->numBlocks *
542 					rf_sizePercentage / 100;
543 
544 				/*
545 				 * XXX These will get set multiple times,
546 				 * but since we're autoconfiguring, they'd
547 				 * better be always the same each time !
548 				 * If not, this is the least of your worries.
549 				 */
550 
551 				bs = diskPtr->blockSize;
552 				min_numblks = diskPtr->numBlocks;
553 
554 				/*
555 				 * This gets done multiple times, but that's
556 				 * fine -- the serial number will be the same
557 				 * for all components, guaranteed.
558 				 */
559 				raidPtr->serial_number =
560 				    ac->clabel->serial_number;
561 				/*
562 				 * Check the last time the label
563 				 * was modified.
564 				 */
565 				if (ac->clabel->mod_counter != mod_counter) {
566 					/*
567 					 * Even though we've filled in all
568 					 * of the above, we don't trust
569 					 * this component since it's
570 					 * modification counter is not
571 					 * in sync with the rest, and we really
572 					 * consider it to be failed.
573 					 */
574 					disks[r][c].status = rf_ds_failed;
575 					numFailuresThisRow++;
576 				} else {
577 					if (ac->clabel->clean != RF_RAID_CLEAN)
578 					{
579 						parity_good = RF_RAID_DIRTY;
580 					}
581 				}
582 			} else {
583 				/*
584 				 * Didn't find it at all !!!
585 				 * Component must really be dead.
586 				 */
587 				disks[r][c].status = rf_ds_failed;
588 				snprintf(disks[r][c].devname,
589 				    sizeof disks[r][c].devname, "component%d",
590 				    r * raidPtr->numCol + c);
591 				numFailuresThisRow++;
592 			}
593 		}
594 		/* XXX Fix for n-fault tolerant. */
595 		/*
596 		 * XXX This should probably check to see how many failures
597 		 * we can handle for this configuration !
598 		 */
599 		if (numFailuresThisRow > 0)
600 			raidPtr->status[r] = rf_rs_degraded;
601 	}
602 
603 	/* Close the device for the ones that didn't get used. */
604 
605 	ac = auto_config;
606 	while(ac != NULL) {
607 		if (ac->flag == 0) {
608 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
609 			vput(ac->vp);
610 			ac->vp = NULL;
611 #if	DEBUG
612 			printf("Released %s from auto-config set.\n",
613 			    ac->devname);
614 #endif  /* DEBUG */
615 		}
616 		ac = ac->next;
617 	}
618 
619 	raidPtr->mod_counter = mod_counter;
620 
621 	/* Note the state of the parity, if any. */
622 	raidPtr->parity_good = parity_good;
623 	raidPtr->sectorsPerDisk = min_numblks;
624 	raidPtr->logBytesPerSector = ffs(bs) - 1;
625 	raidPtr->bytesPerSector = bs;
626 	raidPtr->sectorMask = bs - 1;
627 	return (0);
628 
629 fail:
630 
631 	rf_UnconfigureVnodes(raidPtr);
632 
633 	return (ret);
634 
635 }
636 
637 /* Configure a single disk in the array. */
638 int
rf_ConfigureDisk(RF_Raid_t * raidPtr,char * buf,RF_RaidDisk_t * diskPtr,RF_RowCol_t row,RF_RowCol_t col)639 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
640     RF_RowCol_t row, RF_RowCol_t col)
641 {
642 	char *p;
643 	int retcode;
644 
645 	struct partinfo dpart;
646 	struct vnode *vp;
647 	struct vattr va;
648 	struct proc *proc;
649 	int error;
650 
651 	retcode = 0;
652 	p = rf_find_non_white(buf);
653 	if (p[strlen(p) - 1] == '\n') {
654 		/* Strip off the newline. */
655 		p[strlen(p) - 1] = '\0';
656 	}
657 	(void) strlcpy(diskPtr->devname, p, sizeof diskPtr->devname);
658 
659 	proc = raidPtr->engine_thread;
660 
661 	/* Let's start by claiming the component is fine and well... */
662 	diskPtr->status = rf_ds_optimal;
663 
664 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
665 	raidPtr->raid_cinfo[row][col].ci_dev = 0;
666 
667 	error = raidlookup(diskPtr->devname, curproc, &vp);
668 	if (error) {
669 		printf("raidlookup on device: %s failed !\n", diskPtr->devname);
670 		if (error == ENXIO) {
671 			/* The component isn't there...  Must be dead :-( */
672 			diskPtr->status = rf_ds_failed;
673 		} else {
674 			return (error);
675 		}
676 	}
677 	if (diskPtr->status == rf_ds_optimal) {
678 
679 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
680 			return (error);
681 		}
682 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD,
683 		    proc->p_ucred, proc);
684 		if (error) {
685 			return (error);
686 		}
687 		diskPtr->blockSize = dpart.disklab->d_secsize;
688 
689 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
690  		diskPtr->partitionSize = dpart.part->p_size;
691 
692 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
693 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
694 
695  		/* This component was not automatically configured. */
696  		diskPtr->auto_configured = 0;
697 		diskPtr->dev = va.va_rdev;
698 
699 		/*
700 		 * We allow the user to specify that only a fraction of the
701 		 * disks should be used. This is just for debug: it speeds up
702 		 * the parity scan.
703 		 */
704 		diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage
705 		    / 100;
706 	}
707 	return (0);
708 }
709 
710 void
rf_print_label_status(RF_Raid_t * raidPtr,int row,int column,char * dev_name,RF_ComponentLabel_t * ci_label)711 rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
712     RF_ComponentLabel_t *ci_label)
713 {
714 
715 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
716 	    raidPtr->raidid, dev_name, row, column);
717 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
718 	    ci_label->row, ci_label->column, ci_label->num_rows,
719 	    ci_label->num_columns);
720 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
721 	    ci_label->version, ci_label->serial_number, ci_label->mod_counter);
722 	printf("         Clean: %s Status: %d\n",
723 	    ci_label->clean ? "Yes" : "No", ci_label->status);
724 }
725 
726 int
rf_check_label_vitals(RF_Raid_t * raidPtr,int row,int column,char * dev_name,RF_ComponentLabel_t * ci_label,int serial_number,int mod_counter)727 rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
728     RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
729 {
730 	int fatal_error = 0;
731 
732 	if (serial_number != ci_label->serial_number) {
733 		printf("%s has a different serial number: %d %d.\n",
734 		    dev_name, serial_number, ci_label->serial_number);
735 		fatal_error = 1;
736 	}
737 	if (mod_counter != ci_label->mod_counter) {
738 		printf("%s has a different modfication count: %d %d.\n",
739 		    dev_name, mod_counter, ci_label->mod_counter);
740 	}
741 
742 	if (row != ci_label->row) {
743 		printf("Row out of alignment for: %s.\n", dev_name);
744 		fatal_error = 1;
745 	}
746 	if (column != ci_label->column) {
747 		printf("Column out of alignment for: %s.\n", dev_name);
748 		fatal_error = 1;
749 	}
750 	if (raidPtr->numRow != ci_label->num_rows) {
751 		printf("Number of rows do not match for: %s.\n", dev_name);
752 		fatal_error = 1;
753 	}
754 	if (raidPtr->numCol != ci_label->num_columns) {
755 		printf("Number of columns do not match for: %s.\n", dev_name);
756 		fatal_error = 1;
757 	}
758 	if (ci_label->clean == 0) {
759 		/* It's not clean, but that's not fatal. */
760 		printf("%s is not clean !\n", dev_name);
761 	}
762 	return(fatal_error);
763 }
764 
765 
766 /*
767  *
768  * rf_CheckLabels() - Check all the component labels for consistency.
769  * Return an error if there is anything major amiss.
770  *
771  */
772 
773 int
rf_CheckLabels(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)774 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
775 {
776 	int r, c;
777 	char *dev_name;
778 	RF_ComponentLabel_t *ci_label;
779 	int serial_number = 0;
780 	int mod_number = 0;
781 	int fatal_error = 0;
782 	int mod_values[4];
783 	int mod_count[4];
784 	int ser_values[4];
785 	int ser_count[4];
786 	int num_ser;
787 	int num_mod;
788 	int i;
789 	int found;
790 	int hosed_row;
791 	int hosed_column;
792 	int too_fatal;
793 	int parity_good;
794 	int force;
795 
796 	hosed_row = -1;
797 	hosed_column = -1;
798 	too_fatal = 0;
799 	force = cfgPtr->force;
800 
801 	/*
802 	 * We're going to try to be a little intelligent here. If one
803 	 * component's label is bogus, and we can identify that it's the
804 	 * *only* one that's gone, we'll mark it as "failed" and allow
805 	 * the configuration to proceed. This will be the *only* case
806 	 * that we'll proceed if there would be (otherwise) fatal errors.
807 	 *
808 	 * Basically we simply keep a count of how many components had
809 	 * what serial number. If all but one agree, we simply mark
810 	 * the disagreeing component as being failed, and allow
811 	 * things to come up "normally".
812 	 *
813 	 * We do this first for serial numbers, and then for "mod_counter".
814 	 *
815 	 */
816 
817 	num_ser = 0;
818 	num_mod = 0;
819 	for (r = 0; r < raidPtr->numRow && !fatal_error; r++) {
820 		for (c = 0; c < raidPtr->numCol; c++) {
821 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
822 			found = 0;
823 			for(i = 0; i < num_ser; i++) {
824 				if (ser_values[i] == ci_label->serial_number) {
825 					ser_count[i]++;
826 					found = 1;
827 					break;
828 				}
829 			}
830 			if (!found) {
831 				ser_values[num_ser] = ci_label->serial_number;
832 				ser_count[num_ser] = 1;
833 				num_ser++;
834 				if (num_ser > 2) {
835 					fatal_error = 1;
836 					break;
837 				}
838 			}
839 			found = 0;
840 			for(i = 0; i < num_mod; i++) {
841 				if (mod_values[i] == ci_label->mod_counter) {
842 					mod_count[i]++;
843 					found = 1;
844 					break;
845 				}
846 			}
847 			if (!found) {
848 				mod_values[num_mod] = ci_label->mod_counter;
849 				mod_count[num_mod] = 1;
850 				num_mod++;
851 				if (num_mod > 2) {
852 					fatal_error = 1;
853 					break;
854 				}
855 			}
856 		}
857 	}
858 #if	DEBUG
859 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
860 	for(i = 0; i < num_ser; i++) {
861 		printf("%d %d\n", ser_values[i], ser_count[i]);
862 	}
863 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
864 	for(i = 0; i < num_mod; i++) {
865 		printf("%d %d\n", mod_values[i], mod_count[i]);
866 	}
867 #endif  /* DEBUG */
868 	serial_number = ser_values[0];
869 	if (num_ser == 2) {
870 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
871 			/* Locate the maverick component. */
872 			if (ser_count[1] > ser_count[0]) {
873 				serial_number = ser_values[1];
874 			}
875 			for (r = 0; r < raidPtr->numRow; r++) {
876 				for (c = 0; c < raidPtr->numCol; c++) {
877 					ci_label =
878 					    &raidPtr->raid_cinfo[r][c].ci_label;
879 					if (serial_number !=
880 					    ci_label->serial_number) {
881 						hosed_row = r;
882 						hosed_column = c;
883 						break;
884 					}
885 				}
886 			}
887 			printf("Hosed component: %s.\n",
888 			    &cfgPtr->devnames[hosed_row][hosed_column][0]);
889 			if (!force) {
890 				/*
891 				 * We'll fail this component, as if there are
892 				 * other major errors, we aren't forcing things
893 				 * and we'll abort the config anyways.
894 				 */
895 				raidPtr->Disks[hosed_row][hosed_column].status
896 				    = rf_ds_failed;
897 				raidPtr->numFailures++;
898 				raidPtr->status[hosed_row] = rf_rs_degraded;
899 			}
900 		} else {
901 			too_fatal = 1;
902 		}
903 		if (cfgPtr->parityConfig == '0') {
904 			/*
905 			 * We've identified two different serial numbers.
906 			 * RAID 0 can't cope with that, so we'll punt.
907 			 */
908 			too_fatal = 1;
909 		}
910 
911 	}
912 
913 	/*
914 	 * Record the serial number for later. If we bail later, setting
915 	 * this doesn't matter, otherwise we've got the best guess at the
916 	 * correct serial number.
917 	 */
918 	raidPtr->serial_number = serial_number;
919 
920 	mod_number = mod_values[0];
921 	if (num_mod == 2) {
922 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
923 			/* Locate the maverick component. */
924 			if (mod_count[1] > mod_count[0]) {
925 				mod_number = mod_values[1];
926 			} else if (mod_count[1] < mod_count[0]) {
927 				mod_number = mod_values[0];
928 			} else {
929 				/*
930 				 * Counts of different modification values
931 				 * are the same. Assume greater value is
932 				 * the correct one, all other things
933 				 * considered.
934 				 */
935 				if (mod_values[0] > mod_values[1]) {
936 					mod_number = mod_values[0];
937 				} else {
938 					mod_number = mod_values[1];
939 				}
940 
941 			}
942 			for (r = 0; r < raidPtr->numRow && !too_fatal; r++) {
943 				for (c = 0; c < raidPtr->numCol; c++) {
944 					ci_label =
945 					    &raidPtr->raid_cinfo[r][c].ci_label;
946 					if (mod_number !=
947 					    ci_label->mod_counter) {
948 						if ((hosed_row == r) &&
949 						    (hosed_column == c)) {
950 							/*
951 							 * Same one. Can
952 							 * deal with it.
953 							 */
954 						} else {
955 							hosed_row = r;
956 							hosed_column = c;
957 							if (num_ser != 1) {
958 								too_fatal = 1;
959 								break;
960 							}
961 						}
962 					}
963 				}
964 			}
965 			printf("Hosed component: %s.\n",
966 			    &cfgPtr->devnames[hosed_row][hosed_column][0]);
967 			if (!force) {
968 				/*
969 				 * We'll fail this component, as if there are
970 				 * other major errors, we aren't forcing things
971 				 * and we'll abort the config anyways.
972 				 */
973 				if (raidPtr
974 				    ->Disks[hosed_row][hosed_column].status !=
975 				    rf_ds_failed) {
976 					raidPtr->Disks[hosed_row]
977 					    [hosed_column].status =
978 					    rf_ds_failed;
979 					raidPtr->numFailures++;
980 					raidPtr->status[hosed_row] =
981 					    rf_rs_degraded;
982 				}
983 			}
984 		} else {
985 			too_fatal = 1;
986 		}
987 		if (cfgPtr->parityConfig == '0') {
988 			/*
989 			 * We've identified two different mod counters.
990 			 * RAID 0 can't cope with that, so we'll punt.
991 			 */
992 			too_fatal = 1;
993 		}
994 	}
995 
996 	raidPtr->mod_counter = mod_number;
997 
998 	if (too_fatal) {
999 		/*
1000 		 * We've had both a serial number mismatch, and a mod_counter
1001 		 * mismatch -- and they involved two different components !!!
1002 		 * Bail -- make things fail so that the user must force
1003 		 * the issue...
1004 		 */
1005 		hosed_row = -1;
1006 		hosed_column = -1;
1007 	}
1008 
1009 	if (num_ser > 2) {
1010 		printf("raid%d: Too many different serial numbers !\n",
1011 		    raidPtr->raidid);
1012 	}
1013 
1014 	if (num_mod > 2) {
1015 		printf("raid%d: Too many different mod counters !\n",
1016 		    raidPtr->raidid);
1017 	}
1018 
1019 	/*
1020 	 * We start by assuming the parity will be good, and flee from
1021 	 * that notion at the slightest sign of trouble.
1022 	 */
1023 
1024 	parity_good = RF_RAID_CLEAN;
1025 	for (r = 0; r < raidPtr->numRow; r++) {
1026 		for (c = 0; c < raidPtr->numCol; c++) {
1027 			dev_name = &cfgPtr->devnames[r][c][0];
1028 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
1029 
1030 			if ((r == hosed_row) && (c == hosed_column)) {
1031 				printf("raid%d: Ignoring %s.\n",
1032 				    raidPtr->raidid, dev_name);
1033 			} else {
1034 				rf_print_label_status(raidPtr, r, c, dev_name,
1035 				    ci_label);
1036 				if (rf_check_label_vitals(raidPtr, r, c,
1037 				     dev_name, ci_label, serial_number,
1038 				     mod_number)) {
1039 					fatal_error = 1;
1040 				}
1041 				if (ci_label->clean != RF_RAID_CLEAN) {
1042 					parity_good = RF_RAID_DIRTY;
1043 				}
1044 			}
1045 		}
1046 	}
1047 	if (fatal_error) {
1048 		parity_good = RF_RAID_DIRTY;
1049 	}
1050 
1051 	/* We note the state of the parity. */
1052 	raidPtr->parity_good = parity_good;
1053 
1054 	return(fatal_error);
1055 }
1056 
1057 int
rf_add_hot_spare(RF_Raid_t * raidPtr,RF_SingleComponent_t * sparePtr)1058 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1059 {
1060 	RF_RaidDisk_t *disks;
1061 	RF_DiskQueue_t *spareQueues;
1062 	int ret;
1063 	unsigned int bs;
1064 	int spare_number;
1065 
1066 #if 0
1067 	printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare);
1068 	printf("Num col: %d.\n", raidPtr->numCol);
1069 #endif
1070 	if (raidPtr->numSpare >= RF_MAXSPARE) {
1071 		RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare);
1072 		return(EINVAL);
1073  	}
1074 
1075 	RF_LOCK_MUTEX(raidPtr->mutex);
1076 
1077 	/* The beginning of the spares... */
1078 	disks = &raidPtr->Disks[0][raidPtr->numCol];
1079 
1080 	spare_number = raidPtr->numSpare;
1081 
1082 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1083 	    &disks[spare_number], 0, raidPtr->numCol + spare_number);
1084 
1085 	if (ret)
1086 		goto fail;
1087 	if (disks[spare_number].status != rf_ds_optimal) {
1088 		RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n",
1089 		    sparePtr->component_name);
1090 		ret = EINVAL;
1091 		goto fail;
1092 	} else {
1093 		disks[spare_number].status = rf_ds_spare;
1094 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d"
1095 		    " (%ld MB).\n", spare_number, disks[spare_number].devname,
1096 		    (long int) disks[spare_number].numBlocks,
1097 		    disks[spare_number].blockSize,
1098 		    (long int) disks[spare_number].numBlocks *
1099 		     disks[spare_number].blockSize / 1024 / 1024);
1100 	}
1101 
1102 
1103 	/* Check sizes and block sizes on the spare disk. */
1104 	bs = 1 << raidPtr->logBytesPerSector;
1105 	if (disks[spare_number].blockSize != bs) {
1106 		RF_ERRORMSG3("Block size of %d on spare disk %s is not"
1107 		    " the same as on other disks (%d).\n",
1108 		    disks[spare_number].blockSize,
1109 		    disks[spare_number].devname, bs);
1110 		ret = EINVAL;
1111 		goto fail;
1112 	}
1113 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1114 		RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small to serve"
1115 		    " as a spare (need %llu blocks).\n",
1116 		    disks[spare_number].devname, disks[spare_number].numBlocks,
1117 		    raidPtr->sectorsPerDisk);
1118 		ret = EINVAL;
1119 		goto fail;
1120 	} else {
1121 		if (disks[spare_number].numBlocks >
1122 		    raidPtr->sectorsPerDisk) {
1123 			RF_ERRORMSG2("Warning: truncating spare disk %s to %llu"
1124 			    " blocks.\n", disks[spare_number].devname,
1125 			    raidPtr->sectorsPerDisk);
1126 
1127 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1128 		}
1129 	}
1130 
1131 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1132 	ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
1133 	    0, raidPtr->numCol + spare_number, raidPtr->qType,
1134 	    raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol +
1135 	     spare_number].dev, raidPtr->maxOutstanding,
1136 	    &raidPtr->shutdownList, raidPtr->cleanupList);
1137 
1138 
1139 	raidPtr->numSpare++;
1140 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1141 	return (0);
1142 
1143 fail:
1144 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1145 	return(ret);
1146 }
1147 
1148 int
rf_remove_hot_spare(RF_Raid_t * raidPtr,RF_SingleComponent_t * sparePtr)1149 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1150 {
1151 	int spare_number;
1152 
1153 	if (raidPtr->numSpare == 0) {
1154 		printf("No spares to remove !\n");
1155 		return(EINVAL);
1156 	}
1157 
1158 	spare_number = sparePtr->column;
1159 
1160 	return(EINVAL);	/* XXX Not implemented yet. */
1161 #if 0
1162 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1163 		return(EINVAL);
1164 	}
1165 
1166 	/* Verify that this spare isn't in use... */
1167 
1168 	/* It's gone... */
1169 
1170 	raidPtr->numSpare--;
1171 
1172 	return (0);
1173 #endif
1174 }
1175 
1176 int
rf_delete_component(RF_Raid_t * raidPtr,RF_SingleComponent_t * component)1177 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1178 {
1179 	RF_RaidDisk_t *disks;
1180 
1181 	if ((component->row < 0) ||
1182 	    (component->row >= raidPtr->numRow) ||
1183 	    (component->column < 0) ||
1184 	    (component->column >= raidPtr->numCol)) {
1185 		return(EINVAL);
1186 	}
1187 
1188 	disks = &raidPtr->Disks[component->row][component->column];
1189 
1190 	/* 1. This component must be marked as 'failed'. */
1191 
1192 	return(EINVAL); /* Not implemented yet. */
1193 }
1194 
1195 int
rf_incorporate_hot_spare(RF_Raid_t * raidPtr,RF_SingleComponent_t * component)1196 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1197 {
1198 
1199 	/*
1200 	 * Issues here include how to 'move' this in if there is IO
1201 	 * taking place (e.g. component queues and such).
1202 	 */
1203 
1204 	return(EINVAL); /* Not implemented yet. */
1205 }
1206