1 /** $MirOS: src/sys/dev/raidframe/rf_disks.c,v 1.2 2005/03/06 21:27:56 tg Exp $ */
2 /* $OpenBSD: rf_disks.c,v 1.10 2003/11/27 20:13:27 henning Exp $ */
3 /* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */
4
5 /*
6 * Copyright (c) 1999 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation
10 * by Greg Oster
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40 /*
41 * Copyright (c) 1995 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Author: Mark Holland
45 *
46 * Permission to use, copy, modify and distribute this software and
47 * its documentation is hereby granted, provided that both the copyright
48 * notice and this permission notice appear in all copies of the
49 * software, derivative works or modified versions, and any portions
50 * thereof, and that both notices appear in supporting documentation.
51 *
52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55 *
56 * Carnegie Mellon requests users of this software to return to
57 *
58 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
59 * School of Computer Science
60 * Carnegie Mellon University
61 * Pittsburgh PA 15213-3890
62 *
63 * any improvements or extensions that they make and grant Carnegie the
64 * rights to redistribute these changes.
65 */
66
67 /***************************************************************
68 * rf_disks.c -- Code to perform operations on the actual disks.
69 ***************************************************************/
70
71 #include "rf_types.h"
72 #include "rf_raid.h"
73 #include "rf_alloclist.h"
74 #include "rf_utils.h"
75 #include "rf_configure.h"
76 #include "rf_general.h"
77 #include "rf_options.h"
78 #include "rf_kintf.h"
79
80 #if defined(__NetBSD__)
81 #include "rf_netbsd.h"
82 #elif defined(__OpenBSD__)
83 #include "rf_openbsd.h"
84 #endif
85
86 #include <sys/types.h>
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/proc.h>
90 #include <sys/ioctl.h>
91 #include <sys/fcntl.h>
92 #ifdef __NETBSD__
93 #include <sys/vnode.h>
94 #endif /* __NETBSD__ */
95
96 int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
97 void rf_print_label_status(RF_Raid_t *, int, int, char *,
98 RF_ComponentLabel_t *);
99 int rf_check_label_vitals(RF_Raid_t *, int, int, char *,
100 RF_ComponentLabel_t *, int, int);
101
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  The expansion is wrapped in do { } while (0) so that each use
 * is exactly one statement and cannot capture an "else" that belongs to an
 * enclosing "if".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
104
105 /****************************************************************************
106 *
107 * Initialize the disks comprising the array.
108 *
109 * We want the spare disks to have regular row,col numbers so that we can
110 * easily substitue a spare for a failed disk. But, the driver code assumes
111 * throughout that the array contains numRow by numCol _non-spare_ disks, so
112 * it's not clear how to fit in the spares. This is an unfortunate holdover
113 * from raidSim. The quick and dirty fix is to make row zero bigger than the
114 * rest, and put all the spares in it. This probably needs to get changed
115 * eventually.
116 *
117 ****************************************************************************/
118 int
rf_ConfigureDisks(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)119 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
120 RF_Config_t *cfgPtr)
121 {
122 RF_RaidDisk_t **disks;
123 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
124 RF_RowCol_t r, c;
125 int bs, ret;
126 unsigned i, count, foundone = 0, numFailuresThisRow;
127 int force;
128
129 force = cfgPtr->force;
130
131 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
132 if (ret)
133 goto fail;
134
135 disks = raidPtr->Disks;
136
137 for (r = 0; r < raidPtr->numRow; r++) {
138 numFailuresThisRow = 0;
139 for (c = 0; c < raidPtr->numCol; c++) {
140 ret = rf_ConfigureDisk(raidPtr,
141 &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
142
143 if (ret)
144 goto fail;
145
146 if (disks[r][c].status == rf_ds_optimal) {
147 raidread_component_label(
148 raidPtr->raid_cinfo[r][c].ci_dev,
149 raidPtr->raid_cinfo[r][c].ci_vp,
150 &raidPtr->raid_cinfo[r][c].ci_label);
151 }
152
153 if (disks[r][c].status != rf_ds_optimal) {
154 numFailuresThisRow++;
155 } else {
156 if (disks[r][c].numBlocks < min_numblks)
157 min_numblks = disks[r][c].numBlocks;
158 DPRINTF7("Disk at row %d col %d: dev %s"
159 " numBlocks %ld blockSize %d (%ld MB)\n",
160 r, c, disks[r][c].devname,
161 (long int) disks[r][c].numBlocks,
162 disks[r][c].blockSize,
163 (long int) disks[r][c].numBlocks *
164 disks[r][c].blockSize / 1024 / 1024);
165 }
166 }
167 /* XXX Fix for n-fault tolerant. */
168 /*
169 * XXX This should probably check to see how many failures
170 * we can handle for this configuration !
171 */
172 if (numFailuresThisRow > 0)
173 raidPtr->status[r] = rf_rs_degraded;
174 }
175 /*
176 * All disks must be the same size & have the same block size, bs must
177 * be a power of 2.
178 */
179 bs = 0;
180 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
181 for (c = 0; !foundone && c < raidPtr->numCol; c++) {
182 if (disks[r][c].status == rf_ds_optimal) {
183 bs = disks[r][c].blockSize;
184 foundone = 1;
185 }
186 }
187 }
188 if (!foundone) {
189 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
190 " the array.\n");
191 ret = EINVAL;
192 goto fail;
193 }
194 for (count = 0, i = 1; i; i <<= 1)
195 if (bs & i)
196 count++;
197 if (count != 1) {
198 RF_ERRORMSG1("Error: block size on disks (%d) must be a"
199 " power of 2.\n", bs);
200 ret = EINVAL;
201 goto fail;
202 }
203
204 if (rf_CheckLabels(raidPtr, cfgPtr)) {
205 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
206 if (force != 0) {
207 printf("raid%d: Fatal errors being ignored.\n",
208 raidPtr->raidid);
209 } else {
210 ret = EINVAL;
211 goto fail;
212 }
213 }
214
215 for (r = 0; r < raidPtr->numRow; r++) {
216 for (c = 0; c < raidPtr->numCol; c++) {
217 if (disks[r][c].status == rf_ds_optimal) {
218 if (disks[r][c].blockSize != bs) {
219 RF_ERRORMSG2("Error: block size of"
220 " disk at r %d c %d different from"
221 " disk at r 0 c 0.\n", r, c);
222 ret = EINVAL;
223 goto fail;
224 }
225 if (disks[r][c].numBlocks != min_numblks) {
226 RF_ERRORMSG3("WARNING: truncating disk"
227 " at r %d c %d to %d blocks.\n",
228 r, c, (int) min_numblks);
229 disks[r][c].numBlocks = min_numblks;
230 }
231 }
232 }
233 }
234
235 raidPtr->sectorsPerDisk = min_numblks;
236 raidPtr->logBytesPerSector = ffs(bs) - 1;
237 raidPtr->bytesPerSector = bs;
238 raidPtr->sectorMask = bs - 1;
239 return (0);
240
241 fail:
242 rf_UnconfigureVnodes(raidPtr);
243
244 return (ret);
245 }
246
247
248 /****************************************************************************
249 * Set up the data structures describing the spare disks in the array.
250 * Recall from the above comment that the spare disk descriptors are stored
251 * in row zero, which is specially expanded to hold them.
252 ****************************************************************************/
253 int
rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)254 rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
255 RF_Config_t * cfgPtr)
256 {
257 int i, ret;
258 unsigned int bs;
259 RF_RaidDisk_t *disks;
260 int num_spares_done;
261
262 num_spares_done = 0;
263
264 /*
265 * The space for the spares should have already been allocated by
266 * ConfigureDisks().
267 */
268
269 disks = &raidPtr->Disks[0][raidPtr->numCol];
270 for (i = 0; i < raidPtr->numSpare; i++) {
271 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
272 &disks[i], 0, raidPtr->numCol + i);
273 if (ret)
274 goto fail;
275 if (disks[i].status != rf_ds_optimal) {
276 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
277 &cfgPtr->spare_names[i][0]);
278 } else {
279 /* Change status to spare. */
280 disks[i].status = rf_ds_spare;
281 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
282 " blockSize %d (%ld MB).\n", i, disks[i].devname,
283 (long int) disks[i].numBlocks, disks[i].blockSize,
284 (long int) disks[i].numBlocks *
285 disks[i].blockSize / 1024 / 1024);
286 }
287 num_spares_done++;
288 }
289
290 /* Check sizes and block sizes on spare disks. */
291 bs = 1 << raidPtr->logBytesPerSector;
292 for (i = 0; i < raidPtr->numSpare; i++) {
293 if (disks[i].blockSize != bs) {
294 RF_ERRORMSG3("Block size of %d on spare disk %s is"
295 " not the same as on other disks (%d).\n",
296 disks[i].blockSize, disks[i].devname, bs);
297 ret = EINVAL;
298 goto fail;
299 }
300 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
301 RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
302 " to serve as a spare (need %llu blocks).\n",
303 disks[i].devname, disks[i].numBlocks,
304 raidPtr->sectorsPerDisk);
305 ret = EINVAL;
306 goto fail;
307 } else
308 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
309 RF_ERRORMSG2("Warning: truncating spare disk"
310 " %s to %llu blocks.\n", disks[i].devname,
311 raidPtr->sectorsPerDisk);
312
313 disks[i].numBlocks = raidPtr->sectorsPerDisk;
314 }
315 }
316
317 return (0);
318
319 fail:
320
321 /*
322 * Release the hold on the main components. We've failed to allocate
323 * a spare, and since we're failing, we need to free things...
324 *
325 * XXX Failing to allocate a spare is *not* that big of a deal...
326 * We *can* survive without it, if need be, esp. if we get hot
327 * adding working.
328 * If we don't fail out here, then we need a way to remove this spare...
329 * That should be easier to do here than if we are "live"...
330 */
331
332 rf_UnconfigureVnodes(raidPtr);
333
334 return (ret);
335 }
336
337 int
rf_AllocDiskStructures(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)338 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
339 {
340 RF_RaidDisk_t **disks;
341 int ret;
342 int r;
343
344 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
345 (RF_RaidDisk_t **), raidPtr->cleanupList);
346 if (disks == NULL) {
347 ret = ENOMEM;
348 goto fail;
349 }
350 raidPtr->Disks = disks;
351 /* Get space for the device-specific stuff... */
352 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
353 sizeof(struct raidcinfo *), (struct raidcinfo **),
354 raidPtr->cleanupList);
355 if (raidPtr->raid_cinfo == NULL) {
356 ret = ENOMEM;
357 goto fail;
358 }
359
360 for (r = 0; r < raidPtr->numRow; r++) {
361 /*
362 * We allocate RF_MAXSPARE on the first row so that we
363 * have room to do hot-swapping of spares.
364 */
365 RF_CallocAndAdd(disks[r], raidPtr->numCol +
366 ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t),
367 (RF_RaidDisk_t *), raidPtr->cleanupList);
368 if (disks[r] == NULL) {
369 ret = ENOMEM;
370 goto fail;
371 }
372 /* Get more space for device specific stuff... */
373 RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol +
374 ((r == 0) ? raidPtr->numSpare : 0),
375 sizeof(struct raidcinfo), (struct raidcinfo *),
376 raidPtr->cleanupList);
377 if (raidPtr->raid_cinfo[r] == NULL) {
378 ret = ENOMEM;
379 goto fail;
380 }
381 }
382 return(0);
383 fail:
384 rf_UnconfigureVnodes(raidPtr);
385
386 return(ret);
387 }
388
389
390 /* Configure a single disk during auto-configuration at boot. */
391 int
rf_AutoConfigureDisks(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr,RF_AutoConfig_t * auto_config)392 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
393 RF_AutoConfig_t *auto_config)
394 {
395 RF_RaidDisk_t **disks;
396 RF_RaidDisk_t *diskPtr;
397 RF_RowCol_t r, c;
398 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
399 int bs, ret;
400 int numFailuresThisRow;
401 int force;
402 RF_AutoConfig_t *ac;
403 int parity_good;
404 int mod_counter;
405 int mod_counter_found;
406
407 #if DEBUG
408 printf("Starting autoconfiguration of RAID set...\n");
409 #endif /* DEBUG */
410 force = cfgPtr->force;
411
412 ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
413 if (ret)
414 goto fail;
415
416 disks = raidPtr->Disks;
417
418 /* Assume the parity will be fine... */
419 parity_good = RF_RAID_CLEAN;
420
421 /* Check for mod_counters that are too low. */
422 mod_counter_found = 0;
423 ac = auto_config;
424 while(ac!=NULL) {
425 if (mod_counter_found == 0) {
426 mod_counter = ac->clabel->mod_counter;
427 mod_counter_found = 1;
428 } else {
429 if (ac->clabel->mod_counter > mod_counter) {
430 mod_counter = ac->clabel->mod_counter;
431 }
432 }
433 ac->flag = 0; /* Clear the general purpose flag. */
434 ac = ac->next;
435 }
436
437 for (r = 0; r < raidPtr->numRow; r++) {
438 numFailuresThisRow = 0;
439 for (c = 0; c < raidPtr->numCol; c++) {
440 diskPtr = &disks[r][c];
441
442 /* Find this row/col in the autoconfig. */
443 #if DEBUG
444 printf("Looking for %d,%d in autoconfig.\n", r, c);
445 #endif /* DEBUG */
446 ac = auto_config;
447 while(ac!=NULL) {
448 if (ac->clabel == NULL) {
449 /* Big-time bad news. */
450 goto fail;
451 }
452 if ((ac->clabel->row == r) &&
453 (ac->clabel->column == c) &&
454 (ac->clabel->mod_counter == mod_counter)) {
455 /* It's this one... */
456 /*
457 * Flag it as 'used', so we don't
458 * free it later.
459 */
460 ac->flag = 1;
461 #if DEBUG
462 printf("Found: %s at %d,%d.\n",
463 ac->devname, r, c);
464 #endif /* DEBUG */
465
466 break;
467 }
468 ac = ac->next;
469 }
470
471 if (ac == NULL) {
472 /*
473 * We didn't find an exact match with a
474 * correct mod_counter above... Can we
475 * find one with an incorrect mod_counter
476 * to use instead ? (This one, if we find
477 * it, will be marked as failed once the
478 * set configures)
479 */
480
481 ac = auto_config;
482 while(ac!=NULL) {
483 if (ac->clabel == NULL) {
484 /* Big-time bad news. */
485 goto fail;
486 }
487 if ((ac->clabel->row == r) &&
488 (ac->clabel->column == c)) {
489 /*
490 * It's this one...
491 * Flag it as 'used', so we
492 * don't free it later.
493 */
494 ac->flag = 1;
495 #if DEBUG
496 printf("Found(low mod_counter)"
497 ": %s at %d,%d.\n",
498 ac->devname, r, c);
499 #endif /* DEBUG */
500
501 break;
502 }
503 ac = ac->next;
504 }
505 }
506
507
508
509 if (ac!=NULL) {
510 /* Found it. Configure it... */
511 diskPtr->blockSize = ac->clabel->blockSize;
512 diskPtr->numBlocks = ac->clabel->numBlocks;
513 /*
514 * Note: rf_protectedSectors is already
515 * factored into numBlocks here.
516 */
517 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
518 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
519
520 memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
521 ac->clabel, sizeof(*ac->clabel));
522 snprintf(diskPtr->devname,
523 sizeof diskPtr->devname, "/dev/%s",
524 ac->devname);
525
526 /*
527 * Note the fact that this component was
528 * autoconfigured. You'll need this info
529 * later. Trust me :)
530 */
531 diskPtr->auto_configured = 1;
532 diskPtr->dev = ac->dev;
533
534 /*
535 * We allow the user to specify that
536 * only a fraction of the disks should
537 * be used. This is just for debug: it
538 * speeds up the parity scan.
539 */
540
541 diskPtr->numBlocks = diskPtr->numBlocks *
542 rf_sizePercentage / 100;
543
544 /*
545 * XXX These will get set multiple times,
546 * but since we're autoconfiguring, they'd
547 * better be always the same each time !
548 * If not, this is the least of your worries.
549 */
550
551 bs = diskPtr->blockSize;
552 min_numblks = diskPtr->numBlocks;
553
554 /*
555 * This gets done multiple times, but that's
556 * fine -- the serial number will be the same
557 * for all components, guaranteed.
558 */
559 raidPtr->serial_number =
560 ac->clabel->serial_number;
561 /*
562 * Check the last time the label
563 * was modified.
564 */
565 if (ac->clabel->mod_counter != mod_counter) {
566 /*
567 * Even though we've filled in all
568 * of the above, we don't trust
569 * this component since it's
570 * modification counter is not
571 * in sync with the rest, and we really
572 * consider it to be failed.
573 */
574 disks[r][c].status = rf_ds_failed;
575 numFailuresThisRow++;
576 } else {
577 if (ac->clabel->clean != RF_RAID_CLEAN)
578 {
579 parity_good = RF_RAID_DIRTY;
580 }
581 }
582 } else {
583 /*
584 * Didn't find it at all !!!
585 * Component must really be dead.
586 */
587 disks[r][c].status = rf_ds_failed;
588 snprintf(disks[r][c].devname,
589 sizeof disks[r][c].devname, "component%d",
590 r * raidPtr->numCol + c);
591 numFailuresThisRow++;
592 }
593 }
594 /* XXX Fix for n-fault tolerant. */
595 /*
596 * XXX This should probably check to see how many failures
597 * we can handle for this configuration !
598 */
599 if (numFailuresThisRow > 0)
600 raidPtr->status[r] = rf_rs_degraded;
601 }
602
603 /* Close the device for the ones that didn't get used. */
604
605 ac = auto_config;
606 while(ac != NULL) {
607 if (ac->flag == 0) {
608 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
609 vput(ac->vp);
610 ac->vp = NULL;
611 #if DEBUG
612 printf("Released %s from auto-config set.\n",
613 ac->devname);
614 #endif /* DEBUG */
615 }
616 ac = ac->next;
617 }
618
619 raidPtr->mod_counter = mod_counter;
620
621 /* Note the state of the parity, if any. */
622 raidPtr->parity_good = parity_good;
623 raidPtr->sectorsPerDisk = min_numblks;
624 raidPtr->logBytesPerSector = ffs(bs) - 1;
625 raidPtr->bytesPerSector = bs;
626 raidPtr->sectorMask = bs - 1;
627 return (0);
628
629 fail:
630
631 rf_UnconfigureVnodes(raidPtr);
632
633 return (ret);
634
635 }
636
637 /* Configure a single disk in the array. */
638 int
rf_ConfigureDisk(RF_Raid_t * raidPtr,char * buf,RF_RaidDisk_t * diskPtr,RF_RowCol_t row,RF_RowCol_t col)639 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
640 RF_RowCol_t row, RF_RowCol_t col)
641 {
642 char *p;
643 int retcode;
644
645 struct partinfo dpart;
646 struct vnode *vp;
647 struct vattr va;
648 struct proc *proc;
649 int error;
650
651 retcode = 0;
652 p = rf_find_non_white(buf);
653 if (p[strlen(p) - 1] == '\n') {
654 /* Strip off the newline. */
655 p[strlen(p) - 1] = '\0';
656 }
657 (void) strlcpy(diskPtr->devname, p, sizeof diskPtr->devname);
658
659 proc = raidPtr->engine_thread;
660
661 /* Let's start by claiming the component is fine and well... */
662 diskPtr->status = rf_ds_optimal;
663
664 raidPtr->raid_cinfo[row][col].ci_vp = NULL;
665 raidPtr->raid_cinfo[row][col].ci_dev = 0;
666
667 error = raidlookup(diskPtr->devname, curproc, &vp);
668 if (error) {
669 printf("raidlookup on device: %s failed !\n", diskPtr->devname);
670 if (error == ENXIO) {
671 /* The component isn't there... Must be dead :-( */
672 diskPtr->status = rf_ds_failed;
673 } else {
674 return (error);
675 }
676 }
677 if (diskPtr->status == rf_ds_optimal) {
678
679 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
680 return (error);
681 }
682 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD,
683 proc->p_ucred, proc);
684 if (error) {
685 return (error);
686 }
687 diskPtr->blockSize = dpart.disklab->d_secsize;
688
689 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
690 diskPtr->partitionSize = dpart.part->p_size;
691
692 raidPtr->raid_cinfo[row][col].ci_vp = vp;
693 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
694
695 /* This component was not automatically configured. */
696 diskPtr->auto_configured = 0;
697 diskPtr->dev = va.va_rdev;
698
699 /*
700 * We allow the user to specify that only a fraction of the
701 * disks should be used. This is just for debug: it speeds up
702 * the parity scan.
703 */
704 diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage
705 / 100;
706 }
707 return (0);
708 }
709
710 void
rf_print_label_status(RF_Raid_t * raidPtr,int row,int column,char * dev_name,RF_ComponentLabel_t * ci_label)711 rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
712 RF_ComponentLabel_t *ci_label)
713 {
714
715 printf("raid%d: Component %s being configured at row: %d col: %d\n",
716 raidPtr->raidid, dev_name, row, column);
717 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
718 ci_label->row, ci_label->column, ci_label->num_rows,
719 ci_label->num_columns);
720 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
721 ci_label->version, ci_label->serial_number, ci_label->mod_counter);
722 printf(" Clean: %s Status: %d\n",
723 ci_label->clean ? "Yes" : "No", ci_label->status);
724 }
725
726 int
rf_check_label_vitals(RF_Raid_t * raidPtr,int row,int column,char * dev_name,RF_ComponentLabel_t * ci_label,int serial_number,int mod_counter)727 rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
728 RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
729 {
730 int fatal_error = 0;
731
732 if (serial_number != ci_label->serial_number) {
733 printf("%s has a different serial number: %d %d.\n",
734 dev_name, serial_number, ci_label->serial_number);
735 fatal_error = 1;
736 }
737 if (mod_counter != ci_label->mod_counter) {
738 printf("%s has a different modfication count: %d %d.\n",
739 dev_name, mod_counter, ci_label->mod_counter);
740 }
741
742 if (row != ci_label->row) {
743 printf("Row out of alignment for: %s.\n", dev_name);
744 fatal_error = 1;
745 }
746 if (column != ci_label->column) {
747 printf("Column out of alignment for: %s.\n", dev_name);
748 fatal_error = 1;
749 }
750 if (raidPtr->numRow != ci_label->num_rows) {
751 printf("Number of rows do not match for: %s.\n", dev_name);
752 fatal_error = 1;
753 }
754 if (raidPtr->numCol != ci_label->num_columns) {
755 printf("Number of columns do not match for: %s.\n", dev_name);
756 fatal_error = 1;
757 }
758 if (ci_label->clean == 0) {
759 /* It's not clean, but that's not fatal. */
760 printf("%s is not clean !\n", dev_name);
761 }
762 return(fatal_error);
763 }
764
765
766 /*
767 *
768 * rf_CheckLabels() - Check all the component labels for consistency.
769 * Return an error if there is anything major amiss.
770 *
771 */
772
773 int
rf_CheckLabels(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)774 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
775 {
776 int r, c;
777 char *dev_name;
778 RF_ComponentLabel_t *ci_label;
779 int serial_number = 0;
780 int mod_number = 0;
781 int fatal_error = 0;
782 int mod_values[4];
783 int mod_count[4];
784 int ser_values[4];
785 int ser_count[4];
786 int num_ser;
787 int num_mod;
788 int i;
789 int found;
790 int hosed_row;
791 int hosed_column;
792 int too_fatal;
793 int parity_good;
794 int force;
795
796 hosed_row = -1;
797 hosed_column = -1;
798 too_fatal = 0;
799 force = cfgPtr->force;
800
801 /*
802 * We're going to try to be a little intelligent here. If one
803 * component's label is bogus, and we can identify that it's the
804 * *only* one that's gone, we'll mark it as "failed" and allow
805 * the configuration to proceed. This will be the *only* case
806 * that we'll proceed if there would be (otherwise) fatal errors.
807 *
808 * Basically we simply keep a count of how many components had
809 * what serial number. If all but one agree, we simply mark
810 * the disagreeing component as being failed, and allow
811 * things to come up "normally".
812 *
813 * We do this first for serial numbers, and then for "mod_counter".
814 *
815 */
816
817 num_ser = 0;
818 num_mod = 0;
819 for (r = 0; r < raidPtr->numRow && !fatal_error; r++) {
820 for (c = 0; c < raidPtr->numCol; c++) {
821 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
822 found = 0;
823 for(i = 0; i < num_ser; i++) {
824 if (ser_values[i] == ci_label->serial_number) {
825 ser_count[i]++;
826 found = 1;
827 break;
828 }
829 }
830 if (!found) {
831 ser_values[num_ser] = ci_label->serial_number;
832 ser_count[num_ser] = 1;
833 num_ser++;
834 if (num_ser > 2) {
835 fatal_error = 1;
836 break;
837 }
838 }
839 found = 0;
840 for(i = 0; i < num_mod; i++) {
841 if (mod_values[i] == ci_label->mod_counter) {
842 mod_count[i]++;
843 found = 1;
844 break;
845 }
846 }
847 if (!found) {
848 mod_values[num_mod] = ci_label->mod_counter;
849 mod_count[num_mod] = 1;
850 num_mod++;
851 if (num_mod > 2) {
852 fatal_error = 1;
853 break;
854 }
855 }
856 }
857 }
858 #if DEBUG
859 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
860 for(i = 0; i < num_ser; i++) {
861 printf("%d %d\n", ser_values[i], ser_count[i]);
862 }
863 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
864 for(i = 0; i < num_mod; i++) {
865 printf("%d %d\n", mod_values[i], mod_count[i]);
866 }
867 #endif /* DEBUG */
868 serial_number = ser_values[0];
869 if (num_ser == 2) {
870 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
871 /* Locate the maverick component. */
872 if (ser_count[1] > ser_count[0]) {
873 serial_number = ser_values[1];
874 }
875 for (r = 0; r < raidPtr->numRow; r++) {
876 for (c = 0; c < raidPtr->numCol; c++) {
877 ci_label =
878 &raidPtr->raid_cinfo[r][c].ci_label;
879 if (serial_number !=
880 ci_label->serial_number) {
881 hosed_row = r;
882 hosed_column = c;
883 break;
884 }
885 }
886 }
887 printf("Hosed component: %s.\n",
888 &cfgPtr->devnames[hosed_row][hosed_column][0]);
889 if (!force) {
890 /*
891 * We'll fail this component, as if there are
892 * other major errors, we aren't forcing things
893 * and we'll abort the config anyways.
894 */
895 raidPtr->Disks[hosed_row][hosed_column].status
896 = rf_ds_failed;
897 raidPtr->numFailures++;
898 raidPtr->status[hosed_row] = rf_rs_degraded;
899 }
900 } else {
901 too_fatal = 1;
902 }
903 if (cfgPtr->parityConfig == '0') {
904 /*
905 * We've identified two different serial numbers.
906 * RAID 0 can't cope with that, so we'll punt.
907 */
908 too_fatal = 1;
909 }
910
911 }
912
913 /*
914 * Record the serial number for later. If we bail later, setting
915 * this doesn't matter, otherwise we've got the best guess at the
916 * correct serial number.
917 */
918 raidPtr->serial_number = serial_number;
919
920 mod_number = mod_values[0];
921 if (num_mod == 2) {
922 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
923 /* Locate the maverick component. */
924 if (mod_count[1] > mod_count[0]) {
925 mod_number = mod_values[1];
926 } else if (mod_count[1] < mod_count[0]) {
927 mod_number = mod_values[0];
928 } else {
929 /*
930 * Counts of different modification values
931 * are the same. Assume greater value is
932 * the correct one, all other things
933 * considered.
934 */
935 if (mod_values[0] > mod_values[1]) {
936 mod_number = mod_values[0];
937 } else {
938 mod_number = mod_values[1];
939 }
940
941 }
942 for (r = 0; r < raidPtr->numRow && !too_fatal; r++) {
943 for (c = 0; c < raidPtr->numCol; c++) {
944 ci_label =
945 &raidPtr->raid_cinfo[r][c].ci_label;
946 if (mod_number !=
947 ci_label->mod_counter) {
948 if ((hosed_row == r) &&
949 (hosed_column == c)) {
950 /*
951 * Same one. Can
952 * deal with it.
953 */
954 } else {
955 hosed_row = r;
956 hosed_column = c;
957 if (num_ser != 1) {
958 too_fatal = 1;
959 break;
960 }
961 }
962 }
963 }
964 }
965 printf("Hosed component: %s.\n",
966 &cfgPtr->devnames[hosed_row][hosed_column][0]);
967 if (!force) {
968 /*
969 * We'll fail this component, as if there are
970 * other major errors, we aren't forcing things
971 * and we'll abort the config anyways.
972 */
973 if (raidPtr
974 ->Disks[hosed_row][hosed_column].status !=
975 rf_ds_failed) {
976 raidPtr->Disks[hosed_row]
977 [hosed_column].status =
978 rf_ds_failed;
979 raidPtr->numFailures++;
980 raidPtr->status[hosed_row] =
981 rf_rs_degraded;
982 }
983 }
984 } else {
985 too_fatal = 1;
986 }
987 if (cfgPtr->parityConfig == '0') {
988 /*
989 * We've identified two different mod counters.
990 * RAID 0 can't cope with that, so we'll punt.
991 */
992 too_fatal = 1;
993 }
994 }
995
996 raidPtr->mod_counter = mod_number;
997
998 if (too_fatal) {
999 /*
1000 * We've had both a serial number mismatch, and a mod_counter
1001 * mismatch -- and they involved two different components !!!
1002 * Bail -- make things fail so that the user must force
1003 * the issue...
1004 */
1005 hosed_row = -1;
1006 hosed_column = -1;
1007 }
1008
1009 if (num_ser > 2) {
1010 printf("raid%d: Too many different serial numbers !\n",
1011 raidPtr->raidid);
1012 }
1013
1014 if (num_mod > 2) {
1015 printf("raid%d: Too many different mod counters !\n",
1016 raidPtr->raidid);
1017 }
1018
1019 /*
1020 * We start by assuming the parity will be good, and flee from
1021 * that notion at the slightest sign of trouble.
1022 */
1023
1024 parity_good = RF_RAID_CLEAN;
1025 for (r = 0; r < raidPtr->numRow; r++) {
1026 for (c = 0; c < raidPtr->numCol; c++) {
1027 dev_name = &cfgPtr->devnames[r][c][0];
1028 ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
1029
1030 if ((r == hosed_row) && (c == hosed_column)) {
1031 printf("raid%d: Ignoring %s.\n",
1032 raidPtr->raidid, dev_name);
1033 } else {
1034 rf_print_label_status(raidPtr, r, c, dev_name,
1035 ci_label);
1036 if (rf_check_label_vitals(raidPtr, r, c,
1037 dev_name, ci_label, serial_number,
1038 mod_number)) {
1039 fatal_error = 1;
1040 }
1041 if (ci_label->clean != RF_RAID_CLEAN) {
1042 parity_good = RF_RAID_DIRTY;
1043 }
1044 }
1045 }
1046 }
1047 if (fatal_error) {
1048 parity_good = RF_RAID_DIRTY;
1049 }
1050
1051 /* We note the state of the parity. */
1052 raidPtr->parity_good = parity_good;
1053
1054 return(fatal_error);
1055 }
1056
1057 int
rf_add_hot_spare(RF_Raid_t * raidPtr,RF_SingleComponent_t * sparePtr)1058 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1059 {
1060 RF_RaidDisk_t *disks;
1061 RF_DiskQueue_t *spareQueues;
1062 int ret;
1063 unsigned int bs;
1064 int spare_number;
1065
1066 #if 0
1067 printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare);
1068 printf("Num col: %d.\n", raidPtr->numCol);
1069 #endif
1070 if (raidPtr->numSpare >= RF_MAXSPARE) {
1071 RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare);
1072 return(EINVAL);
1073 }
1074
1075 RF_LOCK_MUTEX(raidPtr->mutex);
1076
1077 /* The beginning of the spares... */
1078 disks = &raidPtr->Disks[0][raidPtr->numCol];
1079
1080 spare_number = raidPtr->numSpare;
1081
1082 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1083 &disks[spare_number], 0, raidPtr->numCol + spare_number);
1084
1085 if (ret)
1086 goto fail;
1087 if (disks[spare_number].status != rf_ds_optimal) {
1088 RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n",
1089 sparePtr->component_name);
1090 ret = EINVAL;
1091 goto fail;
1092 } else {
1093 disks[spare_number].status = rf_ds_spare;
1094 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d"
1095 " (%ld MB).\n", spare_number, disks[spare_number].devname,
1096 (long int) disks[spare_number].numBlocks,
1097 disks[spare_number].blockSize,
1098 (long int) disks[spare_number].numBlocks *
1099 disks[spare_number].blockSize / 1024 / 1024);
1100 }
1101
1102
1103 /* Check sizes and block sizes on the spare disk. */
1104 bs = 1 << raidPtr->logBytesPerSector;
1105 if (disks[spare_number].blockSize != bs) {
1106 RF_ERRORMSG3("Block size of %d on spare disk %s is not"
1107 " the same as on other disks (%d).\n",
1108 disks[spare_number].blockSize,
1109 disks[spare_number].devname, bs);
1110 ret = EINVAL;
1111 goto fail;
1112 }
1113 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1114 RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small to serve"
1115 " as a spare (need %llu blocks).\n",
1116 disks[spare_number].devname, disks[spare_number].numBlocks,
1117 raidPtr->sectorsPerDisk);
1118 ret = EINVAL;
1119 goto fail;
1120 } else {
1121 if (disks[spare_number].numBlocks >
1122 raidPtr->sectorsPerDisk) {
1123 RF_ERRORMSG2("Warning: truncating spare disk %s to %llu"
1124 " blocks.\n", disks[spare_number].devname,
1125 raidPtr->sectorsPerDisk);
1126
1127 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1128 }
1129 }
1130
1131 spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1132 ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
1133 0, raidPtr->numCol + spare_number, raidPtr->qType,
1134 raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol +
1135 spare_number].dev, raidPtr->maxOutstanding,
1136 &raidPtr->shutdownList, raidPtr->cleanupList);
1137
1138
1139 raidPtr->numSpare++;
1140 RF_UNLOCK_MUTEX(raidPtr->mutex);
1141 return (0);
1142
1143 fail:
1144 RF_UNLOCK_MUTEX(raidPtr->mutex);
1145 return(ret);
1146 }
1147
1148 int
rf_remove_hot_spare(RF_Raid_t * raidPtr,RF_SingleComponent_t * sparePtr)1149 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1150 {
1151 int spare_number;
1152
1153 if (raidPtr->numSpare == 0) {
1154 printf("No spares to remove !\n");
1155 return(EINVAL);
1156 }
1157
1158 spare_number = sparePtr->column;
1159
1160 return(EINVAL); /* XXX Not implemented yet. */
1161 #if 0
1162 if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1163 return(EINVAL);
1164 }
1165
1166 /* Verify that this spare isn't in use... */
1167
1168 /* It's gone... */
1169
1170 raidPtr->numSpare--;
1171
1172 return (0);
1173 #endif
1174 }
1175
1176 int
rf_delete_component(RF_Raid_t * raidPtr,RF_SingleComponent_t * component)1177 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1178 {
1179 RF_RaidDisk_t *disks;
1180
1181 if ((component->row < 0) ||
1182 (component->row >= raidPtr->numRow) ||
1183 (component->column < 0) ||
1184 (component->column >= raidPtr->numCol)) {
1185 return(EINVAL);
1186 }
1187
1188 disks = &raidPtr->Disks[component->row][component->column];
1189
1190 /* 1. This component must be marked as 'failed'. */
1191
1192 return(EINVAL); /* Not implemented yet. */
1193 }
1194
1195 int
rf_incorporate_hot_spare(RF_Raid_t * raidPtr,RF_SingleComponent_t * component)1196 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1197 {
1198
1199 /*
1200 * Issues here include how to 'move' this in if there is IO
1201 * taking place (e.g. component queues and such).
1202 */
1203
1204 return(EINVAL); /* Not implemented yet. */
1205 }
1206