1 /* $MirOS: src/sys/dev/raidframe/rf_openbsdkintf.c,v 1.5 2006/08/09 19:45:41 tg Exp $ */
2 /* $OpenBSD: rf_openbsdkintf.c,v 1.31 2005/12/08 05:53:45 tedu Exp $ */
3 /* $NetBSD: rf_netbsdkintf.c,v 1.109 2001/07/27 03:30:07 oster Exp $ */
4
5 /*-
6 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation
10 * by Greg Oster; Jason R. Thorpe.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Copyright (c) 1988 University of Utah.
43 * Copyright (c) 1990, 1993
44 * The Regents of the University of California. All rights reserved.
45 *
46 * This code is derived from software contributed to Berkeley by
47 * the Systems Programming Group of the University of Utah Computer
48 * Science Department.
49 *
50 * Redistribution and use in source and binary forms, with or without
51 * modification, are permitted provided that the following conditions
52 * are met:
53 * 1. Redistributions of source code must retain the above copyright
54 * notice, this list of conditions and the following disclaimer.
55 * 2. Redistributions in binary form must reproduce the above copyright
56 * notice, this list of conditions and the following disclaimer in the
57 * documentation and/or other materials provided with the distribution.
58 * 3. Neither the name of the University nor the names of its contributors
59 * may be used to endorse or promote products derived from this software
60 * without specific prior written permission.
61 *
62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72 * SUCH DAMAGE.
73 *
74 * from: Utah $Hdr: cd.c 1.6 90/11/28$
75 *
76 * @(#)cd.c 8.2 (Berkeley) 11/16/93
77 */
78
79 /*
80 * Copyright (c) 1995 Carnegie-Mellon University.
81 * All rights reserved.
82 *
83 * Authors: Mark Holland, Jim Zelenka
84 *
85 * Permission to use, copy, modify and distribute this software and
86 * its documentation is hereby granted, provided that both the copyright
87 * notice and this permission notice appear in all copies of the
88 * software, derivative works or modified versions, and any portions
89 * thereof, and that both notices appear in supporting documentation.
90 *
91 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
92 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
93 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
94 *
95 * Carnegie Mellon requests users of this software to return to
96 *
97 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
98 * School of Computer Science
99 * Carnegie Mellon University
100 * Pittsburgh PA 15213-3890
101 *
102 * any improvements or extensions that they make and grant Carnegie the
103 * rights to redistribute these changes.
104 */
105
106 /*****************************************************************************
107 *
108 * rf_kintf.c -- The kernel interface routines for RAIDframe.
109 *
110 *****************************************************************************/
111
112 #include <sys/errno.h>
113
114 #include <sys/param.h>
115 #include <sys/pool.h>
116 #include <sys/malloc.h>
117 #include <sys/queue.h>
118 #include <sys/disk.h>
119 #include <sys/device.h>
120 #include <sys/stat.h>
121 #include <sys/ioctl.h>
122 #include <sys/fcntl.h>
123 #include <sys/systm.h>
124 #include <sys/namei.h>
125 #include <sys/conf.h>
126 #include <sys/lock.h>
127 #include <sys/buf.h>
128 #include <sys/user.h>
129 #include <sys/reboot.h>
130
131 #include "raid.h"
132 #include "rf_raid.h"
133 #include "rf_raidframe.h"
134 #include "rf_copyback.h"
135 #include "rf_dag.h"
136 #include "rf_dagflags.h"
137 #include "rf_desc.h"
138 #include "rf_diskqueue.h"
139 #include "rf_engine.h"
140 #include "rf_acctrace.h"
141 #include "rf_etimer.h"
142 #include "rf_general.h"
143 #include "rf_debugMem.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_debugprint.h"
149 #include "rf_threadstuff.h"
150 #include "rf_configure.h"
151
/* Debug verbosity; db1_printf() only emits output while this is > 0. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug trace.
 *
 * The single macro argument is a complete, parenthesised printf argument
 * list.  Without RAIDDEBUG the macro expands to a no-op expression.
 */
#ifndef RAIDDEBUG
#define	db1_printf(a)	(void)0
#else /* RAIDDEBUG */
#define	db1_printf(a)							\
	do {								\
		if (rf_kdebug_level > 0)				\
			printf a;					\
	} while(0)
#endif /* RAIDDEBUG */
159
160 static RF_Raid_t **raidPtrs; /* Global raid device descriptors. */
161
162 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex);
163
164 /* Requests to install a spare table. */
165 static RF_SparetWait_t *rf_sparet_wait_queue;
166
167 /* Responses from installation process. */
168 static RF_SparetWait_t *rf_sparet_resp_queue;
169
170 /* Prototypes. */
171 void rf_KernelWakeupFunc(struct buf *);
172 void rf_InitBP(struct buf *, struct vnode *, unsigned, dev_t, RF_SectorNum_t,
173 RF_SectorCount_t, caddr_t, void (*)(struct buf *), void *, int,
174 struct proc *);
175 void raidinit(RF_Raid_t *);
176
177 void raidattach(int);
178 int raidsize(dev_t);
179 int raidopen(dev_t, int, int, struct proc *);
180 int raidclose(dev_t, int, int, struct proc *);
181 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
182 int raidwrite(dev_t, struct uio *, int);
183 int raidread(dev_t, struct uio *, int);
184 void raidstrategy(struct buf *);
185 int raiddump(dev_t, daddr_t, caddr_t, size_t);
186
187 /*
188 * Pilfered from ccd.c
189 */
190 struct raidbuf {
191 struct buf rf_buf; /* New I/O buf. MUST BE FIRST!!! */
192 struct buf *rf_obp; /* Ptr. to original I/O buf. */
193 int rf_flags; /* Miscellaneous flags. */
194 RF_DiskQueueData_t *req; /* The request that this was part of. */
195 };
196
/*
 * Take an item from / return an item to the component buffer pool of the
 * raid_softc `rs'.  The get side passes PR_NOWAIT to pool_get().
 */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
199
200 /*
201 * Some port (like i386) use a swapgeneric that wants to snoop around
202 * in this raid_cd structure. It is preserved (for now) to remain
203 * compatible with such practice.
204 */
205 struct cfdriver raid_cd = {
206 NULL, "raid", DV_DISK
207 };
208
209 /*
210 * XXX Not sure if the following should be replacing the raidPtrs above,
211 * or if it should be used in conjunction with that...
212 */
213 struct raid_softc {
214 int sc_flags; /* Flags. */
215 int sc_cflags; /* Configuration flags. */
216 size_t sc_size; /* Size of the raid device. */
217 char sc_xname[20]; /* XXX external name. */
218 struct disk sc_dkdev; /* Generic disk device info. */
219 struct pool sc_cbufpool; /* Component buffer pool. */
220 struct buf sc_q; /* Used for the device queue. */
221 };
222
/* Bit values for raid_softc.sc_flags. */
#define	RAIDF_INITED	0x01	/* Unit has been initialized. */
#define	RAIDF_WLABEL	0x02	/* Label area is writable. */
#define	RAIDF_LABELLING	0x04	/* Unit is currently being labelled. */
#define	RAIDF_WANTED	0x40	/* Someone is waiting to obtain a lock. */
#define	RAIDF_LOCKED	0x80	/* Unit is locked. */

/* Unit number encoded in a raid dev_t. */
#define	raidunit(x)	DISKUNIT(x)

/*
 * Number of usable units.  Set by raidattach(); the entry points reject
 * any unit that is >= this value before indexing raid_softc[].
 */
int numraid = 0;
232
233 /*
234 * Here we define a cfattach structure for inserting any new raid device
235 * into the device tree. This is needed by some archs that look for
236 * bootable devices in there.
237 */
238 int rf_probe(struct device *, void *, void *);
239 void rf_attach(struct device *, struct device *, void *);
240 int rf_detach(struct device *, int);
241 int rf_activate(struct device *, enum devact);
242 void rf_zeroref(struct device *);
243
244 struct cfattach raid_ca = {
245 sizeof(struct raid_softc), rf_probe, rf_attach,
246 rf_detach, rf_activate, rf_zeroref
247 };
248
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */
#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif

/* dev_t of the RAW_PART partition on the same major/unit as `dev'. */
#define	RAIDLABELDEV(dev)						\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
272
273 /* Declared here, and made public, for the benefit of KVM stuff... */
274 struct raid_softc *raid_softc;
275 struct raid_softc **raid_scPtrs;
276
277 void rf_shutdown_hook(RF_ThreadArg_t);
278 void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disklabel *);
279 void raidgetdisklabel(dev_t);
280 void raidmakedisklabel(struct raid_softc *);
281
282 int raidlock(struct raid_softc *);
283 void raidunlock(struct raid_softc *);
284
285 void rf_markalldirty(RF_Raid_t *);
286
287 struct device *raidrootdev;
288
289 int findblkmajor(struct device *dv);
290 char *findblkname(int);
291
292 void rf_ReconThread(struct rf_recon_req *);
293 /* XXX what I want is: */
294 /*void rf_ReconThread(RF_Raid_t *raidPtr);*/
295 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
296 void rf_CopybackThread(RF_Raid_t *raidPtr);
297 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
298 #ifdef RAID_AUTOCONFIG
299 void rf_buildroothack(void *);
300 int rf_reasonable_label(RF_ComponentLabel_t *);
301 #endif /* RAID_AUTOCONFIG */
302
303 RF_AutoConfig_t *rf_find_raid_components(void);
304 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
305 int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
306 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *,
307 RF_Raid_t *);
308 int rf_set_autoconfig(RF_Raid_t *, int);
309 int rf_set_rootpartition(RF_Raid_t *, int);
310 void rf_release_all_vps(RF_ConfigSet_t *);
311 void rf_cleanup_config_set(RF_ConfigSet_t *);
312 int rf_have_enough_components(RF_ConfigSet_t *);
313 int rf_auto_config_set(RF_ConfigSet_t *, int *);
314
#ifdef RAID_AUTOCONFIG
/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file: raidattach() sets it to 1 in that case.
 */
static int raidautoconfig = 0;
#endif /* RAID_AUTOCONFIG */
324
/*
 * rf_probe -- probe entry of raid_ca.
 *
 * Ignores all of its arguments and always returns 0.
 */
int
rf_probe(struct device *parent, void *match_, void *aux)
{
	(void)parent;
	(void)match_;
	(void)aux;

	return (0);
}
330
/*
 * rf_attach -- attach entry of raid_ca.
 *
 * Intentionally empty: none of the arguments are used.
 */
void
rf_attach(struct device *parent, struct device *self, void *aux)
{
	(void)parent;
	(void)self;
	(void)aux;
}
336
/*
 * rf_detach -- detach entry of raid_ca.
 *
 * Does nothing and always returns 0, whatever `flags' is.
 */
int
rf_detach(struct device *self, int flags)
{
	(void)self;
	(void)flags;

	return (0);
}
342
343 int
rf_activate(struct device * self,enum devact act)344 rf_activate(struct device *self, enum devact act)
345 {
346 return 0;
347 }
348
/*
 * rf_zeroref -- zeroref entry of raid_ca.  Intentionally empty.
 */
void
rf_zeroref(struct device *self)
{
	(void)self;
}
353
354 void
raidattach(int num)355 raidattach(int num)
356 {
357 int raidID;
358 int i, rc;
359 #ifdef RAID_AUTOCONFIG
360 RF_AutoConfig_t *ac_list; /* Autoconfig list. */
361 RF_ConfigSet_t *config_sets;
362 #endif /* RAID_AUTOCONFIG */
363
364 db1_printf(("raidattach: Asked for %d units\n", num));
365
366 if (num <= 0) {
367 #ifdef DIAGNOSTIC
368 panic("raidattach: count <= 0");
369 #endif /* DIAGNOSTIC */
370 return;
371 }
372
373 /* This is where all the initialization stuff gets done. */
374
375 numraid = num;
376
377 /* Make some space for requested number of units... */
378 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
379 if (raidPtrs == NULL) {
380 panic("raidPtrs is NULL!!");
381 }
382
383 rc = rf_mutex_init(&rf_sparet_wait_mutex);
384 if (rc) {
385 RF_PANIC();
386 }
387
388 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
389
390 for (i = 0; i < num; i++)
391 raidPtrs[i] = NULL;
392 rc = rf_BootRaidframe();
393 if (rc == 0)
394 printf("Kernelised RAIDframe activated\n");
395 else
396 panic("Serious error booting RAID !!!");
397
398 /*
399 * Put together some datastructures like the CCD device does...
400 * This lets us lock the device and what-not when it gets opened.
401 */
402
403 raid_softc = (struct raid_softc *)
404 malloc(num * sizeof(struct raid_softc), M_RAIDFRAME, M_NOWAIT);
405 if (raid_softc == NULL) {
406 printf("WARNING: no memory for RAIDframe driver\n");
407 return;
408 }
409
410 bzero(raid_softc, num * sizeof (struct raid_softc));
411
412 raid_scPtrs = (struct raid_softc **)
413 malloc(num * sizeof(struct raid_softc *), M_RAIDFRAME,
414 M_NOWAIT);
415 if (raid_scPtrs == NULL) {
416 printf("WARNING: no memory for RAIDframe driver\n");
417 return;
418 }
419
420 bzero(raid_scPtrs, num * sizeof (struct raid_softc *));
421
422 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
423 M_RAIDFRAME, M_NOWAIT);
424 if (raidrootdev == NULL) {
425 panic("No memory for RAIDframe driver!!?!?!");
426 }
427
428 for (raidID = 0; raidID < num; raidID++) {
429 #if 0
430 SIMPLEQ_INIT(&raid_softc[raidID].sc_q);
431 #endif
432
433 raidrootdev[raidID].dv_class = DV_DISK;
434 raidrootdev[raidID].dv_cfdata = NULL;
435 raidrootdev[raidID].dv_unit = raidID;
436 raidrootdev[raidID].dv_parent = NULL;
437 raidrootdev[raidID].dv_flags = 0;
438 snprintf(raidrootdev[raidID].dv_xname,
439 sizeof raidrootdev[raidID].dv_xname,"raid%d",raidID);
440
441 RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t),
442 (RF_Raid_t *));
443 if (raidPtrs[raidID] == NULL) {
444 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
445 numraid = raidID;
446 return;
447 }
448 }
449
450 raid_cd.cd_devs = (void **) raid_scPtrs;
451 raid_cd.cd_ndevs = num;
452
453 #ifdef RAID_AUTOCONFIG
454 raidautoconfig = 1;
455
456 if (raidautoconfig) {
457 /* 1. Locate all RAID components on the system. */
458
459 #ifdef RAIDDEBUG
460 printf("Searching for raid components...\n");
461 #endif /* RAIDDEBUG */
462 ac_list = rf_find_raid_components();
463
464 /* 2. Sort them into their respective sets. */
465
466 config_sets = rf_create_auto_sets(ac_list);
467
468 /*
469 * 3. Evaluate each set and configure the valid ones
470 * This gets done in rf_buildroothack().
471 */
472
473 /*
474 * Schedule the creation of the thread to do the
475 * "/ on RAID" stuff.
476 */
477
478 rf_buildroothack(config_sets);
479
480 }
481 #endif /* RAID_AUTOCONFIG */
482
483 }
484
#ifdef RAID_AUTOCONFIG
/*
 * rf_buildroothack -- configure the auto-configurable sets and pick a root.
 *
 * `arg' is the head of the RF_ConfigSet_t list built by
 * rf_create_auto_sets().  Every set that has enough components and whose
 * first component label asks for autoconfiguration is configured with
 * rf_auto_config_set(); all other sets, and sets that fail to configure,
 * have their vnodes released.  Each set is passed to
 * rf_cleanup_config_set() afterwards.
 *
 * Unless the user already asked to be prompted (RB_ASKNAME), the number of
 * rootable sets then decides what happens to the root device:
 *   - exactly one: it becomes rootdev when rootdev is unset or already on
 *     the raid major; otherwise it is ignored (rootdev_override) or the
 *     user is asked;
 *   - more than one, without rootdev_override: the user is asked.
 */
void
rf_buildroothack(void *arg)
{
	extern int rootdev_override;
	RF_ConfigSet_t *config_sets = arg;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID = 0;
	int num_root = 0;
	int majdev;

	for (cset = config_sets; cset != NULL; cset = next_cset) {
		/* Pick up the link before the set is handed to cleanup. */
		next_cset = cset->next;

		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			/*
			 * Start from a known value: the failure diagnostic
			 * below prints raidID even if rf_auto_config_set()
			 * bailed out before storing a unit number.
			 */
			raidID = -1;
			retcode = rf_auto_config_set(cset, &raidID);
			if (!retcode) {
				if (cset->rootable) {
					rootID = raidID;
#ifdef RAIDDEBUG
					printf("eligible root device %d:"
					    " raid%d\n", num_root, rootID);
#endif /* RAIDDEBUG */
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#ifdef RAIDDEBUG
				printf("Autoconfig failed with code %d for"
				    " raid%d\n", retcode, raidID);
#endif /* RAIDDEBUG */
				rf_release_all_vps(cset);
			}
		} else {
			/*
			 * We're not autoconfiguring this set...
			 * Release the associated resources.
			 */
			rf_release_all_vps(cset);
		}

		/* Cleanup. */
		rf_cleanup_config_set(cset);
	}

	if (boothowto & RB_ASKNAME) {
		/* We don't auto-config... */
		return;
	}

	/* They didn't ask, and we found something bootable... */

	if (num_root == 1) {
		majdev = findblkmajor(&raidrootdev[rootID]);
		if (majdev < 0)
			boothowto |= RB_ASKNAME;
		else if ((rootdev == NODEV)		/* config bsd generic */
		    || (major(rootdev) == majdev)	/* root on raid */
		    ) {
			extern char root_devname[];

			rootdev = MAKEDISKDEV(majdev, rootID, 0);
			boothowto |= RB_DFLTROOT;
			snprintf(root_devname, 16, "raid%da", rootID);
		} else if (rootdev_override) {
			printf("raidframe: eligible root device raid%da ignored (rootdev set via UKC)\n", rootID);
		} else {
			/* Found a RAID, but e.g. RAMDISK kernel */
			printf("raidframe: Found eligible root device, but this is not a generic kernel.\nraidframe: Please choose a root device.\nPossible answer: rd0a (if you booted a ramdisk)\n");
			boothowto |= RB_ASKNAME;
		}
	} else if ((num_root > 1) && !rootdev_override) {
		/* We can't guess... Require the user to answer... */
		printf("raidframe: Found more than one eligible root device.\nraidframe: Please choose a root device.\nPossible answers: [rsw]d0a raid[0-9]a\n");
		boothowto |= RB_ASKNAME;
	}
}
#endif /* RAID_AUTOCONFIG */
567
568 void
rf_shutdown_hook(RF_ThreadArg_t arg)569 rf_shutdown_hook(RF_ThreadArg_t arg)
570 {
571 int unit;
572 struct raid_softc *rs;
573 RF_Raid_t *raidPtr;
574
575 /* Don't do it if we are not "safe". */
576 if (boothowto & RB_NOSYNC)
577 return;
578
579 raidPtr = (RF_Raid_t *) arg;
580 unit = raidPtr->raidid;
581 rs = &raid_softc[unit];
582
583 /* Shutdown the system. */
584
585 if (rf_hook_cookies != NULL && rf_hook_cookies[unit] != NULL)
586 rf_hook_cookies[unit] = NULL;
587
588 rf_Shutdown(raidPtr);
589
590 pool_destroy(&rs->sc_cbufpool);
591
592 /* It's no longer initialised... */
593 rs->sc_flags &= ~RAIDF_INITED;
594
595 /* config_detach the device. */
596 config_detach(device_lookup(&raid_cd, unit), 0);
597
598 /* Detach the disk. */
599 disk_detach(&rs->sc_dkdev);
600 }
601
602 int
raidsize(dev_t dev)603 raidsize(dev_t dev)
604 {
605 struct raid_softc *rs;
606 struct disklabel *lp;
607 int part, unit, omask, size;
608
609 unit = raidunit(dev);
610 if (unit >= numraid)
611 return (-1);
612 rs = &raid_softc[unit];
613
614 if ((rs->sc_flags & RAIDF_INITED) == 0)
615 return (-1);
616
617 part = DISKPART(dev);
618 omask = rs->sc_dkdev.dk_openmask & (1 << part);
619 lp = rs->sc_dkdev.dk_label;
620
621 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
622 return (-1);
623
624 if (lp->d_partitions[part].p_fstype != FS_SWAP)
625 size = -1;
626 else
627 size = lp->d_partitions[part].p_size *
628 (lp->d_secsize / DEV_BSIZE);
629
630 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
631 return (-1);
632
633 return (size);
634
635 }
636
637 int
raiddump(dev_t dev,daddr_t blkno,caddr_t va,size_t size)638 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
639 {
640 /* Not implemented. */
641 return (ENXIO);
642 }
643
644 /* ARGSUSED */
645 int
raidopen(dev_t dev,int flags,int fmt,struct proc * p)646 raidopen(dev_t dev, int flags, int fmt, struct proc *p)
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650 struct disklabel *lp;
651 int part,pmask;
652 int error = 0;
653
654 if (unit >= numraid)
655 return (ENXIO);
656 rs = &raid_softc[unit];
657
658 if ((error = raidlock(rs)) != 0)
659 return (error);
660 lp = rs->sc_dkdev.dk_label;
661
662 part = DISKPART(dev);
663 pmask = (1 << part);
664
665 db1_printf(
666 ("Opening raid device number: %d partition: %d\n", unit, part));
667
668
669 if ((rs->sc_flags & RAIDF_INITED) && (rs->sc_dkdev.dk_openmask == 0))
670 raidgetdisklabel(dev);
671
672 /* Make sure that this partition exists. */
673
674 if (part != RAW_PART) {
675 db1_printf(("Not a raw partition..\n"));
676 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
677 ((part >= lp->d_npartitions) ||
678 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
679 error = ENXIO;
680 raidunlock(rs);
681 db1_printf(("Bailing out...\n"));
682 return (error);
683 }
684 }
685
686 /* Prevent this unit from being unconfigured while opened. */
687 switch (fmt) {
688 case S_IFCHR:
689 rs->sc_dkdev.dk_copenmask |= pmask;
690 break;
691
692 case S_IFBLK:
693 rs->sc_dkdev.dk_bopenmask |= pmask;
694 break;
695 }
696
697 if ((rs->sc_dkdev.dk_openmask == 0) &&
698 ((rs->sc_flags & RAIDF_INITED) != 0)) {
699 /*
700 * First one... Mark things as dirty... Note that we *MUST*
701 * have done a configure before this. I DO NOT WANT TO BE
702 * SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
703 * THAT THEY BELONG TOGETHER!!!!!
704 */
705 /*
706 * XXX should check to see if we're only open for reading
707 * here... If so, we needn't do this, but then need some
708 * other way of keeping track of what's happened...
709 */
710
711 rf_markalldirty( raidPtrs[unit] );
712 }
713
714 rs->sc_dkdev.dk_openmask =
715 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
716
717 raidunlock(rs);
718
719 return (error);
720 }
721
722 /* ARGSUSED */
723 int
raidclose(dev_t dev,int flags,int fmt,struct proc * p)724 raidclose(dev_t dev, int flags, int fmt, struct proc *p)
725 {
726 int unit = raidunit(dev);
727 struct raid_softc *rs;
728 int error = 0;
729 int part;
730
731 if (unit >= numraid)
732 return (ENXIO);
733 rs = &raid_softc[unit];
734
735 if ((error = raidlock(rs)) != 0)
736 return (error);
737
738 part = DISKPART(dev);
739
740 /* ...that much closer to allowing unconfiguration... */
741 switch (fmt) {
742 case S_IFCHR:
743 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
744 break;
745
746 case S_IFBLK:
747 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
748 break;
749 }
750 rs->sc_dkdev.dk_openmask =
751 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
752
753 if ((rs->sc_dkdev.dk_openmask == 0) &&
754 ((rs->sc_flags & RAIDF_INITED) != 0)) {
755 /*
756 * Last one... Device is not unconfigured yet.
757 * Device shutdown has taken care of setting the
758 * clean bits if RAIDF_INITED is not set.
759 * Mark things as clean...
760 */
761 db1_printf(("Last one on raid%d. Updating status.\n",unit));
762 rf_update_component_labels(raidPtrs[unit],
763 RF_FINAL_COMPONENT_UPDATE);
764 }
765
766 raidunlock(rs);
767 return (0);
768 }
769
770 void
raidstrategy(struct buf * bp)771 raidstrategy(struct buf *bp)
772 {
773 int s;
774
775 unsigned int raidID = raidunit(bp->b_dev);
776 RF_Raid_t *raidPtr;
777 struct raid_softc *rs = &raid_softc[raidID];
778 struct disklabel *lp;
779 int wlabel;
780
781 s = splbio();
782
783 if ((rs->sc_flags & RAIDF_INITED) ==0) {
784 bp->b_error = ENXIO;
785 bp->b_flags |= B_ERROR;
786 bp->b_resid = bp->b_bcount;
787 biodone(bp);
788 goto raidstrategy_end;
789 }
790 if (raidID >= numraid || !raidPtrs[raidID]) {
791 bp->b_error = ENODEV;
792 bp->b_flags |= B_ERROR;
793 bp->b_resid = bp->b_bcount;
794 biodone(bp);
795 goto raidstrategy_end;
796 }
797 raidPtr = raidPtrs[raidID];
798 if (!raidPtr->valid) {
799 bp->b_error = ENODEV;
800 bp->b_flags |= B_ERROR;
801 bp->b_resid = bp->b_bcount;
802 biodone(bp);
803 goto raidstrategy_end;
804 }
805 if (bp->b_bcount == 0) {
806 db1_printf(("b_bcount is zero..\n"));
807 biodone(bp);
808 goto raidstrategy_end;
809 }
810 lp = rs->sc_dkdev.dk_label;
811
812 /*
813 * Do bounds checking and adjust transfer. If there's an
814 * error, the bounds check will flag that for us.
815 */
816 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
817 if (DISKPART(bp->b_dev) != RAW_PART)
818 if (bounds_check_with_label(bp, lp, rs->sc_dkdev.dk_cpulabel,
819 wlabel) <= 0) {
820 db1_printf(("Bounds check failed!!:%d %d\n",
821 (int)bp->b_blkno, (int)wlabel));
822 biodone(bp);
823 goto raidstrategy_end;
824 }
825
826 bp->b_resid = 0;
827
828 bp->b_actf = rs->sc_q.b_actf;
829 rs->sc_q.b_actf = bp;
830 rs->sc_q.b_active++;
831
832 raidstart(raidPtrs[raidID]);
833
834 raidstrategy_end:
835 splx(s);
836 }
837
838 /* ARGSUSED */
839 int
raidread(dev_t dev,struct uio * uio,int flags)840 raidread(dev_t dev, struct uio *uio, int flags)
841 {
842 int unit = raidunit(dev);
843 struct raid_softc *rs;
844 int part;
845
846 if (unit >= numraid)
847 return (ENXIO);
848 rs = &raid_softc[unit];
849
850 if ((rs->sc_flags & RAIDF_INITED) == 0)
851 return (ENXIO);
852 part = DISKPART(dev);
853
854 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
855
856 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
857 }
858
859 /* ARGSUSED */
860 int
raidwrite(dev_t dev,struct uio * uio,int flags)861 raidwrite(dev_t dev, struct uio *uio, int flags)
862 {
863 int unit = raidunit(dev);
864 struct raid_softc *rs;
865
866 if (unit >= numraid)
867 return (ENXIO);
868 rs = &raid_softc[unit];
869
870 if ((rs->sc_flags & RAIDF_INITED) == 0)
871 return (ENXIO);
872 db1_printf(("raidwrite\n"));
873 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
874 }
875
876 int
raidioctl(dev_t dev,u_long cmd,caddr_t data,int flag,struct proc * p)877 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
878 {
879 int unit = raidunit(dev);
880 int error = 0;
881 int part, pmask;
882 struct raid_softc *rs;
883 RF_Config_t *k_cfg, *u_cfg;
884 RF_Raid_t *raidPtr;
885 RF_RaidDisk_t *diskPtr;
886 RF_AccTotals_t *totals;
887 RF_DeviceConfig_t *d_cfg, **ucfgp;
888 u_char *specific_buf;
889 int retcode = 0;
890 int row;
891 int column;
892 struct rf_recon_req *rrcopy, *rr;
893 RF_ComponentLabel_t *clabel;
894 RF_ComponentLabel_t ci_label;
895 RF_ComponentLabel_t **clabel_ptr;
896 RF_SingleComponent_t *sparePtr,*componentPtr;
897 RF_SingleComponent_t hot_spare;
898 RF_SingleComponent_t component;
899 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
900 int i, j, d;
901
902 if (unit >= numraid)
903 return (ENXIO);
904 rs = &raid_softc[unit];
905 raidPtr = raidPtrs[unit];
906
907 db1_printf(("raidioctl: %d %d %d %d\n", (int)dev, (int)DISKPART(dev),
908 (int)unit, (int)cmd));
909
910 /* Must be open for writes for these commands... */
911 switch (cmd) {
912 case DIOCSDINFO:
913 case DIOCWDINFO:
914 case DIOCWLABEL:
915 if ((flag & FWRITE) == 0)
916 return (EBADF);
917 }
918
919 /* Must be initialised for these... */
920 switch (cmd) {
921 case DIOCGDINFO:
922 case DIOCSDINFO:
923 case DIOCWDINFO:
924 case DIOCGPART:
925 case DIOCWLABEL:
926 case DIOCGPDINFO:
927 case RAIDFRAME_SHUTDOWN:
928 case RAIDFRAME_REWRITEPARITY:
929 case RAIDFRAME_GET_INFO:
930 case RAIDFRAME_RESET_ACCTOTALS:
931 case RAIDFRAME_GET_ACCTOTALS:
932 case RAIDFRAME_KEEP_ACCTOTALS:
933 case RAIDFRAME_GET_SIZE:
934 case RAIDFRAME_FAIL_DISK:
935 case RAIDFRAME_COPYBACK:
936 case RAIDFRAME_CHECK_RECON_STATUS:
937 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
938 case RAIDFRAME_GET_COMPONENT_LABEL:
939 case RAIDFRAME_SET_COMPONENT_LABEL:
940 case RAIDFRAME_ADD_HOT_SPARE:
941 case RAIDFRAME_REMOVE_HOT_SPARE:
942 case RAIDFRAME_INIT_LABELS:
943 case RAIDFRAME_REBUILD_IN_PLACE:
944 case RAIDFRAME_CHECK_PARITY:
945 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
946 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
947 case RAIDFRAME_CHECK_COPYBACK_STATUS:
948 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
949 case RAIDFRAME_SET_AUTOCONFIG:
950 case RAIDFRAME_SET_ROOT:
951 case RAIDFRAME_DELETE_COMPONENT:
952 case RAIDFRAME_INCORPORATE_HOT_SPARE:
953 if ((rs->sc_flags & RAIDF_INITED) == 0)
954 return (ENXIO);
955 }
956
957 switch (cmd) {
958 /* Configure the system. */
959 case RAIDFRAME_CONFIGURE:
960
961 if (raidPtr->valid) {
962 /* There is a valid RAID set running on this unit ! */
963 printf("raid%d: Device already configured!\n",unit);
964 return(EINVAL);
965 }
966
967 /*
968 * Copy-in the configuration information.
969 * data points to a pointer to the configuration structure.
970 */
971 u_cfg = *((RF_Config_t **)data);
972 RF_Malloc(k_cfg, sizeof (RF_Config_t), (RF_Config_t *));
973 if (k_cfg == NULL) {
974 return (ENOMEM);
975 }
976 retcode = copyin((caddr_t)u_cfg, (caddr_t)k_cfg,
977 sizeof (RF_Config_t));
978 if (retcode) {
979 RF_Free(k_cfg, sizeof(RF_Config_t));
980 return (retcode);
981 }
982
983 /*
984 * Allocate a buffer for the layout-specific data,
985 * and copy it in.
986 */
987 if (k_cfg->layoutSpecificSize) {
988 if (k_cfg->layoutSpecificSize > 10000) {
989 /* Sanity check. */
990 RF_Free(k_cfg, sizeof(RF_Config_t));
991 return (EINVAL);
992 }
993 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
994 (u_char *));
995 if (specific_buf == NULL) {
996 RF_Free(k_cfg, sizeof (RF_Config_t));
997 return (ENOMEM);
998 }
999 retcode = copyin(k_cfg->layoutSpecific,
1000 (caddr_t)specific_buf, k_cfg->layoutSpecificSize);
1001 if (retcode) {
1002 RF_Free(k_cfg, sizeof(RF_Config_t));
1003 RF_Free(specific_buf,
1004 k_cfg->layoutSpecificSize);
1005 return (retcode);
1006 }
1007 } else
1008 specific_buf = NULL;
1009 k_cfg->layoutSpecific = specific_buf;
1010
1011 /*
1012 * We should do some kind of sanity check on the
1013 * configuration.
1014 * Store the sum of all the bytes in the last byte ?
1015 */
1016
1017 /*
1018 * Clear the entire RAID descriptor, just to make sure
1019 * there is no stale data left in the case of a
1020 * reconfiguration.
1021 */
1022 bzero((char *) raidPtr, sizeof(RF_Raid_t));
1023
1024 /* Configure the system. */
1025 raidPtr->raidid = unit;
1026
1027 retcode = rf_Configure(raidPtr, k_cfg, NULL);
1028
1029 if (retcode == 0) {
1030
1031 /*
1032 * Allow this many simultaneous IO's to
1033 * this RAID device.
1034 */
1035 raidPtr->openings = RAIDOUTSTANDING;
1036
1037 raidinit(raidPtr);
1038 rf_markalldirty(raidPtr);
1039 }
1040
1041 /* Free the buffers. No return code here. */
1042 if (k_cfg->layoutSpecificSize) {
1043 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1044 }
1045 RF_Free(k_cfg, sizeof (RF_Config_t));
1046
1047 return (retcode);
1048
1049 case RAIDFRAME_SHUTDOWN:
1050 /* Shutdown the system. */
1051
1052 if ((error = raidlock(rs)) != 0)
1053 return (error);
1054
1055 /*
1056 * If somebody has a partition mounted, we shouldn't
1057 * shutdown.
1058 */
1059
1060 part = DISKPART(dev);
1061 pmask = (1 << part);
1062 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1063 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1064 (rs->sc_dkdev.dk_copenmask & pmask))) {
1065 raidunlock(rs);
1066 return (EBUSY);
1067 }
1068
1069 if ((retcode = rf_Shutdown(raidPtr)) == 0) {
1070
1071 pool_destroy(&rs->sc_cbufpool);
1072
1073 /* It's no longer initialised... */
1074 rs->sc_flags &= ~RAIDF_INITED;
1075
1076 /* config_detach the device. */
1077 config_detach(device_lookup(&raid_cd, unit), 0);
1078
1079 /* Detach the disk. */
1080 disk_detach(&rs->sc_dkdev);
1081 }
1082
1083 raidunlock(rs);
1084
1085 return (retcode);
1086
1087 case RAIDFRAME_GET_COMPONENT_LABEL:
1088 clabel_ptr = (RF_ComponentLabel_t **) data;
1089 /*
1090 * We need to read the component label for the disk indicated
1091 * by row,column in clabel.
1092 */
1093
1094 /*
1095 * For practice, let's get it directly from disk, rather
1096 * than from the in-core copy.
1097 */
1098 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1099 (RF_ComponentLabel_t *));
1100 if (clabel == NULL)
1101 return (ENOMEM);
1102
1103 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
1104
1105 retcode = copyin( *clabel_ptr, clabel,
1106 sizeof(RF_ComponentLabel_t));
1107
1108 if (retcode) {
1109 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1110 return(retcode);
1111 }
1112
1113 row = clabel->row;
1114 column = clabel->column;
1115
1116 if ((row < 0) || (row >= raidPtr->numRow) ||
1117 (column < 0) || (column >= raidPtr->numCol)) {
1118 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1119 return(EINVAL);
1120 }
1121
1122 raidread_component_label(raidPtr->Disks[row][column].dev,
1123 raidPtr->raid_cinfo[row][column].ci_vp, clabel );
1124
1125 retcode = copyout((caddr_t) clabel,
1126 (caddr_t) *clabel_ptr,
1127 sizeof(RF_ComponentLabel_t));
1128 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1129 return (retcode);
1130
1131 case RAIDFRAME_SET_COMPONENT_LABEL:
1132 clabel = (RF_ComponentLabel_t *) data;
1133
1134 /* XXX check the label for valid stuff... */
1135 /*
1136 * Note that some things *should not* get modified --
1137 * the user should be re-initing the labels instead of
1138 * trying to patch things.
1139 */
1140
1141 #ifdef RAIDDEBUG
1142 printf("Got component label:\n");
1143 printf("Version: %d\n",clabel->version);
1144 printf("Serial Number: %d\n",clabel->serial_number);
1145 printf("Mod counter: %d\n",clabel->mod_counter);
1146 printf("Row: %d\n", clabel->row);
1147 printf("Column: %d\n", clabel->column);
1148 printf("Num Rows: %d\n", clabel->num_rows);
1149 printf("Num Columns: %d\n", clabel->num_columns);
1150 printf("Clean: %d\n", clabel->clean);
1151 printf("Status: %d\n", clabel->status);
1152 #endif /* RAIDDEBUG */
1153
1154 row = clabel->row;
1155 column = clabel->column;
1156
1157 if ((row < 0) || (row >= raidPtr->numRow) ||
1158 (column < 0) || (column >= raidPtr->numCol)) {
1159 return(EINVAL);
1160 }
1161
1162 /* XXX this isn't allowed to do anything for now :-) */
1163 #if 0
1164 raidwrite_component_label(raidPtr->Disks[row][column].dev,
1165 raidPtr->raid_cinfo[row][column].ci_vp, clabel );
1166 #endif
1167 return (0);
1168
1169 case RAIDFRAME_INIT_LABELS:
1170 clabel = (RF_ComponentLabel_t *) data;
1171 /*
1172 * We only want the serial number from the above.
1173 * We get all the rest of the information from
1174 * the config that was used to create this RAID
1175 * set.
1176 */
1177
1178 raidPtr->serial_number = clabel->serial_number;
1179
1180 raid_init_component_label(raidPtr, &ci_label);
1181 ci_label.serial_number = clabel->serial_number;
1182
1183 for(row=0;row<raidPtr->numRow;row++) {
1184 ci_label.row = row;
1185 for(column=0;column<raidPtr->numCol;column++) {
1186 diskPtr = &raidPtr->Disks[row][column];
1187 if (!RF_DEAD_DISK(diskPtr->status)) {
1188 ci_label.partitionSize =
1189 diskPtr->partitionSize;
1190 ci_label.column = column;
1191 raidwrite_component_label(
1192 raidPtr->Disks[row][column].dev,
1193 raidPtr->raid_cinfo[row][column].ci_vp,
1194 &ci_label );
1195 }
1196 }
1197 }
1198
1199 return (retcode);
1200
1201 case RAIDFRAME_REWRITEPARITY:
1202
1203 if (raidPtr->Layout.map->faultsTolerated == 0) {
1204 /* Parity for RAID 0 is trivially correct. */
1205 raidPtr->parity_good = RF_RAID_CLEAN;
1206 return(0);
1207 }
1208
1209
1210 if (raidPtr->parity_rewrite_in_progress == 1) {
1211 /* Re-write is already in progress ! */
1212 return(EINVAL);
1213 }
1214
1215 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1216 rf_RewriteParityThread,
1217 raidPtr,"raid_parity");
1218
1219 return (retcode);
1220
1221 case RAIDFRAME_SET_AUTOCONFIG:
1222 d = rf_set_autoconfig(raidPtr, *(int *) data);
1223 db1_printf(("New autoconfig value is: %d\n", d));
1224 *(int *) data = d;
1225 return (retcode);
1226
1227 case RAIDFRAME_SET_ROOT:
1228 d = rf_set_rootpartition(raidPtr, *(int *) data);
1229 db1_printf(("New rootpartition value is: %d\n", d));
1230 *(int *) data = d;
1231 return (retcode);
1232
1233
1234 case RAIDFRAME_ADD_HOT_SPARE:
1235 sparePtr = (RF_SingleComponent_t *) data;
1236 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1237 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1238 return(retcode);
1239
1240 case RAIDFRAME_REMOVE_HOT_SPARE:
1241 return(retcode);
1242
1243 case RAIDFRAME_DELETE_COMPONENT:
1244 componentPtr = (RF_SingleComponent_t *)data;
1245 memcpy( &component, componentPtr,
1246 sizeof(RF_SingleComponent_t));
1247 retcode = rf_delete_component(raidPtr, &component);
1248 return(retcode);
1249
1250 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1251 componentPtr = (RF_SingleComponent_t *)data;
1252 memcpy( &component, componentPtr,
1253 sizeof(RF_SingleComponent_t));
1254 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1255 return(retcode);
1256
1257 case RAIDFRAME_REBUILD_IN_PLACE:
1258
1259 if (raidPtr->Layout.map->faultsTolerated == 0) {
1260 /* Can't do this on a RAID 0 !! */
1261 return(EINVAL);
1262 }
1263
1264 if (raidPtr->recon_in_progress == 1) {
1265 /* A reconstruct is already in progress ! */
1266 return(EINVAL);
1267 }
1268
1269 componentPtr = (RF_SingleComponent_t *) data;
1270 memcpy( &component, componentPtr,
1271 sizeof(RF_SingleComponent_t));
1272 row = component.row;
1273 column = component.column;
1274 db1_printf(("Rebuild: %d %d\n",row, column));
1275 if ((row < 0) || (row >= raidPtr->numRow) ||
1276 (column < 0) || (column >= raidPtr->numCol)) {
1277 return(EINVAL);
1278 }
1279
1280 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1281 if (rrcopy == NULL)
1282 return(ENOMEM);
1283
1284 rrcopy->raidPtr = (void *) raidPtr;
1285 rrcopy->row = row;
1286 rrcopy->col = column;
1287
1288 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1289 rf_ReconstructInPlaceThread,
1290 rrcopy,"raid_reconip");
1291
1292 return (retcode);
1293
1294 case RAIDFRAME_GET_INFO:
1295 if (!raidPtr->valid)
1296 return (ENODEV);
1297 ucfgp = (RF_DeviceConfig_t **) data;
1298 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1299 (RF_DeviceConfig_t *));
1300 if (d_cfg == NULL)
1301 return (ENOMEM);
1302 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1303 d_cfg->rows = raidPtr->numRow;
1304 d_cfg->cols = raidPtr->numCol;
1305 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1306 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1307 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1308 return (ENOMEM);
1309 }
1310 d_cfg->nspares = raidPtr->numSpare;
1311 if (d_cfg->nspares >= RF_MAX_DISKS) {
1312 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1313 return (ENOMEM);
1314 }
1315 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1316 d = 0;
1317 for (i = 0; i < d_cfg->rows; i++) {
1318 for (j = 0; j < d_cfg->cols; j++) {
1319 d_cfg->devs[d] = raidPtr->Disks[i][j];
1320 d++;
1321 }
1322 }
1323 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1324 d_cfg->spares[i] = raidPtr->Disks[0][j];
1325 }
1326 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1327 sizeof(RF_DeviceConfig_t));
1328 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1329
1330 return (retcode);
1331
1332 case RAIDFRAME_CHECK_PARITY:
1333 *(int *) data = raidPtr->parity_good;
1334 return (0);
1335
1336 case RAIDFRAME_RESET_ACCTOTALS:
1337 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1338 return (0);
1339
1340 case RAIDFRAME_GET_ACCTOTALS:
1341 totals = (RF_AccTotals_t *) data;
1342 *totals = raidPtr->acc_totals;
1343 return (0);
1344
1345 case RAIDFRAME_KEEP_ACCTOTALS:
1346 raidPtr->keep_acc_totals = *(int *)data;
1347 return (0);
1348
1349 case RAIDFRAME_GET_SIZE:
1350 *(int *) data = raidPtr->totalSectors;
1351 return (0);
1352
1353 /* Fail a disk & optionally start reconstruction. */
1354 case RAIDFRAME_FAIL_DISK:
1355 rr = (struct rf_recon_req *)data;
1356
1357 if (rr->row < 0 || rr->row >= raidPtr->numRow ||
1358 rr->col < 0 || rr->col >= raidPtr->numCol)
1359 return (EINVAL);
1360
1361 db1_printf(("raid%d: Failing the disk: row: %d col: %d\n",
1362 unit, rr->row, rr->col));
1363
1364 /*
1365 * Make a copy of the recon request so that we don't
1366 * rely on the user's buffer.
1367 */
1368 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1369 if (rrcopy == NULL)
1370 return(ENOMEM);
1371 bcopy(rr, rrcopy, sizeof(*rr));
1372 rrcopy->raidPtr = (void *)raidPtr;
1373
1374 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1375 rf_ReconThread,
1376 rrcopy,"raid_recon");
1377 return (0);
1378
1379 /*
1380 * Invoke a copyback operation after recon on whatever
1381 * disk needs it, if any.
1382 */
1383 case RAIDFRAME_COPYBACK:
1384 if (raidPtr->Layout.map->faultsTolerated == 0) {
1385 /* This makes no sense on a RAID 0 !! */
1386 return(EINVAL);
1387 }
1388
1389 if (raidPtr->copyback_in_progress == 1) {
1390 /* Copyback is already in progress ! */
1391 return(EINVAL);
1392 }
1393
1394 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1395 rf_CopybackThread,
1396 raidPtr,"raid_copyback");
1397 return (retcode);
1398
1399 /* Return the percentage completion of reconstruction. */
1400 case RAIDFRAME_CHECK_RECON_STATUS:
1401 if (raidPtr->Layout.map->faultsTolerated == 0) {
1402 /*
1403 * This makes no sense on a RAID 0, so tell the
1404 * user it's done.
1405 */
1406 *(int *) data = 100;
1407 return(0);
1408 }
1409 row = 0; /* XXX we only consider a single row... */
1410 if (raidPtr->status[row] != rf_rs_reconstructing)
1411 *(int *)data = 100;
1412 else
1413 *(int *)data =
1414 raidPtr->reconControl[row]->percentComplete;
1415 return (0);
1416
1417 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1418 progressInfoPtr = (RF_ProgressInfo_t **) data;
1419 row = 0; /* XXX we only consider a single row... */
1420 if (raidPtr->status[row] != rf_rs_reconstructing) {
1421 progressInfo.remaining = 0;
1422 progressInfo.completed = 100;
1423 progressInfo.total = 100;
1424 } else {
1425 progressInfo.total =
1426 raidPtr->reconControl[row]->numRUsTotal;
1427 progressInfo.completed =
1428 raidPtr->reconControl[row]->numRUsComplete;
1429 progressInfo.remaining = progressInfo.total -
1430 progressInfo.completed;
1431 }
1432 retcode = copyout((caddr_t) &progressInfo,
1433 (caddr_t) *progressInfoPtr,
1434 sizeof(RF_ProgressInfo_t));
1435 return (retcode);
1436
1437 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1438 if (raidPtr->Layout.map->faultsTolerated == 0) {
1439 /*
1440 * This makes no sense on a RAID 0, so tell the
1441 * user it's done.
1442 */
1443 *(int *) data = 100;
1444 return(0);
1445 }
1446 if (raidPtr->parity_rewrite_in_progress == 1) {
1447 *(int *) data = 100 *
1448 raidPtr->parity_rewrite_stripes_done /
1449 raidPtr->Layout.numStripe;
1450 } else {
1451 *(int *) data = 100;
1452 }
1453 return (0);
1454
1455 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1456 progressInfoPtr = (RF_ProgressInfo_t **) data;
1457 if (raidPtr->parity_rewrite_in_progress == 1) {
1458 progressInfo.total = raidPtr->Layout.numStripe;
1459 progressInfo.completed =
1460 raidPtr->parity_rewrite_stripes_done;
1461 progressInfo.remaining = progressInfo.total -
1462 progressInfo.completed;
1463 } else {
1464 progressInfo.remaining = 0;
1465 progressInfo.completed = 100;
1466 progressInfo.total = 100;
1467 }
1468 retcode = copyout((caddr_t) &progressInfo,
1469 (caddr_t) *progressInfoPtr,
1470 sizeof(RF_ProgressInfo_t));
1471 return (retcode);
1472
1473 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1474 if (raidPtr->Layout.map->faultsTolerated == 0) {
1475 /* This makes no sense on a RAID 0 !! */
1476 *(int *) data = 100;
1477 return(0);
1478 }
1479 if (raidPtr->copyback_in_progress == 1) {
1480 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1481 raidPtr->Layout.numStripe;
1482 } else {
1483 *(int *) data = 100;
1484 }
1485 return (0);
1486
1487 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1488 progressInfoPtr = (RF_ProgressInfo_t **) data;
1489 if (raidPtr->copyback_in_progress == 1) {
1490 progressInfo.total = raidPtr->Layout.numStripe;
1491 progressInfo.completed =
1492 raidPtr->copyback_stripes_done;
1493 progressInfo.remaining = progressInfo.total -
1494 progressInfo.completed;
1495 } else {
1496 progressInfo.remaining = 0;
1497 progressInfo.completed = 100;
1498 progressInfo.total = 100;
1499 }
1500 retcode = copyout((caddr_t) &progressInfo,
1501 (caddr_t) *progressInfoPtr,
1502 sizeof(RF_ProgressInfo_t));
1503 return (retcode);
1504
1505 #if 0
1506 case RAIDFRAME_SPARET_WAIT:
1507 /*
1508 * The sparetable daemon calls this to wait for the
1509 * kernel to need a spare table.
1510 * This ioctl does not return until a spare table is needed.
1511 * XXX -- Calling mpsleep here in the ioctl code is almost
1512 * certainly wrong and evil. -- XXX
1513 * XXX -- I should either compute the spare table in the
1514 * kernel, or have a different. -- XXX
1515 * XXX -- Interface (a different character device) for
1516 * delivering the table. -- XXX
1517 */
1518 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1519 while (!rf_sparet_wait_queue)
1520 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH,
1521 "sparet wait", 0,
1522 (void *)simple_lock_addr(rf_sparet_wait_mutex),
1523 MS_LOCK_SIMPLE);
1524 waitreq = rf_sparet_wait_queue;
1525 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1526 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1527
1528 *((RF_SparetWait_t *)data) = *waitreq;
1529
1530 RF_Free(waitreq, sizeof *waitreq);
1531 return (0);
1532
1533 case RAIDFRAME_ABORT_SPARET_WAIT:
1534 /*
1535 * Wakes up a process waiting on SPARET_WAIT and puts an
1536 * error code in it that will cause the dameon to exit.
1537 */
1538 RF_Malloc(waitreq, sizeof (*waitreq), (RF_SparetWait_t *));
1539 waitreq->fcol = -1;
1540 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1541 waitreq->next = rf_sparet_wait_queue;
1542 rf_sparet_wait_queue = waitreq;
1543 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1544 wakeup(&rf_sparet_wait_queue);
1545 return (0);
1546
1547 case RAIDFRAME_SEND_SPARET:
1548 /*
1549 * Used by the spare table daemon to deliver a spare table
1550 * into the kernel.
1551 */
1552
1553 /* Install the spare table. */
1554 retcode = rf_SetSpareTable(raidPtr,*(void **)data);
1555
1556 /*
1557 * Respond to the requestor. The return status of the
1558 * spare table installation is passed in the "fcol" field.
1559 */
1560 RF_Malloc(waitreq, sizeof *waitreq, (RF_SparetWait_t *));
1561 waitreq->fcol = retcode;
1562 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1563 waitreq->next = rf_sparet_resp_queue;
1564 rf_sparet_resp_queue = waitreq;
1565 wakeup(&rf_sparet_resp_queue);
1566 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1567
1568 return (retcode);
1569 #endif
1570 /* Fall through to the os-specific code below. */
1571 default:
1572 break;
1573 }
1574
1575 if (!raidPtr->valid)
1576 return (EINVAL);
1577
1578 /*
1579 * Add support for "regular" device ioctls here.
1580 */
1581 switch (cmd) {
1582 case DIOCGDINFO:
1583 *(struct disklabel *)data = *(rs->sc_dkdev.dk_label);
1584 break;
1585
1586 case DIOCGPART:
1587 ((struct partinfo *)data)->disklab = rs->sc_dkdev.dk_label;
1588 ((struct partinfo *)data)->part =
1589 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1590 break;
1591
1592 case DIOCWDINFO:
1593 case DIOCSDINFO:
1594 {
1595 struct disklabel *lp;
1596 lp = (struct disklabel *)data;
1597
1598 if ((error = raidlock(rs)) != 0)
1599 return (error);
1600
1601 rs->sc_flags |= RAIDF_LABELLING;
1602
1603 error = setdisklabel(rs->sc_dkdev.dk_label,
1604 lp, 0, rs->sc_dkdev.dk_cpulabel);
1605 if (error == 0) {
1606 if (cmd == DIOCWDINFO)
1607 error = writedisklabel(RAIDLABELDEV(dev),
1608 raidstrategy, rs->sc_dkdev.dk_label,
1609 rs->sc_dkdev.dk_cpulabel);
1610 }
1611
1612 rs->sc_flags &= ~RAIDF_LABELLING;
1613
1614 raidunlock(rs);
1615
1616 if (error)
1617 return (error);
1618 break;
1619 }
1620
1621 case DIOCWLABEL:
1622 if (*(int *)data != 0)
1623 rs->sc_flags |= RAIDF_WLABEL;
1624 else
1625 rs->sc_flags &= ~RAIDF_WLABEL;
1626 break;
1627
1628 case DIOCGPDINFO:
1629 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1630 break;
1631
1632 default:
1633 retcode = ENOTTY;
1634 }
1635
1636 return (retcode);
1637 }
1638
1639 /*
1640 * raidinit -- Complete the rest of the initialization for the
1641 * RAIDframe device.
1642 */
1643 void
raidinit(RF_Raid_t * raidPtr)1644 raidinit(RF_Raid_t *raidPtr)
1645 {
1646 struct raid_softc *rs;
1647 struct cfdata *cf;
1648 int unit;
1649
1650 unit = raidPtr->raidid;
1651
1652 rs = &raid_softc[unit];
1653 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1654 0, 0, "raidpl", NULL);
1655
1656 /* XXX should check return code first... */
1657 rs->sc_flags |= RAIDF_INITED;
1658
1659 /* XXX doesn't check bounds. */
1660 snprintf(rs->sc_xname, sizeof rs->sc_xname, "raid%d", unit);
1661
1662 rs->sc_dkdev.dk_name = rs->sc_xname;
1663
1664 /*
1665 * disk_attach actually creates space for the CPU disklabel, among
1666 * other things, so it's critical to call this *BEFORE* we try
1667 * putzing with disklabels.
1668 */
1669 disk_attach(&rs->sc_dkdev);
1670
1671 /*
1672 * XXX There may be a weird interaction here between this, and
1673 * protectedSectors, as used in RAIDframe.
1674 */
1675 rs->sc_size = raidPtr->totalSectors;
1676
1677 /*
1678 * config_attach the raid device into the device tree.
1679 * For autoconf rootdev selection...
1680 */
1681 cf = malloc(sizeof(struct cfdata), M_RAIDFRAME, M_NOWAIT);
1682 if (cf == NULL) {
1683 printf("WARNING: no memory for cfdata struct\n");
1684 return;
1685 }
1686 bzero(cf, sizeof(struct cfdata));
1687
1688 cf->cf_attach = &raid_ca;
1689 cf->cf_driver = &raid_cd;
1690 cf->cf_unit = unit;
1691
1692 config_attach(NULL, cf, NULL, NULL);
1693 }
1694
1695 /*
1696 * Wake up the daemon & tell it to get us a spare table.
1697 * XXX
1698 * The entries in the queues should be tagged with the raidPtr so that
1699 * in the extremely rare case that two recons happen at once, we know
1700 * which devices were requesting a spare table.
1701 * XXX
1702 *
1703 * XXX This code is not currently used. GO
1704 */
1705 int
rf_GetSpareTableFromDaemon(RF_SparetWait_t * req)1706 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1707 {
1708 int retcode;
1709
1710 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1711 req->next = rf_sparet_wait_queue;
1712 rf_sparet_wait_queue = req;
1713 wakeup(&rf_sparet_wait_queue);
1714
1715 /* mpsleep unlocks the mutex. */
1716 while (!rf_sparet_resp_queue) {
1717 tsleep(&rf_sparet_resp_queue, PRIBIO,
1718 "RAIDframe getsparetable", 0);
1719 }
1720 req = rf_sparet_resp_queue;
1721 rf_sparet_resp_queue = req->next;
1722 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1723
1724 retcode = req->fcol;
1725 /* This is not the same req as we alloc'd. */
1726 RF_Free(req, sizeof *req);
1727 return (retcode);
1728 }
1729
1730 /*
1731 * A wrapper around rf_DoAccess that extracts appropriate info from the
1732 * bp and passes it down.
1733 * Any calls originating in the kernel must use non-blocking I/O.
1734 * Do some extra sanity checking to return "appropriate" error values for
1735 * certain conditions (to make some standard utilities work).
1736 *
1737 * Formerly known as: rf_DoAccessKernel
1738 */
1739 void
raidstart(RF_Raid_t * raidPtr)1740 raidstart(RF_Raid_t *raidPtr)
1741 {
1742 RF_SectorCount_t num_blocks, pb, sum;
1743 RF_RaidAddr_t raid_addr;
1744 int retcode;
1745 struct partition *pp;
1746 daddr_t blocknum;
1747 int unit;
1748 struct raid_softc *rs;
1749 int do_async;
1750 struct buf *bp;
1751
1752 unit = raidPtr->raidid;
1753 rs = &raid_softc[unit];
1754
1755 /* Quick check to see if anything has died recently. */
1756 RF_LOCK_MUTEX(raidPtr->mutex);
1757 if (raidPtr->numNewFailures > 0) {
1758 rf_update_component_labels(raidPtr,
1759 RF_NORMAL_COMPONENT_UPDATE);
1760 raidPtr->numNewFailures--;
1761 }
1762 RF_UNLOCK_MUTEX(raidPtr->mutex);
1763
1764 /* Check to see if we're at the limit... */
1765 RF_LOCK_MUTEX(raidPtr->mutex);
1766 while (raidPtr->openings > 0) {
1767 RF_UNLOCK_MUTEX(raidPtr->mutex);
1768
1769 bp = rs->sc_q.b_actf;
1770 if (bp == NULL) {
1771 /* Nothing more to do. */
1772 return;
1773 }
1774 rs->sc_q.b_actf = bp->b_actf;
1775
1776 /*
1777 * Ok, for the bp we have here, bp->b_blkno is relative to the
1778 * partition... We need to make it absolute to the underlying
1779 * device...
1780 */
1781
1782 blocknum = bp->b_blkno;
1783 if (DISKPART(bp->b_dev) != RAW_PART) {
1784 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1785 blocknum += pp->p_offset;
1786 }
1787
1788 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1789 (int) blocknum));
1790
1791 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1792 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1793
1794 /*
1795 * *THIS* is where we adjust what block we're going to...
1796 * But DO NOT TOUCH bp->b_blkno !!!
1797 */
1798 raid_addr = blocknum;
1799
1800 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1801 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1802 sum = raid_addr + num_blocks + pb;
1803 if (1 || rf_debugKernelAccess) {
1804 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d)"
1805 " (%d)\n", (int)raid_addr, (int)sum,
1806 (int)num_blocks, (int)pb, (int)bp->b_resid));
1807 }
1808 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1809 || (sum < num_blocks) || (sum < pb)) {
1810 bp->b_error = ENOSPC;
1811 bp->b_flags |= B_ERROR;
1812 bp->b_resid = bp->b_bcount;
1813 /* db1_printf(("%s: Calling biodone on 0x%x\n",
1814 __func__, bp)); */
1815 splassert(IPL_BIO);
1816 biodone(bp);
1817 RF_LOCK_MUTEX(raidPtr->mutex);
1818 continue;
1819 }
1820 /*
1821 * XXX rf_DoAccess() should do this, not just DoAccessKernel().
1822 */
1823
1824 if (bp->b_bcount & raidPtr->sectorMask) {
1825 bp->b_error = EINVAL;
1826 bp->b_flags |= B_ERROR;
1827 bp->b_resid = bp->b_bcount;
1828 /* db1_printf(("%s: Calling biodone on 0x%x\n",
1829 __func__, bp)); */
1830 splassert(IPL_BIO);
1831 biodone(bp);
1832 RF_LOCK_MUTEX(raidPtr->mutex);
1833 continue;
1834
1835 }
1836 db1_printf(("Calling DoAccess..\n"));
1837
1838
1839 RF_LOCK_MUTEX(raidPtr->mutex);
1840 raidPtr->openings--;
1841 RF_UNLOCK_MUTEX(raidPtr->mutex);
1842
1843 /*
1844 * Everything is async.
1845 */
1846 do_async = 1;
1847
1848 disk_busy(&rs->sc_dkdev);
1849
1850 /*
1851 * XXX we're still at splbio() here... Do we *really*
1852 * need to be ?
1853 */
1854
1855 /*
1856 * Don't ever condition on bp->b_flags & B_WRITE.
1857 * Always condition on B_READ instead.
1858 */
1859
1860 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1861 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1862 do_async, raid_addr, num_blocks,
1863 bp->b_data, bp, NULL, NULL,
1864 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1865
1866 RF_LOCK_MUTEX(raidPtr->mutex);
1867 }
1868 RF_UNLOCK_MUTEX(raidPtr->mutex);
1869 }
1870
1871 /* Invoke an I/O from kernel mode. Disk queue should be locked upon entry. */
1872
1873 int
rf_DispatchKernelIO(RF_DiskQueue_t * queue,RF_DiskQueueData_t * req)1874 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1875 {
1876 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1877 struct buf *bp;
1878 struct raidbuf *raidbp = NULL;
1879 struct raid_softc *rs;
1880 int unit;
1881 /*int s = splbio();*/ /* Want to test this. */
1882
1883 /*
1884 * XXX along with the vnode, we also need the softc associated with
1885 * this device...
1886 */
1887 req->queue = queue;
1888
1889 unit = queue->raidPtr->raidid;
1890
1891 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1892
1893 if (unit >= numraid) {
1894 printf("Invalid unit number: %d %d\n", unit, numraid);
1895 panic("Invalid Unit number in rf_DispatchKernelIO");
1896 }
1897
1898 rs = &raid_softc[unit];
1899
1900 bp = req->bp;
1901
1902 #if 1
1903 /*
1904 * XXX When there is a physical disk failure, someone is passing
1905 * us a buffer that contains old stuff !! Attempt to deal with
1906 * this problem without taking a performance hit...
1907 * (not sure where the real bug is; it's buried in RAIDframe
1908 * somewhere) :-( GO )
1909 */
1910 if (bp->b_flags & B_ERROR) {
1911 bp->b_flags &= ~B_ERROR;
1912 }
1913 if (bp->b_error!=0) {
1914 bp->b_error = 0;
1915 }
1916 #endif
1917
1918 raidbp = RAIDGETBUF(rs);
1919
1920 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1921
1922 /*
1923 * Context for raidiodone.
1924 */
1925 raidbp->rf_obp = bp;
1926 raidbp->req = req;
1927
1928 LIST_INIT(&raidbp->rf_buf.b_dep);
1929
1930 switch (req->type) {
1931 case RF_IO_TYPE_NOP:
1932 /* Used primarily to unlock a locked queue. */
1933
1934 db1_printf(("rf_DispatchKernelIO: NOP to r %d c %d\n",
1935 queue->row, queue->col));
1936
1937 /* XXX need to do something extra here... */
1938
1939 /*
1940 * I'm leaving this in, as I've never actually seen it
1941 * used, and I'd like folks to report it... GO
1942 */
1943 db1_printf(("WAKEUP CALLED\n"));
1944 queue->numOutstanding++;
1945
1946 /* XXX need to glue the original buffer into this ?? */
1947
1948 rf_KernelWakeupFunc(&raidbp->rf_buf);
1949 break;
1950
1951 case RF_IO_TYPE_READ:
1952 case RF_IO_TYPE_WRITE:
1953 if (req->tracerec) {
1954 RF_ETIMER_START(req->tracerec->timer);
1955 }
1956
1957 rf_InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1958 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1959 req->sectorOffset, req->numSector,
1960 req->buf, rf_KernelWakeupFunc, (void *)req,
1961 queue->raidPtr->logBytesPerSector, req->b_proc);
1962
1963 if (rf_debugKernelAccess) {
1964 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1965 (long)bp->b_blkno));
1966 }
1967 queue->numOutstanding++;
1968 queue->last_deq_sector = req->sectorOffset;
1969
1970 /*
1971 * Acc wouldn't have been let in if there were any
1972 * pending reqs at any other priority.
1973 */
1974 queue->curPriority = req->priority;
1975
1976 db1_printf(("Going for %c to unit %d row %d col %d\n",
1977 req->type, unit, queue->row, queue->col));
1978 db1_printf(("sector %d count %d (%d bytes) %d\n",
1979 (int)req->sectorOffset, (int)req->numSector,
1980 (int)(req->numSector << queue->raidPtr->logBytesPerSector),
1981 (int)queue->raidPtr->logBytesPerSector));
1982 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1983 raidbp->rf_buf.b_vp->v_numoutput++;
1984 }
1985
1986 VOP_STRATEGY(&raidbp->rf_buf);
1987 break;
1988
1989 default:
1990 panic("bad req->type in rf_DispatchKernelIO");
1991 }
1992 db1_printf(("Exiting from DispatchKernelIO\n"));
1993 /*splx(s);*/ /* want to test this */
1994 return (0);
1995 }
1996
1997 /*
1998 * This is the callback function associated with a I/O invoked from
1999 * kernel code.
2000 */
2001 void
rf_KernelWakeupFunc(struct buf * vbp)2002 rf_KernelWakeupFunc(struct buf *vbp)
2003 {
2004 RF_DiskQueueData_t *req = NULL;
2005 RF_DiskQueue_t *queue;
2006 struct raidbuf *raidbp = (struct raidbuf *)vbp;
2007 struct buf *bp;
2008 struct raid_softc *rs;
2009 int unit;
2010 int s;
2011
2012 s = splbio();
2013 db1_printf(("recovering the request queue:\n"));
2014 req = raidbp->req;
2015
2016 bp = raidbp->rf_obp;
2017
2018 queue = (RF_DiskQueue_t *)req->queue;
2019
2020 if (raidbp->rf_buf.b_flags & B_ERROR) {
2021 bp->b_flags |= B_ERROR;
2022 bp->b_error =
2023 raidbp->rf_buf.b_error ? raidbp->rf_buf.b_error : EIO;
2024 }
2025
2026 #if 1
2027 /* XXX Methinks this could be wrong... */
2028 bp->b_resid = raidbp->rf_buf.b_resid;
2029 #endif
2030
2031 if (req->tracerec) {
2032 RF_ETIMER_STOP(req->tracerec->timer);
2033 RF_ETIMER_EVAL(req->tracerec->timer);
2034 RF_LOCK_MUTEX(rf_tracing_mutex);
2035 req->tracerec->diskwait_us +=
2036 RF_ETIMER_VAL_US(req->tracerec->timer);
2037 req->tracerec->phys_io_us +=
2038 RF_ETIMER_VAL_US(req->tracerec->timer);
2039 req->tracerec->num_phys_ios++;
2040 RF_UNLOCK_MUTEX(rf_tracing_mutex);
2041 }
2042
2043 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
2044
2045 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
2046
2047 /*
2048 * XXX Ok, let's get aggressive... If B_ERROR is set, let's go
2049 * ballistic, and mark the component as hosed...
2050 */
2051 if (bp->b_flags & B_ERROR) {
2052 /* Mark the disk as dead but only mark it once... */
2053 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
2054 rf_ds_optimal) {
2055 printf("raid%d: IO Error. Marking %s as failed.\n",
2056 unit,
2057 queue->raidPtr->
2058 Disks[queue->row][queue->col].devname);
2059 queue->raidPtr->Disks[queue->row][queue->col].status =
2060 rf_ds_failed;
2061 queue->raidPtr->status[queue->row] = rf_rs_degraded;
2062 queue->raidPtr->numFailures++;
2063 queue->raidPtr->numNewFailures++;
2064 } else {
2065 /* Disk is already dead... */
2066 /* printf("Disk already marked as dead!\n"); */
2067 }
2068 }
2069
2070 rs = &raid_softc[unit];
2071 RAIDPUTBUF(rs, raidbp);
2072
2073 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
2074 (req->CompleteFunc)(req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
2075
2076 splx(s);
2077 }
2078
2079 /*
2080 * Initialize a buf structure for doing an I/O in the kernel.
2081 */
2082 void
rf_InitBP(struct buf * bp,struct vnode * b_vp,unsigned rw_flag,dev_t dev,RF_SectorNum_t startSect,RF_SectorCount_t numSect,caddr_t buf,void (* cbFunc)(struct buf *),void * cbArg,int logBytesPerSector,struct proc * b_proc)2083 rf_InitBP(
2084 struct buf *bp,
2085 struct vnode *b_vp,
2086 unsigned rw_flag,
2087 dev_t dev,
2088 RF_SectorNum_t startSect,
2089 RF_SectorCount_t numSect,
2090 caddr_t buf,
2091 void (*cbFunc)(struct buf *),
2092 void *cbArg,
2093 int logBytesPerSector,
2094 struct proc *b_proc
2095 )
2096 {
2097 /*bp->b_flags = B_PHYS | rw_flag;*/
2098 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too ??? */
2099 bp->b_bcount = numSect << logBytesPerSector;
2100 bp->b_bufsize = bp->b_bcount;
2101 bp->b_error = 0;
2102 bp->b_dev = dev;
2103 bp->b_data = buf;
2104 bp->b_blkno = startSect;
2105 bp->b_resid = bp->b_bcount; /* XXX is this right !??!?!! */
2106 if (bp->b_bcount == 0) {
2107 panic("bp->b_bcount is zero in rf_InitBP!!");
2108 }
2109 bp->b_proc = b_proc;
2110 bp->b_iodone = cbFunc;
2111 bp->b_vp = b_vp;
2112 LIST_INIT(&bp->b_dep);
2113 }
2114
2115 void
raidgetdefaultlabel(RF_Raid_t * raidPtr,struct raid_softc * rs,struct disklabel * lp)2116 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2117 struct disklabel *lp)
2118 {
2119 db1_printf(("Building a default label...\n"));
2120 bzero(lp, sizeof(*lp));
2121
2122 /* Fabricate a label... */
2123 lp->d_secperunit = raidPtr->totalSectors;
2124 lp->d_secsize = raidPtr->bytesPerSector;
2125 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2126 lp->d_ntracks = 4 * raidPtr->numCol;
2127 lp->d_ncylinders = raidPtr->totalSectors /
2128 (lp->d_nsectors * lp->d_ntracks);
2129 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2130
2131 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2132 lp->d_type = DTYPE_RAID;
2133 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2134 lp->d_rpm = 3600;
2135 lp->d_interleave = 1;
2136 lp->d_flags = 0;
2137
2138 lp->d_partitions[RAW_PART].p_offset = 0;
2139 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2140 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2141 lp->d_npartitions = RAW_PART + 1;
2142
2143 lp->d_magic = DISKMAGIC;
2144 lp->d_magic2 = DISKMAGIC;
2145 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2146 }
2147
2148 /*
2149 * Read the disklabel from the raid device.
2150 * If one is not present, fake one up.
2151 */
2152 void
raidgetdisklabel(dev_t dev)2153 raidgetdisklabel(dev_t dev)
2154 {
2155 int unit = raidunit(dev);
2156 struct raid_softc *rs = &raid_softc[unit];
2157 char *errstring;
2158 struct disklabel *lp = rs->sc_dkdev.dk_label;
2159 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2160 RF_Raid_t *raidPtr;
2161 int i;
2162 struct partition *pp;
2163
2164 db1_printf(("Getting the disklabel...\n"));
2165
2166 bzero(clp, sizeof(*clp));
2167
2168 raidPtr = raidPtrs[unit];
2169
2170 raidgetdefaultlabel(raidPtr, rs, lp);
2171
2172 /*
2173 * Call the generic disklabel extraction routine.
2174 */
2175 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, lp,
2176 rs->sc_dkdev.dk_cpulabel, 0);
2177 if (errstring) {
2178 printf("%s: %s\n", rs->sc_xname, errstring);
2179 return;
2180 /*raidmakedisklabel(rs);*/
2181 }
2182
2183 /*
2184 * Sanity check whether the found disklabel is valid.
2185 *
2186 * This is necessary since total size of the raid device
2187 * may vary when an interleave is changed even though exactly
2188 * same componets are used, and old disklabel may used
2189 * if that is found.
2190 */
2191 #ifdef RAIDDEBUG
2192 if (lp->d_secperunit != rs->sc_size)
2193 printf("WARNING: %s: "
2194 "total sector size in disklabel (%d) != "
2195 "the size of raid (%ld)\n", rs->sc_xname,
2196 lp->d_secperunit, (long) rs->sc_size);
2197 #endif /* RAIDDEBUG */
2198 for (i = 0; i < lp->d_npartitions; i++) {
2199 pp = &lp->d_partitions[i];
2200 if (pp->p_offset + pp->p_size > rs->sc_size)
2201 printf("WARNING: %s: end of partition `%c' "
2202 "exceeds the size of raid (%ld)\n",
2203 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2204 }
2205 }
2206
2207 /*
2208 * Take care of things one might want to take care of in the event
2209 * that a disklabel isn't present.
2210 */
2211 void
raidmakedisklabel(struct raid_softc * rs)2212 raidmakedisklabel(struct raid_softc *rs)
2213 {
2214 struct disklabel *lp = rs->sc_dkdev.dk_label;
2215 db1_printf(("Making a label..\n"));
2216
2217 /*
2218 * For historical reasons, if there's no disklabel present
2219 * the raw partition must be marked FS_BSDFFS.
2220 */
2221
2222 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2223
2224 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2225
2226 lp->d_checksum = dkcksum(lp);
2227 }
2228
2229 /*
2230 * Lookup the provided name in the filesystem. If the file exists,
2231 * is a valid block device, and isn't being used by anyone else,
2232 * set *vpp to the file's vnode.
2233 * You'll find the original of this in ccd.c
2234 */
2235 int
raidlookup(char * path,struct proc * p,struct vnode ** vpp)2236 raidlookup(char *path, struct proc *p, struct vnode **vpp /* result */)
2237 {
2238 struct nameidata nd;
2239 struct vnode *vp;
2240 struct vattr va;
2241 int error;
2242
2243 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2244 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
2245 #ifdef RAIDDEBUG
2246 printf("RAIDframe: vn_open returned %d\n", error);
2247 #endif /* RAIDDEBUG */
2248 return (error);
2249 }
2250 vp = nd.ni_vp;
2251 if (vp->v_usecount > 1) {
2252 VOP_UNLOCK(vp, 0, p);
2253 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2254 return (EBUSY);
2255 }
2256 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2257 VOP_UNLOCK(vp, 0, p);
2258 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2259 return (error);
2260 }
2261 /* XXX: eventually we should handle VREG, too. */
2262 if (va.va_type != VBLK) {
2263 VOP_UNLOCK(vp, 0, p);
2264 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2265 return (ENOTBLK);
2266 }
2267 VOP_UNLOCK(vp, 0, p);
2268 *vpp = vp;
2269 return (0);
2270 }
2271
2272 /*
2273 * Wait interruptibly for an exclusive lock.
2274 *
2275 * XXX
2276 * Several drivers do this; it should be abstracted and made MP-safe.
2277 * (Hmm... where have we seen this warning before :-> GO )
2278 */
2279 int
raidlock(struct raid_softc * rs)2280 raidlock(struct raid_softc *rs)
2281 {
2282 int error;
2283
2284 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2285 rs->sc_flags |= RAIDF_WANTED;
2286 if ((error = tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2287 return (error);
2288 }
2289 rs->sc_flags |= RAIDF_LOCKED;
2290 return (0);
2291 }
2292
2293 /*
2294 * Unlock and wake up any waiters.
2295 */
2296 void
raidunlock(struct raid_softc * rs)2297 raidunlock(struct raid_softc *rs)
2298 {
2299 rs->sc_flags &= ~RAIDF_LOCKED;
2300 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2301 rs->sc_flags &= ~RAIDF_WANTED;
2302 wakeup(rs);
2303 }
2304 }
2305
2306
2307 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2308 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2309
2310 int
raidmarkclean(dev_t dev,struct vnode * b_vp,int mod_counter)2311 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2312 {
2313 RF_ComponentLabel_t clabel;
2314 raidread_component_label(dev, b_vp, &clabel);
2315 clabel.mod_counter = mod_counter;
2316 clabel.clean = RF_RAID_CLEAN;
2317 raidwrite_component_label(dev, b_vp, &clabel);
2318 return(0);
2319 }
2320
2321
2322 int
raidmarkdirty(dev_t dev,struct vnode * b_vp,int mod_counter)2323 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2324 {
2325 RF_ComponentLabel_t clabel;
2326 raidread_component_label(dev, b_vp, &clabel);
2327 clabel.mod_counter = mod_counter;
2328 clabel.clean = RF_RAID_DIRTY;
2329 raidwrite_component_label(dev, b_vp, &clabel);
2330 return(0);
2331 }
2332
2333 /* ARGSUSED */
2334 int
raidread_component_label(dev_t dev,struct vnode * b_vp,RF_ComponentLabel_t * clabel)2335 raidread_component_label(dev_t dev, struct vnode *b_vp,
2336 RF_ComponentLabel_t *clabel)
2337 {
2338 struct buf *bp;
2339 int error;
2340
2341 /*
2342 * XXX should probably ensure that we don't try to do this if
2343 * someone has changed rf_protected_sectors.
2344 */
2345
2346 if (b_vp == NULL) {
2347 /*
2348 * For whatever reason, this component is not valid.
2349 * Don't try to read a component label from it.
2350 */
2351 return(EINVAL);
2352 }
2353
2354 /* Get a block of the appropriate size... */
2355 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2356 bp->b_dev = dev;
2357
2358 /* Get our ducks in a row for the read. */
2359 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2360 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2361 bp->b_flags |= B_READ;
2362 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2363
2364 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2365
2366 error = biowait(bp);
2367
2368 if (!error) {
2369 memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t));
2370 #if 0
2371 rf_print_component_label( clabel );
2372 #endif
2373 } else {
2374 db1_printf(("Failed to read RAID component label!\n"));
2375 }
2376
2377 brelse(bp);
2378 return(error);
2379 }
2380
2381 /* ARGSUSED */
2382 int
raidwrite_component_label(dev_t dev,struct vnode * b_vp,RF_ComponentLabel_t * clabel)2383 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2384 RF_ComponentLabel_t *clabel)
2385 {
2386 struct buf *bp;
2387 int error;
2388
2389 /* Get a block of the appropriate size... */
2390 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2391 bp->b_dev = dev;
2392
2393 /* Get our ducks in a row for the write. */
2394 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2395 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2396 bp->b_flags |= B_WRITE;
2397 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2398
2399 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2400
2401 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2402
2403 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2404 error = biowait(bp);
2405 brelse(bp);
2406 if (error) {
2407 printf("Failed to write RAID component info!\n");
2408 }
2409
2410 return(error);
2411 }
2412
2413 void
rf_markalldirty(RF_Raid_t * raidPtr)2414 rf_markalldirty(RF_Raid_t *raidPtr)
2415 {
2416 RF_ComponentLabel_t clabel;
2417 int r,c;
2418
2419 raidPtr->mod_counter++;
2420 for (r = 0; r < raidPtr->numRow; r++) {
2421 for (c = 0; c < raidPtr->numCol; c++) {
2422 /*
2423 * We don't want to touch (at all) a disk that has
2424 * failed.
2425 */
2426 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2427 raidread_component_label(
2428 raidPtr->Disks[r][c].dev,
2429 raidPtr->raid_cinfo[r][c].ci_vp, &clabel);
2430 if (clabel.status == rf_ds_spared) {
2431 /*
2432 * XXX do something special...
2433 * But whatever you do, don't
2434 * try to access it !!!
2435 */
2436 } else {
2437 #if 0
2438 clabel.status =
2439 raidPtr->Disks[r][c].status;
2440 raidwrite_component_label(
2441 raidPtr->Disks[r][c].dev,
2442 raidPtr->raid_cinfo[r][c].ci_vp,
2443 &clabel);
2444 #endif
2445 raidmarkdirty(
2446 raidPtr->Disks[r][c].dev,
2447 raidPtr->raid_cinfo[r][c].ci_vp,
2448 raidPtr->mod_counter);
2449 }
2450 }
2451 }
2452 }
2453 /*printf("Component labels marked dirty.\n");*/
2454 #if 0
2455 for( c = 0; c < raidPtr->numSpare ; c++) {
2456 sparecol = raidPtr->numCol + c;
2457 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2458 /*
2459 * XXX This is where we get fancy and map this spare
2460 * into it's correct spot in the array.
2461 */
2462 /*
2463 * We claim this disk is "optimal" if it's
2464 * rf_ds_used_spare, as that means it should be
2465 * directly substitutable for the disk it replaced.
2466 * We note that too...
2467 */
2468
2469 for(i=0;i<raidPtr->numRow;i++) {
2470 for(j=0;j<raidPtr->numCol;j++) {
2471 if ((raidPtr->Disks[i][j].spareRow ==
2472 r) &&
2473 (raidPtr->Disks[i][j].spareCol ==
2474 sparecol)) {
2475 srow = r;
2476 scol = sparecol;
2477 break;
2478 }
2479 }
2480 }
2481
2482 raidread_component_label(
2483 raidPtr->Disks[r][sparecol].dev,
2484 raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel);
2485 /* Make sure status is noted. */
2486 clabel.version = RF_COMPONENT_LABEL_VERSION;
2487 clabel.mod_counter = raidPtr->mod_counter;
2488 clabel.serial_number = raidPtr->serial_number;
2489 clabel.row = srow;
2490 clabel.column = scol;
2491 clabel.num_rows = raidPtr->numRow;
2492 clabel.num_columns = raidPtr->numCol;
2493 clabel.clean = RF_RAID_DIRTY; /* Changed in a bit. */
2494 clabel.status = rf_ds_optimal;
2495 raidwrite_component_label(
2496 raidPtr->Disks[r][sparecol].dev,
2497 raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel);
2498 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2499 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2500 }
2501 }
2502
2503 #endif
2504 }
2505
2506
2507 void
rf_update_component_labels(RF_Raid_t * raidPtr,int final)2508 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2509 {
2510 RF_ComponentLabel_t clabel;
2511 int sparecol;
2512 int r,c;
2513 int i,j;
2514 int srow, scol;
2515
2516 srow = -1;
2517 scol = -1;
2518
2519 /*
2520 * XXX should do extra checks to make sure things really are clean,
2521 * rather than blindly setting the clean bit...
2522 */
2523
2524 raidPtr->mod_counter++;
2525
2526 for (r = 0; r < raidPtr->numRow; r++) {
2527 for (c = 0; c < raidPtr->numCol; c++) {
2528 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2529 raidread_component_label(
2530 raidPtr->Disks[r][c].dev,
2531 raidPtr->raid_cinfo[r][c].ci_vp,
2532 &clabel);
2533 /* Make sure status is noted. */
2534 clabel.status = rf_ds_optimal;
2535 /* Bump the counter. */
2536 clabel.mod_counter = raidPtr->mod_counter;
2537
2538 raidwrite_component_label(
2539 raidPtr->Disks[r][c].dev,
2540 raidPtr->raid_cinfo[r][c].ci_vp,
2541 &clabel);
2542 if (final == RF_FINAL_COMPONENT_UPDATE) {
2543 if (raidPtr->parity_good ==
2544 RF_RAID_CLEAN) {
2545 raidmarkclean(
2546 raidPtr->Disks[r][c].dev,
2547 raidPtr->
2548 raid_cinfo[r][c].ci_vp,
2549 raidPtr->mod_counter);
2550 }
2551 }
2552 }
2553 /* Else we don't touch it... */
2554 }
2555 }
2556
2557 for( c = 0; c < raidPtr->numSpare ; c++) {
2558 sparecol = raidPtr->numCol + c;
2559 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2560 /*
2561 * We claim this disk is "optimal" if it's
2562 * rf_ds_used_spare, as that means it should be
2563 * directly substitutable for the disk it replaced.
2564 * We note that too...
2565 */
2566
2567 for(i=0;i<raidPtr->numRow;i++) {
2568 for(j=0;j<raidPtr->numCol;j++) {
2569 if ((raidPtr->Disks[i][j].spareRow ==
2570 0) &&
2571 (raidPtr->Disks[i][j].spareCol ==
2572 sparecol)) {
2573 srow = i;
2574 scol = j;
2575 break;
2576 }
2577 }
2578 }
2579
2580 /* XXX Shouldn't *really* need this... */
2581 raidread_component_label(
2582 raidPtr->Disks[0][sparecol].dev,
2583 raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel);
2584 /* Make sure status is noted. */
2585
2586 raid_init_component_label(raidPtr, &clabel);
2587
2588 clabel.mod_counter = raidPtr->mod_counter;
2589 clabel.row = srow;
2590 clabel.column = scol;
2591 clabel.status = rf_ds_optimal;
2592
2593 raidwrite_component_label(
2594 raidPtr->Disks[0][sparecol].dev,
2595 raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel);
2596 if (final == RF_FINAL_COMPONENT_UPDATE) {
2597 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2598 raidmarkclean(raidPtr->
2599 Disks[0][sparecol].dev,
2600 raidPtr->
2601 raid_cinfo[0][sparecol].ci_vp,
2602 raidPtr->mod_counter);
2603 }
2604 }
2605 }
2606 }
2607 /*printf("Component labels updated\n");*/
2608 }
2609
2610 void
rf_close_component(RF_Raid_t * raidPtr,struct vnode * vp,int auto_configured)2611 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2612 {
2613 struct proc *p = curproc;
2614
2615 if (vp != NULL) {
2616 if (auto_configured == 1) {
2617 /* component was opened by rf_find_raid_components() */
2618 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, p);
2619 vrele(vp);
2620 } else {
2621 /* component was opened by raidlookup() */
2622 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2623 }
2624 } else {
2625 printf("vnode was NULL\n");
2626 }
2627 }
2628
2629 void
rf_UnconfigureVnodes(RF_Raid_t * raidPtr)2630 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2631 {
2632 int r,c;
2633 struct vnode *vp;
2634 int acd;
2635
2636
2637 /* We take this opportunity to close the vnodes like we should... */
2638
2639 for (r = 0; r < raidPtr->numRow; r++) {
2640 for (c = 0; c < raidPtr->numCol; c++) {
2641 db1_printf(("Closing vnode for row: %d col: %d\n",
2642 r, c));
2643 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2644 acd = raidPtr->Disks[r][c].auto_configured;
2645 rf_close_component(raidPtr, vp, acd);
2646 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2647 raidPtr->Disks[r][c].auto_configured = 0;
2648 }
2649 }
2650 for (r = 0; r < raidPtr->numSpare; r++) {
2651 db1_printf(("Closing vnode for spare: %d\n", r));
2652 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2653 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2654 rf_close_component(raidPtr, vp, acd);
2655 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2656 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2657 }
2658 }
2659
2660
2661 void
rf_ReconThread(struct rf_recon_req * req)2662 rf_ReconThread(struct rf_recon_req *req)
2663 {
2664 int s;
2665 RF_Raid_t *raidPtr;
2666
2667 s = splbio();
2668 raidPtr = (RF_Raid_t *) req->raidPtr;
2669 raidPtr->recon_in_progress = 1;
2670
2671 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2672 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2673
2674 /* XXX Get rid of this! we don't need it at all... */
2675 RF_Free(req, sizeof(*req));
2676
2677 raidPtr->recon_in_progress = 0;
2678 splx(s);
2679
2680 /* That's all... */
2681 kthread_exit(0); /* Does not return. */
2682 }
2683
2684 void
rf_RewriteParityThread(RF_Raid_t * raidPtr)2685 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2686 {
2687 int retcode;
2688 int s;
2689
2690 s = splbio();
2691 raidPtr->parity_rewrite_in_progress = 1;
2692 retcode = rf_RewriteParity(raidPtr);
2693 if (retcode) {
2694 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2695 } else {
2696 /*
2697 * Set the clean bit ! If we shutdown correctly,
2698 * the clean bit on each component label will get
2699 * set.
2700 */
2701 raidPtr->parity_good = RF_RAID_CLEAN;
2702 }
2703 raidPtr->parity_rewrite_in_progress = 0;
2704 splx(s);
2705
2706 /* Anyone waiting for us to stop ? If so, inform them... */
2707 if (raidPtr->waitShutdown) {
2708 wakeup(&raidPtr->parity_rewrite_in_progress);
2709 }
2710
2711 /* That's all... */
2712 kthread_exit(0); /* Does not return. */
2713 }
2714
2715
2716 void
rf_CopybackThread(RF_Raid_t * raidPtr)2717 rf_CopybackThread(RF_Raid_t *raidPtr)
2718 {
2719 int s;
2720
2721 s = splbio();
2722 raidPtr->copyback_in_progress = 1;
2723 rf_CopybackReconstructedData(raidPtr);
2724 raidPtr->copyback_in_progress = 0;
2725 splx(s);
2726
2727 /* That's all... */
2728 kthread_exit(0); /* Does not return. */
2729 }
2730
2731
2732 void
rf_ReconstructInPlaceThread(struct rf_recon_req * req)2733 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2734 {
2735 int retcode;
2736 int s;
2737 RF_Raid_t *raidPtr;
2738
2739 s = splbio();
2740 raidPtr = req->raidPtr;
2741 raidPtr->recon_in_progress = 1;
2742 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2743 RF_Free(req, sizeof(*req));
2744 raidPtr->recon_in_progress = 0;
2745 splx(s);
2746
2747 /* That's all... */
2748 kthread_exit(0); /* Does not return. */
2749 }
2750
2751
2752 RF_AutoConfig_t *
rf_find_raid_components(void)2753 rf_find_raid_components(void)
2754 {
2755 #ifdef RAID_AUTOCONFIG
2756 int major;
2757 struct vnode *vp;
2758 struct disklabel label;
2759 struct device *dv;
2760 dev_t dev;
2761 int error;
2762 int i;
2763 int good_one;
2764 RF_ComponentLabel_t *clabel;
2765 RF_AutoConfig_t *ac;
2766 #endif /* RAID_AUTOCONFIG */
2767 RF_AutoConfig_t *ac_list;
2768
2769
2770 /* Initialize the AutoConfig list. */
2771 ac_list = NULL;
2772
2773 #ifdef RAID_AUTOCONFIG
2774 /* We begin by trolling through *all* the devices on the system. */
2775
2776 for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
2777
2778 /* We are only interested in disks... */
2779 if (dv->dv_class != DV_DISK)
2780 continue;
2781
2782 /* We don't care about floppies... */
2783 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2784 continue;
2785 }
2786
2787 /*
2788 * We need to find the device_name_to_block_device_major
2789 * stuff.
2790 */
2791 major = findblkmajor(dv);
2792
2793 /* Get a vnode for the raw partition of this disk. */
2794
2795 dev = MAKEDISKDEV(major, dv->dv_unit, RAW_PART);
2796 if (bdevvp(dev, &vp))
2797 panic("RAID can't alloc vnode");
2798
2799 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2800
2801 if (error) {
2802 /*
2803 * "Who cares." Continue looking
2804 * for something that exists.
2805 */
2806 vput(vp);
2807 continue;
2808 }
2809
2810 /* Ok, the disk exists. Go get the disklabel. */
2811 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2812 FREAD, NOCRED, 0);
2813 if (error) {
2814 /*
2815 * XXX can't happen - open() would
2816 * have errored out (or faked up one).
2817 */
2818 printf("can't get label for dev %s%c (%d)!?!?\n",
2819 dv->dv_xname, 'a' + RAW_PART, error);
2820 }
2821
2822 /*
2823 * We don't need this any more. We'll allocate it again
2824 * a little later if we really do...
2825 */
2826 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2827 vrele(vp);
2828
2829 for (i=0; i < label.d_npartitions; i++) {
2830 /*
2831 * We only support partitions marked as RAID.
2832 * Except on sparc/sparc64 where FS_RAID doesn't
2833 * fit in the SUN disklabel and we need to look
2834 * into each and every partition !!!
2835 */
2836 #if !defined(__sparc__) && !defined(__sparc64__) && !defined(__sun3__)
2837 if (label.d_partitions[i].p_fstype != FS_RAID)
2838 continue;
2839 #else /* !__sparc__ && !__sparc64__ && !__sun3__ */
2840 if (label.d_partitions[i].p_fstype == FS_SWAP ||
2841 label.d_partitions[i].p_fstype == FS_UNUSED)
2842 continue;
2843 #endif /* __sparc__ || __sparc64__ || __sun3__ */
2844
2845 dev = MAKEDISKDEV(major, dv->dv_unit, i);
2846 if (bdevvp(dev, &vp))
2847 panic("RAID can't alloc vnode");
2848
2849 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2850 if (error) {
2851 /* Whatever... */
2852 vput(vp);
2853 continue;
2854 }
2855
2856 good_one = 0;
2857
2858 clabel = (RF_ComponentLabel_t *)
2859 malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
2860 M_NOWAIT);
2861 if (clabel == NULL) {
2862 /* XXX CLEANUP HERE. */
2863 printf("RAID auto config: out of memory!\n");
2864 return(NULL); /* XXX probably should panic ? */
2865 }
2866
2867 if (!raidread_component_label(dev, vp, clabel)) {
2868 /* Got the label. Does it look reasonable ? */
2869 if (rf_reasonable_label(clabel) &&
2870 (clabel->partitionSize <=
2871 label.d_partitions[i].p_size)) {
2872 #ifdef RAIDDEBUG
2873 printf("Component on: %s%c: %d\n",
2874 dv->dv_xname, 'a'+i,
2875 label.d_partitions[i].p_size);
2876 rf_print_component_label(clabel);
2877 #endif /* RAIDDEBUG */
2878 /*
2879 * If it's reasonable, add it,
2880 * else ignore it.
2881 */
2882 ac = (RF_AutoConfig_t *)
2883 malloc(sizeof(RF_AutoConfig_t),
2884 M_RAIDFRAME, M_NOWAIT);
2885 if (ac == NULL) {
2886 /* XXX should panic ??? */
2887 return(NULL);
2888 }
2889
2890 snprintf(ac->devname,
2891 sizeof ac->devname, "%s%c",
2892 dv->dv_xname, 'a'+i);
2893 ac->dev = dev;
2894 ac->vp = vp;
2895 ac->clabel = clabel;
2896 ac->next = ac_list;
2897 ac_list = ac;
2898 good_one = 1;
2899 }
2900 }
2901 if (!good_one) {
2902 /* Cleanup. */
2903 free(clabel, M_RAIDFRAME);
2904 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2905 vrele(vp);
2906 }
2907 }
2908 }
2909 #endif /* RAID_AUTOCONFIG */
2910 return(ac_list);
2911 }
2912
#ifdef	RAID_AUTOCONFIG
/*
 * Plausibility check for a component label read from disk.
 *
 * Returns 1 when the version is one of the two known label versions,
 * the clean field is RF_RAID_CLEAN or RF_RAID_DIRTY, row/column are
 * non-negative and inside the positive num_rows/num_columns, and
 * blockSize and numBlocks are positive.  Returns 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel)
{
	if (clabel->version != RF_COMPONENT_LABEL_VERSION_1 &&
	    clabel->version != RF_COMPONENT_LABEL_VERSION)
		return (0);

	if (clabel->clean != RF_RAID_CLEAN &&
	    clabel->clean != RF_RAID_DIRTY)
		return (0);

	if (clabel->row < 0 || clabel->column < 0)
		return (0);

	if (clabel->num_rows <= 0 || clabel->num_columns <= 0)
		return (0);

	if (clabel->row >= clabel->num_rows ||
	    clabel->column >= clabel->num_columns)
		return (0);

	if (clabel->blockSize <= 0 || clabel->numBlocks <= 0)
		return (0);

	/* Label looks reasonable enough... */
	return (1);
}
#endif	/* RAID_AUTOCONFIG */
2936
2937 void
rf_print_component_label(RF_ComponentLabel_t * clabel)2938 rf_print_component_label(RF_ComponentLabel_t *clabel)
2939 {
2940 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2941 clabel->row, clabel->column, clabel->num_rows, clabel->num_columns);
2942 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2943 clabel->version, clabel->serial_number, clabel->mod_counter);
2944 printf(" Clean: %s Status: %d\n", clabel->clean ? "Yes" : "No",
2945 clabel->status );
2946 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2947 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2948 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2949 (char) clabel->parityConfig, clabel->blockSize, clabel->numBlocks);
2950 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2951 printf(" Contains root partition: %s\n", clabel->root_partition ?
2952 "Yes" : "No" );
2953 printf(" Last configured as: raid%d\n", clabel->last_unit );
2954 #if 0
2955 printf(" Config order: %d\n", clabel->config_order);
2956 #endif
2957 }
2958
2959 RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t * ac_list)2960 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2961 {
2962 RF_AutoConfig_t *ac;
2963 RF_ConfigSet_t *config_sets;
2964 RF_ConfigSet_t *cset;
2965 RF_AutoConfig_t *ac_next;
2966
2967
2968 config_sets = NULL;
2969
2970 /*
2971 * Go through the AutoConfig list, and figure out which components
2972 * belong to what sets.
2973 */
2974 ac = ac_list;
2975 while(ac!=NULL) {
2976 /*
2977 * We're going to putz with ac->next, so save it here
2978 * for use at the end of the loop.
2979 */
2980 ac_next = ac->next;
2981
2982 if (config_sets == NULL) {
2983 /* We will need at least this one... */
2984 config_sets = (RF_ConfigSet_t *)
2985 malloc(sizeof(RF_ConfigSet_t), M_RAIDFRAME,
2986 M_NOWAIT);
2987 if (config_sets == NULL) {
2988 panic("rf_create_auto_sets: No memory!");
2989 }
2990 /* This one is easy :) */
2991 config_sets->ac = ac;
2992 config_sets->next = NULL;
2993 config_sets->rootable = 0;
2994 ac->next = NULL;
2995 } else {
2996 /* Which set does this component fit into ? */
2997 cset = config_sets;
2998 while(cset!=NULL) {
2999 if (rf_does_it_fit(cset, ac)) {
3000 /* Looks like it matches... */
3001 ac->next = cset->ac;
3002 cset->ac = ac;
3003 break;
3004 }
3005 cset = cset->next;
3006 }
3007 if (cset==NULL) {
3008 /* Didn't find a match above... new set... */
3009 cset = (RF_ConfigSet_t *)
3010 malloc(sizeof(RF_ConfigSet_t),
3011 M_RAIDFRAME, M_NOWAIT);
3012 if (cset == NULL) {
3013 panic("rf_create_auto_sets: No memory!");
3014 }
3015 cset->ac = ac;
3016 ac->next = NULL;
3017 cset->next = config_sets;
3018 cset->rootable = 0;
3019 config_sets = cset;
3020 }
3021 }
3022 ac = ac_next;
3023 }
3024
3025
3026 return(config_sets);
3027 }
3028
3029 int
rf_does_it_fit(RF_ConfigSet_t * cset,RF_AutoConfig_t * ac)3030 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3031 {
3032 RF_ComponentLabel_t *clabel1, *clabel2;
3033
3034 /*
3035 * If this one matches the *first* one in the set, that's good
3036 * enough, since the other members of the set would have been
3037 * through here too...
3038 */
3039 /*
3040 * Note that we are not checking partitionSize here...
3041 *
3042 * Note that we are also not checking the mod_counters here.
3043 * If everything else matches except the mod_counter, that's
3044 * good enough for this test. We will deal with the mod_counters
3045 * a little later in the autoconfiguration process.
3046 *
3047 * (clabel1->mod_counter == clabel2->mod_counter) &&
3048 *
3049 * The reason we don't check for this is that failed disks
3050 * will have lower modification counts. If those disks are
3051 * not added to the set they used to belong to, then they will
3052 * form their own set, which may result in 2 different sets,
3053 * for example, competing to be configured at raid0, and
3054 * perhaps competing to be the root filesystem set. If the
3055 * wrong ones get configured, or both attempt to become /,
3056 * weird behaviour and or serious lossage will occur. Thus we
3057 * need to bring them into the fold here, and kick them out at
3058 * a later point.
3059 */
3060
3061 clabel1 = cset->ac->clabel;
3062 clabel2 = ac->clabel;
3063 if ((clabel1->version == clabel2->version) &&
3064 (clabel1->serial_number == clabel2->serial_number) &&
3065 (clabel1->num_rows == clabel2->num_rows) &&
3066 (clabel1->num_columns == clabel2->num_columns) &&
3067 (clabel1->sectPerSU == clabel2->sectPerSU) &&
3068 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3069 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3070 (clabel1->parityConfig == clabel2->parityConfig) &&
3071 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3072 (clabel1->blockSize == clabel2->blockSize) &&
3073 (clabel1->numBlocks == clabel2->numBlocks) &&
3074 (clabel1->autoconfigure == clabel2->autoconfigure) &&
3075 (clabel1->root_partition == clabel2->root_partition) &&
3076 (clabel1->last_unit == clabel2->last_unit) &&
3077 (clabel1->config_order == clabel2->config_order)) {
3078 /* If it get's here, it almost *has* to be a match. */
3079 } else {
3080 /* It's not consistent with somebody in the set... Punt. */
3081 return(0);
3082 }
3083 /* All was fine.. It must fit... */
3084 return(1);
3085 }
3086
3087 int
rf_have_enough_components(RF_ConfigSet_t * cset)3088 rf_have_enough_components(RF_ConfigSet_t *cset)
3089 {
3090 RF_AutoConfig_t *ac;
3091 RF_AutoConfig_t *auto_config;
3092 RF_ComponentLabel_t *clabel;
3093 int r,c;
3094 int num_rows;
3095 int num_cols;
3096 int num_missing;
3097 int mod_counter;
3098 int mod_counter_found;
3099 int even_pair_failed;
3100 char parity_type;
3101
3102
3103 /*
3104 * Check to see that we have enough 'live' components
3105 * of this set. If so, we can configure it if necessary.
3106 */
3107
3108 num_rows = cset->ac->clabel->num_rows;
3109 num_cols = cset->ac->clabel->num_columns;
3110 parity_type = cset->ac->clabel->parityConfig;
3111
3112 /* XXX Check for duplicate components !?!?!? */
3113
3114 /* Determine what the mod_counter is supposed to be for this set. */
3115
3116 mod_counter_found = 0;
3117 mod_counter = 0;
3118 ac = cset->ac;
3119 while(ac!=NULL) {
3120 if (mod_counter_found==0) {
3121 mod_counter = ac->clabel->mod_counter;
3122 mod_counter_found = 1;
3123 } else {
3124 if (ac->clabel->mod_counter > mod_counter) {
3125 mod_counter = ac->clabel->mod_counter;
3126 }
3127 }
3128 ac = ac->next;
3129 }
3130
3131 num_missing = 0;
3132 auto_config = cset->ac;
3133
3134 for(r=0; r<num_rows; r++) {
3135 even_pair_failed = 0;
3136 for(c=0; c<num_cols; c++) {
3137 ac = auto_config;
3138 while(ac!=NULL) {
3139 if ((ac->clabel->row == r) &&
3140 (ac->clabel->column == c) &&
3141 (ac->clabel->mod_counter == mod_counter)) {
3142 /* It's this one... */
3143 #ifdef RAIDDEBUG
3144 printf("Found: %s at %d,%d\n",
3145 ac->devname,r,c);
3146 #endif /* RAIDDEBUG */
3147 break;
3148 }
3149 ac=ac->next;
3150 }
3151 if (ac==NULL) {
3152 /* Didn't find one here! */
3153 /*
3154 * Special case for RAID 1, especially
3155 * where there are more than 2
3156 * components (where RAIDframe treats
3157 * things a little differently :( )
3158 */
3159 if (parity_type == '1') {
3160 if (c%2 == 0) { /* Even component. */
3161 even_pair_failed = 1;
3162 } else { /*
3163 * Odd component.
3164 * If we're failed,
3165 * and so is the even
3166 * component, it's
3167 * "Good Night, Charlie"
3168 */
3169 if (even_pair_failed == 1) {
3170 return(0);
3171 }
3172 }
3173 } else {
3174 /* Normal accounting. */
3175 num_missing++;
3176 }
3177 }
3178 if ((parity_type == '1') && (c%2 == 1)) {
3179 /*
3180 * Just did an even component, and we didn't
3181 * bail... Reset the even_pair_failed flag,
3182 * and go on to the next component...
3183 */
3184 even_pair_failed = 0;
3185 }
3186 }
3187 }
3188
3189 clabel = cset->ac->clabel;
3190
3191 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3192 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3193 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3194 /* XXX This needs to be made *much* more general. */
3195 /* Too many failures. */
3196 return(0);
3197 }
3198 /*
3199 * Otherwise, all is well, and we've got enough to take a kick
3200 * at autoconfiguring this set.
3201 */
3202 return(1);
3203 }
3204
3205 void
rf_create_configuration(RF_AutoConfig_t * ac,RF_Config_t * config,RF_Raid_t * raidPtr)3206 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3207 RF_Raid_t *raidPtr)
3208 {
3209 RF_ComponentLabel_t *clabel;
3210 int i;
3211
3212 clabel = ac->clabel;
3213
3214 /* 1. Fill in the common stuff. */
3215 config->numRow = clabel->num_rows;
3216 config->numCol = clabel->num_columns;
3217 config->numSpare = 0; /* XXX Should this be set here ? */
3218 config->sectPerSU = clabel->sectPerSU;
3219 config->SUsPerPU = clabel->SUsPerPU;
3220 config->SUsPerRU = clabel->SUsPerRU;
3221 config->parityConfig = clabel->parityConfig;
3222 /* XXX... */
3223 strlcpy(config->diskQueueType,"fifo", sizeof config->diskQueueType);
3224 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3225 config->layoutSpecificSize = 0; /* XXX ?? */
3226
3227 while(ac!=NULL) {
3228 /*
3229 * row/col values will be in range due to the checks
3230 * in reasonable_label().
3231 */
3232 strlcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3233 ac->devname,
3234 sizeof config->devnames[ac->clabel->row][ac->clabel->column]);
3235 ac = ac->next;
3236 }
3237
3238 for(i=0;i<RF_MAXDBGV;i++) {
3239 config->debugVars[i][0] = 0;
3240 }
3241
3242 #ifdef RAID_DEBUG_ALL
3243
3244 #ifdef RF_DBG_OPTION
3245 #undef RF_DBG_OPTION
3246 #endif /* RF_DBG_OPTION */
3247
3248 #ifdef __STDC__
3249 #define RF_DBG_OPTION(_option_,_val_) do { \
3250 snprintf(&(config->debugVars[i++][0]), 50, "%s %ld", \
3251 #_option_, _val_); \
3252 } while (0)
3253 #else /* __STDC__ */
3254 #define RF_DBG_OPTION(_option_,_val_) do { \
3255 snprintf(&(config->debugVars[i++][0]), 50, "%s %ld", \
3256 "/**/_option_/**/", _val_); \
3257 } while (0)
3258 #endif /* __STDC__ */
3259
3260 i = 0;
3261
3262 /* RF_DBG_OPTION(accessDebug, 0); */
3263 /* RF_DBG_OPTION(accessTraceBufSize, 0); */
3264 RF_DBG_OPTION(cscanDebug, 1); /* Debug CSCAN sorting. */
3265 RF_DBG_OPTION(dagDebug, 1);
3266 /* RF_DBG_OPTION(debugPrintUseBuffer, 0); */
3267 RF_DBG_OPTION(degDagDebug, 1);
3268 RF_DBG_OPTION(disableAsyncAccs, 1);
3269 RF_DBG_OPTION(diskDebug, 1);
3270 RF_DBG_OPTION(enableAtomicRMW, 0);
3271 /*
3272 * This debug variable enables locking of the
3273 * disk arm during small-write operations.
3274 * Setting this variable to anything other than
3275 * 0 will result in deadlock. (wvcii)
3276 */
3277 RF_DBG_OPTION(engineDebug, 1);
3278 RF_DBG_OPTION(fifoDebug, 1); /* Debug fifo queueing. */
3279 /* RF_DBG_OPTION(floatingRbufDebug, 1); */
3280 /* RF_DBG_OPTION(forceHeadSepLimit, -1); */
3281 /* RF_DBG_OPTION(forceNumFloatingReconBufs, -1); */
3282 /*
3283 * Wire down the number of extra recon buffers
3284 * to use.
3285 */
3286 /* RF_DBG_OPTION(keepAccTotals, 1); */
3287 /* Turn on keep_acc_totals. */
3288 RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE);
3289 RF_DBG_OPTION(mapDebug, 1);
3290 RF_DBG_OPTION(maxNumTraces, -1);
3291
3292 /* RF_DBG_OPTION(memChunkDebug, 1); */
3293 /* RF_DBG_OPTION(memDebug, 1); */
3294 /* RF_DBG_OPTION(memDebugAddress, 1); */
3295 /* RF_DBG_OPTION(numBufsToAccumulate, 1); */
3296 /*
3297 * Number of buffers to accumulate before
3298 * doing XOR.
3299 */
3300 RF_DBG_OPTION(prReconSched, 0);
3301 RF_DBG_OPTION(printDAGsDebug, 1);
3302 RF_DBG_OPTION(printStatesDebug, 1);
3303 RF_DBG_OPTION(protectedSectors, 64L);
3304 /*
3305 * Number of sectors at start of disk to exclude
3306 * from RAID address space.
3307 */
3308 RF_DBG_OPTION(pssDebug, 1);
3309 RF_DBG_OPTION(queueDebug, 1);
3310 RF_DBG_OPTION(quiesceDebug, 1);
3311 RF_DBG_OPTION(raidSectorOffset, 0);
3312 /*
3313 * Value added to all incoming sectors to debug
3314 * alignment problems.
3315 */
3316 RF_DBG_OPTION(reconDebug, 1);
3317 RF_DBG_OPTION(reconbufferDebug, 1);
3318 RF_DBG_OPTION(scanDebug, 1); /* Debug SCAN sorting. */
3319 RF_DBG_OPTION(showXorCallCounts, 0);
3320 /* Show n-way Xor call counts. */
3321 RF_DBG_OPTION(shutdownDebug, 1); /* Show shutdown calls. */
3322 RF_DBG_OPTION(sizePercentage, 100);
3323 RF_DBG_OPTION(sstfDebug, 1);
3324 /* Turn on debugging info for sstf queueing. */
3325 RF_DBG_OPTION(stripeLockDebug, 1);
3326 RF_DBG_OPTION(suppressLocksAndLargeWrites, 0);
3327 RF_DBG_OPTION(suppressTraceDelays, 0);
3328 RF_DBG_OPTION(useMemChunks, 1);
3329 RF_DBG_OPTION(validateDAGDebug, 1);
3330 RF_DBG_OPTION(validateVisitedDebug, 1);
3331 /* XXX turn to zero by default ? */
3332 RF_DBG_OPTION(verifyParityDebug, 1);
3333 RF_DBG_OPTION(debugKernelAccess, 1);
3334 /* DoAccessKernel debugging. */
3335
3336 #if RF_INCLUDE_PARITYLOGGING > 0
3337 RF_DBG_OPTION(forceParityLogReint, 0);
3338 RF_DBG_OPTION(numParityRegions, 0);
3339 /* Number of regions in the array. */
3340 RF_DBG_OPTION(numReintegrationThreads, 1);
3341 RF_DBG_OPTION(parityLogDebug, 1);
3342 /* If nonzero, enables debugging of parity logging. */
3343 RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024);
3344 /* Target bytes available for in-core logs. */
3345 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
3346
3347 #endif /* RAID_DEBUG_ALL */
3348 }
3349
3350 int
rf_set_autoconfig(RF_Raid_t * raidPtr,int new_value)3351 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3352 {
3353 RF_ComponentLabel_t clabel;
3354 struct vnode *vp;
3355 dev_t dev;
3356 int row, column;
3357
3358 raidPtr->autoconfigure = new_value;
3359 for(row=0; row<raidPtr->numRow; row++) {
3360 for(column=0; column<raidPtr->numCol; column++) {
3361 if (raidPtr->Disks[row][column].status ==
3362 rf_ds_optimal) {
3363 dev = raidPtr->Disks[row][column].dev;
3364 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3365 raidread_component_label(dev, vp, &clabel);
3366 clabel.autoconfigure = new_value;
3367 raidwrite_component_label(dev, vp, &clabel);
3368 }
3369 }
3370 }
3371 return(new_value);
3372 }
3373
3374 int
rf_set_rootpartition(RF_Raid_t * raidPtr,int new_value)3375 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3376 {
3377 RF_ComponentLabel_t clabel;
3378 struct vnode *vp;
3379 dev_t dev;
3380 int row, column;
3381
3382 raidPtr->root_partition = new_value;
3383 for(row=0; row<raidPtr->numRow; row++) {
3384 for(column=0; column<raidPtr->numCol; column++) {
3385 if (raidPtr->Disks[row][column].status ==
3386 rf_ds_optimal) {
3387 dev = raidPtr->Disks[row][column].dev;
3388 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3389 raidread_component_label(dev, vp, &clabel);
3390 clabel.root_partition = new_value;
3391 raidwrite_component_label(dev, vp, &clabel);
3392 }
3393 }
3394 }
3395 return(new_value);
3396 }
3397
3398 void
rf_release_all_vps(RF_ConfigSet_t * cset)3399 rf_release_all_vps(RF_ConfigSet_t *cset)
3400 {
3401 RF_AutoConfig_t *ac;
3402
3403 ac = cset->ac;
3404 while(ac!=NULL) {
3405 /* Close the vp, and give it back. */
3406 if (ac->vp) {
3407 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3408 vrele(ac->vp);
3409 ac->vp = NULL;
3410 }
3411 ac = ac->next;
3412 }
3413 }
3414
3415
3416 void
rf_cleanup_config_set(RF_ConfigSet_t * cset)3417 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3418 {
3419 RF_AutoConfig_t *ac;
3420 RF_AutoConfig_t *next_ac;
3421
3422 ac = cset->ac;
3423 while(ac!=NULL) {
3424 next_ac = ac->next;
3425 /* Nuke the label. */
3426 free(ac->clabel, M_RAIDFRAME);
3427 /* Cleanup the config structure. */
3428 free(ac, M_RAIDFRAME);
3429 /* "next..." */
3430 ac = next_ac;
3431 }
3432 /* And, finally, nuke the config set. */
3433 free(cset, M_RAIDFRAME);
3434 }
3435
3436
3437 void
raid_init_component_label(RF_Raid_t * raidPtr,RF_ComponentLabel_t * clabel)3438 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3439 {
3440 /* Current version number. */
3441 clabel->version = RF_COMPONENT_LABEL_VERSION;
3442 clabel->serial_number = raidPtr->serial_number;
3443 clabel->mod_counter = raidPtr->mod_counter;
3444 clabel->num_rows = raidPtr->numRow;
3445 clabel->num_columns = raidPtr->numCol;
3446 clabel->clean = RF_RAID_DIRTY; /* Not clean. */
3447 clabel->status = rf_ds_optimal; /* "It's good !" */
3448
3449 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3450 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3451 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3452
3453 clabel->blockSize = raidPtr->bytesPerSector;
3454 clabel->numBlocks = raidPtr->sectorsPerDisk;
3455
3456 /* XXX Not portable. */
3457 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3458 clabel->maxOutstanding = raidPtr->maxOutstanding;
3459 clabel->autoconfigure = raidPtr->autoconfigure;
3460 clabel->root_partition = raidPtr->root_partition;
3461 clabel->last_unit = raidPtr->raidid;
3462 clabel->config_order = raidPtr->config_order;
3463 }
3464
3465 int
rf_auto_config_set(RF_ConfigSet_t * cset,int * unit)3466 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3467 {
3468 RF_Raid_t *raidPtr;
3469 RF_Config_t *config;
3470 int raidID;
3471 int retcode;
3472
3473 db1_printf(("RAID autoconfigure\n"));
3474
3475 retcode = 0;
3476 *unit = -1;
3477
3478 /* 1. Create a config structure. */
3479
3480 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME,
3481 M_NOWAIT);
3482 if (config==NULL) {
3483 printf("Out of mem!?!?\n");
3484 /* XXX Do something more intelligent here. */
3485 return(1);
3486 }
3487
3488 memset(config, 0, sizeof(RF_Config_t));
3489
3490 /* XXX raidID needs to be set correctly... */
3491
3492 /*
3493 * 2. Figure out what RAID ID this one is supposed to live at.
3494 * See if we can get the same RAID dev that it was configured
3495 * on last time...
3496 */
3497
3498 raidID = cset->ac->clabel->last_unit;
3499 if ((raidID < 0) || (raidID >= numraid)) {
3500 /* Let's not wander off into lala land. */
3501 raidID = numraid - 1;
3502 }
3503 if (raidPtrs[raidID]->valid != 0) {
3504
3505 /*
3506 * Nope... Go looking for an alternative...
3507 * Start high so we don't immediately use raid0 if that's
3508 * not taken.
3509 */
3510
3511 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3512 if (raidPtrs[raidID]->valid == 0) {
3513 /* We can use this one ! */
3514 break;
3515 }
3516 }
3517 }
3518
3519 if (raidID < 0) {
3520 /* Punt... */
3521 printf("Unable to auto configure this set!\n");
3522 printf("(Out of RAID devs!)\n");
3523 return(1);
3524 }
3525 raidPtr = raidPtrs[raidID];
3526
3527 /* XXX All this stuff should be done SOMEWHERE ELSE ! */
3528 raidPtr->raidid = raidID;
3529 raidPtr->openings = RAIDOUTSTANDING;
3530
3531 /* 3. Build the configuration structure. */
3532 rf_create_configuration(cset->ac, config, raidPtr);
3533
3534 /* 4. Do the configuration. */
3535 retcode = rf_Configure(raidPtr, config, cset->ac);
3536
3537 if (retcode == 0) {
3538
3539 raidinit(raidPtrs[raidID]);
3540
3541 rf_markalldirty(raidPtrs[raidID]);
3542 raidPtrs[raidID]->autoconfigure = 1; /* XXX Do this here ? */
3543 if (cset->ac->clabel->root_partition==1) {
3544 /*
3545 * Everything configured just fine. Make a note
3546 * that this set is eligible to be root.
3547 */
3548 cset->rootable = 1;
3549 /* XXX Do this here ? */
3550 raidPtrs[raidID]->root_partition = 1;
3551 }
3552 }
3553
3554 printf(": (%s) total number of sectors is %lu (%lu MB)%s\n",
3555 (raidPtrs[raidID]->Layout).map->configName,
3556 (unsigned long) raidPtrs[raidID]->totalSectors,
3557 (unsigned long) (raidPtrs[raidID]->totalSectors / 1024 *
3558 (1 << raidPtrs[raidID]->logBytesPerSector) / 1024),
3559 raidPtrs[raidID]->root_partition ? " as root" : "");
3560
3561 /* 5. Cleanup. */
3562 free(config, M_RAIDFRAME);
3563
3564 *unit = raidID;
3565 return(retcode);
3566 }
3567
3568 void
rf_disk_unbusy(RF_RaidAccessDesc_t * desc)3569 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3570 {
3571 struct buf *bp;
3572
3573 bp = (struct buf *)desc->bp;
3574 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3575 (bp->b_bcount - bp->b_resid),
3576 (bp->b_flags & B_READ));
3577 }
3578