1 /* $MirOS: src/sys/dev/raidframe/rf_openbsdkintf.c,v 1.5 2006/08/09 19:45:41 tg Exp $ */
2 /* $OpenBSD: rf_openbsdkintf.c,v 1.31 2005/12/08 05:53:45 tedu Exp $	*/
3 /* $NetBSD: rf_netbsdkintf.c,v 1.109 2001/07/27 03:30:07 oster Exp $	*/
4 
5 /*-
6  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Greg Oster; Jason R. Thorpe.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	  This product includes software developed by the NetBSD
23  *	  Foundation, Inc. and its contributors.
24  * 4. Neither the name of The NetBSD Foundation nor the names of its
25  *    contributors may be used to endorse or promote products derived
26  *    from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40 
41 /*
42  * Copyright (c) 1988 University of Utah.
43  * Copyright (c) 1990, 1993
44  *	The Regents of the University of California.  All rights reserved.
45  *
46  * This code is derived from software contributed to Berkeley by
47  * the Systems Programming Group of the University of Utah Computer
48  * Science Department.
49  *
50  * Redistribution and use in source and binary forms, with or without
51  * modification, are permitted provided that the following conditions
52  * are met:
53  * 1. Redistributions of source code must retain the above copyright
54  *    notice, this list of conditions and the following disclaimer.
55  * 2. Redistributions in binary form must reproduce the above copyright
56  *    notice, this list of conditions and the following disclaimer in the
57  *    documentation and/or other materials provided with the distribution.
58  * 3. Neither the name of the University nor the names of its contributors
59  *    may be used to endorse or promote products derived from this software
60  *    without specific prior written permission.
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72  * SUCH DAMAGE.
73  *
74  * from: Utah $Hdr: cd.c 1.6 90/11/28$
75  *
76  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
77  */
78 
79 /*
80  * Copyright (c) 1995 Carnegie-Mellon University.
81  * All rights reserved.
82  *
83  * Authors: Mark Holland, Jim Zelenka
84  *
85  * Permission to use, copy, modify and distribute this software and
86  * its documentation is hereby granted, provided that both the copyright
87  * notice and this permission notice appear in all copies of the
88  * software, derivative works or modified versions, and any portions
89  * thereof, and that both notices appear in supporting documentation.
90  *
91  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
92  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
93  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
94  *
95  * Carnegie Mellon requests users of this software to return to
96  *
97  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
98  *  School of Computer Science
99  *  Carnegie Mellon University
100  *  Pittsburgh PA 15213-3890
101  *
102  * any improvements or extensions that they make and grant Carnegie the
103  * rights to redistribute these changes.
104  */
105 
106 /*****************************************************************************
107  *
108  * rf_kintf.c -- The kernel interface routines for RAIDframe.
109  *
110  *****************************************************************************/
111 
112 #include <sys/errno.h>
113 
114 #include <sys/param.h>
115 #include <sys/pool.h>
116 #include <sys/malloc.h>
117 #include <sys/queue.h>
118 #include <sys/disk.h>
119 #include <sys/device.h>
120 #include <sys/stat.h>
121 #include <sys/ioctl.h>
122 #include <sys/fcntl.h>
123 #include <sys/systm.h>
124 #include <sys/namei.h>
125 #include <sys/conf.h>
126 #include <sys/lock.h>
127 #include <sys/buf.h>
128 #include <sys/user.h>
129 #include <sys/reboot.h>
130 
131 #include "raid.h"
132 #include "rf_raid.h"
133 #include "rf_raidframe.h"
134 #include "rf_copyback.h"
135 #include "rf_dag.h"
136 #include "rf_dagflags.h"
137 #include "rf_desc.h"
138 #include "rf_diskqueue.h"
139 #include "rf_engine.h"
140 #include "rf_acctrace.h"
141 #include "rf_etimer.h"
142 #include "rf_general.h"
143 #include "rf_debugMem.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_debugprint.h"
149 #include "rf_threadstuff.h"
150 #include "rf_configure.h"
151 
152 int	rf_kdebug_level = 0;
153 
154 #ifdef	RAIDDEBUG
155 #define	db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while(0)
156 #else	/* RAIDDEBUG */
157 #define	db1_printf(a) (void)0
158 #endif	/* ! RAIDDEBUG */
159 
160 static RF_Raid_t **raidPtrs;	/* Global raid device descriptors. */
161 
162 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex);
163 
164 /* Requests to install a spare table. */
165 static RF_SparetWait_t *rf_sparet_wait_queue;
166 
167 /* Responses from installation process. */
168 static RF_SparetWait_t *rf_sparet_resp_queue;
169 
170 /* Prototypes. */
171 void rf_KernelWakeupFunc(struct buf *);
172 void rf_InitBP(struct buf *, struct vnode *, unsigned, dev_t, RF_SectorNum_t,
173     RF_SectorCount_t, caddr_t, void (*)(struct buf *), void *, int,
174     struct proc *);
175 void raidinit(RF_Raid_t *);
176 
177 void raidattach(int);
178 int  raidsize(dev_t);
179 int  raidopen(dev_t, int, int, struct proc *);
180 int  raidclose(dev_t, int, int, struct proc *);
181 int  raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
182 int  raidwrite(dev_t, struct uio *, int);
183 int  raidread(dev_t, struct uio *, int);
184 void raidstrategy(struct buf *);
185 int  raiddump(dev_t, daddr_t, caddr_t, size_t);
186 
187 /*
188  * Pilfered from ccd.c
189  */
/*
 * Wrapper that pairs a driver-owned struct buf with the original request
 * it was issued for.
 * NOTE(review): instances are presumably obtained through RAIDGETBUF() and
 * returned with RAIDPUTBUF() -- confirm against the pool setup code.
 */
struct raidbuf {
	/*
	 * New I/O buf.  MUST BE FIRST!!!
	 * NOTE(review): presumably so a struct buf pointer can be converted
	 * back to its enclosing struct raidbuf -- confirm at the use sites.
	 */
	struct buf	 rf_buf;
	struct buf	*rf_obp;	/* Ptr. to original I/O buf. */
	int		 rf_flags;	/* Miscellaneous flags. */
	RF_DiskQueueData_t *req;	/* The request that this was part of. */
};
196 
197 #define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
198 #define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
199 
200 /*
 * Some ports (like i386) use a swapgeneric that wants to snoop around
202  * in this raid_cd structure.  It is preserved (for now) to remain
203  * compatible with such practice.
204  */
205 struct cfdriver raid_cd = {
206 	NULL, "raid", DV_DISK
207 };
208 
209 /*
210  * XXX Not sure if the following should be replacing the raidPtrs above,
211  * or if it should be used in conjunction with that...
212  */
/*
 * Per-unit driver state.  One entry per raid unit lives in the raid_softc
 * array, indexed by unit number; sizeof(struct raid_softc) is also what
 * raid_ca advertises as its softc size.
 */
struct raid_softc {
	int		sc_flags;		/* Flags (RAIDF_*, see below). */
	int		sc_cflags;		/* Configuration flags. */
	size_t		sc_size;		/* Size of the raid device. */
	char		sc_xname[20];		/* XXX external name. */
	struct disk	sc_dkdev;		/* Generic disk device info. */
	struct pool	sc_cbufpool;		/* Component buffer pool. */
	struct buf	sc_q;			/* Used for the device queue. */
};
222 
223 /* sc_flags */
224 #define	RAIDF_INITED	0x01	/* Unit has been initialized. */
225 #define	RAIDF_WLABEL	0x02	/* Label area is writable. */
226 #define	RAIDF_LABELLING	0x04	/* Unit is currently being labelled. */
227 #define	RAIDF_WANTED	0x40	/* Someone is waiting to obtain a lock. */
228 #define	RAIDF_LOCKED	0x80	/* Unit is locked. */
229 
230 #define	raidunit(x)	DISKUNIT(x)
231 int numraid = 0;
232 
233 /*
234  * Here we define a cfattach structure for inserting any new raid device
235  * into the device tree.  This is needed by some archs that look for
236  * bootable devices in there.
237  */
238 int  rf_probe(struct device *, void *, void *);
239 void rf_attach(struct device *, struct device *, void *);
240 int  rf_detach(struct device *, int);
241 int  rf_activate(struct device *, enum devact);
242 void rf_zeroref(struct device *);
243 
244 struct cfattach raid_ca = {
245 	sizeof(struct raid_softc), rf_probe, rf_attach,
246 	rf_detach, rf_activate, rf_zeroref
247 };
248 
249 /*
250  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
251  * Be aware that large numbers can allow the driver to consume a lot of
252  * kernel memory, especially on writes, and in degraded mode reads.
253  *
254  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
255  * a single 64K write will typically require 64K for the old data,
256  * 64K for the old parity, and 64K for the new parity, for a total
257  * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
259  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
260  *
261  * Now in degraded mode, for example, a 64K read on the above setup may
262  * require data reconstruction, which will require *all* of the 4 remaining
263  * disks to participate -- 4 * 32K/disk == 128K again.
264  */
265 
266 #ifndef	RAIDOUTSTANDING
267 #define	RAIDOUTSTANDING		6
268 #endif
269 
270 #define	RAIDLABELDEV(dev)						\
271 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
272 
273 /* Declared here, and made public, for the benefit of KVM stuff... */
274 struct raid_softc  *raid_softc;
275 struct raid_softc **raid_scPtrs;
276 
277 void rf_shutdown_hook(RF_ThreadArg_t);
278 void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disklabel *);
279 void raidgetdisklabel(dev_t);
280 void raidmakedisklabel(struct raid_softc *);
281 
282 int  raidlock(struct raid_softc *);
283 void raidunlock(struct raid_softc *);
284 
285 void rf_markalldirty(RF_Raid_t *);
286 
287 struct device *raidrootdev;
288 
289 int  findblkmajor(struct device *dv);
290 char *findblkname(int);
291 
292 void rf_ReconThread(struct rf_recon_req *);
293 /* XXX what I want is: */
294 /*void rf_ReconThread(RF_Raid_t *raidPtr);*/
295 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
296 void rf_CopybackThread(RF_Raid_t *raidPtr);
297 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
298 #ifdef	RAID_AUTOCONFIG
299 void rf_buildroothack(void *);
300 int  rf_reasonable_label(RF_ComponentLabel_t *);
301 #endif	/* RAID_AUTOCONFIG */
302 
303 RF_AutoConfig_t *rf_find_raid_components(void);
304 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
305 int  rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
306 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *,
307 				  RF_Raid_t *);
308 int  rf_set_autoconfig(RF_Raid_t *, int);
309 int  rf_set_rootpartition(RF_Raid_t *, int);
310 void rf_release_all_vps(RF_ConfigSet_t *);
311 void rf_cleanup_config_set(RF_ConfigSet_t *);
312 int  rf_have_enough_components(RF_ConfigSet_t *);
313 int  rf_auto_config_set(RF_ConfigSet_t *, int *);
314 
315 #ifdef	RAID_AUTOCONFIG
316 static int raidautoconfig = 0;	/*
317 				 * Debugging, mostly.  Set to 0 to not
318 				 * allow autoconfig to take place.
319 				 * Note that this is overridden by having
320 				 * RAID_AUTOCONFIG as an option in the
321 				 * kernel config file.
322 				 */
323 #endif	/* RAID_AUTOCONFIG */
324 
/*
 * Probe hook referenced from the raid_ca cfattach structure.
 * All arguments are ignored; the result is always 0.
 */
int
rf_probe(struct device *parent, void *match_, void *aux)
{
	return (0);
}
330 
/*
 * Attach hook referenced from the raid_ca cfattach structure.
 * Intentionally empty: none of the arguments are used.
 */
void
rf_attach(struct device *parent, struct device *self, void *aux)
{
	/* Nothing to do. */
}
336 
/*
 * Detach hook referenced from the raid_ca cfattach structure.
 * Performs no work and always returns 0.
 */
int
rf_detach(struct device *self, int flags)
{
	return (0);
}
342 
343 int
rf_activate(struct device * self,enum devact act)344 rf_activate(struct device *self, enum devact act)
345 {
346 	return 0;
347 }
348 
/*
 * Zero-reference hook referenced from the raid_ca cfattach structure.
 * Intentionally empty.
 */
void
rf_zeroref(struct device *self)
{
	/* Nothing to do. */
}
353 
354 void
raidattach(int num)355 raidattach(int num)
356 {
357 	int raidID;
358 	int i, rc;
359 #ifdef	RAID_AUTOCONFIG
360 	RF_AutoConfig_t *ac_list;	/* Autoconfig list. */
361 	RF_ConfigSet_t *config_sets;
362 #endif	/* RAID_AUTOCONFIG */
363 
364 	db1_printf(("raidattach: Asked for %d units\n", num));
365 
366 	if (num <= 0) {
367 #ifdef	DIAGNOSTIC
368 		panic("raidattach: count <= 0");
369 #endif	/* DIAGNOSTIC */
370 		return;
371 	}
372 
373 	/* This is where all the initialization stuff gets done. */
374 
375 	numraid = num;
376 
377 	/* Make some space for requested number of units... */
378 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
379 	if (raidPtrs == NULL) {
380 		panic("raidPtrs is NULL!!");
381 	}
382 
383 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
384 	if (rc) {
385 		RF_PANIC();
386 	}
387 
388 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
389 
390 	for (i = 0; i < num; i++)
391 		raidPtrs[i] = NULL;
392 	rc = rf_BootRaidframe();
393 	if (rc == 0)
394 		printf("Kernelised RAIDframe activated\n");
395 	else
396 	        panic("Serious error booting RAID !!!");
397 
398 	/*
399 	 * Put together some datastructures like the CCD device does...
400 	 * This lets us lock the device and what-not when it gets opened.
401 	 */
402 
403 	raid_softc = (struct raid_softc *)
404 		malloc(num * sizeof(struct raid_softc), M_RAIDFRAME, M_NOWAIT);
405 	if (raid_softc == NULL) {
406 		printf("WARNING: no memory for RAIDframe driver\n");
407 		return;
408 	}
409 
410 	bzero(raid_softc, num * sizeof (struct raid_softc));
411 
412 	raid_scPtrs = (struct raid_softc **)
413 		malloc(num * sizeof(struct raid_softc *), M_RAIDFRAME,
414 		    M_NOWAIT);
415 	if (raid_scPtrs == NULL) {
416 		printf("WARNING: no memory for RAIDframe driver\n");
417 		return;
418 	}
419 
420 	bzero(raid_scPtrs, num * sizeof (struct raid_softc *));
421 
422 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
423 	    M_RAIDFRAME, M_NOWAIT);
424 	if (raidrootdev == NULL) {
425 		panic("No memory for RAIDframe driver!!?!?!");
426 	}
427 
428 	for (raidID = 0; raidID < num; raidID++) {
429 #if 0
430 		SIMPLEQ_INIT(&raid_softc[raidID].sc_q);
431 #endif
432 
433 		raidrootdev[raidID].dv_class  = DV_DISK;
434 		raidrootdev[raidID].dv_cfdata = NULL;
435 		raidrootdev[raidID].dv_unit   = raidID;
436 		raidrootdev[raidID].dv_parent = NULL;
437 		raidrootdev[raidID].dv_flags  = 0;
438 		snprintf(raidrootdev[raidID].dv_xname,
439 		    sizeof raidrootdev[raidID].dv_xname,"raid%d",raidID);
440 
441 		RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t),
442 		    (RF_Raid_t *));
443 		if (raidPtrs[raidID] == NULL) {
444 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
445 			numraid = raidID;
446 			return;
447 		}
448 	}
449 
450 	raid_cd.cd_devs = (void **) raid_scPtrs;
451 	raid_cd.cd_ndevs = num;
452 
453 #ifdef	RAID_AUTOCONFIG
454 	raidautoconfig = 1;
455 
456 	if (raidautoconfig) {
457 		/* 1. Locate all RAID components on the system. */
458 
459 #ifdef	RAIDDEBUG
460 		printf("Searching for raid components...\n");
461 #endif	/* RAIDDEBUG */
462 		ac_list = rf_find_raid_components();
463 
464 		/* 2. Sort them into their respective sets. */
465 
466 		config_sets = rf_create_auto_sets(ac_list);
467 
468 		/*
469 		 * 3. Evaluate each set and configure the valid ones
470 		 * This gets done in rf_buildroothack().
471 		 */
472 
473 		/*
474 		 * Schedule the creation of the thread to do the
475 		 * "/ on RAID" stuff.
476 		 */
477 
478 		rf_buildroothack(config_sets);
479 
480 	}
481 #endif	/* RAID_AUTOCONFIG */
482 
483 }
484 
485 #ifdef	RAID_AUTOCONFIG
486 void
rf_buildroothack(void * arg)487 rf_buildroothack(void *arg)
488 {
489 	extern int rootdev_override;
490 	RF_ConfigSet_t *config_sets = arg;
491 	RF_ConfigSet_t *cset;
492 	RF_ConfigSet_t *next_cset;
493 	int retcode;
494 	int raidID;
495 	int rootID;
496 	int num_root;
497 	int majdev;
498 
499 	rootID = 0;
500 	num_root = 0;
501 	cset = config_sets;
502 	while(cset != NULL ) {
503 		next_cset = cset->next;
504 		if (rf_have_enough_components(cset) &&
505 		    cset->ac->clabel->autoconfigure==1) {
506 			retcode = rf_auto_config_set(cset,&raidID);
507 			if (!retcode) {
508 				if (cset->rootable) {
509 					rootID = raidID;
510 #ifdef	RAIDDEBUG
511 					printf("eligible root device %d:"
512 					    " raid%d\n", num_root, rootID);
513 #endif	/* RAIDDEBUG */
514 					num_root++;
515 				}
516 			} else {
517 				/* The autoconfig didn't work :( */
518 #ifdef	RAIDDEBUG
519 				printf("Autoconfig failed with code %d for"
520 				    " raid%d\n", retcode, raidID);
521 #endif	/* RAIDDEBUG */
522 				rf_release_all_vps(cset);
523 			}
524 		} else {
525 			/*
526 			 * We're not autoconfiguring this set...
527 			 * Release the associated resources.
528 			 */
529 			rf_release_all_vps(cset);
530 		}
531 		/* Cleanup. */
532 		rf_cleanup_config_set(cset);
533 		cset = next_cset;
534 	}
535 	if (boothowto & RB_ASKNAME) {
536 		/* We don't auto-config... */
537 	} else {
538 		/* They didn't ask, and we found something bootable... */
539 
540 		if (num_root == 1) {
541 			majdev = findblkmajor(&raidrootdev[rootID]);
542 			if (majdev < 0)
543 				boothowto |= RB_ASKNAME;
544 			else if ((rootdev == NODEV) /* config bsd generic */
545 			    || (major(rootdev) == majdev) /* root on raid */
546 			    ) {
547 				extern char root_devname[];
548 
549 				rootdev = MAKEDISKDEV(majdev,rootID,0);
550 				boothowto |= RB_DFLTROOT;
551 				snprintf(root_devname, 16, "raid%da", rootID);
552 			} else if (rootdev_override) {
553 				printf("raidframe: eligible root device raid%da ignored (rootdev set via UKC)\n", rootID);
554 			} else {
555 				/* Found a RAID, but e.g. RAMDISK kernel */
556 				printf("raidframe: Found eligible root device, but this is not a generic kernel.\nraidframe: Please choose a root device.\nPossible answer: rd0a (if you booted a ramdisk)\n");
557 				boothowto |= RB_ASKNAME;
558 			}
559 		} else if ((num_root > 1) && !rootdev_override) {
560 			/* We can't guess... Require the user to answer... */
561 			printf("raidframe: Found more than one eligible root device.\nraidframe: Please choose a root device.\nPossible answers: [rsw]d0a raid[0-9]a\n");
562 			boothowto |= RB_ASKNAME;
563 		}
564 	}
565 }
566 #endif	/* RAID_AUTOCONFIG */
567 
568 void
rf_shutdown_hook(RF_ThreadArg_t arg)569 rf_shutdown_hook(RF_ThreadArg_t arg)
570 {
571 	int unit;
572 	struct raid_softc *rs;
573 	RF_Raid_t *raidPtr;
574 
575 	/* Don't do it if we are not "safe". */
576 	if (boothowto & RB_NOSYNC)
577 		return;
578 
579 	raidPtr = (RF_Raid_t *) arg;
580 	unit = raidPtr->raidid;
581 	rs = &raid_softc[unit];
582 
583 	/* Shutdown the system. */
584 
585 	if (rf_hook_cookies != NULL && rf_hook_cookies[unit] != NULL)
586 		rf_hook_cookies[unit] = NULL;
587 
588 	rf_Shutdown(raidPtr);
589 
590 	pool_destroy(&rs->sc_cbufpool);
591 
592 	/* It's no longer initialised... */
593 	rs->sc_flags &= ~RAIDF_INITED;
594 
595 	/* config_detach the device. */
596 	config_detach(device_lookup(&raid_cd, unit), 0);
597 
598 	/* Detach the disk. */
599 	disk_detach(&rs->sc_dkdev);
600 }
601 
602 int
raidsize(dev_t dev)603 raidsize(dev_t dev)
604 {
605 	struct raid_softc *rs;
606 	struct disklabel *lp;
607 	int part, unit, omask, size;
608 
609 	unit = raidunit(dev);
610 	if (unit >= numraid)
611 		return (-1);
612 	rs = &raid_softc[unit];
613 
614 	if ((rs->sc_flags & RAIDF_INITED) == 0)
615 		return (-1);
616 
617 	part = DISKPART(dev);
618 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
619 	lp = rs->sc_dkdev.dk_label;
620 
621 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
622 		return (-1);
623 
624 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
625 		size = -1;
626 	else
627 		size = lp->d_partitions[part].p_size *
628 		    (lp->d_secsize / DEV_BSIZE);
629 
630 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
631 		return (-1);
632 
633 	return (size);
634 
635 }
636 
637 int
raiddump(dev_t dev,daddr_t blkno,caddr_t va,size_t size)638 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
639 {
640 	/* Not implemented. */
641 	return (ENXIO);
642 }
643 
644 /* ARGSUSED */
645 int
raidopen(dev_t dev,int flags,int fmt,struct proc * p)646 raidopen(dev_t dev, int flags, int fmt, struct proc *p)
647 {
648 	int unit = raidunit(dev);
649 	struct raid_softc *rs;
650 	struct disklabel *lp;
651 	int part,pmask;
652 	int error = 0;
653 
654 	if (unit >= numraid)
655 		return (ENXIO);
656 	rs = &raid_softc[unit];
657 
658 	if ((error = raidlock(rs)) != 0)
659 		return (error);
660 	lp = rs->sc_dkdev.dk_label;
661 
662 	part = DISKPART(dev);
663 	pmask = (1 << part);
664 
665 	db1_printf(
666 	    ("Opening raid device number: %d partition: %d\n", unit, part));
667 
668 
669 	if ((rs->sc_flags & RAIDF_INITED) && (rs->sc_dkdev.dk_openmask == 0))
670 		raidgetdisklabel(dev);
671 
672 	/* Make sure that this partition exists. */
673 
674 	if (part != RAW_PART) {
675 		db1_printf(("Not a raw partition..\n"));
676 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
677 		    ((part >= lp->d_npartitions) ||
678 		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
679 			error = ENXIO;
680 			raidunlock(rs);
681 			db1_printf(("Bailing out...\n"));
682 			return (error);
683 		}
684 	}
685 
686 	/* Prevent this unit from being unconfigured while opened. */
687 	switch (fmt) {
688 	case S_IFCHR:
689 		rs->sc_dkdev.dk_copenmask |= pmask;
690 		break;
691 
692 	case S_IFBLK:
693 		rs->sc_dkdev.dk_bopenmask |= pmask;
694 		break;
695 	}
696 
697 	if ((rs->sc_dkdev.dk_openmask == 0) &&
698 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
699 		/*
700 		 * First one...  Mark things as dirty...  Note that we *MUST*
701 		 * have done a configure before this.  I DO NOT WANT TO BE
702 		 * SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
703 		 * THAT THEY BELONG TOGETHER!!!!!
704 		 */
705 		/*
706 		 * XXX should check to see if we're only open for reading
707 		 * here...  If so, we needn't do this, but then need some
708 		 * other way of keeping track of what's happened...
709 		 */
710 
711 		rf_markalldirty( raidPtrs[unit] );
712 	}
713 
714 	rs->sc_dkdev.dk_openmask =
715 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
716 
717 	raidunlock(rs);
718 
719 	return (error);
720 }
721 
722 /* ARGSUSED */
723 int
raidclose(dev_t dev,int flags,int fmt,struct proc * p)724 raidclose(dev_t dev, int flags, int fmt, struct proc *p)
725 {
726 	int unit = raidunit(dev);
727 	struct raid_softc *rs;
728 	int error = 0;
729 	int part;
730 
731 	if (unit >= numraid)
732 		return (ENXIO);
733 	rs = &raid_softc[unit];
734 
735 	if ((error = raidlock(rs)) != 0)
736 		return (error);
737 
738 	part = DISKPART(dev);
739 
740 	/* ...that much closer to allowing unconfiguration... */
741 	switch (fmt) {
742 	case S_IFCHR:
743 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
744 		break;
745 
746 	case S_IFBLK:
747 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
748 		break;
749 	}
750 	rs->sc_dkdev.dk_openmask =
751 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
752 
753 	if ((rs->sc_dkdev.dk_openmask == 0) &&
754 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
755 		/*
756 		 * Last one...  Device is not unconfigured yet.
757 		 * Device shutdown has taken care of setting the
758 		 * clean bits if RAIDF_INITED is not set.
759 		 * Mark things as clean...
760 		 */
761 		db1_printf(("Last one on raid%d.  Updating status.\n",unit));
762 		rf_update_component_labels(raidPtrs[unit],
763 						 RF_FINAL_COMPONENT_UPDATE);
764 	}
765 
766 	raidunlock(rs);
767 	return (0);
768 }
769 
770 void
raidstrategy(struct buf * bp)771 raidstrategy(struct buf *bp)
772 {
773 	int s;
774 
775 	unsigned int raidID = raidunit(bp->b_dev);
776 	RF_Raid_t *raidPtr;
777 	struct raid_softc *rs = &raid_softc[raidID];
778 	struct disklabel *lp;
779 	int wlabel;
780 
781 	s = splbio();
782 
783 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
784 		bp->b_error = ENXIO;
785 		bp->b_flags |= B_ERROR;
786 		bp->b_resid = bp->b_bcount;
787 		biodone(bp);
788   		goto raidstrategy_end;
789 	}
790 	if (raidID >= numraid || !raidPtrs[raidID]) {
791 		bp->b_error = ENODEV;
792 		bp->b_flags |= B_ERROR;
793 		bp->b_resid = bp->b_bcount;
794 		biodone(bp);
795 		goto raidstrategy_end;
796 	}
797 	raidPtr = raidPtrs[raidID];
798 	if (!raidPtr->valid) {
799 		bp->b_error = ENODEV;
800 		bp->b_flags |= B_ERROR;
801 		bp->b_resid = bp->b_bcount;
802 		biodone(bp);
803 		goto raidstrategy_end;
804 	}
805 	if (bp->b_bcount == 0) {
806 		db1_printf(("b_bcount is zero..\n"));
807 		biodone(bp);
808 		goto raidstrategy_end;
809 	}
810 	lp = rs->sc_dkdev.dk_label;
811 
812 	/*
813 	 * Do bounds checking and adjust transfer.  If there's an
814 	 * error, the bounds check will flag that for us.
815 	 */
816 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
817 	if (DISKPART(bp->b_dev) != RAW_PART)
818 		if (bounds_check_with_label(bp, lp, rs->sc_dkdev.dk_cpulabel,
819 		    wlabel) <= 0) {
820 			db1_printf(("Bounds check failed!!:%d %d\n",
821 			    (int)bp->b_blkno, (int)wlabel));
822 			biodone(bp);
823 			goto raidstrategy_end;
824 		}
825 
826 	bp->b_resid = 0;
827 
828 	bp->b_actf = rs->sc_q.b_actf;
829 	rs->sc_q.b_actf = bp;
830 	rs->sc_q.b_active++;
831 
832 	raidstart(raidPtrs[raidID]);
833 
834 raidstrategy_end:
835 	splx(s);
836 }
837 
838 /* ARGSUSED */
839 int
raidread(dev_t dev,struct uio * uio,int flags)840 raidread(dev_t dev, struct uio *uio, int flags)
841 {
842 	int unit = raidunit(dev);
843 	struct raid_softc *rs;
844 	int part;
845 
846 	if (unit >= numraid)
847 		return (ENXIO);
848 	rs = &raid_softc[unit];
849 
850 	if ((rs->sc_flags & RAIDF_INITED) == 0)
851 		return (ENXIO);
852 	part = DISKPART(dev);
853 
854 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
855 
856 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
857 }
858 
859 /* ARGSUSED */
860 int
raidwrite(dev_t dev,struct uio * uio,int flags)861 raidwrite(dev_t dev, struct uio *uio, int flags)
862 {
863 	int unit = raidunit(dev);
864 	struct raid_softc *rs;
865 
866 	if (unit >= numraid)
867 		return (ENXIO);
868 	rs = &raid_softc[unit];
869 
870 	if ((rs->sc_flags & RAIDF_INITED) == 0)
871 		return (ENXIO);
872 	db1_printf(("raidwrite\n"));
873 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
874 }
875 
876 int
raidioctl(dev_t dev,u_long cmd,caddr_t data,int flag,struct proc * p)877 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
878 {
879 	int unit = raidunit(dev);
880 	int error = 0;
881 	int part, pmask;
882 	struct raid_softc *rs;
883 	RF_Config_t *k_cfg, *u_cfg;
884 	RF_Raid_t *raidPtr;
885 	RF_RaidDisk_t *diskPtr;
886 	RF_AccTotals_t *totals;
887 	RF_DeviceConfig_t *d_cfg, **ucfgp;
888 	u_char *specific_buf;
889 	int retcode = 0;
890 	int row;
891 	int column;
892 	struct rf_recon_req *rrcopy, *rr;
893 	RF_ComponentLabel_t *clabel;
894 	RF_ComponentLabel_t ci_label;
895 	RF_ComponentLabel_t **clabel_ptr;
896 	RF_SingleComponent_t *sparePtr,*componentPtr;
897 	RF_SingleComponent_t hot_spare;
898 	RF_SingleComponent_t component;
899 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
900 	int i, j, d;
901 
902 	if (unit >= numraid)
903 		return (ENXIO);
904 	rs = &raid_softc[unit];
905 	raidPtr = raidPtrs[unit];
906 
907 	db1_printf(("raidioctl: %d %d %d %d\n", (int)dev, (int)DISKPART(dev),
908 	    (int)unit, (int)cmd));
909 
910 	/* Must be open for writes for these commands... */
911 	switch (cmd) {
912 	case DIOCSDINFO:
913 	case DIOCWDINFO:
914 	case DIOCWLABEL:
915 		if ((flag & FWRITE) == 0)
916 			return (EBADF);
917 	}
918 
919 	/* Must be initialised for these... */
920 	switch (cmd) {
921 	case DIOCGDINFO:
922 	case DIOCSDINFO:
923 	case DIOCWDINFO:
924 	case DIOCGPART:
925 	case DIOCWLABEL:
926 	case DIOCGPDINFO:
927 	case RAIDFRAME_SHUTDOWN:
928 	case RAIDFRAME_REWRITEPARITY:
929 	case RAIDFRAME_GET_INFO:
930 	case RAIDFRAME_RESET_ACCTOTALS:
931 	case RAIDFRAME_GET_ACCTOTALS:
932 	case RAIDFRAME_KEEP_ACCTOTALS:
933 	case RAIDFRAME_GET_SIZE:
934 	case RAIDFRAME_FAIL_DISK:
935 	case RAIDFRAME_COPYBACK:
936 	case RAIDFRAME_CHECK_RECON_STATUS:
937 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
938 	case RAIDFRAME_GET_COMPONENT_LABEL:
939 	case RAIDFRAME_SET_COMPONENT_LABEL:
940 	case RAIDFRAME_ADD_HOT_SPARE:
941 	case RAIDFRAME_REMOVE_HOT_SPARE:
942 	case RAIDFRAME_INIT_LABELS:
943 	case RAIDFRAME_REBUILD_IN_PLACE:
944 	case RAIDFRAME_CHECK_PARITY:
945 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
946 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
947 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
948 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
949 	case RAIDFRAME_SET_AUTOCONFIG:
950 	case RAIDFRAME_SET_ROOT:
951 	case RAIDFRAME_DELETE_COMPONENT:
952 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
953 		if ((rs->sc_flags & RAIDF_INITED) == 0)
954 			return (ENXIO);
955 	}
956 
957 	switch (cmd) {
958 		/* Configure the system. */
959 	case RAIDFRAME_CONFIGURE:
960 
961 		if (raidPtr->valid) {
962 			/* There is a valid RAID set running on this unit ! */
963 			printf("raid%d: Device already configured!\n",unit);
964 			return(EINVAL);
965 		}
966 
967 		/*
968 		 * Copy-in the configuration information.
969 		 * data points to a pointer to the configuration structure.
970 		 */
971 		u_cfg = *((RF_Config_t **)data);
972 		RF_Malloc(k_cfg, sizeof (RF_Config_t), (RF_Config_t *));
973 		if (k_cfg == NULL) {
974 			return (ENOMEM);
975 		}
976 		retcode = copyin((caddr_t)u_cfg, (caddr_t)k_cfg,
977 		    sizeof (RF_Config_t));
978 		if (retcode) {
979 			RF_Free(k_cfg, sizeof(RF_Config_t));
980 			return (retcode);
981 		}
982 
983 		/*
984 		 * Allocate a buffer for the layout-specific data,
985 		 * and copy it in.
986 		 */
987 		if (k_cfg->layoutSpecificSize) {
988 			if (k_cfg->layoutSpecificSize > 10000) {
989 				/* Sanity check. */
990 				RF_Free(k_cfg, sizeof(RF_Config_t));
991 				return (EINVAL);
992 			}
993 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
994 			    (u_char *));
995 			if (specific_buf == NULL) {
996 				RF_Free(k_cfg, sizeof (RF_Config_t));
997 				return (ENOMEM);
998 			}
999 			retcode = copyin(k_cfg->layoutSpecific,
1000 			    (caddr_t)specific_buf, k_cfg->layoutSpecificSize);
1001 			if (retcode) {
1002 				RF_Free(k_cfg, sizeof(RF_Config_t));
1003 				RF_Free(specific_buf,
1004 					k_cfg->layoutSpecificSize);
1005 				return (retcode);
1006 			}
1007 		} else
1008 			specific_buf = NULL;
1009 		k_cfg->layoutSpecific = specific_buf;
1010 
1011 		/*
1012 		 * We should do some kind of sanity check on the
1013 		 * configuration.
1014 		 * Store the sum of all the bytes in the last byte ?
1015 		 */
1016 
1017 		/*
1018 		 * Clear the entire RAID descriptor, just to make sure
1019 		 *  there is no stale data left in the case of a
1020 		 *  reconfiguration.
1021 		 */
1022 		bzero((char *) raidPtr, sizeof(RF_Raid_t));
1023 
1024 		/* Configure the system. */
1025 		raidPtr->raidid = unit;
1026 
1027 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
1028 
1029 		if (retcode == 0) {
1030 
1031 			/*
1032 			 * Allow this many simultaneous IO's to
1033 			 * this RAID device.
1034 			 */
1035 			raidPtr->openings = RAIDOUTSTANDING;
1036 
1037 			raidinit(raidPtr);
1038 			rf_markalldirty(raidPtr);
1039 		}
1040 
1041 		/* Free the buffers.  No return code here. */
1042 		if (k_cfg->layoutSpecificSize) {
1043 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1044 		}
1045 		RF_Free(k_cfg, sizeof (RF_Config_t));
1046 
1047 		return (retcode);
1048 
1049 	case RAIDFRAME_SHUTDOWN:
1050 		/* Shutdown the system. */
1051 
1052 		if ((error = raidlock(rs)) != 0)
1053 			return (error);
1054 
1055 		/*
1056 		 * If somebody has a partition mounted, we shouldn't
1057 		 * shutdown.
1058 		 */
1059 
1060 		part = DISKPART(dev);
1061 		pmask = (1 << part);
1062 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1063 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1064 		    (rs->sc_dkdev.dk_copenmask & pmask))) {
1065 			raidunlock(rs);
1066 			return (EBUSY);
1067 		}
1068 
1069 		if ((retcode = rf_Shutdown(raidPtr)) == 0) {
1070 
1071 			pool_destroy(&rs->sc_cbufpool);
1072 
1073 			/* It's no longer initialised... */
1074 			rs->sc_flags &= ~RAIDF_INITED;
1075 
1076 			/* config_detach the device. */
1077 			config_detach(device_lookup(&raid_cd, unit), 0);
1078 
1079 			/* Detach the disk. */
1080 			disk_detach(&rs->sc_dkdev);
1081 		}
1082 
1083 		raidunlock(rs);
1084 
1085 		return (retcode);
1086 
1087 	case RAIDFRAME_GET_COMPONENT_LABEL:
1088 		clabel_ptr = (RF_ComponentLabel_t **) data;
1089 		/*
1090 		 * We need to read the component label for the disk indicated
1091 		 * by row,column in clabel.
1092 		 */
1093 
1094 		/*
1095 		 * For practice, let's get it directly from disk, rather
1096 		 * than from the in-core copy.
1097 		 */
1098 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1099 			   (RF_ComponentLabel_t *));
1100 		if (clabel == NULL)
1101 			return (ENOMEM);
1102 
1103 		bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
1104 
1105 		retcode = copyin( *clabel_ptr, clabel,
1106 				  sizeof(RF_ComponentLabel_t));
1107 
1108 		if (retcode) {
1109 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1110 			return(retcode);
1111 		}
1112 
1113  		row = clabel->row;
1114 		column = clabel->column;
1115 
1116 		if ((row < 0) || (row >= raidPtr->numRow) ||
1117 		    (column < 0) || (column >= raidPtr->numCol)) {
1118 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1119 			return(EINVAL);
1120   		}
1121 
1122 		raidread_component_label(raidPtr->Disks[row][column].dev,
1123 		    raidPtr->raid_cinfo[row][column].ci_vp, clabel );
1124 
1125 		retcode = copyout((caddr_t) clabel,
1126 				  (caddr_t) *clabel_ptr,
1127 				  sizeof(RF_ComponentLabel_t));
1128 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1129 		return (retcode);
1130 
1131 	case RAIDFRAME_SET_COMPONENT_LABEL:
1132 		clabel = (RF_ComponentLabel_t *) data;
1133 
1134 		/* XXX check the label for valid stuff... */
1135 		/*
1136 		 * Note that some things *should not* get modified --
1137 		 * the user should be re-initing the labels instead of
1138 		 * trying to patch things.
1139 		 */
1140 
1141 #ifdef	RAIDDEBUG
1142 		printf("Got component label:\n");
1143 		printf("Version: %d\n",clabel->version);
1144 		printf("Serial Number: %d\n",clabel->serial_number);
1145 		printf("Mod counter: %d\n",clabel->mod_counter);
1146 		printf("Row: %d\n", clabel->row);
1147 		printf("Column: %d\n", clabel->column);
1148 		printf("Num Rows: %d\n", clabel->num_rows);
1149 		printf("Num Columns: %d\n", clabel->num_columns);
1150 		printf("Clean: %d\n", clabel->clean);
1151 		printf("Status: %d\n", clabel->status);
1152 #endif	/* RAIDDEBUG */
1153 
1154 		row = clabel->row;
1155 		column = clabel->column;
1156 
1157 		if ((row < 0) || (row >= raidPtr->numRow) ||
1158 		    (column < 0) || (column >= raidPtr->numCol)) {
1159 			return(EINVAL);
1160 		}
1161 
1162  		/* XXX this isn't allowed to do anything for now :-) */
1163 #if 0
1164 		raidwrite_component_label(raidPtr->Disks[row][column].dev,
1165 		    raidPtr->raid_cinfo[row][column].ci_vp, clabel );
1166 #endif
1167 		return (0);
1168 
1169 	case RAIDFRAME_INIT_LABELS:
1170 		clabel = (RF_ComponentLabel_t *) data;
1171 		/*
1172 		 * We only want the serial number from the above.
1173 		 * We get all the rest of the information from
1174 		 * the config that was used to create this RAID
1175 		 * set.
1176 		 */
1177 
1178 		raidPtr->serial_number = clabel->serial_number;
1179 
1180 		raid_init_component_label(raidPtr, &ci_label);
1181 		ci_label.serial_number = clabel->serial_number;
1182 
1183 		for(row=0;row<raidPtr->numRow;row++) {
1184 			ci_label.row = row;
1185 			for(column=0;column<raidPtr->numCol;column++) {
1186 				diskPtr = &raidPtr->Disks[row][column];
1187 				if (!RF_DEAD_DISK(diskPtr->status)) {
1188 					ci_label.partitionSize =
1189 					    diskPtr->partitionSize;
1190 					ci_label.column = column;
1191 					raidwrite_component_label(
1192 					    raidPtr->Disks[row][column].dev,
1193 					    raidPtr->raid_cinfo[row][column].ci_vp,
1194 					    &ci_label );
1195 				}
1196 			}
1197 		}
1198 
1199 		return (retcode);
1200 
1201 	case RAIDFRAME_REWRITEPARITY:
1202 
1203 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1204 			/* Parity for RAID 0 is trivially correct. */
1205 			raidPtr->parity_good = RF_RAID_CLEAN;
1206 			return(0);
1207 		}
1208 
1209 
1210 		if (raidPtr->parity_rewrite_in_progress == 1) {
1211 			/* Re-write is already in progress ! */
1212 			return(EINVAL);
1213 		}
1214 
1215 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1216 					   rf_RewriteParityThread,
1217 					   raidPtr,"raid_parity");
1218 
1219 		return (retcode);
1220 
1221 	case RAIDFRAME_SET_AUTOCONFIG:
1222 		d = rf_set_autoconfig(raidPtr, *(int *) data);
1223 		db1_printf(("New autoconfig value is: %d\n", d));
1224 		*(int *) data = d;
1225 		return (retcode);
1226 
1227 	case RAIDFRAME_SET_ROOT:
1228 		d = rf_set_rootpartition(raidPtr, *(int *) data);
1229 		db1_printf(("New rootpartition value is: %d\n", d));
1230 		*(int *) data = d;
1231 		return (retcode);
1232 
1233 
1234 	case RAIDFRAME_ADD_HOT_SPARE:
1235 		sparePtr = (RF_SingleComponent_t *) data;
1236 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1237 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1238 		return(retcode);
1239 
1240 	case RAIDFRAME_REMOVE_HOT_SPARE:
1241 		return(retcode);
1242 
1243 	case RAIDFRAME_DELETE_COMPONENT:
1244 		componentPtr = (RF_SingleComponent_t *)data;
1245 		memcpy( &component, componentPtr,
1246 			sizeof(RF_SingleComponent_t));
1247 		retcode = rf_delete_component(raidPtr, &component);
1248 		return(retcode);
1249 
1250 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1251 		componentPtr = (RF_SingleComponent_t *)data;
1252 		memcpy( &component, componentPtr,
1253 			sizeof(RF_SingleComponent_t));
1254 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
1255 		return(retcode);
1256 
1257 	case RAIDFRAME_REBUILD_IN_PLACE:
1258 
1259 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1260 			/* Can't do this on a RAID 0 !! */
1261 			return(EINVAL);
1262 		}
1263 
1264 		if (raidPtr->recon_in_progress == 1) {
1265 			/* A reconstruct is already in progress ! */
1266 			return(EINVAL);
1267 		}
1268 
1269 		componentPtr = (RF_SingleComponent_t *) data;
1270 		memcpy( &component, componentPtr,
1271 			sizeof(RF_SingleComponent_t));
1272 		row = component.row;
1273 		column = component.column;
1274 		db1_printf(("Rebuild: %d %d\n",row, column));
1275 		if ((row < 0) || (row >= raidPtr->numRow) ||
1276 		    (column < 0) || (column >= raidPtr->numCol)) {
1277 			return(EINVAL);
1278 		}
1279 
1280 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1281 		if (rrcopy == NULL)
1282 			return(ENOMEM);
1283 
1284 		rrcopy->raidPtr = (void *) raidPtr;
1285 		rrcopy->row = row;
1286 		rrcopy->col = column;
1287 
1288 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1289 					   rf_ReconstructInPlaceThread,
1290 					   rrcopy,"raid_reconip");
1291 
1292 		return (retcode);
1293 
1294 	case RAIDFRAME_GET_INFO:
1295 		if (!raidPtr->valid)
1296 			return (ENODEV);
1297 		ucfgp = (RF_DeviceConfig_t **) data;
1298 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1299 			  (RF_DeviceConfig_t *));
1300 		if (d_cfg == NULL)
1301 			return (ENOMEM);
1302 		bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1303 		d_cfg->rows = raidPtr->numRow;
1304 		d_cfg->cols = raidPtr->numCol;
1305 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1306 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
1307 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1308 			return (ENOMEM);
1309 		}
1310 		d_cfg->nspares = raidPtr->numSpare;
1311 		if (d_cfg->nspares >= RF_MAX_DISKS) {
1312 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1313 			return (ENOMEM);
1314 		}
1315 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1316 		d = 0;
1317 		for (i = 0; i < d_cfg->rows; i++) {
1318 			for (j = 0; j < d_cfg->cols; j++) {
1319 				d_cfg->devs[d] = raidPtr->Disks[i][j];
1320 				d++;
1321 			}
1322 		}
1323 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1324 			d_cfg->spares[i] = raidPtr->Disks[0][j];
1325 		}
1326 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1327 				  sizeof(RF_DeviceConfig_t));
1328 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1329 
1330 		return (retcode);
1331 
1332 	case RAIDFRAME_CHECK_PARITY:
1333 		*(int *) data = raidPtr->parity_good;
1334 		return (0);
1335 
1336 	case RAIDFRAME_RESET_ACCTOTALS:
1337 		bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1338 		return (0);
1339 
1340   	case RAIDFRAME_GET_ACCTOTALS:
1341 		totals = (RF_AccTotals_t *) data;
1342 		*totals = raidPtr->acc_totals;
1343 		return (0);
1344 
1345 	case RAIDFRAME_KEEP_ACCTOTALS:
1346 		raidPtr->keep_acc_totals = *(int *)data;
1347 		return (0);
1348 
1349 	case RAIDFRAME_GET_SIZE:
1350 		*(int *) data = raidPtr->totalSectors;
1351 		return (0);
1352 
1353 	/* Fail a disk & optionally start reconstruction. */
1354 	case RAIDFRAME_FAIL_DISK:
1355 		rr = (struct rf_recon_req *)data;
1356 
1357 		if (rr->row < 0 || rr->row >= raidPtr->numRow ||
1358 		    rr->col < 0 || rr->col >= raidPtr->numCol)
1359 			return (EINVAL);
1360 
1361 		db1_printf(("raid%d: Failing the disk: row: %d col: %d\n",
1362 		    unit, rr->row, rr->col));
1363 
1364 		/*
1365 		 * Make a copy of the recon request so that we don't
1366 		 * rely on the user's buffer.
1367 		 */
1368 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1369 		if (rrcopy == NULL)
1370 			return(ENOMEM);
1371 		bcopy(rr, rrcopy, sizeof(*rr));
1372 		rrcopy->raidPtr = (void *)raidPtr;
1373 
1374 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1375 					   rf_ReconThread,
1376 					   rrcopy,"raid_recon");
1377 		return (0);
1378 
1379 	/*
1380 	 * Invoke a copyback operation after recon on whatever
1381 	 * disk needs it, if any.
1382 	 */
1383 	case RAIDFRAME_COPYBACK:
1384 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1385 			/* This makes no sense on a RAID 0 !! */
1386 			return(EINVAL);
1387 		}
1388 
1389 		if (raidPtr->copyback_in_progress == 1) {
1390 			/* Copyback is already in progress ! */
1391 			return(EINVAL);
1392 		}
1393 
1394 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1395 					   rf_CopybackThread,
1396 					   raidPtr,"raid_copyback");
1397 		return (retcode);
1398 
1399 	/* Return the percentage completion of reconstruction. */
1400 	case RAIDFRAME_CHECK_RECON_STATUS:
1401 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1402 			/*
1403 			 * This makes no sense on a RAID 0, so tell the
1404 			 * user it's done.
1405 			 */
1406 			*(int *) data = 100;
1407 			return(0);
1408 		}
1409 		row = 0; /* XXX we only consider a single row... */
1410 		if (raidPtr->status[row] != rf_rs_reconstructing)
1411 			*(int *)data = 100;
1412 		else
1413 			*(int *)data =
1414 			    raidPtr->reconControl[row]->percentComplete;
1415 		return (0);
1416 
1417 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1418 		progressInfoPtr = (RF_ProgressInfo_t **) data;
1419 		row = 0; /* XXX we only consider a single row... */
1420 		if (raidPtr->status[row] != rf_rs_reconstructing) {
1421 			progressInfo.remaining = 0;
1422 			progressInfo.completed = 100;
1423 			progressInfo.total = 100;
1424 		} else {
1425 			progressInfo.total =
1426 				raidPtr->reconControl[row]->numRUsTotal;
1427 			progressInfo.completed =
1428 				raidPtr->reconControl[row]->numRUsComplete;
1429 			progressInfo.remaining = progressInfo.total -
1430 				progressInfo.completed;
1431 		}
1432 		retcode = copyout((caddr_t) &progressInfo,
1433 				  (caddr_t) *progressInfoPtr,
1434 				  sizeof(RF_ProgressInfo_t));
1435 		return (retcode);
1436 
1437 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1438 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1439 			/*
1440 			 * This makes no sense on a RAID 0, so tell the
1441 			 * user it's done.
1442 			 */
1443 			*(int *) data = 100;
1444 			return(0);
1445 		}
1446 		if (raidPtr->parity_rewrite_in_progress == 1) {
1447 			*(int *) data = 100 *
1448 				raidPtr->parity_rewrite_stripes_done /
1449 				raidPtr->Layout.numStripe;
1450 		} else {
1451 			*(int *) data = 100;
1452 		}
1453 		return (0);
1454 
1455 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1456 		progressInfoPtr = (RF_ProgressInfo_t **) data;
1457 		if (raidPtr->parity_rewrite_in_progress == 1) {
1458 			progressInfo.total = raidPtr->Layout.numStripe;
1459 			progressInfo.completed =
1460 				raidPtr->parity_rewrite_stripes_done;
1461 			progressInfo.remaining = progressInfo.total -
1462 				progressInfo.completed;
1463 		} else {
1464 			progressInfo.remaining = 0;
1465 			progressInfo.completed = 100;
1466 			progressInfo.total = 100;
1467 		}
1468 		retcode = copyout((caddr_t) &progressInfo,
1469 				  (caddr_t) *progressInfoPtr,
1470 				  sizeof(RF_ProgressInfo_t));
1471 		return (retcode);
1472 
1473 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1474 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1475 			/* This makes no sense on a RAID 0 !! */
1476 			*(int *) data = 100;
1477 			return(0);
1478 		}
1479 		if (raidPtr->copyback_in_progress == 1) {
1480 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
1481 				raidPtr->Layout.numStripe;
1482 		} else {
1483 			*(int *) data = 100;
1484 		}
1485 		return (0);
1486 
1487 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1488 		progressInfoPtr = (RF_ProgressInfo_t **) data;
1489 		if (raidPtr->copyback_in_progress == 1) {
1490 			progressInfo.total = raidPtr->Layout.numStripe;
1491 			progressInfo.completed =
1492 				raidPtr->copyback_stripes_done;
1493 			progressInfo.remaining = progressInfo.total -
1494 				progressInfo.completed;
1495 		} else {
1496 			progressInfo.remaining = 0;
1497 			progressInfo.completed = 100;
1498 			progressInfo.total = 100;
1499 		}
1500 		retcode = copyout((caddr_t) &progressInfo,
1501 				  (caddr_t) *progressInfoPtr,
1502 				  sizeof(RF_ProgressInfo_t));
1503 		return (retcode);
1504 
1505 #if 0
1506 	case RAIDFRAME_SPARET_WAIT:
1507 		/*
1508 		 * The sparetable daemon calls this to wait for the
1509 		 * kernel to need a spare table.
1510 		 * This ioctl does not return until a spare table is needed.
1511 		 * XXX -- Calling mpsleep here in the ioctl code is almost
1512 		 * certainly wrong and evil. -- XXX
1513 		 * XXX -- I should either compute the spare table in the
1514 		 * kernel, or have a different. -- XXX
1515 		 * XXX -- Interface (a different character device) for
1516 		 * delivering the table. -- XXX
1517 		 */
1518 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1519 		while (!rf_sparet_wait_queue)
1520 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH,
1521 			    "sparet wait", 0,
1522 			    (void *)simple_lock_addr(rf_sparet_wait_mutex),
1523 			    MS_LOCK_SIMPLE);
1524 		waitreq = rf_sparet_wait_queue;
1525 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1526 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1527 
1528 		*((RF_SparetWait_t *)data) = *waitreq;
1529 
1530 		RF_Free(waitreq, sizeof *waitreq);
1531 		return (0);
1532 
1533 	case RAIDFRAME_ABORT_SPARET_WAIT:
1534 		/*
1535 		 * Wakes up a process waiting on SPARET_WAIT and puts an
		 * error code in it that will cause the daemon to exit.
1537 		 */
1538 		RF_Malloc(waitreq, sizeof (*waitreq), (RF_SparetWait_t *));
1539 		waitreq->fcol = -1;
1540 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1541 		waitreq->next = rf_sparet_wait_queue;
1542 		rf_sparet_wait_queue = waitreq;
1543 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1544 		wakeup(&rf_sparet_wait_queue);
1545 		return (0);
1546 
1547 	case RAIDFRAME_SEND_SPARET:
1548 		/*
1549 		 * Used by the spare table daemon to deliver a spare table
1550 		 * into the kernel.
1551 		 */
1552 
1553 		/* Install the spare table. */
1554 		retcode = rf_SetSpareTable(raidPtr,*(void **)data);
1555 
1556 		/*
1557 		 * Respond to the requestor.  The return status of the
1558 		 * spare table installation is passed in the "fcol" field.
1559 		 */
1560 		RF_Malloc(waitreq, sizeof *waitreq, (RF_SparetWait_t *));
1561 		waitreq->fcol = retcode;
1562 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1563 		waitreq->next = rf_sparet_resp_queue;
1564 		rf_sparet_resp_queue = waitreq;
1565 		wakeup(&rf_sparet_resp_queue);
1566 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1567 
1568 		return (retcode);
1569 #endif
1570 	/* Fall through to the os-specific code below. */
1571 	default:
1572 		break;
1573 	}
1574 
1575 	if (!raidPtr->valid)
1576 		return (EINVAL);
1577 
1578 	/*
1579 	 * Add support for "regular" device ioctls here.
1580 	 */
1581 	switch (cmd) {
1582 	case DIOCGDINFO:
1583 		*(struct disklabel *)data = *(rs->sc_dkdev.dk_label);
1584 		break;
1585 
1586 	case DIOCGPART:
1587 		((struct partinfo *)data)->disklab = rs->sc_dkdev.dk_label;
1588 		((struct partinfo *)data)->part =
1589 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1590 		break;
1591 
1592 	case DIOCWDINFO:
1593 	case DIOCSDINFO:
1594 	{
1595 		struct disklabel *lp;
1596 		lp = (struct disklabel *)data;
1597 
1598 		if ((error = raidlock(rs)) != 0)
1599 			return (error);
1600 
1601 		rs->sc_flags |= RAIDF_LABELLING;
1602 
1603 		error = setdisklabel(rs->sc_dkdev.dk_label,
1604 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
1605 		if (error == 0) {
1606 			if (cmd == DIOCWDINFO)
1607 				error = writedisklabel(RAIDLABELDEV(dev),
1608 				    raidstrategy, rs->sc_dkdev.dk_label,
1609 				    rs->sc_dkdev.dk_cpulabel);
1610 		}
1611 
1612 		rs->sc_flags &= ~RAIDF_LABELLING;
1613 
1614 		raidunlock(rs);
1615 
1616 		if (error)
1617 			return (error);
1618 		break;
1619 	}
1620 
1621 	case DIOCWLABEL:
1622 		if (*(int *)data != 0)
1623 			rs->sc_flags |= RAIDF_WLABEL;
1624 		else
1625 			rs->sc_flags &= ~RAIDF_WLABEL;
1626 		break;
1627 
1628 	case DIOCGPDINFO:
1629   		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1630   		break;
1631 
1632 	default:
1633 		retcode = ENOTTY;
1634 	}
1635 
1636 	return (retcode);
1637 }
1638 
1639 /*
1640  * raidinit -- Complete the rest of the initialization for the
1641  * RAIDframe device.
1642  */
1643 void
raidinit(RF_Raid_t * raidPtr)1644 raidinit(RF_Raid_t *raidPtr)
1645 {
1646 	struct raid_softc *rs;
1647 	struct cfdata	*cf;
1648 	int unit;
1649 
1650 	unit = raidPtr->raidid;
1651 
1652 	rs = &raid_softc[unit];
1653 	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1654 		0, 0, "raidpl", NULL);
1655 
1656 	/* XXX should check return code first... */
1657 	rs->sc_flags |= RAIDF_INITED;
1658 
1659 	/* XXX doesn't check bounds. */
1660 	snprintf(rs->sc_xname, sizeof rs->sc_xname, "raid%d", unit);
1661 
1662 	rs->sc_dkdev.dk_name = rs->sc_xname;
1663 
1664 	/*
1665 	 * disk_attach actually creates space for the CPU disklabel, among
1666 	 * other things, so it's critical to call this *BEFORE* we try
1667 	 * putzing with disklabels.
1668 	 */
1669 	disk_attach(&rs->sc_dkdev);
1670 
1671 	/*
1672 	 * XXX There may be a weird interaction here between this, and
1673 	 * protectedSectors, as used in RAIDframe.
1674 	 */
1675 	rs->sc_size = raidPtr->totalSectors;
1676 
1677 	/*
1678 	 * config_attach the raid device into the device tree.
1679 	 * For autoconf rootdev selection...
1680 	 */
1681 	cf = malloc(sizeof(struct cfdata), M_RAIDFRAME, M_NOWAIT);
1682 	if (cf == NULL) {
1683 		printf("WARNING: no memory for cfdata struct\n");
1684 		return;
1685 	}
1686 	bzero(cf, sizeof(struct cfdata));
1687 
1688 	cf->cf_attach = &raid_ca;
1689 	cf->cf_driver = &raid_cd;
1690 	cf->cf_unit   = unit;
1691 
1692 	config_attach(NULL, cf, NULL, NULL);
1693 }
1694 
1695 /*
1696  * Wake up the daemon & tell it to get us a spare table.
1697  * XXX
1698  * The entries in the queues should be tagged with the raidPtr so that
1699  * in the extremely rare case that two recons happen at once, we know
1700  * which devices were requesting a spare table.
1701  * XXX
1702  *
1703  * XXX This code is not currently used. GO
1704  */
1705 int
rf_GetSpareTableFromDaemon(RF_SparetWait_t * req)1706 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1707 {
1708 	int retcode;
1709 
1710 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1711 	req->next = rf_sparet_wait_queue;
1712 	rf_sparet_wait_queue = req;
1713 	wakeup(&rf_sparet_wait_queue);
1714 
1715 	/* mpsleep unlocks the mutex. */
1716 	while (!rf_sparet_resp_queue) {
1717 		tsleep(&rf_sparet_resp_queue, PRIBIO,
1718 		    "RAIDframe getsparetable", 0);
1719 	}
1720 	req = rf_sparet_resp_queue;
1721 	rf_sparet_resp_queue = req->next;
1722 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1723 
1724 	retcode = req->fcol;
1725 	/* This is not the same req as we alloc'd. */
1726 	RF_Free(req, sizeof *req);
1727 	return (retcode);
1728 }
1729 
1730 /*
1731  * A wrapper around rf_DoAccess that extracts appropriate info from the
1732  * bp and passes it down.
1733  * Any calls originating in the kernel must use non-blocking I/O.
1734  * Do some extra sanity checking to return "appropriate" error values for
1735  * certain conditions (to make some standard utilities work).
1736  *
1737  * Formerly known as: rf_DoAccessKernel
1738  */
1739 void
raidstart(RF_Raid_t * raidPtr)1740 raidstart(RF_Raid_t *raidPtr)
1741 {
1742 	RF_SectorCount_t num_blocks, pb, sum;
1743 	RF_RaidAddr_t raid_addr;
1744 	int retcode;
1745 	struct partition *pp;
1746 	daddr_t blocknum;
1747 	int unit;
1748 	struct raid_softc *rs;
1749 	int	do_async;
1750 	struct buf *bp;
1751 
1752 	unit = raidPtr->raidid;
1753 	rs = &raid_softc[unit];
1754 
1755 	/* Quick check to see if anything has died recently. */
1756 	RF_LOCK_MUTEX(raidPtr->mutex);
1757 	if (raidPtr->numNewFailures > 0) {
1758 		rf_update_component_labels(raidPtr,
1759 					   RF_NORMAL_COMPONENT_UPDATE);
1760 		raidPtr->numNewFailures--;
1761 	}
1762 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1763 
1764 	/* Check to see if we're at the limit... */
1765 	RF_LOCK_MUTEX(raidPtr->mutex);
1766 	while (raidPtr->openings > 0) {
1767 		RF_UNLOCK_MUTEX(raidPtr->mutex);
1768 
1769 		bp = rs->sc_q.b_actf;
1770 		if (bp == NULL) {
1771 			/* Nothing more to do. */
1772 			return;
1773 		}
1774 		rs->sc_q.b_actf = bp->b_actf;
1775 
1776 		/*
1777 		 * Ok, for the bp we have here, bp->b_blkno is relative to the
1778 		 * partition... We need to make it absolute to the underlying
1779 		 * device...
1780 		 */
1781 
1782 		blocknum = bp->b_blkno;
1783 		if (DISKPART(bp->b_dev) != RAW_PART) {
1784 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1785 			blocknum += pp->p_offset;
1786 		}
1787 
1788 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1789 			    (int) blocknum));
1790 
1791 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1792 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1793 
1794 		/*
1795 		 * *THIS* is where we adjust what block we're going to...
1796 		 * But DO NOT TOUCH bp->b_blkno !!!
1797 		 */
1798 		raid_addr = blocknum;
1799 
1800 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1801 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1802 		sum = raid_addr + num_blocks + pb;
1803 		if (1 || rf_debugKernelAccess) {
1804 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d)"
1805 			    " (%d)\n", (int)raid_addr, (int)sum,
1806 			    (int)num_blocks, (int)pb, (int)bp->b_resid));
1807 		}
1808 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1809 		    || (sum < num_blocks) || (sum < pb)) {
1810 			bp->b_error = ENOSPC;
1811 			bp->b_flags |= B_ERROR;
1812 			bp->b_resid = bp->b_bcount;
1813 			/* db1_printf(("%s: Calling biodone on 0x%x\n",
1814 			    __func__, bp)); */
1815 			splassert(IPL_BIO);
1816 			biodone(bp);
1817 			RF_LOCK_MUTEX(raidPtr->mutex);
1818 			continue;
1819 		}
1820 		/*
1821 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel().
1822 		 */
1823 
1824 		if (bp->b_bcount & raidPtr->sectorMask) {
1825 			bp->b_error = EINVAL;
1826 			bp->b_flags |= B_ERROR;
1827 			bp->b_resid = bp->b_bcount;
1828 			/* db1_printf(("%s: Calling biodone on 0x%x\n",
1829 			    __func__, bp)); */
1830 			splassert(IPL_BIO);
1831 			biodone(bp);
1832 			RF_LOCK_MUTEX(raidPtr->mutex);
1833 			continue;
1834 
1835 		}
1836 		db1_printf(("Calling DoAccess..\n"));
1837 
1838 
1839 		RF_LOCK_MUTEX(raidPtr->mutex);
1840 		raidPtr->openings--;
1841 		RF_UNLOCK_MUTEX(raidPtr->mutex);
1842 
1843 		/*
1844 		 * Everything is async.
1845 		 */
1846 		do_async = 1;
1847 
1848 		disk_busy(&rs->sc_dkdev);
1849 
1850 		/*
1851 		 * XXX we're still at splbio() here...  Do we *really*
1852 		 * need to be ?
1853 		 */
1854 
1855 		/*
1856 		 * Don't ever condition on bp->b_flags & B_WRITE.
1857 		 * Always condition on B_READ instead.
1858 		 */
1859 
1860 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1861 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1862 				      do_async, raid_addr, num_blocks,
1863 				      bp->b_data, bp, NULL, NULL,
1864 				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1865 
1866 		RF_LOCK_MUTEX(raidPtr->mutex);
1867 	}
1868 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1869 }
1870 
1871 /* Invoke an I/O from kernel mode.  Disk queue should be locked upon entry. */
1872 
1873 int
rf_DispatchKernelIO(RF_DiskQueue_t * queue,RF_DiskQueueData_t * req)1874 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1875 {
1876 	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1877 	struct buf *bp;
1878 	struct raidbuf *raidbp = NULL;
1879 	struct raid_softc *rs;
1880 	int unit;
1881 	/*int s = splbio();*/	/* Want to test this. */
1882 
1883 	/*
1884 	 * XXX along with the vnode, we also need the softc associated with
1885 	 * this device...
1886 	 */
1887 	req->queue = queue;
1888 
1889 	unit = queue->raidPtr->raidid;
1890 
1891 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
1892 
1893 	if (unit >= numraid) {
1894 		printf("Invalid unit number: %d %d\n", unit, numraid);
1895 		panic("Invalid Unit number in rf_DispatchKernelIO");
1896 	}
1897 
1898 	rs = &raid_softc[unit];
1899 
1900 	bp = req->bp;
1901 
1902 #if 1
1903 	/*
1904 	 * XXX When there is a physical disk failure, someone is passing
1905 	 * us a buffer that contains old stuff !!  Attempt to deal with
1906 	 * this problem without taking a performance hit...
1907 	 * (not sure where the real bug is; it's buried in RAIDframe
1908 	 * somewhere) :-( GO )
1909 	 */
1910 	if (bp->b_flags & B_ERROR) {
1911 		bp->b_flags &= ~B_ERROR;
1912 	}
1913 	if (bp->b_error!=0) {
1914 		bp->b_error = 0;
1915 	}
1916 #endif
1917 
1918 	raidbp = RAIDGETBUF(rs);
1919 
1920 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
1921 
1922 	/*
1923 	 * Context for raidiodone.
1924 	 */
1925 	raidbp->rf_obp = bp;
1926 	raidbp->req = req;
1927 
1928 	LIST_INIT(&raidbp->rf_buf.b_dep);
1929 
1930 	switch (req->type) {
1931 	case RF_IO_TYPE_NOP:
1932 		/* Used primarily to unlock a locked queue. */
1933 
1934 		db1_printf(("rf_DispatchKernelIO: NOP to r %d c %d\n",
1935 		    queue->row, queue->col));
1936 
1937 		/* XXX need to do something extra here... */
1938 
1939 		/*
1940 		 * I'm leaving this in, as I've never actually seen it
1941 		 * used, and I'd like folks to report it... GO
1942 		 */
1943 		db1_printf(("WAKEUP CALLED\n"));
1944 		queue->numOutstanding++;
1945 
1946 		/* XXX need to glue the original buffer into this ?? */
1947 
1948 		rf_KernelWakeupFunc(&raidbp->rf_buf);
1949 		break;
1950 
1951 	case RF_IO_TYPE_READ:
1952 	case RF_IO_TYPE_WRITE:
1953 		if (req->tracerec) {
1954 			RF_ETIMER_START(req->tracerec->timer);
1955 		}
1956 
1957 		rf_InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1958 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
1959 		    req->sectorOffset, req->numSector,
1960 		    req->buf, rf_KernelWakeupFunc, (void *)req,
1961 		    queue->raidPtr->logBytesPerSector, req->b_proc);
1962 
1963 		if (rf_debugKernelAccess) {
1964 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
1965 			    (long)bp->b_blkno));
1966 		}
1967 		queue->numOutstanding++;
1968 		queue->last_deq_sector = req->sectorOffset;
1969 
1970 		/*
1971 		 * Acc wouldn't have been let in if there were any
1972 		 * pending reqs at any other priority.
1973 		 */
1974 		queue->curPriority = req->priority;
1975 
1976 		db1_printf(("Going for %c to unit %d row %d col %d\n",
1977 		    req->type, unit, queue->row, queue->col));
1978 		db1_printf(("sector %d count %d (%d bytes) %d\n",
1979 		    (int)req->sectorOffset, (int)req->numSector,
1980 		    (int)(req->numSector << queue->raidPtr->logBytesPerSector),
1981 		    (int)queue->raidPtr->logBytesPerSector));
1982 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1983 			raidbp->rf_buf.b_vp->v_numoutput++;
1984 		}
1985 
1986 		VOP_STRATEGY(&raidbp->rf_buf);
1987 		break;
1988 
1989 	default:
1990 		panic("bad req->type in rf_DispatchKernelIO");
1991 	}
1992 	db1_printf(("Exiting from DispatchKernelIO\n"));
1993 	/*splx(s);*/	/* want to test this */
1994 	return (0);
1995 }
1996 
1997 /*
1998  * This is the callback function associated with a I/O invoked from
1999  * kernel code.
2000  */
2001 void
rf_KernelWakeupFunc(struct buf * vbp)2002 rf_KernelWakeupFunc(struct buf *vbp)
2003 {
2004 	RF_DiskQueueData_t *req = NULL;
2005 	RF_DiskQueue_t *queue;
2006 	struct raidbuf *raidbp = (struct raidbuf *)vbp;
2007 	struct buf *bp;
2008 	struct raid_softc *rs;
2009 	int unit;
2010 	int s;
2011 
2012 	s = splbio();
2013 	db1_printf(("recovering the request queue:\n"));
2014 	req = raidbp->req;
2015 
2016 	bp = raidbp->rf_obp;
2017 
2018 	queue = (RF_DiskQueue_t *)req->queue;
2019 
2020 	if (raidbp->rf_buf.b_flags & B_ERROR) {
2021 		bp->b_flags |= B_ERROR;
2022 		bp->b_error =
2023 		    raidbp->rf_buf.b_error ? raidbp->rf_buf.b_error : EIO;
2024 	}
2025 
2026 #if 1
2027 	/* XXX Methinks this could be wrong... */
2028 	bp->b_resid = raidbp->rf_buf.b_resid;
2029 #endif
2030 
2031 	if (req->tracerec) {
2032 		RF_ETIMER_STOP(req->tracerec->timer);
2033 		RF_ETIMER_EVAL(req->tracerec->timer);
2034 		RF_LOCK_MUTEX(rf_tracing_mutex);
2035 		req->tracerec->diskwait_us +=
2036 		    RF_ETIMER_VAL_US(req->tracerec->timer);
2037 		req->tracerec->phys_io_us +=
2038 		    RF_ETIMER_VAL_US(req->tracerec->timer);
2039 		req->tracerec->num_phys_ios++;
2040 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
2041 	}
2042 
2043 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
2044 
2045 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
2046 
2047 	/*
2048 	 * XXX Ok, let's get aggressive...  If B_ERROR is set, let's go
2049 	 * ballistic, and mark the component as hosed...
2050 	 */
2051 	if (bp->b_flags & B_ERROR) {
2052 		/* Mark the disk as dead but only mark it once... */
2053 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
2054 		    rf_ds_optimal) {
2055 			printf("raid%d: IO Error.  Marking %s as failed.\n",
2056 			    unit,
2057 			    queue->raidPtr->
2058 			    Disks[queue->row][queue->col].devname);
2059 			queue->raidPtr->Disks[queue->row][queue->col].status =
2060 			    rf_ds_failed;
2061 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
2062 			queue->raidPtr->numFailures++;
2063 			queue->raidPtr->numNewFailures++;
2064 		} else {
2065 			/* Disk is already dead... */
2066 			/* printf("Disk already marked as dead!\n"); */
2067 		}
2068 	}
2069 
2070 	rs = &raid_softc[unit];
2071 	RAIDPUTBUF(rs, raidbp);
2072 
2073 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
2074 	(req->CompleteFunc)(req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
2075 
2076 	splx(s);
2077 }
2078 
2079 /*
2080  * Initialize a buf structure for doing an I/O in the kernel.
2081  */
2082 void
rf_InitBP(struct buf * bp,struct vnode * b_vp,unsigned rw_flag,dev_t dev,RF_SectorNum_t startSect,RF_SectorCount_t numSect,caddr_t buf,void (* cbFunc)(struct buf *),void * cbArg,int logBytesPerSector,struct proc * b_proc)2083 rf_InitBP(
2084 	struct buf	 *bp,
2085 	struct vnode	 *b_vp,
2086 	unsigned	  rw_flag,
2087 	dev_t		  dev,
2088 	RF_SectorNum_t	  startSect,
2089 	RF_SectorCount_t  numSect,
2090 	caddr_t		  buf,
2091 	void		(*cbFunc)(struct buf *),
2092 	void		 *cbArg,
2093 	int		  logBytesPerSector,
2094 	struct proc	 *b_proc
2095 )
2096 {
2097 	/*bp->b_flags = B_PHYS | rw_flag;*/
2098 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too ??? */
2099 	bp->b_bcount = numSect << logBytesPerSector;
2100 	bp->b_bufsize = bp->b_bcount;
2101 	bp->b_error = 0;
2102 	bp->b_dev = dev;
2103 	bp->b_data = buf;
2104 	bp->b_blkno = startSect;
2105 	bp->b_resid = bp->b_bcount;	/* XXX is this right !??!?!! */
2106 	if (bp->b_bcount == 0) {
2107 		panic("bp->b_bcount is zero in rf_InitBP!!");
2108 	}
2109 	bp->b_proc = b_proc;
2110 	bp->b_iodone = cbFunc;
2111 	bp->b_vp = b_vp;
2112 	LIST_INIT(&bp->b_dep);
2113 }
2114 
2115 void
raidgetdefaultlabel(RF_Raid_t * raidPtr,struct raid_softc * rs,struct disklabel * lp)2116 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2117     struct disklabel *lp)
2118 {
2119 	db1_printf(("Building a default label...\n"));
2120 	bzero(lp, sizeof(*lp));
2121 
2122 	/* Fabricate a label... */
2123 	lp->d_secperunit = raidPtr->totalSectors;
2124 	lp->d_secsize = raidPtr->bytesPerSector;
2125 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2126 	lp->d_ntracks = 4 * raidPtr->numCol;
2127 	lp->d_ncylinders = raidPtr->totalSectors /
2128 	    (lp->d_nsectors * lp->d_ntracks);
2129 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2130 
2131 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2132 	lp->d_type = DTYPE_RAID;
2133 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2134 	lp->d_rpm = 3600;
2135 	lp->d_interleave = 1;
2136 	lp->d_flags = 0;
2137 
2138 	lp->d_partitions[RAW_PART].p_offset = 0;
2139 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2140 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2141 	lp->d_npartitions = RAW_PART + 1;
2142 
2143 	lp->d_magic = DISKMAGIC;
2144 	lp->d_magic2 = DISKMAGIC;
2145 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2146 }
2147 
2148 /*
2149  * Read the disklabel from the raid device.
2150  * If one is not present, fake one up.
2151  */
2152 void
raidgetdisklabel(dev_t dev)2153 raidgetdisklabel(dev_t dev)
2154 {
2155 	int unit = raidunit(dev);
2156 	struct raid_softc *rs = &raid_softc[unit];
2157 	char *errstring;
2158 	struct disklabel *lp = rs->sc_dkdev.dk_label;
2159 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2160 	RF_Raid_t *raidPtr;
2161 	int i;
2162 	struct partition *pp;
2163 
2164 	db1_printf(("Getting the disklabel...\n"));
2165 
2166 	bzero(clp, sizeof(*clp));
2167 
2168 	raidPtr = raidPtrs[unit];
2169 
2170 	raidgetdefaultlabel(raidPtr, rs, lp);
2171 
2172 	/*
2173 	 * Call the generic disklabel extraction routine.
2174 	 */
2175 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, lp,
2176 	    rs->sc_dkdev.dk_cpulabel, 0);
2177 	if (errstring) {
2178 		printf("%s: %s\n", rs->sc_xname, errstring);
2179 		return;
2180 		/*raidmakedisklabel(rs);*/
2181 	}
2182 
2183 	/*
2184 	 * Sanity check whether the found disklabel is valid.
2185 	 *
2186 	 * This is necessary since total size of the raid device
2187 	 * may vary when an interleave is changed even though exactly
2188 	 * same componets are used, and old disklabel may used
2189 	 * if that is found.
2190 	 */
2191 #ifdef	RAIDDEBUG
2192 	if (lp->d_secperunit != rs->sc_size)
2193 		printf("WARNING: %s: "
2194 		    "total sector size in disklabel (%d) != "
2195 		    "the size of raid (%ld)\n", rs->sc_xname,
2196 		    lp->d_secperunit, (long) rs->sc_size);
2197 #endif	/* RAIDDEBUG */
2198 	for (i = 0; i < lp->d_npartitions; i++) {
2199 		pp = &lp->d_partitions[i];
2200 		if (pp->p_offset + pp->p_size > rs->sc_size)
2201 			printf("WARNING: %s: end of partition `%c' "
2202 			    "exceeds the size of raid (%ld)\n",
2203 			    rs->sc_xname, 'a' + i, (long) rs->sc_size);
2204 	}
2205 }
2206 
2207 /*
2208  * Take care of things one might want to take care of in the event
2209  * that a disklabel isn't present.
2210  */
2211 void
raidmakedisklabel(struct raid_softc * rs)2212 raidmakedisklabel(struct raid_softc *rs)
2213 {
2214 	struct disklabel *lp = rs->sc_dkdev.dk_label;
2215 	db1_printf(("Making a label..\n"));
2216 
2217 	/*
2218 	 * For historical reasons, if there's no disklabel present
2219 	 * the raw partition must be marked FS_BSDFFS.
2220 	 */
2221 
2222 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2223 
2224 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2225 
2226 	lp->d_checksum = dkcksum(lp);
2227 }
2228 
2229 /*
2230  * Lookup the provided name in the filesystem.	If the file exists,
2231  * is a valid block device, and isn't being used by anyone else,
2232  * set *vpp to the file's vnode.
2233  * You'll find the original of this in ccd.c
2234  */
2235 int
raidlookup(char * path,struct proc * p,struct vnode ** vpp)2236 raidlookup(char *path, struct proc *p, struct vnode **vpp /* result */)
2237 {
2238 	struct nameidata nd;
2239 	struct vnode *vp;
2240 	struct vattr va;
2241 	int error;
2242 
2243 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2244 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
2245 #ifdef	RAIDDEBUG
2246 		printf("RAIDframe: vn_open returned %d\n", error);
2247 #endif	/* RAIDDEBUG */
2248 		return (error);
2249 	}
2250 	vp = nd.ni_vp;
2251 	if (vp->v_usecount > 1) {
2252 		VOP_UNLOCK(vp, 0, p);
2253 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2254 		return (EBUSY);
2255 	}
2256 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2257 		VOP_UNLOCK(vp, 0, p);
2258 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2259 		return (error);
2260 	}
2261 	/* XXX: eventually we should handle VREG, too. */
2262 	if (va.va_type != VBLK) {
2263 		VOP_UNLOCK(vp, 0, p);
2264 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2265 		return (ENOTBLK);
2266 	}
2267 	VOP_UNLOCK(vp, 0, p);
2268 	*vpp = vp;
2269 	return (0);
2270 }
2271 
2272 /*
2273  * Wait interruptibly for an exclusive lock.
2274  *
2275  * XXX
2276  * Several drivers do this; it should be abstracted and made MP-safe.
2277  * (Hmm... where have we seen this warning before :->  GO )
2278  */
2279 int
raidlock(struct raid_softc * rs)2280 raidlock(struct raid_softc *rs)
2281 {
2282 	int error;
2283 
2284 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2285 		rs->sc_flags |= RAIDF_WANTED;
2286 		if ((error = tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2287 			return (error);
2288 	}
2289 	rs->sc_flags |= RAIDF_LOCKED;
2290 	return (0);
2291 }
2292 
2293 /*
2294  * Unlock and wake up any waiters.
2295  */
2296 void
raidunlock(struct raid_softc * rs)2297 raidunlock(struct raid_softc *rs)
2298 {
2299 	rs->sc_flags &= ~RAIDF_LOCKED;
2300 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2301 		rs->sc_flags &= ~RAIDF_WANTED;
2302 		wakeup(rs);
2303 	}
2304 }
2305 
2306 
/*
 * Location and size of the on-disk component label.  The read and write
 * routines below divide the offset by DEV_BSIZE to get the block number
 * and use the size for both the buffer and the transfer length.
 */
#define	RF_COMPONENT_INFO_OFFSET	16384	/* bytes */
#define	RF_COMPONENT_INFO_SIZE		 1024	/* bytes */
2309 
2310 int
raidmarkclean(dev_t dev,struct vnode * b_vp,int mod_counter)2311 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2312 {
2313 	RF_ComponentLabel_t clabel;
2314 	raidread_component_label(dev, b_vp, &clabel);
2315 	clabel.mod_counter = mod_counter;
2316 	clabel.clean = RF_RAID_CLEAN;
2317 	raidwrite_component_label(dev, b_vp, &clabel);
2318 	return(0);
2319 }
2320 
2321 
2322 int
raidmarkdirty(dev_t dev,struct vnode * b_vp,int mod_counter)2323 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2324 {
2325 	RF_ComponentLabel_t clabel;
2326 	raidread_component_label(dev, b_vp, &clabel);
2327 	clabel.mod_counter = mod_counter;
2328 	clabel.clean = RF_RAID_DIRTY;
2329 	raidwrite_component_label(dev, b_vp, &clabel);
2330 	return(0);
2331 }
2332 
2333 /* ARGSUSED */
2334 int
raidread_component_label(dev_t dev,struct vnode * b_vp,RF_ComponentLabel_t * clabel)2335 raidread_component_label(dev_t dev, struct vnode *b_vp,
2336     RF_ComponentLabel_t *clabel)
2337 {
2338 	struct buf *bp;
2339 	int error;
2340 
2341 	/*
2342 	 * XXX should probably ensure that we don't try to do this if
2343 	 * someone has changed rf_protected_sectors.
2344 	 */
2345 
2346 	if (b_vp == NULL) {
2347 		/*
2348 		 * For whatever reason, this component is not valid.
2349 		 * Don't try to read a component label from it.
2350 		 */
2351 		return(EINVAL);
2352 	}
2353 
2354 	/* Get a block of the appropriate size... */
2355 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2356 	bp->b_dev = dev;
2357 
2358 	/* Get our ducks in a row for the read. */
2359 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2360 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2361 	bp->b_flags |= B_READ;
2362  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2363 
2364 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2365 
2366 	error = biowait(bp);
2367 
2368 	if (!error) {
2369 		memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t));
2370 #if 0
2371 		rf_print_component_label( clabel );
2372 #endif
2373 	} else {
2374 		db1_printf(("Failed to read RAID component label!\n"));
2375 	}
2376 
2377 	brelse(bp);
2378 	return(error);
2379 }
2380 
2381 /* ARGSUSED */
2382 int
raidwrite_component_label(dev_t dev,struct vnode * b_vp,RF_ComponentLabel_t * clabel)2383 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
2384     RF_ComponentLabel_t *clabel)
2385 {
2386 	struct buf *bp;
2387 	int error;
2388 
2389 	/* Get a block of the appropriate size... */
2390 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2391 	bp->b_dev = dev;
2392 
2393 	/* Get our ducks in a row for the write. */
2394 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2395 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2396 	bp->b_flags |= B_WRITE;
2397  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2398 
2399 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2400 
2401 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2402 
2403 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2404 	error = biowait(bp);
2405 	brelse(bp);
2406 	if (error) {
2407 		printf("Failed to write RAID component info!\n");
2408 	}
2409 
2410 	return(error);
2411 }
2412 
2413 void
rf_markalldirty(RF_Raid_t * raidPtr)2414 rf_markalldirty(RF_Raid_t *raidPtr)
2415 {
2416 	RF_ComponentLabel_t clabel;
2417 	int r,c;
2418 
2419 	raidPtr->mod_counter++;
2420 	for (r = 0; r < raidPtr->numRow; r++) {
2421 		for (c = 0; c < raidPtr->numCol; c++) {
2422 			/*
2423 			 * We don't want to touch (at all) a disk that has
2424 			 * failed.
2425 			 */
2426 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2427 				raidread_component_label(
2428 				    raidPtr->Disks[r][c].dev,
2429 				    raidPtr->raid_cinfo[r][c].ci_vp, &clabel);
2430 				if (clabel.status == rf_ds_spared) {
2431 					/*
2432 					 * XXX do something special...
2433 					 * But whatever you do, don't
2434 					 * try to access it !!!
2435 					 */
2436 				} else {
2437 #if 0
2438 					clabel.status =
2439 					    raidPtr->Disks[r][c].status;
2440 					raidwrite_component_label(
2441 					    raidPtr->Disks[r][c].dev,
2442 					    raidPtr->raid_cinfo[r][c].ci_vp,
2443 					    &clabel);
2444 #endif
2445 					raidmarkdirty(
2446 					    raidPtr->Disks[r][c].dev,
2447 					    raidPtr->raid_cinfo[r][c].ci_vp,
2448 					    raidPtr->mod_counter);
2449 				}
2450 			}
2451 		}
2452 	}
2453 	/*printf("Component labels marked dirty.\n");*/
2454 #if 0
2455 	for( c = 0; c < raidPtr->numSpare ; c++) {
2456 		sparecol = raidPtr->numCol + c;
2457 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2458 			/*
2459 			 * XXX This is where we get fancy and map this spare
2460 			 * into it's correct spot in the array.
2461 			 */
2462 			/*
2463 			 * We claim this disk is "optimal" if it's
2464 			 * rf_ds_used_spare, as that means it should be
2465 			 * directly substitutable for the disk it replaced.
2466 			 * We note that too...
2467 			 */
2468 
2469 			for(i=0;i<raidPtr->numRow;i++) {
2470 				for(j=0;j<raidPtr->numCol;j++) {
2471 					if ((raidPtr->Disks[i][j].spareRow ==
2472 					     r) &&
2473 					    (raidPtr->Disks[i][j].spareCol ==
2474 					     sparecol)) {
2475 						srow = r;
2476 						scol = sparecol;
2477 						break;
2478 					}
2479 				}
2480 			}
2481 
2482 			raidread_component_label(
2483 			    raidPtr->Disks[r][sparecol].dev,
2484 			    raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel);
2485 			/* Make sure status is noted. */
2486 			clabel.version = RF_COMPONENT_LABEL_VERSION;
2487 			clabel.mod_counter = raidPtr->mod_counter;
2488 			clabel.serial_number = raidPtr->serial_number;
2489 			clabel.row = srow;
2490 			clabel.column = scol;
2491 			clabel.num_rows = raidPtr->numRow;
2492 			clabel.num_columns = raidPtr->numCol;
2493 			clabel.clean = RF_RAID_DIRTY;	/* Changed in a bit. */
2494 			clabel.status = rf_ds_optimal;
2495 			raidwrite_component_label(
2496 			    raidPtr->Disks[r][sparecol].dev,
2497 			    raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel);
2498 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2499 			    raidPtr->raid_cinfo[r][sparecol].ci_vp);
2500 		}
2501 	}
2502 
2503 #endif
2504 }
2505 
2506 
2507 void
rf_update_component_labels(RF_Raid_t * raidPtr,int final)2508 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
2509 {
2510 	RF_ComponentLabel_t clabel;
2511 	int sparecol;
2512 	int r,c;
2513 	int i,j;
2514 	int srow, scol;
2515 
2516 	srow = -1;
2517 	scol = -1;
2518 
2519 	/*
2520 	 * XXX should do extra checks to make sure things really are clean,
2521 	 * rather than blindly setting the clean bit...
2522 	 */
2523 
2524 	raidPtr->mod_counter++;
2525 
2526 	for (r = 0; r < raidPtr->numRow; r++) {
2527 		for (c = 0; c < raidPtr->numCol; c++) {
2528 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2529 				raidread_component_label(
2530 					raidPtr->Disks[r][c].dev,
2531 					raidPtr->raid_cinfo[r][c].ci_vp,
2532 					&clabel);
2533 				/* Make sure status is noted. */
2534 				clabel.status = rf_ds_optimal;
2535 				/* Bump the counter. */
2536 				clabel.mod_counter = raidPtr->mod_counter;
2537 
2538 				raidwrite_component_label(
2539 					raidPtr->Disks[r][c].dev,
2540 					raidPtr->raid_cinfo[r][c].ci_vp,
2541 					&clabel);
2542 				if (final == RF_FINAL_COMPONENT_UPDATE) {
2543 					if (raidPtr->parity_good ==
2544 					    RF_RAID_CLEAN) {
2545 						raidmarkclean(
2546 						    raidPtr->Disks[r][c].dev,
2547 						    raidPtr->
2548 						    raid_cinfo[r][c].ci_vp,
2549 						    raidPtr->mod_counter);
2550 					}
2551 				}
2552 			}
2553 			/* Else we don't touch it... */
2554 		}
2555 	}
2556 
2557 	for( c = 0; c < raidPtr->numSpare ; c++) {
2558 		sparecol = raidPtr->numCol + c;
2559 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2560 			/*
2561 			 * We claim this disk is "optimal" if it's
2562 			 * rf_ds_used_spare, as that means it should be
2563 			 * directly substitutable for the disk it replaced.
2564 			 * We note that too...
2565 			 */
2566 
2567 			for(i=0;i<raidPtr->numRow;i++) {
2568 				for(j=0;j<raidPtr->numCol;j++) {
2569 					if ((raidPtr->Disks[i][j].spareRow ==
2570 					     0) &&
2571 					    (raidPtr->Disks[i][j].spareCol ==
2572 					     sparecol)) {
2573 						srow = i;
2574 						scol = j;
2575 						break;
2576 					}
2577 				}
2578 			}
2579 
2580 			/* XXX Shouldn't *really* need this... */
2581 			raidread_component_label(
2582 			    raidPtr->Disks[0][sparecol].dev,
2583 			    raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel);
2584 			/* Make sure status is noted. */
2585 
2586 			raid_init_component_label(raidPtr, &clabel);
2587 
2588 			clabel.mod_counter = raidPtr->mod_counter;
2589 			clabel.row = srow;
2590 			clabel.column = scol;
2591 			clabel.status = rf_ds_optimal;
2592 
2593 			raidwrite_component_label(
2594 			    raidPtr->Disks[0][sparecol].dev,
2595 			    raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel);
2596 			if (final == RF_FINAL_COMPONENT_UPDATE) {
2597 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
2598 					raidmarkclean(raidPtr->
2599 					    Disks[0][sparecol].dev,
2600 					    raidPtr->
2601 					    raid_cinfo[0][sparecol].ci_vp,
2602 					    raidPtr->mod_counter);
2603 				}
2604 			}
2605 		}
2606 	}
2607 	/*printf("Component labels updated\n");*/
2608 }
2609 
2610 void
rf_close_component(RF_Raid_t * raidPtr,struct vnode * vp,int auto_configured)2611 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2612 {
2613 	struct proc *p = curproc;
2614 
2615 	if (vp != NULL) {
2616 		if (auto_configured == 1) {
2617 			/* component was opened by rf_find_raid_components() */
2618 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, p);
2619 			vrele(vp);
2620 		} else {
2621 			/* component was opened by raidlookup() */
2622 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2623 		}
2624 	} else {
2625 		printf("vnode was NULL\n");
2626 	}
2627 }
2628 
2629 void
rf_UnconfigureVnodes(RF_Raid_t * raidPtr)2630 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2631 {
2632 	int r,c;
2633 	struct vnode *vp;
2634 	int acd;
2635 
2636 
2637 	/* We take this opportunity to close the vnodes like we should... */
2638 
2639 	for (r = 0; r < raidPtr->numRow; r++) {
2640 		for (c = 0; c < raidPtr->numCol; c++) {
2641 			db1_printf(("Closing vnode for row: %d col: %d\n",
2642 			    r, c));
2643 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
2644 			acd = raidPtr->Disks[r][c].auto_configured;
2645 			rf_close_component(raidPtr, vp, acd);
2646 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2647 			raidPtr->Disks[r][c].auto_configured = 0;
2648 		}
2649 	}
2650 	for (r = 0; r < raidPtr->numSpare; r++) {
2651 		db1_printf(("Closing vnode for spare: %d\n", r));
2652 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2653 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2654 		rf_close_component(raidPtr, vp, acd);
2655 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2656 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2657 	}
2658 }
2659 
2660 
2661 void
rf_ReconThread(struct rf_recon_req * req)2662 rf_ReconThread(struct rf_recon_req *req)
2663 {
2664 	int s;
2665 	RF_Raid_t *raidPtr;
2666 
2667 	s = splbio();
2668 	raidPtr = (RF_Raid_t *) req->raidPtr;
2669 	raidPtr->recon_in_progress = 1;
2670 
2671 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2672 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2673 
2674 	/* XXX Get rid of this! we don't need it at all... */
2675 	RF_Free(req, sizeof(*req));
2676 
2677 	raidPtr->recon_in_progress = 0;
2678 	splx(s);
2679 
2680 	/* That's all... */
2681 	kthread_exit(0);	/* Does not return. */
2682 }
2683 
2684 void
rf_RewriteParityThread(RF_Raid_t * raidPtr)2685 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2686 {
2687 	int retcode;
2688 	int s;
2689 
2690 	s = splbio();
2691 	raidPtr->parity_rewrite_in_progress = 1;
2692 	retcode = rf_RewriteParity(raidPtr);
2693 	if (retcode) {
2694 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2695 	} else {
2696 		/*
2697 		 * Set the clean bit !  If we shutdown correctly,
2698 		 * the clean bit on each component label will get
2699 		 * set.
2700 		 */
2701 		raidPtr->parity_good = RF_RAID_CLEAN;
2702 	}
2703 	raidPtr->parity_rewrite_in_progress = 0;
2704 	splx(s);
2705 
2706 	/* Anyone waiting for us to stop ?  If so, inform them... */
2707 	if (raidPtr->waitShutdown) {
2708 		wakeup(&raidPtr->parity_rewrite_in_progress);
2709 	}
2710 
2711 	/* That's all... */
2712 	kthread_exit(0);	/* Does not return. */
2713 }
2714 
2715 
2716 void
rf_CopybackThread(RF_Raid_t * raidPtr)2717 rf_CopybackThread(RF_Raid_t *raidPtr)
2718 {
2719 	int s;
2720 
2721 	s = splbio();
2722 	raidPtr->copyback_in_progress = 1;
2723 	rf_CopybackReconstructedData(raidPtr);
2724 	raidPtr->copyback_in_progress = 0;
2725 	splx(s);
2726 
2727 	/* That's all... */
2728 	kthread_exit(0);	/* Does not return. */
2729 }
2730 
2731 
2732 void
rf_ReconstructInPlaceThread(struct rf_recon_req * req)2733 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2734 {
2735 	int retcode;
2736 	int s;
2737 	RF_Raid_t *raidPtr;
2738 
2739 	s = splbio();
2740 	raidPtr = req->raidPtr;
2741 	raidPtr->recon_in_progress = 1;
2742 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2743 	RF_Free(req, sizeof(*req));
2744 	raidPtr->recon_in_progress = 0;
2745 	splx(s);
2746 
2747 	/* That's all... */
2748 	kthread_exit(0);	/* Does not return. */
2749 }
2750 
2751 
2752 RF_AutoConfig_t *
rf_find_raid_components(void)2753 rf_find_raid_components(void)
2754 {
2755 #ifdef	RAID_AUTOCONFIG
2756 	int major;
2757 	struct vnode *vp;
2758 	struct disklabel label;
2759 	struct device *dv;
2760 	dev_t dev;
2761 	int error;
2762 	int i;
2763 	int good_one;
2764 	RF_ComponentLabel_t *clabel;
2765 	RF_AutoConfig_t *ac;
2766 #endif	/* RAID_AUTOCONFIG */
2767 	RF_AutoConfig_t *ac_list;
2768 
2769 
2770 	/* Initialize the AutoConfig list. */
2771 	ac_list = NULL;
2772 
2773 #ifdef	RAID_AUTOCONFIG
2774 	/* We begin by trolling through *all* the devices on the system. */
2775 
2776 	for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
2777 
2778 		/* We are only interested in disks... */
2779 		if (dv->dv_class != DV_DISK)
2780 			continue;
2781 
2782 		/* We don't care about floppies... */
2783 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2784 			continue;
2785 		}
2786 
2787 		/*
2788 		 * We need to find the device_name_to_block_device_major
2789 		 * stuff.
2790 		 */
2791 		major = findblkmajor(dv);
2792 
2793 		/* Get a vnode for the raw partition of this disk. */
2794 
2795 		dev = MAKEDISKDEV(major, dv->dv_unit, RAW_PART);
2796 		if (bdevvp(dev, &vp))
2797 			panic("RAID can't alloc vnode");
2798 
2799 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2800 
2801 		if (error) {
2802 			/*
2803 			 * "Who cares."  Continue looking
2804 			 * for something that exists.
2805 			 */
2806 			vput(vp);
2807 			continue;
2808 		}
2809 
2810 		/* Ok, the disk exists.  Go get the disklabel. */
2811 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2812 				  FREAD, NOCRED, 0);
2813 		if (error) {
2814 			/*
2815 			 * XXX can't happen - open() would
2816 			 * have errored out (or faked up one).
2817 			 */
2818 			printf("can't get label for dev %s%c (%d)!?!?\n",
2819 			    dv->dv_xname, 'a' + RAW_PART, error);
2820 		}
2821 
2822 		/*
2823 		 * We don't need this any more.  We'll allocate it again
2824 		 * a little later if we really do...
2825 		 */
2826 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2827 		vrele(vp);
2828 
2829 		for (i=0; i < label.d_npartitions; i++) {
2830 			/*
2831 			 * We only support partitions marked as RAID.
2832 			 * Except on sparc/sparc64 where FS_RAID doesn't
2833 			 * fit in the SUN disklabel and we need to look
2834 			 * into each and every partition !!!
2835 			 */
2836 #if !defined(__sparc__) && !defined(__sparc64__) && !defined(__sun3__)
2837 			if (label.d_partitions[i].p_fstype != FS_RAID)
2838 				continue;
2839 #else /* !__sparc__ && !__sparc64__ && !__sun3__ */
2840 			if (label.d_partitions[i].p_fstype == FS_SWAP ||
2841 			    label.d_partitions[i].p_fstype == FS_UNUSED)
2842 				continue;
2843 #endif /* __sparc__ || __sparc64__ || __sun3__ */
2844 
2845 			dev = MAKEDISKDEV(major, dv->dv_unit, i);
2846 			if (bdevvp(dev, &vp))
2847 				panic("RAID can't alloc vnode");
2848 
2849 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2850 			if (error) {
2851 				/* Whatever... */
2852 				vput(vp);
2853 				continue;
2854 			}
2855 
2856 			good_one = 0;
2857 
2858 			clabel = (RF_ComponentLabel_t *)
2859 				malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
2860 				    M_NOWAIT);
2861 			if (clabel == NULL) {
2862 				/* XXX CLEANUP HERE. */
2863 				printf("RAID auto config: out of memory!\n");
2864 				return(NULL); /* XXX probably should panic ? */
2865 			}
2866 
2867 			if (!raidread_component_label(dev, vp, clabel)) {
2868 				/* Got the label.  Does it look reasonable ? */
2869 				if (rf_reasonable_label(clabel) &&
2870 				    (clabel->partitionSize <=
2871 				     label.d_partitions[i].p_size)) {
2872 #ifdef	RAIDDEBUG
2873 					printf("Component on: %s%c: %d\n",
2874 					    dv->dv_xname, 'a'+i,
2875 					    label.d_partitions[i].p_size);
2876 					rf_print_component_label(clabel);
2877 #endif	/* RAIDDEBUG */
2878 					/*
2879 					 * If it's reasonable, add it,
2880 					 * else ignore it.
2881 					 */
2882 					ac = (RF_AutoConfig_t *)
2883 						malloc(sizeof(RF_AutoConfig_t),
2884 						    M_RAIDFRAME, M_NOWAIT);
2885 					if (ac == NULL) {
2886 						/* XXX should panic ??? */
2887 						return(NULL);
2888 					}
2889 
2890 					snprintf(ac->devname,
2891 						 sizeof ac->devname, "%s%c",
2892 						 dv->dv_xname, 'a'+i);
2893 					ac->dev = dev;
2894 					ac->vp = vp;
2895 					ac->clabel = clabel;
2896 					ac->next = ac_list;
2897 					ac_list = ac;
2898 					good_one = 1;
2899 				}
2900 			}
2901 			if (!good_one) {
2902 				/* Cleanup. */
2903 				free(clabel, M_RAIDFRAME);
2904 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2905 				vrele(vp);
2906 			}
2907 		}
2908 	}
2909 #endif	/* RAID_AUTOCONFIG */
2910 	return(ac_list);
2911 }
2912 
#ifdef	RAID_AUTOCONFIG
/*
 * Plausibility check for a component label read from disk.
 *
 * Returns 1 if the version is one of the two known ones, the clean field
 * is either RF_RAID_CLEAN or RF_RAID_DIRTY, row/column lie inside a
 * non-empty num_rows x num_columns grid, and block size and block count
 * are both positive.  Returns 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel)
{
	/* Unknown label format? */
	if (clabel->version != RF_COMPONENT_LABEL_VERSION_1 &&
	    clabel->version != RF_COMPONENT_LABEL_VERSION)
		return(0);

	/* The clean field has exactly two legal values. */
	if (clabel->clean != RF_RAID_CLEAN &&
	    clabel->clean != RF_RAID_DIRTY)
		return(0);

	/* The set must have a geometry... */
	if (clabel->num_rows <= 0 || clabel->num_columns <= 0)
		return(0);

	/* ...and the component must have a place inside it. */
	if (clabel->row < 0 || clabel->row >= clabel->num_rows)
		return(0);
	if (clabel->column < 0 || clabel->column >= clabel->num_columns)
		return(0);

	/* Finally, there has to be some data to speak of. */
	if (clabel->blockSize <= 0 || clabel->numBlocks <= 0)
		return(0);

	/* Label looks reasonable enough... */
	return(1);
}
#endif	/* RAID_AUTOCONFIG */
2936 
2937 void
rf_print_component_label(RF_ComponentLabel_t * clabel)2938 rf_print_component_label(RF_ComponentLabel_t *clabel)
2939 {
2940 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2941 	    clabel->row, clabel->column, clabel->num_rows, clabel->num_columns);
2942 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
2943 	    clabel->version, clabel->serial_number, clabel->mod_counter);
2944 	printf("   Clean: %s Status: %d\n", clabel->clean ? "Yes" : "No",
2945 	    clabel->status );
2946 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2947 	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2948 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
2949 	    (char) clabel->parityConfig, clabel->blockSize, clabel->numBlocks);
2950 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2951 	printf("   Contains root partition: %s\n", clabel->root_partition ?
2952 	    "Yes" : "No" );
2953 	printf("   Last configured as: raid%d\n", clabel->last_unit );
2954 #if 0
2955 	printf("   Config order: %d\n", clabel->config_order);
2956 #endif
2957 }
2958 
2959 RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t * ac_list)2960 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2961 {
2962 	RF_AutoConfig_t *ac;
2963 	RF_ConfigSet_t *config_sets;
2964 	RF_ConfigSet_t *cset;
2965 	RF_AutoConfig_t *ac_next;
2966 
2967 
2968 	config_sets = NULL;
2969 
2970 	/*
2971 	 * Go through the AutoConfig list, and figure out which components
2972 	 * belong to what sets.
2973 	 */
2974 	ac = ac_list;
2975 	while(ac!=NULL) {
2976 		/*
2977 		 * We're going to putz with ac->next, so save it here
2978 		 * for use at the end of the loop.
2979 		 */
2980 		ac_next = ac->next;
2981 
2982 		if (config_sets == NULL) {
2983 			/* We will need at least this one... */
2984 			config_sets = (RF_ConfigSet_t *)
2985 				malloc(sizeof(RF_ConfigSet_t), M_RAIDFRAME,
2986 				    M_NOWAIT);
2987 			if (config_sets == NULL) {
2988 				panic("rf_create_auto_sets: No memory!");
2989 			}
2990 			/* This one is easy :) */
2991 			config_sets->ac = ac;
2992 			config_sets->next = NULL;
2993 			config_sets->rootable = 0;
2994 			ac->next = NULL;
2995 		} else {
2996 			/* Which set does this component fit into ? */
2997 			cset = config_sets;
2998 			while(cset!=NULL) {
2999 				if (rf_does_it_fit(cset, ac)) {
3000 					/* Looks like it matches... */
3001 					ac->next = cset->ac;
3002 					cset->ac = ac;
3003 					break;
3004 				}
3005 				cset = cset->next;
3006 			}
3007 			if (cset==NULL) {
3008 				/* Didn't find a match above... new set... */
3009 				cset = (RF_ConfigSet_t *)
3010 					malloc(sizeof(RF_ConfigSet_t),
3011 					    M_RAIDFRAME, M_NOWAIT);
3012 				if (cset == NULL) {
3013 					panic("rf_create_auto_sets: No memory!");
3014 				}
3015 				cset->ac = ac;
3016 				ac->next = NULL;
3017 				cset->next = config_sets;
3018 				cset->rootable = 0;
3019 				config_sets = cset;
3020 			}
3021 		}
3022 		ac = ac_next;
3023 	}
3024 
3025 
3026 	return(config_sets);
3027 }
3028 
3029 int
rf_does_it_fit(RF_ConfigSet_t * cset,RF_AutoConfig_t * ac)3030 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3031 {
3032 	RF_ComponentLabel_t *clabel1, *clabel2;
3033 
3034 	/*
3035 	 * If this one matches the *first* one in the set, that's good
3036 	 * enough, since the other members of the set would have been
3037 	 * through here too...
3038 	 */
3039 	/*
3040 	 * Note that we are not checking partitionSize here...
3041 	 *
3042 	 * Note that we are also not checking the mod_counters here.
3043 	 * If everything else matches except the mod_counter, that's
3044 	 * good enough for this test.  We will deal with the mod_counters
3045 	 * a little later in the autoconfiguration process.
3046 	 *
3047 	 *  (clabel1->mod_counter == clabel2->mod_counter) &&
3048 	 *
3049 	 * The reason we don't check for this is that failed disks
3050 	 * will have lower modification counts.  If those disks are
3051 	 * not added to the set they used to belong to, then they will
3052 	 * form their own set, which may result in 2 different sets,
3053 	 * for example, competing to be configured at raid0, and
3054 	 * perhaps competing to be the root filesystem set.  If the
3055 	 * wrong ones get configured, or both attempt to become /,
3056 	 * weird behaviour and or serious lossage will occur.  Thus we
3057 	 * need to bring them into the fold here, and kick them out at
3058 	 * a later point.
3059 	 */
3060 
3061 	clabel1 = cset->ac->clabel;
3062 	clabel2 = ac->clabel;
3063 	if ((clabel1->version == clabel2->version) &&
3064 	    (clabel1->serial_number == clabel2->serial_number) &&
3065 	    (clabel1->num_rows == clabel2->num_rows) &&
3066 	    (clabel1->num_columns == clabel2->num_columns) &&
3067 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3068 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3069 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3070 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3071 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3072 	    (clabel1->blockSize == clabel2->blockSize) &&
3073 	    (clabel1->numBlocks == clabel2->numBlocks) &&
3074 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3075 	    (clabel1->root_partition == clabel2->root_partition) &&
3076 	    (clabel1->last_unit == clabel2->last_unit) &&
3077 	    (clabel1->config_order == clabel2->config_order)) {
3078 		/* If it get's here, it almost *has* to be a match. */
3079 	} else {
3080 		/* It's not consistent with somebody in the set...  Punt. */
3081 		return(0);
3082 	}
3083 	/* All was fine.. It must fit... */
3084 	return(1);
3085 }
3086 
3087 int
rf_have_enough_components(RF_ConfigSet_t * cset)3088 rf_have_enough_components(RF_ConfigSet_t *cset)
3089 {
3090 	RF_AutoConfig_t *ac;
3091 	RF_AutoConfig_t *auto_config;
3092 	RF_ComponentLabel_t *clabel;
3093 	int r,c;
3094 	int num_rows;
3095 	int num_cols;
3096 	int num_missing;
3097 	int mod_counter;
3098 	int mod_counter_found;
3099 	int even_pair_failed;
3100 	char parity_type;
3101 
3102 
3103 	/*
3104 	 * Check to see that we have enough 'live' components
3105 	 * of this set.  If so, we can configure it if necessary.
3106 	 */
3107 
3108 	num_rows = cset->ac->clabel->num_rows;
3109 	num_cols = cset->ac->clabel->num_columns;
3110 	parity_type = cset->ac->clabel->parityConfig;
3111 
3112 	/* XXX Check for duplicate components !?!?!? */
3113 
3114 	/* Determine what the mod_counter is supposed to be for this set. */
3115 
3116 	mod_counter_found = 0;
3117 	mod_counter = 0;
3118 	ac = cset->ac;
3119 	while(ac!=NULL) {
3120 		if (mod_counter_found==0) {
3121 			mod_counter = ac->clabel->mod_counter;
3122 			mod_counter_found = 1;
3123 		} else {
3124 			if (ac->clabel->mod_counter > mod_counter) {
3125 				mod_counter = ac->clabel->mod_counter;
3126 			}
3127 		}
3128 		ac = ac->next;
3129 	}
3130 
3131 	num_missing = 0;
3132 	auto_config = cset->ac;
3133 
3134 	for(r=0; r<num_rows; r++) {
3135 		even_pair_failed = 0;
3136 		for(c=0; c<num_cols; c++) {
3137 			ac = auto_config;
3138 			while(ac!=NULL) {
3139 				if ((ac->clabel->row == r) &&
3140 				    (ac->clabel->column == c) &&
3141 				    (ac->clabel->mod_counter == mod_counter)) {
3142 					/* It's this one... */
3143 #ifdef	RAIDDEBUG
3144 					printf("Found: %s at %d,%d\n",
3145 					    ac->devname,r,c);
3146 #endif	/* RAIDDEBUG */
3147 					break;
3148 				}
3149 				ac=ac->next;
3150 			}
3151 			if (ac==NULL) {
3152 				/* Didn't find one here! */
3153 				/*
3154 				 * Special case for RAID 1, especially
3155 				 * where there are more than 2
3156 				 * components (where RAIDframe treats
3157 				 * things a little differently :( )
3158 				 */
3159 				if (parity_type == '1') {
3160 					if (c%2 == 0) {	/* Even component. */
3161 						even_pair_failed = 1;
3162 					} else {	/*
3163 							 * Odd component.
3164 							 * If we're failed,
3165 							 * and so is the even
3166 							 * component, it's
3167 							 * "Good Night, Charlie"
3168 							 */
3169 						if (even_pair_failed == 1) {
3170 							return(0);
3171 						}
3172 					}
3173 				} else {
3174 					/* Normal accounting. */
3175 					num_missing++;
3176 				}
3177 			}
3178 			if ((parity_type == '1') && (c%2 == 1)) {
3179 				/*
3180 				 * Just did an even component, and we didn't
3181 				 * bail... Reset the even_pair_failed flag,
3182 				 * and go on to the next component...
3183 				 */
3184 				even_pair_failed = 0;
3185 			}
3186 		}
3187 	}
3188 
3189 	clabel = cset->ac->clabel;
3190 
3191 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3192 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3193 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
3194 		/* XXX This needs to be made *much* more general. */
3195 		/* Too many failures. */
3196 		return(0);
3197 	}
3198 	/*
3199 	 * Otherwise, all is well, and we've got enough to take a kick
3200 	 * at autoconfiguring this set.
3201 	 */
3202 	return(1);
3203 }
3204 
3205 void
rf_create_configuration(RF_AutoConfig_t * ac,RF_Config_t * config,RF_Raid_t * raidPtr)3206 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3207     RF_Raid_t *raidPtr)
3208 {
3209 	RF_ComponentLabel_t *clabel;
3210 	int i;
3211 
3212 	clabel = ac->clabel;
3213 
3214 	/* 1. Fill in the common stuff. */
3215 	config->numRow = clabel->num_rows;
3216 	config->numCol = clabel->num_columns;
3217 	config->numSpare = 0;	/* XXX Should this be set here ? */
3218 	config->sectPerSU = clabel->sectPerSU;
3219 	config->SUsPerPU = clabel->SUsPerPU;
3220 	config->SUsPerRU = clabel->SUsPerRU;
3221 	config->parityConfig = clabel->parityConfig;
3222 	/* XXX... */
3223 	strlcpy(config->diskQueueType,"fifo", sizeof config->diskQueueType);
3224 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3225 	config->layoutSpecificSize = 0;	/* XXX ?? */
3226 
3227 	while(ac!=NULL) {
3228 		/*
3229 		 * row/col values will be in range due to the checks
3230 		 * in reasonable_label().
3231 		 */
3232 		strlcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3233 		    ac->devname,
3234 		    sizeof config->devnames[ac->clabel->row][ac->clabel->column]);
3235 		ac = ac->next;
3236 	}
3237 
3238 	for(i=0;i<RF_MAXDBGV;i++) {
3239 		config->debugVars[i][0] = 0;
3240 	}
3241 
3242 #ifdef	RAID_DEBUG_ALL
3243 
3244 #ifdef	RF_DBG_OPTION
3245 #undef	RF_DBG_OPTION
3246 #endif	/* RF_DBG_OPTION */
3247 
3248 #ifdef	__STDC__
3249 #define	RF_DBG_OPTION(_option_,_val_)	do {				\
3250 	snprintf(&(config->debugVars[i++][0]), 50, "%s %ld",		\
3251 	    #_option_, _val_);						\
3252 } while (0)
3253 #else	/* __STDC__ */
3254 #define	RF_DBG_OPTION(_option_,_val_)	do {				\
3255 	snprintf(&(config->debugVars[i++][0]), 50, "%s %ld",		\
3256 	    "/**/_option_/**/", _val_);					\
3257 } while (0)
3258 #endif	/* __STDC__ */
3259 
3260 	i = 0;
3261 
3262 /*	RF_DBG_OPTION(accessDebug, 0);					*/
3263 /*	RF_DBG_OPTION(accessTraceBufSize, 0);				*/
3264 	RF_DBG_OPTION(cscanDebug, 1);		/* Debug CSCAN sorting.	*/
3265 	RF_DBG_OPTION(dagDebug, 1);
3266 /*	RF_DBG_OPTION(debugPrintUseBuffer, 0);				*/
3267 	RF_DBG_OPTION(degDagDebug, 1);
3268 	RF_DBG_OPTION(disableAsyncAccs, 1);
3269 	RF_DBG_OPTION(diskDebug, 1);
3270 	RF_DBG_OPTION(enableAtomicRMW, 0);
3271 		/*
3272 		 * This debug variable enables locking of the
3273 		 * disk arm during small-write operations.
3274 		 * Setting this variable to anything other than
3275 		 * 0 will result in deadlock.  (wvcii)
3276 		 */
3277 	RF_DBG_OPTION(engineDebug, 1);
3278 	RF_DBG_OPTION(fifoDebug, 1);		/* Debug fifo queueing.	*/
3279 /*	RF_DBG_OPTION(floatingRbufDebug, 1);				*/
3280 /*	RF_DBG_OPTION(forceHeadSepLimit, -1);				*/
3281 /*	RF_DBG_OPTION(forceNumFloatingReconBufs, -1);			*/
3282 		/*
3283 		 * Wire down the number of extra recon buffers
3284 		 * to use.
3285 		 */
3286 /*	RF_DBG_OPTION(keepAccTotals, 1);				*/
3287 		/* Turn on keep_acc_totals. */
3288 	RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE);
3289 	RF_DBG_OPTION(mapDebug, 1);
3290 	RF_DBG_OPTION(maxNumTraces, -1);
3291 
3292 /*	RF_DBG_OPTION(memChunkDebug, 1);				*/
3293 /*	RF_DBG_OPTION(memDebug, 1);					*/
3294 /*	RF_DBG_OPTION(memDebugAddress, 1);				*/
3295 /*	RF_DBG_OPTION(numBufsToAccumulate, 1);				*/
3296 		/*
3297 		 * Number of buffers to accumulate before
3298 		 * doing XOR.
3299 		 */
3300 	RF_DBG_OPTION(prReconSched, 0);
3301 	RF_DBG_OPTION(printDAGsDebug, 1);
3302 	RF_DBG_OPTION(printStatesDebug, 1);
3303 	RF_DBG_OPTION(protectedSectors, 64L);
3304 		/*
3305 		 * Number of sectors at start of disk to exclude
3306 		 * from RAID address space.
3307 		 */
3308 	RF_DBG_OPTION(pssDebug, 1);
3309 	RF_DBG_OPTION(queueDebug, 1);
3310 	RF_DBG_OPTION(quiesceDebug, 1);
3311 	RF_DBG_OPTION(raidSectorOffset, 0);
3312 		/*
3313 		 * Value added to all incoming sectors to debug
3314 		 * alignment problems.
3315 		 */
3316 	RF_DBG_OPTION(reconDebug, 1);
3317 	RF_DBG_OPTION(reconbufferDebug, 1);
3318 	RF_DBG_OPTION(scanDebug, 1);		/* Debug SCAN sorting.	*/
3319 	RF_DBG_OPTION(showXorCallCounts, 0);
3320 		/* Show n-way Xor call counts. */
3321 	RF_DBG_OPTION(shutdownDebug, 1);	/* Show shutdown calls.	*/
3322 	RF_DBG_OPTION(sizePercentage, 100);
3323 	RF_DBG_OPTION(sstfDebug, 1);
3324 		/* Turn on debugging info for sstf queueing. */
3325 	RF_DBG_OPTION(stripeLockDebug, 1);
3326 	RF_DBG_OPTION(suppressLocksAndLargeWrites, 0);
3327 	RF_DBG_OPTION(suppressTraceDelays, 0);
3328 	RF_DBG_OPTION(useMemChunks, 1);
3329 	RF_DBG_OPTION(validateDAGDebug, 1);
3330 	RF_DBG_OPTION(validateVisitedDebug, 1);
3331 		/* XXX turn to zero by default ? */
3332 	RF_DBG_OPTION(verifyParityDebug, 1);
3333 	RF_DBG_OPTION(debugKernelAccess, 1);
3334 		/* DoAccessKernel debugging. */
3335 
3336 #if RF_INCLUDE_PARITYLOGGING > 0
3337 	RF_DBG_OPTION(forceParityLogReint, 0);
3338 	RF_DBG_OPTION(numParityRegions, 0);
3339 		/* Number of regions in the array. */
3340 	RF_DBG_OPTION(numReintegrationThreads, 1);
3341 	RF_DBG_OPTION(parityLogDebug, 1);
3342 		/* If nonzero, enables debugging of parity logging. */
3343 	RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024);
3344 		/* Target bytes available for in-core logs. */
3345 #endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
3346 
3347 #endif	/* RAID_DEBUG_ALL */
3348 }
3349 
3350 int
rf_set_autoconfig(RF_Raid_t * raidPtr,int new_value)3351 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3352 {
3353 	RF_ComponentLabel_t clabel;
3354 	struct vnode *vp;
3355 	dev_t dev;
3356 	int row, column;
3357 
3358 	raidPtr->autoconfigure = new_value;
3359 	for(row=0; row<raidPtr->numRow; row++) {
3360 		for(column=0; column<raidPtr->numCol; column++) {
3361 			if (raidPtr->Disks[row][column].status ==
3362 			    rf_ds_optimal) {
3363 				dev = raidPtr->Disks[row][column].dev;
3364 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
3365 				raidread_component_label(dev, vp, &clabel);
3366 				clabel.autoconfigure = new_value;
3367 				raidwrite_component_label(dev, vp, &clabel);
3368 			}
3369 		}
3370 	}
3371 	return(new_value);
3372 }
3373 
3374 int
rf_set_rootpartition(RF_Raid_t * raidPtr,int new_value)3375 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3376 {
3377 	RF_ComponentLabel_t clabel;
3378 	struct vnode *vp;
3379 	dev_t dev;
3380 	int row, column;
3381 
3382 	raidPtr->root_partition = new_value;
3383 	for(row=0; row<raidPtr->numRow; row++) {
3384 		for(column=0; column<raidPtr->numCol; column++) {
3385 			if (raidPtr->Disks[row][column].status ==
3386 			    rf_ds_optimal) {
3387 				dev = raidPtr->Disks[row][column].dev;
3388 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
3389 				raidread_component_label(dev, vp, &clabel);
3390 				clabel.root_partition = new_value;
3391 				raidwrite_component_label(dev, vp, &clabel);
3392 			}
3393 		}
3394 	}
3395 	return(new_value);
3396 }
3397 
3398 void
rf_release_all_vps(RF_ConfigSet_t * cset)3399 rf_release_all_vps(RF_ConfigSet_t *cset)
3400 {
3401 	RF_AutoConfig_t *ac;
3402 
3403 	ac = cset->ac;
3404 	while(ac!=NULL) {
3405 		/* Close the vp, and give it back. */
3406 		if (ac->vp) {
3407 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3408 			vrele(ac->vp);
3409 			ac->vp = NULL;
3410 		}
3411 		ac = ac->next;
3412 	}
3413 }
3414 
3415 
3416 void
rf_cleanup_config_set(RF_ConfigSet_t * cset)3417 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3418 {
3419 	RF_AutoConfig_t *ac;
3420 	RF_AutoConfig_t *next_ac;
3421 
3422 	ac = cset->ac;
3423 	while(ac!=NULL) {
3424 		next_ac = ac->next;
3425 		/* Nuke the label. */
3426 		free(ac->clabel, M_RAIDFRAME);
3427 		/* Cleanup the config structure. */
3428 		free(ac, M_RAIDFRAME);
3429 		/* "next..." */
3430 		ac = next_ac;
3431 	}
3432 	/* And, finally, nuke the config set. */
3433 	free(cset, M_RAIDFRAME);
3434 }
3435 
3436 
3437 void
raid_init_component_label(RF_Raid_t * raidPtr,RF_ComponentLabel_t * clabel)3438 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
3439 {
3440 	/* Current version number. */
3441 	clabel->version = RF_COMPONENT_LABEL_VERSION;
3442 	clabel->serial_number = raidPtr->serial_number;
3443 	clabel->mod_counter = raidPtr->mod_counter;
3444 	clabel->num_rows = raidPtr->numRow;
3445 	clabel->num_columns = raidPtr->numCol;
3446 	clabel->clean = RF_RAID_DIRTY;	/* Not clean. */
3447 	clabel->status = rf_ds_optimal;	/* "It's good !" */
3448 
3449 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3450 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3451 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3452 
3453 	clabel->blockSize = raidPtr->bytesPerSector;
3454 	clabel->numBlocks = raidPtr->sectorsPerDisk;
3455 
3456 	/* XXX Not portable. */
3457 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3458 	clabel->maxOutstanding = raidPtr->maxOutstanding;
3459 	clabel->autoconfigure = raidPtr->autoconfigure;
3460 	clabel->root_partition = raidPtr->root_partition;
3461 	clabel->last_unit = raidPtr->raidid;
3462 	clabel->config_order = raidPtr->config_order;
3463 }
3464 
3465 int
rf_auto_config_set(RF_ConfigSet_t * cset,int * unit)3466 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3467 {
3468 	RF_Raid_t *raidPtr;
3469 	RF_Config_t *config;
3470 	int raidID;
3471 	int retcode;
3472 
3473 	db1_printf(("RAID autoconfigure\n"));
3474 
3475 	retcode = 0;
3476 	*unit = -1;
3477 
3478 	/* 1. Create a config structure. */
3479 
3480 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME,
3481 	    M_NOWAIT);
3482 	if (config==NULL) {
3483 		printf("Out of mem!?!?\n");
3484 				/* XXX Do something more intelligent here. */
3485 		return(1);
3486 	}
3487 
3488 	memset(config, 0, sizeof(RF_Config_t));
3489 
3490 	/* XXX raidID needs to be set correctly... */
3491 
3492 	/*
3493 	 * 2. Figure out what RAID ID this one is supposed to live at.
3494 	 * See if we can get the same RAID dev that it was configured
3495 	 * on last time...
3496 	 */
3497 
3498 	raidID = cset->ac->clabel->last_unit;
3499 	if ((raidID < 0) || (raidID >= numraid)) {
3500 		/* Let's not wander off into lala land. */
3501 		raidID = numraid - 1;
3502 	}
3503 	if (raidPtrs[raidID]->valid != 0) {
3504 
3505 		/*
3506 		 * Nope...  Go looking for an alternative...
3507 		 * Start high so we don't immediately use raid0 if that's
3508 		 * not taken.
3509 		 */
3510 
3511 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
3512 			if (raidPtrs[raidID]->valid == 0) {
3513 				/* We can use this one ! */
3514 				break;
3515 			}
3516 		}
3517 	}
3518 
3519 	if (raidID < 0) {
3520 		/* Punt... */
3521 		printf("Unable to auto configure this set!\n");
3522 		printf("(Out of RAID devs!)\n");
3523 		return(1);
3524 	}
3525 	raidPtr = raidPtrs[raidID];
3526 
3527 	/* XXX All this stuff should be done SOMEWHERE ELSE ! */
3528 	raidPtr->raidid = raidID;
3529 	raidPtr->openings = RAIDOUTSTANDING;
3530 
3531 	/* 3. Build the configuration structure. */
3532 	rf_create_configuration(cset->ac, config, raidPtr);
3533 
3534 	/* 4. Do the configuration. */
3535 	retcode = rf_Configure(raidPtr, config, cset->ac);
3536 
3537 	if (retcode == 0) {
3538 
3539 		raidinit(raidPtrs[raidID]);
3540 
3541 		rf_markalldirty(raidPtrs[raidID]);
3542 		raidPtrs[raidID]->autoconfigure = 1; /* XXX Do this here ? */
3543 		if (cset->ac->clabel->root_partition==1) {
3544 			/*
3545 			 * Everything configured just fine.  Make a note
3546 			 * that this set is eligible to be root.
3547 			 */
3548 			cset->rootable = 1;
3549 			/* XXX Do this here ? */
3550 			raidPtrs[raidID]->root_partition = 1;
3551 		}
3552 	}
3553 
3554 	printf(": (%s) total number of sectors is %lu (%lu MB)%s\n",
3555 	    (raidPtrs[raidID]->Layout).map->configName,
3556 	    (unsigned long) raidPtrs[raidID]->totalSectors,
3557 	    (unsigned long) (raidPtrs[raidID]->totalSectors / 1024 *
3558 	    (1 << raidPtrs[raidID]->logBytesPerSector) / 1024),
3559 	    raidPtrs[raidID]->root_partition ? " as root" : "");
3560 
3561 	/* 5. Cleanup. */
3562 	free(config, M_RAIDFRAME);
3563 
3564 	*unit = raidID;
3565 	return(retcode);
3566 }
3567 
3568 void
rf_disk_unbusy(RF_RaidAccessDesc_t * desc)3569 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3570 {
3571 	struct buf *bp;
3572 
3573 	bp = (struct buf *)desc->bp;
3574 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3575 			    (bp->b_bcount - bp->b_resid),
3576 			    (bp->b_flags & B_READ));
3577 }
3578