1 /*	$OpenBSD: rf_raid1.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $	*/
2 /*	$NetBSD: rf_raid1.c,v 1.5 2000/01/08 22:57:30 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: William V. Courtright II
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 /*****************************************************************************
32  *
33  * rf_raid1.c -- Implements RAID Level 1.
34  *
35  *****************************************************************************/
36 
37 #include "rf_raid.h"
38 #include "rf_raid1.h"
39 #include "rf_dag.h"
40 #include "rf_dagffrd.h"
41 #include "rf_dagffwr.h"
42 #include "rf_dagdegrd.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_diskqueue.h"
46 #include "rf_general.h"
47 #include "rf_utils.h"
48 #include "rf_parityscan.h"
49 #include "rf_mcpair.h"
50 #include "rf_layout.h"
51 #include "rf_map.h"
52 #include "rf_engine.h"
53 #include "rf_reconbuffer.h"
54 
55 typedef struct RF_Raid1ConfigInfo_s {
56 	RF_RowCol_t **stripeIdentifier;
57 } RF_Raid1ConfigInfo_t;
58 
59 
60 /* Start of day code specific to RAID level 1. */
61 int
rf_ConfigureRAID1(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)62 rf_ConfigureRAID1(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
63     RF_Config_t *cfgPtr)
64 {
65 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
66 	RF_Raid1ConfigInfo_t *info;
67 	RF_RowCol_t i;
68 
69 	/* Create a RAID level 1 configuration structure. */
70 	RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t),
71 	    (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
72 	if (info == NULL)
73 		return (ENOMEM);
74 	layoutPtr->layoutSpecificInfo = (void *) info;
75 
76 	/* ... and fill it in. */
77 	info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2,
78 	    raidPtr->cleanupList);
79 	if (info->stripeIdentifier == NULL)
80 		return (ENOMEM);
81 	for (i = 0; i < (raidPtr->numCol / 2); i++) {
82 		info->stripeIdentifier[i][0] = (2 * i);
83 		info->stripeIdentifier[i][1] = (2 * i) + 1;
84 	}
85 
86 	RF_ASSERT(raidPtr->numRow == 1);
87 
88 	/*
89 	 * This implementation of RAID level 1 uses one row of numCol disks
90 	 * and allows multiple (numCol / 2) stripes per row. A stripe
91 	 * consists of a single data unit and a single parity (mirror) unit.
92 	 * Stripe id = raidAddr / stripeUnitSize.
93 	 */
94 	raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
95 	    (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
96 	layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk *
97 	    (raidPtr->numCol / 2);
98 	layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
99 	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
100 	    raidPtr->logBytesPerSector;
101 	layoutPtr->numDataCol = 1;
102 	layoutPtr->numParityCol = 1;
103 	return (0);
104 }
105 
106 
107 /*
108  * Returns the physical disk location of the primary copy in the mirror pair.
109  */
110 void
rf_MapSectorRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)111 rf_MapSectorRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
112     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
113 {
114 	RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
115 	RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
116 
117 	*row = 0;
118 	*col = 2 * mirrorPair;
119 	*diskSector = ((SUID / (raidPtr->numCol / 2)) *
120 	     raidPtr->Layout.sectorsPerStripeUnit) +
121 	    (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
122 }
123 
124 
125 /*
126  * Map Parity.
127  *
128  * Returns the physical disk location of the secondary copy in the mirror
129  * pair.
130  */
131 void
rf_MapParityRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)132 rf_MapParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
133     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
134 {
135 	RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
136 	RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
137 
138 	*row = 0;
139 	*col = (2 * mirrorPair) + 1;
140 
141 	*diskSector = ((SUID / (raidPtr->numCol / 2)) *
142 	     raidPtr->Layout.sectorsPerStripeUnit) +
143 	    (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
144 }
145 
146 
147 /*
148  * IdentifyStripeRAID1
149  *
150  * Returns a list of disks for a given redundancy group.
151  */
152 void
rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t addr,RF_RowCol_t ** diskids,RF_RowCol_t * outRow)153 rf_IdentifyStripeRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
154     RF_RowCol_t **diskids, RF_RowCol_t *outRow)
155 {
156 	RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout,
157 	    addr);
158 	RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
159 	RF_ASSERT(stripeID >= 0);
160 	RF_ASSERT(addr >= 0);
161 	*outRow = 0;
162 	*diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)];
163 	RF_ASSERT(*diskids);
164 }
165 
166 
167 /*
168  * MapSIDToPSIDRAID1
169  *
170  * Maps a logical stripe to a stripe in the redundant array.
171  */
172 void
rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr,RF_StripeNum_t stripeID,RF_StripeNum_t * psID,RF_ReconUnitNum_t * which_ru)173 rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
174     RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
175 {
176 	*which_ru = 0;
177 	*psID = stripeID;
178 }
179 
180 
181 
182 /*****************************************************************************
183  * Select a graph to perform a single-stripe access.
184  *
185  * Parameters:	raidPtr	   - Description of the physical array.
186  *		type	   - Type of operation (read or write) requested.
187  *		asmap	   - Logical & physical addresses for this access.
188  *		createFunc - Name of function to use to create the graph.
189  *****************************************************************************/
190 
191 void
rf_RAID1DagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmap,RF_VoidFuncPtr * createFunc)192 rf_RAID1DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
193     RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
194 {
195 	RF_RowCol_t frow, fcol, or, oc;
196 	RF_PhysDiskAddr_t *failedPDA;
197 	int prior_recon;
198 	RF_RowStatus_t rstat;
199 	RF_SectorNum_t oo;
200 
201 
202 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
203 
204 	if (asmap->numDataFailed + asmap->numParityFailed > 1) {
205 		RF_ERRORMSG("Multiple disks failed in a single group !"
206 		            "  Aborting I/O operation.\n");
207 		*createFunc = NULL;
208 		return;
209 	}
210 	if (asmap->numDataFailed + asmap->numParityFailed) {
211 		/*
212 		 * We've got a fault. Re-map to spare space, iff applicable.
213 		 * Shouldn't the arch-independent code do this for us ?
214 		 * Anyway, it turns out if we don't do this here, then when
215 		 * we're reconstructing, writes go only to the surviving
216 		 * original disk, and aren't reflected on the reconstructed
217 		 * spare. Oops. --jimz
218 		 */
219 		failedPDA = asmap->failedPDAs[0];
220 		frow = failedPDA->row;
221 		fcol = failedPDA->col;
222 		rstat = raidPtr->status[frow];
223 		prior_recon = (rstat == rf_rs_reconfigured) || (
224 		    (rstat == rf_rs_reconstructing) ?
225 		    rf_CheckRUReconstructed(raidPtr->reconControl[frow]
226 		     ->reconMap, failedPDA->startSector) : 0);
227 		if (prior_recon) {
228 			or = frow;
229 			oc = fcol;
230 			oo = failedPDA->startSector;
231 			/*
232 			 * If we did distributed sparing, we'd monkey with
233 			 * that here.
234 			 * But we don't, so we'll.
235 			 */
236 			failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
237 			failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
238 			/*
239 			 * Redirect other components, iff necessary. This looks
240 			 * pretty suspicious to me, but it's what the raid5
241 			 * DAG select does.
242 			 */
243 			if (asmap->parityInfo->next) {
244 				if (failedPDA == asmap->parityInfo) {
245 					failedPDA->next->row = failedPDA->row;
246 					failedPDA->next->col = failedPDA->col;
247 				} else {
248 					if (failedPDA ==
249 					    asmap->parityInfo->next) {
250 						asmap->parityInfo->row =
251 						    failedPDA->row;
252 						asmap->parityInfo->col =
253 						    failedPDA->col;
254 					}
255 				}
256 			}
257 			if (rf_dagDebug || rf_mapDebug) {
258 				printf("raid%d: Redirected type '%c' r %d c %d"
259 				    " o %ld -> r %d c %d o %ld.\n",
260 				    raidPtr->raidid, type, or, oc, (long) oo,
261 				    failedPDA->row, failedPDA->col,
262 				    (long) failedPDA->startSector);
263 			}
264 			asmap->numDataFailed = asmap->numParityFailed = 0;
265 		}
266 	}
267 	if (type == RF_IO_TYPE_READ) {
268 		if (asmap->numDataFailed == 0)
269 			*createFunc = (RF_VoidFuncPtr)
270 			    rf_CreateMirrorIdleReadDAG;
271 		else
272 			*createFunc = (RF_VoidFuncPtr)
273 			    rf_CreateRaidOneDegradedReadDAG;
274 	} else {
275 		*createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
276 	}
277 }
278 
279 int
rf_VerifyParityRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidAddr,RF_PhysDiskAddr_t * parityPDA,int correct_it,RF_RaidAccessFlags_t flags)280 rf_VerifyParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
281     RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags)
282 {
283 	int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs;
284 	RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
285 	RF_DagHeader_t *rd_dag_h, *wr_dag_h;
286 	RF_AccessStripeMapHeader_t *asm_h;
287 	RF_AllocListElem_t *allocList;
288 	RF_AccTraceEntry_t tracerec;
289 	RF_ReconUnitNum_t which_ru;
290 	RF_RaidLayout_t *layoutPtr;
291 	RF_AccessStripeMap_t *aasm;
292 	RF_SectorCount_t nsector;
293 	RF_RaidAddr_t startAddr;
294 	char *buf, *buf1, *buf2;
295 	RF_PhysDiskAddr_t *pda;
296 	RF_StripeNum_t psID;
297 	RF_MCPair_t *mcpair;
298 
299 	layoutPtr = &raidPtr->Layout;
300 	startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
301 	nsector = parityPDA->numSector;
302 	nbytes = rf_RaidAddressToByte(raidPtr, nsector);
303 	psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
304 
305 	asm_h = NULL;
306 	rd_dag_h = wr_dag_h = NULL;
307 	mcpair = NULL;
308 
309 	ret = RF_PARITY_COULD_NOT_VERIFY;
310 
311 	rf_MakeAllocList(allocList);
312 	if (allocList == NULL)
313 		return (RF_PARITY_COULD_NOT_VERIFY);
314 	mcpair = rf_AllocMCPair();
315 	if (mcpair == NULL)
316 		goto done;
317 	RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
318 	stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
319 	bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol);
320 	RF_MallocAndAdd(buf, bcount, (char *), allocList);
321 	if (buf == NULL)
322 		goto done;
323 	if (rf_verifyParityDebug) {
324 		printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d"
325 		    " (%lx - %lx).\n", raidPtr->raidid, (long) buf, bcount,
326 		    (long) buf, (long) buf + bcount);
327 	}
328 	/*
329 	 * Generate a DAG that will read the entire stripe- then we can
330 	 * just compare data chunks versus "parity" chunks.
331 	 */
332 
333 	rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
334 	    rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
335 	    RF_IO_NORMAL_PRIORITY);
336 	if (rd_dag_h == NULL)
337 		goto done;
338 	blockNode = rd_dag_h->succedents[0];
339 	unblockNode = blockNode->succedents[0]->succedents[0];
340 
341 	/*
342 	 * Map the access to physical disk addresses (PDAs)- this will
343 	 * get us both a list of data addresses, and "parity" addresses
344 	 * (which are really mirror copies).
345 	 */
346 	asm_h = rf_MapAccess(raidPtr, startAddr,
347 	    layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
348 	aasm = asm_h->stripeMap;
349 
350 	buf1 = buf;
351 	/*
352 	 * Loop through the data blocks, setting up read nodes for each.
353 	 */
354 	for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol;
355 	     i++, pda = pda->next) {
356 		RF_ASSERT(pda);
357 
358 		rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
359 
360 		RF_ASSERT(pda->numSector != 0);
361 		if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
362 			/* cannot verify parity with dead disk */
363 			goto done;
364 		}
365 		pda->bufPtr = buf1;
366 		blockNode->succedents[i]->params[0].p = pda;
367 		blockNode->succedents[i]->params[1].p = buf1;
368 		blockNode->succedents[i]->params[2].v = psID;
369 		blockNode->succedents[i]->params[3].v =
370 		    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
371 		buf1 += nbytes;
372 	}
373 	RF_ASSERT(pda == NULL);
374 	/*
375 	 * Keep i, buf1 running.
376 	 *
377 	 * Loop through parity blocks, setting up read nodes for each.
378 	 */
379 	for (pda = aasm->parityInfo; i < layoutPtr->numDataCol +
380 	     layoutPtr->numParityCol; i++, pda = pda->next) {
381 		RF_ASSERT(pda);
382 		rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
383 		RF_ASSERT(pda->numSector != 0);
384 		if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
385 			/* Cannot verify parity with dead disk. */
386 			goto done;
387 		}
388 		pda->bufPtr = buf1;
389 		blockNode->succedents[i]->params[0].p = pda;
390 		blockNode->succedents[i]->params[1].p = buf1;
391 		blockNode->succedents[i]->params[2].v = psID;
392 		blockNode->succedents[i]->params[3].v =
393 		    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
394 		buf1 += nbytes;
395 	}
396 	RF_ASSERT(pda == NULL);
397 
398 	bzero((char *) &tracerec, sizeof(tracerec));
399 	rd_dag_h->tracerec = &tracerec;
400 
401 	if (rf_verifyParityDebug > 1) {
402 		printf("raid%d: RAID1 parity verify read dag:\n",
403 		    raidPtr->raidid);
404 		rf_PrintDAGList(rd_dag_h);
405 	}
406 	RF_LOCK_MUTEX(mcpair->mutex);
407 	mcpair->flag = 0;
408 	rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
409 	    (void *) mcpair);
410 	while (mcpair->flag == 0) {
411 		RF_WAIT_MCPAIR(mcpair);
412 	}
413 	RF_UNLOCK_MUTEX(mcpair->mutex);
414 
415 	if (rd_dag_h->status != rf_enable) {
416 		RF_ERRORMSG("Unable to verify raid1 parity:"
417 		            " can't read stripe.\n");
418 		ret = RF_PARITY_COULD_NOT_VERIFY;
419 		goto done;
420 	}
421 	/*
422 	 * buf1 is the beginning of the data blocks chunk.
423 	 * buf2 is the beginning of the parity blocks chunk.
424 	 */
425 	buf1 = buf;
426 	buf2 = buf + (nbytes * layoutPtr->numDataCol);
427 	ret = RF_PARITY_OKAY;
428 	/*
429 	 * bbufs is "bad bufs"- an array whose entries are the data
430 	 * column numbers where we had miscompares. (That is, column 0
431 	 * and column 1 of the array are mirror copies, and are considered
432 	 * "data column 0" for this purpose).
433 	 */
434 	RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *),
435 	    allocList);
436 	nbad = 0;
437 	/*
438 	 * Check data vs "parity" (mirror copy).
439 	 */
440 	for (i = 0; i < layoutPtr->numDataCol; i++) {
441 		if (rf_verifyParityDebug) {
442 			printf("raid%d: RAID1 parity verify %d bytes: i=%d"
443 			    " buf1=%lx buf2=%lx buf=%lx.\n", raidPtr->raidid,
444 			    nbytes, i, (long) buf1, (long) buf2, (long) buf);
445 		}
446 		ret = bcmp(buf1, buf2, nbytes);
447 		if (ret) {
448 			if (rf_verifyParityDebug > 1) {
449 				for (j = 0; j < nbytes; j++) {
450 					if (buf1[j] != buf2[j])
451 						break;
452 				}
453 				printf("psid=%ld j=%d\n", (long) psID, j);
454 				printf("buf1 %02x %02x %02x %02x %02x\n",
455 				    buf1[0] & 0xff, buf1[1] & 0xff,
456 				    buf1[2] & 0xff, buf1[3] & 0xff,
457 				    buf1[4] & 0xff);
458 				printf("buf2 %02x %02x %02x %02x %02x\n",
459 				    buf2[0] & 0xff, buf2[1] & 0xff,
460 				    buf2[2] & 0xff, buf2[3] & 0xff,
461 				    buf2[4] & 0xff);
462 			}
463 			if (rf_verifyParityDebug) {
464 				printf("raid%d: RAID1: found bad parity,"
465 				    " i=%d.\n", raidPtr->raidid, i);
466 			}
467 			/*
468 			 * Parity is bad. Keep track of which columns were bad.
469 			 */
470 			if (bbufs)
471 				bbufs[nbad] = i;
472 			nbad++;
473 			ret = RF_PARITY_BAD;
474 		}
475 		buf1 += nbytes;
476 		buf2 += nbytes;
477 	}
478 
479 	if ((ret != RF_PARITY_OKAY) && correct_it) {
480 		ret = RF_PARITY_COULD_NOT_CORRECT;
481 		if (rf_verifyParityDebug) {
482 			printf("raid%d: RAID1 parity verify:"
483 			    " parity not correct.\n", raidPtr->raidid);
484 		}
485 		if (bbufs == NULL)
486 			goto done;
487 		/*
488 		 * Make a DAG with one write node for each bad unit. We'll
489 		 * simply write the contents of the data unit onto the parity
490 		 * unit for correction. (It's possible that the mirror copy
491 		 * was the correct copy, and that we're spooging good data by
492 		 * writing bad over it, but there's no way we can know that.
493 		 */
494 		wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
495 		    rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList,
496 		    flags, RF_IO_NORMAL_PRIORITY);
497 		if (wr_dag_h == NULL)
498 			goto done;
499 		wrBlock = wr_dag_h->succedents[0];
500 		/*
501 		 * Fill in a write node for each bad compare.
502 		 */
503 		for (i = 0; i < nbad; i++) {
504 			j = i + layoutPtr->numDataCol;
505 			pda = blockNode->succedents[j]->params[0].p;
506 			pda->bufPtr = blockNode->succedents[i]->params[1].p;
507 			wrBlock->succedents[i]->params[0].p = pda;
508 			wrBlock->succedents[i]->params[1].p = pda->bufPtr;
509 			wrBlock->succedents[i]->params[2].v = psID;
510 			wrBlock->succedents[0]->params[3].v =
511 			    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
512 			     which_ru);
513 		}
514 		bzero((char *) &tracerec, sizeof(tracerec));
515 		wr_dag_h->tracerec = &tracerec;
516 		if (rf_verifyParityDebug > 1) {
517 			printf("Parity verify write dag:\n");
518 			rf_PrintDAGList(wr_dag_h);
519 		}
520 		RF_LOCK_MUTEX(mcpair->mutex);
521 		mcpair->flag = 0;
522 		/* Fire off the write DAG. */
523 		rf_DispatchDAG(wr_dag_h, (void (*) (void *))
524 		    rf_MCPairWakeupFunc, (void *) mcpair);
525 		while (!mcpair->flag) {
526 			RF_WAIT_COND(mcpair->cond, mcpair->mutex);
527 		}
528 		RF_UNLOCK_MUTEX(mcpair->mutex);
529 		if (wr_dag_h->status != rf_enable) {
530 			RF_ERRORMSG("Unable to correct RAID1 parity in"
531 			            " VerifyParity.\n");
532 			goto done;
533 		}
534 		ret = RF_PARITY_CORRECTED;
535 	}
536 done:
537 	/*
538 	 * All done. We might've gotten here without doing part of the function,
539 	 * so cleanup what we have to and return our running status.
540 	 */
541 	if (asm_h)
542 		rf_FreeAccessStripeMap(asm_h);
543 	if (rd_dag_h)
544 		rf_FreeDAG(rd_dag_h);
545 	if (wr_dag_h)
546 		rf_FreeDAG(wr_dag_h);
547 	if (mcpair)
548 		rf_FreeMCPair(mcpair);
549 	rf_FreeAllocList(allocList);
550 	if (rf_verifyParityDebug) {
551 		printf("raid%d: RAID1 parity verify, returning %d.\n",
552 		    raidPtr->raidid, ret);
553 	}
554 	return (ret);
555 }
556 
557 int
rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf,int keep_it,int use_committed)558 rf_SubmitReconBufferRAID1(
559     RF_ReconBuffer_t	*rbuf,		/* The recon buffer to submit. */
560     int			 keep_it,	/*
561 					 * Whether we can keep this buffer
562 					 * or we have to return it ?
563 					 */
564     int			 use_committed	/*
565 					 * Whether to use a committed or
566 					 * an available recon buffer ?
567 					 */
568 )
569 {
570 	RF_ReconParityStripeStatus_t *pssPtr;
571 	RF_ReconCtrl_t *reconCtrlPtr;
572 	RF_RaidLayout_t *layoutPtr;
573 	int retcode, created;
574 	RF_CallbackDesc_t *cb, *p;
575 	RF_ReconBuffer_t *t;
576 	RF_Raid_t *raidPtr;
577 	caddr_t ta;
578 
579 	retcode = 0;
580 	created = 0;
581 
582 	raidPtr = rbuf->raidPtr;
583 	layoutPtr = &raidPtr->Layout;
584 	reconCtrlPtr = raidPtr->reconControl[rbuf->row];
585 
586 	RF_ASSERT(rbuf);
587 	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
588 
589 	if (rf_reconbufferDebug) {
590 		printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld"
591 		    " ru%d (failed offset %ld).\n", raidPtr->raidid, rbuf->row,
592 		    rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru,
593 		    (long) rbuf->failedDiskSectorOffset);
594 	}
595 	if (rf_reconDebug) {
596 		printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
597 		    (long) rbuf->parityStripeID, (long) rbuf->buffer);
598 		printf("RAID1 psid %ld   %02x %02x %02x %02x %02x\n",
599 		    (long) rbuf->parityStripeID,
600 		    rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2],
601 		    rbuf->buffer[3], rbuf->buffer[4]);
602 	}
603 	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
604 
605 	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
606 
607 	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
608 	    rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
609 	RF_ASSERT(pssPtr);	/*
610 				 * If it didn't exist, we wouldn't have gotten
611 				 * an rbuf for it.
612 				 */
613 
614 	/*
615 	 * Since this is simple mirroring, the first submission for a stripe
616 	 * is also treated as the last.
617 	 */
618 
619 	t = NULL;
620 	if (keep_it) {
621 		if (rf_reconbufferDebug) {
622 			printf("raid%d: RAID1 rbuf submission: keeping rbuf.\n",
623 			    raidPtr->raidid);
624 		}
625 		t = rbuf;
626 	} else {
627 		if (use_committed) {
628 			if (rf_reconbufferDebug) {
629 				printf("raid%d: RAID1 rbuf submission:"
630 				    " using committed rbuf.\n",
631 				    raidPtr->raidid);
632 			}
633 			t = reconCtrlPtr->committedRbufs;
634 			RF_ASSERT(t);
635 			reconCtrlPtr->committedRbufs = t->next;
636 			t->next = NULL;
637 		} else
638 			if (reconCtrlPtr->floatingRbufs) {
639 				if (rf_reconbufferDebug) {
640 					printf("raid%d: RAID1 rbuf submission:"
641 					    " using floating rbuf.\n",
642 					    raidPtr->raidid);
643 				}
644 				t = reconCtrlPtr->floatingRbufs;
645 				reconCtrlPtr->floatingRbufs = t->next;
646 				t->next = NULL;
647 			}
648 	}
649 	if (t == NULL) {
650 		if (rf_reconbufferDebug) {
651 			printf("raid%d: RAID1 rbuf submission:"
652 			    " waiting for rbuf.\n", raidPtr->raidid);
653 		}
654 		RF_ASSERT((keep_it == 0) && (use_committed == 0));
655 		raidPtr->procsInBufWait++;
656 		if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1))
657 		    && (raidPtr->numFullReconBuffers == 0)) {
658 			/* ruh-ro */
659 			RF_ERRORMSG("Buffer wait deadlock.\n");
660 			rf_PrintPSStatusTable(raidPtr, rbuf->row);
661 			RF_PANIC();
662 		}
663 		pssPtr->flags |= RF_PSS_BUFFERWAIT;
664 		cb = rf_AllocCallbackDesc();
665 		cb->row = rbuf->row;
666 		cb->col = rbuf->col;
667 		cb->callbackArg.v = rbuf->parityStripeID;
668 		cb->callbackArg2.v = rbuf->which_ru;
669 		cb->next = NULL;
670 		if (reconCtrlPtr->bufferWaitList == NULL) {
671 			/* We are the wait list- lucky us. */
672 			reconCtrlPtr->bufferWaitList = cb;
673 		} else {
674 			/* Append to wait list. */
675 			for (p = reconCtrlPtr->bufferWaitList; p->next;
676 			     p = p->next);
677 			p->next = cb;
678 		}
679 		retcode = 1;
680 		goto out;
681 	}
682 	if (t != rbuf) {
683 		t->row = rbuf->row;
684 		t->col = reconCtrlPtr->fcol;
685 		t->parityStripeID = rbuf->parityStripeID;
686 		t->which_ru = rbuf->which_ru;
687 		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
688 		t->spRow = rbuf->spRow;
689 		t->spCol = rbuf->spCol;
690 		t->spOffset = rbuf->spOffset;
691 		/* Swap buffers. DANCE ! */
692 		ta = t->buffer;
693 		t->buffer = rbuf->buffer;
694 		rbuf->buffer = ta;
695 	}
696 	/*
697 	 * Use the rbuf we've been given as the target.
698 	 */
699 	RF_ASSERT(pssPtr->rbuf == NULL);
700 	pssPtr->rbuf = t;
701 
702 	t->count = 1;
703 	/*
704 	 * Below, we use 1 for numDataCol (which is equal to the count in the
705 	 * previous line), so we'll always be done.
706 	 */
707 	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
708 
709 out:
710 	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
711 	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
712 	if (rf_reconbufferDebug) {
713 		printf("raid%d: RAID1 rbuf submission: returning %d.\n",
714 		    raidPtr->raidid, retcode);
715 	}
716 	return (retcode);
717 }
718