1 /* $OpenBSD: rf_raid1.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */
2 /* $NetBSD: rf_raid1.c,v 1.5 2000/01/08 22:57:30 oster Exp $ */
3
4 /*
5 * Copyright (c) 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: William V. Courtright II
9 *
10 * Permission to use, copy, modify and distribute this software and
11 * its documentation is hereby granted, provided that both the copyright
12 * notice and this permission notice appear in all copies of the
13 * software, derivative works or modified versions, and any portions
14 * thereof, and that both notices appear in supporting documentation.
15 *
16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 *
20 * Carnegie Mellon requests users of this software to return to
21 *
22 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23 * School of Computer Science
24 * Carnegie Mellon University
25 * Pittsburgh PA 15213-3890
26 *
27 * any improvements or extensions that they make and grant Carnegie the
28 * rights to redistribute these changes.
29 */
30
31 /*****************************************************************************
32 *
33 * rf_raid1.c -- Implements RAID Level 1.
34 *
35 *****************************************************************************/
36
37 #include "rf_raid.h"
38 #include "rf_raid1.h"
39 #include "rf_dag.h"
40 #include "rf_dagffrd.h"
41 #include "rf_dagffwr.h"
42 #include "rf_dagdegrd.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_diskqueue.h"
46 #include "rf_general.h"
47 #include "rf_utils.h"
48 #include "rf_parityscan.h"
49 #include "rf_mcpair.h"
50 #include "rf_layout.h"
51 #include "rf_map.h"
52 #include "rf_engine.h"
53 #include "rf_reconbuffer.h"
54
55 typedef struct RF_Raid1ConfigInfo_s {
56 RF_RowCol_t **stripeIdentifier;
57 } RF_Raid1ConfigInfo_t;
58
59
60 /* Start of day code specific to RAID level 1. */
61 int
rf_ConfigureRAID1(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)62 rf_ConfigureRAID1(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
63 RF_Config_t *cfgPtr)
64 {
65 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
66 RF_Raid1ConfigInfo_t *info;
67 RF_RowCol_t i;
68
69 /* Create a RAID level 1 configuration structure. */
70 RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t),
71 (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
72 if (info == NULL)
73 return (ENOMEM);
74 layoutPtr->layoutSpecificInfo = (void *) info;
75
76 /* ... and fill it in. */
77 info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2,
78 raidPtr->cleanupList);
79 if (info->stripeIdentifier == NULL)
80 return (ENOMEM);
81 for (i = 0; i < (raidPtr->numCol / 2); i++) {
82 info->stripeIdentifier[i][0] = (2 * i);
83 info->stripeIdentifier[i][1] = (2 * i) + 1;
84 }
85
86 RF_ASSERT(raidPtr->numRow == 1);
87
88 /*
89 * This implementation of RAID level 1 uses one row of numCol disks
90 * and allows multiple (numCol / 2) stripes per row. A stripe
91 * consists of a single data unit and a single parity (mirror) unit.
92 * Stripe id = raidAddr / stripeUnitSize.
93 */
94 raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
95 (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
96 layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk *
97 (raidPtr->numCol / 2);
98 layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
99 layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
100 raidPtr->logBytesPerSector;
101 layoutPtr->numDataCol = 1;
102 layoutPtr->numParityCol = 1;
103 return (0);
104 }
105
106
107 /*
108 * Returns the physical disk location of the primary copy in the mirror pair.
109 */
110 void
rf_MapSectorRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)111 rf_MapSectorRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
112 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
113 {
114 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
115 RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
116
117 *row = 0;
118 *col = 2 * mirrorPair;
119 *diskSector = ((SUID / (raidPtr->numCol / 2)) *
120 raidPtr->Layout.sectorsPerStripeUnit) +
121 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
122 }
123
124
125 /*
126 * Map Parity.
127 *
128 * Returns the physical disk location of the secondary copy in the mirror
129 * pair.
130 */
131 void
rf_MapParityRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)132 rf_MapParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
133 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
134 {
135 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
136 RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
137
138 *row = 0;
139 *col = (2 * mirrorPair) + 1;
140
141 *diskSector = ((SUID / (raidPtr->numCol / 2)) *
142 raidPtr->Layout.sectorsPerStripeUnit) +
143 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
144 }
145
146
147 /*
148 * IdentifyStripeRAID1
149 *
150 * Returns a list of disks for a given redundancy group.
151 */
152 void
rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t addr,RF_RowCol_t ** diskids,RF_RowCol_t * outRow)153 rf_IdentifyStripeRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
154 RF_RowCol_t **diskids, RF_RowCol_t *outRow)
155 {
156 RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout,
157 addr);
158 RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
159 RF_ASSERT(stripeID >= 0);
160 RF_ASSERT(addr >= 0);
161 *outRow = 0;
162 *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)];
163 RF_ASSERT(*diskids);
164 }
165
166
167 /*
168 * MapSIDToPSIDRAID1
169 *
170 * Maps a logical stripe to a stripe in the redundant array.
171 */
172 void
rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr,RF_StripeNum_t stripeID,RF_StripeNum_t * psID,RF_ReconUnitNum_t * which_ru)173 rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
174 RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
175 {
176 *which_ru = 0;
177 *psID = stripeID;
178 }
179
180
181
182 /*****************************************************************************
183 * Select a graph to perform a single-stripe access.
184 *
185 * Parameters: raidPtr - Description of the physical array.
186 * type - Type of operation (read or write) requested.
187 * asmap - Logical & physical addresses for this access.
188 * createFunc - Name of function to use to create the graph.
189 *****************************************************************************/
190
191 void
rf_RAID1DagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmap,RF_VoidFuncPtr * createFunc)192 rf_RAID1DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
193 RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
194 {
195 RF_RowCol_t frow, fcol, or, oc;
196 RF_PhysDiskAddr_t *failedPDA;
197 int prior_recon;
198 RF_RowStatus_t rstat;
199 RF_SectorNum_t oo;
200
201
202 RF_ASSERT(RF_IO_IS_R_OR_W(type));
203
204 if (asmap->numDataFailed + asmap->numParityFailed > 1) {
205 RF_ERRORMSG("Multiple disks failed in a single group !"
206 " Aborting I/O operation.\n");
207 *createFunc = NULL;
208 return;
209 }
210 if (asmap->numDataFailed + asmap->numParityFailed) {
211 /*
212 * We've got a fault. Re-map to spare space, iff applicable.
213 * Shouldn't the arch-independent code do this for us ?
214 * Anyway, it turns out if we don't do this here, then when
215 * we're reconstructing, writes go only to the surviving
216 * original disk, and aren't reflected on the reconstructed
217 * spare. Oops. --jimz
218 */
219 failedPDA = asmap->failedPDAs[0];
220 frow = failedPDA->row;
221 fcol = failedPDA->col;
222 rstat = raidPtr->status[frow];
223 prior_recon = (rstat == rf_rs_reconfigured) || (
224 (rstat == rf_rs_reconstructing) ?
225 rf_CheckRUReconstructed(raidPtr->reconControl[frow]
226 ->reconMap, failedPDA->startSector) : 0);
227 if (prior_recon) {
228 or = frow;
229 oc = fcol;
230 oo = failedPDA->startSector;
231 /*
232 * If we did distributed sparing, we'd monkey with
233 * that here.
234 * But we don't, so we'll.
235 */
236 failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
237 failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
238 /*
239 * Redirect other components, iff necessary. This looks
240 * pretty suspicious to me, but it's what the raid5
241 * DAG select does.
242 */
243 if (asmap->parityInfo->next) {
244 if (failedPDA == asmap->parityInfo) {
245 failedPDA->next->row = failedPDA->row;
246 failedPDA->next->col = failedPDA->col;
247 } else {
248 if (failedPDA ==
249 asmap->parityInfo->next) {
250 asmap->parityInfo->row =
251 failedPDA->row;
252 asmap->parityInfo->col =
253 failedPDA->col;
254 }
255 }
256 }
257 if (rf_dagDebug || rf_mapDebug) {
258 printf("raid%d: Redirected type '%c' r %d c %d"
259 " o %ld -> r %d c %d o %ld.\n",
260 raidPtr->raidid, type, or, oc, (long) oo,
261 failedPDA->row, failedPDA->col,
262 (long) failedPDA->startSector);
263 }
264 asmap->numDataFailed = asmap->numParityFailed = 0;
265 }
266 }
267 if (type == RF_IO_TYPE_READ) {
268 if (asmap->numDataFailed == 0)
269 *createFunc = (RF_VoidFuncPtr)
270 rf_CreateMirrorIdleReadDAG;
271 else
272 *createFunc = (RF_VoidFuncPtr)
273 rf_CreateRaidOneDegradedReadDAG;
274 } else {
275 *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
276 }
277 }
278
279 int
rf_VerifyParityRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidAddr,RF_PhysDiskAddr_t * parityPDA,int correct_it,RF_RaidAccessFlags_t flags)280 rf_VerifyParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
281 RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags)
282 {
283 int nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs;
284 RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
285 RF_DagHeader_t *rd_dag_h, *wr_dag_h;
286 RF_AccessStripeMapHeader_t *asm_h;
287 RF_AllocListElem_t *allocList;
288 RF_AccTraceEntry_t tracerec;
289 RF_ReconUnitNum_t which_ru;
290 RF_RaidLayout_t *layoutPtr;
291 RF_AccessStripeMap_t *aasm;
292 RF_SectorCount_t nsector;
293 RF_RaidAddr_t startAddr;
294 char *buf, *buf1, *buf2;
295 RF_PhysDiskAddr_t *pda;
296 RF_StripeNum_t psID;
297 RF_MCPair_t *mcpair;
298
299 layoutPtr = &raidPtr->Layout;
300 startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
301 nsector = parityPDA->numSector;
302 nbytes = rf_RaidAddressToByte(raidPtr, nsector);
303 psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
304
305 asm_h = NULL;
306 rd_dag_h = wr_dag_h = NULL;
307 mcpair = NULL;
308
309 ret = RF_PARITY_COULD_NOT_VERIFY;
310
311 rf_MakeAllocList(allocList);
312 if (allocList == NULL)
313 return (RF_PARITY_COULD_NOT_VERIFY);
314 mcpair = rf_AllocMCPair();
315 if (mcpair == NULL)
316 goto done;
317 RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
318 stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
319 bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol);
320 RF_MallocAndAdd(buf, bcount, (char *), allocList);
321 if (buf == NULL)
322 goto done;
323 if (rf_verifyParityDebug) {
324 printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d"
325 " (%lx - %lx).\n", raidPtr->raidid, (long) buf, bcount,
326 (long) buf, (long) buf + bcount);
327 }
328 /*
329 * Generate a DAG that will read the entire stripe- then we can
330 * just compare data chunks versus "parity" chunks.
331 */
332
333 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
334 rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
335 RF_IO_NORMAL_PRIORITY);
336 if (rd_dag_h == NULL)
337 goto done;
338 blockNode = rd_dag_h->succedents[0];
339 unblockNode = blockNode->succedents[0]->succedents[0];
340
341 /*
342 * Map the access to physical disk addresses (PDAs)- this will
343 * get us both a list of data addresses, and "parity" addresses
344 * (which are really mirror copies).
345 */
346 asm_h = rf_MapAccess(raidPtr, startAddr,
347 layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
348 aasm = asm_h->stripeMap;
349
350 buf1 = buf;
351 /*
352 * Loop through the data blocks, setting up read nodes for each.
353 */
354 for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol;
355 i++, pda = pda->next) {
356 RF_ASSERT(pda);
357
358 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
359
360 RF_ASSERT(pda->numSector != 0);
361 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
362 /* cannot verify parity with dead disk */
363 goto done;
364 }
365 pda->bufPtr = buf1;
366 blockNode->succedents[i]->params[0].p = pda;
367 blockNode->succedents[i]->params[1].p = buf1;
368 blockNode->succedents[i]->params[2].v = psID;
369 blockNode->succedents[i]->params[3].v =
370 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
371 buf1 += nbytes;
372 }
373 RF_ASSERT(pda == NULL);
374 /*
375 * Keep i, buf1 running.
376 *
377 * Loop through parity blocks, setting up read nodes for each.
378 */
379 for (pda = aasm->parityInfo; i < layoutPtr->numDataCol +
380 layoutPtr->numParityCol; i++, pda = pda->next) {
381 RF_ASSERT(pda);
382 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
383 RF_ASSERT(pda->numSector != 0);
384 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
385 /* Cannot verify parity with dead disk. */
386 goto done;
387 }
388 pda->bufPtr = buf1;
389 blockNode->succedents[i]->params[0].p = pda;
390 blockNode->succedents[i]->params[1].p = buf1;
391 blockNode->succedents[i]->params[2].v = psID;
392 blockNode->succedents[i]->params[3].v =
393 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
394 buf1 += nbytes;
395 }
396 RF_ASSERT(pda == NULL);
397
398 bzero((char *) &tracerec, sizeof(tracerec));
399 rd_dag_h->tracerec = &tracerec;
400
401 if (rf_verifyParityDebug > 1) {
402 printf("raid%d: RAID1 parity verify read dag:\n",
403 raidPtr->raidid);
404 rf_PrintDAGList(rd_dag_h);
405 }
406 RF_LOCK_MUTEX(mcpair->mutex);
407 mcpair->flag = 0;
408 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
409 (void *) mcpair);
410 while (mcpair->flag == 0) {
411 RF_WAIT_MCPAIR(mcpair);
412 }
413 RF_UNLOCK_MUTEX(mcpair->mutex);
414
415 if (rd_dag_h->status != rf_enable) {
416 RF_ERRORMSG("Unable to verify raid1 parity:"
417 " can't read stripe.\n");
418 ret = RF_PARITY_COULD_NOT_VERIFY;
419 goto done;
420 }
421 /*
422 * buf1 is the beginning of the data blocks chunk.
423 * buf2 is the beginning of the parity blocks chunk.
424 */
425 buf1 = buf;
426 buf2 = buf + (nbytes * layoutPtr->numDataCol);
427 ret = RF_PARITY_OKAY;
428 /*
429 * bbufs is "bad bufs"- an array whose entries are the data
430 * column numbers where we had miscompares. (That is, column 0
431 * and column 1 of the array are mirror copies, and are considered
432 * "data column 0" for this purpose).
433 */
434 RF_MallocAndAdd(bbufs, layoutPtr->numParityCol * sizeof(int), (int *),
435 allocList);
436 nbad = 0;
437 /*
438 * Check data vs "parity" (mirror copy).
439 */
440 for (i = 0; i < layoutPtr->numDataCol; i++) {
441 if (rf_verifyParityDebug) {
442 printf("raid%d: RAID1 parity verify %d bytes: i=%d"
443 " buf1=%lx buf2=%lx buf=%lx.\n", raidPtr->raidid,
444 nbytes, i, (long) buf1, (long) buf2, (long) buf);
445 }
446 ret = bcmp(buf1, buf2, nbytes);
447 if (ret) {
448 if (rf_verifyParityDebug > 1) {
449 for (j = 0; j < nbytes; j++) {
450 if (buf1[j] != buf2[j])
451 break;
452 }
453 printf("psid=%ld j=%d\n", (long) psID, j);
454 printf("buf1 %02x %02x %02x %02x %02x\n",
455 buf1[0] & 0xff, buf1[1] & 0xff,
456 buf1[2] & 0xff, buf1[3] & 0xff,
457 buf1[4] & 0xff);
458 printf("buf2 %02x %02x %02x %02x %02x\n",
459 buf2[0] & 0xff, buf2[1] & 0xff,
460 buf2[2] & 0xff, buf2[3] & 0xff,
461 buf2[4] & 0xff);
462 }
463 if (rf_verifyParityDebug) {
464 printf("raid%d: RAID1: found bad parity,"
465 " i=%d.\n", raidPtr->raidid, i);
466 }
467 /*
468 * Parity is bad. Keep track of which columns were bad.
469 */
470 if (bbufs)
471 bbufs[nbad] = i;
472 nbad++;
473 ret = RF_PARITY_BAD;
474 }
475 buf1 += nbytes;
476 buf2 += nbytes;
477 }
478
479 if ((ret != RF_PARITY_OKAY) && correct_it) {
480 ret = RF_PARITY_COULD_NOT_CORRECT;
481 if (rf_verifyParityDebug) {
482 printf("raid%d: RAID1 parity verify:"
483 " parity not correct.\n", raidPtr->raidid);
484 }
485 if (bbufs == NULL)
486 goto done;
487 /*
488 * Make a DAG with one write node for each bad unit. We'll
489 * simply write the contents of the data unit onto the parity
490 * unit for correction. (It's possible that the mirror copy
491 * was the correct copy, and that we're spooging good data by
492 * writing bad over it, but there's no way we can know that.
493 */
494 wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
495 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList,
496 flags, RF_IO_NORMAL_PRIORITY);
497 if (wr_dag_h == NULL)
498 goto done;
499 wrBlock = wr_dag_h->succedents[0];
500 /*
501 * Fill in a write node for each bad compare.
502 */
503 for (i = 0; i < nbad; i++) {
504 j = i + layoutPtr->numDataCol;
505 pda = blockNode->succedents[j]->params[0].p;
506 pda->bufPtr = blockNode->succedents[i]->params[1].p;
507 wrBlock->succedents[i]->params[0].p = pda;
508 wrBlock->succedents[i]->params[1].p = pda->bufPtr;
509 wrBlock->succedents[i]->params[2].v = psID;
510 wrBlock->succedents[0]->params[3].v =
511 RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
512 which_ru);
513 }
514 bzero((char *) &tracerec, sizeof(tracerec));
515 wr_dag_h->tracerec = &tracerec;
516 if (rf_verifyParityDebug > 1) {
517 printf("Parity verify write dag:\n");
518 rf_PrintDAGList(wr_dag_h);
519 }
520 RF_LOCK_MUTEX(mcpair->mutex);
521 mcpair->flag = 0;
522 /* Fire off the write DAG. */
523 rf_DispatchDAG(wr_dag_h, (void (*) (void *))
524 rf_MCPairWakeupFunc, (void *) mcpair);
525 while (!mcpair->flag) {
526 RF_WAIT_COND(mcpair->cond, mcpair->mutex);
527 }
528 RF_UNLOCK_MUTEX(mcpair->mutex);
529 if (wr_dag_h->status != rf_enable) {
530 RF_ERRORMSG("Unable to correct RAID1 parity in"
531 " VerifyParity.\n");
532 goto done;
533 }
534 ret = RF_PARITY_CORRECTED;
535 }
536 done:
537 /*
538 * All done. We might've gotten here without doing part of the function,
539 * so cleanup what we have to and return our running status.
540 */
541 if (asm_h)
542 rf_FreeAccessStripeMap(asm_h);
543 if (rd_dag_h)
544 rf_FreeDAG(rd_dag_h);
545 if (wr_dag_h)
546 rf_FreeDAG(wr_dag_h);
547 if (mcpair)
548 rf_FreeMCPair(mcpair);
549 rf_FreeAllocList(allocList);
550 if (rf_verifyParityDebug) {
551 printf("raid%d: RAID1 parity verify, returning %d.\n",
552 raidPtr->raidid, ret);
553 }
554 return (ret);
555 }
556
557 int
rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf,int keep_it,int use_committed)558 rf_SubmitReconBufferRAID1(
559 RF_ReconBuffer_t *rbuf, /* The recon buffer to submit. */
560 int keep_it, /*
561 * Whether we can keep this buffer
562 * or we have to return it ?
563 */
564 int use_committed /*
565 * Whether to use a committed or
566 * an available recon buffer ?
567 */
568 )
569 {
570 RF_ReconParityStripeStatus_t *pssPtr;
571 RF_ReconCtrl_t *reconCtrlPtr;
572 RF_RaidLayout_t *layoutPtr;
573 int retcode, created;
574 RF_CallbackDesc_t *cb, *p;
575 RF_ReconBuffer_t *t;
576 RF_Raid_t *raidPtr;
577 caddr_t ta;
578
579 retcode = 0;
580 created = 0;
581
582 raidPtr = rbuf->raidPtr;
583 layoutPtr = &raidPtr->Layout;
584 reconCtrlPtr = raidPtr->reconControl[rbuf->row];
585
586 RF_ASSERT(rbuf);
587 RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
588
589 if (rf_reconbufferDebug) {
590 printf("raid%d: RAID1 reconbuffer submission r%d c%d psid %ld"
591 " ru%d (failed offset %ld).\n", raidPtr->raidid, rbuf->row,
592 rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru,
593 (long) rbuf->failedDiskSectorOffset);
594 }
595 if (rf_reconDebug) {
596 printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
597 (long) rbuf->parityStripeID, (long) rbuf->buffer);
598 printf("RAID1 psid %ld %02x %02x %02x %02x %02x\n",
599 (long) rbuf->parityStripeID,
600 rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2],
601 rbuf->buffer[3], rbuf->buffer[4]);
602 }
603 RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
604
605 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
606
607 pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
608 rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
609 RF_ASSERT(pssPtr); /*
610 * If it didn't exist, we wouldn't have gotten
611 * an rbuf for it.
612 */
613
614 /*
615 * Since this is simple mirroring, the first submission for a stripe
616 * is also treated as the last.
617 */
618
619 t = NULL;
620 if (keep_it) {
621 if (rf_reconbufferDebug) {
622 printf("raid%d: RAID1 rbuf submission: keeping rbuf.\n",
623 raidPtr->raidid);
624 }
625 t = rbuf;
626 } else {
627 if (use_committed) {
628 if (rf_reconbufferDebug) {
629 printf("raid%d: RAID1 rbuf submission:"
630 " using committed rbuf.\n",
631 raidPtr->raidid);
632 }
633 t = reconCtrlPtr->committedRbufs;
634 RF_ASSERT(t);
635 reconCtrlPtr->committedRbufs = t->next;
636 t->next = NULL;
637 } else
638 if (reconCtrlPtr->floatingRbufs) {
639 if (rf_reconbufferDebug) {
640 printf("raid%d: RAID1 rbuf submission:"
641 " using floating rbuf.\n",
642 raidPtr->raidid);
643 }
644 t = reconCtrlPtr->floatingRbufs;
645 reconCtrlPtr->floatingRbufs = t->next;
646 t->next = NULL;
647 }
648 }
649 if (t == NULL) {
650 if (rf_reconbufferDebug) {
651 printf("raid%d: RAID1 rbuf submission:"
652 " waiting for rbuf.\n", raidPtr->raidid);
653 }
654 RF_ASSERT((keep_it == 0) && (use_committed == 0));
655 raidPtr->procsInBufWait++;
656 if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1))
657 && (raidPtr->numFullReconBuffers == 0)) {
658 /* ruh-ro */
659 RF_ERRORMSG("Buffer wait deadlock.\n");
660 rf_PrintPSStatusTable(raidPtr, rbuf->row);
661 RF_PANIC();
662 }
663 pssPtr->flags |= RF_PSS_BUFFERWAIT;
664 cb = rf_AllocCallbackDesc();
665 cb->row = rbuf->row;
666 cb->col = rbuf->col;
667 cb->callbackArg.v = rbuf->parityStripeID;
668 cb->callbackArg2.v = rbuf->which_ru;
669 cb->next = NULL;
670 if (reconCtrlPtr->bufferWaitList == NULL) {
671 /* We are the wait list- lucky us. */
672 reconCtrlPtr->bufferWaitList = cb;
673 } else {
674 /* Append to wait list. */
675 for (p = reconCtrlPtr->bufferWaitList; p->next;
676 p = p->next);
677 p->next = cb;
678 }
679 retcode = 1;
680 goto out;
681 }
682 if (t != rbuf) {
683 t->row = rbuf->row;
684 t->col = reconCtrlPtr->fcol;
685 t->parityStripeID = rbuf->parityStripeID;
686 t->which_ru = rbuf->which_ru;
687 t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
688 t->spRow = rbuf->spRow;
689 t->spCol = rbuf->spCol;
690 t->spOffset = rbuf->spOffset;
691 /* Swap buffers. DANCE ! */
692 ta = t->buffer;
693 t->buffer = rbuf->buffer;
694 rbuf->buffer = ta;
695 }
696 /*
697 * Use the rbuf we've been given as the target.
698 */
699 RF_ASSERT(pssPtr->rbuf == NULL);
700 pssPtr->rbuf = t;
701
702 t->count = 1;
703 /*
704 * Below, we use 1 for numDataCol (which is equal to the count in the
705 * previous line), so we'll always be done.
706 */
707 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
708
709 out:
710 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
711 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
712 if (rf_reconbufferDebug) {
713 printf("raid%d: RAID1 rbuf submission: returning %d.\n",
714 raidPtr->raidid, retcode);
715 }
716 return (retcode);
717 }
718