1 /*        $NetBSD: rf_raid1.c,v 1.39 2021/07/23 22:34:12 oster Exp $  */
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*****************************************************************************
30  *
31  * rf_raid1.c -- implements RAID Level 1
32  *
33  *****************************************************************************/
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: rf_raid1.c,v 1.39 2021/07/23 22:34:12 oster Exp $");
37 
38 #include "rf_raid.h"
39 #include "rf_raid1.h"
40 #include "rf_dag.h"
41 #include "rf_dagffrd.h"
42 #include "rf_dagffwr.h"
43 #include "rf_dagdegrd.h"
44 #include "rf_dagutils.h"
45 #include "rf_dagfuncs.h"
46 #include "rf_diskqueue.h"
47 #include "rf_general.h"
48 #include "rf_utils.h"
49 #include "rf_parityscan.h"
50 #include "rf_mcpair.h"
51 #include "rf_layout.h"
52 #include "rf_map.h"
53 #include "rf_engine.h"
54 #include "rf_reconbuffer.h"
55 
56 typedef struct RF_Raid1ConfigInfo_s {
57           RF_RowCol_t **stripeIdentifier;
58 }       RF_Raid1ConfigInfo_t;
59 /* start of day code specific to RAID level 1 */
60 int
rf_ConfigureRAID1(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)61 rf_ConfigureRAID1(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
62                       RF_Config_t *cfgPtr)
63 {
64           RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
65           RF_Raid1ConfigInfo_t *info;
66           RF_RowCol_t i;
67 
68           /* Sanity check the number of columns... */
69           if (raidPtr->numCol < 2 || raidPtr->numCol % 2 != 0) {
70                     return (EINVAL);
71           }
72 
73           /* create a RAID level 1 configuration structure */
74           info = RF_MallocAndAdd(sizeof(*info), raidPtr->cleanupList);
75           if (info == NULL)
76                     return (ENOMEM);
77           layoutPtr->layoutSpecificInfo = (void *) info;
78 
79           /* ... and fill it in. */
80           info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList);
81           if (info->stripeIdentifier == NULL)
82                     return (ENOMEM);
83           for (i = 0; i < (raidPtr->numCol / 2); i++) {
84                     info->stripeIdentifier[i][0] = (2 * i);
85                     info->stripeIdentifier[i][1] = (2 * i) + 1;
86           }
87 
88           /* this implementation of RAID level 1 uses one row of numCol disks
89            * and allows multiple (numCol / 2) stripes per row.  A stripe
90            * consists of a single data unit and a single parity (mirror) unit.
91            * stripe id = raidAddr / stripeUnitSize */
92           raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
93           layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2);
94           layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
95           layoutPtr->numDataCol = 1;
96           layoutPtr->numParityCol = 1;
97           return (0);
98 }
99 
100 
101 /* returns the physical disk location of the primary copy in the mirror pair */
102 void
rf_MapSectorRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)103 rf_MapSectorRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
104                       RF_RowCol_t *col, RF_SectorNum_t *diskSector,
105                       int remap)
106 {
107           RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
108           RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
109 
110           *col = 2 * mirrorPair;
111           *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
112 }
113 
114 
115 /* Map Parity
116  *
117  * returns the physical disk location of the secondary copy in the mirror
118  * pair
119  */
120 void
rf_MapParityRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)121 rf_MapParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
122                       RF_RowCol_t *col, RF_SectorNum_t *diskSector,
123                       int remap)
124 {
125           RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
126           RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
127 
128           *col = (2 * mirrorPair) + 1;
129 
130           *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
131 }
132 
133 
134 /* IdentifyStripeRAID1
135  *
136  * returns a list of disks for a given redundancy group
137  */
138 void
rf_IdentifyStripeRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t addr,RF_RowCol_t ** diskids)139 rf_IdentifyStripeRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
140                            RF_RowCol_t **diskids)
141 {
142           RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
143           RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
144           RF_ASSERT(stripeID >= 0);
145           RF_ASSERT(addr >= 0);
146           *diskids = info->stripeIdentifier[stripeID % (raidPtr->numCol / 2)];
147           RF_ASSERT(*diskids);
148 }
149 
150 
151 /* MapSIDToPSIDRAID1
152  *
153  * maps a logical stripe to a stripe in the redundant array
154  */
155 void
rf_MapSIDToPSIDRAID1(RF_RaidLayout_t * layoutPtr,RF_StripeNum_t stripeID,RF_StripeNum_t * psID,RF_ReconUnitNum_t * which_ru)156 rf_MapSIDToPSIDRAID1(RF_RaidLayout_t *layoutPtr,
157                          RF_StripeNum_t stripeID,
158                          RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
159 {
160           *which_ru = 0;
161           *psID = stripeID;
162 }
163 
164 
165 
166 /******************************************************************************
167  * select a graph to perform a single-stripe access
168  *
169  * Parameters:  raidPtr    - description of the physical array
170  *              type       - type of operation (read or write) requested
171  *              asmap      - logical & physical addresses for this access
172  *              createFunc - name of function to use to create the graph
173  *****************************************************************************/
174 
175 void
rf_RAID1DagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmap,RF_VoidFuncPtr * createFunc)176 rf_RAID1DagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
177                       RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
178 {
179           RF_RowCol_t fcol, oc __unused;
180           RF_PhysDiskAddr_t *failedPDA;
181           int     prior_recon;
182           RF_RowStatus_t rstat;
183           RF_SectorNum_t oo __unused;
184 
185 
186           RF_ASSERT(RF_IO_IS_R_OR_W(type));
187 
188           if (asmap->numDataFailed + asmap->numParityFailed > 1) {
189 #if RF_DEBUG_DAG
190                     if (rf_dagDebug)
191                               RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
192 #endif
193                     *createFunc = NULL;
194                     return;
195           }
196           if (asmap->numDataFailed + asmap->numParityFailed) {
197                     /*
198                    * We've got a fault. Re-map to spare space, iff applicable.
199                    * Shouldn't the arch-independent code do this for us?
200                    * Anyway, it turns out if we don't do this here, then when
201                    * we're reconstructing, writes go only to the surviving
202                    * original disk, and aren't reflected on the reconstructed
203                    * spare. Oops. --jimz
204                    */
205                     failedPDA = asmap->failedPDAs[0];
206                     fcol = failedPDA->col;
207                     rstat = raidPtr->status;
208                     prior_recon = (rstat == rf_rs_reconfigured) || (
209                         (rstat == rf_rs_reconstructing) ?
210                         rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, failedPDA->startSector) : 0
211                         );
212                     if (prior_recon) {
213                               oc = fcol;
214                               oo = failedPDA->startSector;
215                               /*
216                              * If we did distributed sparing, we'd monkey with that here.
217                              * But we don't, so we'll
218                              */
219                               failedPDA->col = raidPtr->Disks[fcol].spareCol;
220                               /*
221                              * Redirect other components, iff necessary. This looks
222                              * pretty suspicious to me, but it's what the raid5
223                              * DAG select does.
224                              */
225                               if (asmap->parityInfo->next) {
226                                         if (failedPDA == asmap->parityInfo) {
227                                                   failedPDA->next->col = failedPDA->col;
228                                         } else {
229                                                   if (failedPDA == asmap->parityInfo->next) {
230                                                             asmap->parityInfo->col = failedPDA->col;
231                                                   }
232                                         }
233                               }
234 #if RF_DEBUG_DAG > 0 || RF_DEBUG_MAP > 0
235                               if (rf_dagDebug || rf_mapDebug) {
236                                         printf("raid%d: Redirected type '%c' c %d o %ld -> c %d o %ld\n",
237                                                raidPtr->raidid, type, oc,
238                                                (long) oo,
239                                                failedPDA->col,
240                                                (long) failedPDA->startSector);
241                               }
242 #endif
243                               asmap->numDataFailed = asmap->numParityFailed = 0;
244                     }
245           }
246           if (type == RF_IO_TYPE_READ) {
247                     if (asmap->numDataFailed == 0)
248                               *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorIdleReadDAG;
249                     else
250                               *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneDegradedReadDAG;
251           } else {
252                     *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
253           }
254 }
255 
256 int
rf_VerifyParityRAID1(RF_Raid_t * raidPtr,RF_RaidAddr_t raidAddr,RF_PhysDiskAddr_t * parityPDA,int correct_it,RF_RaidAccessFlags_t flags)257 rf_VerifyParityRAID1(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
258                          RF_PhysDiskAddr_t *parityPDA, int correct_it,
259                          RF_RaidAccessFlags_t flags)
260 {
261           int     nbytes, bcount, stripeWidth, ret, i, j, nbad, *bbufs;
262           RF_DagNode_t *blockNode, *wrBlock;
263           RF_DagHeader_t *rd_dag_h, *wr_dag_h;
264           RF_AccessStripeMapHeader_t *asm_h;
265           RF_AllocListElem_t *allocList;
266 #if RF_ACC_TRACE > 0
267           RF_AccTraceEntry_t tracerec;
268 #endif
269           RF_ReconUnitNum_t which_ru;
270           RF_RaidLayout_t *layoutPtr;
271           RF_AccessStripeMap_t *aasm;
272           RF_SectorCount_t nsector;
273           RF_RaidAddr_t startAddr;
274           char   *bf, *buf1, *buf2;
275           RF_PhysDiskAddr_t *pda;
276           RF_StripeNum_t psID;
277           RF_MCPair_t *mcpair;
278 
279           layoutPtr = &raidPtr->Layout;
280           startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
281           nsector = parityPDA->numSector;
282           nbytes = rf_RaidAddressToByte(raidPtr, nsector);
283           psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
284 
285           asm_h = NULL;
286           rd_dag_h = wr_dag_h = NULL;
287           mcpair = NULL;
288 
289           ret = RF_PARITY_COULD_NOT_VERIFY;
290 
291           rf_MakeAllocList(allocList);
292           if (allocList == NULL)
293                     return (RF_PARITY_COULD_NOT_VERIFY);
294           mcpair = rf_AllocMCPair(raidPtr);
295           if (mcpair == NULL)
296                     goto done;
297           RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
298           stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
299           bcount = nbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol);
300           bf = RF_MallocAndAdd(bcount, allocList);
301           if (bf == NULL)
302                     goto done;
303 #if RF_DEBUG_VERIFYPARITY
304           if (rf_verifyParityDebug) {
305                     printf("raid%d: RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n",
306                            raidPtr->raidid, (long) bf, bcount, (long) bf,
307                            (long) bf + bcount);
308           }
309 #endif
310           /*
311          * Generate a DAG which will read the entire stripe- then we can
312          * just compare data chunks versus "parity" chunks.
313          */
314 
315           rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, bf,
316               rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
317               RF_IO_NORMAL_PRIORITY);
318           if (rd_dag_h == NULL)
319                     goto done;
320           blockNode = rd_dag_h->succedents[0];
321 
322           /*
323          * Map the access to physical disk addresses (PDAs)- this will
324          * get us both a list of data addresses, and "parity" addresses
325          * (which are really mirror copies).
326          */
327           asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe,
328               bf, RF_DONT_REMAP);
329           aasm = asm_h->stripeMap;
330 
331           buf1 = bf;
332           /*
333          * Loop through the data blocks, setting up read nodes for each.
334          */
335           for (pda = aasm->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
336                     RF_ASSERT(pda);
337 
338                     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
339 
340                     RF_ASSERT(pda->numSector != 0);
341                     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
342                               /* cannot verify parity with dead disk */
343                               goto done;
344                     }
345                     pda->bufPtr = buf1;
346                     blockNode->succedents[i]->params[0].p = pda;
347                     blockNode->succedents[i]->params[1].p = buf1;
348                     blockNode->succedents[i]->params[2].v = psID;
349                     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
350                     buf1 += nbytes;
351           }
352           RF_ASSERT(pda == NULL);
353           /*
354          * keep i, buf1 running
355          *
356          * Loop through parity blocks, setting up read nodes for each.
357          */
358           for (pda = aasm->parityInfo; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++, pda = pda->next) {
359                     RF_ASSERT(pda);
360                     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
361                     RF_ASSERT(pda->numSector != 0);
362                     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
363                               /* cannot verify parity with dead disk */
364                               goto done;
365                     }
366                     pda->bufPtr = buf1;
367                     blockNode->succedents[i]->params[0].p = pda;
368                     blockNode->succedents[i]->params[1].p = buf1;
369                     blockNode->succedents[i]->params[2].v = psID;
370                     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
371                     buf1 += nbytes;
372           }
373           RF_ASSERT(pda == NULL);
374 
375 #if RF_ACC_TRACE > 0
376           memset(&tracerec, 0, sizeof(tracerec));
377           rd_dag_h->tracerec = &tracerec;
378 #endif
379 #if 0
380           if (rf_verifyParityDebug > 1) {
381                     printf("raid%d: RAID1 parity verify read dag:\n",
382                            raidPtr->raidid);
383                     rf_PrintDAGList(rd_dag_h);
384           }
385 #endif
386           RF_LOCK_MCPAIR(mcpair);
387           mcpair->flag = 0;
388           RF_UNLOCK_MCPAIR(mcpair);
389 
390           rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
391               (void *) mcpair);
392 
393           RF_LOCK_MCPAIR(mcpair);
394           while (mcpair->flag == 0) {
395                     RF_WAIT_MCPAIR(mcpair);
396           }
397           RF_UNLOCK_MCPAIR(mcpair);
398 
399           if (rd_dag_h->status != rf_enable) {
400                     RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n");
401                     ret = RF_PARITY_COULD_NOT_VERIFY;
402                     goto done;
403           }
404           /*
405          * buf1 is the beginning of the data blocks chunk
406          * buf2 is the beginning of the parity blocks chunk
407          */
408           buf1 = bf;
409           buf2 = bf + (nbytes * layoutPtr->numDataCol);
410           ret = RF_PARITY_OKAY;
411           /*
412          * bbufs is "bad bufs"- an array whose entries are the data
413          * column numbers where we had miscompares. (That is, column 0
414          * and column 1 of the array are mirror copies, and are considered
415          * "data column 0" for this purpose).
416          */
417           bbufs = RF_MallocAndAdd(layoutPtr->numParityCol * sizeof(*bbufs),
418               allocList);
419           nbad = 0;
420           /*
421          * Check data vs "parity" (mirror copy).
422          */
423           for (i = 0; i < layoutPtr->numDataCol; i++) {
424 #if RF_DEBUG_VERIFYPARITY
425                     if (rf_verifyParityDebug) {
426                               printf("raid%d: RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n",
427                                      raidPtr->raidid, nbytes, i, (long) buf1,
428                                      (long) buf2, (long) bf);
429                     }
430 #endif
431                     ret = memcmp(buf1, buf2, nbytes);
432                     if (ret) {
433 #if RF_DEBUG_VERIFYPARITY
434                               if (rf_verifyParityDebug > 1) {
435                                         for (j = 0; j < nbytes; j++) {
436                                                   if (buf1[j] != buf2[j])
437                                                             break;
438                                         }
439                                         printf("psid=%ld j=%d\n", (long) psID, j);
440                                         printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0] & 0xff,
441                                             buf1[1] & 0xff, buf1[2] & 0xff, buf1[3] & 0xff, buf1[4] & 0xff);
442                                         printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0] & 0xff,
443                                             buf2[1] & 0xff, buf2[2] & 0xff, buf2[3] & 0xff, buf2[4] & 0xff);
444                               }
445                               if (rf_verifyParityDebug) {
446                                         printf("raid%d: RAID1: found bad parity, i=%d\n", raidPtr->raidid, i);
447                               }
448 #endif
449                               /*
450                              * Parity is bad. Keep track of which columns were bad.
451                              */
452                               if (bbufs)
453                                         bbufs[nbad] = i;
454                               nbad++;
455                               ret = RF_PARITY_BAD;
456                     }
457                     buf1 += nbytes;
458                     buf2 += nbytes;
459           }
460 
461           if ((ret != RF_PARITY_OKAY) && correct_it) {
462                     ret = RF_PARITY_COULD_NOT_CORRECT;
463 #if RF_DEBUG_VERIFYPARITY
464                     if (rf_verifyParityDebug) {
465                               printf("raid%d: RAID1 parity verify: parity not correct\n", raidPtr->raidid);
466                     }
467 #endif
468                     if (bbufs == NULL)
469                               goto done;
470                     /*
471                    * Make a DAG with one write node for each bad unit. We'll simply
472                    * write the contents of the data unit onto the parity unit for
473                    * correction. (It's possible that the mirror copy was the correct
474                    * copy, and that we're spooging good data by writing bad over it,
475                    * but there's no way we can know that.
476                    */
477                     wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, bf,
478                         rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags,
479                         RF_IO_NORMAL_PRIORITY);
480                     if (wr_dag_h == NULL)
481                               goto done;
482                     wrBlock = wr_dag_h->succedents[0];
483                     /*
484                    * Fill in a write node for each bad compare.
485                    */
486                     for (i = 0; i < nbad; i++) {
487                               j = i + layoutPtr->numDataCol;
488                               pda = blockNode->succedents[j]->params[0].p;
489                               pda->bufPtr = blockNode->succedents[i]->params[1].p;
490                               wrBlock->succedents[i]->params[0].p = pda;
491                               wrBlock->succedents[i]->params[1].p = pda->bufPtr;
492                               wrBlock->succedents[i]->params[2].v = psID;
493                               wrBlock->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
494                     }
495 #if RF_ACC_TRACE > 0
496                     memset(&tracerec, 0, sizeof(tracerec));
497                     wr_dag_h->tracerec = &tracerec;
498 #endif
499 #if 0
500                     if (rf_verifyParityDebug > 1) {
501                               printf("Parity verify write dag:\n");
502                               rf_PrintDAGList(wr_dag_h);
503                     }
504 #endif
505                     RF_LOCK_MCPAIR(mcpair);
506                     mcpair->flag = 0;
507                     RF_UNLOCK_MCPAIR(mcpair);
508 
509                     /* fire off the write DAG */
510                     rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
511                         (void *) mcpair);
512 
513                     RF_LOCK_MCPAIR(mcpair);
514                     while (!mcpair->flag) {
515                               RF_WAIT_MCPAIR(mcpair);
516                     }
517                     RF_UNLOCK_MCPAIR(mcpair);
518                     if (wr_dag_h->status != rf_enable) {
519                               RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n");
520                               goto done;
521                     }
522                     ret = RF_PARITY_CORRECTED;
523           }
524 done:
525           /*
526          * All done. We might've gotten here without doing part of the function,
527          * so cleanup what we have to and return our running status.
528          */
529           if (asm_h)
530                     rf_FreeAccessStripeMap(raidPtr, asm_h);
531           if (rd_dag_h)
532                     rf_FreeDAG(rd_dag_h);
533           if (wr_dag_h)
534                     rf_FreeDAG(wr_dag_h);
535           if (mcpair)
536                     rf_FreeMCPair(raidPtr, mcpair);
537           rf_FreeAllocList(allocList);
538 #if RF_DEBUG_VERIFYPARITY
539           if (rf_verifyParityDebug) {
540                     printf("raid%d: RAID1 parity verify, returning %d\n",
541                            raidPtr->raidid, ret);
542           }
543 #endif
544           return (ret);
545 }
546 
547 /* rbuf          - the recon buffer to submit
548  * keep_it       - whether we can keep this buffer or we have to return it
549  * use_committed - whether to use a committed or an available recon buffer
550  */
551 
552 int
rf_SubmitReconBufferRAID1(RF_ReconBuffer_t * rbuf,int keep_it,int use_committed)553 rf_SubmitReconBufferRAID1(RF_ReconBuffer_t *rbuf, int keep_it,
554                                 int use_committed)
555 {
556           RF_ReconParityStripeStatus_t *pssPtr;
557           RF_ReconCtrl_t *reconCtrlPtr;
558           int     retcode;
559           RF_CallbackValueDesc_t *cb, *p;
560           RF_ReconBuffer_t *t;
561           RF_Raid_t *raidPtr;
562           void *ta;
563 
564           retcode = 0;
565 
566           raidPtr = rbuf->raidPtr;
567           reconCtrlPtr = raidPtr->reconControl;
568 
569           RF_ASSERT(rbuf);
570           RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
571 
572 #if RF_DEBUG_RECON
573           if (rf_reconbufferDebug) {
574                     printf("raid%d: RAID1 reconbuffer submission c%d psid %ld ru%d (failed offset %ld)\n",
575                            raidPtr->raidid, rbuf->col,
576                            (long) rbuf->parityStripeID, rbuf->which_ru,
577                            (long) rbuf->failedDiskSectorOffset);
578           }
579 #endif
580           if (rf_reconDebug) {
581                     unsigned char *b = rbuf->buffer;
582                     printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
583                         (long) rbuf->parityStripeID, (long) rbuf->buffer);
584                     printf("RAID1 psid %ld   %02x %02x %02x %02x %02x\n",
585                         (long)rbuf->parityStripeID, b[0], b[1], b[2], b[3], b[4]);
586           }
587           RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
588 
589           rf_lock_mutex2(reconCtrlPtr->rb_mutex);
590           while(reconCtrlPtr->rb_lock) {
591                     rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
592           }
593           reconCtrlPtr->rb_lock = 1;
594           rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
595 
596           pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
597               rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL);
598           RF_ASSERT(pssPtr);  /* if it didn't exist, we wouldn't have gotten
599                                          * an rbuf for it */
600 
601           /*
602          * Since this is simple mirroring, the first submission for a stripe is also
603          * treated as the last.
604          */
605 
606           t = NULL;
607           if (keep_it) {
608 #if RF_DEBUG_RECON
609                     if (rf_reconbufferDebug) {
610                               printf("raid%d: RAID1 rbuf submission: keeping rbuf\n",
611                                      raidPtr->raidid);
612                     }
613 #endif
614                     t = rbuf;
615           } else {
616                     if (use_committed) {
617 #if RF_DEBUG_RECON
618                               if (rf_reconbufferDebug) {
619                                         printf("raid%d: RAID1 rbuf submission: using committed rbuf\n", raidPtr->raidid);
620                               }
621 #endif
622                               t = reconCtrlPtr->committedRbufs;
623                               RF_ASSERT(t);
624                               reconCtrlPtr->committedRbufs = t->next;
625                               t->next = NULL;
626                     } else
627                               if (reconCtrlPtr->floatingRbufs) {
628 #if RF_DEBUG_RECON
629                                         if (rf_reconbufferDebug) {
630                                                   printf("raid%d: RAID1 rbuf submission: using floating rbuf\n", raidPtr->raidid);
631                                         }
632 #endif
633                                         t = reconCtrlPtr->floatingRbufs;
634                                         reconCtrlPtr->floatingRbufs = t->next;
635                                         t->next = NULL;
636                               }
637           }
638           if (t == NULL) {
639 #if RF_DEBUG_RECON
640                     if (rf_reconbufferDebug) {
641                               printf("raid%d: RAID1 rbuf submission: waiting for rbuf\n", raidPtr->raidid);
642                     }
643 #endif
644                     RF_ASSERT((keep_it == 0) && (use_committed == 0));
645                     raidPtr->procsInBufWait++;
646                     if ((raidPtr->procsInBufWait == (raidPtr->numCol - 1))
647                         && (raidPtr->numFullReconBuffers == 0)) {
648                               /* ruh-ro */
649                               RF_ERRORMSG("Buffer wait deadlock\n");
650                               rf_PrintPSStatusTable(raidPtr);
651                               RF_PANIC();
652                     }
653                     pssPtr->flags |= RF_PSS_BUFFERWAIT;
654                     cb = rf_AllocCallbackValueDesc(raidPtr);
655                     cb->col = rbuf->col;
656                     cb->v = rbuf->parityStripeID;
657                     cb->next = NULL;
658                     if (reconCtrlPtr->bufferWaitList == NULL) {
659                               /* we are the wait list- lucky us */
660                               reconCtrlPtr->bufferWaitList = cb;
661                     } else {
662                               /* append to wait list */
663                               for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
664                               p->next = cb;
665                     }
666                     retcode = 1;
667                     goto out;
668           }
669           if (t != rbuf) {
670                     t->col = reconCtrlPtr->fcol;
671                     t->parityStripeID = rbuf->parityStripeID;
672                     t->which_ru = rbuf->which_ru;
673                     t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
674                     t->spCol = rbuf->spCol;
675                     t->spOffset = rbuf->spOffset;
676                     /* Swap buffers. DANCE! */
677                     ta = t->buffer;
678                     t->buffer = rbuf->buffer;
679                     rbuf->buffer = ta;
680           }
681           /*
682          * Use the rbuf we've been given as the target.
683          */
684           RF_ASSERT(pssPtr->rbuf == NULL);
685           pssPtr->rbuf = t;
686 
687           t->count = 1;
688           /*
689          * Below, we use 1 for numDataCol (which is equal to the count in the
690          * previous line), so we'll always be done.
691          */
692           rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
693 
694 out:
695           RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
696           rf_lock_mutex2(reconCtrlPtr->rb_mutex);
697           reconCtrlPtr->rb_lock = 0;
698           rf_broadcast_cond2(reconCtrlPtr->rb_cv);
699           rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
700 #if RF_DEBUG_RECON
701           if (rf_reconbufferDebug) {
702                     printf("raid%d: RAID1 rbuf submission: returning %d\n",
703                            raidPtr->raidid, retcode);
704           }
705 #endif
706           return (retcode);
707 }
708 
709 RF_HeadSepLimit_t
rf_GetDefaultHeadSepLimitRAID1(RF_Raid_t * raidPtr)710 rf_GetDefaultHeadSepLimitRAID1(RF_Raid_t *raidPtr)
711 {
712           return (10);
713 }
714 
715