1 /* $OpenBSD: rf_paritylogging.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */
2 /* $NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $ */
3
4 /*
5 * Copyright (c) 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: William V. Courtright II
9 *
10 * Permission to use, copy, modify and distribute this software and
11 * its documentation is hereby granted, provided that both the copyright
12 * notice and this permission notice appear in all copies of the
13 * software, derivative works or modified versions, and any portions
14 * thereof, and that both notices appear in supporting documentation.
15 *
16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 *
20 * Carnegie Mellon requests users of this software to return to
21 *
22 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23 * School of Computer Science
24 * Carnegie Mellon University
25 * Pittsburgh PA 15213-3890
26 *
27 * any improvements or extensions that they make and grant Carnegie the
28 * rights to redistribute these changes.
29 */
30
31
/*
 * Parity logging configuration, DAG selection, and mapping are implemented
 * here.
 */
35
36 #include "rf_archs.h"
37
38 #if RF_INCLUDE_PARITYLOGGING > 0
39
40 #include "rf_types.h"
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_dagffrd.h"
46 #include "rf_dagffwr.h"
47 #include "rf_dagdegrd.h"
48 #include "rf_dagdegwr.h"
49 #include "rf_paritylog.h"
50 #include "rf_paritylogDiskMgr.h"
51 #include "rf_paritylogging.h"
52 #include "rf_parityloggingdags.h"
53 #include "rf_general.h"
54 #include "rf_map.h"
55 #include "rf_utils.h"
56 #include "rf_shutdown.h"
57
58 typedef struct RF_ParityLoggingConfigInfo_s {
59 RF_RowCol_t **stripeIdentifier; /*
60 * Filled in at config time & used by
61 * IdentifyStripe.
62 */
63 } RF_ParityLoggingConfigInfo_t;
64
65 void rf_FreeRegionInfo(RF_Raid_t *, RF_RegionId_t);
66 void rf_FreeParityLogQueue(RF_Raid_t *, RF_ParityLogQueue_t *);
67 void rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *);
68 void rf_ShutdownParityLogging(RF_ThreadArg_t);
69 void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t);
70 void rf_ShutdownParityLoggingPool(RF_ThreadArg_t);
71 void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t);
72 void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t);
73 void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t);
74
75
76 int
rf_ConfigureParityLogging(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)77 rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
78 RF_Config_t *cfgPtr)
79 {
80 int i, j, startdisk, rc;
81 RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity;
82 RF_SectorCount_t parityBufferCapacity, maxRegionParityRange;
83 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
84 RF_ParityLoggingConfigInfo_t *info;
85 RF_ParityLog_t *l = NULL, *next;
86 caddr_t lHeapPtr;
87
88 if (rf_numParityRegions <= 0)
89 return(EINVAL);
90
91 /*
92 * We create multiple entries on the shutdown list here, since
93 * this configuration routine is fairly complicated in and of
94 * itself, and this makes backing out of a failed configuration
95 * much simpler.
96 */
97
98 raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG;
99
100 /* Create a parity logging configuration structure. */
101 RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t),
102 (RF_ParityLoggingConfigInfo_t *),
103 raidPtr->cleanupList);
104 if (info == NULL)
105 return (ENOMEM);
106 layoutPtr->layoutSpecificInfo = (void *) info;
107
108 RF_ASSERT(raidPtr->numRow == 1);
109
110 /*
111 * The stripe identifier must identify the disks in each stripe, IN
112 * THE ORDER THAT THEY APPEAR IN THE STRIPE.
113 */
114 info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol),
115 (raidPtr->numCol),
116 raidPtr->cleanupList);
117 if (info->stripeIdentifier == NULL)
118 return (ENOMEM);
119
120 startdisk = 0;
121 for (i = 0; i < (raidPtr->numCol); i++) {
122 for (j = 0; j < (raidPtr->numCol); j++) {
123 info->stripeIdentifier[i][j] = (startdisk + j) %
124 (raidPtr->numCol - 1);
125 }
126 if ((--startdisk) < 0)
127 startdisk = raidPtr->numCol - 1 - 1;
128 }
129
130 /* Fill in the remaining layout parameters. */
131 layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
132 layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
133 raidPtr->logBytesPerSector;
134 layoutPtr->numParityCol = 1;
135 layoutPtr->numParityLogCol = 1;
136 layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol -
137 layoutPtr->numParityLogCol;
138 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol *
139 layoutPtr->sectorsPerStripeUnit;
140 layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
141 raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
142 layoutPtr->sectorsPerStripeUnit;
143
144 raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
145 layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
146
147 /*
148 * Configure parity log parameters.
149 *
150 * Parameter Comment/constraints
151 * ------------------------------------------------
152 * numParityRegions* All regions (except possibly last)
153 * of equal size.
154 * totalInCoreLogCapacity* Amount of memory in bytes available
155 * for in-core logs (default 1 MB).
156 * numSectorsPerLog# Capacity of an in-core log in sectors
157 * (1 * disk track).
158 * numParityLogs Total number of in-core logs,
159 * should be at least numParityRegions.
160 * regionLogCapacity Size of a region log (except possibly
161 * last one) in sectors.
162 * totalLogCapacity Total amount of log space in sectors.
163 *
164 * Where '*' denotes a user settable parameter.
165 * Note that logs are fixed to be the size of a disk track,
166 * value #defined in rf_paritylog.h.
167 *
168 */
169
170 totalLogCapacity = layoutPtr->stripeUnitsPerDisk *
171 layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol;
172 raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
173 if (rf_parityLogDebug)
174 printf("bytes per sector %d\n", raidPtr->bytesPerSector);
175
176 /*
177 * Reduce fragmentation within a disk region by adjusting the number
178 * of regions in an attempt to allow an integral number of logs to fit
179 * into a disk region.
180 */
181 fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
182 if (fragmentation > 0)
183 for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) {
184 if (((totalLogCapacity / (rf_numParityRegions + i)) %
185 raidPtr->numSectorsPerLog) < fragmentation) {
186 rf_numParityRegions++;
187 raidPtr->regionLogCapacity = totalLogCapacity /
188 rf_numParityRegions;
189 fragmentation = raidPtr->regionLogCapacity %
190 raidPtr->numSectorsPerLog;
191 }
192 if (((totalLogCapacity / (rf_numParityRegions - i)) %
193 raidPtr->numSectorsPerLog) < fragmentation) {
194 rf_numParityRegions--;
195 raidPtr->regionLogCapacity = totalLogCapacity /
196 rf_numParityRegions;
197 fragmentation = raidPtr->regionLogCapacity %
198 raidPtr->numSectorsPerLog;
199 }
200 }
201 /* Ensure integral number of regions per log. */
202 raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity /
203 raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog;
204
205 raidPtr->numParityLogs = rf_totalInCoreLogCapacity /
206 (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog);
207 /*
208 * To avoid deadlock, must ensure that enough logs exist for each
209 * region to have one simultaneously.
210 */
211 if (raidPtr->numParityLogs < rf_numParityRegions)
212 raidPtr->numParityLogs = rf_numParityRegions;
213
214 /* Create region information structs. */
215 printf("Allocating %d bytes for in-core parity region info\n",
216 (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
217 RF_Malloc(raidPtr->regionInfo,
218 (rf_numParityRegions * sizeof(RF_RegionInfo_t)),
219 (RF_RegionInfo_t *));
220 if (raidPtr->regionInfo == NULL)
221 return (ENOMEM);
222
223 /* Last region may not be full capacity. */
224 lastRegionCapacity = raidPtr->regionLogCapacity;
225 while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity +
226 lastRegionCapacity > totalLogCapacity)
227 lastRegionCapacity = lastRegionCapacity -
228 raidPtr->numSectorsPerLog;
229
230 raidPtr->regionParityRange = raidPtr->sectorsPerDisk /
231 rf_numParityRegions;
232 maxRegionParityRange = raidPtr->regionParityRange;
233
234 /* I can't remember why this line is in the code -wvcii 6/30/95. */
235 /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0)
236 * regionParityRange++; */
237
238 /* Build pool of unused parity logs. */
239 printf("Allocating %d bytes for %d parity logs\n",
240 raidPtr->numParityLogs * raidPtr->numSectorsPerLog *
241 raidPtr->bytesPerSector,
242 raidPtr->numParityLogs);
243 RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
244 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector,
245 (caddr_t));
246 if (raidPtr->parityLogBufferHeap == NULL)
247 return (ENOMEM);
248 lHeapPtr = raidPtr->parityLogBufferHeap;
249 rc = rf_mutex_init(&raidPtr->parityLogPool.mutex);
250 if (rc) {
251 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
252 __FILE__, __LINE__, rc);
253 RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
254 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
255 return (ENOMEM);
256 }
257 for (i = 0; i < raidPtr->numParityLogs; i++) {
258 if (i == 0) {
259 RF_Calloc(raidPtr->parityLogPool.parityLogs, 1,
260 sizeof(RF_ParityLog_t), (RF_ParityLog_t *));
261 if (raidPtr->parityLogPool.parityLogs == NULL) {
262 RF_Free(raidPtr->parityLogBufferHeap,
263 raidPtr->numParityLogs *
264 raidPtr->numSectorsPerLog *
265 raidPtr->bytesPerSector);
266 return (ENOMEM);
267 }
268 l = raidPtr->parityLogPool.parityLogs;
269 } else {
270 RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t),
271 (RF_ParityLog_t *));
272 if (l->next == NULL) {
273 RF_Free(raidPtr->parityLogBufferHeap,
274 raidPtr->numParityLogs *
275 raidPtr->numSectorsPerLog *
276 raidPtr->bytesPerSector);
277 for (l = raidPtr->parityLogPool.parityLogs;
278 l;
279 l = next) {
280 next = l->next;
281 if (l->records)
282 RF_Free(l->records,
283 (raidPtr->numSectorsPerLog *
284 sizeof(RF_ParityLogRecord_t)));
285 RF_Free(l, sizeof(RF_ParityLog_t));
286 }
287 return (ENOMEM);
288 }
289 l = l->next;
290 }
291 l->bufPtr = lHeapPtr;
292 lHeapPtr += raidPtr->numSectorsPerLog *
293 raidPtr->bytesPerSector;
294 RF_Malloc(l->records, (raidPtr->numSectorsPerLog *
295 sizeof(RF_ParityLogRecord_t)),
296 (RF_ParityLogRecord_t *));
297 if (l->records == NULL) {
298 RF_Free(raidPtr->parityLogBufferHeap,
299 raidPtr->numParityLogs *
300 raidPtr->numSectorsPerLog *
301 raidPtr->bytesPerSector);
302 for (l = raidPtr->parityLogPool.parityLogs;
303 l;
304 l = next) {
305 next = l->next;
306 if (l->records)
307 RF_Free(l->records,
308 (raidPtr->numSectorsPerLog *
309 sizeof(RF_ParityLogRecord_t)));
310 RF_Free(l, sizeof(RF_ParityLog_t));
311 }
312 return (ENOMEM);
313 }
314 }
315 rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr);
316 if (rc) {
317 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
318 " rc=%d\n", __FILE__, __LINE__, rc);
319 rf_ShutdownParityLoggingPool(raidPtr);
320 return (rc);
321 }
322 /* Build pool of region buffers. */
323 rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex);
324 if (rc) {
325 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
326 __FILE__, __LINE__, rc);
327 return (ENOMEM);
328 }
329 rc = rf_cond_init(&raidPtr->regionBufferPool.cond);
330 if (rc) {
331 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
332 __FILE__, __LINE__, rc);
333 rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
334 return (ENOMEM);
335 }
336 raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity *
337 raidPtr->bytesPerSector;
338 printf("regionBufferPool.bufferSize %d\n",
339 raidPtr->regionBufferPool.bufferSize);
340
341 /* For now, only one region at a time may be reintegrated. */
342 raidPtr->regionBufferPool.totalBuffers = 1;
343
344 raidPtr->regionBufferPool.availableBuffers =
345 raidPtr->regionBufferPool.totalBuffers;
346 raidPtr->regionBufferPool.availBuffersIndex = 0;
347 raidPtr->regionBufferPool.emptyBuffersIndex = 0;
348 printf("Allocating %d bytes for regionBufferPool\n",
349 (int) (raidPtr->regionBufferPool.totalBuffers *
350 sizeof(caddr_t)));
351 RF_Malloc(raidPtr->regionBufferPool.buffers,
352 raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t),
353 (caddr_t *));
354 if (raidPtr->regionBufferPool.buffers == NULL) {
355 rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
356 rf_cond_destroy(&raidPtr->regionBufferPool.cond);
357 return (ENOMEM);
358 }
359 for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) {
360 printf("Allocating %d bytes for regionBufferPool#%d\n",
361 (int) (raidPtr->regionBufferPool.bufferSize *
362 sizeof(char)), i);
363 RF_Malloc(raidPtr->regionBufferPool.buffers[i],
364 raidPtr->regionBufferPool.bufferSize * sizeof(char),
365 (caddr_t));
366 if (raidPtr->regionBufferPool.buffers[i] == NULL) {
367 rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
368 rf_cond_destroy(&raidPtr->regionBufferPool.cond);
369 for (j = 0; j < i; j++) {
370 RF_Free(raidPtr->regionBufferPool.buffers[i],
371 raidPtr->regionBufferPool.bufferSize *
372 sizeof(char));
373 }
374 RF_Free(raidPtr->regionBufferPool.buffers,
375 raidPtr->regionBufferPool.totalBuffers *
376 sizeof(caddr_t));
377 return (ENOMEM);
378 }
379 printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i,
380 (long) raidPtr->regionBufferPool.buffers[i]);
381 }
382 rc = rf_ShutdownCreate(listp,
383 rf_ShutdownParityLoggingRegionBufferPool,
384 raidPtr);
385 if (rc) {
386 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
387 " rc=%d\n", __FILE__, __LINE__, rc);
388 rf_ShutdownParityLoggingRegionBufferPool(raidPtr);
389 return (rc);
390 }
391 /* Build pool of parity buffers. */
392 parityBufferCapacity = maxRegionParityRange;
393 rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex);
394 if (rc) {
395 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
396 __FILE__, __LINE__, rc);
397 return (rc);
398 }
399 rc = rf_cond_init(&raidPtr->parityBufferPool.cond);
400 if (rc) {
401 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
402 __FILE__, __LINE__, rc);
403 rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
404 return (ENOMEM);
405 }
406 raidPtr->parityBufferPool.bufferSize = parityBufferCapacity *
407 raidPtr->bytesPerSector;
408 printf("parityBufferPool.bufferSize %d\n",
409 raidPtr->parityBufferPool.bufferSize);
410
411 /* For now, only one region at a time may be reintegrated. */
412 raidPtr->parityBufferPool.totalBuffers = 1;
413
414 raidPtr->parityBufferPool.availableBuffers =
415 raidPtr->parityBufferPool.totalBuffers;
416 raidPtr->parityBufferPool.availBuffersIndex = 0;
417 raidPtr->parityBufferPool.emptyBuffersIndex = 0;
418 printf("Allocating %d bytes for parityBufferPool of %d units\n",
419 (int) (raidPtr->parityBufferPool.totalBuffers *
420 sizeof(caddr_t)),
421 raidPtr->parityBufferPool.totalBuffers);
422 RF_Malloc(raidPtr->parityBufferPool.buffers,
423 raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t),
424 (caddr_t *));
425 if (raidPtr->parityBufferPool.buffers == NULL) {
426 rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
427 rf_cond_destroy(&raidPtr->parityBufferPool.cond);
428 return (ENOMEM);
429 }
430 for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) {
431 printf("Allocating %d bytes for parityBufferPool#%d\n",
432 (int) (raidPtr->parityBufferPool.bufferSize *
433 sizeof(char)), i);
434 RF_Malloc(raidPtr->parityBufferPool.buffers[i],
435 raidPtr->parityBufferPool.bufferSize * sizeof(char),
436 (caddr_t));
437 if (raidPtr->parityBufferPool.buffers == NULL) {
438 rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
439 rf_cond_destroy(&raidPtr->parityBufferPool.cond);
440 for (j = 0; j < i; j++) {
441 RF_Free(raidPtr->parityBufferPool.buffers[i],
442 raidPtr->regionBufferPool.bufferSize *
443 sizeof(char));
444 }
445 RF_Free(raidPtr->parityBufferPool.buffers,
446 raidPtr->regionBufferPool.totalBuffers *
447 sizeof(caddr_t));
448 return (ENOMEM);
449 }
450 printf("parityBufferPool.buffers[%d] = %lx\n", i,
451 (long) raidPtr->parityBufferPool.buffers[i]);
452 }
453 rc = rf_ShutdownCreate(listp,
454 rf_ShutdownParityLoggingParityBufferPool,
455 raidPtr);
456 if (rc) {
457 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
458 " rc=%d\n", __FILE__, __LINE__, rc);
459 rf_ShutdownParityLoggingParityBufferPool(raidPtr);
460 return (rc);
461 }
462 /* Initialize parityLogDiskQueue. */
463 rc = rf_create_managed_mutex(listp,
464 &raidPtr->parityLogDiskQueue.mutex);
465 if (rc) {
466 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
467 __FILE__, __LINE__, rc);
468 return (rc);
469 }
470 rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond);
471 if (rc) {
472 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
473 __FILE__, __LINE__, rc);
474 return (rc);
475 }
476 raidPtr->parityLogDiskQueue.flushQueue = NULL;
477 raidPtr->parityLogDiskQueue.reintQueue = NULL;
478 raidPtr->parityLogDiskQueue.bufHead = NULL;
479 raidPtr->parityLogDiskQueue.bufTail = NULL;
480 raidPtr->parityLogDiskQueue.reintHead = NULL;
481 raidPtr->parityLogDiskQueue.reintTail = NULL;
482 raidPtr->parityLogDiskQueue.logBlockHead = NULL;
483 raidPtr->parityLogDiskQueue.logBlockTail = NULL;
484 raidPtr->parityLogDiskQueue.reintBlockHead = NULL;
485 raidPtr->parityLogDiskQueue.reintBlockTail = NULL;
486 raidPtr->parityLogDiskQueue.freeDataList = NULL;
487 raidPtr->parityLogDiskQueue.freeCommonList = NULL;
488
489 rc = rf_ShutdownCreate(listp,
490 rf_ShutdownParityLoggingDiskQueue,
491 raidPtr);
492 if (rc) {
493 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
494 " rc=%d\n", __FILE__, __LINE__, rc);
495 return (rc);
496 }
497 for (i = 0; i < rf_numParityRegions; i++) {
498 rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex);
499 if (rc) {
500 RF_ERRORMSG3("Unable to init mutex file %s line %d"
501 " rc=%d\n", __FILE__, __LINE__, rc);
502 for (j = 0; j < i; j++)
503 rf_FreeRegionInfo(raidPtr, j);
504 RF_Free(raidPtr->regionInfo,
505 (rf_numParityRegions *
506 sizeof(RF_RegionInfo_t)));
507 return (ENOMEM);
508 }
509 rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex);
510 if (rc) {
511 RF_ERRORMSG3("Unable to init mutex file %s line %d"
512 " rc=%d\n", __FILE__, __LINE__, rc);
513 rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
514 for (j = 0; j < i; j++)
515 rf_FreeRegionInfo(raidPtr, j);
516 RF_Free(raidPtr->regionInfo,
517 (rf_numParityRegions *
518 sizeof(RF_RegionInfo_t)));
519 return (ENOMEM);
520 }
521 raidPtr->regionInfo[i].reintInProgress = RF_FALSE;
522 raidPtr->regionInfo[i].regionStartAddr =
523 raidPtr->regionLogCapacity * i;
524 raidPtr->regionInfo[i].parityStartAddr =
525 raidPtr->regionParityRange * i;
526 if (i < rf_numParityRegions - 1) {
527 raidPtr->regionInfo[i].capacity =
528 raidPtr->regionLogCapacity;
529 raidPtr->regionInfo[i].numSectorsParity =
530 raidPtr->regionParityRange;
531 } else {
532 raidPtr->regionInfo[i].capacity = lastRegionCapacity;
533 raidPtr->regionInfo[i].numSectorsParity =
534 raidPtr->sectorsPerDisk -
535 raidPtr->regionParityRange * i;
536 if (raidPtr->regionInfo[i].numSectorsParity >
537 maxRegionParityRange)
538 maxRegionParityRange =
539 raidPtr->regionInfo[i].numSectorsParity;
540 }
541 raidPtr->regionInfo[i].diskCount = 0;
542 RF_ASSERT(raidPtr->regionInfo[i].capacity +
543 raidPtr->regionInfo[i].regionStartAddr <=
544 totalLogCapacity);
545 RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr +
546 raidPtr->regionInfo[i].numSectorsParity <=
547 raidPtr->sectorsPerDisk);
548 printf("Allocating %d bytes for region %d\n",
549 (int) (raidPtr->regionInfo[i].capacity *
550 sizeof(RF_DiskMap_t)), i);
551 RF_Malloc(raidPtr->regionInfo[i].diskMap,
552 (raidPtr->regionInfo[i].capacity *
553 sizeof(RF_DiskMap_t)),
554 (RF_DiskMap_t *));
555 if (raidPtr->regionInfo[i].diskMap == NULL) {
556 rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
557 rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex);
558 for (j = 0; j < i; j++)
559 rf_FreeRegionInfo(raidPtr, j);
560 RF_Free(raidPtr->regionInfo,
561 (rf_numParityRegions *
562 sizeof(RF_RegionInfo_t)));
563 return (ENOMEM);
564 }
565 raidPtr->regionInfo[i].loggingEnabled = RF_FALSE;
566 raidPtr->regionInfo[i].coreLog = NULL;
567 }
568 rc = rf_ShutdownCreate(listp,
569 rf_ShutdownParityLoggingRegionInfo,
570 raidPtr);
571 if (rc) {
572 RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
573 " rc=%d\n", __FILE__, __LINE__, rc);
574 rf_ShutdownParityLoggingRegionInfo(raidPtr);
575 return (rc);
576 }
577 RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0);
578 raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED;
579 rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle,
580 rf_ParityLoggingDiskManager, raidPtr, "rf_log");
581 if (rc) {
582 raidPtr->parityLogDiskQueue.threadState = 0;
583 RF_ERRORMSG3("Unable to create parity logging disk thread"
584 " file %s line %d rc=%d\n",
585 __FILE__, __LINE__, rc);
586 return (ENOMEM);
587 }
588 /* Wait for thread to start. */
589 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
590 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) {
591 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
592 raidPtr->parityLogDiskQueue.mutex);
593 }
594 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
595
596 rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr);
597 if (rc) {
598 RF_ERRORMSG1("Got rc=%d adding parity logging shutdown"
599 " event.\n", rc);
600 rf_ShutdownParityLogging(raidPtr);
601 return (rc);
602 }
603 if (rf_parityLogDebug) {
604 printf("\t\t\tsize of disk log in sectors: %d\n",
605 (int) totalLogCapacity);
606 printf("\t\t\ttotal number of parity regions is %d\n",
607 (int) rf_numParityRegions);
608 printf("\t\t\tnominal sectors of log per parity region is %d\n",
609 (int) raidPtr->regionLogCapacity);
610 printf("\t\t\tnominal region fragmentation is %d sectors\n",
611 (int) fragmentation);
612 printf("\t\t\ttotal number of parity logs is %d\n",
613 raidPtr->numParityLogs);
614 printf("\t\t\tparity log size is %d sectors\n",
615 raidPtr->numSectorsPerLog);
616 printf("\t\t\ttotal in-core log space is %d bytes\n",
617 (int) rf_totalInCoreLogCapacity);
618 }
619 rf_EnableParityLogging(raidPtr);
620
621 return (0);
622 }
623
624
625 void
rf_FreeRegionInfo(RF_Raid_t * raidPtr,RF_RegionId_t regionID)626 rf_FreeRegionInfo(RF_Raid_t *raidPtr, RF_RegionId_t regionID)
627 {
628 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
629 RF_Free(raidPtr->regionInfo[regionID].diskMap,
630 (raidPtr->regionInfo[regionID].capacity *
631 sizeof(RF_DiskMap_t)));
632 if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) {
633 rf_ReleaseParityLogs(raidPtr,
634 raidPtr->regionInfo[regionID].coreLog);
635 raidPtr->regionInfo[regionID].coreLog = NULL;
636 } else {
637 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL);
638 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0);
639 }
640 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
641 rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex);
642 rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex);
643 }
644
645
646 void
rf_FreeParityLogQueue(RF_Raid_t * raidPtr,RF_ParityLogQueue_t * queue)647 rf_FreeParityLogQueue(RF_Raid_t *raidPtr, RF_ParityLogQueue_t *queue)
648 {
649 RF_ParityLog_t *l1, *l2;
650
651 RF_LOCK_MUTEX(queue->mutex);
652 l1 = queue->parityLogs;
653 while (l1) {
654 l2 = l1;
655 l1 = l2->next;
656 RF_Free(l2->records, (raidPtr->numSectorsPerLog *
657 sizeof(RF_ParityLogRecord_t)));
658 RF_Free(l2, sizeof(RF_ParityLog_t));
659 }
660 RF_UNLOCK_MUTEX(queue->mutex);
661 rf_mutex_destroy(&queue->mutex);
662 }
663
664
665 void
rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue)666 rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *queue)
667 {
668 int i;
669
670 RF_LOCK_MUTEX(queue->mutex);
671 if (queue->availableBuffers != queue->totalBuffers) {
672 printf("Attempt to free region queue that is still in use !\n");
673 RF_ASSERT(0);
674 }
675 for (i = 0; i < queue->totalBuffers; i++)
676 RF_Free(queue->buffers[i], queue->bufferSize);
677 RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t));
678 RF_UNLOCK_MUTEX(queue->mutex);
679 rf_mutex_destroy(&queue->mutex);
680 }
681
682
683 void
rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)684 rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)
685 {
686 RF_Raid_t *raidPtr;
687 RF_RegionId_t i;
688
689 raidPtr = (RF_Raid_t *) arg;
690 if (rf_parityLogDebug) {
691 printf("raid%d: ShutdownParityLoggingRegionInfo\n",
692 raidPtr->raidid);
693 }
694 /* Free region information structs. */
695 for (i = 0; i < rf_numParityRegions; i++)
696 rf_FreeRegionInfo(raidPtr, i);
697 RF_Free(raidPtr->regionInfo, (rf_numParityRegions *
698 sizeof(raidPtr->regionInfo)));
699 raidPtr->regionInfo = NULL;
700 }
701
702
703 void
rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)704 rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)
705 {
706 RF_Raid_t *raidPtr;
707
708 raidPtr = (RF_Raid_t *) arg;
709 if (rf_parityLogDebug) {
710 printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid);
711 }
712 /* Free contents of parityLogPool. */
713 rf_FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool);
714 RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
715 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
716 }
717
718
719 void
rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)720 rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)
721 {
722 RF_Raid_t *raidPtr;
723
724 raidPtr = (RF_Raid_t *) arg;
725 if (rf_parityLogDebug) {
726 printf("raid%d: ShutdownParityLoggingRegionBufferPool\n",
727 raidPtr->raidid);
728 }
729 rf_FreeRegionBufferQueue(&raidPtr->regionBufferPool);
730 }
731
732
733 void
rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)734 rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)
735 {
736 RF_Raid_t *raidPtr;
737
738 raidPtr = (RF_Raid_t *) arg;
739 if (rf_parityLogDebug) {
740 printf("raid%d: ShutdownParityLoggingParityBufferPool\n",
741 raidPtr->raidid);
742 }
743 rf_FreeRegionBufferQueue(&raidPtr->parityBufferPool);
744 }
745
746
747 void
rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)748 rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)
749 {
750 RF_ParityLogData_t *d;
751 RF_CommonLogData_t *c;
752 RF_Raid_t *raidPtr;
753
754 raidPtr = (RF_Raid_t *) arg;
755 if (rf_parityLogDebug) {
756 printf("raid%d: ShutdownParityLoggingDiskQueue\n",
757 raidPtr->raidid);
758 }
759 /* Free disk manager stuff. */
760 RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL);
761 RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL);
762 RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL);
763 RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL);
764 while (raidPtr->parityLogDiskQueue.freeDataList) {
765 d = raidPtr->parityLogDiskQueue.freeDataList;
766 raidPtr->parityLogDiskQueue.freeDataList =
767 raidPtr->parityLogDiskQueue.freeDataList->next;
768 RF_Free(d, sizeof(RF_ParityLogData_t));
769 }
770 while (raidPtr->parityLogDiskQueue.freeCommonList) {
771 c = raidPtr->parityLogDiskQueue.freeCommonList;
772 rf_mutex_destroy(&c->mutex);
773 raidPtr->parityLogDiskQueue.freeCommonList =
774 raidPtr->parityLogDiskQueue.freeCommonList->next;
775 RF_Free(c, sizeof(RF_CommonLogData_t));
776 }
777 }
778
779
780 void
rf_ShutdownParityLogging(RF_ThreadArg_t arg)781 rf_ShutdownParityLogging(RF_ThreadArg_t arg)
782 {
783 RF_Raid_t *raidPtr;
784
785 raidPtr = (RF_Raid_t *) arg;
786 if (rf_parityLogDebug) {
787 printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid);
788 }
789 /* Shutdown disk thread. */
790 /*
791 * This has the desirable side-effect of forcing all regions to be
792 * reintegrated. This is necessary since all parity log maps are
793 * currently held in volatile memory.
794 */
795
796 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
797 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE;
798 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
799 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
800 /*
801 * pLogDiskThread will now terminate when queues are cleared.
802 * Now wait for it to be done.
803 */
804 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
805 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) {
806 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
807 raidPtr->parityLogDiskQueue.mutex);
808 }
809 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
810 if (rf_parityLogDebug) {
811 printf("raid%d: ShutdownParityLogging done"
812 " (thread completed)\n", raidPtr->raidid);
813 }
814 }
815
816
817 int
rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr)818 rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr)
819 {
820 return (20);
821 }
822
823
824 RF_HeadSepLimit_t
rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr)825 rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr)
826 {
827 return (10);
828 }
829
830
831 /* Return the region ID for a given RAID address. */
832 RF_RegionId_t
rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr,RF_SectorNum_t address)833 rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr, RF_SectorNum_t address)
834 {
835 RF_RegionId_t regionID;
836
837 /* regionID = address / (raidPtr->regionParityRange *
838 * raidPtr->Layout.numDataCol); */
839 regionID = address / raidPtr->regionParityRange;
840 if (regionID == rf_numParityRegions) {
841 /* Last region may be larger than other regions. */
842 regionID--;
843 }
844 RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr);
845 RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr +
846 raidPtr->regionInfo[regionID].numSectorsParity);
847 RF_ASSERT(regionID < rf_numParityRegions);
848 return (regionID);
849 }
850
851
852 /* Given a logical RAID sector, determine physical disk address of data. */
853 void
rf_MapSectorParityLogging(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)854 rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
855 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
856 {
857 RF_StripeNum_t SUID = raidSector /
858 raidPtr->Layout.sectorsPerStripeUnit;
859 *row = 0;
860 /* *col = (SUID % (raidPtr->numCol -
861 * raidPtr->Layout.numParityLogCol)); */
862 *col = SUID % raidPtr->Layout.numDataCol;
863 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
864 raidPtr->Layout.sectorsPerStripeUnit +
865 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
866 }
867
868
869 /* Given a logical RAID sector, determine physical disk address of parity. */
870 void
rf_MapParityParityLogging(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)871 rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
872 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
873 {
874 RF_StripeNum_t SUID = raidSector /
875 raidPtr->Layout.sectorsPerStripeUnit;
876
877 *row = 0;
878 /* *col =
879 * raidPtr->Layout.numDataCol-(SUID / raidPtr->Layout.numDataCol) %
880 * (raidPtr->numCol - raidPtr->Layout.numParityLogCol); */
881 *col = raidPtr->Layout.numDataCol;
882 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
883 raidPtr->Layout.sectorsPerStripeUnit +
884 (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
885 }
886
887
888 /*
889 * Given a regionID and sector offset, determine the physical disk address
890 * of the parity log.
891 */
892 void
rf_MapLogParityLogging(RF_Raid_t * raidPtr,RF_RegionId_t regionID,RF_SectorNum_t regionOffset,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * startSector)893 rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
894 RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col,
895 RF_SectorNum_t *startSector)
896 {
897 *row = 0;
898 *col = raidPtr->numCol - 1;
899 *startSector =
900 raidPtr->regionInfo[regionID].regionStartAddr + regionOffset;
901 }
902
903
904 /*
905 * Given a regionID, determine the physical disk address of the logged
906 * parity for that region.
907 */
908 void
rf_MapRegionParity(RF_Raid_t * raidPtr,RF_RegionId_t regionID,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * startSector,RF_SectorCount_t * numSector)909 rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
910 RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector,
911 RF_SectorCount_t *numSector)
912 {
913 *row = 0;
914 *col = raidPtr->numCol - 2;
915 *startSector = raidPtr->regionInfo[regionID].parityStartAddr;
916 *numSector = raidPtr->regionInfo[regionID].numSectorsParity;
917 }
918
919
920 /*
921 * Given a logical RAID address, determine the participating disks in
922 * the stripe.
923 */
924 void
rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr,RF_RaidAddr_t addr,RF_RowCol_t ** diskids,RF_RowCol_t * outRow)925 rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
926 RF_RowCol_t **diskids, RF_RowCol_t *outRow)
927 {
928 RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout,
929 addr);
930 RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *)
931 raidPtr->Layout.layoutSpecificInfo;
932 *outRow = 0;
933 *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
934 }
935
936
937 void
rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr,RF_StripeNum_t stripeID,RF_StripeNum_t * psID,RF_ReconUnitNum_t * which_ru)938 rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr,
939 RF_StripeNum_t stripeID, RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
940 {
941 *which_ru = 0;
942 *psID = stripeID;
943 }
944
945
946 /*
947 * Select an algorithm for performing an access. Returns two pointers,
948 * one to a function that will return information about the DAG, and
949 * another to a function that will create the dag.
950 */
951 void
rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmp,RF_VoidFuncPtr * createFunc)952 rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
953 RF_AccessStripeMap_t *asmp, RF_VoidFuncPtr *createFunc)
954 {
955 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
956 RF_PhysDiskAddr_t *failedPDA = NULL;
957 RF_RowCol_t frow, fcol;
958 RF_RowStatus_t rstat;
959 int prior_recon;
960
961 RF_ASSERT(RF_IO_IS_R_OR_W(type));
962
963 if (asmp->numDataFailed + asmp->numParityFailed > 1) {
964 RF_ERRORMSG("Multiple disks failed in a single group !"
965 " Aborting I/O operation.\n");
966 /* *infoFunc = */ *createFunc = NULL;
967 return;
968 } else
969 if (asmp->numDataFailed + asmp->numParityFailed == 1) {
970
971 /*
972 * If under recon & already reconstructed, redirect
973 * the access to the spare drive and eliminate the
974 * failure indication.
975 */
976 failedPDA = asmp->failedPDAs[0];
977 frow = failedPDA->row;
978 fcol = failedPDA->col;
979 rstat = raidPtr->status[failedPDA->row];
980 prior_recon = (rstat == rf_rs_reconfigured) ||
981 ((rstat == rf_rs_reconstructing) ?
982 rf_CheckRUReconstructed(raidPtr->reconControl[frow]
983 ->reconMap, failedPDA->startSector) : 0);
984 if (prior_recon) {
985 RF_RowCol_t or = failedPDA->row;
986 RF_RowCol_t oc = failedPDA->col;
987 RF_SectorNum_t oo = failedPDA->startSector;
988 if (layoutPtr->map->flags &
989 RF_DISTRIBUTE_SPARE) {
990 /* Redirect to dist spare space. */
991
992 if (failedPDA == asmp->parityInfo) {
993
994 /* Parity has failed. */
995 (layoutPtr->map->MapParity)
996 (raidPtr,
997 failedPDA->raidAddress,
998 &failedPDA->row,
999 &failedPDA->col,
1000 &failedPDA->startSector,
1001 RF_REMAP);
1002
1003 if (asmp->parityInfo->next) {
1004 /*
1005 * Redir 2nd component,
1006 * if any.
1007 */
1008 RF_PhysDiskAddr_t *p =
1009 asmp->parityInfo->next;
1010 RF_SectorNum_t SUoffs =
1011 p->startSector %
1012 layoutPtr->sectorsPerStripeUnit;
1013 p->row = failedPDA->row;
1014 p->col = failedPDA->col;
1015 /*
1016 * Cheating:
1017 * startSector is not
1018 * really a RAID
1019 * address.
1020 */
1021 p->startSector =
1022 rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
1023 failedPDA->startSector) + SUoffs;
1024 }
1025 } else
1026 if (asmp->parityInfo->next &&
1027 failedPDA ==
1028 asmp->parityInfo->next) {
1029 /*
1030 * Should not ever
1031 * happen.
1032 */
1033 RF_ASSERT(0);
1034 } else {
1035 /* Data has failed. */
1036 (layoutPtr->map
1037 ->MapSector)
1038 (raidPtr,
1039 failedPDA->raidAddress,
1040 &failedPDA->row,
1041 &failedPDA->col,
1042 &failedPDA->startSector,
1043 RF_REMAP);
1044 }
1045
1046 } else {
1047 /* Redirect to dedicated spare space. */
1048
1049 failedPDA->row =
1050 raidPtr->Disks[frow][fcol].spareRow;
1051 failedPDA->col =
1052 raidPtr->Disks[frow][fcol].spareCol;
1053
1054 /*
1055 * The parity may have two distinct
1056 * components, both of which may need
1057 * to be redirected.
1058 */
1059 if (asmp->parityInfo->next) {
1060 if (failedPDA ==
1061 asmp->parityInfo) {
1062 failedPDA->next->row =
1063 failedPDA->row;
1064 failedPDA->next->col =
1065 failedPDA->col;
1066 } else {
1067 if (failedPDA ==
1068 asmp->parityInfo
1069 ->next) {
1070 /*
1071 * Paranoid:
1072 * Should never
1073 * occur.
1074 */
1075 asmp->parityInfo
1076 ->row =
1077 failedPDA->row;
1078 asmp->parityInfo
1079 ->col =
1080 failedPDA->col;
1081 }
1082 }
1083 }
1084 }
1085
1086 RF_ASSERT(failedPDA->col != -1);
1087
1088 if (rf_dagDebug || rf_mapDebug) {
1089 printf("raid%d: Redirected type '%c'"
1090 " r %d c %d o %ld -> r %d c %d"
1091 " o %ld\n", raidPtr->raidid,
1092 type, or, oc, (long) oo,
1093 failedPDA->row, failedPDA->col,
1094 (long) failedPDA->startSector);
1095 }
1096 asmp->numDataFailed = asmp->numParityFailed = 0;
1097 }
1098 }
1099 if (type == RF_IO_TYPE_READ) {
1100
1101 if (asmp->numDataFailed == 0)
1102 *createFunc =
1103 (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;
1104 else
1105 *createFunc =
1106 (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG;
1107
1108 } else {
1109
1110
1111 /*
1112 * If mirroring, always use large writes. If the access
1113 * requires two distinct parity updates, always do a small
1114 * write. If the stripe contains a failure but the access
1115 * does not, do a small write. The first conditional
1116 * (numStripeUnitsAccessed <= numDataCol/2) uses a
1117 * less-than-or-equal rather than just a less-than because
1118 * when G is 3 or 4, numDataCol/2 is 1, and I want
1119 * single-stripe-unit updates to use just one disk.
1120 */
1121 if ((asmp->numDataFailed + asmp->numParityFailed) == 0) {
1122 if (((asmp->numStripeUnitsAccessed <=
1123 (layoutPtr->numDataCol / 2)) &&
1124 (layoutPtr->numDataCol != 1)) ||
1125 (asmp->parityInfo->next != NULL) ||
1126 rf_CheckStripeForFailures(raidPtr, asmp)) {
1127 *createFunc = (RF_VoidFuncPtr)
1128 rf_CreateParityLoggingSmallWriteDAG;
1129 } else
1130 *createFunc = (RF_VoidFuncPtr)
1131 rf_CreateParityLoggingLargeWriteDAG;
1132 } else
1133 if (asmp->numParityFailed == 1)
1134 *createFunc = (RF_VoidFuncPtr)
1135 rf_CreateNonRedundantWriteDAG;
1136 else
1137 if (asmp->numStripeUnitsAccessed != 1 &&
1138 failedPDA->numSector !=
1139 layoutPtr->sectorsPerStripeUnit)
1140 *createFunc = NULL;
1141 else
1142 *createFunc = (RF_VoidFuncPtr)
1143 rf_CreateDegradedWriteDAG;
1144 }
1145 }
1146 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
1147