1 /*	$OpenBSD: rf_paritylogging.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $	*/
2 /*	$NetBSD: rf_paritylogging.c,v 1.10 2000/02/12 16:06:27 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: William V. Courtright II
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 
32 /*
33  * Parity logging configuration, dag selection, and mapping is implemented here.
34  */
35 
36 #include "rf_archs.h"
37 
38 #if	RF_INCLUDE_PARITYLOGGING > 0
39 
40 #include "rf_types.h"
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_dagffrd.h"
46 #include "rf_dagffwr.h"
47 #include "rf_dagdegrd.h"
48 #include "rf_dagdegwr.h"
49 #include "rf_paritylog.h"
50 #include "rf_paritylogDiskMgr.h"
51 #include "rf_paritylogging.h"
52 #include "rf_parityloggingdags.h"
53 #include "rf_general.h"
54 #include "rf_map.h"
55 #include "rf_utils.h"
56 #include "rf_shutdown.h"
57 
58 typedef struct RF_ParityLoggingConfigInfo_s {
59 	RF_RowCol_t **stripeIdentifier;	/*
60 					 * Filled in at config time & used by
61 					 * IdentifyStripe.
62 					 */
63 } RF_ParityLoggingConfigInfo_t;
64 
65 void rf_FreeRegionInfo(RF_Raid_t *, RF_RegionId_t);
66 void rf_FreeParityLogQueue(RF_Raid_t *, RF_ParityLogQueue_t *);
67 void rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *);
68 void rf_ShutdownParityLogging(RF_ThreadArg_t);
69 void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t);
70 void rf_ShutdownParityLoggingPool(RF_ThreadArg_t);
71 void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t);
72 void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t);
73 void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t);
74 
75 
76 int
rf_ConfigureParityLogging(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)77 rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
78     RF_Config_t *cfgPtr)
79 {
80 	int i, j, startdisk, rc;
81 	RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity;
82 	RF_SectorCount_t parityBufferCapacity, maxRegionParityRange;
83 	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
84 	RF_ParityLoggingConfigInfo_t *info;
85 	RF_ParityLog_t *l = NULL, *next;
86 	caddr_t lHeapPtr;
87 
88 	if (rf_numParityRegions <= 0)
89 		return(EINVAL);
90 
91 	/*
92 	 * We create multiple entries on the shutdown list here, since
93 	 * this configuration routine is fairly complicated in and of
94 	 * itself, and this makes backing out of a failed configuration
95 	 * much simpler.
96 	 */
97 
98 	raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG;
99 
100 	/* Create a parity logging configuration structure. */
101 	RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t),
102 			(RF_ParityLoggingConfigInfo_t *),
103 			raidPtr->cleanupList);
104 	if (info == NULL)
105 		return (ENOMEM);
106 	layoutPtr->layoutSpecificInfo = (void *) info;
107 
108 	RF_ASSERT(raidPtr->numRow == 1);
109 
110 	/*
111 	 * The stripe identifier must identify the disks in each stripe, IN
112 	 * THE ORDER THAT THEY APPEAR IN THE STRIPE.
113 	 */
114 	info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol),
115 						  (raidPtr->numCol),
116 						  raidPtr->cleanupList);
117 	if (info->stripeIdentifier == NULL)
118 		return (ENOMEM);
119 
120 	startdisk = 0;
121 	for (i = 0; i < (raidPtr->numCol); i++) {
122 		for (j = 0; j < (raidPtr->numCol); j++) {
123 			info->stripeIdentifier[i][j] = (startdisk + j) %
124 			    (raidPtr->numCol - 1);
125 		}
126 		if ((--startdisk) < 0)
127 			startdisk = raidPtr->numCol - 1 - 1;
128 	}
129 
130 	/* Fill in the remaining layout parameters. */
131 	layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
132 	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
133 	    raidPtr->logBytesPerSector;
134 	layoutPtr->numParityCol = 1;
135 	layoutPtr->numParityLogCol = 1;
136 	layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol -
137 	    layoutPtr->numParityLogCol;
138 	layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol *
139 	    layoutPtr->sectorsPerStripeUnit;
140 	layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
141 	raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
142 	    layoutPtr->sectorsPerStripeUnit;
143 
144 	raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
145 	    layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
146 
147 	/*
148 	 * Configure parity log parameters.
149 	 *
150 	 * Parameter			Comment/constraints
151 	 * ------------------------------------------------
152 	 * numParityRegions*		All regions (except possibly last)
153 	 *				of equal size.
154 	 * totalInCoreLogCapacity*	Amount of memory in bytes available
155 	 *				for in-core logs (default 1 MB).
156 	 * numSectorsPerLog#		Capacity of an in-core log in sectors
157 	 *				(1 * disk track).
158 	 * numParityLogs		Total number of in-core logs,
159 	 *				should be at least numParityRegions.
160 	 * regionLogCapacity		Size of a region log (except possibly
161 	 *				last one) in sectors.
162 	 * totalLogCapacity		Total amount of log space in sectors.
163 	 *
164 	 * Where '*' denotes a user settable parameter.
165 	 * Note that logs are fixed to be the size of a disk track,
166 	 * value #defined in rf_paritylog.h.
167 	 *
168 	 */
169 
170 	totalLogCapacity = layoutPtr->stripeUnitsPerDisk *
171 	    layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol;
172 	raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions;
173 	if (rf_parityLogDebug)
174 		printf("bytes per sector %d\n", raidPtr->bytesPerSector);
175 
176 	/*
177 	 * Reduce fragmentation within a disk region by adjusting the number
178 	 * of regions in an attempt to allow an integral number of logs to fit
179 	 * into a disk region.
180 	 */
181 	fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog;
182 	if (fragmentation > 0)
183 		for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) {
184 			if (((totalLogCapacity / (rf_numParityRegions + i)) %
185 			     raidPtr->numSectorsPerLog) < fragmentation) {
186 				rf_numParityRegions++;
187 				raidPtr->regionLogCapacity = totalLogCapacity /
188 				    rf_numParityRegions;
189 				fragmentation = raidPtr->regionLogCapacity %
190 				    raidPtr->numSectorsPerLog;
191 			}
192 			if (((totalLogCapacity / (rf_numParityRegions - i)) %
193 			     raidPtr->numSectorsPerLog) < fragmentation) {
194 				rf_numParityRegions--;
195 				raidPtr->regionLogCapacity = totalLogCapacity /
196 				    rf_numParityRegions;
197 				fragmentation = raidPtr->regionLogCapacity %
198 				    raidPtr->numSectorsPerLog;
199 			}
200 		}
201 	/* Ensure integral number of regions per log. */
202 	raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity /
203 	    raidPtr->numSectorsPerLog) * raidPtr->numSectorsPerLog;
204 
205 	raidPtr->numParityLogs = rf_totalInCoreLogCapacity /
206 	    (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog);
207 	/*
208 	 * To avoid deadlock, must ensure that enough logs exist for each
209 	 * region to have one simultaneously.
210 	 */
211 	if (raidPtr->numParityLogs < rf_numParityRegions)
212 		raidPtr->numParityLogs = rf_numParityRegions;
213 
214 	/* Create region information structs. */
215 	printf("Allocating %d bytes for in-core parity region info\n",
216 	       (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t)));
217 	RF_Malloc(raidPtr->regionInfo,
218 		  (rf_numParityRegions * sizeof(RF_RegionInfo_t)),
219 		  (RF_RegionInfo_t *));
220 	if (raidPtr->regionInfo == NULL)
221 		return (ENOMEM);
222 
223 	/* Last region may not be full capacity. */
224 	lastRegionCapacity = raidPtr->regionLogCapacity;
225 	while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity +
226 	       lastRegionCapacity > totalLogCapacity)
227 		lastRegionCapacity = lastRegionCapacity -
228 		    raidPtr->numSectorsPerLog;
229 
230 	raidPtr->regionParityRange = raidPtr->sectorsPerDisk /
231 	    rf_numParityRegions;
232 	maxRegionParityRange = raidPtr->regionParityRange;
233 
234 	/* I can't remember why this line is in the code -wvcii 6/30/95. */
235 	/* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0)
236 	 *	regionParityRange++; */
237 
238 	/* Build pool of unused parity logs. */
239 	printf("Allocating %d bytes for %d parity logs\n",
240 	       raidPtr->numParityLogs * raidPtr->numSectorsPerLog *
241 	       raidPtr->bytesPerSector,
242 	       raidPtr->numParityLogs);
243 	RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
244 		  raidPtr->numSectorsPerLog * raidPtr->bytesPerSector,
245 		  (caddr_t));
246 	if (raidPtr->parityLogBufferHeap == NULL)
247 		return (ENOMEM);
248 	lHeapPtr = raidPtr->parityLogBufferHeap;
249 	rc = rf_mutex_init(&raidPtr->parityLogPool.mutex);
250 	if (rc) {
251 		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
252 			     __FILE__, __LINE__, rc);
253 		RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
254 			raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
255 		return (ENOMEM);
256 	}
257 	for (i = 0; i < raidPtr->numParityLogs; i++) {
258 		if (i == 0) {
259 			RF_Calloc(raidPtr->parityLogPool.parityLogs, 1,
260 				  sizeof(RF_ParityLog_t), (RF_ParityLog_t *));
261 			if (raidPtr->parityLogPool.parityLogs == NULL) {
262 				RF_Free(raidPtr->parityLogBufferHeap,
263 					raidPtr->numParityLogs *
264 					raidPtr->numSectorsPerLog *
265 					raidPtr->bytesPerSector);
266 				return (ENOMEM);
267 			}
268 			l = raidPtr->parityLogPool.parityLogs;
269 		} else {
270 			RF_Calloc(l->next, 1, sizeof(RF_ParityLog_t),
271 				  (RF_ParityLog_t *));
272 			if (l->next == NULL) {
273 				RF_Free(raidPtr->parityLogBufferHeap,
274 					raidPtr->numParityLogs *
275 					raidPtr->numSectorsPerLog *
276 					raidPtr->bytesPerSector);
277 				for (l = raidPtr->parityLogPool.parityLogs;
278 				     l;
279 				     l = next) {
280 					next = l->next;
281 					if (l->records)
282 						RF_Free(l->records,
283 						    (raidPtr->numSectorsPerLog *
284 						 sizeof(RF_ParityLogRecord_t)));
285 					RF_Free(l, sizeof(RF_ParityLog_t));
286 				}
287 				return (ENOMEM);
288 			}
289 			l = l->next;
290 		}
291 		l->bufPtr = lHeapPtr;
292 		lHeapPtr += raidPtr->numSectorsPerLog *
293 		    raidPtr->bytesPerSector;
294 		RF_Malloc(l->records, (raidPtr->numSectorsPerLog *
295 				       sizeof(RF_ParityLogRecord_t)),
296 			  (RF_ParityLogRecord_t *));
297 		if (l->records == NULL) {
298 			RF_Free(raidPtr->parityLogBufferHeap,
299 				raidPtr->numParityLogs *
300 				raidPtr->numSectorsPerLog *
301 				raidPtr->bytesPerSector);
302 			for (l = raidPtr->parityLogPool.parityLogs;
303 			     l;
304 			     l = next) {
305 				next = l->next;
306 				if (l->records)
307 					RF_Free(l->records,
308 						(raidPtr->numSectorsPerLog *
309 						 sizeof(RF_ParityLogRecord_t)));
310 				RF_Free(l, sizeof(RF_ParityLog_t));
311 			}
312 			return (ENOMEM);
313 		}
314 	}
315 	rc = rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr);
316 	if (rc) {
317 		RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
318 			     " rc=%d\n", __FILE__, __LINE__, rc);
319 		rf_ShutdownParityLoggingPool(raidPtr);
320 		return (rc);
321 	}
322 	/* Build pool of region buffers. */
323 	rc = rf_mutex_init(&raidPtr->regionBufferPool.mutex);
324 	if (rc) {
325 		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
326 			     __FILE__, __LINE__, rc);
327 		return (ENOMEM);
328 	}
329 	rc = rf_cond_init(&raidPtr->regionBufferPool.cond);
330 	if (rc) {
331 		RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
332 			     __FILE__, __LINE__, rc);
333 		rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
334 		return (ENOMEM);
335 	}
336 	raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity *
337 	    raidPtr->bytesPerSector;
338 	printf("regionBufferPool.bufferSize %d\n",
339 	       raidPtr->regionBufferPool.bufferSize);
340 
341 	/* For now, only one region at a time may be reintegrated. */
342 	raidPtr->regionBufferPool.totalBuffers = 1;
343 
344 	raidPtr->regionBufferPool.availableBuffers =
345 	    raidPtr->regionBufferPool.totalBuffers;
346 	raidPtr->regionBufferPool.availBuffersIndex = 0;
347 	raidPtr->regionBufferPool.emptyBuffersIndex = 0;
348 	printf("Allocating %d bytes for regionBufferPool\n",
349 	       (int) (raidPtr->regionBufferPool.totalBuffers *
350 		      sizeof(caddr_t)));
351 	RF_Malloc(raidPtr->regionBufferPool.buffers,
352 		  raidPtr->regionBufferPool.totalBuffers * sizeof(caddr_t),
353 		  (caddr_t *));
354 	if (raidPtr->regionBufferPool.buffers == NULL) {
355 		rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
356 		rf_cond_destroy(&raidPtr->regionBufferPool.cond);
357 		return (ENOMEM);
358 	}
359 	for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) {
360 		printf("Allocating %d bytes for regionBufferPool#%d\n",
361 		       (int) (raidPtr->regionBufferPool.bufferSize *
362 			      sizeof(char)), i);
363 		RF_Malloc(raidPtr->regionBufferPool.buffers[i],
364 			  raidPtr->regionBufferPool.bufferSize * sizeof(char),
365 			  (caddr_t));
366 		if (raidPtr->regionBufferPool.buffers[i] == NULL) {
367 			rf_mutex_destroy(&raidPtr->regionBufferPool.mutex);
368 			rf_cond_destroy(&raidPtr->regionBufferPool.cond);
369 			for (j = 0; j < i; j++) {
370 				RF_Free(raidPtr->regionBufferPool.buffers[i],
371 					raidPtr->regionBufferPool.bufferSize *
372 					sizeof(char));
373 			}
374 			RF_Free(raidPtr->regionBufferPool.buffers,
375 				raidPtr->regionBufferPool.totalBuffers *
376 				sizeof(caddr_t));
377 			return (ENOMEM);
378 		}
379 		printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i,
380 		    (long) raidPtr->regionBufferPool.buffers[i]);
381 	}
382 	rc = rf_ShutdownCreate(listp,
383 			       rf_ShutdownParityLoggingRegionBufferPool,
384 			       raidPtr);
385 	if (rc) {
386 		RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
387 			     " rc=%d\n", __FILE__, __LINE__, rc);
388 		rf_ShutdownParityLoggingRegionBufferPool(raidPtr);
389 		return (rc);
390 	}
391 	/* Build pool of parity buffers. */
392 	parityBufferCapacity = maxRegionParityRange;
393 	rc = rf_mutex_init(&raidPtr->parityBufferPool.mutex);
394 	if (rc) {
395 		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
396 			     __FILE__, __LINE__, rc);
397 		return (rc);
398 	}
399 	rc = rf_cond_init(&raidPtr->parityBufferPool.cond);
400 	if (rc) {
401 		RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
402 			     __FILE__, __LINE__, rc);
403 		rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
404 		return (ENOMEM);
405 	}
406 	raidPtr->parityBufferPool.bufferSize = parityBufferCapacity *
407 	    raidPtr->bytesPerSector;
408 	printf("parityBufferPool.bufferSize %d\n",
409 	       raidPtr->parityBufferPool.bufferSize);
410 
411 	/* For now, only one region at a time may be reintegrated. */
412 	raidPtr->parityBufferPool.totalBuffers = 1;
413 
414 	raidPtr->parityBufferPool.availableBuffers =
415 	    raidPtr->parityBufferPool.totalBuffers;
416 	raidPtr->parityBufferPool.availBuffersIndex = 0;
417 	raidPtr->parityBufferPool.emptyBuffersIndex = 0;
418 	printf("Allocating %d bytes for parityBufferPool of %d units\n",
419 	       (int) (raidPtr->parityBufferPool.totalBuffers *
420 		      sizeof(caddr_t)),
421 	       raidPtr->parityBufferPool.totalBuffers);
422 	RF_Malloc(raidPtr->parityBufferPool.buffers,
423 		  raidPtr->parityBufferPool.totalBuffers * sizeof(caddr_t),
424 		  (caddr_t *));
425 	if (raidPtr->parityBufferPool.buffers == NULL) {
426 		rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
427 		rf_cond_destroy(&raidPtr->parityBufferPool.cond);
428 		return (ENOMEM);
429 	}
430 	for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) {
431 		printf("Allocating %d bytes for parityBufferPool#%d\n",
432 		       (int) (raidPtr->parityBufferPool.bufferSize *
433 			      sizeof(char)), i);
434 		RF_Malloc(raidPtr->parityBufferPool.buffers[i],
435 			  raidPtr->parityBufferPool.bufferSize * sizeof(char),
436 			  (caddr_t));
437 		if (raidPtr->parityBufferPool.buffers == NULL) {
438 			rf_mutex_destroy(&raidPtr->parityBufferPool.mutex);
439 			rf_cond_destroy(&raidPtr->parityBufferPool.cond);
440 			for (j = 0; j < i; j++) {
441 				RF_Free(raidPtr->parityBufferPool.buffers[i],
442 					raidPtr->regionBufferPool.bufferSize *
443 					sizeof(char));
444 			}
445 			RF_Free(raidPtr->parityBufferPool.buffers,
446 				raidPtr->regionBufferPool.totalBuffers *
447 				sizeof(caddr_t));
448 			return (ENOMEM);
449 		}
450 		printf("parityBufferPool.buffers[%d] = %lx\n", i,
451 		    (long) raidPtr->parityBufferPool.buffers[i]);
452 	}
453 	rc = rf_ShutdownCreate(listp,
454 			       rf_ShutdownParityLoggingParityBufferPool,
455 			       raidPtr);
456 	if (rc) {
457 		RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
458 			     " rc=%d\n", __FILE__, __LINE__, rc);
459 		rf_ShutdownParityLoggingParityBufferPool(raidPtr);
460 		return (rc);
461 	}
462 	/* Initialize parityLogDiskQueue. */
463 	rc = rf_create_managed_mutex(listp,
464 				     &raidPtr->parityLogDiskQueue.mutex);
465 	if (rc) {
466 		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
467 			     __FILE__, __LINE__, rc);
468 		return (rc);
469 	}
470 	rc = rf_create_managed_cond(listp, &raidPtr->parityLogDiskQueue.cond);
471 	if (rc) {
472 		RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n",
473 			     __FILE__, __LINE__, rc);
474 		return (rc);
475 	}
476 	raidPtr->parityLogDiskQueue.flushQueue = NULL;
477 	raidPtr->parityLogDiskQueue.reintQueue = NULL;
478 	raidPtr->parityLogDiskQueue.bufHead = NULL;
479 	raidPtr->parityLogDiskQueue.bufTail = NULL;
480 	raidPtr->parityLogDiskQueue.reintHead = NULL;
481 	raidPtr->parityLogDiskQueue.reintTail = NULL;
482 	raidPtr->parityLogDiskQueue.logBlockHead = NULL;
483 	raidPtr->parityLogDiskQueue.logBlockTail = NULL;
484 	raidPtr->parityLogDiskQueue.reintBlockHead = NULL;
485 	raidPtr->parityLogDiskQueue.reintBlockTail = NULL;
486 	raidPtr->parityLogDiskQueue.freeDataList = NULL;
487 	raidPtr->parityLogDiskQueue.freeCommonList = NULL;
488 
489 	rc = rf_ShutdownCreate(listp,
490 			       rf_ShutdownParityLoggingDiskQueue,
491 			       raidPtr);
492 	if (rc) {
493 		RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
494 			     " rc=%d\n", __FILE__, __LINE__, rc);
495 		return (rc);
496 	}
497 	for (i = 0; i < rf_numParityRegions; i++) {
498 		rc = rf_mutex_init(&raidPtr->regionInfo[i].mutex);
499 		if (rc) {
500 			RF_ERRORMSG3("Unable to init mutex file %s line %d"
501 				     " rc=%d\n", __FILE__, __LINE__, rc);
502 			for (j = 0; j < i; j++)
503 				rf_FreeRegionInfo(raidPtr, j);
504 			RF_Free(raidPtr->regionInfo,
505 				(rf_numParityRegions *
506 				 sizeof(RF_RegionInfo_t)));
507 			return (ENOMEM);
508 		}
509 		rc = rf_mutex_init(&raidPtr->regionInfo[i].reintMutex);
510 		if (rc) {
511 			RF_ERRORMSG3("Unable to init mutex file %s line %d"
512 				     " rc=%d\n", __FILE__, __LINE__, rc);
513 			rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
514 			for (j = 0; j < i; j++)
515 				rf_FreeRegionInfo(raidPtr, j);
516 			RF_Free(raidPtr->regionInfo,
517 				(rf_numParityRegions *
518 				 sizeof(RF_RegionInfo_t)));
519 			return (ENOMEM);
520 		}
521 		raidPtr->regionInfo[i].reintInProgress = RF_FALSE;
522 		raidPtr->regionInfo[i].regionStartAddr =
523 		    raidPtr->regionLogCapacity * i;
524 		raidPtr->regionInfo[i].parityStartAddr =
525 		    raidPtr->regionParityRange * i;
526 		if (i < rf_numParityRegions - 1) {
527 			raidPtr->regionInfo[i].capacity =
528 			    raidPtr->regionLogCapacity;
529 			raidPtr->regionInfo[i].numSectorsParity =
530 			    raidPtr->regionParityRange;
531 		} else {
532 			raidPtr->regionInfo[i].capacity = lastRegionCapacity;
533 			raidPtr->regionInfo[i].numSectorsParity =
534 			    raidPtr->sectorsPerDisk -
535 			    raidPtr->regionParityRange * i;
536 			if (raidPtr->regionInfo[i].numSectorsParity >
537 			    maxRegionParityRange)
538 				maxRegionParityRange =
539 				    raidPtr->regionInfo[i].numSectorsParity;
540 		}
541 		raidPtr->regionInfo[i].diskCount = 0;
542 		RF_ASSERT(raidPtr->regionInfo[i].capacity +
543 			  raidPtr->regionInfo[i].regionStartAddr <=
544 			  totalLogCapacity);
545 		RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr +
546 			  raidPtr->regionInfo[i].numSectorsParity <=
547 			  raidPtr->sectorsPerDisk);
548 		printf("Allocating %d bytes for region %d\n",
549 		       (int) (raidPtr->regionInfo[i].capacity *
550 			   sizeof(RF_DiskMap_t)), i);
551 		RF_Malloc(raidPtr->regionInfo[i].diskMap,
552 			  (raidPtr->regionInfo[i].capacity *
553 			   sizeof(RF_DiskMap_t)),
554 			  (RF_DiskMap_t *));
555 		if (raidPtr->regionInfo[i].diskMap == NULL) {
556 			rf_mutex_destroy(&raidPtr->regionInfo[i].mutex);
557 			rf_mutex_destroy(&raidPtr->regionInfo[i].reintMutex);
558 			for (j = 0; j < i; j++)
559 				rf_FreeRegionInfo(raidPtr, j);
560 			RF_Free(raidPtr->regionInfo,
561 				(rf_numParityRegions *
562 				 sizeof(RF_RegionInfo_t)));
563 			return (ENOMEM);
564 		}
565 		raidPtr->regionInfo[i].loggingEnabled = RF_FALSE;
566 		raidPtr->regionInfo[i].coreLog = NULL;
567 	}
568 	rc = rf_ShutdownCreate(listp,
569 			       rf_ShutdownParityLoggingRegionInfo,
570 			       raidPtr);
571 	if (rc) {
572 		RF_ERRORMSG3("Unable to create shutdown entry file %s line %d"
573 			     " rc=%d\n", __FILE__, __LINE__, rc);
574 		rf_ShutdownParityLoggingRegionInfo(raidPtr);
575 		return (rc);
576 	}
577 	RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0);
578 	raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED;
579 	rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle,
580 			      rf_ParityLoggingDiskManager, raidPtr, "rf_log");
581 	if (rc) {
582 		raidPtr->parityLogDiskQueue.threadState = 0;
583 		RF_ERRORMSG3("Unable to create parity logging disk thread"
584 			     " file %s line %d rc=%d\n",
585 			     __FILE__, __LINE__, rc);
586 		return (ENOMEM);
587 	}
588 	/* Wait for thread to start. */
589 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
590 	while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) {
591 		RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
592 			     raidPtr->parityLogDiskQueue.mutex);
593 	}
594 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
595 
596 	rc = rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr);
597 	if (rc) {
598 		RF_ERRORMSG1("Got rc=%d adding parity logging shutdown"
599 			     " event.\n", rc);
600 		rf_ShutdownParityLogging(raidPtr);
601 		return (rc);
602 	}
603 	if (rf_parityLogDebug) {
604 		printf("\t\t\tsize of disk log in sectors: %d\n",
605 		       (int) totalLogCapacity);
606 		printf("\t\t\ttotal number of parity regions is %d\n",
607 		       (int) rf_numParityRegions);
608 		printf("\t\t\tnominal sectors of log per parity region is %d\n",
609 		       (int) raidPtr->regionLogCapacity);
610 		printf("\t\t\tnominal region fragmentation is %d sectors\n",
611 		       (int) fragmentation);
612 		printf("\t\t\ttotal number of parity logs is %d\n",
613 		       raidPtr->numParityLogs);
614 		printf("\t\t\tparity log size is %d sectors\n",
615 		       raidPtr->numSectorsPerLog);
616 		printf("\t\t\ttotal in-core log space is %d bytes\n",
617 		       (int) rf_totalInCoreLogCapacity);
618 	}
619 	rf_EnableParityLogging(raidPtr);
620 
621 	return (0);
622 }
623 
624 
625 void
rf_FreeRegionInfo(RF_Raid_t * raidPtr,RF_RegionId_t regionID)626 rf_FreeRegionInfo(RF_Raid_t *raidPtr, RF_RegionId_t regionID)
627 {
628 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
629 	RF_Free(raidPtr->regionInfo[regionID].diskMap,
630 		(raidPtr->regionInfo[regionID].capacity *
631 		 sizeof(RF_DiskMap_t)));
632 	if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) {
633 		rf_ReleaseParityLogs(raidPtr,
634 				     raidPtr->regionInfo[regionID].coreLog);
635 		raidPtr->regionInfo[regionID].coreLog = NULL;
636 	} else {
637 		RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL);
638 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0);
639 	}
640 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
641 	rf_mutex_destroy(&raidPtr->regionInfo[regionID].mutex);
642 	rf_mutex_destroy(&raidPtr->regionInfo[regionID].reintMutex);
643 }
644 
645 
646 void
rf_FreeParityLogQueue(RF_Raid_t * raidPtr,RF_ParityLogQueue_t * queue)647 rf_FreeParityLogQueue(RF_Raid_t *raidPtr, RF_ParityLogQueue_t *queue)
648 {
649 	RF_ParityLog_t *l1, *l2;
650 
651 	RF_LOCK_MUTEX(queue->mutex);
652 	l1 = queue->parityLogs;
653 	while (l1) {
654 		l2 = l1;
655 		l1 = l2->next;
656 		RF_Free(l2->records, (raidPtr->numSectorsPerLog *
657 				      sizeof(RF_ParityLogRecord_t)));
658 		RF_Free(l2, sizeof(RF_ParityLog_t));
659 	}
660 	RF_UNLOCK_MUTEX(queue->mutex);
661 	rf_mutex_destroy(&queue->mutex);
662 }
663 
664 
665 void
rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue)666 rf_FreeRegionBufferQueue(RF_RegionBufferQueue_t *queue)
667 {
668 	int i;
669 
670 	RF_LOCK_MUTEX(queue->mutex);
671 	if (queue->availableBuffers != queue->totalBuffers) {
672 		printf("Attempt to free region queue that is still in use !\n");
673 		RF_ASSERT(0);
674 	}
675 	for (i = 0; i < queue->totalBuffers; i++)
676 		RF_Free(queue->buffers[i], queue->bufferSize);
677 	RF_Free(queue->buffers, queue->totalBuffers * sizeof(caddr_t));
678 	RF_UNLOCK_MUTEX(queue->mutex);
679 	rf_mutex_destroy(&queue->mutex);
680 }
681 
682 
683 void
rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)684 rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg)
685 {
686 	RF_Raid_t *raidPtr;
687 	RF_RegionId_t i;
688 
689 	raidPtr = (RF_Raid_t *) arg;
690 	if (rf_parityLogDebug) {
691 		printf("raid%d: ShutdownParityLoggingRegionInfo\n",
692 		       raidPtr->raidid);
693 	}
694 	/* Free region information structs. */
695 	for (i = 0; i < rf_numParityRegions; i++)
696 		rf_FreeRegionInfo(raidPtr, i);
697 	RF_Free(raidPtr->regionInfo, (rf_numParityRegions *
698 				      sizeof(raidPtr->regionInfo)));
699 	raidPtr->regionInfo = NULL;
700 }
701 
702 
703 void
rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)704 rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg)
705 {
706 	RF_Raid_t *raidPtr;
707 
708 	raidPtr = (RF_Raid_t *) arg;
709 	if (rf_parityLogDebug) {
710 		printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid);
711 	}
712 	/* Free contents of parityLogPool. */
713 	rf_FreeParityLogQueue(raidPtr, &raidPtr->parityLogPool);
714 	RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs *
715 		raidPtr->numSectorsPerLog * raidPtr->bytesPerSector);
716 }
717 
718 
719 void
rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)720 rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg)
721 {
722 	RF_Raid_t *raidPtr;
723 
724 	raidPtr = (RF_Raid_t *) arg;
725 	if (rf_parityLogDebug) {
726 		printf("raid%d: ShutdownParityLoggingRegionBufferPool\n",
727 		       raidPtr->raidid);
728 	}
729 	rf_FreeRegionBufferQueue(&raidPtr->regionBufferPool);
730 }
731 
732 
733 void
rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)734 rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg)
735 {
736 	RF_Raid_t *raidPtr;
737 
738 	raidPtr = (RF_Raid_t *) arg;
739 	if (rf_parityLogDebug) {
740 		printf("raid%d: ShutdownParityLoggingParityBufferPool\n",
741 		       raidPtr->raidid);
742 	}
743 	rf_FreeRegionBufferQueue(&raidPtr->parityBufferPool);
744 }
745 
746 
747 void
rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)748 rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg)
749 {
750 	RF_ParityLogData_t *d;
751 	RF_CommonLogData_t *c;
752 	RF_Raid_t *raidPtr;
753 
754 	raidPtr = (RF_Raid_t *) arg;
755 	if (rf_parityLogDebug) {
756 		printf("raid%d: ShutdownParityLoggingDiskQueue\n",
757 		       raidPtr->raidid);
758 	}
759 	/* Free disk manager stuff. */
760 	RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL);
761 	RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL);
762 	RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL);
763 	RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL);
764 	while (raidPtr->parityLogDiskQueue.freeDataList) {
765 		d = raidPtr->parityLogDiskQueue.freeDataList;
766 		raidPtr->parityLogDiskQueue.freeDataList =
767 		    raidPtr->parityLogDiskQueue.freeDataList->next;
768 		RF_Free(d, sizeof(RF_ParityLogData_t));
769 	}
770 	while (raidPtr->parityLogDiskQueue.freeCommonList) {
771 		c = raidPtr->parityLogDiskQueue.freeCommonList;
772 		rf_mutex_destroy(&c->mutex);
773 		raidPtr->parityLogDiskQueue.freeCommonList =
774 		    raidPtr->parityLogDiskQueue.freeCommonList->next;
775 		RF_Free(c, sizeof(RF_CommonLogData_t));
776 	}
777 }
778 
779 
780 void
rf_ShutdownParityLogging(RF_ThreadArg_t arg)781 rf_ShutdownParityLogging(RF_ThreadArg_t arg)
782 {
783 	RF_Raid_t *raidPtr;
784 
785 	raidPtr = (RF_Raid_t *) arg;
786 	if (rf_parityLogDebug) {
787 		printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid);
788 	}
789 	/* Shutdown disk thread. */
790 	/*
791 	 * This has the desirable side-effect of forcing all regions to be
792 	 * reintegrated. This is necessary since all parity log maps are
793 	 * currently held in volatile memory.
794 	 */
795 
796 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
797 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE;
798 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
799 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
800 	/*
801 	 * pLogDiskThread will now terminate when queues are cleared.
802 	 * Now wait for it to be done.
803 	 */
804 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
805 	while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) {
806 		RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
807 			     raidPtr->parityLogDiskQueue.mutex);
808 	}
809 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
810 	if (rf_parityLogDebug) {
811 		printf("raid%d: ShutdownParityLogging done"
812 		       " (thread completed)\n", raidPtr->raidid);
813 	}
814 }
815 
816 
817 int
rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr)818 rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr)
819 {
820 	return (20);
821 }
822 
823 
824 RF_HeadSepLimit_t
rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr)825 rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr)
826 {
827 	return (10);
828 }
829 
830 
831 /* Return the region ID for a given RAID address. */
832 RF_RegionId_t
rf_MapRegionIDParityLogging(RF_Raid_t * raidPtr,RF_SectorNum_t address)833 rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr, RF_SectorNum_t address)
834 {
835 	RF_RegionId_t regionID;
836 
837 	/* regionID = address / (raidPtr->regionParityRange *
838 	 *     raidPtr->Layout.numDataCol); */
839 	regionID = address / raidPtr->regionParityRange;
840 	if (regionID == rf_numParityRegions) {
841 		/* Last region may be larger than other regions. */
842 		regionID--;
843 	}
844 	RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr);
845 	RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr +
846 		  raidPtr->regionInfo[regionID].numSectorsParity);
847 	RF_ASSERT(regionID < rf_numParityRegions);
848 	return (regionID);
849 }
850 
851 
852 /* Given a logical RAID sector, determine physical disk address of data. */
853 void
rf_MapSectorParityLogging(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)854 rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
855     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
856 {
857 	RF_StripeNum_t SUID = raidSector /
858 		raidPtr->Layout.sectorsPerStripeUnit;
859 	*row = 0;
860 	/* *col = (SUID % (raidPtr->numCol -
861 	 *     raidPtr->Layout.numParityLogCol)); */
862 	*col = SUID % raidPtr->Layout.numDataCol;
863 	*diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
864 	    raidPtr->Layout.sectorsPerStripeUnit +
865 	    (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
866 }
867 
868 
869 /* Given a logical RAID sector, determine physical disk address of parity. */
870 void
rf_MapParityParityLogging(RF_Raid_t * raidPtr,RF_RaidAddr_t raidSector,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * diskSector,int remap)871 rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
872     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
873 {
874 	RF_StripeNum_t SUID = raidSector /
875 	    raidPtr->Layout.sectorsPerStripeUnit;
876 
877 	*row = 0;
878 	/* *col =
879 	 *     raidPtr->Layout.numDataCol-(SUID / raidPtr->Layout.numDataCol) %
880 	 *      (raidPtr->numCol - raidPtr->Layout.numParityLogCol); */
881 	*col = raidPtr->Layout.numDataCol;
882 	*diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
883 	    raidPtr->Layout.sectorsPerStripeUnit +
884 	    (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
885 }
886 
887 
888 /*
889  * Given a regionID and sector offset, determine the physical disk address
890  * of the parity log.
891  */
892 void
rf_MapLogParityLogging(RF_Raid_t * raidPtr,RF_RegionId_t regionID,RF_SectorNum_t regionOffset,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * startSector)893 rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
894     RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col,
895     RF_SectorNum_t *startSector)
896 {
897 	*row = 0;
898 	*col = raidPtr->numCol - 1;
899 	*startSector =
900 	    raidPtr->regionInfo[regionID].regionStartAddr + regionOffset;
901 }
902 
903 
904 /*
905  * Given a regionID, determine the physical disk address of the logged
906  * parity for that region.
907  */
908 void
rf_MapRegionParity(RF_Raid_t * raidPtr,RF_RegionId_t regionID,RF_RowCol_t * row,RF_RowCol_t * col,RF_SectorNum_t * startSector,RF_SectorCount_t * numSector)909 rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
910     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector,
911     RF_SectorCount_t *numSector)
912 {
913 	*row = 0;
914 	*col = raidPtr->numCol - 2;
915 	*startSector = raidPtr->regionInfo[regionID].parityStartAddr;
916 	*numSector = raidPtr->regionInfo[regionID].numSectorsParity;
917 }
918 
919 
920 /*
921  * Given a logical RAID address, determine the participating disks in
922  * the stripe.
923  */
924 void
rf_IdentifyStripeParityLogging(RF_Raid_t * raidPtr,RF_RaidAddr_t addr,RF_RowCol_t ** diskids,RF_RowCol_t * outRow)925 rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
926     RF_RowCol_t **diskids, RF_RowCol_t *outRow)
927 {
928 	RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout,
929 							   addr);
930 	RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *)
931 	    raidPtr->Layout.layoutSpecificInfo;
932 	*outRow = 0;
933 	*diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
934 }
935 
936 
937 void
rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t * layoutPtr,RF_StripeNum_t stripeID,RF_StripeNum_t * psID,RF_ReconUnitNum_t * which_ru)938 rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr,
939     RF_StripeNum_t stripeID, RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
940 {
941 	*which_ru = 0;
942 	*psID = stripeID;
943 }
944 
945 
946 /*
947  * Select an algorithm for performing an access. Returns two pointers,
948  * one to a function that will return information about the DAG, and
949  * another to a function that will create the dag.
950  */
951 void
rf_ParityLoggingDagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmp,RF_VoidFuncPtr * createFunc)952 rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
953     RF_AccessStripeMap_t *asmp, RF_VoidFuncPtr *createFunc)
954 {
955 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
956 	RF_PhysDiskAddr_t *failedPDA = NULL;
957 	RF_RowCol_t frow, fcol;
958 	RF_RowStatus_t rstat;
959 	int prior_recon;
960 
961 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
962 
963 	if (asmp->numDataFailed + asmp->numParityFailed > 1) {
964 		RF_ERRORMSG("Multiple disks failed in a single group !"
965 			    "  Aborting I/O operation.\n");
966 		 /* *infoFunc = */ *createFunc = NULL;
967 		return;
968 	} else
969 		if (asmp->numDataFailed + asmp->numParityFailed == 1) {
970 
971 			/*
972 			 * If under recon & already reconstructed, redirect
973 			 * the access to the spare drive and eliminate the
974 			 * failure indication.
975 			 */
976 			failedPDA = asmp->failedPDAs[0];
977 			frow = failedPDA->row;
978 			fcol = failedPDA->col;
979 			rstat = raidPtr->status[failedPDA->row];
980 			prior_recon = (rstat == rf_rs_reconfigured) ||
981 			    ((rstat == rf_rs_reconstructing) ?
982 			     rf_CheckRUReconstructed(raidPtr->reconControl[frow]
983 			      ->reconMap, failedPDA->startSector) : 0);
984 			if (prior_recon) {
985 				RF_RowCol_t or = failedPDA->row;
986 				RF_RowCol_t oc = failedPDA->col;
987 				RF_SectorNum_t oo = failedPDA->startSector;
988 				if (layoutPtr->map->flags &
989 				    RF_DISTRIBUTE_SPARE) {
990 					/* Redirect to dist spare space. */
991 
992 					if (failedPDA == asmp->parityInfo) {
993 
994 						/* Parity has failed. */
995 						(layoutPtr->map->MapParity)
996 						    (raidPtr,
997 						     failedPDA->raidAddress,
998 						     &failedPDA->row,
999 						     &failedPDA->col,
1000 						     &failedPDA->startSector,
1001 						     RF_REMAP);
1002 
1003 						if (asmp->parityInfo->next) {
1004 							/*
1005 							 * Redir 2nd component,
1006 							 * if any.
1007 							 */
1008 							RF_PhysDiskAddr_t *p =
1009 							 asmp->parityInfo->next;
1010 							RF_SectorNum_t SUoffs =
1011 							    p->startSector %
1012 						layoutPtr->sectorsPerStripeUnit;
1013 							p->row = failedPDA->row;
1014 							p->col = failedPDA->col;
1015 							/*
1016 							 * Cheating:
1017 							 * startSector is not
1018 							 * really a RAID
1019 							 * address.
1020 							 */
1021 							p->startSector =
1022 			    rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
1023 				failedPDA->startSector) + SUoffs;
1024 						}
1025 					} else
1026 						if (asmp->parityInfo->next &&
1027 						    failedPDA ==
1028 						    asmp->parityInfo->next) {
1029 							/*
1030 							 * Should not ever
1031 							 * happen.
1032 							 */
1033 							RF_ASSERT(0);
1034 						} else {
1035 							/* Data has failed. */
1036 							(layoutPtr->map
1037 							 ->MapSector)
1038 							    (raidPtr,
1039 							 failedPDA->raidAddress,
1040 							    &failedPDA->row,
1041 							    &failedPDA->col,
1042 							&failedPDA->startSector,
1043 							    RF_REMAP);
1044 						}
1045 
1046 				} else {
1047 					/* Redirect to dedicated spare space. */
1048 
1049 					failedPDA->row =
1050 					    raidPtr->Disks[frow][fcol].spareRow;
1051 					failedPDA->col =
1052 					    raidPtr->Disks[frow][fcol].spareCol;
1053 
1054 					/*
1055 					 * The parity may have two distinct
1056 					 * components, both of which may need
1057 					 * to be redirected.
1058 					 */
1059 					if (asmp->parityInfo->next) {
1060 						if (failedPDA ==
1061 						    asmp->parityInfo) {
1062 							failedPDA->next->row =
1063 							    failedPDA->row;
1064 							failedPDA->next->col =
1065 							    failedPDA->col;
1066 						} else {
1067 							if (failedPDA ==
1068 							    asmp->parityInfo
1069 							     ->next) {
1070 								/*
1071 								 * Paranoid:
1072 								 * Should never
1073 								 * occur.
1074 								 */
1075 								asmp->parityInfo
1076 								->row =
1077 								 failedPDA->row;
1078 								asmp->parityInfo
1079 								->col =
1080 								 failedPDA->col;
1081 							}
1082 						}
1083 					}
1084 				}
1085 
1086 				RF_ASSERT(failedPDA->col != -1);
1087 
1088 				if (rf_dagDebug || rf_mapDebug) {
1089 					printf("raid%d: Redirected type '%c'"
1090 					       " r %d c %d o %ld -> r %d c %d"
1091 					       " o %ld\n", raidPtr->raidid,
1092 					       type, or, oc, (long) oo,
1093 					       failedPDA->row, failedPDA->col,
1094 					       (long) failedPDA->startSector);
1095 				}
1096 				asmp->numDataFailed = asmp->numParityFailed = 0;
1097 			}
1098 		}
1099 	if (type == RF_IO_TYPE_READ) {
1100 
1101 		if (asmp->numDataFailed == 0)
1102 			*createFunc =
1103 			    (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;
1104 		else
1105 			*createFunc =
1106 			    (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG;
1107 
1108 	} else {
1109 
1110 
1111 		/*
1112 		 * If mirroring, always use large writes. If the access
1113 		 * requires two distinct parity updates, always do a small
1114 		 * write. If the stripe contains a failure but the access
1115 		 * does not, do a small write. The first conditional
1116 		 * (numStripeUnitsAccessed <= numDataCol/2) uses a
1117 		 * less-than-or-equal rather than just a less-than because
1118 		 * when G is 3 or 4, numDataCol/2 is 1, and I want
1119 		 * single-stripe-unit updates to use just one disk.
1120 		 */
1121 		if ((asmp->numDataFailed + asmp->numParityFailed) == 0) {
1122 			if (((asmp->numStripeUnitsAccessed <=
1123 			      (layoutPtr->numDataCol / 2)) &&
1124 			     (layoutPtr->numDataCol != 1)) ||
1125 			    (asmp->parityInfo->next != NULL) ||
1126 			    rf_CheckStripeForFailures(raidPtr, asmp)) {
1127 				*createFunc = (RF_VoidFuncPtr)
1128 				    rf_CreateParityLoggingSmallWriteDAG;
1129 			} else
1130 				*createFunc = (RF_VoidFuncPtr)
1131 				    rf_CreateParityLoggingLargeWriteDAG;
1132 		} else
1133 			if (asmp->numParityFailed == 1)
1134 				*createFunc = (RF_VoidFuncPtr)
1135 				    rf_CreateNonRedundantWriteDAG;
1136 			else
1137 				if (asmp->numStripeUnitsAccessed != 1 &&
1138 				    failedPDA->numSector !=
1139 				    layoutPtr->sectorsPerStripeUnit)
1140 					*createFunc = NULL;
1141 				else
1142 					*createFunc = (RF_VoidFuncPtr)
1143 					    rf_CreateDegradedWriteDAG;
1144 	}
1145 }
1146 #endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
1147