1 /*	$OpenBSD: rf_paritylog.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $	*/
2 /*	$NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: William V. Courtright II
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 /*
32  * Code for manipulating in-core parity logs.
33  */
34 
35 #include "rf_archs.h"
36 
37 #if	RF_INCLUDE_PARITYLOGGING > 0
38 
39 /*
40  * Append-only log for recording parity "update" and "overwrite" records.
41  */
42 
43 #include "rf_types.h"
44 #include "rf_threadstuff.h"
45 #include "rf_mcpair.h"
46 #include "rf_raid.h"
47 #include "rf_dag.h"
48 #include "rf_dagfuncs.h"
49 #include "rf_desc.h"
50 #include "rf_layout.h"
51 #include "rf_diskqueue.h"
52 #include "rf_etimer.h"
53 #include "rf_paritylog.h"
54 #include "rf_general.h"
55 #include "rf_map.h"
56 #include "rf_paritylogging.h"
57 #include "rf_paritylogDiskMgr.h"
58 
59 RF_CommonLogData_t *rf_AllocParityLogCommonData(RF_Raid_t *);
60 void rf_FreeParityLogCommonData(RF_CommonLogData_t *);
61 RF_ParityLogData_t *rf_AllocParityLogData(RF_Raid_t *);
62 void rf_FreeParityLogData(RF_ParityLogData_t *);
63 void rf_EnqueueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **,
64 	RF_ParityLogData_t **);
65 RF_ParityLogData_t *rf_DequeueParityLogData(RF_Raid_t *, RF_ParityLogData_t **,
66 	RF_ParityLogData_t **, int);
67 void rf_RequeueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **,
68 	RF_ParityLogData_t **);
69 RF_ParityLogData_t *rf_DequeueMatchingLogData(RF_Raid_t *,
70 	RF_ParityLogData_t **, RF_ParityLogData_t **);
71 RF_ParityLog_t *rf_AcquireParityLog(RF_ParityLogData_t *, int);
72 void rf_ReintLog(RF_Raid_t *, int, RF_ParityLog_t *);
73 void rf_FlushLog(RF_Raid_t *, RF_ParityLog_t *);
74 int  rf_DumpParityLogToDisk(int, RF_ParityLogData_t *);
75 
76 RF_CommonLogData_t *
rf_AllocParityLogCommonData(RF_Raid_t * raidPtr)77 rf_AllocParityLogCommonData(RF_Raid_t *raidPtr)
78 {
79 	RF_CommonLogData_t *common = NULL;
80 	int rc;
81 
82 	/*
83 	 * Return a struct for holding common parity log information from the
84 	 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
85 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING
86 	 */
87 
88 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
89 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
90 		common = raidPtr->parityLogDiskQueue.freeCommonList;
91 		raidPtr->parityLogDiskQueue.freeCommonList =
92 		    raidPtr->parityLogDiskQueue.freeCommonList->next;
93 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
94 	} else {
95 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
96 		RF_Malloc(common, sizeof(RF_CommonLogData_t),
97 		    (RF_CommonLogData_t *));
98 		rc = rf_mutex_init(&common->mutex);
99 		if (rc) {
100 			RF_ERRORMSG3("Unable to init mutex file %s line %d"
101 			    " rc=%d\n", __FILE__, __LINE__, rc);
102 			RF_Free(common, sizeof(RF_CommonLogData_t));
103 			common = NULL;
104 		}
105 	}
106 	common->next = NULL;
107 	return (common);
108 }
109 
110 void
rf_FreeParityLogCommonData(RF_CommonLogData_t * common)111 rf_FreeParityLogCommonData(RF_CommonLogData_t *common)
112 {
113 	RF_Raid_t *raidPtr;
114 
115 	/*
116 	 * Insert a single struct for holding parity log information (data)
117 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
118 	 * NON-BLOCKING
119 	 */
120 
121 	raidPtr = common->raidPtr;
122 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
123 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
124 	raidPtr->parityLogDiskQueue.freeCommonList = common;
125 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
126 }
127 
128 RF_ParityLogData_t *
rf_AllocParityLogData(RF_Raid_t * raidPtr)129 rf_AllocParityLogData(RF_Raid_t *raidPtr)
130 {
131 	RF_ParityLogData_t *data = NULL;
132 
133 	/*
134 	 * Return a struct for holding parity log information from the free
135 	 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
136 	 * call RF_Malloc to create a new structure. NON-BLOCKING
137 	 */
138 
139 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
140 	if (raidPtr->parityLogDiskQueue.freeDataList) {
141 		data = raidPtr->parityLogDiskQueue.freeDataList;
142 		raidPtr->parityLogDiskQueue.freeDataList =
143 		    raidPtr->parityLogDiskQueue.freeDataList->next;
144 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
145 	} else {
146 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
147 		RF_Malloc(data, sizeof(RF_ParityLogData_t),
148 		    (RF_ParityLogData_t *));
149 	}
150 	data->next = NULL;
151 	data->prev = NULL;
152 	return (data);
153 }
154 
155 
156 void
rf_FreeParityLogData(RF_ParityLogData_t * data)157 rf_FreeParityLogData(RF_ParityLogData_t *data)
158 {
159 	RF_ParityLogData_t *nextItem;
160 	RF_Raid_t *raidPtr;
161 
162 	/*
163 	 * Insert a linked list of structs for holding parity log information
164 	 * (data) into the free list (parityLogDiskQueue.freeList).
165 	 * NON-BLOCKING
166 	 */
167 
168 	raidPtr = data->common->raidPtr;
169 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
170 	while (data) {
171 		nextItem = data->next;
172 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
173 		raidPtr->parityLogDiskQueue.freeDataList = data;
174 		data = nextItem;
175 	}
176 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
177 }
178 
179 
180 void
rf_EnqueueParityLogData(RF_ParityLogData_t * data,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail)181 rf_EnqueueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
182     RF_ParityLogData_t **tail)
183 {
184 	RF_Raid_t *raidPtr;
185 
186 	/*
187 	 * Insert an in-core parity log (*data) into the head of a disk queue
188 	 * (*head, *tail). NON-BLOCKING
189 	 */
190 
191 	raidPtr = data->common->raidPtr;
192 	if (rf_parityLogDebug)
193 		printf("[enqueueing parity log data, region %d,"
194 		    " raidAddress %d, numSector %d]\n", data->regionID,
195 		    (int) data->diskAddress.raidAddress,
196 		    (int) data->diskAddress.numSector);
197 	RF_ASSERT(data->prev == NULL);
198 	RF_ASSERT(data->next == NULL);
199 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
200 	if (*head) {
201 		/* Insert into head of queue. */
202 		RF_ASSERT((*head)->prev == NULL);
203 		RF_ASSERT((*tail)->next == NULL);
204 		data->next = *head;
205 		(*head)->prev = data;
206 		*head = data;
207 	} else {
208 		/* Insert into empty list. */
209 		RF_ASSERT(*head == NULL);
210 		RF_ASSERT(*tail == NULL);
211 		*head = data;
212 		*tail = data;
213 	}
214 	RF_ASSERT((*head)->prev == NULL);
215 	RF_ASSERT((*tail)->next == NULL);
216 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
217 }
218 
219 RF_ParityLogData_t *
rf_DequeueParityLogData(RF_Raid_t * raidPtr,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail,int ignoreLocks)220 rf_DequeueParityLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
221     RF_ParityLogData_t **tail, int ignoreLocks)
222 {
223 	RF_ParityLogData_t *data;
224 
225 	/*
226 	 * Remove and return an in-core parity log from the tail of a disk
227 	 * queue (*head, *tail). NON-BLOCKING
228 	 */
229 
230 	/* Remove from tail, preserving FIFO order. */
231 	if (!ignoreLocks)
232 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
233 	data = *tail;
234 	if (data) {
235 		if (*head == *tail) {
236 			/* Removing last item from queue. */
237 			*head = NULL;
238 			*tail = NULL;
239 		} else {
240 			*tail = (*tail)->prev;
241 			(*tail)->next = NULL;
242 			RF_ASSERT((*head)->prev == NULL);
243 			RF_ASSERT((*tail)->next == NULL);
244 		}
245 		data->next = NULL;
246 		data->prev = NULL;
247 		if (rf_parityLogDebug)
248 			printf("[dequeueing parity log data, region %d,"
249 			    " raidAddress %d, numSector %d]\n", data->regionID,
250 			    (int) data->diskAddress.raidAddress,
251 			    (int) data->diskAddress.numSector);
252 	}
253 	if (*head) {
254 		RF_ASSERT((*head)->prev == NULL);
255 		RF_ASSERT((*tail)->next == NULL);
256 	}
257 	if (!ignoreLocks)
258 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
259 	return (data);
260 }
261 
262 
263 void
rf_RequeueParityLogData(RF_ParityLogData_t * data,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail)264 rf_RequeueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
265     RF_ParityLogData_t **tail)
266 {
267 	RF_Raid_t *raidPtr;
268 
269 	/*
270 	 * Insert an in-core parity log (*data) into the tail of a disk queue
271 	 * (*head, *tail). NON-BLOCKING
272 	 */
273 
274 	raidPtr = data->common->raidPtr;
275 	RF_ASSERT(data);
276 	if (rf_parityLogDebug)
277 		printf("[requeueing parity log data, region %d,"
278 		    " raidAddress %d, numSector %d]\n", data->regionID,
279 		    (int) data->diskAddress.raidAddress,
280 		    (int) data->diskAddress.numSector);
281 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
282 	if (*tail) {
283 		/* Append to tail of list. */
284 		data->prev = *tail;
285 		data->next = NULL;
286 		(*tail)->next = data;
287 		*tail = data;
288 	} else {
289 		/* Inserting into an empty list. */
290 		*head = data;
291 		*tail = data;
292 		(*head)->prev = NULL;
293 		(*tail)->next = NULL;
294 	}
295 	RF_ASSERT((*head)->prev == NULL);
296 	RF_ASSERT((*tail)->next == NULL);
297 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
298 }
299 
300 RF_ParityLogData_t *
rf_CreateParityLogData(RF_ParityRecordType_t operation,RF_PhysDiskAddr_t * pda,caddr_t bufPtr,RF_Raid_t * raidPtr,int (* wakeFunc)(RF_DagNode_t * node,int status),void * wakeArg,RF_AccTraceEntry_t * tracerec,RF_Etimer_t startTime)301 rf_CreateParityLogData(RF_ParityRecordType_t operation, RF_PhysDiskAddr_t *pda,
302     caddr_t bufPtr, RF_Raid_t *raidPtr,
303     int (*wakeFunc) (RF_DagNode_t * node, int status),
304     void *wakeArg, RF_AccTraceEntry_t *tracerec, RF_Etimer_t startTime)
305 {
306 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
307 	RF_CommonLogData_t *common;
308 	RF_PhysDiskAddr_t *diskAddress;
309 	int boundary, offset = 0;
310 
311 	/*
312 	 * Return an initialized struct of info to be logged. Build one item
313 	 * per physical disk address, one item per region.
314 	 *
315 	 * NON-BLOCKING
316 	 */
317 
318 	diskAddress = pda;
319 	common = rf_AllocParityLogCommonData(raidPtr);
320 	RF_ASSERT(common);
321 
322 	common->operation = operation;
323 	common->bufPtr = bufPtr;
324 	common->raidPtr = raidPtr;
325 	common->wakeFunc = wakeFunc;
326 	common->wakeArg = wakeArg;
327 	common->tracerec = tracerec;
328 	common->startTime = startTime;
329 	common->cnt = 0;
330 
331 	if (rf_parityLogDebug)
332 		printf("[entering CreateParityLogData]\n");
333 	while (diskAddress) {
334 		common->cnt++;
335 		data = rf_AllocParityLogData(raidPtr);
336 		RF_ASSERT(data);
337 		data->common = common;
338 		data->next = NULL;
339 		data->prev = NULL;
340 		data->regionID = rf_MapRegionIDParityLogging(raidPtr,
341 		    diskAddress->startSector);
342 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr,
343 		    diskAddress->startSector + diskAddress->numSector - 1)) {
344 			/* Disk address does not cross a region boundary. */
345 			data->diskAddress = *diskAddress;
346 			data->bufOffset = offset;
347 			offset = offset + diskAddress->numSector;
348 			rf_EnqueueParityLogData(data, &resultHead, &resultTail);
349 			/* Adjust disk address. */
350 			diskAddress = diskAddress->next;
351 		} else {
352 			/* Disk address crosses a region boundary. */
353 			/* Find address where region is crossed. */
354 			boundary = 0;
355 			while (data->regionID ==
356 			    rf_MapRegionIDParityLogging(raidPtr,
357 			     diskAddress->startSector + boundary))
358 				boundary++;
359 
360 			/* Enter data before the boundary. */
361 			data->diskAddress = *diskAddress;
362 			data->diskAddress.numSector = boundary;
363 			data->bufOffset = offset;
364 			offset += boundary;
365 			rf_EnqueueParityLogData(data, &resultHead, &resultTail);
366 			/* Adjust disk address. */
367 			diskAddress->startSector += boundary;
368 			diskAddress->numSector -= boundary;
369 		}
370 	}
371 	if (rf_parityLogDebug)
372 		printf("[leaving CreateParityLogData]\n");
373 	return (resultHead);
374 }
375 
376 
377 RF_ParityLogData_t *
rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr,int regionID,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail,int ignoreLocks)378 rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr, int regionID,
379     RF_ParityLogData_t **head, RF_ParityLogData_t **tail, int ignoreLocks)
380 {
381 	RF_ParityLogData_t *w;
382 
383 	/*
384 	 * Remove and return an in-core parity log from a specified region
385 	 * (regionID). If a matching log is not found, return NULL.
386 	 *
387 	 * NON-BLOCKING
388 	 */
389 
390 	/*
391 	 * walk backward through a list, looking for an entry with a matching
392 	 * region ID.
393 	 */
394 	if (!ignoreLocks)
395 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
396 	w = (*tail);
397 	while (w) {
398 		if (w->regionID == regionID) {
399 			/* Remove an element from the list. */
400 			if (w == *tail) {
401 				if (*head == *tail) {
402 					/* Removing only element in the list. */
403 					*head = NULL;
404 					*tail = NULL;
405 				} else {
406 					/* Removing last item in the list. */
407 					*tail = (*tail)->prev;
408 					(*tail)->next = NULL;
409 					RF_ASSERT((*head)->prev == NULL);
410 					RF_ASSERT((*tail)->next == NULL);
411 				}
412 			} else {
413 				if (w == *head) {
414 					/* Removing first item in the list. */
415 					*head = (*head)->next;
416 					(*head)->prev = NULL;
417 					RF_ASSERT((*head)->prev == NULL);
418 					RF_ASSERT((*tail)->next == NULL);
419 				} else {
420 					/*
421 					 * Removing an item from the middle of
422 					 * the list.
423 					 */
424 					w->prev->next = w->next;
425 					w->next->prev = w->prev;
426 					RF_ASSERT((*head)->prev == NULL);
427 					RF_ASSERT((*tail)->next == NULL);
428 				}
429 			}
430 			w->prev = NULL;
431 			w->next = NULL;
432 			if (rf_parityLogDebug)
433 				printf("[dequeueing parity log data,"
434 				    " region %d, raidAddress %d,"
435 				    " numSector %d]\n", w->regionID,
436 				    (int) w->diskAddress.raidAddress,
437 				    (int) w->diskAddress.numSector);
438 			return (w);
439 		} else
440 			w = w->prev;
441 	}
442 	if (!ignoreLocks)
443 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
444 	return (NULL);
445 }
446 
447 RF_ParityLogData_t *
rf_DequeueMatchingLogData(RF_Raid_t * raidPtr,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail)448 rf_DequeueMatchingLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
449     RF_ParityLogData_t **tail)
450 {
451 	RF_ParityLogData_t *logDataList, *logData;
452 	int regionID;
453 
454 	/*
455 	 * Remove and return an in-core parity log from the tail of a disk
456 	 * queue (*head, *tail). Then remove all matching (identical
457 	 * regionIDs) logData and return as a linked list.
458 	 *
459 	 * NON-BLOCKING
460 	 */
461 
462 	logDataList = rf_DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
463 	if (logDataList) {
464 		regionID = logDataList->regionID;
465 		logData = logDataList;
466 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr,
467 		    regionID, head, tail, RF_TRUE);
468 		while (logData->next) {
469 			logData = logData->next;
470 			logData->next =
471 			    rf_SearchAndDequeueParityLogData(raidPtr, regionID,
472 			     head, tail, RF_TRUE);
473 		}
474 	}
475 	return (logDataList);
476 }
477 
478 
479 RF_ParityLog_t *
rf_AcquireParityLog(RF_ParityLogData_t * logData,int finish)480 rf_AcquireParityLog(RF_ParityLogData_t *logData, int finish)
481 {
482 	RF_ParityLog_t *log = NULL;
483 	RF_Raid_t *raidPtr;
484 
485 	/*
486 	 * Grab a log buffer from the pool and return it. If no buffers are
487 	 * available, return NULL. NON-BLOCKING
488 	 */
489 	raidPtr = logData->common->raidPtr;
490 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
491 	if (raidPtr->parityLogPool.parityLogs) {
492 		log = raidPtr->parityLogPool.parityLogs;
493 		raidPtr->parityLogPool.parityLogs =
494 		    raidPtr->parityLogPool.parityLogs->next;
495 		log->regionID = logData->regionID;
496 		log->numRecords = 0;
497 		log->next = NULL;
498 		raidPtr->logsInUse++;
499 		RF_ASSERT(raidPtr->logsInUse >= 0 &&
500 		    raidPtr->logsInUse <= raidPtr->numParityLogs);
501 	} else {
502 		/*
503 		 * No logs available, so place ourselves on the queue of work
504 		 * waiting on log buffers this is done while
505 		 * parityLogPool.mutex is held, to ensure synchronization with
506 		 * ReleaseParityLogs.
507 		 */
508 		if (rf_parityLogDebug)
509 			printf("[blocked on log, region %d, finish %d]\n",
510 			    logData->regionID, finish);
511 		if (finish)
512 			rf_RequeueParityLogData(logData,
513 			    &raidPtr->parityLogDiskQueue.logBlockHead,
514 			    &raidPtr->parityLogDiskQueue.logBlockTail);
515 		else
516 			rf_EnqueueParityLogData(logData,
517 			    &raidPtr->parityLogDiskQueue.logBlockHead,
518 			    &raidPtr->parityLogDiskQueue.logBlockTail);
519 	}
520 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
521 	return (log);
522 }
523 
524 void
rf_ReleaseParityLogs(RF_Raid_t * raidPtr,RF_ParityLog_t * firstLog)525 rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog)
526 {
527 	RF_ParityLogData_t *logDataList;
528 	RF_ParityLog_t *log, *lastLog;
529 	int cnt;
530 
531 	/*
532 	 * Insert a linked list of parity logs (firstLog) to the free list
533 	 * (parityLogPool.parityLogPool)
534 	 *
535 	 * NON-BLOCKING
536 	 */
537 
538 	RF_ASSERT(firstLog);
539 
540 	/*
541 	 * Before returning logs to global free list, service all requests
542 	 * which are blocked on logs. Holding mutexes for parityLogPool and
543 	 * parityLogDiskQueue forces synchronization with rf_AcquireParityLog().
544 	 */
545 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
546 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
547 	logDataList = rf_DequeueMatchingLogData(raidPtr,
548 	    &raidPtr->parityLogDiskQueue.logBlockHead,
549 	    &raidPtr->parityLogDiskQueue.logBlockTail);
550 	log = firstLog;
551 	if (firstLog)
552 		firstLog = firstLog->next;
553 	log->numRecords = 0;
554 	log->next = NULL;
555 	while (logDataList && log) {
556 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
557 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
558 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
559 		if (rf_parityLogDebug)
560 			printf("[finishing up buf-blocked log data,"
561 			    " region %d]\n", logDataList->regionID);
562 		if (log == NULL) {
563 			log = firstLog;
564 			if (firstLog) {
565 				firstLog = firstLog->next;
566 				log->numRecords = 0;
567 				log->next = NULL;
568 			}
569 		}
570 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
571 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
572 		if (log)
573 			logDataList = rf_DequeueMatchingLogData(raidPtr,
574 			    &raidPtr->parityLogDiskQueue.logBlockHead,
575 			    &raidPtr->parityLogDiskQueue.logBlockTail);
576 	}
577 	/* Return remaining logs to pool. */
578 	if (log) {
579 		log->next = firstLog;
580 		firstLog = log;
581 	}
582 	if (firstLog) {
583 		lastLog = firstLog;
584 		raidPtr->logsInUse--;
585 		RF_ASSERT(raidPtr->logsInUse >= 0 &&
586 		    raidPtr->logsInUse <= raidPtr->numParityLogs);
587 		while (lastLog->next) {
588 			lastLog = lastLog->next;
589 			raidPtr->logsInUse--;
590 			RF_ASSERT(raidPtr->logsInUse >= 0 &&
591 			    raidPtr->logsInUse <= raidPtr->numParityLogs);
592 		}
593 		lastLog->next = raidPtr->parityLogPool.parityLogs;
594 		raidPtr->parityLogPool.parityLogs = firstLog;
595 		cnt = 0;
596 		log = raidPtr->parityLogPool.parityLogs;
597 		while (log) {
598 			cnt++;
599 			log = log->next;
600 		}
601 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
602 	}
603 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
604 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
605 }
606 
607 void
rf_ReintLog(RF_Raid_t * raidPtr,int regionID,RF_ParityLog_t * log)608 rf_ReintLog(RF_Raid_t *raidPtr, int regionID, RF_ParityLog_t *log)
609 {
610 	RF_ASSERT(log);
611 
612 	/*
613 	 * Insert an in-core parity log (log) into the disk queue of
614 	 * reintegration work. Set the flag (reintInProgress) for the
615 	 * specified region (regionID) to indicate that reintegration is in
616 	 * progress for this region. NON-BLOCKING
617 	 */
618 
619 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
620 	/* Cleared when reint complete. */
621 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;
622 
623 	if (rf_parityLogDebug)
624 		printf("[requesting reintegration of region %d]\n",
625 		    log->regionID);
626 	/* Move record to reintegration queue. */
627 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
628 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
629 	raidPtr->parityLogDiskQueue.reintQueue = log;
630 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
632 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
633 }
634 
635 void
rf_FlushLog(RF_Raid_t * raidPtr,RF_ParityLog_t * log)636 rf_FlushLog(RF_Raid_t *raidPtr, RF_ParityLog_t *log)
637 {
638 	/*
639 	 * Insert a core log (log) into a list of logs
640 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
641 	 * NON-BLOCKING
642 	 */
643 
644 	RF_ASSERT(log);
645 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
646 	RF_ASSERT(log->next == NULL);
647 	/* Move log to flush queue. */
648 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
649 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
650 	raidPtr->parityLogDiskQueue.flushQueue = log;
651 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
652 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
653 }
654 
655 int
rf_DumpParityLogToDisk(int finish,RF_ParityLogData_t * logData)656 rf_DumpParityLogToDisk(int finish, RF_ParityLogData_t *logData)
657 {
658 	int i, diskCount, regionID = logData->regionID;
659 	RF_ParityLog_t *log;
660 	RF_Raid_t *raidPtr;
661 
662 	raidPtr = logData->common->raidPtr;
663 
664 	/*
665 	 * Move a core log to disk. If the log disk is full, initiate
666 	 * reintegration.
667 	 *
668 	 * Return (0) if we can enqueue the dump immediately, otherwise return
669 	 * (1) to indicate we are blocked on reintegration and control of the
670 	 * thread should be relinquished.
671 	 *
672 	 * Caller must hold regionInfo[regionID].mutex.
673 	 *
674 	 * NON-BLOCKING
675 	 */
676 
677 	if (rf_parityLogDebug)
678 		printf("[dumping parity log to disk, region %d]\n", regionID);
679 	log = raidPtr->regionInfo[regionID].coreLog;
680 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
681 	RF_ASSERT(log->next == NULL);
682 
683 	/* If reintegration is in progress, must queue work. */
684 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
685 	if (raidPtr->regionInfo[regionID].reintInProgress) {
686 		/*
687 		 * Can not proceed since this region is currently being
688 		 * reintegrated. We can not block, so queue remaining work and
689 		 * return.
690 		 */
691 		if (rf_parityLogDebug)
692 			printf("[region %d waiting on reintegration]\n",
693 			    regionID);
694 		/*
695 		 * XXX Not sure about the use of finish - shouldn't this
696 		 * always be "Enqueue" ?
697 		 */
698 		if (finish)
699 			rf_RequeueParityLogData(logData,
700 			    &raidPtr->parityLogDiskQueue.reintBlockHead,
701 			    &raidPtr->parityLogDiskQueue.reintBlockTail);
702 		else
703 			rf_EnqueueParityLogData(logData,
704 			    &raidPtr->parityLogDiskQueue.reintBlockHead,
705 			    &raidPtr->parityLogDiskQueue.reintBlockTail);
706 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
707 		return (1);	/* Relenquish control of this thread. */
708 	}
709 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
710 	raidPtr->regionInfo[regionID].coreLog = NULL;
711 	if ((raidPtr->regionInfo[regionID].diskCount) <
712 	    raidPtr->regionInfo[regionID].capacity)
713 		/*
714 		 * IMPORTANT !!!  This loop bound assumes region disk holds an
715 		 * integral number of core logs.
716 		 */
717 	{
718 		/* Update disk map for this region. */
719 		diskCount = raidPtr->regionInfo[regionID].diskCount;
720 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
721 			raidPtr->regionInfo[regionID].diskMap[i + diskCount]
722 			    .operation = log->records[i].operation;
723 			raidPtr->regionInfo[regionID].diskMap[i + diskCount]
724 			    .parityAddr = log->records[i].parityAddr;
725 		}
726 		log->diskOffset = diskCount;
727 		raidPtr->regionInfo[regionID].diskCount +=
728 		    raidPtr->numSectorsPerLog;
729 		rf_FlushLog(raidPtr, log);
730 	} else {
731 		/*
732 		 * No room for log on disk, send it to disk manager and
733 		 * request reintegration.
734 		 */
735 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount ==
736 		    raidPtr->regionInfo[regionID].capacity);
737 		rf_ReintLog(raidPtr, regionID, log);
738 	}
739 	if (rf_parityLogDebug)
740 		printf("[finished dumping parity log to disk, region %d]\n",
741 		    regionID);
742 	return (0);
743 }
744 
745 int
rf_ParityLogAppend(RF_ParityLogData_t * logData,int finish,RF_ParityLog_t ** incomingLog,int clearReintFlag)746 rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish,
747     RF_ParityLog_t **incomingLog, int clearReintFlag)
748 {
749 	int regionID, logItem, itemDone;
750 	RF_ParityLogData_t *item;
751 	int punt, done = RF_FALSE;
752 	RF_ParityLog_t *log;
753 	RF_Raid_t *raidPtr;
754 	RF_Etimer_t timer;
755 	int (*wakeFunc) (RF_DagNode_t * node, int status);
756 	void *wakeArg;
757 
758 	/*
759 	 * Add parity to the appropriate log, one sector at a time. This
760 	 * routine is called is called by dag functions ParityLogUpdateFunc
761 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
762 	 *
763 	 * Parity to be logged is contained in a linked-list (logData). When
764 	 * this routine returns, every sector in the list will be in one of
765 	 * three places: 1) entered into the parity log 2) queued, waiting on
766 	 * reintegration 3) queued, waiting on a core log.
767 	 *
768 	 * Blocked work is passed to the ParityLoggingDiskManager for
769 	 * completion. Later, as conditions which required the block are
770 	 * removed, the work reenters this routine with the "finish" parameter
771 	 * set to "RF_TRUE."
772 	 *
773 	 * NON-BLOCKING
774 	 */
775 
776 	raidPtr = logData->common->raidPtr;
777 	/* Lock the region for the first item in logData. */
778 	RF_ASSERT(logData != NULL);
779 	regionID = logData->regionID;
780 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
781 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
782 
783 	if (clearReintFlag) {
784 		/*
785 		 * Enable flushing for this region. Holding both locks
786 		 * provides a synchronization barrier with
787 		 * rf_DumpParityLogToDisk.
788 		 */
789 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
790 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
791 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress ==
792 		    RF_TRUE);
793 		raidPtr->regionInfo[regionID].diskCount = 0;
794 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
795 		/* Flushing is now enabled. */
796 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
797 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
798 	}
799 	/* Process each item in logData. */
800 	while (logData) {
801 		/* Remove an item from logData. */
802 		item = logData;
803 		logData = logData->next;
804 		item->next = NULL;
805 		item->prev = NULL;
806 
807 		if (rf_parityLogDebug)
808 			printf("[appending parity log data, region %d,"
809 			    " raidAddress %d, numSector %d]\n", item->regionID,
810 			    (int) item->diskAddress.raidAddress,
811 			    (int) item->diskAddress.numSector);
812 
813 		/* See if we moved to a new region. */
814 		if (regionID != item->regionID) {
815 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
816 			regionID = item->regionID;
817 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
818 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
819 		}
820 		punt = RF_FALSE;/*
821 				 * Set to RF_TRUE if work is blocked. This
822 				 * can happen in one of two ways: 1) no core
823 				 * log (rf_AcquireParityLog) 2) waiting on
824 				 * reintegration (rf_DumpParityLogToDisk).
825 				 * If punt is RF_TRUE, the dataItem was queued,
826 				 * so skip to next item.
827 				 */
828 
829 		/*
830 		 * Process item, one sector at a time, until all sectors
831 		 * processed or we punt.
832 		 */
833 		if (item->diskAddress.numSector > 0)
834 			done = RF_FALSE;
835 		else
836 			RF_ASSERT(0);
837 		while (!punt && !done) {
838 			/* Verify that a core log exists for this region. */
839 			if (!raidPtr->regionInfo[regionID].coreLog) {
840 				/*
841 				 * Attempt to acquire a parity log. If
842 				 * acquisition fails, queue remaining work in
843 				 * data item and move to nextItem.
844 				 */
845 				if (incomingLog) {
846 					if (*incomingLog) {
847 						RF_ASSERT((*incomingLog)->next
848 						    == NULL);
849 						raidPtr->regionInfo[regionID]
850 						    .coreLog = *incomingLog;
851 						raidPtr->regionInfo[regionID]
852 						    .coreLog->regionID =
853 						     regionID;
854 						*incomingLog = NULL;
855 					} else
856 						raidPtr->regionInfo[regionID]
857 						    .coreLog =
858 						     rf_AcquireParityLog(item,
859 						      finish);
860 				} else
861 					raidPtr->regionInfo[regionID].coreLog =
862 					    rf_AcquireParityLog(item, finish);
863 				/*
864 				 * Note: rf_AcquireParityLog either returns
865 				 * a log or enqueues currentItem.
866 				 */
867 			}
868 			if (!raidPtr->regionInfo[regionID].coreLog)
869 				punt = RF_TRUE;	/* Failed to find a core log. */
870 			else {
871 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog
872 				    ->next == NULL);
873 				/*
874 				 * Verify that the log has room for new
875 				 * entries.
876 				 */
877 				/*
878 				 * If log is full, dump it to disk and grab a
879 				 * new log.
880 				 */
881 				if (raidPtr->regionInfo[regionID].coreLog
882 				    ->numRecords == raidPtr->numSectorsPerLog)
883 				{
884 					/* Log is full, dump it to disk. */
885 					if (rf_DumpParityLogToDisk(finish,
886 					    item))
887 						/*
888 						 * Dump unsuccessful, blocked
889 						 * on reintegration.
890 						 */
891 						punt = RF_TRUE;
892 					else {
893 						/* Dump was successful. */
894 					  if (incomingLog) {
895 							if (*incomingLog) {
896 								RF_ASSERT(
897 							(*incomingLog)->next ==
898 								    NULL);
899 								raidPtr->
900 						regionInfo[regionID].coreLog =
901 								   *incomingLog;
902 								raidPtr->
903 						regionInfo[regionID].coreLog->
904 							    regionID = regionID;
905 								*incomingLog =
906 								    NULL;
907 							} else
908 								raidPtr->
909 						regionInfo[regionID].coreLog =
910 						 rf_AcquireParityLog(item,
911 						     finish);
912 						} else
913 							raidPtr->regionInfo
914 							    [regionID].coreLog =
915 						 rf_AcquireParityLog(item,
916 						     finish);
917 						/*
918 						 * If a core log is not
919 						 * available, must queue work
920 						 * and return.
921 						 */
922 						if (!raidPtr->regionInfo
923 						    [regionID].coreLog)
924 							/*
925 							 * Blocked on log
926 							 * availability.
927 							 */
928 							punt = RF_TRUE;
929 					}
930 				}
931 			}
932 			/*
933 			 * If we didn't punt on this item, attempt to add a
934 			 * sector to the core log.
935 			 */
936 			if (!punt) {
937 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog
938 				    ->next == NULL);
939 				/*
940 				 * At this point, we have a core log with
941 				 * enough room for a sector.
942 				 */
943 				/* Copy a sector into the log. */
944 				log = raidPtr->regionInfo[regionID].coreLog;
945 				RF_ASSERT(log->numRecords <
946 				    raidPtr->numSectorsPerLog);
947 				logItem = log->numRecords++;
948 				log->records[logItem].parityAddr =
949 				    item->diskAddress;
950 				RF_ASSERT(log->records[logItem].parityAddr
951 				    .startSector >=
952 				    raidPtr->regionInfo[regionID]
953 				    .parityStartAddr);
954 				RF_ASSERT(log->records[logItem].parityAddr
955 				    .startSector <
956 				    raidPtr->regionInfo[regionID]
957 				    .parityStartAddr +
958 				    raidPtr->regionInfo[regionID]
959 				    .numSectorsParity);
960 				log->records[logItem].parityAddr.numSector = 1;
961 				log->records[logItem].operation =
962 				    item->common->operation;
963 				bcopy((item->common->bufPtr +
964 				    (item->bufOffset++ * (1 <<
965 				    item->common->raidPtr->logBytesPerSector))),
966 				    log->bufPtr + (logItem * (1 <<
967 				    item->common->raidPtr->logBytesPerSector)),
968 				    (1 << item->common->raidPtr
969 				     ->logBytesPerSector));
970 				item->diskAddress.numSector--;
971 				item->diskAddress.startSector++;
972 				if (item->diskAddress.numSector == 0)
973 					done = RF_TRUE;
974 			}
975 		}
976 
977 		if (!punt) {
978 			/*
979 			 * Processed this item completely, decrement count of
980 			 * items to be processed.
981 			 */
982 			RF_ASSERT(item->diskAddress.numSector == 0);
983 			RF_LOCK_MUTEX(item->common->mutex);
984 			item->common->cnt--;
985 			if (item->common->cnt == 0)
986 				itemDone = RF_TRUE;
987 			else
988 				itemDone = RF_FALSE;
989 			RF_UNLOCK_MUTEX(item->common->mutex);
990 			if (itemDone) {
991 				/*
992 				 * Finished processing all log data for this
993 				 * IO Return structs to free list and invoke
994 				 * wakeup function.
995 				 */
996 				/* Grab initial value of timer. */
997 				timer = item->common->startTime;
998 				RF_ETIMER_STOP(timer);
999 				RF_ETIMER_EVAL(timer);
1000 				item->common->tracerec->plog_us +=
1001 				    RF_ETIMER_VAL_US(timer);
1002 				if (rf_parityLogDebug)
1003 					printf("[waking process for region"
1004 					    " %d]\n", item->regionID);
1005 				wakeFunc = item->common->wakeFunc;
1006 				wakeArg = item->common->wakeArg;
1007 				rf_FreeParityLogCommonData(item->common);
1008 				rf_FreeParityLogData(item);
1009 				(wakeFunc) (wakeArg, 0);
1010 			} else
1011 				rf_FreeParityLogData(item);
1012 		}
1013 	}
1014 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1015 	if (rf_parityLogDebug)
1016 		printf("[exiting ParityLogAppend]\n");
1017 	return (0);
1018 }
1019 
1020 
1021 void
rf_EnableParityLogging(RF_Raid_t * raidPtr)1022 rf_EnableParityLogging(RF_Raid_t *raidPtr)
1023 {
1024 	int regionID;
1025 
1026 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
1027 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1028 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
1029 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1030 	}
1031 	if (rf_parityLogDebug)
1032 		printf("[parity logging enabled]\n");
1033 }
1034 #endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
1035