1 /*	$OpenBSD: rf_paritylogDiskMgr.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $	*/
2 /*	$NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: William V. Courtright II
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 /*
31  * Code for flushing and reintegrating operations related to parity logging.
32  */
33 
34 #include "rf_archs.h"
35 
36 #if	RF_INCLUDE_PARITYLOGGING > 0
37 
38 #include "rf_types.h"
39 #include "rf_threadstuff.h"
40 #include "rf_mcpair.h"
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagfuncs.h"
44 #include "rf_desc.h"
45 #include "rf_layout.h"
46 #include "rf_diskqueue.h"
47 #include "rf_paritylog.h"
48 #include "rf_general.h"
49 #include "rf_etimer.h"
50 #include "rf_paritylogging.h"
51 #include "rf_engine.h"
52 #include "rf_dagutils.h"
53 #include "rf_map.h"
54 #include "rf_parityscan.h"
55 
56 #include "rf_paritylogDiskMgr.h"
57 
/*
 * Forward declarations for the buffer-pool helpers, the four DAG-launching
 * helpers (region log read, core log write, region parity read/write) and
 * the flush/reintegration routines that are defined later in this file.
 */
58 caddr_t rf_AcquireReintBuffer(RF_RegionBufferQueue_t *);
59 void rf_ReleaseReintBuffer(RF_RegionBufferQueue_t *, caddr_t);
60 void rf_ReadRegionLog(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *,
61 	RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
62 void rf_WriteCoreLog(RF_ParityLog_t *, RF_MCPair_t *, RF_Raid_t *,
63 	RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
64 void rf_ReadRegionParity(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *,
65 	RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
66 void rf_WriteRegionParity(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *,
67 	RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
68 void rf_FlushLogsToDisk(RF_Raid_t *, RF_ParityLog_t *);
69 void rf_ReintegrateRegion(RF_Raid_t *, RF_RegionId_t, RF_ParityLog_t *);
70 void rf_ReintegrateLogs(RF_Raid_t *, RF_ParityLog_t *);
71 
72 
73 caddr_t
rf_AcquireReintBuffer(RF_RegionBufferQueue_t * pool)74 rf_AcquireReintBuffer(RF_RegionBufferQueue_t *pool)
75 {
76 	caddr_t bufPtr = NULL;
77 
78 	/*
79 	 * Return a region buffer from the free list (pool). If the free list
80 	 * is empty, WAIT. BLOCKING
81 	 */
82 
83 	RF_LOCK_MUTEX(pool->mutex);
84 	if (pool->availableBuffers > 0) {
85 		bufPtr = pool->buffers[pool->availBuffersIndex];
86 		pool->availableBuffers--;
87 		pool->availBuffersIndex++;
88 		if (pool->availBuffersIndex == pool->totalBuffers)
89 			pool->availBuffersIndex = 0;
90 		RF_UNLOCK_MUTEX(pool->mutex);
91 	} else {
92 		RF_PANIC();	/*
93 				 * Should never happen in correct config,
94 				 * single reint.
95 				 */
96 		RF_WAIT_COND(pool->cond, pool->mutex);
97 	}
98 	return (bufPtr);
99 }
100 
101 
102 void
rf_ReleaseReintBuffer(RF_RegionBufferQueue_t * pool,caddr_t bufPtr)103 rf_ReleaseReintBuffer(RF_RegionBufferQueue_t *pool, caddr_t bufPtr)
104 {
105 	/*
106 	 * Insert a region buffer (bufPtr) into the free list (pool).
107 	 * NON-BLOCKING
108 	 */
109 
110 	RF_LOCK_MUTEX(pool->mutex);
111 	pool->availableBuffers++;
112 	pool->buffers[pool->emptyBuffersIndex] = bufPtr;
113 	pool->emptyBuffersIndex++;
114 	if (pool->emptyBuffersIndex == pool->totalBuffers)
115 		pool->emptyBuffersIndex = 0;
116 	RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
117 	RF_UNLOCK_MUTEX(pool->mutex);
118 	RF_SIGNAL_COND(pool->cond);
119 }
120 
121 
122 void
rf_ReadRegionLog(RF_RegionId_t regionID,RF_MCPair_t * rrd_mcpair,caddr_t regionBuffer,RF_Raid_t * raidPtr,RF_DagHeader_t ** rrd_dag_h,RF_AllocListElem_t ** rrd_alloclist,RF_PhysDiskAddr_t ** rrd_pda)123 rf_ReadRegionLog(RF_RegionId_t regionID, RF_MCPair_t *rrd_mcpair,
124     caddr_t regionBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **rrd_dag_h,
125     RF_AllocListElem_t **rrd_alloclist, RF_PhysDiskAddr_t **rrd_pda)
126 {
127 	/*
128 	 * Initiate the read a region log from disk. Once initiated, return
129 	 * to the calling routine.
130 	 *
131 	 * NON-BLOCKING
132 	 */
133 
134 	RF_AccTraceEntry_t *tracerec;
135 	RF_DagNode_t *rrd_rdNode;
136 
137 	/* Create DAG to read region log from disk. */
138 	rf_MakeAllocList(*rrd_alloclist);
139 	*rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer,
140 	    rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rrl", *rrd_alloclist,
141 	    RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
142 
143 	/* Create and initialize PDA for the core log. */
144 	/* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t),
145 	 * (RF_PhysDiskAddr_t *)); */
146 	*rrd_pda = rf_AllocPDAList(1);
147 	rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row),
148 	    &((*rrd_pda)->col), &((*rrd_pda)->startSector));
149 	(*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
150 
151 	if ((*rrd_pda)->next) {
152 		(*rrd_pda)->next = NULL;
153 		printf("set rrd_pda->next to NULL\n");
154 	}
155 	/* Initialize DAG parameters. */
156 	RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
157 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
158 	(*rrd_dag_h)->tracerec = tracerec;
159 	rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
160 	rrd_rdNode->params[0].p = *rrd_pda;
161 	/* rrd_rdNode->params[1] = regionBuffer; */
162 	rrd_rdNode->params[2].v = 0;
163 	rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
164 						   0, 0, 0);
165 
166 	/* Launch region log read dag. */
167 	rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
168 	    (void *) rrd_mcpair);
169 }
170 
171 
172 void
rf_WriteCoreLog(RF_ParityLog_t * log,RF_MCPair_t * fwr_mcpair,RF_Raid_t * raidPtr,RF_DagHeader_t ** fwr_dag_h,RF_AllocListElem_t ** fwr_alloclist,RF_PhysDiskAddr_t ** fwr_pda)173 rf_WriteCoreLog(RF_ParityLog_t *log, RF_MCPair_t *fwr_mcpair,
174     RF_Raid_t *raidPtr, RF_DagHeader_t **fwr_dag_h,
175     RF_AllocListElem_t **fwr_alloclist, RF_PhysDiskAddr_t **fwr_pda)
176 {
177 	RF_RegionId_t regionID = log->regionID;
178 	RF_AccTraceEntry_t *tracerec;
179 	RF_SectorNum_t regionOffset;
180 	RF_DagNode_t *fwr_wrNode;
181 
182 	/*
183 	 * Initiate the write of a core log to a region log disk. Once
184 	 * initiated, return to the calling routine.
185 	 *
186 	 * NON-BLOCKING
187 	 */
188 
189 	/* Create DAG to write a core log to a region log disk. */
190 	rf_MakeAllocList(*fwr_alloclist);
191 	*fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr,
192 	    rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wcl", *fwr_alloclist,
193 	    RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
194 
195 	/* Create and initialize PDA for the region log. */
196 	/* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t),
197 	 * (RF_PhysDiskAddr_t *)); */
198 	*fwr_pda = rf_AllocPDAList(1);
199 	regionOffset = log->diskOffset;
200 	rf_MapLogParityLogging(raidPtr, regionID, regionOffset,
201 	    &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
202 	(*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
203 
204 	/* Initialize DAG parameters. */
205 	RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
206 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
207 	(*fwr_dag_h)->tracerec = tracerec;
208 	fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
209 	fwr_wrNode->params[0].p = *fwr_pda;
210 	/* fwr_wrNode->params[1] = log->bufPtr; */
211 	fwr_wrNode->params[2].v = 0;
212 	fwr_wrNode->params[3].v =
213 	    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
214 
215 	/* Launch the dag to write the core log to disk. */
216 	rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
217 	    (void *) fwr_mcpair);
218 }
219 
220 
221 void
rf_ReadRegionParity(RF_RegionId_t regionID,RF_MCPair_t * prd_mcpair,caddr_t parityBuffer,RF_Raid_t * raidPtr,RF_DagHeader_t ** prd_dag_h,RF_AllocListElem_t ** prd_alloclist,RF_PhysDiskAddr_t ** prd_pda)222 rf_ReadRegionParity(RF_RegionId_t regionID, RF_MCPair_t *prd_mcpair,
223     caddr_t parityBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **prd_dag_h,
224     RF_AllocListElem_t **prd_alloclist, RF_PhysDiskAddr_t **prd_pda)
225 {
226 	/*
227 	 * Initiate the read region parity from disk. Once initiated, return
228 	 * to the calling routine.
229 	 *
230 	 * NON-BLOCKING
231 	 */
232 
233 	RF_AccTraceEntry_t *tracerec;
234 	RF_DagNode_t *prd_rdNode;
235 
236 	/* Create DAG to read region parity from disk. */
237 	rf_MakeAllocList(*prd_alloclist);
238 	*prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc,
239 	    rf_DiskReadUndoFunc, "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE,
240 	    RF_IO_NORMAL_PRIORITY);
241 
242 	/* Create and initialize PDA for region parity. */
243 	/* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t),
244 	 * (RF_PhysDiskAddr_t *)); */
245 	*prd_pda = rf_AllocPDAList(1);
246 	rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row),
247 	    &((*prd_pda)->col), &((*prd_pda)->startSector),
248 	    &((*prd_pda)->numSector));
249 	if (rf_parityLogDebug)
250 		printf("[reading %d sectors of parity from region %d]\n",
251 		    (int) (*prd_pda)->numSector, regionID);
252 	if ((*prd_pda)->next) {
253 		(*prd_pda)->next = NULL;
254 		printf("set prd_pda->next to NULL\n");
255 	}
256 	/* Initialize DAG parameters. */
257 	RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
258 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
259 	(*prd_dag_h)->tracerec = tracerec;
260 	prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
261 	prd_rdNode->params[0].p = *prd_pda;
262 	prd_rdNode->params[1].p = parityBuffer;
263 	prd_rdNode->params[2].v = 0;
264 	prd_rdNode->params[3].v =
265 	    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
266 	if (rf_validateDAGDebug)
267 		rf_ValidateDAG(*prd_dag_h);
268 	/* Launch region parity read dag. */
269 	rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
270 	    (void *) prd_mcpair);
271 }
272 
273 void
rf_WriteRegionParity(RF_RegionId_t regionID,RF_MCPair_t * pwr_mcpair,caddr_t parityBuffer,RF_Raid_t * raidPtr,RF_DagHeader_t ** pwr_dag_h,RF_AllocListElem_t ** pwr_alloclist,RF_PhysDiskAddr_t ** pwr_pda)274 rf_WriteRegionParity(RF_RegionId_t regionID, RF_MCPair_t *pwr_mcpair,
275     caddr_t parityBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **pwr_dag_h,
276     RF_AllocListElem_t **pwr_alloclist, RF_PhysDiskAddr_t **pwr_pda)
277 {
278 	/*
279 	 * Initiate the write of region parity to disk. Once initiated, return
280 	 * to the calling routine.
281 	 *
282 	 * NON-BLOCKING
283 	 */
284 
285 	RF_AccTraceEntry_t *tracerec;
286 	RF_DagNode_t *pwr_wrNode;
287 
288 	/* Create DAG to write region log from disk. */
289 	rf_MakeAllocList(*pwr_alloclist);
290 	*pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer,
291 	    rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wrp", *pwr_alloclist,
292 	    RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
293 
294 	/* Create and initialize PDA for region parity. */
295 	/* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t),
296 	 * (RF_PhysDiskAddr_t *)); */
297 	*pwr_pda = rf_AllocPDAList(1);
298 	rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row),
299 	    &((*pwr_pda)->col), &((*pwr_pda)->startSector),
300 	    &((*pwr_pda)->numSector));
301 
302 	/* Initialize DAG parameters. */
303 	RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
304 	bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
305 	(*pwr_dag_h)->tracerec = tracerec;
306 	pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
307 	pwr_wrNode->params[0].p = *pwr_pda;
308 	/* pwr_wrNode->params[1] = parityBuffer; */
309 	pwr_wrNode->params[2].v = 0;
310 	pwr_wrNode->params[3].v =
311 	    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
312 
313 	/* Launch the dag to write region parity to disk. */
314 	rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
315 	    (void *) pwr_mcpair);
316 }
317 
318 void
rf_FlushLogsToDisk(RF_Raid_t * raidPtr,RF_ParityLog_t * logList)319 rf_FlushLogsToDisk(RF_Raid_t *raidPtr, RF_ParityLog_t *logList)
320 {
321 	/*
322 	 * Flush a linked list of core logs to the log disk. Logs contain the
323 	 * disk location where they should be written. Logs were written in
324 	 * FIFO order and that order must be preserved.
325 	 *
326 	 * Recommended optimizations:
327 	 * 1) Allow multiple flushes to occur simultaneously.
328 	 * 2) Coalesce contiguous flush operations.
329 	 *
330 	 * BLOCKING
331 	 */
332 
333 	RF_ParityLog_t *log;
334 	RF_RegionId_t regionID;
335 	RF_MCPair_t *fwr_mcpair;
336 	RF_DagHeader_t *fwr_dag_h;
337 	RF_AllocListElem_t *fwr_alloclist;
338 	RF_PhysDiskAddr_t *fwr_pda;
339 
340 	fwr_mcpair = rf_AllocMCPair();
341 	RF_LOCK_MUTEX(fwr_mcpair->mutex);
342 
343 	RF_ASSERT(logList);
344 	log = logList;
345 	while (log) {
346 		regionID = log->regionID;
347 
348 		/* Create and launch a DAG to write the core log. */
349 		if (rf_parityLogDebug)
350 			printf("[initiating write of core log for region"
351 			    " %d]\n", regionID);
352 		fwr_mcpair->flag = RF_FALSE;
353 		rf_WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h,
354 		    &fwr_alloclist, &fwr_pda);
355 
356 		/* Wait for the DAG to complete. */
357 		while (!fwr_mcpair->flag)
358 			RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
359 		if (fwr_dag_h->status != rf_enable) {
360 			RF_ERRORMSG1("Unable to write core log to disk"
361 			    " (region %d)\n", regionID);
362 			RF_ASSERT(0);
363 		}
364 		/* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
365 		rf_FreePhysDiskAddr(fwr_pda);
366 		rf_FreeDAG(fwr_dag_h);
367 		rf_FreeAllocList(fwr_alloclist);
368 
369 		log = log->next;
370 	}
371 	RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
372 	rf_FreeMCPair(fwr_mcpair);
373 	rf_ReleaseParityLogs(raidPtr, logList);
374 }
375 
376 void
rf_ReintegrateRegion(RF_Raid_t * raidPtr,RF_RegionId_t regionID,RF_ParityLog_t * coreLog)377 rf_ReintegrateRegion(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
378     RF_ParityLog_t *coreLog)
379 {
380 	RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
381 	RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
382 	RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
383 	RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
384 	caddr_t parityBuffer, regionBuffer = NULL;
385 
386 	/*
387 	 * Reintegrate a region (regionID).
388 	 *
389 	 * 1. Acquire region and parity buffers.
390 	 * 2. Read log from disk.
391 	 * 3. Read parity from disk.
392 	 * 4. Apply log to parity.
393 	 * 5. Apply core log to parity.
394 	 * 6. Write new parity to disk.
395 	 *
396 	 * BLOCKING
397 	 */
398 
399 	if (rf_parityLogDebug)
400 		printf("[reintegrating region %d]\n", regionID);
401 
402 	/* Initiate read of region parity. */
403 	if (rf_parityLogDebug)
404 		printf("[initiating read of parity for region %d]\n", regionID);
405 	parityBuffer = rf_AcquireReintBuffer(&raidPtr->parityBufferPool);
406 	prd_mcpair = rf_AllocMCPair();
407 	RF_LOCK_MUTEX(prd_mcpair->mutex);
408 	prd_mcpair->flag = RF_FALSE;
409 	rf_ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr,
410 			 &prd_dag_h, &prd_alloclist, &prd_pda);
411 
412 	/* If region log nonempty, initiate read. */
413 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
414 		if (rf_parityLogDebug)
415 			printf("[initiating read of disk log for region %d]\n",
416 			    regionID);
417 		regionBuffer =
418 		    rf_AcquireReintBuffer(&raidPtr->regionBufferPool);
419 		rrd_mcpair = rf_AllocMCPair();
420 		RF_LOCK_MUTEX(rrd_mcpair->mutex);
421 		rrd_mcpair->flag = RF_FALSE;
422 		rf_ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr,
423 		    &rrd_dag_h, &rrd_alloclist, &rrd_pda);
424 	}
425 	/* Wait on read of region parity to complete. */
426 	while (!prd_mcpair->flag) {
427 		RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
428 	}
429 	RF_UNLOCK_MUTEX(prd_mcpair->mutex);
430 	if (prd_dag_h->status != rf_enable) {
431 		RF_ERRORMSG("Unable to read parity from disk\n");
432 		/* Add code to fail the parity disk. */
433 		RF_ASSERT(0);
434 	}
435 	/* Apply core log to parity. */
436 	/* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
437 
438 	if (raidPtr->regionInfo[regionID].diskCount > 0) {
439 		/* Wait on read of region log to complete. */
440 		while (!rrd_mcpair->flag)
441 			RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
442 		RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
443 		if (rrd_dag_h->status != rf_enable) {
444 			RF_ERRORMSG("Unable to read region log from disk\n");
445 			/* Add code to fail the log disk. */
446 			RF_ASSERT(0);
447 		}
448 		/* Apply region log to parity. */
449 		/* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
450 		/* Release resources associated with region log. */
451 		/* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
452 		rf_FreePhysDiskAddr(rrd_pda);
453 		rf_FreeDAG(rrd_dag_h);
454 		rf_FreeAllocList(rrd_alloclist);
455 		rf_FreeMCPair(rrd_mcpair);
456 		rf_ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
457 	}
458 	/* Write reintegrated parity to disk. */
459 	if (rf_parityLogDebug)
460 		printf("[initiating write of parity for region %d]\n",
461 		    regionID);
462 	pwr_mcpair = rf_AllocMCPair();
463 	RF_LOCK_MUTEX(pwr_mcpair->mutex);
464 	pwr_mcpair->flag = RF_FALSE;
465 	rf_WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr,
466 	    &pwr_dag_h, &pwr_alloclist, &pwr_pda);
467 	while (!pwr_mcpair->flag)
468 		RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
469 	RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
470 	if (pwr_dag_h->status != rf_enable) {
471 		RF_ERRORMSG("Unable to write parity to disk\n");
472 		/* Add code to fail the parity disk. */
473 		RF_ASSERT(0);
474 	}
475 	/* Release resources associated with read of old parity. */
476 	/* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
477 	rf_FreePhysDiskAddr(prd_pda);
478 	rf_FreeDAG(prd_dag_h);
479 	rf_FreeAllocList(prd_alloclist);
480 	rf_FreeMCPair(prd_mcpair);
481 
482 	/* Release resources associated with write of new parity. */
483 	rf_ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
484 	/* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
485 	rf_FreePhysDiskAddr(pwr_pda);
486 	rf_FreeDAG(pwr_dag_h);
487 	rf_FreeAllocList(pwr_alloclist);
488 	rf_FreeMCPair(pwr_mcpair);
489 
490 	if (rf_parityLogDebug)
491 		printf("[finished reintegrating region %d]\n", regionID);
492 }
493 
494 
495 void
rf_ReintegrateLogs(RF_Raid_t * raidPtr,RF_ParityLog_t * logList)496 rf_ReintegrateLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *logList)
497 {
498 	RF_ParityLog_t *log, *freeLogList = NULL;
499 	RF_ParityLogData_t *logData, *logDataList;
500 	RF_RegionId_t regionID;
501 
502 	RF_ASSERT(logList);
503 	while (logList) {
504 		log = logList;
505 		logList = logList->next;
506 		log->next = NULL;
507 		regionID = log->regionID;
508 		rf_ReintegrateRegion(raidPtr, regionID, log);
509 		log->numRecords = 0;
510 
511 		/*
512 		 * Remove all items which are blocked on reintegration of this
513 		 * region.
514 		 */
515 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
516 		logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID,
517 		    &raidPtr->parityLogDiskQueue.reintBlockHead,
518 		    &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
519 		logDataList = logData;
520 		while (logData) {
521 			logData->next =
522 			    rf_SearchAndDequeueParityLogData(raidPtr, regionID,
523 			     &raidPtr->parityLogDiskQueue.reintBlockHead,
524 			     &raidPtr->parityLogDiskQueue.reintBlockTail,
525 			     RF_TRUE);
526 			logData = logData->next;
527 		}
528 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
529 
530 		/*
531 		 * Process blocked log data and clear reintInProgress flag for
532 		 * this region.
533 		 */
534 		if (logDataList)
535 			rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
536 		else {
537 			/*
538 			 * Enable flushing for this region. Holding both
539 			 * locks provides a synchronization barrier with
540 			 * DumpParityLogToDisk.
541 			 */
542 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
543 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
544 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
545 			raidPtr->regionInfo[regionID].diskCount = 0;
546 			raidPtr->regionInfo[regionID].reintInProgress =
547 			    RF_FALSE;
548 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
549 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID]
550 			    .reintMutex);	/* Flushing is now enabled. */
551 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
552 		}
553 		/*
554 		 * If log wasn't used, attach it to the list of logs to be
555 		 * returned.
556 		 */
557 		if (log) {
558 			log->next = freeLogList;
559 			freeLogList = log;
560 		}
561 	}
562 	if (freeLogList)
563 		rf_ReleaseParityLogs(raidPtr, freeLogList);
564 }
565 
566 int
rf_ShutdownLogging(RF_Raid_t * raidPtr)567 rf_ShutdownLogging(RF_Raid_t *raidPtr)
568 {
569 	/*
570 	 * Shutdown parity logging:
571 	 * 1) Disable parity logging in all regions.
572 	 * 2) Reintegrate all regions.
573 	 */
574 
575 	RF_SectorCount_t diskCount;
576 	RF_RegionId_t regionID;
577 	RF_ParityLog_t *log;
578 
579 	if (rf_parityLogDebug)
580 		printf("[shutting down parity logging]\n");
581 	/*
582 	 * Since parity log maps are volatile, we must reintegrate all
583 	 * regions.
584 	 */
585 	if (rf_forceParityLogReint) {
586 		for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
587 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
588 			raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
589 			log = raidPtr->regionInfo[regionID].coreLog;
590 			raidPtr->regionInfo[regionID].coreLog = NULL;
591 			diskCount = raidPtr->regionInfo[regionID].diskCount;
592 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
593 			if (diskCount > 0 || log != NULL)
594 				rf_ReintegrateRegion(raidPtr, regionID, log);
595 			if (log != NULL)
596 				rf_ReleaseParityLogs(raidPtr, log);
597 		}
598 	}
599 	if (rf_parityLogDebug) {
600 		printf("[parity logging disabled]\n");
601 		printf("[should be done !]\n");
602 	}
603 	return (0);
604 }
605 
606 int
rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr)607 rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
608 {
609 	RF_ParityLog_t *reintQueue, *flushQueue;
610 	int workNeeded, done = RF_FALSE;
611 	int s;
612 
613 	/*
614 	 * Main program for parity logging disk thread. This routine waits
615 	 * for work to appear in either the flush or reintegration queues and
616 	 * is responsible for flushing core logs to the log disk as well as
617 	 * reintegrating parity regions.
618 	 *
619 	 * BLOCKING
620 	 */
621 
622 	s = splbio();
623 
624 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
625 
626 	/*
627 	 * Inform our creator that we're running. Don't bother doing the
628 	 * mutex lock/unlock dance: we locked above, and we'll unlock
629 	 * below with nothing to do, yet.
630 	 */
631 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
632 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
633 
634 	/* Empty the work queues. */
635 	flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
636 	raidPtr->parityLogDiskQueue.flushQueue = NULL;
637 	reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
638 	raidPtr->parityLogDiskQueue.reintQueue = NULL;
639 	workNeeded = (flushQueue || reintQueue);
640 
641 	while (!done) {
642 		while (workNeeded) {
643 			/*
644 			 * First, flush all logs in the flush queue, freeing
645 			 * buffers. Second, reintegrate all regions that are
646 			 * reported as full. Third, append queued log data
647 			 * until blocked.
648 			 *
649 			 * Note: Incoming appends (ParityLogAppend) can block
650 			 * on either 1. empty buffer pool 2. region under
651 			 * reintegration. To preserve a global FIFO ordering of
652 			 * appends, buffers are not released to the world
653 			 * until those appends blocked on buffers are removed
654 			 * from the append queue. Similarly, regions that are
655 			 * reintegrated are not opened for general use until
656 			 * the append queue has been emptied.
657 			 */
658 
659 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
660 
661 			/*
662 			 * Empty flushQueue, using free'd log buffers to
663 			 * process bufTail.
664 			 */
665 			if (flushQueue)
666 				rf_FlushLogsToDisk(raidPtr, flushQueue);
667 
668 			/*
669 			 * Empty reintQueue, flushing from reintTail as we go.
670 			 */
671 			if (reintQueue)
672 				rf_ReintegrateLogs(raidPtr, reintQueue);
673 
674 			RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
675 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
676 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
677 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
678 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
679 			workNeeded = (flushQueue || reintQueue);
680 		}
681 		/* No work is needed at this point. */
682 		if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
683 			/*
684 			 * Shutdown parity logging:
685 			 * 1. Disable parity logging in all regions.
686 			 * 2. Reintegrate all regions.
687 			 */
688 			done = RF_TRUE;	/* Thread disabled, no work needed. */
689 			RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
690 			rf_ShutdownLogging(raidPtr);
691 		}
692 		if (!done) {
693 			/* Thread enabled, no work needed, so sleep. */
694 			if (rf_parityLogDebug)
695 				printf("[parity logging disk manager"
696 				    " sleeping]\n");
697 			RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
698 			    raidPtr->parityLogDiskQueue.mutex);
699 			if (rf_parityLogDebug)
700 				printf("[parity logging disk manager just"
701 				    " woke up]\n");
702 			flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
703 			raidPtr->parityLogDiskQueue.flushQueue = NULL;
704 			reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
705 			raidPtr->parityLogDiskQueue.reintQueue = NULL;
706 			workNeeded = (flushQueue || reintQueue);
707 		}
708 	}
709 	/*
710 	 * Announce that we're done.
711 	 */
712 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
713 	raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
714 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
715 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
716 
717 	splx(s);
718 
719 	/*
720 	 * In the Net- & OpenBSD kernel, the thread must exit; returning would
721 	 * cause the proc trampoline to attempt to return to userspace.
722 	 */
723 	kthread_exit(0);	/* does not return */
724 }
725 #endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
726