1 /*        $NetBSD: rf_paritylogDiskMgr.c,v 1.31 2021/07/23 00:54:45 oster Exp $ */
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 /* Code for flushing and reintegration operations related to parity logging.
29  *
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.31 2021/07/23 00:54:45 oster Exp $");
34 
35 #include "rf_archs.h"
36 
37 #if RF_INCLUDE_PARITYLOGGING > 0
38 
39 #include <dev/raidframe/raidframevar.h>
40 
41 #include "rf_threadstuff.h"
42 #include "rf_mcpair.h"
43 #include "rf_raid.h"
44 #include "rf_dag.h"
45 #include "rf_dagfuncs.h"
46 #include "rf_desc.h"
47 #include "rf_layout.h"
48 #include "rf_diskqueue.h"
49 #include "rf_paritylog.h"
50 #include "rf_general.h"
51 #include "rf_etimer.h"
52 #include "rf_paritylogging.h"
53 #include "rf_engine.h"
54 #include "rf_dagutils.h"
55 #include "rf_map.h"
56 #include "rf_parityscan.h"
57 
58 #include "rf_paritylogDiskMgr.h"
59 
60 static void *AcquireReintBuffer(RF_RegionBufferQueue_t *);
61 
62 static void *
AcquireReintBuffer(RF_RegionBufferQueue_t * pool)63 AcquireReintBuffer(RF_RegionBufferQueue_t *pool)
64 {
65           void *bufPtr = NULL;
66 
67           /* Return a region buffer from the free list (pool). If the free list
68            * is empty, WAIT. BLOCKING */
69 
70           rf_lock_mutex2(pool->mutex);
71           if (pool->availableBuffers > 0) {
72                     bufPtr = pool->buffers[pool->availBuffersIndex];
73                     pool->availableBuffers--;
74                     pool->availBuffersIndex++;
75                     if (pool->availBuffersIndex == pool->totalBuffers)
76                               pool->availBuffersIndex = 0;
77                     rf_unlock_mutex2(pool->mutex);
78           } else {
79                     RF_PANIC();         /* should never happen in correct config,
80                                          * single reint */
81                     rf_wait_cond2(pool->cond, pool->mutex);
82           }
83           return (bufPtr);
84 }
85 
86 static void
ReleaseReintBuffer(RF_RegionBufferQueue_t * pool,void * bufPtr)87 ReleaseReintBuffer(
88     RF_RegionBufferQueue_t * pool,
89     void *bufPtr)
90 {
91           /* Insert a region buffer (bufPtr) into the free list (pool).
92            * NON-BLOCKING */
93 
94           rf_lock_mutex2(pool->mutex);
95           pool->availableBuffers++;
96           pool->buffers[pool->emptyBuffersIndex] = bufPtr;
97           pool->emptyBuffersIndex++;
98           if (pool->emptyBuffersIndex == pool->totalBuffers)
99                     pool->emptyBuffersIndex = 0;
100           RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
101           /*
102            * XXXmrg this signal goes with the above "shouldn't happen" wait?
103            */
104           rf_signal_cond2(pool->cond);
105           rf_unlock_mutex2(pool->mutex);
106 }
107 
108 
109 
110 static void
ReadRegionLog(RF_RegionId_t regionID,RF_MCPair_t * rrd_mcpair,void * regionBuffer,RF_Raid_t * raidPtr,RF_DagHeader_t ** rrd_dag_h,RF_AllocListElem_t ** rrd_alloclist,RF_PhysDiskAddr_t ** rrd_pda)111 ReadRegionLog(
112     RF_RegionId_t regionID,
113     RF_MCPair_t * rrd_mcpair,
114     void *regionBuffer,
115     RF_Raid_t * raidPtr,
116     RF_DagHeader_t ** rrd_dag_h,
117     RF_AllocListElem_t ** rrd_alloclist,
118     RF_PhysDiskAddr_t ** rrd_pda)
119 {
120           /* Initiate the read a region log from disk.  Once initiated, return
121            * to the calling routine.
122            *
123            * NON-BLOCKING */
124 
125           RF_AccTraceEntry_t *tracerec;
126           RF_DagNode_t *rrd_rdNode;
127 
128           /* create DAG to read region log from disk */
129           rf_MakeAllocList(*rrd_alloclist);
130           *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer,
131                                               rf_DiskReadFunc, rf_DiskReadUndoFunc,
132                                               "Rrl", *rrd_alloclist,
133                                               RF_DAG_FLAGS_NONE,
134                                               RF_IO_NORMAL_PRIORITY);
135 
136           /* create and initialize PDA for the core log */
137           *rrd_pda = rf_AllocPDAList(raidPtr, 1);
138           rf_MapLogParityLogging(raidPtr, regionID, 0,
139                                      &((*rrd_pda)->col), &((*rrd_pda)->startSector));
140           (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
141 
142           if ((*rrd_pda)->next) {
143                     (*rrd_pda)->next = NULL;
144                     printf("set rrd_pda->next to NULL\n");
145           }
146           /* initialize DAG parameters */
147           tracerec = RF_Malloc(sizeof(*tracerec));
148           (*rrd_dag_h)->tracerec = tracerec;
149           rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
150           rrd_rdNode->params[0].p = *rrd_pda;
151 /*  rrd_rdNode->params[1] = regionBuffer; */
152           rrd_rdNode->params[2].v = 0;
153           rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0);
154 
155           /* launch region log read dag */
156           rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
157               (void *) rrd_mcpair);
158 }
159 
160 
161 
162 static void
WriteCoreLog(RF_ParityLog_t * log,RF_MCPair_t * fwr_mcpair,RF_Raid_t * raidPtr,RF_DagHeader_t ** fwr_dag_h,RF_AllocListElem_t ** fwr_alloclist,RF_PhysDiskAddr_t ** fwr_pda)163 WriteCoreLog(
164     RF_ParityLog_t * log,
165     RF_MCPair_t * fwr_mcpair,
166     RF_Raid_t * raidPtr,
167     RF_DagHeader_t ** fwr_dag_h,
168     RF_AllocListElem_t ** fwr_alloclist,
169     RF_PhysDiskAddr_t ** fwr_pda)
170 {
171           RF_RegionId_t regionID = log->regionID;
172           RF_AccTraceEntry_t *tracerec;
173           RF_SectorNum_t regionOffset;
174           RF_DagNode_t *fwr_wrNode;
175 
176           /* Initiate the write of a core log to a region log disk. Once
177            * initiated, return to the calling routine.
178            *
179            * NON-BLOCKING */
180 
181           /* create DAG to write a core log to a region log disk */
182           rf_MakeAllocList(*fwr_alloclist);
183           *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr,
184                                               rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
185               "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
186 
187           *fwr_pda = rf_AllocPDAList(raidPtr, 1);
188           regionOffset = log->diskOffset;
189           rf_MapLogParityLogging(raidPtr, regionID, regionOffset,
190                                      &((*fwr_pda)->col),
191                                      &((*fwr_pda)->startSector));
192           (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
193 
194           /* initialize DAG parameters */
195           tracerec = RF_Malloc(sizeof(*tracerec));
196           (*fwr_dag_h)->tracerec = tracerec;
197           fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
198           fwr_wrNode->params[0].p = *fwr_pda;
199 /*  fwr_wrNode->params[1] = log->bufPtr; */
200           fwr_wrNode->params[2].v = 0;
201           fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0);
202 
203           /* launch the dag to write the core log to disk */
204           rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
205               (void *) fwr_mcpair);
206 }
207 
208 
209 static void
ReadRegionParity(RF_RegionId_t regionID,RF_MCPair_t * prd_mcpair,void * parityBuffer,RF_Raid_t * raidPtr,RF_DagHeader_t ** prd_dag_h,RF_AllocListElem_t ** prd_alloclist,RF_PhysDiskAddr_t ** prd_pda)210 ReadRegionParity(
211     RF_RegionId_t regionID,
212     RF_MCPair_t * prd_mcpair,
213     void *parityBuffer,
214     RF_Raid_t * raidPtr,
215     RF_DagHeader_t ** prd_dag_h,
216     RF_AllocListElem_t ** prd_alloclist,
217     RF_PhysDiskAddr_t ** prd_pda)
218 {
219           /* Initiate the read region parity from disk. Once initiated, return
220            * to the calling routine.
221            *
222            * NON-BLOCKING */
223 
224           RF_AccTraceEntry_t *tracerec;
225           RF_DagNode_t *prd_rdNode;
226 
227           /* create DAG to read region parity from disk */
228           rf_MakeAllocList(*prd_alloclist);
229           *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc,
230                                               rf_DiskReadUndoFunc, "Rrp",
231                                               *prd_alloclist, RF_DAG_FLAGS_NONE,
232                                               RF_IO_NORMAL_PRIORITY);
233 
234           /* create and initialize PDA for region parity */
235           *prd_pda = rf_AllocPDAList(raidPtr, 1);
236           rf_MapRegionParity(raidPtr, regionID,
237                                  &((*prd_pda)->col), &((*prd_pda)->startSector),
238                                  &((*prd_pda)->numSector));
239           if (rf_parityLogDebug)
240                     printf("[reading %d sectors of parity from region %d]\n",
241                         (int) (*prd_pda)->numSector, regionID);
242           if ((*prd_pda)->next) {
243                     (*prd_pda)->next = NULL;
244                     printf("set prd_pda->next to NULL\n");
245           }
246           /* initialize DAG parameters */
247           tracerec = RF_Malloc(sizeof(*tracerec));
248           (*prd_dag_h)->tracerec = tracerec;
249           prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
250           prd_rdNode->params[0].p = *prd_pda;
251           prd_rdNode->params[1].p = parityBuffer;
252           prd_rdNode->params[2].v = 0;
253           prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0);
254 #if RF_DEBUG_VALIDATE_DAG
255           if (rf_validateDAGDebug)
256                     rf_ValidateDAG(*prd_dag_h);
257 #endif
258           /* launch region parity read dag */
259           rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
260               (void *) prd_mcpair);
261 }
262 
263 static void
WriteRegionParity(RF_RegionId_t regionID,RF_MCPair_t * pwr_mcpair,void * parityBuffer,RF_Raid_t * raidPtr,RF_DagHeader_t ** pwr_dag_h,RF_AllocListElem_t ** pwr_alloclist,RF_PhysDiskAddr_t ** pwr_pda)264 WriteRegionParity(
265     RF_RegionId_t regionID,
266     RF_MCPair_t * pwr_mcpair,
267     void *parityBuffer,
268     RF_Raid_t * raidPtr,
269     RF_DagHeader_t ** pwr_dag_h,
270     RF_AllocListElem_t ** pwr_alloclist,
271     RF_PhysDiskAddr_t ** pwr_pda)
272 {
273           /* Initiate the write of region parity to disk. Once initiated, return
274            * to the calling routine.
275            *
276            * NON-BLOCKING */
277 
278           RF_AccTraceEntry_t *tracerec;
279           RF_DagNode_t *pwr_wrNode;
280 
281           /* create DAG to write region log from disk */
282           rf_MakeAllocList(*pwr_alloclist);
283           *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer,
284                                               rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
285                                               "Wrp", *pwr_alloclist,
286                                               RF_DAG_FLAGS_NONE,
287                                               RF_IO_NORMAL_PRIORITY);
288 
289           /* create and initialize PDA for region parity */
290           *pwr_pda = rf_AllocPDAList(raidPtr, 1);
291           rf_MapRegionParity(raidPtr, regionID,
292                                  &((*pwr_pda)->col), &((*pwr_pda)->startSector),
293                                  &((*pwr_pda)->numSector));
294 
295           /* initialize DAG parameters */
296           tracerec = RF_Malloc(sizeof(*tracerec));
297           (*pwr_dag_h)->tracerec = tracerec;
298           pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
299           pwr_wrNode->params[0].p = *pwr_pda;
300 /*  pwr_wrNode->params[1] = parityBuffer; */
301           pwr_wrNode->params[2].v = 0;
302           pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0);
303 
304           /* launch the dag to write region parity to disk */
305           rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
306               (void *) pwr_mcpair);
307 }
308 
309 static void
FlushLogsToDisk(RF_Raid_t * raidPtr,RF_ParityLog_t * logList)310 FlushLogsToDisk(
311     RF_Raid_t * raidPtr,
312     RF_ParityLog_t * logList)
313 {
314           /* Flush a linked list of core logs to the log disk. Logs contain the
315            * disk location where they should be written.  Logs were written in
316            * FIFO order and that order must be preserved.
317            *
318            * Recommended optimizations: 1) allow multiple flushes to occur
319            * simultaneously 2) coalesce contiguous flush operations
320            *
321            * BLOCKING */
322 
323           RF_ParityLog_t *log;
324           RF_RegionId_t regionID;
325           RF_MCPair_t *fwr_mcpair;
326           RF_DagHeader_t *fwr_dag_h;
327           RF_AllocListElem_t *fwr_alloclist;
328           RF_PhysDiskAddr_t *fwr_pda;
329 
330           fwr_mcpair = rf_AllocMCPair(raidPtr);
331           RF_LOCK_MCPAIR(fwr_mcpair);
332 
333           RF_ASSERT(logList);
334           log = logList;
335           while (log) {
336                     regionID = log->regionID;
337 
338                     /* create and launch a DAG to write the core log */
339                     if (rf_parityLogDebug)
340                               printf("[initiating write of core log for region %d]\n", regionID);
341                     fwr_mcpair->flag = RF_FALSE;
342                     WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h,
343                                    &fwr_alloclist, &fwr_pda);
344 
345                     /* wait for the DAG to complete */
346                     while (!fwr_mcpair->flag)
347                               RF_WAIT_MCPAIR(fwr_mcpair);
348                     if (fwr_dag_h->status != rf_enable) {
349                               RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
350                               RF_ASSERT(0);
351                     }
352                     /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
353                     rf_FreePhysDiskAddr(raidPtr, fwr_pda);
354                     rf_FreeDAG(fwr_dag_h);
355                     rf_FreeAllocList(fwr_alloclist);
356 
357                     log = log->next;
358           }
359           RF_UNLOCK_MCPAIR(fwr_mcpair);
360           rf_FreeMCPair(raidPtr, fwr_mcpair);
361           rf_ReleaseParityLogs(raidPtr, logList);
362 }
363 
364 static void
ReintegrateRegion(RF_Raid_t * raidPtr,RF_RegionId_t regionID,RF_ParityLog_t * coreLog)365 ReintegrateRegion(
366     RF_Raid_t * raidPtr,
367     RF_RegionId_t regionID,
368     RF_ParityLog_t * coreLog)
369 {
370           RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
371           RF_DagHeader_t *rrd_dag_h = NULL, *prd_dag_h, *pwr_dag_h;
372           RF_AllocListElem_t *rrd_alloclist = NULL, *prd_alloclist, *pwr_alloclist;
373           RF_PhysDiskAddr_t *rrd_pda = NULL, *prd_pda, *pwr_pda;
374           void *parityBuffer, *regionBuffer = NULL;
375 
376           /* Reintegrate a region (regionID).
377            *
378            * 1. acquire region and parity buffers
379            * 2. read log from disk
380            * 3. read parity from disk
381            * 4. apply log to parity
382            * 5. apply core log to parity
383            * 6. write new parity to disk
384            *
385            * BLOCKING */
386 
387           if (rf_parityLogDebug)
388                     printf("[reintegrating region %d]\n", regionID);
389 
390           /* initiate read of region parity */
391           if (rf_parityLogDebug)
392                     printf("[initiating read of parity for region %d]\n",regionID);
393           parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
394           prd_mcpair = rf_AllocMCPair(raidPtr);
395           RF_LOCK_MCPAIR(prd_mcpair);
396           prd_mcpair->flag = RF_FALSE;
397           ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr,
398                                &prd_dag_h, &prd_alloclist, &prd_pda);
399 
400           /* if region log nonempty, initiate read */
401           if (raidPtr->regionInfo[regionID].diskCount > 0) {
402                     if (rf_parityLogDebug)
403                               printf("[initiating read of disk log for region %d]\n",
404                                      regionID);
405                     regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
406                     rrd_mcpair = rf_AllocMCPair(raidPtr);
407                     RF_LOCK_MCPAIR(rrd_mcpair);
408                     rrd_mcpair->flag = RF_FALSE;
409                     ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr,
410                                     &rrd_dag_h, &rrd_alloclist, &rrd_pda);
411           }
412           /* wait on read of region parity to complete */
413           while (!prd_mcpair->flag) {
414                     RF_WAIT_MCPAIR(prd_mcpair);
415           }
416           RF_UNLOCK_MCPAIR(prd_mcpair);
417           if (prd_dag_h->status != rf_enable) {
418                     RF_ERRORMSG("Unable to read parity from disk\n");
419                     /* add code to fail the parity disk */
420                     RF_ASSERT(0);
421           }
422           /* apply core log to parity */
423           /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
424 
425           if (raidPtr->regionInfo[regionID].diskCount > 0) {
426                     /* wait on read of region log to complete */
427                     while (!rrd_mcpair->flag)
428                               RF_WAIT_MCPAIR(rrd_mcpair);
429                     RF_UNLOCK_MCPAIR(rrd_mcpair);
430                     if (rrd_dag_h->status != rf_enable) {
431                               RF_ERRORMSG("Unable to read region log from disk\n");
432                               /* add code to fail the log disk */
433                               RF_ASSERT(0);
434                     }
435                     /* apply region log to parity */
436                     /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
437                     /* release resources associated with region log */
438                     /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
439                     rf_FreePhysDiskAddr(raidPtr, rrd_pda);
440                     rf_FreeDAG(rrd_dag_h);
441                     rf_FreeAllocList(rrd_alloclist);
442                     rf_FreeMCPair(raidPtr, rrd_mcpair);
443                     ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
444           }
445           /* write reintegrated parity to disk */
446           if (rf_parityLogDebug)
447                     printf("[initiating write of parity for region %d]\n",
448                            regionID);
449           pwr_mcpair = rf_AllocMCPair(raidPtr);
450           RF_LOCK_MCPAIR(pwr_mcpair);
451           pwr_mcpair->flag = RF_FALSE;
452           WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr,
453                                 &pwr_dag_h, &pwr_alloclist, &pwr_pda);
454           while (!pwr_mcpair->flag)
455                     RF_WAIT_MCPAIR(pwr_mcpair);
456           RF_UNLOCK_MCPAIR(pwr_mcpair);
457           if (pwr_dag_h->status != rf_enable) {
458                     RF_ERRORMSG("Unable to write parity to disk\n");
459                     /* add code to fail the parity disk */
460                     RF_ASSERT(0);
461           }
462           /* release resources associated with read of old parity */
463           /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
464           rf_FreePhysDiskAddr(raidPtr, prd_pda);
465           rf_FreeDAG(prd_dag_h);
466           rf_FreeAllocList(prd_alloclist);
467           rf_FreeMCPair(raidPtr, prd_mcpair);
468 
469           /* release resources associated with write of new parity */
470           ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
471           /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
472           rf_FreePhysDiskAddr(raidPtr, pwr_pda);
473           rf_FreeDAG(pwr_dag_h);
474           rf_FreeAllocList(pwr_alloclist);
475           rf_FreeMCPair(raidPtr, pwr_mcpair);
476 
477           if (rf_parityLogDebug)
478                     printf("[finished reintegrating region %d]\n", regionID);
479 }
480 
481 
482 
483 static void
ReintegrateLogs(RF_Raid_t * raidPtr,RF_ParityLog_t * logList)484 ReintegrateLogs(
485     RF_Raid_t * raidPtr,
486     RF_ParityLog_t * logList)
487 {
488           RF_ParityLog_t *log, *freeLogList = NULL;
489           RF_ParityLogData_t *logData, *logDataList;
490           RF_RegionId_t regionID;
491 
492           RF_ASSERT(logList);
493           while (logList) {
494                     log = logList;
495                     logList = logList->next;
496                     log->next = NULL;
497                     regionID = log->regionID;
498                     ReintegrateRegion(raidPtr, regionID, log);
499                     log->numRecords = 0;
500 
501                     /* remove all items which are blocked on reintegration of this
502                      * region */
503                     rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
504                     logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID,
505                                  &raidPtr->parityLogDiskQueue.reintBlockHead,
506                                  &raidPtr->parityLogDiskQueue.reintBlockTail,
507                                                                          RF_TRUE);
508                     logDataList = logData;
509                     while (logData) {
510                               logData->next = rf_SearchAndDequeueParityLogData(
511                                                    raidPtr, regionID,
512                                                    &raidPtr->parityLogDiskQueue.reintBlockHead,
513                                                    &raidPtr->parityLogDiskQueue.reintBlockTail,
514                                                    RF_TRUE);
515                               logData = logData->next;
516                     }
517                     rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
518 
519                     /* process blocked log data and clear reintInProgress flag for
520                      * this region */
521                     if (logDataList)
522                               rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
523                     else {
524                               /* Enable flushing for this region.  Holding both
525                                * locks provides a synchronization barrier with
526                                * DumpParityLogToDisk */
527                               rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
528                               rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
529                               /* XXXmrg: don't need this? */
530                               rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
531                               raidPtr->regionInfo[regionID].diskCount = 0;
532                               raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
533                               rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
534                               rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
535                                                                                                                * enabled */
536                               /* XXXmrg: don't need this? */
537                               rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
538                     }
539                     /* if log wasn't used, attach it to the list of logs to be
540                      * returned */
541                     if (log) {
542                               log->next = freeLogList;
543                               freeLogList = log;
544                     }
545           }
546           if (freeLogList)
547                     rf_ReleaseParityLogs(raidPtr, freeLogList);
548 }
549 
550 int
rf_ShutdownLogging(RF_Raid_t * raidPtr)551 rf_ShutdownLogging(RF_Raid_t * raidPtr)
552 {
553           /* shutdown parity logging 1) disable parity logging in all regions 2)
554            * reintegrate all regions */
555 
556           RF_SectorCount_t diskCount;
557           RF_RegionId_t regionID;
558           RF_ParityLog_t *log;
559 
560           if (rf_parityLogDebug)
561                     printf("[shutting down parity logging]\n");
562           /* Since parity log maps are volatile, we must reintegrate all
563            * regions. */
564           if (rf_forceParityLogReint) {
565                     for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
566                               rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
567                               raidPtr->regionInfo[regionID].loggingEnabled =
568                                         RF_FALSE;
569                               log = raidPtr->regionInfo[regionID].coreLog;
570                               raidPtr->regionInfo[regionID].coreLog = NULL;
571                               diskCount = raidPtr->regionInfo[regionID].diskCount;
572                               rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
573                               if (diskCount > 0 || log != NULL)
574                                         ReintegrateRegion(raidPtr, regionID, log);
575                               if (log != NULL)
576                                         rf_ReleaseParityLogs(raidPtr, log);
577                     }
578           }
579           if (rf_parityLogDebug) {
580                     printf("[parity logging disabled]\n");
581                     printf("[should be done!]\n");
582           }
583           return (0);
584 }
585 
586 void
rf_ParityLoggingDiskManager(void * v)587 rf_ParityLoggingDiskManager(void *v)
588 {
589           RF_Raid_t *raidPtr = v;
590           RF_ParityLog_t *reintQueue, *flushQueue;
591           int     workNeeded, done = RF_FALSE;
592           int s;
593 
594           /* Main program for parity logging disk thread.  This routine waits
595            * for work to appear in either the flush or reintegration queues and
596            * is responsible for flushing core logs to the log disk as well as
597            * reintegrating parity regions.
598            *
599            * BLOCKING */
600 
601           s = splbio();
602 
603           rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
604 
605           /*
606          * Inform our creator that we're running. Don't bother doing the
607          * mutex lock/unlock dance- we locked above, and we'll unlock
608          * below with nothing to do, yet.
609          */
610           raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
611           rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
612 
613           /* empty the work queues */
614           flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
615           raidPtr->parityLogDiskQueue.flushQueue = NULL;
616           reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
617           raidPtr->parityLogDiskQueue.reintQueue = NULL;
618           workNeeded = (flushQueue || reintQueue);
619 
620           while (!done) {
621                     while (workNeeded) {
622                               /* First, flush all logs in the flush queue, freeing
623                                * buffers Second, reintegrate all regions which are
624                                * reported as full. Third, append queued log data
625                                * until blocked.
626                                *
627                                * Note: Incoming appends (ParityLogAppend) can block on
628                                * either 1. empty buffer pool 2. region under
629                                * reintegration To preserve a global FIFO ordering of
630                                * appends, buffers are not released to the world
631                                * until those appends blocked on buffers are removed
632                                * from the append queue.  Similarly, regions which
633                                * are reintegrated are not opened for general use
634                                * until the append queue has been emptied. */
635 
636                               rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
637 
638                               /* empty flushQueue, using free'd log buffers to
639                                * process bufTail */
640                               if (flushQueue)
641                                      FlushLogsToDisk(raidPtr, flushQueue);
642 
643                               /* empty reintQueue, flushing from reintTail as we go */
644                               if (reintQueue)
645                                         ReintegrateLogs(raidPtr, reintQueue);
646 
647                               rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
648                               flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
649                               raidPtr->parityLogDiskQueue.flushQueue = NULL;
650                               reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
651                               raidPtr->parityLogDiskQueue.reintQueue = NULL;
652                               workNeeded = (flushQueue || reintQueue);
653                     }
654                     /* no work is needed at this point */
655                     if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
656                               /* shutdown parity logging 1. disable parity logging
657                                * in all regions 2. reintegrate all regions */
658                               done = RF_TRUE;     /* thread disabled, no work needed */
659                               rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
660                               rf_ShutdownLogging(raidPtr);
661                     }
662                     if (!done) {
663                               /* thread enabled, no work needed, so sleep */
664                               if (rf_parityLogDebug)
665                                         printf("[parity logging disk manager sleeping]\n");
666                               rf_wait_cond2(raidPtr->parityLogDiskQueue.cond,
667                                               raidPtr->parityLogDiskQueue.mutex);
668                               if (rf_parityLogDebug)
669                                         printf("[parity logging disk manager just woke up]\n");
670                               flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
671                               raidPtr->parityLogDiskQueue.flushQueue = NULL;
672                               reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
673                               raidPtr->parityLogDiskQueue.reintQueue = NULL;
674                               workNeeded = (flushQueue || reintQueue);
675                     }
676           }
677           /*
678          * Announce that we're done.
679          */
680           rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
681           raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
682           rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
683           rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
684 
685           splx(s);
686 
687           /*
688          * In the NetBSD kernel, the thread must exit; returning would
689          * cause the proc trampoline to attempt to return to userspace.
690          */
691           kthread_exit(0);    /* does not return */
692 }
693 #endif                                  /* RF_INCLUDE_PARITYLOGGING > 0 */
694