1 /*        $NetBSD: rf_dagffwr.c,v 1.38 2023/10/15 18:15:20 oster Exp $          */
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
/*
 * rf_dagffwr.c
 *
 * code for creating fault-free write DAGs
 *
 */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.38 2023/10/15 18:15:20 oster Exp $");
38 
39 #include <dev/raidframe/raidframevar.h>
40 
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_debugMem.h"
46 #include "rf_dagffrd.h"
47 #include "rf_general.h"
48 #include "rf_dagffwr.h"
49 #include "rf_map.h"
50 
/******************************************************************************
 *
 * General comments on DAG creation:
 *
 * All DAGs in this file use roll-away error recovery.  Each DAG has a single
 * commit node, usually called "Cmt."  If an error occurs before the Cmt node
 * is reached, the execution engine will halt forward execution and work
 * backward through the graph, executing the undo functions.  Assuming that
 * each node in the graph prior to the Cmt node is undoable and atomic - or -
 * does not make changes to permanent state, the graph will fail atomically.
 * If an error occurs after the Cmt node executes, the engine will roll-forward
 * through the graph, blindly executing nodes until it reaches the end.
 * If a graph reaches the end, it is assumed to have completed successfully.
 *
 * A graph has only 1 Cmt node.
 *
 */
68 
69 
70 /******************************************************************************
71  *
72  * The following wrappers map the standard DAG creation interface to the
73  * DAG creation routines.  Additionally, these wrappers enable experimentation
74  * with new DAG structures by providing an extra level of indirection, allowing
75  * the DAG creation routines to be replaced at this single point.
76  */
77 
78 
79 void
rf_CreateNonRedundantWriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList,RF_IoType_t type)80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81                                     RF_DagHeader_t *dag_h, void *bp,
82                                     RF_RaidAccessFlags_t flags,
83                                     RF_AllocListElem_t *allocList,
84                                     RF_IoType_t type)
85 {
86           rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87                                          RF_IO_TYPE_WRITE);
88 }
89 
90 void
rf_CreateRAID0WriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList,RF_IoType_t type)91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92                            RF_DagHeader_t *dag_h, void *bp,
93                            RF_RaidAccessFlags_t flags,
94                            RF_AllocListElem_t *allocList,
95                            RF_IoType_t type)
96 {
97           rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98                                          RF_IO_TYPE_WRITE);
99 }
100 
101 void
rf_CreateSmallWriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList)102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103                            RF_DagHeader_t *dag_h, void *bp,
104                            RF_RaidAccessFlags_t flags,
105                            RF_AllocListElem_t *allocList)
106 {
107           /* "normal" rollaway */
108           rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109                                              allocList, &rf_xorFuncs, NULL);
110 }
111 
112 void
rf_CreateLargeWriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList)113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114                            RF_DagHeader_t *dag_h, void *bp,
115                            RF_RaidAccessFlags_t flags,
116                            RF_AllocListElem_t *allocList)
117 {
118           /* "normal" rollaway */
119           rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120                                              allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 }
122 
123 
124 /******************************************************************************
125  *
126  * DAG creation code begins here
127  */
/*
 * Allocate rf_RaidAddressToByte(raidPtr, num) bytes via RF_MallocAndAdd,
 * handing it allocList.  The macro expands in the caller's scope, so both
 * `raidPtr` and `allocList` must be visible at the point of use.
 */
#define BUF_ALLOC(num)							\
	RF_MallocAndAdd(rf_RaidAddressToByte(raidPtr, (num)), allocList)
130 
131 
/******************************************************************************
 *
 * creates a DAG to perform a large-write operation:
 *
 *           / Rod \           / Wnd \
 * H -- block- Rod - Xor - Cmt - Wnd --- T
 *           \ Rod /          \  Wnp /
 *                             \[Wnq]/
 *
 * The XOR node also does the Q calculation in the P+Q architecture.
 * All nodes before the commit node (Cmt) are assumed to be atomic and
 * undoable - or - they make no changes to permanent state.
 *
 * Rod = read old data
 * Cmt = commit node
 * Wnp = write new parity
 * Wnd = write new data
 * Wnq = write new "q"
 * [] denotes optional segments in the graph
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              dag_h     - DAG header that receives the new graph
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *              nfaults   - number of faults array can tolerate
 *                          (equal to # redundancy units in stripe)
 *              redFunc   - redundancy generating function run by the
 *                          Xor node
 *              allowBufferRecycle - if true, a buffer read by a Rod node
 *                          that covers a full stripe unit may be reused
 *                          to hold the computed parity
 *
 *****************************************************************************/
162 
163 void
rf_CommonCreateLargeWriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList,int nfaults,void (* redFunc)(RF_DagNode_t *),int allowBufferRecycle)164 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
165                                    RF_DagHeader_t *dag_h, void *bp,
166                                    RF_RaidAccessFlags_t flags,
167                                    RF_AllocListElem_t *allocList,
168                                    int nfaults, void (*redFunc) (RF_DagNode_t *),
169                                    int allowBufferRecycle)
170 {
171           RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
172           RF_DagNode_t *blockNode, *commitNode, *termNode;
173 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
174           RF_DagNode_t *wnqNode;
175 #endif
176           int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
177           RF_AccessStripeMapHeader_t *new_asm_h[2];
178           RF_StripeNum_t parityStripeID;
179           char   *sosBuffer, *eosBuffer;
180           RF_ReconUnitNum_t which_ru;
181           RF_RaidLayout_t *layoutPtr;
182           RF_PhysDiskAddr_t *pda;
183 
184           layoutPtr = &(raidPtr->Layout);
185           parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
186                                                                       asmap->raidAddress,
187                                                                       &which_ru);
188 
189 #if RF_DEBUG_DAG
190           if (rf_dagDebug) {
191                     printf("[Creating large-write DAG]\n");
192           }
193 #endif
194           dag_h->creator = "LargeWriteDAG";
195 
196           dag_h->numCommitNodes = 1;
197           dag_h->numCommits = 0;
198           dag_h->numSuccedents = 1;
199 
200           /* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
201           nWndNodes = asmap->numStripeUnitsAccessed;
202 
203           for (i = 0; i < nWndNodes; i++) {
204                     tmpNode = rf_AllocDAGNode(raidPtr);
205                     tmpNode->list_next = dag_h->nodes;
206                     dag_h->nodes = tmpNode;
207           }
208           wndNodes = dag_h->nodes;
209 
210           xorNode = rf_AllocDAGNode(raidPtr);
211           xorNode->list_next = dag_h->nodes;
212           dag_h->nodes = xorNode;
213 
214           wnpNode = rf_AllocDAGNode(raidPtr);
215           wnpNode->list_next = dag_h->nodes;
216           dag_h->nodes = wnpNode;
217 
218           blockNode = rf_AllocDAGNode(raidPtr);
219           blockNode->list_next = dag_h->nodes;
220           dag_h->nodes = blockNode;
221 
222           commitNode = rf_AllocDAGNode(raidPtr);
223           commitNode->list_next = dag_h->nodes;
224           dag_h->nodes = commitNode;
225 
226           termNode = rf_AllocDAGNode(raidPtr);
227           termNode->list_next = dag_h->nodes;
228           dag_h->nodes = termNode;
229 
230 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
231           if (nfaults == 2) {
232                     wnqNode = rf_AllocDAGNode(raidPtr);
233           } else {
234                     wnqNode = NULL;
235           }
236 #endif
237           rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
238                                                   new_asm_h, &nRodNodes, &sosBuffer,
239                                                   &eosBuffer, allocList);
240           if (nRodNodes > 0) {
241                     for (i = 0; i < nRodNodes; i++) {
242                               tmpNode = rf_AllocDAGNode(raidPtr);
243                               tmpNode->list_next = dag_h->nodes;
244                               dag_h->nodes = tmpNode;
245                     }
246                     rodNodes = dag_h->nodes;
247           } else {
248                     rodNodes = NULL;
249           }
250 
251           /* begin node initialization */
252           if (nRodNodes > 0) {
253                     rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
254                                   rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
255                                   dag_h, "Nil", allocList);
256           } else {
257                     rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
258                                   rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
259                                   dag_h, "Nil", allocList);
260           }
261 
262           rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
263                         rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
264                         dag_h, "Cmt", allocList);
265           rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
266                         rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
267                         dag_h, "Trm", allocList);
268 
269           /* initialize the Rod nodes */
270           tmpNode = rodNodes;
271           for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
272                     if (new_asm_h[asmNum]) {
273                               pda = new_asm_h[asmNum]->stripeMap->physInfo;
274                               while (pda) {
275                                         rf_InitNode(tmpNode, rf_wait,
276                                                       RF_FALSE, rf_DiskReadFunc,
277                                                       rf_DiskReadUndoFunc,
278                                                       rf_GenericWakeupFunc,
279                                                       1, 1, 4, 0, dag_h,
280                                                       "Rod", allocList);
281                                         tmpNode->params[0].p = pda;
282                                         tmpNode->params[1].p = pda->bufPtr;
283                                         tmpNode->params[2].v = parityStripeID;
284                                         tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
285                                             which_ru);
286                                         nodeNum++;
287                                         pda = pda->next;
288                                         tmpNode = tmpNode->list_next;
289                               }
290                     }
291           }
292           RF_ASSERT(nodeNum == nRodNodes);
293 
294           /* initialize the wnd nodes */
295           pda = asmap->physInfo;
296           tmpNode = wndNodes;
297           for (i = 0; i < nWndNodes; i++) {
298                     rf_InitNode(tmpNode, rf_wait, RF_FALSE,
299                                   rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
300                                   rf_GenericWakeupFunc, 1, 1, 4, 0,
301                                   dag_h, "Wnd", allocList);
302                     RF_ASSERT(pda != NULL);
303                     tmpNode->params[0].p = pda;
304                     tmpNode->params[1].p = pda->bufPtr;
305                     tmpNode->params[2].v = parityStripeID;
306                     tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
307                     pda = pda->next;
308                     tmpNode = tmpNode->list_next;
309           }
310 
311           /* initialize the redundancy node */
312           if (nRodNodes > 0) {
313                     rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
314                                   rf_NullNodeUndoFunc, NULL, 1,
315                                   nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
316                                   nfaults, dag_h, "Xr ", allocList);
317           } else {
318                     rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
319                                   rf_NullNodeUndoFunc, NULL, 1,
320                                   1, 2 * (nWndNodes + nRodNodes) + 1,
321                                   nfaults, dag_h, "Xr ", allocList);
322           }
323           xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
324           tmpNode = wndNodes;
325           for (i = 0; i < nWndNodes; i++) {
326                     /* pda */
327                     xorNode->params[2 * i + 0] = tmpNode->params[0];
328                     /* buf ptr */
329                     xorNode->params[2 * i + 1] = tmpNode->params[1];
330                     tmpNode = tmpNode->list_next;
331           }
332           tmpNode = rodNodes;
333           for (i = 0; i < nRodNodes; i++) {
334                     /* pda */
335                     xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
336                     /* buf ptr */
337                     xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
338                     tmpNode = tmpNode->list_next;
339           }
340           /* xor node needs to get at RAID information */
341           xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
342 
343           /*
344          * Look for an Rod node that reads a complete SU. If none,
345          * alloc a buffer to receive the parity info. Note that we
346          * can't use a new data buffer because it will not have gotten
347          * written when the xor occurs.  */
348           if (allowBufferRecycle) {
349                     tmpNode = rodNodes;
350                     for (i = 0; i < nRodNodes; i++) {
351                               if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
352                                         break;
353                               tmpNode = tmpNode->list_next;
354                     }
355           }
356           if ((!allowBufferRecycle) || (i == nRodNodes)) {
357                     xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
358           } else {
359                     /* this works because the only way we get here is if
360                        allowBufferRecycle is true and we went through the
361                        above for loop, and exited via the break before
362                        i==nRodNodes was true.  That means tmpNode will
363                        still point to a valid node -- the one we want for
364                        here! */
365                     xorNode->results[0] = tmpNode->params[1].p;
366           }
367 
368           /* initialize the Wnp node */
369           rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
370                         rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
371                         dag_h, "Wnp", allocList);
372           wnpNode->params[0].p = asmap->parityInfo;
373           wnpNode->params[1].p = xorNode->results[0];
374           wnpNode->params[2].v = parityStripeID;
375           wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
376           /* parityInfo must describe entire parity unit */
377           RF_ASSERT(asmap->parityInfo->next == NULL);
378 
379 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
380           if (nfaults == 2) {
381                     /*
382                    * We never try to recycle a buffer for the Q calcuation
383                    * in addition to the parity. This would cause two buffers
384                    * to get smashed during the P and Q calculation, guaranteeing
385                    * one would be wrong.
386                    */
387                     xorNode->results[1] =
388                         BUF_ALLOC(raidPtr->Layout.sectorsPerStripeUnit);
389                     rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
390                                   rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
391                                   1, 1, 4, 0, dag_h, "Wnq", allocList);
392                     wnqNode->params[0].p = asmap->qInfo;
393                     wnqNode->params[1].p = xorNode->results[1];
394                     wnqNode->params[2].v = parityStripeID;
395                     wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
396                     /* parityInfo must describe entire parity unit */
397                     RF_ASSERT(asmap->parityInfo->next == NULL);
398           }
399 #endif
400           /*
401          * Connect nodes to form graph.
402          */
403 
404           /* connect dag header to block node */
405           RF_ASSERT(blockNode->numAntecedents == 0);
406           dag_h->succedents[0] = blockNode;
407 
408           if (nRodNodes > 0) {
409                     /* connect the block node to the Rod nodes */
410                     RF_ASSERT(blockNode->numSuccedents == nRodNodes);
411                     RF_ASSERT(xorNode->numAntecedents == nRodNodes);
412                     tmpNode = rodNodes;
413                     for (i = 0; i < nRodNodes; i++) {
414                               RF_ASSERT(tmpNode->numAntecedents == 1);
415                               blockNode->succedents[i] = tmpNode;
416                               tmpNode->antecedents[0] = blockNode;
417                               tmpNode->antType[0] = rf_control;
418 
419                               /* connect the Rod nodes to the Xor node */
420                               RF_ASSERT(tmpNode->numSuccedents == 1);
421                               tmpNode->succedents[0] = xorNode;
422                               xorNode->antecedents[i] = tmpNode;
423                               xorNode->antType[i] = rf_trueData;
424                               tmpNode = tmpNode->list_next;
425                     }
426           } else {
427                     /* connect the block node to the Xor node */
428                     RF_ASSERT(blockNode->numSuccedents == 1);
429                     RF_ASSERT(xorNode->numAntecedents == 1);
430                     blockNode->succedents[0] = xorNode;
431                     xorNode->antecedents[0] = blockNode;
432                     xorNode->antType[0] = rf_control;
433           }
434 
435           /* connect the xor node to the commit node */
436           RF_ASSERT(xorNode->numSuccedents == 1);
437           RF_ASSERT(commitNode->numAntecedents == 1);
438           xorNode->succedents[0] = commitNode;
439           commitNode->antecedents[0] = xorNode;
440           commitNode->antType[0] = rf_control;
441 
442           /* connect the commit node to the write nodes */
443           RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
444           tmpNode = wndNodes;
445           for (i = 0; i < nWndNodes; i++) {
446                     RF_ASSERT(wndNodes->numAntecedents == 1);
447                     commitNode->succedents[i] = tmpNode;
448                     tmpNode->antecedents[0] = commitNode;
449                     tmpNode->antType[0] = rf_control;
450                     tmpNode = tmpNode->list_next;
451           }
452           RF_ASSERT(wnpNode->numAntecedents == 1);
453           commitNode->succedents[nWndNodes] = wnpNode;
454           wnpNode->antecedents[0] = commitNode;
455           wnpNode->antType[0] = rf_trueData;
456 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
457           if (nfaults == 2) {
458                     RF_ASSERT(wnqNode->numAntecedents == 1);
459                     commitNode->succedents[nWndNodes + 1] = wnqNode;
460                     wnqNode->antecedents[0] = commitNode;
461                     wnqNode->antType[0] = rf_trueData;
462           }
463 #endif
464           /* connect the write nodes to the term node */
465           RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
466           RF_ASSERT(termNode->numSuccedents == 0);
467           tmpNode = wndNodes;
468           for (i = 0; i < nWndNodes; i++) {
469                     RF_ASSERT(wndNodes->numSuccedents == 1);
470                     tmpNode->succedents[0] = termNode;
471                     termNode->antecedents[i] = tmpNode;
472                     termNode->antType[i] = rf_control;
473                     tmpNode = tmpNode->list_next;
474           }
475           RF_ASSERT(wnpNode->numSuccedents == 1);
476           wnpNode->succedents[0] = termNode;
477           termNode->antecedents[nWndNodes] = wnpNode;
478           termNode->antType[nWndNodes] = rf_control;
479 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
480           if (nfaults == 2) {
481                     RF_ASSERT(wnqNode->numSuccedents == 1);
482                     wnqNode->succedents[0] = termNode;
483                     termNode->antecedents[nWndNodes + 1] = wnqNode;
484                     termNode->antType[nWndNodes + 1] = rf_control;
485           }
486 #endif
487 }
488 /******************************************************************************
489  *
490  * creates a DAG to perform a small-write operation (either raid 5 or pq),
491  * which is as follows:
492  *
493  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
494  *            \- Rod X      /     \----> Wnd [Und]-/
495  *           [\- Rod X     /       \---> Wnd [Und]-/]
496  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
497  *
498  * Rop = read old parity
499  * Rod = read old data
500  * Roq = read old "q"
501  * Cmt = commit node
502  * Und = unlock data disk
503  * Unp = unlock parity disk
504  * Unq = unlock q disk
505  * Wnp = write new parity
506  * Wnd = write new data
507  * Wnq = write new "q"
508  * [ ] denotes optional segments in the graph
509  *
510  * Parameters:  raidPtr   - description of the physical array
511  *              asmap     - logical & physical addresses for this access
512  *              bp        - buffer ptr (holds write data)
513  *              flags     - general flags (e.g. disk locking)
514  *              allocList - list of memory allocated in DAG creation
515  *              pfuncs    - list of parity generating functions
516  *              qfuncs    - list of q generating functions
517  *
518  * A null qfuncs indicates single fault tolerant
519  *****************************************************************************/
520 
521 void
rf_CommonCreateSmallWriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList,const RF_RedFuncs_t * pfuncs,const RF_RedFuncs_t * qfuncs)522 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
523                                    RF_DagHeader_t *dag_h, void *bp,
524                                    RF_RaidAccessFlags_t flags,
525                                    RF_AllocListElem_t *allocList,
526                                    const RF_RedFuncs_t *pfuncs,
527                                    const RF_RedFuncs_t *qfuncs)
528 {
529           RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
530           RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
531           RF_DagNode_t *xorNodes, *blockNode, *commitNode;
532           RF_DagNode_t *writeDataNodes, *writeParityNodes;
533           RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
534           RF_DagNode_t *tmpwriteParityNode;
535 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
536           RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
537                *writeQNodes, *qNodes;
538 #endif
539           int     i, j, nNodes;
540           RF_ReconUnitNum_t which_ru;
541           void    (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
542           void    (*qfunc) (RF_DagNode_t *) __unused;
543           int     numDataNodes, numParityNodes;
544           RF_StripeNum_t parityStripeID;
545           RF_PhysDiskAddr_t *pda;
546           const char *name, *qname __unused;
547           long    nfaults;
548 
549           nfaults = qfuncs ? 2 : 1;
550 
551           parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
552               asmap->raidAddress, &which_ru);
553           pda = asmap->physInfo;
554           numDataNodes = asmap->numStripeUnitsAccessed;
555           numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
556 
557 #if RF_DEBUG_DAG
558           if (rf_dagDebug) {
559                     printf("[Creating small-write DAG]\n");
560           }
561 #endif
562           RF_ASSERT(numDataNodes > 0);
563           dag_h->creator = "SmallWriteDAG";
564 
565           dag_h->numCommitNodes = 1;
566           dag_h->numCommits = 0;
567           dag_h->numSuccedents = 1;
568 
569           /*
570          * DAG creation occurs in four steps:
571          * 1. count the number of nodes in the DAG
572          * 2. create the nodes
573          * 3. initialize the nodes
574          * 4. connect the nodes
575          */
576 
577           /*
578          * Step 1. compute number of nodes in the graph
579          */
580 
581           /* number of nodes: a read and write for each data unit a
582            * redundancy computation node for each parity node (nfaults *
583            * nparity) a read and write for each parity unit a block and
584            * commit node (2) a terminate node if atomic RMW an unlock
585            * node for each data unit, redundancy unit
586            * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
587            *   + (nfaults * 2 * numParityNodes) + 3;
588            */
589 
590           /*
591          * Step 2. create the nodes
592          */
593 
594           blockNode = rf_AllocDAGNode(raidPtr);
595           blockNode->list_next = dag_h->nodes;
596           dag_h->nodes = blockNode;
597 
598           commitNode = rf_AllocDAGNode(raidPtr);
599           commitNode->list_next = dag_h->nodes;
600           dag_h->nodes = commitNode;
601 
602           for (i = 0; i < numDataNodes; i++) {
603                     tmpNode = rf_AllocDAGNode(raidPtr);
604                     tmpNode->list_next = dag_h->nodes;
605                     dag_h->nodes = tmpNode;
606           }
607           readDataNodes = dag_h->nodes;
608 
609           for (i = 0; i < numParityNodes; i++) {
610                     tmpNode = rf_AllocDAGNode(raidPtr);
611                     tmpNode->list_next = dag_h->nodes;
612                     dag_h->nodes = tmpNode;
613           }
614           readParityNodes = dag_h->nodes;
615 
616           for (i = 0; i < numDataNodes; i++) {
617                     tmpNode = rf_AllocDAGNode(raidPtr);
618                     tmpNode->list_next = dag_h->nodes;
619                     dag_h->nodes = tmpNode;
620           }
621           writeDataNodes = dag_h->nodes;
622 
623           for (i = 0; i < numParityNodes; i++) {
624                     tmpNode = rf_AllocDAGNode(raidPtr);
625                     tmpNode->list_next = dag_h->nodes;
626                     dag_h->nodes = tmpNode;
627           }
628           writeParityNodes = dag_h->nodes;
629 
630           for (i = 0; i < numParityNodes; i++) {
631                     tmpNode = rf_AllocDAGNode(raidPtr);
632                     tmpNode->list_next = dag_h->nodes;
633                     dag_h->nodes = tmpNode;
634           }
635           xorNodes = dag_h->nodes;
636 
637           termNode = rf_AllocDAGNode(raidPtr);
638           termNode->list_next = dag_h->nodes;
639           dag_h->nodes = termNode;
640 
641 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
642           if (nfaults == 2) {
643                     for (i = 0; i < numParityNodes; i++) {
644                               tmpNode = rf_AllocDAGNode(raidPtr);
645                               tmpNode->list_next = dag_h->nodes;
646                               dag_h->nodes = tmpNode;
647                     }
648                     readQNodes = dag_h->nodes;
649 
650                     for (i = 0; i < numParityNodes; i++) {
651                               tmpNode = rf_AllocDAGNode(raidPtr);
652                               tmpNode->list_next = dag_h->nodes;
653                               dag_h->nodes = tmpNode;
654                     }
655                     writeQNodes = dag_h->nodes;
656 
657                     for (i = 0; i < numParityNodes; i++) {
658                               tmpNode = rf_AllocDAGNode(raidPtr);
659                               tmpNode->list_next = dag_h->nodes;
660                               dag_h->nodes = tmpNode;
661                     }
662                     qNodes = dag_h->nodes;
663           } else {
664                     readQNodes = writeQNodes = qNodes = NULL;
665           }
666 #endif
667 
668           /*
669          * Step 3. initialize the nodes
670          */
671           /* initialize block node (Nil) */
672           nNodes = numDataNodes + (nfaults * numParityNodes);
673           rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
674                         rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
675                         dag_h, "Nil", allocList);
676 
677           /* initialize commit node (Cmt) */
678           rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
679                         rf_NullNodeUndoFunc, NULL, nNodes,
680                         (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
681 
682           /* initialize terminate node (Trm) */
683           rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
684                         rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
685                         dag_h, "Trm", allocList);
686 
687           /* initialize nodes which read old data (Rod) */
688           tmpreadDataNode = readDataNodes;
689           for (i = 0; i < numDataNodes; i++) {
690                     rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
691                                   rf_DiskReadFunc, rf_DiskReadUndoFunc,
692                                   rf_GenericWakeupFunc, (nfaults * numParityNodes),
693                                   1, 4, 0, dag_h, "Rod", allocList);
694                     RF_ASSERT(pda != NULL);
695                     /* physical disk addr desc */
696                     tmpreadDataNode->params[0].p = pda;
697                     /* buffer to hold old data */
698                     tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
699                     tmpreadDataNode->params[2].v = parityStripeID;
700                     tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
701                         which_ru);
702                     pda = pda->next;
703                     for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
704                               tmpreadDataNode->propList[j] = NULL;
705                     }
706                     tmpreadDataNode = tmpreadDataNode->list_next;
707           }
708 
709           /* initialize nodes which read old parity (Rop) */
710           pda = asmap->parityInfo;
711           i = 0;
712           tmpreadParityNode = readParityNodes;
713           for (i = 0; i < numParityNodes; i++) {
714                     RF_ASSERT(pda != NULL);
715                     rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
716                                   rf_DiskReadFunc, rf_DiskReadUndoFunc,
717                                   rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
718                                   dag_h, "Rop", allocList);
719                     tmpreadParityNode->params[0].p = pda;
720                     /* buffer to hold old parity */
721                     tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
722                     tmpreadParityNode->params[2].v = parityStripeID;
723                     tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
724                         which_ru);
725                     pda = pda->next;
726                     for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
727                               tmpreadParityNode->propList[0] = NULL;
728                     }
729                     tmpreadParityNode = tmpreadParityNode->list_next;
730           }
731 
732 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
733           /* initialize nodes which read old Q (Roq) */
734           if (nfaults == 2) {
735                     pda = asmap->qInfo;
736                     tmpreadQNode = readQNodes;
737                     for (i = 0; i < numParityNodes; i++) {
738                               RF_ASSERT(pda != NULL);
739                               rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
740                                             rf_DiskReadFunc, rf_DiskReadUndoFunc,
741                                             rf_GenericWakeupFunc, numParityNodes,
742                                             1, 4, 0, dag_h, "Roq", allocList);
743                               tmpreadQNode->params[0].p = pda;
744                               /* buffer to hold old Q */
745                               tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
746                                                                                    pda->numSector << raidPtr->logBytesPerSector);
747                               tmpreadQNode->params[2].v = parityStripeID;
748                               tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
749                                   which_ru);
750                               pda = pda->next;
751                               for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
752                                         tmpreadQNode->propList[0] = NULL;
753                               }
754                               tmpreadQNode = tmpreadQNode->list_next;
755                     }
756           }
757 #endif
758           /* initialize nodes which write new data (Wnd) */
759           pda = asmap->physInfo;
760           tmpwriteDataNode = writeDataNodes;
761           for (i = 0; i < numDataNodes; i++) {
762                     RF_ASSERT(pda != NULL);
763                     rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
764                                   rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
765                                   rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
766                                   "Wnd", allocList);
767                     /* physical disk addr desc */
768                     tmpwriteDataNode->params[0].p = pda;
769                     /* buffer holding new data to be written */
770                     tmpwriteDataNode->params[1].p = pda->bufPtr;
771                     tmpwriteDataNode->params[2].v = parityStripeID;
772                     tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
773                         which_ru);
774                     pda = pda->next;
775                     tmpwriteDataNode = tmpwriteDataNode->list_next;
776           }
777 
778           /*
779          * Initialize nodes which compute new parity and Q.
780          */
781           /*
782          * We use the simple XOR func in the double-XOR case, and when
783          * we're accessing only a portion of one stripe unit. The
784          * distinction between the two is that the regular XOR func
785          * assumes that the targbuf is a full SU in size, and examines
786          * the pda associated with the buffer to decide where within
787          * the buffer to XOR the data, whereas the simple XOR func
788          * just XORs the data into the start of the buffer.  */
789           if ((numParityNodes == 2) || ((numDataNodes == 1)
790                     && (asmap->totalSectorsAccessed <
791                         raidPtr->Layout.sectorsPerStripeUnit))) {
792                     func = pfuncs->simple;
793                     undoFunc = rf_NullNodeUndoFunc;
794                     name = pfuncs->SimpleName;
795                     if (qfuncs) {
796                               qfunc = qfuncs->simple;
797                               qname = qfuncs->SimpleName;
798                     } else {
799                               qfunc = NULL;
800                               qname = NULL;
801                     }
802           } else {
803                     func = pfuncs->regular;
804                     undoFunc = rf_NullNodeUndoFunc;
805                     name = pfuncs->RegularName;
806                     if (qfuncs) {
807                               qfunc = qfuncs->regular;
808                               qname = qfuncs->RegularName;
809                     } else {
810                               qfunc = NULL;
811                               qname = NULL;
812                     }
813           }
814           /*
815          * Initialize the xor nodes: params are {pda,buf}
816          * from {Rod,Wnd,Rop} nodes, and raidPtr
817          */
818           if (numParityNodes == 2) {
819                     /* double-xor case */
820                     tmpxorNode = xorNodes;
821                     tmpreadDataNode = readDataNodes;
822                     tmpreadParityNode = readParityNodes;
823                     tmpwriteDataNode = writeDataNodes;
824 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
825                     tmpqNode = qNodes;
826                     tmpreadQNode = readQNodes;
827 #endif
828                     for (i = 0; i < numParityNodes; i++) {
829                               /* note: no wakeup func for xor */
830                               rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
831                                             undoFunc, NULL, 1,
832                                             (numDataNodes + numParityNodes),
833                                             7, 1, dag_h, name, allocList);
834                               tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
835                               tmpxorNode->params[0] = tmpreadDataNode->params[0];
836                               tmpxorNode->params[1] = tmpreadDataNode->params[1];
837                               tmpxorNode->params[2] = tmpreadParityNode->params[0];
838                               tmpxorNode->params[3] = tmpreadParityNode->params[1];
839                               tmpxorNode->params[4] = tmpwriteDataNode->params[0];
840                               tmpxorNode->params[5] = tmpwriteDataNode->params[1];
841                               tmpxorNode->params[6].p = raidPtr;
842                               /* use old parity buf as target buf */
843                               tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
844 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
845                               if (nfaults == 2) {
846                                         /* note: no wakeup func for qor */
847                                         rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
848                                                       qfunc, undoFunc, NULL, 1,
849                                                       (numDataNodes + numParityNodes),
850                                                       7, 1, dag_h, qname, allocList);
851                                         tmpqNode->params[0] = tmpreadDataNode->params[0];
852                                         tmpqNode->params[1] = tmpreadDataNode->params[1];
853                                         tmpqNode->params[2] = tmpreadQNode->params[0];
854                                         tmpqNode->params[3] = tmpreadQNode->params[1];
855                                         tmpqNode->params[4] = tmpwriteDataNode->params[0];
856                                         tmpqNode->params[5] = tmpwriteDataNode->params[1];
857                                         tmpqNode->params[6].p = raidPtr;
858                                         /* use old Q buf as target buf */
859                                         tmpqNode->results[0] = tmpreadQNode->params[1].p;
860                                         tmpqNode = tmpqNode->list_next;
861                                         tmpreadQNode = tmpreadQNode->list_next;
862                               }
863 #endif
864                               tmpxorNode = tmpxorNode->list_next;
865                               tmpreadDataNode = tmpreadDataNode->list_next;
866                               tmpreadParityNode = tmpreadParityNode->list_next;
867                               tmpwriteDataNode = tmpwriteDataNode->list_next;
868                     }
869           } else {
870                     /* there is only one xor node in this case */
871                     rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
872                                   undoFunc, NULL, 1, (numDataNodes + numParityNodes),
873                                   (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
874                                   dag_h, name, allocList);
875                     xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
876                     tmpreadDataNode = readDataNodes;
877                     for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
878                                                                       out the "+1" into the "deal with Rop separately below */
879                               /* set up params related to Rod nodes */
880                               xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];   /* pda */
881                               xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];   /* buffer ptr */
882                               tmpreadDataNode = tmpreadDataNode->list_next;
883                     }
884                     /* deal with Rop separately */
885                     xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
886                     xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
887 
888                     tmpwriteDataNode = writeDataNodes;
889                     for (i = 0; i < numDataNodes; i++) {
890                               /* set up params related to Wnd and Wnp nodes */
891                               xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =          /* pda */
892                                   tmpwriteDataNode->params[0];
893                               xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =          /* buffer ptr */
894                                   tmpwriteDataNode->params[1];
895                               tmpwriteDataNode = tmpwriteDataNode->list_next;
896                     }
897                     /* xor node needs to get at RAID information */
898                     xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
899                     xorNodes->results[0] = readParityNodes->params[1].p;
900 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
901                     if (nfaults == 2) {
902                               rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
903                                             undoFunc, NULL, 1,
904                                             (numDataNodes + numParityNodes),
905                                             (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
906                                             dag_h, qname, allocList);
907                               tmpreadDataNode = readDataNodes;
908                               for (i = 0; i < numDataNodes; i++) {
909                                         /* set up params related to Rod */
910                                         qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];     /* pda */
911                                         qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];     /* buffer ptr */
912                                         tmpreadDataNode = tmpreadDataNode->list_next;
913                               }
914                               /* and read old q */
915                               qNodes->params[2 * numDataNodes + 0] =  /* pda */
916                                   readQNodes->params[0];
917                               qNodes->params[2 * numDataNodes + 1] =  /* buffer ptr */
918                                   readQNodes->params[1];
919                               tmpwriteDataNode = writeDataNodes;
920                               for (i = 0; i < numDataNodes; i++) {
921                                         /* set up params related to Wnd nodes */
922                                         qNodes->params[2 * (numDataNodes + 1 + i) + 0] =  /* pda */
923                                             tmpwriteDataNode->params[0];
924                                         qNodes->params[2 * (numDataNodes + 1 + i) + 1] =  /* buffer ptr */
925                                             tmpwriteDataNode->params[1];
926                                         tmpwriteDataNode = tmpwriteDataNode->list_next;
927                               }
928                               /* xor node needs to get at RAID information */
929                               qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
930                               qNodes->results[0] = readQNodes->params[1].p;
931                     }
932 #endif
933           }
934 
935           /* initialize nodes which write new parity (Wnp) */
936           pda = asmap->parityInfo;
937           tmpwriteParityNode = writeParityNodes;
938           tmpxorNode = xorNodes;
939           for (i = 0; i < numParityNodes; i++) {
940                     rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
941                                   rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
942                                   rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
943                                   "Wnp", allocList);
944                     RF_ASSERT(pda != NULL);
945                     tmpwriteParityNode->params[0].p = pda;  /* param 1 (bufPtr)
946                                                                        * filled in by xor node */
947                     tmpwriteParityNode->params[1].p = tmpxorNode->results[0];   /* buffer pointer for
948                                                                                                      * parity write
949                                                                                                      * operation */
950                     tmpwriteParityNode->params[2].v = parityStripeID;
951                     tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
952                         which_ru);
953                     pda = pda->next;
954                     tmpwriteParityNode = tmpwriteParityNode->list_next;
955                     tmpxorNode = tmpxorNode->list_next;
956           }
957 
958 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
959           /* initialize nodes which write new Q (Wnq) */
960           if (nfaults == 2) {
961                     pda = asmap->qInfo;
962                     tmpwriteQNode = writeQNodes;
963                     tmpqNode = qNodes;
964                     for (i = 0; i < numParityNodes; i++) {
965                               rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
966                                             rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
967                                             rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
968                                             "Wnq", allocList);
969                               RF_ASSERT(pda != NULL);
970                               tmpwriteQNode->params[0].p = pda;       /* param 1 (bufPtr)
971                                                                                  * filled in by xor node */
972                               tmpwriteQNode->params[1].p = tmpqNode->results[0];          /* buffer pointer for
973                                                                                                      * parity write
974                                                                                                      * operation */
975                               tmpwriteQNode->params[2].v = parityStripeID;
976                               tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
977                                   which_ru);
978                               pda = pda->next;
979                               tmpwriteQNode = tmpwriteQNode->list_next;
980                               tmpqNode = tmpqNode->list_next;
981                     }
982           }
983 #endif
984           /*
985          * Step 4. connect the nodes.
986          */
987 
988           /* connect header to block node */
989           dag_h->succedents[0] = blockNode;
990 
991           /* connect block node to read old data nodes */
992           RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
993           tmpreadDataNode = readDataNodes;
994           for (i = 0; i < numDataNodes; i++) {
995                     blockNode->succedents[i] = tmpreadDataNode;
996                     RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
997                     tmpreadDataNode->antecedents[0] = blockNode;
998                     tmpreadDataNode->antType[0] = rf_control;
999                     tmpreadDataNode = tmpreadDataNode->list_next;
1000           }
1001 
1002           /* connect block node to read old parity nodes */
1003           tmpreadParityNode = readParityNodes;
1004           for (i = 0; i < numParityNodes; i++) {
1005                     blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1006                     RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1007                     tmpreadParityNode->antecedents[0] = blockNode;
1008                     tmpreadParityNode->antType[0] = rf_control;
1009                     tmpreadParityNode = tmpreadParityNode->list_next;
1010           }
1011 
1012 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1013           /* connect block node to read old Q nodes */
1014           if (nfaults == 2) {
1015                     tmpreadQNode = readQNodes;
1016                     for (i = 0; i < numParityNodes; i++) {
1017                               blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1018                               RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1019                               tmpreadQNode->antecedents[0] = blockNode;
1020                               tmpreadQNode->antType[0] = rf_control;
1021                               tmpreadQNode = tmpreadQNode->list_next;
1022                     }
1023           }
1024 #endif
1025           /* connect read old data nodes to xor nodes */
1026           tmpreadDataNode = readDataNodes;
1027           for (i = 0; i < numDataNodes; i++) {
1028                     RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1029                     tmpxorNode = xorNodes;
1030                     for (j = 0; j < numParityNodes; j++) {
1031                               RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1032                               tmpreadDataNode->succedents[j] = tmpxorNode;
1033                               tmpxorNode->antecedents[i] = tmpreadDataNode;
1034                               tmpxorNode->antType[i] = rf_trueData;
1035                               tmpxorNode = tmpxorNode->list_next;
1036                     }
1037                     tmpreadDataNode = tmpreadDataNode->list_next;
1038           }
1039 
1040 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1041           /* connect read old data nodes to q nodes */
1042           if (nfaults == 2) {
1043                     tmpreadDataNode = readDataNodes;
1044                     for (i = 0; i < numDataNodes; i++) {
1045                               tmpqNode = qNodes;
1046                               for (j = 0; j < numParityNodes; j++) {
1047                                         RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1048                                         tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1049                                         tmpqNode->antecedents[i] = tmpreadDataNode;
1050                                         tmpqNode->antType[i] = rf_trueData;
1051                                         tmpqNode = tmpqNode->list_next;
1052                               }
1053                               tmpreadDataNode = tmpreadDataNode->list_next;
1054                     }
1055           }
1056 #endif
1057           /* connect read old parity nodes to xor nodes */
1058           tmpreadParityNode = readParityNodes;
1059           for (i = 0; i < numParityNodes; i++) {
1060                     RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1061                     tmpxorNode = xorNodes;
1062                     for (j = 0; j < numParityNodes; j++) {
1063                               tmpreadParityNode->succedents[j] = tmpxorNode;
1064                               tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1065                               tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1066                               tmpxorNode = tmpxorNode->list_next;
1067                     }
1068                     tmpreadParityNode = tmpreadParityNode->list_next;
1069           }
1070 
1071 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1072           /* connect read old q nodes to q nodes */
1073           if (nfaults == 2) {
1074                     tmpreadParityNode = readParityNodes;
1075                     tmpreadQNode = readQNodes;
1076                     for (i = 0; i < numParityNodes; i++) {
1077                               RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1078                               tmpqNode = qNodes;
1079                               for (j = 0; j < numParityNodes; j++) {
1080                                         tmpreadQNode->succedents[j] = tmpqNode;
1081                                         tmpqNode->antecedents[numDataNodes + i] = tmpreadQNode;
1082                                         tmpqNode->antType[numDataNodes + i] = rf_trueData;
1083                                         tmpqNode = tmpqNode->list_next;
1084                               }
1085                               tmpreadParityNode = tmpreadParityNode->list_next;
1086                               tmpreadQNode = tmpreadQNode->list_next;
1087                     }
1088           }
1089 #endif
1090           /* connect xor nodes to commit node */
1091           RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1092           tmpxorNode = xorNodes;
1093           for (i = 0; i < numParityNodes; i++) {
1094                     RF_ASSERT(tmpxorNode->numSuccedents == 1);
1095                     tmpxorNode->succedents[0] = commitNode;
1096                     commitNode->antecedents[i] = tmpxorNode;
1097                     commitNode->antType[i] = rf_control;
1098                     tmpxorNode = tmpxorNode->list_next;
1099           }
1100 
1101 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1102           /* connect q nodes to commit node */
1103           if (nfaults == 2) {
1104                     tmpqNode = qNodes;
1105                     for (i = 0; i < numParityNodes; i++) {
1106                               RF_ASSERT(tmpqNode->numSuccedents == 1);
1107                               tmpqNode->succedents[0] = commitNode;
1108                               commitNode->antecedents[i + numParityNodes] = tmpqNode;
1109                               commitNode->antType[i + numParityNodes] = rf_control;
1110                               tmpqNode = tmpqNode->list_next;
1111                     }
1112           }
1113 #endif
1114           /* connect commit node to write nodes */
1115           RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1116           tmpwriteDataNode = writeDataNodes;
1117           for (i = 0; i < numDataNodes; i++) {
1118                     RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1119                     commitNode->succedents[i] = tmpwriteDataNode;
1120                     tmpwriteDataNode->antecedents[0] = commitNode;
1121                     tmpwriteDataNode->antType[0] = rf_trueData;
1122                     tmpwriteDataNode = tmpwriteDataNode->list_next;
1123           }
1124           tmpwriteParityNode = writeParityNodes;
1125           for (i = 0; i < numParityNodes; i++) {
1126                     RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1127                     commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1128                     tmpwriteParityNode->antecedents[0] = commitNode;
1129                     tmpwriteParityNode->antType[0] = rf_trueData;
1130                     tmpwriteParityNode = tmpwriteParityNode->list_next;
1131           }
1132 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1133           if (nfaults == 2) {
1134                     tmpwriteQNode = writeQNodes;
1135                     for (i = 0; i < numParityNodes; i++) {
1136                               RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1137                               commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1138                               tmpwriteQNode->antecedents[0] = commitNode;
1139                               tmpwriteQNode->antType[0] = rf_trueData;
1140                               tmpwriteQNode = tmpwriteQNode->list_next;
1141                     }
1142           }
1143 #endif
1144           RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1145           RF_ASSERT(termNode->numSuccedents == 0);
1146           tmpwriteDataNode = writeDataNodes;
1147           for (i = 0; i < numDataNodes; i++) {
1148                     /* connect write new data nodes to term node */
1149                     RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1150                     RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1151                     tmpwriteDataNode->succedents[0] = termNode;
1152                     termNode->antecedents[i] = tmpwriteDataNode;
1153                     termNode->antType[i] = rf_control;
1154                     tmpwriteDataNode = tmpwriteDataNode->list_next;
1155           }
1156 
1157           tmpwriteParityNode = writeParityNodes;
1158           for (i = 0; i < numParityNodes; i++) {
1159                     RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1160                     tmpwriteParityNode->succedents[0] = termNode;
1161                     termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1162                     termNode->antType[numDataNodes + i] = rf_control;
1163                     tmpwriteParityNode = tmpwriteParityNode->list_next;
1164           }
1165 
1166 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1167           if (nfaults == 2) {
1168                     tmpwriteQNode = writeQNodes;
1169                     for (i = 0; i < numParityNodes; i++) {
1170                               RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1171                               tmpwriteQNode->succedents[0] = termNode;
1172                               termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1173                               termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1174                               tmpwriteQNode = tmpwriteQNode->list_next;
1175                     }
1176           }
1177 #endif
1178 }
1179 
1180 
/******************************************************************************
 * create a write graph (fault-free or degraded) for RAID level 1
 *
 * Hdr -> Commit -> Wpd -> Nil -> Trm
 *               -> Wsd ->
 *
 * The "Wpd" node writes data to the primary copy in the mirror pair.
 * The "Wsd" node writes data to the secondary copy in the mirror pair.
 *
 * Parameters:  raidPtr   - description of the physical array
 *              asmap     - logical & physical addresses for this access
 *              dag_h     - header of the DAG being created
 *              bp        - buffer ptr (holds write data)
 *              flags     - general flags (e.g. disk locking)
 *              allocList - list of memory allocated in DAG creation
 *****************************************************************************/
1196 
1197 void
rf_CreateRaidOneWriteDAG(RF_Raid_t * raidPtr,RF_AccessStripeMap_t * asmap,RF_DagHeader_t * dag_h,void * bp,RF_RaidAccessFlags_t flags,RF_AllocListElem_t * allocList)1198 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1199                                RF_DagHeader_t *dag_h, void *bp,
1200                                RF_RaidAccessFlags_t flags,
1201                                RF_AllocListElem_t *allocList)
1202 {
1203           RF_DagNode_t *unblockNode, *termNode, *commitNode;
1204           RF_DagNode_t *wndNode, *wmirNode;
1205           RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1206           int     nWndNodes, nWmirNodes, i;
1207           RF_ReconUnitNum_t which_ru;
1208           RF_PhysDiskAddr_t *pda, *pdaP;
1209           RF_StripeNum_t parityStripeID;
1210 
1211           parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1212               asmap->raidAddress, &which_ru);
1213 #if RF_DEBUG_DAG
1214           if (rf_dagDebug) {
1215                     printf("[Creating RAID level 1 write DAG]\n");
1216           }
1217 #endif
1218           dag_h->creator = "RaidOneWriteDAG";
1219 
1220           /* 2 implies access not SU aligned */
1221           nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1222           nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1223 
1224           /* alloc the Wnd nodes and the Wmir node */
1225           if (asmap->numDataFailed == 1)
1226                     nWndNodes--;
1227           if (asmap->numParityFailed == 1)
1228                     nWmirNodes--;
1229 
1230           /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1231            * + terminator) */
1232           for (i = 0; i < nWndNodes; i++) {
1233                     tmpNode = rf_AllocDAGNode(raidPtr);
1234                     tmpNode->list_next = dag_h->nodes;
1235                     dag_h->nodes = tmpNode;
1236           }
1237           wndNode = dag_h->nodes;
1238 
1239           for (i = 0; i < nWmirNodes; i++) {
1240                     tmpNode = rf_AllocDAGNode(raidPtr);
1241                     tmpNode->list_next = dag_h->nodes;
1242                     dag_h->nodes = tmpNode;
1243           }
1244           wmirNode = dag_h->nodes;
1245 
1246           commitNode = rf_AllocDAGNode(raidPtr);
1247           commitNode->list_next = dag_h->nodes;
1248           dag_h->nodes = commitNode;
1249 
1250           unblockNode = rf_AllocDAGNode(raidPtr);
1251           unblockNode->list_next = dag_h->nodes;
1252           dag_h->nodes = unblockNode;
1253 
1254           termNode = rf_AllocDAGNode(raidPtr);
1255           termNode->list_next = dag_h->nodes;
1256           dag_h->nodes = termNode;
1257 
1258           /* this dag can commit immediately */
1259           dag_h->numCommitNodes = 1;
1260           dag_h->numCommits = 0;
1261           dag_h->numSuccedents = 1;
1262 
1263           /* initialize the commit, unblock, and term nodes */
1264           rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1265                         rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1266                         0, 0, 0, dag_h, "Cmt", allocList);
1267           rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1268                         rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1269                         0, 0, dag_h, "Nil", allocList);
1270           rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1271                         rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1272                         dag_h, "Trm", allocList);
1273 
1274           /* initialize the wnd nodes */
1275           if (nWndNodes > 0) {
1276                     pda = asmap->physInfo;
1277                     tmpwndNode = wndNode;
1278                     for (i = 0; i < nWndNodes; i++) {
1279                               rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1280                                             rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1281                                             rf_GenericWakeupFunc, 1, 1, 4, 0,
1282                                             dag_h, "Wpd", allocList);
1283                               RF_ASSERT(pda != NULL);
1284                               tmpwndNode->params[0].p = pda;
1285                               tmpwndNode->params[1].p = pda->bufPtr;
1286                               tmpwndNode->params[2].v = parityStripeID;
1287                               tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1288                               pda = pda->next;
1289                               tmpwndNode = tmpwndNode->list_next;
1290                     }
1291                     RF_ASSERT(pda == NULL);
1292           }
1293           /* initialize the mirror nodes */
1294           if (nWmirNodes > 0) {
1295                     pda = asmap->physInfo;
1296                     pdaP = asmap->parityInfo;
1297                     tmpwmirNode = wmirNode;
1298                     for (i = 0; i < nWmirNodes; i++) {
1299                               rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1300                                             rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1301                                             rf_GenericWakeupFunc, 1, 1, 4, 0,
1302                                             dag_h, "Wsd", allocList);
1303                               RF_ASSERT(pda != NULL);
1304                               tmpwmirNode->params[0].p = pdaP;
1305                               tmpwmirNode->params[1].p = pda->bufPtr;
1306                               tmpwmirNode->params[2].v = parityStripeID;
1307                               tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1308                               pda = pda->next;
1309                               pdaP = pdaP->next;
1310                               tmpwmirNode = tmpwmirNode->list_next;
1311                     }
1312                     RF_ASSERT(pda == NULL);
1313                     RF_ASSERT(pdaP == NULL);
1314           }
1315           /* link the header node to the commit node */
1316           RF_ASSERT(dag_h->numSuccedents == 1);
1317           RF_ASSERT(commitNode->numAntecedents == 0);
1318           dag_h->succedents[0] = commitNode;
1319 
1320           /* link the commit node to the write nodes */
1321           RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1322           tmpwndNode = wndNode;
1323           for (i = 0; i < nWndNodes; i++) {
1324                     RF_ASSERT(tmpwndNode->numAntecedents == 1);
1325                     commitNode->succedents[i] = tmpwndNode;
1326                     tmpwndNode->antecedents[0] = commitNode;
1327                     tmpwndNode->antType[0] = rf_control;
1328                     tmpwndNode = tmpwndNode->list_next;
1329           }
1330           tmpwmirNode = wmirNode;
1331           for (i = 0; i < nWmirNodes; i++) {
1332                     RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1333                     commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1334                     tmpwmirNode->antecedents[0] = commitNode;
1335                     tmpwmirNode->antType[0] = rf_control;
1336                     tmpwmirNode = tmpwmirNode->list_next;
1337           }
1338 
1339           /* link the write nodes to the unblock node */
1340           RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1341           tmpwndNode = wndNode;
1342           for (i = 0; i < nWndNodes; i++) {
1343                     RF_ASSERT(tmpwndNode->numSuccedents == 1);
1344                     tmpwndNode->succedents[0] = unblockNode;
1345                     unblockNode->antecedents[i] = tmpwndNode;
1346                     unblockNode->antType[i] = rf_control;
1347                     tmpwndNode = tmpwndNode->list_next;
1348           }
1349           tmpwmirNode = wmirNode;
1350           for (i = 0; i < nWmirNodes; i++) {
1351                     RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1352                     tmpwmirNode->succedents[0] = unblockNode;
1353                     unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1354                     unblockNode->antType[i + nWndNodes] = rf_control;
1355                     tmpwmirNode = tmpwmirNode->list_next;
1356           }
1357 
1358           /* link the unblock node to the term node */
1359           RF_ASSERT(unblockNode->numSuccedents == 1);
1360           RF_ASSERT(termNode->numAntecedents == 1);
1361           RF_ASSERT(termNode->numSuccedents == 0);
1362           unblockNode->succedents[0] = termNode;
1363           termNode->antecedents[0] = unblockNode;
1364           termNode->antType[0] = rf_control;
1365 }
1366