1 /*	$OpenBSD: rf_states.c,v 1.9 2002/12/16 07:01:05 tdeval Exp $	*/
2 /*	$NetBSD: rf_states.c,v 1.15 2000/10/20 02:24:45 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: Mark Holland, William V. Courtright II, Robby Findler
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 #include <sys/errno.h>
32 
33 #include "rf_archs.h"
34 #include "rf_threadstuff.h"
35 #include "rf_raid.h"
36 #include "rf_dag.h"
37 #include "rf_desc.h"
38 #include "rf_aselect.h"
39 #include "rf_general.h"
40 #include "rf_states.h"
41 #include "rf_dagutils.h"
42 #include "rf_driver.h"
43 #include "rf_engine.h"
44 #include "rf_map.h"
45 #include "rf_etimer.h"
46 #include "rf_kintf.h"
47 
48 /*
49  * Prototypes for some of the available states.
50  *
51  * States must:
52  *
53  *   - not block.
54  *
55  *   - either schedule rf_ContinueRaidAccess as a callback and return
56  *     RF_TRUE, or complete all of their work and return RF_FALSE.
57  *
58  *   - increment desc->state when they have finished their work.
59  */
60 
61 char *StateName(RF_AccessState_t);
62 
63 char *
StateName(RF_AccessState_t state)64 StateName(RF_AccessState_t state)
65 {
66 	switch (state) {
67 		case rf_QuiesceState:return "QuiesceState";
68 	case rf_MapState:
69 		return "MapState";
70 	case rf_LockState:
71 		return "LockState";
72 	case rf_CreateDAGState:
73 		return "CreateDAGState";
74 	case rf_ExecuteDAGState:
75 		return "ExecuteDAGState";
76 	case rf_ProcessDAGState:
77 		return "ProcessDAGState";
78 	case rf_CleanupState:
79 		return "CleanupState";
80 	case rf_LastState:
81 		return "LastState";
82 	case rf_IncrAccessesCountState:
83 		return "IncrAccessesCountState";
84 	case rf_DecrAccessesCountState:
85 		return "DecrAccessesCountState";
86 	default:
87 		return "!!! UnnamedState !!!";
88 	}
89 }
90 
91 void
rf_ContinueRaidAccess(RF_RaidAccessDesc_t * desc)92 rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
93 {
94 	int suspended = RF_FALSE;
95 	int current_state_index = desc->state;
96 	RF_AccessState_t current_state = desc->states[current_state_index];
97 	int unit = desc->raidPtr->raidid;
98 
99 	do {
100 		current_state_index = desc->state;
101 		current_state = desc->states[current_state_index];
102 
103 		switch (current_state) {
104 
105 		case rf_QuiesceState:
106 			suspended = rf_State_Quiesce(desc);
107 			break;
108 		case rf_IncrAccessesCountState:
109 			suspended = rf_State_IncrAccessCount(desc);
110 			break;
111 		case rf_MapState:
112 			suspended = rf_State_Map(desc);
113 			break;
114 		case rf_LockState:
115 			suspended = rf_State_Lock(desc);
116 			break;
117 		case rf_CreateDAGState:
118 			suspended = rf_State_CreateDAG(desc);
119 			break;
120 		case rf_ExecuteDAGState:
121 			suspended = rf_State_ExecuteDAG(desc);
122 			break;
123 		case rf_ProcessDAGState:
124 			suspended = rf_State_ProcessDAG(desc);
125 			break;
126 		case rf_CleanupState:
127 			suspended = rf_State_Cleanup(desc);
128 			break;
129 		case rf_DecrAccessesCountState:
130 			suspended = rf_State_DecrAccessCount(desc);
131 			break;
132 		case rf_LastState:
133 			suspended = rf_State_LastState(desc);
134 			break;
135 		}
136 
137 		/*
138 		 * After this point, we cannot dereference desc since desc may
139 		 * have been freed. desc is only freed in LastState, so if we
140 		 * reenter this function or loop back up, desc should be valid.
141 		 */
142 
143 		if (rf_printStatesDebug) {
144 			printf("raid%d: State: %-24s StateIndex: %3i desc:"
145 			       " 0x%ld %s.\n", unit, StateName(current_state),
146 			       current_state_index, (long) desc, suspended ?
147 			       "callback scheduled" : "looping");
148 		}
149 	} while (!suspended && current_state != rf_LastState);
150 
151 	return;
152 }
153 
154 
155 void
rf_ContinueDagAccess(RF_DagList_t * dagList)156 rf_ContinueDagAccess(RF_DagList_t *dagList)
157 {
158 	RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
159 	RF_RaidAccessDesc_t *desc;
160 	RF_DagHeader_t *dag_h;
161 	RF_Etimer_t timer;
162 	int i;
163 
164 	desc = dagList->desc;
165 
166 	timer = tracerec->timer;
167 	RF_ETIMER_STOP(timer);
168 	RF_ETIMER_EVAL(timer);
169 	tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
170 	RF_ETIMER_START(tracerec->timer);
171 
172 	/* Skip to dag which just finished. */
173 	dag_h = dagList->dags;
174 	for (i = 0; i < dagList->numDagsDone; i++) {
175 		dag_h = dag_h->next;
176 	}
177 
178 	/* Check to see if retry is required. */
179 	if (dag_h->status == rf_rollBackward) {
180 		/*
181 		 * When a dag fails, mark desc status as bad and allow all
182 		 * other dags in the desc to execute to completion. Then,
183 		 * free all dags and start over.
184 		 */
185 		desc->status = 1;	/* Bad status. */
186 		{
187 			printf("raid%d: DAG failure: %c addr 0x%lx (%ld)"
188 			       " nblk 0x%x (%d) buf 0x%lx.\n",
189 			       desc->raidPtr->raidid, desc->type,
190 			       (long) desc->raidAddress,
191 			       (long) desc->raidAddress,
192 			       (int) desc->numBlocks, (int) desc->numBlocks,
193 			       (unsigned long) (desc->bufPtr));
194 		}
195 	}
196 	dagList->numDagsDone++;
197 	rf_ContinueRaidAccess(desc);
198 }
199 
200 int
rf_State_LastState(RF_RaidAccessDesc_t * desc)201 rf_State_LastState(RF_RaidAccessDesc_t *desc)
202 {
203 	void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc;
204 	RF_CBParam_t callbackArg;
205 
206 	callbackArg.p = desc->callbackArg;
207 
208 	/*
209 	 * If this is not an async request, wake up the caller.
210 	 */
211 	if (desc->async_flag == 0)
212 		wakeup(desc->bp);
213 
214 	/*
215 	 * That's all the IO for this one... Unbusy the 'disk'.
216 	 */
217 
218 	rf_disk_unbusy(desc);
219 
220 	/*
221 	 * Wakeup any requests waiting to go.
222 	 */
223 
224 	RF_LOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
225 	((RF_Raid_t *) desc->raidPtr)->openings++;
226 	RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
227 
228 	/* Wake up any pending I/O. */
229 	raidstart(((RF_Raid_t *) desc->raidPtr));
230 
231 	/* printf("%s: Calling biodone on 0x%x.\n", __func__, desc->bp); */
232 	splassert(IPL_BIO);
233 	biodone(desc->bp);	/* Access came through ioctl. */
234 
235 	if (callbackFunc)
236 		callbackFunc(callbackArg);
237 	rf_FreeRaidAccDesc(desc);
238 
239 	return RF_FALSE;
240 }
241 
242 int
rf_State_IncrAccessCount(RF_RaidAccessDesc_t * desc)243 rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
244 {
245 	RF_Raid_t *raidPtr;
246 
247 	raidPtr = desc->raidPtr;
248 	/*
249 	 * Bummer. We have to do this to be 100% safe w.r.t. the increment
250 	 * below.
251 	 */
252 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
253 	raidPtr->accs_in_flight++;	/* Used to detect quiescence. */
254 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
255 
256 	desc->state++;
257 	return RF_FALSE;
258 }
259 
260 int
rf_State_DecrAccessCount(RF_RaidAccessDesc_t * desc)261 rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
262 {
263 	RF_Raid_t *raidPtr;
264 
265 	raidPtr = desc->raidPtr;
266 
267 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
268 	raidPtr->accs_in_flight--;
269 	if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
270 		rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
271 	}
272 	rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer),
273 	    desc->numBlocks);
274 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
275 
276 	desc->state++;
277 	return RF_FALSE;
278 }
279 
280 int
rf_State_Quiesce(RF_RaidAccessDesc_t * desc)281 rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
282 {
283 	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
284 	RF_Etimer_t timer;
285 	int suspended = RF_FALSE;
286 	RF_Raid_t *raidPtr;
287 
288 	raidPtr = desc->raidPtr;
289 
290 	RF_ETIMER_START(timer);
291 	RF_ETIMER_START(desc->timer);
292 
293 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
294 	if (raidPtr->accesses_suspended) {
295 		RF_CallbackDesc_t *cb;
296 		cb = rf_AllocCallbackDesc();
297 		/*
298 		 * XXX The following cast is quite bogus...
299 		 * rf_ContinueRaidAccess takes a (RF_RaidAccessDesc_t *)
300 		 * as an argument... GO
301 		 */
302 		cb->callbackFunc = (void (*) (RF_CBParam_t))
303 		    rf_ContinueRaidAccess;
304 		cb->callbackArg.p = (void *) desc;
305 		cb->next = raidPtr->quiesce_wait_list;
306 		raidPtr->quiesce_wait_list = cb;
307 		suspended = RF_TRUE;
308 	}
309 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
310 
311 	RF_ETIMER_STOP(timer);
312 	RF_ETIMER_EVAL(timer);
313 	tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
314 
315 	if (suspended && rf_quiesceDebug)
316 		printf("Stalling access due to quiescence lock.\n");
317 
318 	desc->state++;
319 	return suspended;
320 }
321 
322 int
rf_State_Map(RF_RaidAccessDesc_t * desc)323 rf_State_Map(RF_RaidAccessDesc_t *desc)
324 {
325 	RF_Raid_t *raidPtr = desc->raidPtr;
326 	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
327 	RF_Etimer_t timer;
328 
329 	RF_ETIMER_START(timer);
330 
331 	if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress,
332 	     desc->numBlocks, desc->bufPtr, RF_DONT_REMAP)))
333 		RF_PANIC();
334 
335 	RF_ETIMER_STOP(timer);
336 	RF_ETIMER_EVAL(timer);
337 	tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
338 
339 	desc->state++;
340 	return RF_FALSE;
341 }
342 
343 int
rf_State_Lock(RF_RaidAccessDesc_t * desc)344 rf_State_Lock(RF_RaidAccessDesc_t *desc)
345 {
346 	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
347 	RF_Raid_t *raidPtr = desc->raidPtr;
348 	RF_AccessStripeMapHeader_t *asmh = desc->asmap;
349 	RF_AccessStripeMap_t *asm_p;
350 	RF_Etimer_t timer;
351 	int suspended = RF_FALSE;
352 
353 	RF_ETIMER_START(timer);
354 	if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
355 		RF_StripeNum_t lastStripeID = -1;
356 
357 		/* Acquire each lock that we don't already hold. */
358 		for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
359 			RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
360 			if (!rf_suppressLocksAndLargeWrites &&
361 			    asm_p->parityInfo &&
362 			    !(desc->flags & RF_DAG_SUPPRESS_LOCKS) &&
363 			    !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) {
364 				asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
365 				/* Locks must be acquired hierarchically. */
366 				RF_ASSERT(asm_p->stripeID > lastStripeID);
367 				lastStripeID = asm_p->stripeID;
368 				/*
369 				 * XXX The cast to (void (*)(RF_CBParam_t))
370 				 * below is bogus !  GO
371 				 */
372 				RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc,
373 				    desc->type, (void (*) (struct buf *))
374 				     rf_ContinueRaidAccess, desc, asm_p,
375 				    raidPtr->Layout.dataSectorsPerStripe);
376 				if (rf_AcquireStripeLock(raidPtr->lockTable,
377 				     asm_p->stripeID, &asm_p->lockReqDesc)) {
378 					suspended = RF_TRUE;
379 					break;
380 				}
381 			}
382 			if (desc->type == RF_IO_TYPE_WRITE &&
383 			    raidPtr->status[asm_p->physInfo->row] ==
384 			    rf_rs_reconstructing) {
385 				if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED))
386 				{
387 					int val;
388 
389 					asm_p->flags |=
390 					    RF_ASM_FLAGS_FORCE_TRIED;
391 					/*
392 					 * XXX The cast below is quite
393 					 * bogus !!! XXX  GO
394 					 */
395 					val = rf_ForceOrBlockRecon(raidPtr,
396 					    asm_p,
397 					    (void (*) (RF_Raid_t *, void *))
398 					     rf_ContinueRaidAccess, desc);
399 					if (val == 0) {
400 						asm_p->flags |=
401 						    RF_ASM_FLAGS_RECON_BLOCKED;
402 					} else {
403 						suspended = RF_TRUE;
404 						break;
405 					}
406 				} else {
407 					if (rf_pssDebug) {
408 						printf("raid%d: skipping"
409 						       " force/block because"
410 						       " already done, psid"
411 						       " %ld.\n",
412 						       desc->raidPtr->raidid,
413 						       (long) asm_p->stripeID);
414 					}
415 				}
416 			} else {
417 				if (rf_pssDebug) {
418 					printf("raid%d: skipping force/block"
419 					       " because not write or not"
420 					       " under recon, psid %ld.\n",
421 					       desc->raidPtr->raidid,
422 					       (long) asm_p->stripeID);
423 				}
424 			}
425 		}
426 
427 		RF_ETIMER_STOP(timer);
428 		RF_ETIMER_EVAL(timer);
429 		tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
430 
431 		if (suspended)
432 			return (RF_TRUE);
433 	}
434 	desc->state++;
435 	return (RF_FALSE);
436 }
437 
438 /*
439  * The following three states create, execute, and post-process DAGs.
440  * The error recovery unit is a single DAG.
441  * By default, SelectAlgorithm creates an array of DAGs, one per parity stripe.
442  * In some tricky cases, multiple dags per stripe are created.
443  *   - DAGs within a parity stripe are executed sequentially (arbitrary order).
444  *   - DAGs for distinct parity stripes are executed concurrently.
445  *
446  * Repeat until all DAGs complete successfully -or- DAG selection fails.
447  *
448  * while !done
449  *   create dag(s) (SelectAlgorithm)
450  *   if dag
451  *     execute dag (DispatchDAG)
452  *     if dag successful
453  *       done (SUCCESS)
454  *     else
455  *       !done (RETRY - start over with new dags)
456  *   else
457  *     done (FAIL)
458  */
459 int
rf_State_CreateDAG(RF_RaidAccessDesc_t * desc)460 rf_State_CreateDAG(RF_RaidAccessDesc_t *desc)
461 {
462 	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
463 	RF_Etimer_t timer;
464 	RF_DagHeader_t *dag_h;
465 	int i, selectStatus;
466 
467 	/*
468 	 * Generate a dag for the access, and fire it off. When the dag
469 	 * completes, we'll get re-invoked in the next state.
470 	 */
471 	RF_ETIMER_START(timer);
472 	/* SelectAlgorithm returns one or more dags. */
473 	selectStatus = rf_SelectAlgorithm(desc,
474 	    desc->flags | RF_DAG_SUPPRESS_LOCKS);
475 	if (rf_printDAGsDebug)
476 		for (i = 0; i < desc->numStripes; i++)
477 			rf_PrintDAGList(desc->dagArray[i].dags);
478 	RF_ETIMER_STOP(timer);
479 	RF_ETIMER_EVAL(timer);
480 	/* Update time to create all dags. */
481 	tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
482 
483 	desc->status = 0;	/* Good status. */
484 
485 	if (selectStatus) {
486 		/* Failed to create a dag. */
487 		/*
488 		 * This happens when there are too many faults or incomplete
489 		 * dag libraries.
490 		 */
491 		printf("[Failed to create a DAG]\n");
492 		RF_PANIC();
493 	} else {
494 		/* Bind dags to desc. */
495 		for (i = 0; i < desc->numStripes; i++) {
496 			dag_h = desc->dagArray[i].dags;
497 			while (dag_h) {
498 				dag_h->bp = (struct buf *) desc->bp;
499 				dag_h->tracerec = tracerec;
500 				dag_h = dag_h->next;
501 			}
502 		}
503 		desc->flags |= RF_DAG_DISPATCH_RETURNED;
504 		desc->state++;	/* Next state should be rf_State_ExecuteDAG. */
505 	}
506 	return RF_FALSE;
507 }
508 
509 
510 /*
511  * The access has an array of dagLists, one dagList per parity stripe.
512  * Fire the first DAG in each parity stripe (dagList).
513  * DAGs within a stripe (dagList) must be executed sequentially.
514  *  - This preserves atomic parity update.
 * DAGs for independent parity groups (stripes) are fired concurrently.
516  */
517 int
rf_State_ExecuteDAG(RF_RaidAccessDesc_t * desc)518 rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
519 {
520 	int i;
521 	RF_DagHeader_t *dag_h;
522 	RF_DagList_t *dagArray = desc->dagArray;
523 
524 	/*
525 	 * Next state is always rf_State_ProcessDAG. Important to do this
526 	 * before firing the first dag (it may finish before we leave this
527 	 * routine).
528 	 */
529 	desc->state++;
530 
531 	/*
532 	 * Sweep dag array, a stripe at a time, firing the first dag in each
533 	 * stripe.
534 	 */
535 	for (i = 0; i < desc->numStripes; i++) {
536 		RF_ASSERT(dagArray[i].numDags > 0);
537 		RF_ASSERT(dagArray[i].numDagsDone == 0);
538 		RF_ASSERT(dagArray[i].numDagsFired == 0);
539 		RF_ETIMER_START(dagArray[i].tracerec.timer);
540 		/* Fire first dag in this stripe. */
541 		dag_h = dagArray[i].dags;
542 		RF_ASSERT(dag_h);
543 		dagArray[i].numDagsFired++;
544 		/*
545 		 * XXX Yet another case where we pass in a conflicting
546 		 * function pointer :-(  XXX  GO
547 		 */
548 		rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess,
549 		    &dagArray[i]);
550 	}
551 
552 	/*
553 	 * The DAG will always call the callback, even if there was no
554 	 * blocking, so we are always suspended in this state.
555 	 */
556 	return RF_TRUE;
557 }
558 
559 
560 /*
561  * rf_State_ProcessDAG is entered when a dag completes.
562  * First, check that all DAGs in the access have completed.
563  * If not, fire as many DAGs as possible.
564  */
565 int
rf_State_ProcessDAG(RF_RaidAccessDesc_t * desc)566 rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
567 {
568 	RF_AccessStripeMapHeader_t *asmh = desc->asmap;
569 	RF_Raid_t *raidPtr = desc->raidPtr;
570 	RF_DagHeader_t *dag_h;
571 	int i, j, done = RF_TRUE;
572 	RF_DagList_t *dagArray = desc->dagArray;
573 	RF_Etimer_t timer;
574 
575 	/* Check to see if this is the last dag. */
576 	for (i = 0; i < desc->numStripes; i++)
577 		if (dagArray[i].numDags != dagArray[i].numDagsDone)
578 			done = RF_FALSE;
579 
580 	if (done) {
581 		if (desc->status) {
582 			/* A dag failed, retry. */
583 			RF_ETIMER_START(timer);
584 			/* Free all dags. */
585 			for (i = 0; i < desc->numStripes; i++) {
586 				rf_FreeDAG(desc->dagArray[i].dags);
587 			}
588 			rf_MarkFailuresInASMList(raidPtr, asmh);
589 			/* Back up to rf_State_CreateDAG. */
590 			desc->state = desc->state - 2;
591 			return RF_FALSE;
592 		} else {
593 			/* Move on to rf_State_Cleanup. */
594 			desc->state++;
595 		}
596 		return RF_FALSE;
597 	} else {
598 		/* More dags to execute. */
599 		/* See if any are ready to be fired. If so, fire them. */
600 		/*
601 		 * Don't fire the initial dag in a list, it's fired in
602 		 * rf_State_ExecuteDAG.
603 		 */
604 		for (i = 0; i < desc->numStripes; i++) {
605 			if ((dagArray[i].numDagsDone < dagArray[i].numDags) &&
606 			    (dagArray[i].numDagsDone ==
607 			     dagArray[i].numDagsFired) &&
608 			    (dagArray[i].numDagsFired > 0)) {
609 				RF_ETIMER_START(dagArray[i].tracerec.timer);
610 				/* Fire next dag in this stripe. */
611 				/*
612 				 * First, skip to next dag awaiting execution.
613 				 */
614 				dag_h = dagArray[i].dags;
615 				for (j = 0; j < dagArray[i].numDagsDone; j++)
616 					dag_h = dag_h->next;
617 				dagArray[i].numDagsFired++;
618 				/*
619 				 * XXX And again we pass a different function
620 				 * pointer... GO
621 				 */
622 				rf_DispatchDAG(dag_h, (void (*) (void *))
623 				    rf_ContinueDagAccess, &dagArray[i]);
624 			}
625 		}
626 		return RF_TRUE;
627 	}
628 }
629 
630 /* Only make it this far if all dags complete successfully. */
631 int
rf_State_Cleanup(RF_RaidAccessDesc_t * desc)632 rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
633 {
634 	RF_AccTraceEntry_t *tracerec = &desc->tracerec;
635 	RF_AccessStripeMapHeader_t *asmh = desc->asmap;
636 	RF_Raid_t *raidPtr = desc->raidPtr;
637 	RF_AccessStripeMap_t *asm_p;
638 	RF_DagHeader_t *dag_h;
639 	RF_Etimer_t timer;
640 	int i;
641 
642 	desc->state++;
643 
644 	timer = tracerec->timer;
645 	RF_ETIMER_STOP(timer);
646 	RF_ETIMER_EVAL(timer);
647 	tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
648 
649 	/* The RAID I/O is complete. Clean up. */
650 	tracerec->specific.user.dag_retry_us = 0;
651 
652 	RF_ETIMER_START(timer);
653 	if (desc->flags & RF_DAG_RETURN_DAG) {
654 		/* Copy dags into paramDAG. */
655 		*(desc->paramDAG) = desc->dagArray[0].dags;
656 		dag_h = *(desc->paramDAG);
657 		for (i = 1; i < desc->numStripes; i++) {
658 			/* Concatenate dags from remaining stripes. */
659 			RF_ASSERT(dag_h);
660 			while (dag_h->next)
661 				dag_h = dag_h->next;
662 			dag_h->next = desc->dagArray[i].dags;
663 		}
664 	} else {
665 		/* Free all dags. */
666 		for (i = 0; i < desc->numStripes; i++) {
667 			rf_FreeDAG(desc->dagArray[i].dags);
668 		}
669 	}
670 
671 	RF_ETIMER_STOP(timer);
672 	RF_ETIMER_EVAL(timer);
673 	tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
674 
675 	RF_ETIMER_START(timer);
676 	if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
677 		for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
678 			if (!rf_suppressLocksAndLargeWrites &&
679 			    asm_p->parityInfo &&
680 			    !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) {
681 				RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
682 				rf_ReleaseStripeLock(raidPtr->lockTable,
683 				    asm_p->stripeID, &asm_p->lockReqDesc);
684 			}
685 			if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
686 				rf_UnblockRecon(raidPtr, asm_p);
687 			}
688 		}
689 	}
690 	RF_ETIMER_STOP(timer);
691 	RF_ETIMER_EVAL(timer);
692 	tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
693 
694 	RF_ETIMER_START(timer);
695 	if (desc->flags & RF_DAG_RETURN_ASM)
696 		*(desc->paramASM) = asmh;
697 	else
698 		rf_FreeAccessStripeMap(asmh);
699 	RF_ETIMER_STOP(timer);
700 	RF_ETIMER_EVAL(timer);
701 	tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
702 
703 	RF_ETIMER_STOP(desc->timer);
704 	RF_ETIMER_EVAL(desc->timer);
705 
706 	timer = desc->tracerec.tot_timer;
707 	RF_ETIMER_STOP(timer);
708 	RF_ETIMER_EVAL(timer);
709 	desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
710 
711 	rf_LogTraceRec(raidPtr, tracerec);
712 
713 	desc->flags |= RF_DAG_ACCESS_COMPLETE;
714 
715 	return RF_FALSE;
716 }
717