1 /*	$OpenBSD: rf_copyback.c,v 1.7 2002/12/16 07:01:03 tdeval Exp $	*/
2 /*	$NetBSD: rf_copyback.c,v 1.14 2000/03/07 02:59:50 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: Mark Holland
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 
/*****************************************************************************
 *
 * rf_copyback.c -- Code to copy reconstructed data back from spare space to
 *		    the replaced disk.
 *
 * The code operates using callbacks on the I/Os to continue with the next
 * unit to be copied back. We do this because a simple loop containing
 * blocking I/Os will not work in the simulator.
 *
 *****************************************************************************/
42 
43 #include "rf_types.h"
44 
45 #include <sys/time.h>
46 #include <sys/buf.h>
47 #include "rf_raid.h"
48 #include "rf_mcpair.h"
49 #include "rf_acctrace.h"
50 #include "rf_etimer.h"
51 #include "rf_general.h"
52 #include "rf_utils.h"
53 #include "rf_copyback.h"
54 #include "rf_decluster.h"
55 #include "rf_driver.h"
56 #include "rf_shutdown.h"
57 #include "rf_kintf.h"
58 
59 #define	RF_COPYBACK_DATA	0
60 #define	RF_COPYBACK_PARITY	1
61 
62 int	rf_copyback_in_progress;
63 
64 int  rf_CopybackReadDoneProc(RF_CopybackDesc_t *, int);
65 int  rf_CopybackWriteDoneProc(RF_CopybackDesc_t *, int);
66 void rf_CopybackOne(RF_CopybackDesc_t *, int, RF_RaidAddr_t,
67 	RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t);
68 void rf_CopybackComplete(RF_CopybackDesc_t *, int);
69 
70 int
rf_ConfigureCopyback(RF_ShutdownList_t ** listp)71 rf_ConfigureCopyback(RF_ShutdownList_t **listp)
72 {
73 	rf_copyback_in_progress = 0;
74 	return (0);
75 }
76 
77 #include <sys/types.h>
78 #include <sys/param.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/ioctl.h>
82 #include <sys/fcntl.h>
83 #ifdef	__NETBSD__
84 #include <sys/vnode.h>
85 #endif
86 
87 
88 /* Do a complete copyback. */
89 void
rf_CopybackReconstructedData(RF_Raid_t * raidPtr)90 rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
91 {
92 	RF_ComponentLabel_t c_label;
93 	int done, retcode;
94 	RF_CopybackDesc_t *desc;
95 	RF_RowCol_t frow, fcol;
96 	RF_RaidDisk_t *badDisk;
97 	char *databuf;
98 
99 	struct partinfo dpart;
100 	struct vnode *vp;
101 	struct vattr va;
102 	struct proc *proc;
103 
104 	int ac;
105 
106 	done = 0;
107 	fcol = 0;
108 	for (frow = 0; frow < raidPtr->numRow; frow++) {
109 		for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
110 			if (raidPtr->Disks[frow][fcol].status ==
111 			     rf_ds_dist_spared ||
112 			    raidPtr->Disks[frow][fcol].status ==
113 			     rf_ds_spared) {
114 				done = 1;
115 				break;
116 			}
117 		}
118 		if (done)
119 			break;
120 	}
121 
122 	if (frow == raidPtr->numRow) {
123 		printf("COPYBACK: No disks need copyback.\n");
124 		return;
125 	}
126 	badDisk = &raidPtr->Disks[frow][fcol];
127 
128 	proc = raidPtr->engine_thread;
129 
130 	/*
131 	 * This device may have been opened successfully the first time.
132 	 * Close it before trying to open it again.
133 	 */
134 
135 	if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
136 		printf("Close the opened device: %s.\n",
137 		    raidPtr->Disks[frow][fcol].devname);
138  		vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
139  		ac = raidPtr->Disks[frow][fcol].auto_configured;
140  		rf_close_component(raidPtr, vp, ac);
141 		raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
142 
143 	}
144  	/* Note that this disk was *not* auto_configured (any longer). */
145  	raidPtr->Disks[frow][fcol].auto_configured = 0;
146 
147 	printf("About to (re-)open the device: %s.\n",
148 	    raidPtr->Disks[frow][fcol].devname);
149 
150 	retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);
151 
152 	if (retcode) {
153 		printf("COPYBACK: raidlookup on device: %s failed: %d !\n",
154 		    raidPtr->Disks[frow][fcol].devname, retcode);
155 
156 		/*
157 		 * XXX The component isn't responding properly... Must be
158 		 * still dead :-(
159 		 */
160 		return;
161 
162 	} else {
163 
164 		/*
165 		 * Ok, so we can at least do a lookup...
166 		 * How about actually getting a vp for it ?
167 		 */
168 
169 		if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0)
170 		{
171 			return;
172 		}
173 		retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) &dpart, FREAD,
174 		    proc->p_ucred, proc);
175 		if (retcode) {
176 			return;
177 		}
178 		raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;
179 
180 		raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size -
181 		    rf_protectedSectors;
182 
183 		raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
184 		raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;
185 
186 		/* XXX Or the above ? */
187 		raidPtr->Disks[frow][fcol].dev = va.va_rdev;
188 
189 		/*
190 		 * We allow the user to specify that only a fraction of the
191 		 * disks should be used this is just for debug: it speeds up
192 		 * the parity scan.
193 		 */
194 		raidPtr->Disks[frow][fcol].numBlocks =
195 		    raidPtr->Disks[frow][fcol].numBlocks *
196 		    rf_sizePercentage / 100;
197 	}
198 #if 0
199 	/* This is the way it was done before the CAM stuff was removed. */
200 
201 	if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
202 		printf("COPYBACK: unable to extract bus, target, lun from"
203 		    " devname %s.\n", badDisk->devname);
204 		return;
205 	}
206 	/*
207 	 * TUR the disk that's marked as bad to be sure that it's actually
208 	 * alive.
209 	 */
210 	rf_SCSI_AllocTUR(&tur_op);
211 	retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
212 	rf_SCSI_FreeDiskOp(tur_op, 0);
213 #endif
214 
215 	if (retcode) {
216 		printf("COPYBACK: target disk failed TUR.\n");
217 		return;
218 	}
219 	/* Get a buffer to hold one SU. */
220 	RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr,
221 	    raidPtr->Layout.sectorsPerStripeUnit), (char *));
222 
223 	/* Create a descriptor. */
224 	RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
225 	desc->raidPtr = raidPtr;
226 	desc->status = 0;
227 	desc->frow = frow;
228 	desc->fcol = fcol;
229 	desc->spRow = badDisk->spareRow;
230 	desc->spCol = badDisk->spareCol;
231 	desc->stripeAddr = 0;
232 	desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
233 	desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit *
234 	    raidPtr->Layout.numDataCol;
235 	desc->databuf = databuf;
236 	desc->mcpair = rf_AllocMCPair();
237 
238 	printf("COPYBACK: Quiescing the array.\n");
239 	/*
240 	 * Quiesce the array, since we don't want to code support for user
241 	 * accs here.
242 	 */
243 	rf_SuspendNewRequestsAndWait(raidPtr);
244 
245 	/* Adjust state of the array and of the disks. */
246 	RF_LOCK_MUTEX(raidPtr->mutex);
247 	raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
248 	raidPtr->status[desc->frow] = rf_rs_optimal;
249 	rf_copyback_in_progress = 1;	/* Debug only. */
250 	RF_UNLOCK_MUTEX(raidPtr->mutex);
251 
252 	printf("COPYBACK: Beginning\n");
253 	RF_GETTIME(desc->starttime);
254 	rf_ContinueCopyback(desc);
255 
256 	/*
257 	 * Data has been restored.
258 	 * Fix up the component label.
259 	 * Don't actually need the read here.
260 	 */
261 	raidread_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
262 				 raidPtr->raid_cinfo[frow][fcol].ci_vp,
263 				 &c_label);
264 
265 	raid_init_component_label(raidPtr, &c_label);
266 
267 	c_label.row = frow;
268 	c_label.column = fcol;
269 
270 	raidwrite_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
271 				  raidPtr->raid_cinfo[frow][fcol].ci_vp,
272 				  &c_label);
273 }
274 
275 
276 /*
277  * Invoked via callback after a copyback I/O has completed to
278  * continue on with the next one.
279  */
280 void
rf_ContinueCopyback(RF_CopybackDesc_t * desc)281 rf_ContinueCopyback(RF_CopybackDesc_t *desc)
282 {
283 	RF_SectorNum_t testOffs, stripeAddr;
284 	RF_Raid_t *raidPtr = desc->raidPtr;
285 	RF_RaidAddr_t addr;
286 	RF_RowCol_t testRow, testCol;
287 	int old_pctg, new_pctg, done;
288 	struct timeval t, diff;
289 
290 	old_pctg = (-1);
291 	while (1) {
292 		stripeAddr = desc->stripeAddr;
293 		desc->raidPtr->copyback_stripes_done = stripeAddr /
294 		    desc->sectPerStripe;
295 		if (rf_prReconSched) {
296 			old_pctg = 100 * desc->stripeAddr /
297 			    raidPtr->totalSectors;
298 		}
299 		desc->stripeAddr += desc->sectPerStripe;
300 		if (rf_prReconSched) {
301 			new_pctg = 100 * desc->stripeAddr /
302 			    raidPtr->totalSectors;
303 			if (new_pctg != old_pctg) {
304 				RF_GETTIME(t);
305 				RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
306 				printf("%d %d.%06d\n", new_pctg,
307 				    (int) diff.tv_sec, (int) diff.tv_usec);
308 			}
309 		}
310 		if (stripeAddr >= raidPtr->totalSectors) {
311 			rf_CopybackComplete(desc, 0);
312 			return;
313 		}
314 		/* Walk through the current stripe, su-by-su. */
315 		for (done = 0, addr = stripeAddr;
316 		     addr < stripeAddr + desc->sectPerStripe;
317 		     addr += desc->sectPerSU) {
318 
319 			/* Map the SU, disallowing remap to spare space. */
320 			(raidPtr->Layout.map->MapSector) (raidPtr, addr,
321 			    &testRow, &testCol, &testOffs, RF_DONT_REMAP);
322 
323 			if (testRow == desc->frow && testCol == desc->fcol) {
324 				rf_CopybackOne(desc, RF_COPYBACK_DATA, addr,
325 				    testRow, testCol, testOffs);
326 				done = 1;
327 				break;
328 			}
329 		}
330 
331 		if (!done) {
332 			/*
333 			 * We didn't find the failed disk in the data part,
334 			 * check parity.
335 			 */
336 
337 			/*
338 			 * Map the parity for this stripe, disallowing remap
339 			 * to spare space.
340 			 */
341 			(raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr,
342 			    &testRow, &testCol, &testOffs, RF_DONT_REMAP);
343 
344 			if (testRow == desc->frow && testCol == desc->fcol) {
345 				rf_CopybackOne(desc, RF_COPYBACK_PARITY,
346 				    stripeAddr, testRow, testCol, testOffs);
347 			}
348 		}
349 		/* Check to see if the last read/write pair failed. */
350 		if (desc->status) {
351 			rf_CopybackComplete(desc, 1);
352 			return;
353 		}
354 		/*
355 		 * We didn't find any units to copy back in this stripe.
356 		 * Continue with the next one.
357 		 */
358 	}
359 }
360 
361 
362 /* Copyback one unit. */
363 void
rf_CopybackOne(RF_CopybackDesc_t * desc,int typ,RF_RaidAddr_t addr,RF_RowCol_t testRow,RF_RowCol_t testCol,RF_SectorNum_t testOffs)364 rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
365     RF_RowCol_t testRow, RF_RowCol_t testCol, RF_SectorNum_t testOffs)
366 {
367 	RF_SectorCount_t sectPerSU = desc->sectPerSU;
368 	RF_Raid_t *raidPtr = desc->raidPtr;
369 	RF_RowCol_t spRow = desc->spRow;
370 	RF_RowCol_t spCol = desc->spCol;
371 	RF_SectorNum_t spOffs;
372 
373 	/* Find the spare location for this SU. */
374 	if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
375 		if (typ == RF_COPYBACK_DATA)
376 			raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow,
377 			    &spCol, &spOffs, RF_REMAP);
378 		else
379 			raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow,
380 			    &spCol, &spOffs, RF_REMAP);
381 	} else {
382 		spOffs = testOffs;
383 	}
384 
385 	/* Create reqs to read the old location & write the new. */
386 	desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
387 	    sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int))
388 	    rf_CopybackReadDoneProc, desc, NULL, NULL, (void *) raidPtr,
389 	    RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
390 	desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
391 	    sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int))
392 	    rf_CopybackWriteDoneProc, desc, NULL, NULL, (void *) raidPtr,
393 	    RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
394 	desc->frow = testRow;
395 	desc->fcol = testCol;
396 
397 	/*
398 	 * Enqueue the read. The write will go out as part of the callback on
399 	 * the read. At user-level & in the kernel, wait for the read-write
400 	 * pair to complete. In the simulator, just return, since everything
401 	 * will happen as callbacks.
402 	 */
403 
404 	RF_LOCK_MUTEX(desc->mcpair->mutex);
405 	desc->mcpair->flag = 0;
406 
407 	rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq,
408 	    RF_IO_NORMAL_PRIORITY);
409 
410 	while (!desc->mcpair->flag) {
411 		RF_WAIT_MCPAIR(desc->mcpair);
412 	}
413 	RF_UNLOCK_MUTEX(desc->mcpair->mutex);
414 	rf_FreeDiskQueueData(desc->readreq);
415 	rf_FreeDiskQueueData(desc->writereq);
416 
417 }
418 
419 
420 /*
421  * Called at interrupt context when the read has completed.
422  * Just send out the write.
423  */
424 int
rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc,int status)425 rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status)
426 {
427 	if (status) {		/* Invoke the callback with bad status. */
428 		printf("COPYBACK: copyback read failed. Aborting.\n");
429 		(desc->writereq->CompleteFunc) (desc, -100);
430 	} else {
431 		rf_DiskIOEnqueue(&(desc->raidPtr
432 		    ->Queues[desc->frow][desc->fcol]),
433 		    desc->writereq, RF_IO_NORMAL_PRIORITY);
434 	}
435 	return (0);
436 }
437 
438 
439 /*
440  * Called at interrupt context when the write has completed.
441  * At user level & in the kernel, wake up the copyback thread.
442  * In the simulator, invoke the next copyback directly.
443  * Can't free diskqueuedata structs in the kernel because we're at
444  * interrupt context.
445  */
446 int
rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc,int status)447 rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status)
448 {
449 	if (status && status != -100) {
450 		printf("COPYBACK: copyback write failed. Aborting.\n");
451 	}
452 	desc->status = status;
453 	rf_MCPairWakeupFunc(desc->mcpair);
454 	return (0);
455 }
456 
457 
458 /* Invoked when the copyback has completed. */
459 void
rf_CopybackComplete(RF_CopybackDesc_t * desc,int status)460 rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
461 {
462 	RF_Raid_t *raidPtr = desc->raidPtr;
463 	struct timeval t, diff;
464 
465 	if (!status) {
466 		RF_LOCK_MUTEX(raidPtr->mutex);
467 		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
468 			RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
469 			rf_FreeSpareTable(raidPtr);
470 		} else {
471 			raidPtr->Disks[desc->spRow][desc->spCol].status =
472 			    rf_ds_spare;
473 		}
474 		RF_UNLOCK_MUTEX(raidPtr->mutex);
475 
476 		RF_GETTIME(t);
477 		RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
478 		printf("Copyback time was %d.%06d seconds.\n",
479 		    (int) diff.tv_sec, (int) diff.tv_usec);
480 	} else
481 		printf("COPYBACK: Failure.\n");
482 
483 	RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
484 	rf_FreeMCPair(desc->mcpair);
485 	RF_Free(desc, sizeof(*desc));
486 
487 	rf_copyback_in_progress = 0;
488 	rf_ResumeNewRequests(raidPtr);
489 }
490