1 /*	$OpenBSD: rf_raid.h,v 1.7 2002/12/16 07:01:04 tdeval Exp $	*/
2 /*	$NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: Mark Holland
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 /*************************************************
32  * rf_raid.h -- Main header file for RAID driver.
33  *************************************************/
34 
35 
36 #ifndef	_RF__RF_RAID_H_
37 #define	_RF__RF_RAID_H_
38 
39 #include "rf_archs.h"
40 #include "rf_types.h"
41 #include "rf_threadstuff.h"
42 
43 #if	defined(__NetBSD__)
44 #include "rf_netbsd.h"
45 #elif	defined(__OpenBSD__)
46 #include "rf_openbsd.h"
47 #endif
48 
49 #include <sys/disklabel.h>
50 #include <sys/types.h>
51 
52 #include "rf_alloclist.h"
53 #include "rf_stripelocks.h"
54 #include "rf_layout.h"
55 #include "rf_disks.h"
56 #include "rf_debugMem.h"
57 #include "rf_diskqueue.h"
58 #include "rf_reconstruct.h"
59 #include "rf_acctrace.h"
60 
61 #if	RF_INCLUDE_PARITYLOGGING > 0
62 #include "rf_paritylog.h"
63 #endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
64 
/* Upper bound on the number of disks in a single array. */
#define	RF_MAX_DISKS			128	/* Max disks per array. */
/*
 * Map a device number to a RAID id.  This is a thin wrapper that expands
 * to DISKUNIT(_dev); DISKUNIT itself is defined outside this header.
 */
#define	RF_DEV2RAIDID(_dev)		(DISKUNIT(_dev))

/*
 * Component label version numbers.
 * NOTE(review): presumably RF_COMPONENT_LABEL_VERSION is the current
 * version and _VERSION_1 the older one -- confirm against the label code.
 */
#define	RF_COMPONENT_LABEL_VERSION_1	1
#define	RF_COMPONENT_LABEL_VERSION	2
/*
 * Dirty/clean values.
 * NOTE(review): presumably the values stored in the `clean' member of
 * struct RF_Raid_s ("the clean bit for this array") -- confirm.
 */
#define	RF_RAID_DIRTY			0
#define	RF_RAID_CLEAN			1
72 
/*
 * Each row in the array is a distinct parity group, so each row has its
 * own status, which is one of the values below.
 *
 * The enumerators are numbered explicitly; the values are the same ones
 * the compiler would assign implicitly (0, 1, 2, 3 in declaration order).
 */
typedef enum RF_RowStatus_e {
	rf_rs_optimal		= 0,
	rf_rs_degraded		= 1,
	rf_rs_reconstructing	= 2,
	rf_rs_reconfigured	= 3
} RF_RowStatus_t;
83 
/*
 * Cumulative I/O statistics: the start/stop times of the current
 * measurement interval plus running totals of response time, I/O count
 * and sectors transferred.
 */
struct RF_CumulativeStats_s {
	/* The time when the stats were last started. */
	struct timeval	start;
	/* The time when the stats were last stopped. */
	struct timeval	stop;
	/* Sum of all user response times, in microseconds. */
	long		sum_io_us;
	/* Total number of I/Os serviced. */
	long		num_ios;
	/* Total number of sectors read or written. */
	long		num_sect_moved;
};
106 
107 struct RF_ThroughputStats_s {
108 	RF_DECLARE_MUTEX	(mutex);	/*
109 						 * A mutex used to lock the
110 						 * configuration stuff.
111 						 */
112 	struct timeval		 start;		/*
113 						 * Timer started when
114 						 * numOutstandingRequests
115 						 * moves from 0 to 1.
116 						 */
117 	struct timeval		 stop;		/*
118 						 * Timer stopped when
119 						 * numOutstandingRequests
120 						 * moves from 1 to 0.
121 						 */
122 	RF_uint64		 sum_io_us;	/*
123 						 * Total time timer is enabled.
124 						 */
125 	RF_uint64		 num_ios;	/*
126 						 * Total number of I/Os
127 						 * processed by RAIDframe.
128 						 */
129 	long			 num_out_ios;	/*
130 						 * Number of outstanding I/Os.
131 						 */
132 };
133 
/*
 * Top-level descriptor for one RAID array.  It groups the array geometry,
 * the layout, the per-disk and per-queue tables, row status, component-label
 * bookkeeping, cleanup lists, reconstruction and quiescence state,
 * statistics, engine-thread control and, when RF_INCLUDE_PARITYLOGGING > 0,
 * the parity-logging state.
 */
struct RF_Raid_s {
	/*
	 * This first portion was documented as never changing, so that it
	 * can be accessed without locking.
	 */
	/*
	 * An exception is Disks[][].status, which requires locking when it is
	 * changed. XXX This is no longer true. numSpare and friends can
	 * change now.
	 */
	u_int			  numRow;	/*
						 * Number of rows of disks,
						 * typically == # of ranks.
						 */
	u_int			  numCol;	/*
						 * Number of columns of disks,
						 * typically == # of disks/rank.
						 */
	u_int			  numSpare;	/* Number of spare disks. */
	int			  maxQueueDepth;/* Max disk queue depth. */
	RF_SectorCount_t	  totalSectors;	/*
						 * Total number of sectors
						 * in the array.
						 */
	RF_SectorCount_t	  sectorsPerDisk;
						/*
						 * Number of sectors on each
						 * disk.
						 */
	u_int			  logBytesPerSector;
						/*
						 * Base-2 log of the number
						 * of bytes in a sector.
						 */
	u_int			  bytesPerSector;
						/* Bytes in a sector. */
	RF_int32		  sectorMask;	/* Mask of bytes-per-sector. */

	RF_RaidLayout_t		  Layout;	/*
						 * All information related to
						 * layout.
						 */
	RF_RaidDisk_t		**Disks;	/*
						 * All information related to
						 * physical disks.
						 */
	RF_DiskQueue_t		**Queues;	/*
						 * All information related to
						 * disk queues.
						 */
	RF_DiskQueueSW_t	 *qType;	/*
						 * Pointer to the DiskQueueSW
						 * used for the component
						 * queues.
						 */
	/*
	 * NOTE:  Queues is an anchor point via which the queues can be
	 * accessed, but the enqueue/dequeue routines in diskqueue.c use a
	 * local copy of this pointer for the actual accesses.
	 */
	/*
	 * The remainder of the structure can change, and therefore requires
	 * locking on reads and updates.
	 */
	RF_DECLARE_MUTEX	 (mutex);	/*
						 * Mutex used to serialize
						 * access to the fields below.
						 */
	RF_RowStatus_t		 *status;	/*
						 * The status of each row in
						 * the array.
						 */
	int			  valid;	/*
						 * Indicates successful
						 * configuration.
						 */
	RF_LockTableEntry_t	 *lockTable;	/* Stripe-lock table. */
	RF_LockTableEntry_t	 *quiesceLock;	/* Quiescence table. */
	int			  numFailures;	/*
						 * Total number of failures
						 * in the array.
						 */
	int			  numNewFailures;
						/*
						 * Number of *new* failures
						 * (that haven't caused a
						 * mod_counter update).
						 */

	int			  parity_good;	/*
						 * !0 if parity is known to be
						 * correct.
						 */
	int			  serial_number;/*
						 * A "serial number" for this
						 * set.
						 */
	int			  mod_counter;	/*
						 * Modification counter for
						 * component labels.
						 */
	int			  clean;	/*
						 * The clean bit for this array.
						 */

	int			  openings;	/*
						 * Number of I/Os that can be
						 * scheduled simultaneously
						 * (high-level - not a
						 * per-component limit).
						 */

	int			  maxOutstanding;
						/*
						 * Maximum number of
						 * outstanding requests
						 * (per-component).
						 */
	int			  autoconfigure;
						/*
						 * Automatically configure
						 * this RAID set.
						 * 0 == no, 1 == yes.
						 */
	int			  root_partition;
						/*
						 * Use this set as ...
						 * 0 == no, 1 == yes.
						 * NOTE(review): the original
						 * sentence is truncated;
						 * presumably "as the root
						 * partition" -- confirm.
						 */
	int			  last_unit;	/*
						 * Last unit number (e.g. 0
						 * for /dev/raid0) of this
						 * component. Used for
						 * autoconfigure only.
						 */
	int			  config_order;	/*
						 * 0 .. n. The order in which
						 * the component should be
						 * auto-configured.
						 * E.g. 0 will be done first
						 * (and would become raid0).
						 * This may be in conflict
						 * with last_unit !!?!
						 */
						/* Not currently used. */

	/*
	 * Cleanup stuff.
	 */
	RF_ShutdownList_t	 *shutdownList;	/* Shutdown activities. */
	RF_AllocListElem_t	 *cleanupList;	/*
						 * Memory to be freed at
						 * shutdown time.
						 */

	/*
	 * Recon stuff.
	 * NOTE(review): both reconInProgress (here) and recon_in_progress
	 * (in the statistics group below) exist; how they differ is not
	 * visible in this header.
	 */
	RF_HeadSepLimit_t	  headSepLimit;
	int			  numFloatingReconBufs;
	int			  reconInProgress;
	RF_DECLARE_COND		 (waitForReconCond);
	RF_RaidReconDesc_t	 *reconDesc;	/* Reconstruction descriptor. */
	RF_ReconCtrl_t		**reconControl;	/*
						 * Reconstruction control
						 * structure pointers for each
						 * row in the array.
						 */

	/*
	 * Array-quiescence stuff.
	 */
	RF_DECLARE_MUTEX	 (access_suspend_mutex);
	RF_DECLARE_COND		 (quiescent_cond);
	RF_IoCount_t		  accesses_suspended;
	RF_IoCount_t		  accs_in_flight;
	int			  access_suspend_release;
	int			  waiting_for_quiescence;
	RF_CallbackDesc_t	 *quiesce_wait_list;

	/*
	 * Statistics.
	 * The throughput statistics member is only compiled in when neither
	 * _KERNEL nor SIMULATE is defined.
	 */
#if	!defined(_KERNEL) && !defined(SIMULATE)
	RF_ThroughputStats_t	  throughputstats;
#endif	/* !_KERNEL && !SIMULATE */
	RF_CumulativeStats_t	  userstats;
	/*
	 * NOTE(review): presumably per-operation progress counters and
	 * "operation is running" flags for parity rewrite, reconstruction
	 * and copyback -- confirm against the code that updates them.
	 */
	int			  parity_rewrite_stripes_done;
	int			  recon_stripes_done;
	int			  copyback_stripes_done;

	int			  recon_in_progress;
	int			  parity_rewrite_in_progress;
	int			  copyback_in_progress;

	/*
	 * Engine thread control.
	 */
	RF_DECLARE_MUTEX	 (node_queue_mutex);
	RF_DECLARE_COND		 (node_queue_cond);
	RF_DagNode_t		 *node_queue;
	RF_Thread_t		  parity_rewrite_thread;
	RF_Thread_t		  copyback_thread;
	RF_Thread_t		  engine_thread;
	RF_Thread_t		  recon_thread;
	RF_ThreadGroup_t	  engine_tg;
	int			  shutdown_engine;
	int			  dags_in_flight;	/* Debug. */

	/*
	 * PSS (Parity Stripe Status) stuff.
	 */
	RF_FreeList_t		 *pss_freelist;
	long			  pssTableSize;

	/*
	 * Reconstruction stuff.
	 */
	int			  procsInBufWait;
	int			  numFullReconBuffers;
	RF_AccTraceEntry_t	 *recon_tracerecs;
	unsigned long		  accumXorTimeUs;
	RF_ReconDoneProc_t	 *recon_done_procs;
	RF_DECLARE_MUTEX	 (recon_done_proc_mutex);
	/*
	 * nAccOutstanding and waitShutdown are protected by the desc
	 * freelist lock. (This may seem strange, since that's a central
	 * serialization point for a per-array piece of data, but otherwise,
	 * it'd be an extra per-array lock, and that'd only be less
	 * efficient...)
	 */
	RF_DECLARE_COND		 (outstandingCond);
	int			  waitShutdown;
	int			  nAccOutstanding;

	RF_DiskId_t		**diskids;
	RF_DiskId_t		 *sparediskids;

	int			  raidid;
	RF_AccTotals_t		  acc_totals;
	int			  keep_acc_totals;

	struct raidcinfo	**raid_cinfo;	/* Array of component info. */

	int			  terminate_disk_queues;

	/*
	 * XXX
	 *
	 * Config-specific information should be moved
	 * somewhere else, or at least hung off this
	 * in some generic way.
	 */

	/* Used by rf_compute_workload_shift. */
	RF_RowCol_t		  hist_diskreq[RF_MAXROW][RF_MAXCOL];

	/* Used by declustering. */
	int			  noRotate;

#if	RF_INCLUDE_PARITYLOGGING > 0
	/* Used by parity logging. */
	RF_SectorCount_t	  regionLogCapacity;
	RF_ParityLogQueue_t	  parityLogPool;/*
						 * Pool of unused parity logs.
						 */
	RF_RegionInfo_t		 *regionInfo;	/* Array of region state. */
	int			  numParityLogs;
	int			  numSectorsPerLog;
	int			  regionParityRange;
	int			  logsInUse;	/* Debugging. */
	RF_ParityLogDiskQueue_t	  parityLogDiskQueue;
						/*
						 * State of parity logging
						 * disk work.
						 */
	RF_RegionBufferQueue_t	  regionBufferPool;
					 	/*
						 * Buffers for holding region
						 * log.
						 */
	RF_RegionBufferQueue_t	  parityBufferPool;
						/*
						 * Buffers for holding parity.
						 */
	caddr_t			  parityLogBufferHeap;
						/*
						 * Originally documented as
						 * "Pool of unused parity
						 * logs.", identical to
						 * parityLogPool above.
						 * NOTE(review): presumably
						 * the backing memory for the
						 * parity log buffers --
						 * confirm.
						 */
	RF_Thread_t		  pLogDiskThreadHandle;

#endif	/* RF_INCLUDE_PARITYLOGGING > 0 */
};
422 
423 #endif	/* !_RF__RF_RAID_H_ */
424