1 /* $OpenBSD: rf_raid.h,v 1.7 2002/12/16 07:01:04 tdeval Exp $ */ 2 /* $NetBSD: rf_raid.h,v 1.12 2000/02/24 17:12:10 oster Exp $ */ 3 4 /* 5 * Copyright (c) 1995 Carnegie-Mellon University. 6 * All rights reserved. 7 * 8 * Author: Mark Holland 9 * 10 * Permission to use, copy, modify and distribute this software and 11 * its documentation is hereby granted, provided that both the copyright 12 * notice and this permission notice appear in all copies of the 13 * software, derivative works or modified versions, and any portions 14 * thereof, and that both notices appear in supporting documentation. 15 * 16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 17 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 19 * 20 * Carnegie Mellon requests users of this software to return to 21 * 22 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 23 * School of Computer Science 24 * Carnegie Mellon University 25 * Pittsburgh PA 15213-3890 26 * 27 * any improvements or extensions that they make and grant Carnegie the 28 * rights to redistribute these changes. 29 */ 30 31 /************************************************* 32 * rf_raid.h -- Main header file for RAID driver. 33 *************************************************/ 34 35 36 #ifndef _RF__RF_RAID_H_ 37 #define _RF__RF_RAID_H_ 38 39 #include "rf_archs.h" 40 #include "rf_types.h" 41 #include "rf_threadstuff.h" 42 43 #if defined(__NetBSD__) 44 #include "rf_netbsd.h" 45 #elif defined(__OpenBSD__) 46 #include "rf_openbsd.h" 47 #endif 48 49 #include <sys/disklabel.h> 50 #include <sys/types.h> 51 52 #include "rf_alloclist.h" 53 #include "rf_stripelocks.h" 54 #include "rf_layout.h" 55 #include "rf_disks.h" 56 #include "rf_debugMem.h" 57 #include "rf_diskqueue.h" 58 #include "rf_reconstruct.h" 59 #include "rf_acctrace.h" 60 61 #if RF_INCLUDE_PARITYLOGGING > 0 62 #include "rf_paritylog.h" 63 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 64 65 #define RF_MAX_DISKS 128 /* Max disks per array. */ 66 #define RF_DEV2RAIDID(_dev) (DISKUNIT(_dev)) 67 68 #define RF_COMPONENT_LABEL_VERSION_1 1 69 #define RF_COMPONENT_LABEL_VERSION 2 70 #define RF_RAID_DIRTY 0 71 #define RF_RAID_CLEAN 1 72 73 /* 74 * Each row in the array is a distinct parity group, so 75 * each has it's own status, which is one of the following. 76 */ 77 typedef enum RF_RowStatus_e { 78 rf_rs_optimal, 79 rf_rs_degraded, 80 rf_rs_reconstructing, 81 rf_rs_reconfigured 82 } RF_RowStatus_t; 83 84 struct RF_CumulativeStats_s { 85 struct timeval start; /* 86 * The time when the stats were 87 * last started. 88 */ 89 struct timeval stop; /* 90 * The time when the stats were 91 * last stopped. 92 */ 93 long sum_io_us; /* 94 * Sum of all user response 95 * times (us). 96 */ 97 long num_ios; /* 98 * Total number of I/Os 99 * serviced. 100 */ 101 long num_sect_moved; /* 102 * Total number of sectors read 103 * or written. 104 */ 105 }; 106 107 struct RF_ThroughputStats_s { 108 RF_DECLARE_MUTEX (mutex); /* 109 * A mutex used to lock the 110 * configuration stuff. 111 */ 112 struct timeval start; /* 113 * Timer started when 114 * numOutstandingRequests 115 * moves from 0 to 1. 116 */ 117 struct timeval stop; /* 118 * Timer stopped when 119 * numOutstandingRequests 120 * moves from 1 to 0. 121 */ 122 RF_uint64 sum_io_us; /* 123 * Total time timer is enabled. 124 */ 125 RF_uint64 num_ios; /* 126 * Total number of I/Os 127 * processed by RAIDframe. 128 */ 129 long num_out_ios; /* 130 * Number of outstanding I/Os. 131 */ 132 }; 133 134 struct RF_Raid_s { 135 /* This portion never changes, and can be accessed without locking */ 136 /* 137 * An exception is Disks[][].status, which requires locking when it is 138 * changed. XXX This is no longer true. numSpare and friends can 139 * change now. 140 */ 141 u_int numRow; /* 142 * Number of rows of disks, 143 * typically == # of ranks. 144 */ 145 u_int numCol; /* 146 * Number of columns of disks, 147 * typically == # of disks/rank. 148 */ 149 u_int numSpare; /* Number of spare disks. */ 150 int maxQueueDepth;/* Max disk queue depth. */ 151 RF_SectorCount_t totalSectors; /* 152 * Total number of sectors 153 * in the array. 154 */ 155 RF_SectorCount_t sectorsPerDisk; 156 /* 157 * Number of sectors on each 158 * disk. 159 */ 160 u_int logBytesPerSector; 161 /* 162 * Base-2 log of the number 163 * of bytes in a sector. 164 */ 165 u_int bytesPerSector; 166 /* Bytes in a sector. */ 167 RF_int32 sectorMask; /* Mask of bytes-per-sector. */ 168 169 RF_RaidLayout_t Layout; /* 170 * All information related to 171 * layout. 172 */ 173 RF_RaidDisk_t **Disks; /* 174 * All information related to 175 * physical disks. 176 */ 177 RF_DiskQueue_t **Queues; /* 178 * All information related to 179 * disk queues. 180 */ 181 RF_DiskQueueSW_t *qType; /* 182 * Pointer to the DiskQueueSW 183 * used for the component 184 * queues. 185 */ 186 /* 187 * NOTE: This is an anchor point via which the queues can be 188 * accessed, but the enqueue/dequeue routines in diskqueue.c use a 189 * local copy of this pointer for the actual accesses. 190 */ 191 /* 192 * The remainder of the structure can change, and therefore requires 193 * locking on reads and updates. 194 */ 195 RF_DECLARE_MUTEX (mutex); /* 196 * Mutex used to serialize 197 * access to the fields below. 198 */ 199 RF_RowStatus_t *status; /* 200 * The status of each row in 201 * the array. 202 */ 203 int valid; /* 204 * Indicates successful 205 * configuration. 206 */ 207 RF_LockTableEntry_t *lockTable; /* Stripe-lock table. */ 208 RF_LockTableEntry_t *quiesceLock; /* Quiescence table. */ 209 int numFailures; /* 210 * Total number of failures 211 * in the array. 212 */ 213 int numNewFailures; 214 /* 215 * Number of *new* failures 216 * (that haven't caused a 217 * mod_counter update). 218 */ 219 220 int parity_good; /* 221 * !0 if parity is known to be 222 * correct. 223 */ 224 int serial_number;/* 225 * A "serial number" for this 226 * set. 227 */ 228 int mod_counter; /* 229 * Modification counter for 230 * component labels. 231 */ 232 int clean; /* 233 * The clean bit for this array. 234 */ 235 236 int openings; /* 237 * Number of I/Os that can be 238 * scheduled simultaneously 239 * (high-level - not a 240 * per-component limit). 241 */ 242 243 int maxOutstanding; 244 /* 245 * maxOutstanding requests 246 * (per-component). 247 */ 248 int autoconfigure; 249 /* 250 * Automatically configure 251 * this RAID set. 252 * 0 == no, 1 == yes 253 */ 254 int root_partition; 255 /* 256 * Use this set as 257 * 0 == no, 1 == yes. 258 */ 259 int last_unit; /* 260 * Last unit number (e.g. 0 261 * for /dev/raid0) of this 262 * component. Used for 263 * autoconfigure only. 264 */ 265 int config_order; /* 266 * 0 .. n. The order in which 267 * the component should be 268 * auto-configured. 269 * E.g. 0 is will done first, 270 * (and would become raid0). 271 * This may be in conflict 272 * with last_unit !!?! 273 */ 274 /* Not currently used. */ 275 276 /* 277 * Cleanup stuff. 278 */ 279 RF_ShutdownList_t *shutdownList; /* Shutdown activities. */ 280 RF_AllocListElem_t *cleanupList; /* 281 * Memory to be freed at 282 * shutdown time. 283 */ 284 285 /* 286 * Recon stuff. 287 */ 288 RF_HeadSepLimit_t headSepLimit; 289 int numFloatingReconBufs; 290 int reconInProgress; 291 RF_DECLARE_COND (waitForReconCond); 292 RF_RaidReconDesc_t *reconDesc; /* Reconstruction descriptor. */ 293 RF_ReconCtrl_t **reconControl; /* 294 * Reconstruction control 295 * structure pointers for each 296 * row in the array. 297 */ 298 299 /* 300 * Array-quiescence stuff. 301 */ 302 RF_DECLARE_MUTEX (access_suspend_mutex); 303 RF_DECLARE_COND (quiescent_cond); 304 RF_IoCount_t accesses_suspended; 305 RF_IoCount_t accs_in_flight; 306 int access_suspend_release; 307 int waiting_for_quiescence; 308 RF_CallbackDesc_t *quiesce_wait_list; 309 310 /* 311 * Statistics. 312 */ 313 #if !defined(_KERNEL) && !defined(SIMULATE) 314 RF_ThroughputStats_t throughputstats; 315 #endif /* !_KERNEL && !SIMULATE */ 316 RF_CumulativeStats_t userstats; 317 int parity_rewrite_stripes_done; 318 int recon_stripes_done; 319 int copyback_stripes_done; 320 321 int recon_in_progress; 322 int parity_rewrite_in_progress; 323 int copyback_in_progress; 324 325 /* 326 * Engine thread control. 327 */ 328 RF_DECLARE_MUTEX (node_queue_mutex); 329 RF_DECLARE_COND (node_queue_cond); 330 RF_DagNode_t *node_queue; 331 RF_Thread_t parity_rewrite_thread; 332 RF_Thread_t copyback_thread; 333 RF_Thread_t engine_thread; 334 RF_Thread_t recon_thread; 335 RF_ThreadGroup_t engine_tg; 336 int shutdown_engine; 337 int dags_in_flight; /* Debug. */ 338 339 /* 340 * PSS (Parity Stripe Status) stuff. 341 */ 342 RF_FreeList_t *pss_freelist; 343 long pssTableSize; 344 345 /* 346 * Reconstruction stuff. 347 */ 348 int procsInBufWait; 349 int numFullReconBuffers; 350 RF_AccTraceEntry_t *recon_tracerecs; 351 unsigned long accumXorTimeUs; 352 RF_ReconDoneProc_t *recon_done_procs; 353 RF_DECLARE_MUTEX (recon_done_proc_mutex); 354 /* 355 * nAccOutstanding, waitShutdown protected by desc freelist lock 356 * (This may seem strange, since that's a central serialization point 357 * for a per-array piece of data, but otherwise, it'd be an extra 358 * per-array lock, and that'd only be less efficient...) 359 */ 360 RF_DECLARE_COND (outstandingCond); 361 int waitShutdown; 362 int nAccOutstanding; 363 364 RF_DiskId_t **diskids; 365 RF_DiskId_t *sparediskids; 366 367 int raidid; 368 RF_AccTotals_t acc_totals; 369 int keep_acc_totals; 370 371 struct raidcinfo **raid_cinfo; /* Array of component info. */ 372 373 int terminate_disk_queues; 374 375 /* 376 * XXX 377 * 378 * Config-specific information should be moved 379 * somewhere else, or at least hung off this 380 * in some generic way. 381 */ 382 383 /* Used by rf_compute_workload_shift. */ 384 RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL]; 385 386 /* Used by declustering. */ 387 int noRotate; 388 389 #if RF_INCLUDE_PARITYLOGGING > 0 390 /* used by parity logging */ 391 RF_SectorCount_t regionLogCapacity; 392 RF_ParityLogQueue_t parityLogPool;/* 393 * Pool of unused parity logs. 394 */ 395 RF_RegionInfo_t *regionInfo; /* Array of region state. */ 396 int numParityLogs; 397 int numSectorsPerLog; 398 int regionParityRange; 399 int logsInUse; /* Debugging. */ 400 RF_ParityLogDiskQueue_t parityLogDiskQueue; 401 /* 402 * State of parity logging 403 * disk work. 404 */ 405 RF_RegionBufferQueue_t regionBufferPool; 406 /* 407 * buffers for holding region 408 * log. 409 */ 410 RF_RegionBufferQueue_t parityBufferPool; 411 /* 412 * Buffers for holding parity. 413 */ 414 caddr_t parityLogBufferHeap; 415 /* 416 * Pool of unused parity logs. 417 */ 418 RF_Thread_t pLogDiskThreadHandle; 419 420 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 421 }; 422 423 #endif /* !_RF__RF_RAID_H_ */ 424