1 /*        $NetBSD: rf_driver.c,v 1.144 2024/09/19 06:13:03 andvar Exp $         */
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * Copyright (c) 1995 Carnegie-Mellon University.
33  * All rights reserved.
34  *
35  * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
36  *         Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
37  *
38  * Permission to use, copy, modify and distribute this software and
39  * its documentation is hereby granted, provided that both the copyright
40  * notice and this permission notice appear in all copies of the
41  * software, derivative works or modified versions, and any portions
42  * thereof, and that both notices appear in supporting documentation.
43  *
44  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
45  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
46  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47  *
48  * Carnegie Mellon requests users of this software to return to
49  *
50  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
51  *  School of Computer Science
52  *  Carnegie Mellon University
53  *  Pittsburgh PA 15213-3890
54  *
55  * any improvements or extensions that they make and grant Carnegie the
56  * rights to redistribute these changes.
57  */
58 
59 /******************************************************************************
60  *
61  * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
62  *
63  * all routines are prefixed with rf_ (raidframe), to avoid conflicts.
64  *
65  ******************************************************************************/
66 
67 
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.144 2024/09/19 06:13:03 andvar Exp $");
70 
71 #ifdef _KERNEL_OPT
72 #include "opt_raid_diagnostic.h"
73 #endif
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/ioctl.h>
78 #include <sys/fcntl.h>
79 #include <sys/vnode.h>
80 
81 
82 #include "rf_archs.h"
83 #include "rf_threadstuff.h"
84 
85 #include <sys/errno.h>
86 
87 #include "rf_raid.h"
88 #include "rf_dag.h"
89 #include "rf_aselect.h"
90 #include "rf_diskqueue.h"
91 #include "rf_parityscan.h"
92 #include "rf_alloclist.h"
93 #include "rf_dagutils.h"
94 #include "rf_utils.h"
95 #include "rf_etimer.h"
96 #include "rf_acctrace.h"
97 #include "rf_general.h"
98 #include "rf_desc.h"
99 #include "rf_states.h"
100 #include "rf_decluster.h"
101 #include "rf_map.h"
102 #include "rf_revent.h"
103 #include "rf_callback.h"
104 #include "rf_engine.h"
105 #include "rf_mcpair.h"
106 #include "rf_nwayxor.h"
107 #include "rf_driver.h"
108 #include "rf_options.h"
109 #include "rf_shutdown.h"
110 #include "rf_kintf.h"
111 #include "rf_paritymap.h"
112 
113 #include <sys/buf.h>
114 
115 #ifndef RF_ACCESS_DEBUG
116 #define RF_ACCESS_DEBUG 0
117 #endif
118 
119 /* rad == RF_RaidAccessDesc_t */
120 #define RF_MAX_FREE_RAD 128
121 #define RF_MIN_FREE_RAD  32
122 
123 /* main configuration routines */
124 static int raidframe_booted = 0;
125 
126 static void rf_ConfigureDebug(RF_Config_t * cfgPtr);
127 static void set_debug_option(char *name, long val);
128 static void rf_UnconfigureArray(void);
129 static void rf_ShutdownRDFreeList(void *);
130 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **, RF_Raid_t *, RF_Config_t *);
131 
132 rf_declare_mutex2(rf_printf_mutex);     /* debug only:  avoids interleaved
133                                                    * printfs by different stripes */
134 
135 #define SIGNAL_QUIESCENT_COND(_raid_) \
136           rf_broadcast_cond2((_raid_)->access_suspend_cv)
137 #define WAIT_FOR_QUIESCENCE(_raid_) \
138           rf_wait_cond2((_raid_)->access_suspend_cv, \
139                           (_raid_)->access_suspend_mutex)
140 
141 static int configureCount = 0;          /* number of active configurations */
142 static int isconfigged = 0;   /* is basic raidframe (non per-array)
143                                          * stuff configured */
144 static rf_declare_mutex2(configureMutex); /* used to lock the configuration
145                                                      * stuff */
146 static RF_ShutdownList_t *globalShutdown;         /* non array-specific
147                                                              * stuff */
148 
149 static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
150 static int rf_AllocEmergBuffers(RF_Raid_t *);
151 static void rf_FreeEmergBuffers(RF_Raid_t *);
152 static void rf_destroy_mutex_cond(RF_Raid_t *);
153 static void rf_alloc_mutex_cond(RF_Raid_t *);
154 
155 /* called at system boot time */
156 int
rf_BootRaidframe(bool boot)157 rf_BootRaidframe(bool boot)
158 {
159 
160           if (boot) {
161                     if (raidframe_booted)
162                               return (EBUSY);
163                     raidframe_booted = 1;
164                     rf_init_mutex2(configureMutex, IPL_NONE);
165                     configureCount = 0;
166                     isconfigged = 0;
167                     globalShutdown = NULL;
168           } else {
169                     rf_destroy_mutex2(configureMutex);
170                     raidframe_booted = 0;
171           }
172           return (0);
173 }
174 
175 /*
176  * Called whenever an array is shutdown
177  */
178 static void
rf_UnconfigureArray(void)179 rf_UnconfigureArray(void)
180 {
181 
182           rf_lock_mutex2(configureMutex);
183           if (--configureCount == 0) {  /* if no active configurations, shut
184                                                    * everything down */
185                     rf_destroy_mutex2(rf_printf_mutex);
186                     isconfigged = 0;
187                     rf_ShutdownList(&globalShutdown);
188 
189                     /*
190                    * We must wait until now, because the AllocList module
191                    * uses the DebugMem module.
192                    */
193 #if RF_DEBUG_MEM
194                     if (rf_memDebug)
195                               rf_print_unfreed();
196 #endif
197           }
198           rf_unlock_mutex2(configureMutex);
199 }
200 
201 /*
202  * Called to shut down an array.
203  */
204 int
rf_Shutdown(RF_Raid_t * raidPtr)205 rf_Shutdown(RF_Raid_t *raidPtr)
206 {
207 
208           if (!raidPtr->valid) {
209                     RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver.  Aborting shutdown\n");
210                     return (EINVAL);
211           }
212           /*
213          * wait for outstanding IOs to land
214          * As described in rf_raid.h, we use the rad_freelist lock
215          * to protect the per-array info about outstanding descs
216          * since we need to do freelist locking anyway, and this
217          * cuts down on the amount of serialization we've got going
218          * on.
219          */
220           rf_lock_mutex2(raidPtr->rad_lock);
221           if (raidPtr->waitShutdown) {
222                     rf_unlock_mutex2(raidPtr->rad_lock);
223                     return (EBUSY);
224           }
225           raidPtr->waitShutdown = 1;
226           while (raidPtr->nAccOutstanding) {
227                     rf_wait_cond2(raidPtr->outstandingCond, raidPtr->rad_lock);
228           }
229 
230           /* Wait for any parity re-writes to stop... */
231           while (raidPtr->parity_rewrite_in_progress) {
232                     printf("raid%d: Waiting for parity re-write to exit...\n",
233                            raidPtr->raidid);
234                     rf_wait_cond2(raidPtr->parity_rewrite_cv, raidPtr->rad_lock);
235           }
236           rf_unlock_mutex2(raidPtr->rad_lock);
237 
238           /* Wait for any reconstruction to stop... */
239           rf_lock_mutex2(raidPtr->mutex);
240           while (raidPtr->reconInProgress) {
241                     printf("raid%d: Waiting for reconstruction to stop...\n",
242                            raidPtr->raidid);
243                     rf_wait_cond2(raidPtr->waitForReconCond, raidPtr->mutex);
244           }
245           rf_unlock_mutex2(raidPtr->mutex);
246 
247           raidPtr->valid = 0;
248 
249           if (raidPtr->parity_map != NULL)
250                     rf_paritymap_detach(raidPtr);
251 
252           rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
253 
254           rf_UnconfigureVnodes(raidPtr);
255 
256           rf_FreeEmergBuffers(raidPtr);
257 
258           rf_ShutdownList(&raidPtr->shutdownList);
259 
260           rf_destroy_mutex_cond(raidPtr);
261 
262           rf_UnconfigureArray();
263 
264           return (0);
265 }
266 
267 
/*
 * Helper macros for rf_Configure().  They expect `rc' (and, for the
 * per-array variants, `raidPtr' and `cfgPtr') to be in scope at the
 * point of use, and the failure paths return from the enclosing
 * function.  Each one is wrapped in do { } while (0) so that it behaves
 * as a single statement and must be followed by a semicolon.
 */

/*
 * Run one global configuration step `f' against globalShutdown.
 * Must be used with configureMutex held: on failure it unwinds the
 * global shutdown list, drops the configuration count, releases
 * configureMutex, destroys rf_printf_mutex and returns rc.
 */
#define DO_INIT_CONFIGURE(f) do { \
	rc = f (&globalShutdown); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		rf_ShutdownList(&globalShutdown); \
		configureCount--; \
		rf_unlock_mutex2(configureMutex); \
		rf_destroy_mutex2(rf_printf_mutex); \
		return(rc); \
	} \
} while (0)

/* Undo a partially completed per-array configuration of raidPtr. */
#define DO_RAID_FAIL() do { \
	rf_UnconfigureVnodes(raidPtr); \
	rf_FreeEmergBuffers(raidPtr); \
	rf_ShutdownList(&raidPtr->shutdownList); \
	rf_UnconfigureArray(); \
	rf_destroy_mutex_cond(raidPtr); \
} while (0)

/*
 * Run one per-array configuration step `f'.  On failure, log it, undo
 * the array via DO_RAID_FAIL() and return rc.
 */
#define DO_RAID_INIT_CONFIGURE(f) do { \
	rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)
296 
297 int
rf_Configure(RF_Raid_t * raidPtr,RF_Config_t * cfgPtr,RF_AutoConfig_t * ac)298 rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
299 {
300           RF_RowCol_t col;
301           int rc;
302           bool swapped = false;
303           bool first = true;
304 
305           rf_lock_mutex2(configureMutex);
306           configureCount++;
307           if (isconfigged == 0) {
308                     rf_init_mutex2(rf_printf_mutex, IPL_VM);
309 
310                     /* initialize globals */
311                     DO_INIT_CONFIGURE(rf_ConfigureAllocList);
312 
313                     /*
314                    * Yes, this does make debugging general to the whole
315                    * system instead of being array specific. Bummer, drag.
316                      */
317                     rf_ConfigureDebug(cfgPtr);
318                     DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
319 #if RF_ACC_TRACE > 0
320                     DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
321 #endif
322                     DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
323                     DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
324                     isconfigged = 1;
325           }
326           rf_unlock_mutex2(configureMutex);
327 
328           rf_alloc_mutex_cond(raidPtr);
329 
330           /* set up the cleanup list.  Do this after ConfigureDebug so that
331            * value of memDebug will be set */
332 
333           rf_MakeAllocList(raidPtr->cleanupList);
334           if (raidPtr->cleanupList == NULL) {
335                     DO_RAID_FAIL();
336                     return (ENOMEM);
337           }
338           rf_ShutdownCreate(&raidPtr->shutdownList,
339                                 (void (*) (void *)) rf_FreeAllocList,
340                                 raidPtr->cleanupList);
341 
342           KASSERT(cfgPtr->numCol < RF_MAXCOL);
343           KASSERT(cfgPtr->numCol >= 0);
344           KASSERT(cfgPtr->numSpare < RF_MAXSPARE);
345           KASSERT(cfgPtr->numSpare >= 0);
346 
347           raidPtr->numCol = cfgPtr->numCol;
348           raidPtr->numSpare = cfgPtr->numSpare;
349           raidPtr->maxQueue = cfgPtr->numSpare;
350 
351           raidPtr->status = rf_rs_optimal;
352           raidPtr->reconControl = NULL;
353 
354           DO_RAID_INIT_CONFIGURE(rf_ConfigureMapModule);
355           DO_RAID_INIT_CONFIGURE(rf_ConfigureReconEvent);
356           DO_RAID_INIT_CONFIGURE(rf_ConfigureCallback);
357           DO_RAID_INIT_CONFIGURE(rf_ConfigureRDFreeList);
358           DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
359           DO_RAID_INIT_CONFIGURE(rf_ConfigureMCPair);
360           DO_RAID_INIT_CONFIGURE(rf_ConfigureDAGs);
361           DO_RAID_INIT_CONFIGURE(rf_ConfigureReconstruction);
362           DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
363           DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
364 
365           DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
366           DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
367 
368           raidPtr->nAccOutstanding = 0;
369           raidPtr->waitShutdown = 0;
370 
371           if (ac!=NULL) {
372                     /* We have an AutoConfig structure..  Don't do the
373                        normal disk configuration... call the auto config
374                        stuff */
375                     rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
376           } else {
377                     DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
378                     DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
379           }
380           /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
381            * no. is set */
382           DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
383 
384           DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
385 
386 
387 
388 
389           /* Initialize per-RAID PSS bits */
390           rf_InitPSStatus(raidPtr);
391 
392 #if RF_INCLUDE_CHAINDECLUSTER > 0
393           for (col = 0; col < raidPtr->numCol; col++) {
394                     /*
395                      * XXX better distribution
396                      */
397                     raidPtr->hist_diskreq[col] = 0;
398           }
399 #endif
400           raidPtr->numNewFailures = 0;
401           raidPtr->parity_rewrite_in_progress = 0;
402           raidPtr->changing_components = 0;
403           raidPtr->recon_in_progress = 0;
404 
405           raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
406 
407           /* autoconfigure and root_partition will actually get filled in
408              after the config is done */
409           raidPtr->autoconfigure = 0;
410           raidPtr->root_partition = 0;
411           raidPtr->last_unit = raidPtr->raidid;
412           raidPtr->config_order = 0;
413 
414           if (rf_keepAccTotals) {
415                     raidPtr->keep_acc_totals = 1;
416           }
417 
418           /* Allocate a bunch of buffers to be used in low-memory conditions */
419           raidPtr->iobuf = NULL;
420 
421           rc = rf_AllocEmergBuffers(raidPtr);
422           if (rc) {
423                     printf("raid%d: Unable to allocate emergency buffers.\n",
424                            raidPtr->raidid);
425                     DO_RAID_FAIL();
426                     return(rc);
427           }
428 
429           /* Set up parity map stuff, if applicable. */
430 #ifndef RF_NO_PARITY_MAP
431           rf_paritymap_attach(raidPtr, cfgPtr->force);
432 #endif
433 
434           raidPtr->valid = 1;
435 
436           printf("raid%d: %s\n", raidPtr->raidid,
437                  raidPtr->Layout.map->configName);
438           printf("raid%d: Components:", raidPtr->raidid);
439 
440           for (col = 0; col < raidPtr->numCol; col++) {
441                     RF_ComponentLabel_t *clabel;
442                     bool compswapped;
443 
444                     printf(" %s", raidPtr->Disks[col].devname);
445                     if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
446                               printf("[**FAILED**]");
447                     }
448                     clabel = raidget_component_label(raidPtr, col);
449                     compswapped = clabel->version ==
450                                     bswap32(RF_COMPONENT_LABEL_VERSION);
451                     if (first)
452                               swapped = compswapped;
453                     else if (swapped != compswapped)
454                               printf("raid%d: Component %d has different endian "
455                                      "than first component.", raidPtr->raidid, col);
456           }
457           printf("\n");
458           printf("raid%d: Total Sectors: %" PRIu64 " (%" PRIu64 " MB)\n",
459                  raidPtr->raidid,
460                  raidPtr->totalSectors,
461                  (raidPtr->totalSectors / 1024 *
462                                         (1 << raidPtr->logBytesPerSector) / 1024));
463           if (swapped)
464                     printf("raid%d: Using swapped-endian component labels.\n",
465                         raidPtr->raidid);
466 
467           return (0);
468 }
469 
470 
471 /*
472 
473   Routines to allocate and free the "emergency buffers" for a given
474   RAID set.  These emergency buffers will be used when the kernel runs
475   out of kernel memory.
476 
477  */
478 
479 static int
rf_AllocEmergBuffers(RF_Raid_t * raidPtr)480 rf_AllocEmergBuffers(RF_Raid_t *raidPtr)
481 {
482           void *tmpbuf;
483           RF_VoidPointerListElem_t *vple;
484           int i;
485 
486           /* XXX next line needs tuning... */
487           raidPtr->numEmergencyBuffers = 10 * raidPtr->numCol;
488 #if DEBUG
489           printf("raid%d: allocating %d buffers of %d bytes.\n",
490                  raidPtr->raidid,
491                  raidPtr->numEmergencyBuffers,
492                  (int)(raidPtr->Layout.sectorsPerStripeUnit <<
493                  raidPtr->logBytesPerSector));
494 #endif
495           for (i = 0; i < raidPtr->numEmergencyBuffers; i++) {
496                     tmpbuf = malloc( raidPtr->Layout.sectorsPerStripeUnit <<
497                                          raidPtr->logBytesPerSector,
498                                          M_RAIDFRAME, M_WAITOK);
499                     if (tmpbuf) {
500                               vple = rf_AllocVPListElem(raidPtr);
501                               vple->p= tmpbuf;
502                               vple->next = raidPtr->iobuf;
503                               raidPtr->iobuf = vple;
504                               raidPtr->iobuf_count++;
505                     } else {
506                               printf("raid%d: failed to allocate emergency buffer!\n",
507                                      raidPtr->raidid);
508                               return 1;
509                     }
510           }
511 
512           /* XXX next line needs tuning too... */
513           raidPtr->numEmergencyStripeBuffers = 10;
514         for (i = 0; i < raidPtr->numEmergencyStripeBuffers; i++) {
515                 tmpbuf = malloc( raidPtr->numCol * (raidPtr->Layout.sectorsPerStripeUnit <<
516                                  raidPtr->logBytesPerSector),
517                                  M_RAIDFRAME, M_WAITOK);
518                 if (tmpbuf) {
519                         vple = rf_AllocVPListElem(raidPtr);
520                         vple->p= tmpbuf;
521                         vple->next = raidPtr->stripebuf;
522                         raidPtr->stripebuf = vple;
523                         raidPtr->stripebuf_count++;
524                 } else {
525                         printf("raid%d: failed to allocate emergency stripe buffer!\n",
526                                raidPtr->raidid);
527                               return 1;
528                 }
529         }
530 
531           return (0);
532 }
533 
534 static void
rf_FreeEmergBuffers(RF_Raid_t * raidPtr)535 rf_FreeEmergBuffers(RF_Raid_t *raidPtr)
536 {
537           RF_VoidPointerListElem_t *tmp;
538 
539           /* Free the emergency IO buffers */
540           while (raidPtr->iobuf != NULL) {
541                     tmp = raidPtr->iobuf;
542                     raidPtr->iobuf = raidPtr->iobuf->next;
543                     free(tmp->p, M_RAIDFRAME);
544                     rf_FreeVPListElem(raidPtr,tmp);
545           }
546 
547           /* Free the emergency stripe buffers */
548           while (raidPtr->stripebuf != NULL) {
549                     tmp = raidPtr->stripebuf;
550                     raidPtr->stripebuf = raidPtr->stripebuf->next;
551                     free(tmp->p, M_RAIDFRAME);
552                     rf_FreeVPListElem(raidPtr, tmp);
553           }
554 }
555 
556 
557 static void
rf_ShutdownRDFreeList(void * arg)558 rf_ShutdownRDFreeList(void *arg)
559 {
560           RF_Raid_t *raidPtr;
561 
562           raidPtr = (RF_Raid_t *) arg;
563 
564           pool_destroy(&raidPtr->pools.rad);
565 }
566 
567 static int
rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)568 rf_ConfigureRDFreeList(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
569                            RF_Config_t *cfgPtr)
570 {
571 
572           rf_pool_init(raidPtr, raidPtr->poolNames.rad, &raidPtr->pools.rad, sizeof(RF_RaidAccessDesc_t),
573                          "rad", RF_MIN_FREE_RAD, RF_MAX_FREE_RAD);
574           rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, raidPtr);
575           return (0);
576 }
577 
578 RF_RaidAccessDesc_t *
rf_AllocRaidAccDesc(RF_Raid_t * raidPtr,RF_IoType_t type,RF_RaidAddr_t raidAddress,RF_SectorCount_t numBlocks,void * bufPtr,void * bp,RF_RaidAccessFlags_t flags,const RF_AccessState_t * states)579 rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
580                         RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
581                         void *bufPtr, void *bp, RF_RaidAccessFlags_t flags,
582                         const RF_AccessState_t *states)
583 {
584           RF_RaidAccessDesc_t *desc;
585 
586           desc = pool_get(&raidPtr->pools.rad, PR_WAITOK);
587 
588           rf_lock_mutex2(raidPtr->rad_lock);
589           if (raidPtr->waitShutdown) {
590                     /*
591                    * Actually, we're shutting the array down. Free the desc
592                    * and return NULL.
593                    */
594 
595                     rf_unlock_mutex2(raidPtr->rad_lock);
596                     pool_put(&raidPtr->pools.rad, desc);
597                     return (NULL);
598           }
599           raidPtr->nAccOutstanding++;
600 
601           rf_unlock_mutex2(raidPtr->rad_lock);
602 
603           desc->raidPtr = (void *) raidPtr;
604           desc->type = type;
605           desc->raidAddress = raidAddress;
606           desc->numBlocks = numBlocks;
607           desc->bufPtr = bufPtr;
608           desc->bp = bp;
609           desc->flags = flags;
610           desc->states = states;
611           desc->state = 0;
612           desc->dagList = NULL;
613 
614           desc->status = 0;
615           desc->numRetries = 0;
616 #if RF_ACC_TRACE > 0
617           memset(&desc->tracerec, 0, sizeof(desc->tracerec));
618 #endif
619           desc->callbackFunc = NULL;
620           desc->callbackArg = NULL;
621           desc->next = NULL;
622           desc->iobufs = NULL;
623           desc->stripebufs = NULL;
624 
625           return (desc);
626 }
627 
628 void
rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc)629 rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
630 {
631           RF_Raid_t *raidPtr = desc->raidPtr;
632           RF_DagList_t *dagList, *temp;
633           RF_VoidPointerListElem_t *tmp;
634 
635           RF_ASSERT(desc);
636 
637           /* Cleanup the dagList(s) */
638           dagList = desc->dagList;
639           while(dagList != NULL) {
640                     temp = dagList;
641                     dagList = dagList->next;
642                     rf_FreeDAGList(raidPtr, temp);
643           }
644 
645           while (desc->iobufs) {
646                     tmp = desc->iobufs;
647                     desc->iobufs = desc->iobufs->next;
648                     rf_FreeIOBuffer(raidPtr, tmp);
649           }
650 
651           while (desc->stripebufs) {
652                     tmp = desc->stripebufs;
653                     desc->stripebufs = desc->stripebufs->next;
654                     rf_FreeStripeBuffer(raidPtr, tmp);
655           }
656 
657           pool_put(&raidPtr->pools.rad, desc);
658           rf_lock_mutex2(raidPtr->rad_lock);
659           raidPtr->nAccOutstanding--;
660           if (raidPtr->waitShutdown) {
661                     rf_signal_cond2(raidPtr->outstandingCond);
662           }
663           rf_unlock_mutex2(raidPtr->rad_lock);
664 }
665 /*********************************************************************
666  * Main routine for performing an access.
667  * Accesses are retried until a DAG can not be selected.  This occurs
668  * when either the DAG library is incomplete or there are too many
669  * failures in a parity group.
670  *
671  * type should be read or write.  bp_in is a buf pointer.  void *to
672  * facilitate ignoring it outside the kernel
673  ********************************************************************/
674 int
rf_DoAccess(RF_Raid_t * raidPtr,RF_IoType_t type,RF_RaidAddr_t raidAddress,RF_SectorCount_t numBlocks,void * bufPtr,struct buf * bp,RF_RaidAccessFlags_t flags)675 rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
676               void *bufPtr, struct buf *bp, RF_RaidAccessFlags_t flags)
677 {
678           RF_RaidAccessDesc_t *desc;
679           void *lbufPtr = bufPtr;
680 
681           raidAddress += rf_raidSectorOffset;
682 
683 #if RF_ACCESS_DEBUG
684           if (rf_accessDebug) {
685 
686                     printf("logBytes is: %d %d %d\n", raidPtr->raidid,
687                         raidPtr->logBytesPerSector,
688                         (int) rf_RaidAddressToByte(raidPtr, numBlocks));
689                     printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid,
690                         (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
691                         (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
692                         (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
693                         (int) numBlocks,
694                         (int) rf_RaidAddressToByte(raidPtr, numBlocks),
695                         (long) bufPtr);
696           }
697 #endif
698 
699           desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
700               numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states);
701 
702           if (desc == NULL) {
703                     return (ENOMEM);
704           }
705 #if RF_ACC_TRACE > 0
706           RF_ETIMER_START(desc->tracerec.tot_timer);
707 #endif
708 
709           if (raidPtr->parity_map != NULL &&
710               type == RF_IO_TYPE_WRITE)
711                     rf_paritymap_begin(raidPtr->parity_map, raidAddress,
712                         numBlocks);
713 
714           rf_ContinueRaidAccess(desc);
715 
716           return (0);
717 }
#if 0
/*
 * Force the array into reconfigured mode without doing reconstruction.
 * (Compiled out.)
 */
int
rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col)
{

	if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) == 0) {
		printf("Can't set reconfigured mode in dedicated-spare array\n");
		RF_PANIC();
	}

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->numFailures++;
	raidPtr->Disks[col].status = rf_ds_dist_spared;
	raidPtr->status = rf_rs_reconfigured;
	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
	/*
	 * Install spare table only if declustering + distributed sparing
	 * architecture.
	 */
	if ((raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) != 0)
		rf_InstallSpareTable(raidPtr, col);
	rf_unlock_mutex2(raidPtr->mutex);

	return 0;
}
#endif
740 
741 int
rf_FailDisk(RF_Raid_t * raidPtr,int fcol,int initRecon)742 rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon)
743 {
744 
745           /* need to suspend IO's here -- if there are DAGs in flight
746              and we pull the rug out from under ci_vp, Bad Things
747              can happen.  */
748 
749           rf_SuspendNewRequestsAndWait(raidPtr);
750 
751           rf_lock_mutex2(raidPtr->mutex);
752           if (raidPtr->Disks[fcol].status != rf_ds_failed) {
753                     /* must be failing something that is valid, or else it's
754                        already marked as failed (in which case we don't
755                        want to mark it failed again!) */
756                     raidPtr->numFailures++;
757                     raidPtr->Disks[fcol].status = rf_ds_failed;
758                     raidPtr->status = rf_rs_degraded;
759           }
760           rf_unlock_mutex2(raidPtr->mutex);
761 
762           rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
763 
764           /* Close the component, so that it's not "locked" if someone
765              else want's to use it! */
766 
767           rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp,
768                                  raidPtr->Disks[fcol].auto_configured);
769 
770           rf_lock_mutex2(raidPtr->mutex);
771           raidPtr->raid_cinfo[fcol].ci_vp = NULL;
772 
773           /* Need to mark the component as not being auto_configured
774              (in case it was previously). */
775 
776           raidPtr->Disks[fcol].auto_configured = 0;
777           rf_unlock_mutex2(raidPtr->mutex);
778           /* now we can allow IO to continue -- we'll be suspending it
779              again in rf_ReconstructFailedDisk() if we have to.. */
780 
781           rf_ResumeNewRequests(raidPtr);
782 
783           if (initRecon)
784                     rf_ReconstructFailedDisk(raidPtr, fcol);
785           return (0);
786 }
787 /* releases a thread that is waiting for the array to become quiesced.
788  * access_suspend_mutex should be locked upon calling this
789  */
790 void
rf_SignalQuiescenceLock(RF_Raid_t * raidPtr)791 rf_SignalQuiescenceLock(RF_Raid_t *raidPtr)
792 {
793 #if RF_DEBUG_QUIESCE
794           if (rf_quiesceDebug) {
795                     printf("raid%d: Signalling quiescence lock\n",
796                            raidPtr->raidid);
797           }
798 #endif
799           raidPtr->access_suspend_release = 1;
800 
801           if (raidPtr->waiting_for_quiescence) {
802                     SIGNAL_QUIESCENT_COND(raidPtr);
803           }
804 }
805 /* suspends all new requests to the array.  No effect on accesses that are in flight.  */
806 int
rf_SuspendNewRequestsAndWait(RF_Raid_t * raidPtr)807 rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
808 {
809 #if RF_DEBUG_QUIESCE
810           if (rf_quiesceDebug)
811                     printf("raid%d: Suspending new reqs\n", raidPtr->raidid);
812 #endif
813           rf_lock_mutex2(raidPtr->access_suspend_mutex);
814           raidPtr->accesses_suspended++;
815           raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
816 
817           if (raidPtr->waiting_for_quiescence) {
818                     raidPtr->access_suspend_release = 0;
819                     while (!raidPtr->access_suspend_release) {
820 #if RF_DEBUG_QUIESCE
821                               printf("raid%d: Suspending: Waiting for Quiescence\n",
822                                      raidPtr->raidid);
823 #endif
824                               WAIT_FOR_QUIESCENCE(raidPtr);
825                               raidPtr->waiting_for_quiescence = 0;
826                     }
827           }
828 #if RF_DEBUG_QUIESCE
829           printf("raid%d: Quiescence reached..\n", raidPtr->raidid);
830 #endif
831 
832           rf_unlock_mutex2(raidPtr->access_suspend_mutex);
833           return (raidPtr->waiting_for_quiescence);
834 }
835 /* wake up everyone waiting for quiescence to be released */
836 void
rf_ResumeNewRequests(RF_Raid_t * raidPtr)837 rf_ResumeNewRequests(RF_Raid_t *raidPtr)
838 {
839           RF_CallbackFuncDesc_t *t, *cb;
840 
841 #if RF_DEBUG_QUIESCE
842           if (rf_quiesceDebug)
843                     printf("raid%d: Resuming new requests\n", raidPtr->raidid);
844 #endif
845 
846           rf_lock_mutex2(raidPtr->access_suspend_mutex);
847           raidPtr->accesses_suspended--;
848           if (raidPtr->accesses_suspended == 0)
849                     cb = raidPtr->quiesce_wait_list;
850           else
851                     cb = NULL;
852           raidPtr->quiesce_wait_list = NULL;
853           rf_unlock_mutex2(raidPtr->access_suspend_mutex);
854 
855           while (cb) {
856                     t = cb;
857                     cb = cb->next;
858                     (t->callbackFunc) (t->callbackArg);
859                     rf_FreeCallbackFuncDesc(raidPtr, t);
860           }
861 }
862 /*****************************************************************************************
863  *
864  * debug routines
865  *
866  ****************************************************************************************/
867 
868 static void
set_debug_option(char * name,long val)869 set_debug_option(char *name, long val)
870 {
871           RF_DebugName_t *p;
872 
873           for (p = rf_debugNames; p->name; p++) {
874                     if (!strcmp(p->name, name)) {
875                               *(p->ptr) = val;
876                               printf("[Set debug variable %s to %ld]\n", name, val);
877                               return;
878                     }
879           }
880           RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
881 }
882 
883 
884 /* would like to use sscanf here, but apparently not available in kernel */
885 /*ARGSUSED*/
886 static void
rf_ConfigureDebug(RF_Config_t * cfgPtr)887 rf_ConfigureDebug(RF_Config_t *cfgPtr)
888 {
889           char   *val_p, *name_p, *white_p;
890           long    val;
891           int     i;
892 
893           rf_ResetDebugOptions();
894           for (i = 0; i < RF_MAXDBGV && cfgPtr->debugVars[i][0]; i++) {
895                     name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
896                     white_p = rf_find_white(name_p);        /* skip to start of 2nd
897                                                                        * word */
898                     val_p = rf_find_non_white(white_p);
899                     if (*val_p == '0' && *(val_p + 1) == 'x')
900                               val = rf_htoi(val_p + 2);
901                     else
902                               val = rf_atoi(val_p);
903                     *white_p = '\0';
904                     set_debug_option(name_p, val);
905           }
906 }
907 
/*
 * Report a RAIDframe error at the given source location by handing a
 * formatted message to kern_assert().
 */
void
rf_print_panic_message(int line, const char *file)
{

	kern_assert("raidframe error at line %d file %s",
	    line, file);
}
913 
#ifdef RAID_DIAGNOSTIC
/*
 * Like rf_print_panic_message(), but the message also carries the text of
 * the assertion that failed.  Only built when RAID_DIAGNOSTIC is defined.
 */
void
rf_print_assert_panic_message(int line, const char *file,
    const char *condition)
{

	kern_assert(
	    "raidframe error at line %d file %s (failed asserting %s)\n",
	    line, file, condition);
}
#endif	/* RAID_DIAGNOSTIC */
922 
/*
 * Emit an error message saying that a mutex could not be initialized,
 * including the source location and the return code `rc'.
 */
void
rf_print_unable_to_init_mutex(const char *file, int line, int rc)
{

	RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
	    file, line, rc);
}
929 
/*
 * Emit an error message saying that an entry could not be added to the
 * shutdown list, including the source location and the return code `rc'.
 */
void
rf_print_unable_to_add_shutdown(const char *file, int line, int rc)
{

	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
	    file, line, rc);
}
936 
937 static void
rf_alloc_mutex_cond(RF_Raid_t * raidPtr)938 rf_alloc_mutex_cond(RF_Raid_t *raidPtr)
939 {
940 
941           rf_init_mutex2(raidPtr->mutex, IPL_VM);
942 
943           rf_init_cond2(raidPtr->outstandingCond, "rfocond");
944           rf_init_cond2(raidPtr->parity_rewrite_cv, "rfprwshutdown");
945           rf_init_mutex2(raidPtr->rad_lock, IPL_VM);
946 
947           rf_init_mutex2(raidPtr->access_suspend_mutex, IPL_VM);
948           rf_init_cond2(raidPtr->access_suspend_cv, "rfquiesce");
949 
950           rf_init_cond2(raidPtr->waitForReconCond, "rfrcnw");
951 
952           rf_init_cond2(raidPtr->changing_components_cv, "rfcc");
953 }
954 
955 static void
rf_destroy_mutex_cond(RF_Raid_t * raidPtr)956 rf_destroy_mutex_cond(RF_Raid_t *raidPtr)
957 {
958 
959           rf_destroy_cond2(raidPtr->waitForReconCond);
960           rf_destroy_cond2(raidPtr->changing_components_cv);
961 
962           rf_destroy_mutex2(raidPtr->access_suspend_mutex);
963           rf_destroy_cond2(raidPtr->access_suspend_cv);
964 
965           rf_destroy_cond2(raidPtr->parity_rewrite_cv);
966           rf_destroy_cond2(raidPtr->outstandingCond);
967           rf_destroy_mutex2(raidPtr->rad_lock);
968 
969           rf_destroy_mutex2(raidPtr->mutex);
970 }
971