1 /* $NetBSD: rf_paritymap.c,v 1.11 2023/09/25 21:59:38 oster Exp $ */
2 
3 /*-
4  * Copyright (c) 2009 Jed Davis.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: rf_paritymap.c,v 1.11 2023/09/25 21:59:38 oster Exp $");
31 
32 #include <sys/param.h>
33 #include <sys/callout.h>
34 #include <sys/kmem.h>
35 #include <sys/mutex.h>
36 #include <sys/rwlock.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 
40 #include <dev/raidframe/rf_paritymap.h>
41 #include <dev/raidframe/rf_stripelocks.h>
42 #include <dev/raidframe/rf_layout.h>
43 #include <dev/raidframe/rf_raid.h>
44 #include <dev/raidframe/rf_parityscan.h>
45 #include <dev/raidframe/rf_kintf.h>
46 
/*
 * Tunables.
 *
 * REGION_MINSIZE is the smallest region, in bytes (25 MiB), that the
 * automatic region count computed by rf_paritymap_nreg() will produce.
 * DFL_TICKMS and DFL_COOLDOWN are the fallback tick interval (in
 * milliseconds) and cooldown (in ticks).
 */
#define REGION_MINSIZE	(25ULL << 20)
#define DFL_TICKMS	40000
#define DFL_COOLDOWN	8	/* 7-8 intervals of 40s = 5min +/- 20s */

/*
 * Bits in the parity map's "flags" word, used only within this file.
 *
 * TICKING: the cooldown callout has been scheduled and not yet retired.
 * TICKED:  the callout has fired and rf_paritymap_checkwork() has not
 *          yet processed it.
 */
#define TICKING	0x01
#define TICKED	0x02
55 
56 /* Prototypes! */
57 static void rf_paritymap_write_locked(struct rf_paritymap *);
58 static void rf_paritymap_tick(void *);
59 static u_int rf_paritymap_nreg(RF_Raid_t *);
60 
61 /* Extract the current status of the parity map. */
62 void
rf_paritymap_status(struct rf_paritymap * pm,struct rf_pmstat * ps)63 rf_paritymap_status(struct rf_paritymap *pm, struct rf_pmstat *ps)
64 {
65           memset(ps, 0, sizeof(*ps));
66           if (pm == NULL)
67                     ps->enabled = 0;
68           else {
69                     ps->enabled = 1;
70                     ps->region_size = pm->region_size;
71                     mutex_enter(&pm->lock);
72                     memcpy(&ps->params, &pm->params, sizeof(ps->params));
73                     memcpy(ps->dirty, pm->disk_now, sizeof(ps->dirty));
74                     memcpy(&ps->ctrs, &pm->ctrs, sizeof(ps->ctrs));
75                     mutex_exit(&pm->lock);
76           }
77 }
78 
79 /*
80  * Test whether parity in a given sector is suspected of being inconsistent
81  * on disk (assuming that any pending I/O to it is allowed to complete).
82  * This may be of interest to future work on parity scrubbing.
83  */
84 int
rf_paritymap_test(struct rf_paritymap * pm,daddr_t sector)85 rf_paritymap_test(struct rf_paritymap *pm, daddr_t sector)
86 {
87           unsigned region = sector / pm->region_size;
88           int retval;
89 
90           mutex_enter(&pm->lock);
91           retval = isset(pm->disk_boot->bits, region) ? 1 : 0;
92           mutex_exit(&pm->lock);
93           return retval;
94 }
95 
96 /* To be called before a write to the RAID is submitted. */
97 void
rf_paritymap_begin(struct rf_paritymap * pm,daddr_t offset,daddr_t size)98 rf_paritymap_begin(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
99 {
100           unsigned i, b, e;
101 
102           b = offset / pm->region_size;
103           e = (offset + size - 1) / pm->region_size;
104 
105           for (i = b; i <= e; i++)
106                     rf_paritymap_begin_region(pm, i);
107 }
108 
109 /* To be called after a write to the RAID completes. */
110 void
rf_paritymap_end(struct rf_paritymap * pm,daddr_t offset,daddr_t size)111 rf_paritymap_end(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
112 {
113           unsigned i, b, e;
114 
115           b = offset / pm->region_size;
116           e = (offset + size - 1) / pm->region_size;
117 
118           for (i = b; i <= e; i++)
119                     rf_paritymap_end_region(pm, i);
120 }
121 
122 void
rf_paritymap_begin_region(struct rf_paritymap * pm,unsigned region)123 rf_paritymap_begin_region(struct rf_paritymap *pm, unsigned region)
124 {
125           int needs_write;
126 
127           KASSERT(region < RF_PARITYMAP_NREG);
128           pm->ctrs.nwrite++;
129 
130           /* If it was being kept warm, deal with that. */
131           mutex_enter(&pm->lock);
132           if (pm->current->state[region] < 0)
133                     pm->current->state[region] = 0;
134 
135           /* This shouldn't happen unless RAIDOUTSTANDING is set too high. */
136           KASSERT(pm->current->state[region] < 127);
137           pm->current->state[region]++;
138 
139           needs_write = isclr(pm->disk_now->bits, region);
140 
141           if (needs_write) {
142                     KASSERT(pm->current->state[region] == 1);
143                     rf_paritymap_write_locked(pm);
144           }
145 
146           mutex_exit(&pm->lock);
147 }
148 
149 void
rf_paritymap_end_region(struct rf_paritymap * pm,unsigned region)150 rf_paritymap_end_region(struct rf_paritymap *pm, unsigned region)
151 {
152           KASSERT(region < RF_PARITYMAP_NREG);
153 
154           mutex_enter(&pm->lock);
155           KASSERT(pm->current->state[region] > 0);
156           --pm->current->state[region];
157 
158           if (pm->current->state[region] <= 0) {
159                     pm->current->state[region] = -pm->params.cooldown;
160                     KASSERT(pm->current->state[region] <= 0);
161                     mutex_enter(&pm->lk_flags);
162                     if (!(pm->flags & TICKING)) {
163                               pm->flags |= TICKING;
164                               mutex_exit(&pm->lk_flags);
165                               callout_schedule(&pm->ticker,
166                                   mstohz(pm->params.tickms));
167                     } else
168                               mutex_exit(&pm->lk_flags);
169           }
170           mutex_exit(&pm->lock);
171 }
172 
173 /*
174  * Updates the parity map to account for any changes in current activity
175  * and/or an ongoing parity scan, then writes it to disk with appropriate
176  * synchronization.
177  */
178 void
rf_paritymap_write(struct rf_paritymap * pm)179 rf_paritymap_write(struct rf_paritymap *pm)
180 {
181           mutex_enter(&pm->lock);
182           rf_paritymap_write_locked(pm);
183           mutex_exit(&pm->lock);
184 }
185 
186 /* As above, but to be used when pm->lock is already held. */
187 static void
rf_paritymap_write_locked(struct rf_paritymap * pm)188 rf_paritymap_write_locked(struct rf_paritymap *pm)
189 {
190           char w, w0;
191           int i, j, setting, clearing;
192 
193           setting = clearing = 0;
194           for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
195                     w0 = pm->disk_now->bits[i];
196                     w = pm->disk_boot->bits[i];
197 
198                     for (j = 0; j < NBBY; j++)
199                               if (pm->current->state[i * NBBY + j] != 0)
200                                         w |= 1 << j;
201 
202                     if (w & ~w0)
203                               setting = 1;
204                     if (w0 & ~w)
205                               clearing = 1;
206 
207                     pm->disk_now->bits[i] = w;
208           }
209           pm->ctrs.ncachesync += setting + clearing;
210           pm->ctrs.nclearing += clearing;
211 
212           /*
213            * If bits are being set in the parity map, then a sync is
214            * required afterwards, so that the regions are marked dirty
215            * on disk before any writes to them take place.  If bits are
216            * being cleared, then a sync is required before the write, so
217            * that any writes to those regions are processed before the
218            * region is marked clean.  (Synchronization is somewhat
219            * overkill; a write ordering barrier would suffice, but we
220            * currently have no way to express that directly.)
221            */
222           if (clearing)
223                     rf_sync_component_caches(pm->raid, 1);
224           rf_paritymap_kern_write(pm->raid, pm->disk_now);
225           if (setting)
226                     rf_sync_component_caches(pm->raid, 1);
227 }
228 
229 /* Mark all parity as being in need of rewrite. */
230 void
rf_paritymap_invalidate(struct rf_paritymap * pm)231 rf_paritymap_invalidate(struct rf_paritymap *pm)
232 {
233           mutex_enter(&pm->lock);
234           memset(pm->disk_boot, (unsigned char)~0, sizeof(*pm->disk_boot));
235           mutex_exit(&pm->lock);
236 }
237 
238 /* Mark all parity as being correct. */
239 void
rf_paritymap_forceclean(struct rf_paritymap * pm)240 rf_paritymap_forceclean(struct rf_paritymap *pm)
241 {
242           mutex_enter(&pm->lock);
243           memset(pm->disk_boot, 0, sizeof(*pm->disk_boot));
244           mutex_exit(&pm->lock);
245 }
246 
247 /*
248  * The cooldown callout routine just defers its work to a thread; it can't do
249  * the parity map write itself as it would block, and although mutex-induced
250  * blocking is permitted it seems wise to avoid tying up the softint.
251  */
252 static void
rf_paritymap_tick(void * arg)253 rf_paritymap_tick(void *arg)
254 {
255           struct rf_paritymap *pm = arg;
256 
257           mutex_enter(&pm->lk_flags);
258           pm->flags |= TICKED;
259           mutex_exit(&pm->lk_flags);
260 
261           rf_lock_mutex2(pm->raid->iodone_lock);
262           rf_signal_cond2(pm->raid->iodone_cv); /* XXX */
263           rf_unlock_mutex2(pm->raid->iodone_lock);
264 }
265 
266 /*
267  * This is where the parity cooling work (and rearming the callout if needed)
268  * is done; the raidio thread calls it when woken up, as by the above.
269  */
270 void
rf_paritymap_checkwork(struct rf_paritymap * pm)271 rf_paritymap_checkwork(struct rf_paritymap *pm)
272 {
273           int i, zerop, progressp;
274 
275           mutex_enter(&pm->lk_flags);
276           if (pm->flags & TICKED) {
277                     zerop = progressp = 0;
278 
279                     pm->flags &= ~TICKED;
280                     mutex_exit(&pm->lk_flags);
281 
282                     mutex_enter(&pm->lock);
283                     for (i = 0; i < RF_PARITYMAP_NREG; i++) {
284                               if (pm->current->state[i] < 0) {
285                                         progressp = 1;
286                                         pm->current->state[i]++;
287                                         if (pm->current->state[i] == 0)
288                                                   zerop = 1;
289                               }
290                     }
291 
292                     if (progressp)
293                               callout_schedule(&pm->ticker,
294                                   mstohz(pm->params.tickms));
295                     else {
296                               mutex_enter(&pm->lk_flags);
297                               pm->flags &= ~TICKING;
298                               mutex_exit(&pm->lk_flags);
299                     }
300 
301                     if (zerop)
302                               rf_paritymap_write_locked(pm);
303                     mutex_exit(&pm->lock);
304           } else
305                     mutex_exit(&pm->lk_flags);
306 }
307 
308 /*
309  * Set parity map parameters; used both to alter parameters on the fly and to
310  * establish their initial values.  Note that setting a parameter to 0 means
311  * to leave the previous setting unchanged, and that if this is done for the
312  * initial setting of "regions", then a default value will be computed based
313  * on the RAID component size.
314  */
315 int
rf_paritymap_set_params(struct rf_paritymap * pm,const struct rf_pmparams * params,int todisk)316 rf_paritymap_set_params(struct rf_paritymap *pm,
317     const struct rf_pmparams *params, int todisk)
318 {
319           int cooldown, tickms;
320           u_int regions;
321           RF_RowCol_t col;
322           RF_ComponentLabel_t *clabel;
323           RF_Raid_t *raidPtr;
324 
325           cooldown = params->cooldown != 0
326               ? params->cooldown : pm->params.cooldown;
327           tickms = params->tickms != 0
328               ? params->tickms : pm->params.tickms;
329           regions = params->regions != 0
330               ? params->regions : pm->params.regions;
331 
332           if (cooldown < 1 || cooldown > 128) {
333                     printf("raid%d: cooldown %d out of range\n", pm->raid->raidid,
334                         cooldown);
335                     return (-1);
336           }
337           if (tickms < 10) {
338                     printf("raid%d: tick time %dms out of range\n",
339                         pm->raid->raidid, tickms);
340                     return (-1);
341           }
342           if (regions == 0) {
343                     regions = rf_paritymap_nreg(pm->raid);
344           } else if (regions > RF_PARITYMAP_NREG) {
345                     printf("raid%d: region count %u too large (more than %u)\n",
346                         pm->raid->raidid, regions, RF_PARITYMAP_NREG);
347                     return (-1);
348           }
349 
350           /* XXX any currently warm parity will be used with the new tickms! */
351           pm->params.cooldown = cooldown;
352           pm->params.tickms = tickms;
353           /* Apply the initial region count, but do not change it after that. */
354           if (pm->params.regions == 0)
355                     pm->params.regions = regions;
356 
357           /* So that the newly set parameters can be tested: */
358           pm->ctrs.nwrite = pm->ctrs.ncachesync = pm->ctrs.nclearing = 0;
359 
360           if (todisk) {
361                     raidPtr = pm->raid;
362                     for (col = 0; col < raidPtr->numCol; col++) {
363                               if (RF_DEAD_DISK(raidPtr->Disks[col].status))
364                                         continue;
365 
366                               clabel = raidget_component_label(raidPtr, col);
367                               clabel->parity_map_ntick = cooldown;
368                               clabel->parity_map_tickms = tickms;
369                               clabel->parity_map_regions = regions;
370 
371                               /* Don't touch the disk if it's been spared */
372                               if (clabel->status == rf_ds_spared)
373                                         continue;
374 
375                               raidflush_component_label(raidPtr, col);
376                     }
377 
378                     /* handle the spares too... */
379                     for (col = 0; col < raidPtr->numSpare; col++) {
380                               if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) {
381                                         clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
382                                         clabel->parity_map_ntick = cooldown;
383                                         clabel->parity_map_tickms = tickms;
384                                         clabel->parity_map_regions = regions;
385                                         raidflush_component_label(raidPtr, raidPtr->numCol+col);
386                               }
387                     }
388           }
389           return 0;
390 }
391 
392 /*
393  * The number of regions may not be as many as can fit into the map, because
394  * when regions are too small, the overhead of setting parity map bits
395  * becomes significant in comparison to the actual I/O, while the
396  * corresponding gains in parity verification time become negligible.  Thus,
397  * a minimum region size (defined above) is imposed.
398  *
399  * Note that, if the number of regions is less than the maximum, then some of
400  * the regions will be "fictional", corresponding to no actual disk; some
401  * parts of the code may process them as normal, but they can not ever be
402  * written to.
403  */
404 static u_int
rf_paritymap_nreg(RF_Raid_t * raid)405 rf_paritymap_nreg(RF_Raid_t *raid)
406 {
407           daddr_t bytes_per_disk, nreg;
408 
409           bytes_per_disk = raid->sectorsPerDisk << raid->logBytesPerSector;
410           nreg = bytes_per_disk / REGION_MINSIZE;
411           if (nreg > RF_PARITYMAP_NREG)
412                     nreg = RF_PARITYMAP_NREG;
413           if (nreg < 1)
414                     nreg = 1;
415 
416           return (u_int)nreg;
417 }
418 
419 /*
420  * Initialize a parity map given specific parameters.  This neither reads nor
421  * writes the parity map config in the component labels; for that, see below.
422  */
423 int
rf_paritymap_init(struct rf_paritymap * pm,RF_Raid_t * raid,const struct rf_pmparams * params)424 rf_paritymap_init(struct rf_paritymap *pm, RF_Raid_t *raid,
425     const struct rf_pmparams *params)
426 {
427           daddr_t rstripes;
428           struct rf_pmparams safe;
429 
430           pm->raid = raid;
431           pm->params.regions = 0;
432           if (0 != rf_paritymap_set_params(pm, params, 0)) {
433                     /*
434                      * If the parameters are out-of-range, then bring the
435                      * parity map up with something reasonable, so that
436                      * the admin can at least go and fix it (or ignore it
437                      * entirely).
438                      */
439                     safe.cooldown = DFL_COOLDOWN;
440                     safe.tickms = DFL_TICKMS;
441                     safe.regions = 0;
442 
443                     if (0 != rf_paritymap_set_params(pm, &safe, 0))
444                               return (-1);
445           }
446 
447           rstripes = howmany(raid->Layout.numStripe, pm->params.regions);
448           pm->region_size = rstripes * raid->Layout.dataSectorsPerStripe;
449 
450           callout_init(&pm->ticker, CALLOUT_MPSAFE);
451           callout_setfunc(&pm->ticker, rf_paritymap_tick, pm);
452           pm->flags = 0;
453 
454           pm->disk_boot = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
455               KM_SLEEP);
456           pm->disk_now = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
457               KM_SLEEP);
458           pm->current = kmem_zalloc(sizeof(struct rf_paritymap_current),
459               KM_SLEEP);
460 
461           rf_paritymap_kern_read(pm->raid, pm->disk_boot);
462           memcpy(pm->disk_now, pm->disk_boot, sizeof(*pm->disk_now));
463 
464           mutex_init(&pm->lock, MUTEX_DEFAULT, IPL_NONE);
465           mutex_init(&pm->lk_flags, MUTEX_DEFAULT, IPL_SOFTCLOCK);
466 
467           return 0;
468 }
469 
470 /*
471  * Destroys a parity map; unless "force" is set, also cleans parity for any
472  * regions which were still in cooldown (but are not dirty on disk).
473  */
474 void
rf_paritymap_destroy(struct rf_paritymap * pm,int force)475 rf_paritymap_destroy(struct rf_paritymap *pm, int force)
476 {
477           int i;
478 
479           callout_halt(&pm->ticker, NULL); /* XXX stop? halt? */
480           callout_destroy(&pm->ticker);
481 
482           if (!force) {
483                     for (i = 0; i < RF_PARITYMAP_NREG; i++) {
484                               /* XXX check for > 0 ? */
485                               if (pm->current->state[i] < 0)
486                                         pm->current->state[i] = 0;
487                     }
488 
489                     rf_paritymap_write_locked(pm);
490           }
491 
492           mutex_destroy(&pm->lock);
493           mutex_destroy(&pm->lk_flags);
494 
495           kmem_free(pm->disk_boot, sizeof(struct rf_paritymap_ondisk));
496           kmem_free(pm->disk_now, sizeof(struct rf_paritymap_ondisk));
497           kmem_free(pm->current, sizeof(struct rf_paritymap_current));
498 }
499 
500 /*
501  * Rewrite parity, taking parity map into account; this is the equivalent of
502  * the old rf_RewriteParity, and is likewise to be called from a suitable
503  * thread and shouldn't have multiple copies running in parallel and so on.
504  *
505  * Note that the fictional regions are "cleaned" in one shot, so that very
506  * small RAIDs (useful for testing) will not experience potentially severe
507  * regressions in rewrite time.
508  */
509 int
rf_paritymap_rewrite(struct rf_paritymap * pm)510 rf_paritymap_rewrite(struct rf_paritymap *pm)
511 {
512           int i, ret_val = 0;
513           daddr_t reg_b, reg_e;
514 
515           /* Process only the actual regions. */
516           for (i = 0; i < pm->params.regions; i++) {
517                     mutex_enter(&pm->lock);
518                     if (isset(pm->disk_boot->bits, i)) {
519                               mutex_exit(&pm->lock);
520 
521                               reg_b = i * pm->region_size;
522                               reg_e = reg_b + pm->region_size;
523                               if (reg_e > pm->raid->totalSectors)
524                                         reg_e = pm->raid->totalSectors;
525 
526                               if (rf_RewriteParityRange(pm->raid, reg_b,
527                                   reg_e - reg_b)) {
528                                         ret_val = 1;
529                                         if (pm->raid->waitShutdown)
530                                                   return ret_val;
531                               } else {
532                                         mutex_enter(&pm->lock);
533                                         clrbit(pm->disk_boot->bits, i);
534                                         rf_paritymap_write_locked(pm);
535                                         mutex_exit(&pm->lock);
536                               }
537                     } else {
538                               mutex_exit(&pm->lock);
539                     }
540           }
541 
542           /* Now, clear the fictional regions, if any. */
543           rf_paritymap_forceclean(pm);
544           rf_paritymap_write(pm);
545 
546           return ret_val;
547 }
548 
549 /*
550  * How to merge the on-disk parity maps when reading them in from the
551  * various components; returns whether they differ.  In the case that
552  * they do differ, sets *dst to the union of *dst and *src.
553  *
554  * In theory, it should be safe to take the intersection (or just pick
555  * a single component arbitrarily), but the paranoid approach costs
556  * little.
557  *
558  * Appropriate locking, if any, is the responsibility of the caller.
559  */
560 int
rf_paritymap_merge(struct rf_paritymap_ondisk * dst,struct rf_paritymap_ondisk * src)561 rf_paritymap_merge(struct rf_paritymap_ondisk *dst,
562     struct rf_paritymap_ondisk *src)
563 {
564           int i, discrep = 0;
565 
566           for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
567                     if (dst->bits[i] != src->bits[i])
568                               discrep = 1;
569                     dst->bits[i] |= src->bits[i];
570           }
571 
572           return discrep;
573 }
574 
575 /*
576  * Detach a parity map from its RAID.  This is not meant to be applied except
577  * when unconfiguring the RAID after all I/O has been resolved, as otherwise
578  * an out-of-date parity map could be treated as current.
579  */
580 void
rf_paritymap_detach(RF_Raid_t * raidPtr)581 rf_paritymap_detach(RF_Raid_t *raidPtr)
582 {
583           if (raidPtr->parity_map == NULL)
584                     return;
585 
586           rf_lock_mutex2(raidPtr->iodone_lock);
587           struct rf_paritymap *pm = raidPtr->parity_map;
588           raidPtr->parity_map = NULL;
589           rf_unlock_mutex2(raidPtr->iodone_lock);
590           /* XXXjld is that enough locking?  Or too much? */
591           rf_paritymap_destroy(pm, 0);
592           kmem_free(pm, sizeof(*pm));
593 }
594 
595 /*
596  * Is this RAID set ineligible for parity-map use due to not actually
597  * having any parity?  (If so, rf_paritymap_attach is a no-op, but
598  * rf_paritymap_{get,set}_disable will still pointlessly act on the
599  * component labels.)
600  */
601 int
rf_paritymap_ineligible(RF_Raid_t * raidPtr)602 rf_paritymap_ineligible(RF_Raid_t *raidPtr)
603 {
604           return raidPtr->Layout.map->faultsTolerated == 0;
605 }
606 
607 /*
608  * Attach a parity map to a RAID set if appropriate.  Includes
609  * configure-time processing of parity-map fields of component label.
610  */
611 void
rf_paritymap_attach(RF_Raid_t * raidPtr,int force)612 rf_paritymap_attach(RF_Raid_t *raidPtr, int force)
613 {
614           RF_RowCol_t col;
615           int pm_use, pm_zap;
616           int g_tickms, g_ntick, g_regions;
617           int good;
618           RF_ComponentLabel_t *clabel;
619           u_int flags, regions;
620           struct rf_pmparams params;
621 
622           if (rf_paritymap_ineligible(raidPtr)) {
623                     /* There isn't any parity. */
624                     return;
625           }
626 
627           pm_use = 1;
628           pm_zap = 0;
629           g_tickms = DFL_TICKMS;
630           g_ntick = DFL_COOLDOWN;
631           g_regions = 0;
632 
633           /*
634            * Collect opinions on the set config.  If this is the initial
635            * config (raidctl -C), treat all labels as invalid, since
636            * there may be random data present.
637            */
638           if (!force) {
639                     for (col = 0; col < raidPtr->numCol; col++) {
640                               if (RF_DEAD_DISK(raidPtr->Disks[col].status))
641                                         continue;
642                               clabel = raidget_component_label(raidPtr, col);
643                               flags = clabel->parity_map_flags;
644                               /* Check for use by non-parity-map kernel. */
645                               if (clabel->parity_map_modcount
646                                   != clabel->mod_counter) {
647                                         flags &= ~RF_PMLABEL_WASUSED;
648                               }
649 
650                               if (flags & RF_PMLABEL_VALID) {
651                                         g_tickms = clabel->parity_map_tickms;
652                                         g_ntick = clabel->parity_map_ntick;
653                                         regions = clabel->parity_map_regions;
654                                         if (g_regions == 0)
655                                                   g_regions = regions;
656                                         else if (g_regions != regions) {
657                                                   pm_zap = 1; /* important! */
658                                         }
659 
660                                         if (flags & RF_PMLABEL_DISABLE) {
661                                                   pm_use = 0;
662                                         }
663                                         if (!(flags & RF_PMLABEL_WASUSED)) {
664                                                   pm_zap = 1;
665                                         }
666                               } else {
667                                         pm_zap = 1;
668                               }
669                     }
670           } else {
671                     pm_zap = 1;
672           }
673 
674           /* Finally, create and attach the parity map. */
675           if (pm_use) {
676                     params.cooldown = g_ntick;
677                     params.tickms = g_tickms;
678                     params.regions = g_regions;
679 
680                     raidPtr->parity_map = kmem_alloc(sizeof(struct rf_paritymap),
681                         KM_SLEEP);
682                     if (0 != rf_paritymap_init(raidPtr->parity_map, raidPtr,
683                               &params)) {
684                               /* It failed; do without. */
685                               kmem_free(raidPtr->parity_map,
686                                   sizeof(struct rf_paritymap));
687                               raidPtr->parity_map = NULL;
688                               return;
689                     }
690 
691                     if (g_regions == 0)
692                               /* Pick up the autoconfigured region count. */
693                               g_regions = raidPtr->parity_map->params.regions;
694 
695                     if (pm_zap) {
696                               good = raidPtr->parity_good && !force;
697 
698                               if (good)
699                                         rf_paritymap_forceclean(raidPtr->parity_map);
700                               else
701                                         rf_paritymap_invalidate(raidPtr->parity_map);
702                               /* This needs to be on disk before WASUSED is set. */
703                               rf_paritymap_write(raidPtr->parity_map);
704                     }
705           }
706 
707           /* Alter labels in-core to reflect the current view of things. */
708           for (col = 0; col < raidPtr->numCol; col++) {
709                     if (RF_DEAD_DISK(raidPtr->Disks[col].status))
710                               continue;
711                     clabel = raidget_component_label(raidPtr, col);
712 
713                     if (pm_use)
714                               flags = RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
715                     else
716                               flags = RF_PMLABEL_VALID | RF_PMLABEL_DISABLE;
717 
718                     clabel->parity_map_flags = flags;
719                     clabel->parity_map_tickms = g_tickms;
720                     clabel->parity_map_ntick = g_ntick;
721                     clabel->parity_map_regions = g_regions;
722                     raidflush_component_label(raidPtr, col);
723           }
724           /* Note that we're just in 'attach' here, and there won't
725              be any spare disks at this point. */
726 }
727 
728 /*
729  * For initializing the parity-map fields of a component label, both on
730  * initial creation and on reconstruct.  */
731 void
rf_paritymap_init_label(struct rf_paritymap * pm,RF_ComponentLabel_t * clabel)732 rf_paritymap_init_label(struct rf_paritymap *pm, RF_ComponentLabel_t *clabel)
733 {
734           if (pm != NULL) {
735                     clabel->parity_map_flags =
736                         RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
737                     clabel->parity_map_tickms = pm->params.tickms;
738                     clabel->parity_map_ntick = pm->params.cooldown;
739                     /*
740                      * XXXjld: If the number of regions is changed on disk, and
741                      * then a new component is labeled before the next configure,
742                      * then it will get the old value and they will conflict on
743                      * the next boot (and the default will be used instead).
744                      */
745                     clabel->parity_map_regions = pm->params.regions;
746           } else {
747                     /*
748                      * XXXjld: if the map is disabled, and all the components are
749                      * replaced without an intervening unconfigure/reconfigure,
750                      * then it will become enabled on the next unconfig/reconfig.
751                      */
752           }
753 }
754 
755 
756 /* Will the parity map be disabled next time? */
757 int
rf_paritymap_get_disable(RF_Raid_t * raidPtr)758 rf_paritymap_get_disable(RF_Raid_t *raidPtr)
759 {
760           RF_ComponentLabel_t *clabel;
761           RF_RowCol_t col;
762           int dis;
763 
764           dis = 0;
765           for (col = 0; col < raidPtr->numCol; col++) {
766                     if (RF_DEAD_DISK(raidPtr->Disks[col].status))
767                               continue;
768                     clabel = raidget_component_label(raidPtr, col);
769                     if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
770                               dis = 1;
771           }
772         for (col = 0; col < raidPtr->numSpare; col++) {
773                     if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
774                         continue;
775                 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
776                 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
777                         dis = 1;
778         }
779 
780           return dis;
781 }
782 
783 /* Set whether the parity map will be disabled next time. */
784 void
rf_paritymap_set_disable(RF_Raid_t * raidPtr,int dis)785 rf_paritymap_set_disable(RF_Raid_t *raidPtr, int dis)
786 {
787           RF_ComponentLabel_t *clabel;
788           RF_RowCol_t col;
789 
790           for (col = 0; col < raidPtr->numCol; col++) {
791                     if (RF_DEAD_DISK(raidPtr->Disks[col].status))
792                               continue;
793                     clabel = raidget_component_label(raidPtr, col);
794                     if (dis)
795                               clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
796                     else
797                               clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
798                     raidflush_component_label(raidPtr, col);
799           }
800 
801           /* update any used spares as well */
802           for (col = 0; col < raidPtr->numSpare; col++) {
803                     if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
804                               continue;
805 
806                     clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
807                     if (dis)
808                               clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
809                     else
810                               clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
811                     raidflush_component_label(raidPtr, raidPtr->numCol+col);
812           }
813 }
814