1 /*        $NetBSD: rf_reconbuffer.c,v 1.27 2021/07/23 00:54:45 oster Exp $      */
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /***************************************************
30  *
31  * rf_reconbuffer.c -- reconstruction buffer manager
32  *
33  ***************************************************/
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.27 2021/07/23 00:54:45 oster Exp $");
37 
38 #include "rf_raid.h"
39 #include "rf_reconbuffer.h"
40 #include "rf_acctrace.h"
41 #include "rf_etimer.h"
42 #include "rf_general.h"
43 #include "rf_revent.h"
44 #include "rf_reconutil.h"
45 #include "rf_nwayxor.h"
46 
/*
 * Debug tracing helpers.  With DEBUG defined, DprintfN passes its format
 * string and N arguments to printf() whenever rf_reconbufferDebug is
 * non-zero; without DEBUG every DprintfN expands to an empty statement and
 * its arguments are not evaluated.
 *
 * Each macro is wrapped in do { } while (0) so that it always behaves as
 * exactly one statement.  That keeps it safe as the body of an unbraced
 * if/else: it cannot capture a following "else", and the trailing ';'
 * supplied by the caller does not terminate the enclosing if early.
 */
#ifdef DEBUG

#define Dprintf1(s,a) \
	do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) \
	do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)

#else /* DEBUG */

#define Dprintf1(s,a) do { } while (0)
#define Dprintf2(s,a,b) do { } while (0)
#define Dprintf3(s,a,b,c) do { } while (0)
#define Dprintf4(s,a,b,c,d) do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)

#endif /* DEBUG */
64 
/*****************************************************************************
 *
 * Submit a reconstruction buffer to the manager for XOR.  We can only
 * submit a buffer if (1) we can xor into an existing buffer, which
 * means we don't have to acquire a new one, (2) we can acquire a
 * floating recon buffer, or (3) the caller has indicated that we are
 * allowed to keep the submitted buffer.
 *
 * Returns non-zero if and only if we were not able to submit.
 * In this case, we append the current disk ID to the wait list on the
 * indicated RU, so that it will be re-enabled when we acquire a buffer
 * for this RU.
 *
 ****************************************************************************/
79 
80 /*
81  * nWayXorFuncs[i] is a pointer to a function that will xor "i"
82  * bufs into the accumulating sum.
83  */
84 static const RF_VoidFuncPtr nWayXorFuncs[] = {
85           NULL,
86           (RF_VoidFuncPtr) rf_nWayXor1,
87           (RF_VoidFuncPtr) rf_nWayXor2,
88           (RF_VoidFuncPtr) rf_nWayXor3,
89           (RF_VoidFuncPtr) rf_nWayXor4,
90           (RF_VoidFuncPtr) rf_nWayXor5,
91           (RF_VoidFuncPtr) rf_nWayXor6,
92           (RF_VoidFuncPtr) rf_nWayXor7,
93           (RF_VoidFuncPtr) rf_nWayXor8,
94           (RF_VoidFuncPtr) rf_nWayXor9
95 };
96 
97 /*
98  * rbuf          - the recon buffer to submit
99  * keep_it       - whether we can keep this buffer or we have to return it
100  * use_committed - whether to use a committed or an available recon buffer
101  */
102 int
rf_SubmitReconBuffer(RF_ReconBuffer_t * rbuf,int keep_it,int use_committed)103 rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed)
104 {
105           const RF_LayoutSW_t *lp;
106           int     rc;
107 
108           lp = rbuf->raidPtr->Layout.map;
109           rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
110           return (rc);
111 }
112 
113 /*
114  * rbuf          - the recon buffer to submit
115  * keep_it       - whether we can keep this buffer or we have to return it
116  * use_committed - whether to use a committed or an available recon buffer
117  */
118 int
rf_SubmitReconBufferBasic(RF_ReconBuffer_t * rbuf,int keep_it,int use_committed)119 rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it,
120                                 int use_committed)
121 {
122           RF_Raid_t *raidPtr = rbuf->raidPtr;
123           RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
124           RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl;
125           RF_ReconParityStripeStatus_t *pssPtr;
126           RF_ReconBuffer_t *targetRbuf, *t = NULL;          /* temporary rbuf
127                                                                        * pointers */
128           void *ta;           /* temporary data buffer pointer */
129           RF_CallbackValueDesc_t *cb, *p;
130           int     retcode = 0;
131 
132           RF_Etimer_t timer;
133 
134           /* makes no sense to have a submission from the failed disk */
135           RF_ASSERT(rbuf);
136           RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
137 
138           Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n",
139               rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
140 
141           RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
142 
143           rf_lock_mutex2(reconCtrlPtr->rb_mutex);
144           while(reconCtrlPtr->rb_lock) {
145                     rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
146           }
147           reconCtrlPtr->rb_lock = 1;
148           rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
149 
150           pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL);
151           RF_ASSERT(pssPtr);  /* if it didn't exist, we wouldn't have gotten
152                                          * an rbuf for it */
153 
154           /* check to see if enough buffers have accumulated to do an XOR.  If
155            * so, there's no need to acquire a floating rbuf.  Before we can do
156            * any XORing, we must have acquired a destination buffer.  If we
157            * have, then we can go ahead and do the XOR if (1) including this
158            * buffer, enough bufs have accumulated, or (2) this is the last
159            * submission for this stripe. Otherwise, we have to go acquire a
160            * floating rbuf. */
161 
162           targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
163           if ((targetRbuf != NULL) &&
164               ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
165                     pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;          /* install this buffer */
166                     Dprintf2("RECON: col %d invoking a %d-way XOR\n", rbuf->col, pssPtr->xorBufCount);
167                     RF_ETIMER_START(timer);
168                     rf_MultiWayReconXor(raidPtr, pssPtr);
169                     RF_ETIMER_STOP(timer);
170                     RF_ETIMER_EVAL(timer);
171                     raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
172                     if (!keep_it) {
173 #if RF_ACC_TRACE > 0
174                               raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
175                               RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
176                               RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
177                               raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
178                                   RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
179                               RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
180 
181                               rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
182 #endif
183                     }
184                     rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
185 
186                     /* if use_committed is on, we _must_ consume a buffer off the
187                      * committed list. */
188                     if (use_committed) {
189                               t = reconCtrlPtr->committedRbufs;
190                               RF_ASSERT(t);
191                               reconCtrlPtr->committedRbufs = t->next;
192                               rf_ReleaseFloatingReconBuffer(raidPtr, t);
193                     }
194                     if (keep_it) {
195                               RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
196                               rf_lock_mutex2(reconCtrlPtr->rb_mutex);
197                               reconCtrlPtr->rb_lock = 0;
198                               rf_broadcast_cond2(reconCtrlPtr->rb_cv);
199                               rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
200                               rf_FreeReconBuffer(rbuf);
201                               return (retcode);
202                     }
203                     goto out;
204           }
205           /* set the value of "t", which we'll use as the rbuf from here on */
206           if (keep_it) {
207                     t = rbuf;
208           } else {
209                     if (use_committed) {          /* if a buffer has been committed to
210                                                    * us, use it */
211                               t = reconCtrlPtr->committedRbufs;
212                               RF_ASSERT(t);
213                               reconCtrlPtr->committedRbufs = t->next;
214                               t->next = NULL;
215                     } else
216                               if (reconCtrlPtr->floatingRbufs) {
217                                         t = reconCtrlPtr->floatingRbufs;
218                                         reconCtrlPtr->floatingRbufs = t->next;
219                                         t->next = NULL;
220                               }
221           }
222 
223           /* If we weren't able to acquire a buffer, append to the end of the
224            * buf list in the recon ctrl struct. */
225           if (!t) {
226                     RF_ASSERT(!keep_it && !use_committed);
227                     Dprintf1("RECON: col %d failed to acquire floating rbuf\n", rbuf->col);
228 
229                     raidPtr->procsInBufWait++;
230                     if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
231                               printf("Buffer wait deadlock detected.  Exiting.\n");
232                               rf_PrintPSStatusTable(raidPtr);
233                               RF_PANIC();
234                     }
235                     pssPtr->flags |= RF_PSS_BUFFERWAIT;
236                     cb = rf_AllocCallbackValueDesc(raidPtr); /* append to buf wait list in
237                                                                         * recon ctrl structure */
238                     cb->col = rbuf->col;
239                     cb->v = rbuf->parityStripeID;
240                     cb->next = NULL;
241                     if (!reconCtrlPtr->bufferWaitList)
242                               reconCtrlPtr->bufferWaitList = cb;
243                     else {              /* might want to maintain head/tail pointers
244                                          * here rather than search for end of list */
245                               for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
246                               p->next = cb;
247                     }
248                     retcode = 1;
249                     goto out;
250           }
251           Dprintf1("RECON: col %d acquired rbuf\n", rbuf->col);
252 #if RF_ACC_TRACE > 0
253           RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
254           RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
255           raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
256               RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
257           RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
258 
259           rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
260 #endif
261 
262           /* initialize the buffer */
263           if (t != rbuf) {
264                     t->col = reconCtrlPtr->fcol;
265                     t->parityStripeID = rbuf->parityStripeID;
266                     t->which_ru = rbuf->which_ru;
267                     t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
268                     t->spCol = rbuf->spCol;
269                     t->spOffset = rbuf->spOffset;
270 
271                     ta = t->buffer;
272                     t->buffer = rbuf->buffer;
273                     rbuf->buffer = ta;  /* swap buffers */
274           }
275           /* the first installation always gets installed as the destination
276            * buffer. subsequent installations get stacked up to allow for
277            * multi-way XOR */
278           if (!pssPtr->rbuf) {
279                     pssPtr->rbuf = t;
280                     t->count = 1;
281           } else
282                     pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;   /* install this buffer */
283 
284           rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);      /* the buffer is full if
285                                                                                                                * G=2 */
286 
287 out:
288           RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
289           rf_lock_mutex2(reconCtrlPtr->rb_mutex);
290           reconCtrlPtr->rb_lock = 0;
291           rf_broadcast_cond2(reconCtrlPtr->rb_cv);
292           rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
293           return (retcode);
294 }
295 /* pssPtr - the pss descriptor for this parity stripe */
296 int
rf_MultiWayReconXor(RF_Raid_t * raidPtr,RF_ReconParityStripeStatus_t * pssPtr)297 rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr)
298 {
299           int     i, numBufs = pssPtr->xorBufCount;
300           int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
301           RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
302           RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
303 
304           RF_ASSERT(pssPtr->rbuf != NULL);
305           RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
306 #ifdef _KERNEL
307 #ifndef __NetBSD__
308           thread_block();               /* yield the processor before doing a big XOR */
309 #endif
310 #endif                                  /* _KERNEL */
311           /*
312          * XXX
313          *
314          * What if more than 9 bufs?
315          */
316           nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
317 
318           /* release all the reconstruction buffers except the last one, which
319            * belongs to the disk whose submission caused this XOR to take place */
320           for (i = 0; i < numBufs - 1; i++) {
321                     if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
322                               rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]);
323                     else
324                               if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
325                                         rf_FreeReconBuffer(rbufs[i]);
326                               else
327                                         RF_ASSERT(0);
328           }
329           targetRbuf->count += pssPtr->xorBufCount;
330           pssPtr->xorBufCount = 0;
331           return (0);
332 }
333 /* removes one full buffer from one of the full-buffer lists and returns it.
334  *
335  * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
336  */
337 RF_ReconBuffer_t *
rf_GetFullReconBuffer(RF_ReconCtrl_t * reconCtrlPtr)338 rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
339 {
340           RF_ReconBuffer_t *p;
341 
342           rf_lock_mutex2(reconCtrlPtr->rb_mutex);
343           while(reconCtrlPtr->rb_lock) {
344                     rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
345           }
346           reconCtrlPtr->rb_lock = 1;
347           rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
348 
349           if ((p = reconCtrlPtr->fullBufferList) != NULL) {
350                     reconCtrlPtr->fullBufferList = p->next;
351                     p->next = NULL;
352           }
353           rf_lock_mutex2(reconCtrlPtr->rb_mutex);
354           reconCtrlPtr->rb_lock = 0;
355           rf_broadcast_cond2(reconCtrlPtr->rb_cv);
356           rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
357           return (p);
358 }
359 
360 
361 /* if the reconstruction buffer is full, move it to the full list,
362  * which is maintained sorted by failed disk sector offset
363  *
364  * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
365 int
rf_CheckForFullRbuf(RF_Raid_t * raidPtr,RF_ReconCtrl_t * reconCtrl,RF_ReconParityStripeStatus_t * pssPtr,int numDataCol)366 rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
367                         RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
368 {
369           RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
370 
371           if (rbuf->count == numDataCol) {
372                     raidPtr->numFullReconBuffers++;
373                     Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
374                         (long) rbuf->parityStripeID, rbuf->which_ru);
375                     if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
376                               Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
377                                   (long) rbuf->parityStripeID, rbuf->which_ru);
378                               rbuf->next = reconCtrl->fullBufferList;
379                               reconCtrl->fullBufferList = rbuf;
380                     } else {
381                               for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
382                               rbuf->next = p;
383                               pt->next = rbuf;
384                               Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
385                                   (long) rbuf->parityStripeID, rbuf->which_ru);
386                     }
387                     rbuf->pssPtr = pssPtr;
388                     pssPtr->rbuf = NULL;
389                     rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY);
390           }
391           return (0);
392 }
393 
394 
395 /* release a floating recon buffer for someone else to use.
396  * assumes the rb_mutex is LOCKED at entry
397  */
398 void
rf_ReleaseFloatingReconBuffer(RF_Raid_t * raidPtr,RF_ReconBuffer_t * rbuf)399 rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf)
400 {
401           RF_ReconCtrl_t *rcPtr = raidPtr->reconControl;
402           RF_CallbackValueDesc_t *cb;
403 
404           Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
405               (long) rbuf->parityStripeID, rbuf->which_ru);
406 
407           /* if anyone is waiting on buffers, wake one of them up.  They will
408            * subsequently wake up anyone else waiting on their RU */
409           if (rcPtr->bufferWaitList) {
410                     rbuf->next = rcPtr->committedRbufs;
411                     rcPtr->committedRbufs = rbuf;
412                     cb = rcPtr->bufferWaitList;
413                     rcPtr->bufferWaitList = cb->next;
414                     rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've
415                                                                                                                          * committed a buffer */
416                     rf_FreeCallbackValueDesc(raidPtr, cb);
417                     raidPtr->procsInBufWait--;
418           } else {
419                     rbuf->next = rcPtr->floatingRbufs;
420                     rcPtr->floatingRbufs = rbuf;
421           }
422 }
423