1 /*	$OpenBSD: rf_stripelocks.c,v 1.5 2002/12/16 07:01:05 tdeval Exp $	*/
2 /*	$NetBSD: rf_stripelocks.c,v 1.5 2000/01/08 23:45:05 oster Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Authors: Mark Holland, Jim Zelenka
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 /*
32  * stripelocks.c -- Code to lock stripes for read and write access.
33  *
34  * The code distinguishes between read locks and write locks. There can be
35  * as many readers to given stripe as desired. When a write request comes
36  * in, no further readers are allowed to enter, and all subsequent requests
37  * are queued in FIFO order. When the number of readers goes to zero, the
38  * writer is given the lock. When a writer releases the lock, the list of
39  * queued requests is scanned, and all readers up to the next writer are
40  * given the lock.
41  *
42  * The lock table size must be one less than a power of two, but HASH_STRIPEID
43  * is the only function that requires this.
44  *
45  * The code now supports "range locks". When you ask to lock a stripe, you
46  * specify a range of addresses in that stripe that you want to lock. When
47  * you acquire the lock, you've locked only this range of addresses, and
48  * other threads can concurrently read/write any non-overlapping portions
49  * of the stripe. The "addresses" that you lock are abstract in that you
50  * can pass in anything you like. The expectation is that you'll pass in
51  * the range of physical disk offsets of the parity bits you're planning
52  * to update. The idea behind this, of course, is to allow sub-stripe
53  * locking. The implementation is perhaps not the best imaginable; in the
54  * worst case a lock release is O(n^2) in the total number of outstanding
55  * requests to a given stripe. Note that if you're striping with a
56  * stripe unit size equal to an entire disk (i.e. not striping), there will
57  * be only one stripe and you may spend some significant number of cycles
58  * searching through stripe lock descriptors.
59  */
60 
61 #include "rf_types.h"
62 #include "rf_raid.h"
63 #include "rf_stripelocks.h"
64 #include "rf_alloclist.h"
65 #include "rf_general.h"
66 #include "rf_freelist.h"
67 #include "rf_debugprint.h"
68 #include "rf_driver.h"
69 #include "rf_shutdown.h"
70 
71 #define	Dprintf1(s,a)							\
72 	rf_debug_printf(s, (void *)((unsigned long)a),			\
73 	    NULL, NULL, NULL, NULL, NULL, NULL, NULL)
74 #define	Dprintf2(s,a,b)							\
75 	rf_debug_printf(s, (void *)((unsigned long)a),			\
76 	    (void *)((unsigned long)b), NULL, NULL, NULL, NULL, NULL, NULL)
77 #define	Dprintf3(s,a,b,c)						\
78 	rf_debug_printf(s, (void *)((unsigned long)a),			\
79 	    (void *)((unsigned long)b), (void *)((unsigned long)c),	\
80 	    NULL, NULL, NULL, NULL, NULL)
81 #define	Dprintf4(s,a,b,c,d)						\
82 	rf_debug_printf(s, (void *)((unsigned long)a),			\
83 	    (void *)((unsigned long)b), (void *)((unsigned long)c),	\
84 	    (void *)((unsigned long)d), NULL, NULL, NULL, NULL)
85 #define	Dprintf5(s,a,b,c,d,e)						\
86 	rf_debug_printf(s, (void *)((unsigned long)a),			\
87 	    (void *)((unsigned long)b), (void *)((unsigned long)c),	\
88 	    (void *)((unsigned long)d), (void *)((unsigned long)e),	\
89 	    NULL, NULL, NULL)
90 #define	Dprintf6(s,a,b,c,d,e,f)						\
91 	rf_debug_printf(s, (void *)((unsigned long)a),			\
92 	    (void *)((unsigned long)b), (void *)((unsigned long)c),	\
93 	    (void *)((unsigned long)d), (void *)((unsigned long)e),	\
94 	    (void *)((unsigned long)f), NULL, NULL)
95 #define	Dprintf7(s,a,b,c,d,e,f,g)					\
96 	rf_debug_printf(s, (void *)((unsigned long)a),			\
97 	    (void *)((unsigned long)b), (void *)((unsigned long)c),	\
98 	    (void *)((unsigned long)d), (void *)((unsigned long)e),	\
99 	    (void *)((unsigned long)f), (void *)((unsigned long)g), NULL)
100 #define	Dprintf8(s,a,b,c,d,e,f,g,h)					\
101 	rf_debug_printf(s, (void *)((unsigned long)a),			\
102 	    (void *)((unsigned long)b), (void *)((unsigned long)c),	\
103 	    (void *)((unsigned long)d), (void *)((unsigned long)e),	\
104 	    (void *)((unsigned long)f), (void *)((unsigned long)g),	\
105 	    (void *)((unsigned long)h))
106 
107 #define	FLUSH
108 
109 #define	HASH_STRIPEID(_sid_)	((_sid_) & (rf_lockTableSize-1))
110 
111 void rf_AddToWaitersQueue(RF_LockTableEntry_t *, RF_StripeLockDesc_t *,
112 	RF_LockReqDesc_t *);
113 RF_StripeLockDesc_t *rf_AllocStripeLockDesc(RF_StripeNum_t);
114 void rf_FreeStripeLockDesc(RF_StripeLockDesc_t *);
115 void rf_PrintLockedStripes(RF_LockTableEntry_t *);
116 
117 /*
118  * Determines if two ranges overlap. Always yields false if either start
119  * value is negative.
120  */
121 #define	SINGLE_RANGE_OVERLAP(_strt1,_stop1,_strt2,_stop2)		\
122 	((_strt1 >= 0) && (_strt2 >= 0) &&				\
123 	 (RF_MAX(_strt1, _strt2) <= RF_MIN(_stop1, _stop2)))
124 
125 /*
126  * Determines if any of the ranges specified in the two lock descriptors
127  * overlap each other.
128  */
129 #define	RANGE_OVERLAP(_cand,_pred)					\
130 	(SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop,		\
131 			      (_pred)->start,  (_pred)->stop) ||	\
132 	 SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2,		\
133 			      (_pred)->start,  (_pred)->stop) ||	\
134 	 SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop,		\
135 			      (_pred)->start2, (_pred)->stop2) ||	\
136 	 SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2,		\
137 			      (_pred)->start2, (_pred)->stop2))
138 
139 /*
140  * Determines if a candidate lock request conflicts with a predecessor
141  * lock req. Note that the arguments are not interchangeable.
142  * The rules are:
143  *  A candidate read conflicts with a predecessor write if any ranges overlap.
144  *  A candidate write conflicts with a predecessor read if any ranges overlap.
145  *  A candidate write conflicts with a predecessor write if any ranges overlap.
146  */
147 #define	STRIPELOCK_CONFLICT(_cand,_pred)				\
148 	(RANGE_OVERLAP((_cand), (_pred)) &&				\
149 	 (((((_cand)->type == RF_IO_TYPE_READ) &&			\
150 	    ((_pred)->type == RF_IO_TYPE_WRITE)) ||			\
151 	   (((_cand)->type == RF_IO_TYPE_WRITE) &&			\
152 	    ((_pred)->type == RF_IO_TYPE_READ)) ||			\
153 	   (((_cand)->type == RF_IO_TYPE_WRITE) &&			\
154 	    ((_pred)->type == RF_IO_TYPE_WRITE)))))
155 
156 static RF_FreeList_t *rf_stripelock_freelist;
157 #define	RF_MAX_FREE_STRIPELOCK		128
158 #define	RF_STRIPELOCK_INC		  8
159 #define	RF_STRIPELOCK_INITIAL		 32
160 
161 void rf_ShutdownStripeLockFreeList(void *);
162 void rf_RaidShutdownStripeLocks(void *);
163 
164 void
rf_ShutdownStripeLockFreeList(void * ignored)165 rf_ShutdownStripeLockFreeList(void *ignored)
166 {
167 	RF_FREELIST_DESTROY(rf_stripelock_freelist, next,
168 	    (RF_StripeLockDesc_t *));
169 }
170 
171 int
rf_ConfigureStripeLockFreeList(RF_ShutdownList_t ** listp)172 rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp)
173 {
174 	unsigned mask;
175 	int rc;
176 
177 	RF_FREELIST_CREATE(rf_stripelock_freelist, RF_MAX_FREE_STRIPELOCK,
178 	    RF_STRIPELOCK_INITIAL, sizeof(RF_StripeLockDesc_t));
179 	rc = rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL);
180 	if (rc) {
181 		RF_ERRORMSG3("Unable to add to shutdown list file %s"
182 		    " line %d rc=%d.\n", __FILE__, __LINE__, rc);
183 		rf_ShutdownStripeLockFreeList(NULL);
184 		return (rc);
185 	}
186 	RF_FREELIST_PRIME(rf_stripelock_freelist, RF_STRIPELOCK_INITIAL, next,
187 	    (RF_StripeLockDesc_t *));
188 	for (mask = 0x1; mask; mask <<= 1)
189 		if (rf_lockTableSize == mask)
190 			break;
191 	if (!mask) {
192 		printf("[WARNING:  lock table size must be a power of two."
193 		    " Setting to %d.]\n", RF_DEFAULT_LOCK_TABLE_SIZE);
194 		rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE;
195 	}
196 	return (0);
197 }
198 
199 RF_LockTableEntry_t *
rf_MakeLockTable(void)200 rf_MakeLockTable(void)
201 {
202 	RF_LockTableEntry_t *lockTable;
203 	int i, rc;
204 
205 	RF_Calloc(lockTable, ((int) rf_lockTableSize),
206 	    sizeof(RF_LockTableEntry_t), (RF_LockTableEntry_t *));
207 	if (lockTable == NULL)
208 		return (NULL);
209 	for (i = 0; i < rf_lockTableSize; i++) {
210 		rc = rf_mutex_init(&lockTable[i].mutex);
211 		if (rc) {
212 			RF_ERRORMSG3("Unable to init mutex file %s line %d"
213 			    " rc=%d.\n", __FILE__, __LINE__, rc);
214 			/* XXX Clean up other mutexes. */
215 			return (NULL);
216 		}
217 	}
218 	return (lockTable);
219 }
220 
221 void
rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable)222 rf_ShutdownStripeLocks(RF_LockTableEntry_t *lockTable)
223 {
224 	int i;
225 
226 	if (rf_stripeLockDebug) {
227 		rf_PrintLockedStripes(lockTable);
228 	}
229 	for (i = 0; i < rf_lockTableSize; i++) {
230 		rf_mutex_destroy(&lockTable[i].mutex);
231 	}
232 	RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t));
233 }
234 
235 void
rf_RaidShutdownStripeLocks(void * arg)236 rf_RaidShutdownStripeLocks(void *arg)
237 {
238 	RF_Raid_t *raidPtr = (RF_Raid_t *) arg;
239 	rf_ShutdownStripeLocks(raidPtr->lockTable);
240 }
241 
242 int
rf_ConfigureStripeLocks(RF_ShutdownList_t ** listp,RF_Raid_t * raidPtr,RF_Config_t * cfgPtr)243 rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
244     RF_Config_t *cfgPtr)
245 {
246 	int rc;
247 
248 	raidPtr->lockTable = rf_MakeLockTable();
249 	if (raidPtr->lockTable == NULL)
250 		return (ENOMEM);
251 	rc = rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr);
252 	if (rc) {
253 		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
254 		    " rc=%d.\n", __FILE__, __LINE__, rc);
255 		rf_ShutdownStripeLocks(raidPtr->lockTable);
256 		return (rc);
257 	}
258 	return (0);
259 }
260 
261 /*
262  * Returns 0 if you've got the lock, and non-zero if you have to wait.
263  * If and only if you have to wait, we'll cause cbFunc to get invoked
264  * with cbArg when you are granted the lock. We store a tag in *releaseTag
265  * that you need to give back to us when you release the lock.
266  */
267 int
rf_AcquireStripeLock(RF_LockTableEntry_t * lockTable,RF_StripeNum_t stripeID,RF_LockReqDesc_t * lockReqDesc)268 rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID,
269     RF_LockReqDesc_t *lockReqDesc)
270 {
271 	RF_StripeLockDesc_t *lockDesc;
272 	RF_LockReqDesc_t *p;
273 	int tid = 0, hashval = HASH_STRIPEID(stripeID);
274 	int retcode = 0;
275 
276 	RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type));
277 
278 	if (rf_stripeLockDebug) {
279 		if (stripeID == -1)
280 			Dprintf1("[%d] Lock acquisition supressed"
281 			    " (stripeID == -1).\n", tid);
282 		else {
283 			Dprintf8("[%d] Trying to acquire stripe lock table"
284 			    " 0x%lx SID %ld type %c range %ld-%ld, range2"
285 			    " %ld-%ld hashval %d.\n", tid,
286 			    (unsigned long) lockTable, stripeID,
287 			    lockReqDesc->type, lockReqDesc->start,
288 			    lockReqDesc->stop, lockReqDesc->start2,
289 			    lockReqDesc->stop2);
290 			Dprintf3("[%d] lock %ld hashval %d.\n", tid, stripeID,
291 			    hashval);
292 			FLUSH;
293 		}
294 	}
295 	if (stripeID == -1)
296 		return (0);
297 	lockReqDesc->next = NULL;	/* Just to be sure. */
298 
299 	RF_LOCK_MUTEX(lockTable[hashval].mutex);
300 	for (lockDesc = lockTable[hashval].descList; lockDesc;
301 	     lockDesc = lockDesc->next) {
302 		if (lockDesc->stripeID == stripeID)
303 			break;
304 	}
305 
306 	if (!lockDesc) {
307 		/* No entry in table => no one reading or writing. */
308 		lockDesc = rf_AllocStripeLockDesc(stripeID);
309 		lockDesc->next = lockTable[hashval].descList;
310 		lockTable[hashval].descList = lockDesc;
311 		if (lockReqDesc->type == RF_IO_TYPE_WRITE)
312 			lockDesc->nWriters++;
313 		lockDesc->granted = lockReqDesc;
314 		if (rf_stripeLockDebug) {
315 			Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld"
316 			    " %ld-%ld granted.\n", tid, stripeID,
317 			    lockReqDesc->type,
318 			    lockReqDesc->start, lockReqDesc->stop,
319 			    lockReqDesc->start2, lockReqDesc->stop2);
320 			FLUSH;
321 		}
322 	} else {
323 
324 		if (lockReqDesc->type == RF_IO_TYPE_WRITE)
325 			lockDesc->nWriters++;
326 
327 		if (lockDesc->nWriters == 0) {
328 			/*
329 			 * No need to search any lists if there are no writers
330 			 * anywhere.
331 			 */
332 			lockReqDesc->next = lockDesc->granted;
333 			lockDesc->granted = lockReqDesc;
334 			if (rf_stripeLockDebug) {
335 				Dprintf7("[%d] no writers: lock %ld %c %ld-%ld"
336 				    " %ld-%ld granted.\n", tid,
337 				    stripeID, lockReqDesc->type,
338 				    lockReqDesc->start, lockReqDesc->stop,
339 				    lockReqDesc->start2, lockReqDesc->stop2);
340 				FLUSH;
341 			}
342 		} else {
343 			/*
344 			 * Search the granted & waiting lists for a conflict.
345 			 * Stop searching as soon as we find one.
346 			 */
347 			retcode = 0;
348 			for (p = lockDesc->granted; p; p = p->next)
349 				if (STRIPELOCK_CONFLICT(lockReqDesc, p)) {
350 					retcode = 1;
351 					break;
352 				}
353 			if (!retcode)
354 				for (p = lockDesc->waitersH; p; p = p->next)
355 					if (STRIPELOCK_CONFLICT(lockReqDesc, p))
356 					{
357 						retcode = 2;
358 						break;
359 					}
360 			if (!retcode) {
361 				/* No conflicts found => grant lock */
362 				lockReqDesc->next = lockDesc->granted;
363 				lockDesc->granted = lockReqDesc;
364 				if (rf_stripeLockDebug) {
365 					Dprintf7("[%d] no conflicts: lock %ld"
366 					    " %c %ld-%ld %ld-%ld granted.\n",
367 					    tid, stripeID, lockReqDesc->type,
368 					    lockReqDesc->start,
369 					    lockReqDesc->stop,
370 					    lockReqDesc->start2,
371 					    lockReqDesc->stop2);
372 					FLUSH;
373 				}
374 			} else {
375 				if (rf_stripeLockDebug) {
376 					Dprintf6("[%d] conflict: lock %ld %c"
377 					    " %ld-%ld hashval=%d not"
378 					    " granted.\n", tid, stripeID,
379 					    lockReqDesc->type,
380 					    lockReqDesc->start,
381 					    lockReqDesc->stop,
382 					    hashval);
383 					Dprintf3("[%d] lock %ld retcode=%d.\n",
384 					    tid, stripeID, retcode);
385 					FLUSH;
386 				}
387 				/* Conflict => the current access must wait. */
388 				rf_AddToWaitersQueue(lockTable, lockDesc,
389 				    lockReqDesc);
390 			}
391 		}
392 	}
393 
394 	RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
395 	return (retcode);
396 }
397 
398 void
rf_ReleaseStripeLock(RF_LockTableEntry_t * lockTable,RF_StripeNum_t stripeID,RF_LockReqDesc_t * lockReqDesc)399 rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID,
400     RF_LockReqDesc_t *lockReqDesc)
401 {
402 	RF_StripeLockDesc_t *lockDesc, *ld_t;
403 	RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t;
404 	RF_IoType_t type = lockReqDesc->type;
405 	int tid = 0, hashval = HASH_STRIPEID(stripeID);
406 	int release_it, consider_it;
407 	RF_LockReqDesc_t *candidate, *candidate_t, *predecessor;
408 
409 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
410 
411 	if (rf_stripeLockDebug) {
412 		if (stripeID == -1)
413 			Dprintf1("[%d] Lock release supressed"
414 			    " (stripeID == -1).\n", tid);
415 		else {
416 			Dprintf8("[%d] Releasing stripe lock on stripe ID %ld,"
417 			    " type %c range %ld-%ld %ld-%ld table 0x%lx.\n",
418 			    tid, stripeID, lockReqDesc->type,
419 			    lockReqDesc->start, lockReqDesc->stop,
420 			    lockReqDesc->start2, lockReqDesc->stop2,
421 			    lockTable);
422 			FLUSH;
423 		}
424 	}
425 	if (stripeID == -1)
426 		return;
427 
428 	RF_LOCK_MUTEX(lockTable[hashval].mutex);
429 
430 	/* Find the stripe lock descriptor. */
431 	for (ld_t = NULL, lockDesc = lockTable[hashval].descList;
432 	     lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) {
433 		if (lockDesc->stripeID == stripeID)
434 			break;
435 	}
436 	RF_ASSERT(lockDesc);	/*
437 				 * Major error to release a lock that doesn't
438 				 * exist.
439 				 */
440 
441 	/* Find the stripe lock request descriptor & delete it from the list. */
442 	for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next)
443 		if (lr == lockReqDesc)
444 			break;
445 
446 	RF_ASSERT(lr && (lr == lockReqDesc));	/*
447 						 * Major error to release a
448 						 * lock that hasn't been
449 						 * granted.
450 						 */
451 	if (lr_t)
452 		lr_t->next = lr->next;
453 	else {
454 		RF_ASSERT(lr == lockDesc->granted);
455 		lockDesc->granted = lr->next;
456 	}
457 	lr->next = NULL;
458 
459 	if (lockReqDesc->type == RF_IO_TYPE_WRITE)
460 		lockDesc->nWriters--;
461 
462 	/*
463 	 * Search through the waiters list to see if anyone needs to be woken
464 	 * up. For each such descriptor in the wait list, we check it against
465 	 * everything granted and against everything _in front_ of it in the
466 	 * waiters queue. If it conflicts with none of these, we release it.
467 	 *
468 	 * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED LIST
469 	 * HERE.
470 	 * This will roach the case where the callback tries to acquire a new
471 	 * lock in the same stripe. There are some asserts to try and detect
472 	 * this.
473 	 *
474 	 * We apply 2 performance optimizations:
475 	 * (1) If releasing this lock results in no more writers to this
476 	 *     stripe, we just release everybody waiting, since we place no
477 	 *     restrictions on the number of concurrent reads.
478 	 * (2) We consider as candidates for wakeup only those waiters that
479 	 *     have a range overlap with either the descriptor being woken up
480 	 *     or with something in the callbacklist (i.e. something we've
481 	 *     just now woken up).
482 	 * This allows us to avoid the long evaluation for some descriptors.
483 	 */
484 
485 	callbacklist = NULL;
486 	if (lockDesc->nWriters == 0) {	/* Performance tweak (1). */
487 		while (lockDesc->waitersH) {
488 
489 			lr = lockDesc->waitersH;	/*
490 							 * Delete from waiters
491 							 * list.
492 							 */
493 			lockDesc->waitersH = lr->next;
494 
495 			RF_ASSERT(lr->type == RF_IO_TYPE_READ);
496 
497 			lr->next = lockDesc->granted;	/*
498 							 * Add to granted list.
499 							 */
500 			lockDesc->granted = lr;
501 
502 			RF_ASSERT(!lr->templink);
503 			lr->templink = callbacklist;	/*
504 							 * Put on callback list
505 							 * so that we'll invoke
506 							 * callback below.
507 							 */
508 			callbacklist = lr;
509 			if (rf_stripeLockDebug) {
510 				Dprintf8("[%d] No writers: granting lock"
511 				    " stripe ID %ld, type %c range %ld-%l"
512 				    "d %ld-%ld table 0x%lx.\n", tid, stripeID,
513 				    lr->type, lr->start, lr->stop,
514 				    lr->start2, lr->stop2,
515 				    (unsigned long) lockTable);
516 				FLUSH;
517 			}
518 		}
519 		lockDesc->waitersT = NULL;	/*
520 						 * We've purged the whole
521 						 * waiters list.
522 						 */
523 
524 	} else
525 		for (candidate_t = NULL, candidate = lockDesc->waitersH;
526 		     candidate;) {
527 
528 			/* Performance tweak (2). */
529 			consider_it = 0;
530 			if (RANGE_OVERLAP(lockReqDesc, candidate))
531 				consider_it = 1;
532 			else
533 				for (t = callbacklist; t; t = t->templink)
534 					if (RANGE_OVERLAP(t, candidate)) {
535 						consider_it = 1;
536 						break;
537 					}
538 			if (!consider_it) {
539 				if (rf_stripeLockDebug) {
540 					Dprintf8("[%d] No overlap: rejecting"
541 					    " candidate stripeID %ld, type %c"
542 					    " range %ld-%ld %ld-%ld table"
543 					    " 0x%lx.\n", tid, stripeID,
544 					    candidate->type,
545 					    candidate->start, candidate->stop,
546 					    candidate->start2, candidate->stop2,
547 					    (unsigned long) lockTable);
548 					FLUSH;
549 				}
550 				candidate_t = candidate;
551 				candidate = candidate->next;
552 				continue;
553 			}
554 			/*
555 			 * We have a candidate for release. Check to make
556 			 * sure it is not blocked by any granted locks.
557 			 */
558 			release_it = 1;
559 			for (predecessor = lockDesc->granted; predecessor;
560 			     predecessor = predecessor->next) {
561 				if (STRIPELOCK_CONFLICT(candidate, predecessor))
562 				{
563 					if (rf_stripeLockDebug) {
564 						Dprintf8("[%d] Conflicts with"
565 						    " granted lock: rejecting"
566 						    " candidate stripeID %ld,"
567 						    " type %c range %ld-%ld"
568 						    " %ld-%ld table 0x%lx.\n",
569 						    tid, stripeID,
570 						    candidate->type,
571 						    candidate->start,
572 						    candidate->stop,
573 						    candidate->start2,
574 						    candidate->stop2,
575 						    (unsigned long) lockTable);
576 						FLUSH;
577 					}
578 					release_it = 0;
579 					break;
580 				}
581 			}
582 
583 			/*
584 			 * Now check to see if the candidate is blocked by any
585 			 * waiters that occur before it in the wait queue.
586 			 */
587 			if (release_it)
588 				for (predecessor = lockDesc->waitersH;
589 				     predecessor != candidate;
590 				     predecessor = predecessor->next) {
591 					if (STRIPELOCK_CONFLICT(candidate,
592 					    predecessor)) {
593 						if (rf_stripeLockDebug) {
594 							Dprintf8("[%d]"
595 							    " Conflicts with"
596 							    " waiting lock:"
597 							    " rejecting"
598 							    " candidate"
599 							    " stripeID %ld,"
600 							    " type %c"
601 							    " range %ld-%ld"
602 							    " %ld-%ld"
603 							    " table 0x%lx.\n",
604 							    tid, stripeID,
605 							    candidate->type,
606 							    candidate->start,
607 							    candidate->stop,
608 							    candidate->start2,
609 							    candidate->stop2,
610 							    (unsigned long)
611 							     lockTable);
612 							FLUSH;
613 						}
614 						release_it = 0;
615 						break;
616 					}
617 				}
618 
619 			/* Release it if indicated. */
620 			if (release_it) {
621 				if (rf_stripeLockDebug) {
622 					Dprintf8("[%d] Granting lock to"
623 					    " candidate stripeID %ld, type %c"
624 					    " range %ld-%ld %ld-%ld table"
625 					    " 0x%lx.\n", tid, stripeID,
626 					    candidate->type,
627 					    candidate->start, candidate->stop,
628 					    candidate->start2, candidate->stop2,
629 					    (unsigned long) lockTable);
630 					FLUSH;
631 				}
632 				if (candidate_t) {
633 					candidate_t->next = candidate->next;
634 					if (lockDesc->waitersT == candidate)
635 						/*
636 						 * Cannot be waitersH
637 						 * since candidate_t is
638 						 * not NULL.
639 						 */
640 						lockDesc->waitersT =
641 						    candidate_t;
642 				} else {
643 					RF_ASSERT(candidate ==
644 					    lockDesc->waitersH);
645 					lockDesc->waitersH =
646 					    lockDesc->waitersH->next;
647 					if (!lockDesc->waitersH)
648 						lockDesc->waitersT = NULL;
649 				}
650 				/* Move it to the granted list. */
651 				candidate->next = lockDesc->granted;
652 				lockDesc->granted = candidate;
653 
654 				RF_ASSERT(!candidate->templink);
655 				/*
656 				 * Put it on the list of things to be called
657 				 * after we release the mutex.
658 				 */
659 				candidate->templink = callbacklist;
660 				callbacklist = candidate;
661 
662 				if (!candidate_t)
663 					candidate = lockDesc->waitersH;
664 				else
665 					/*
666 					 * Continue with the rest of the list.
667 					 */
668 					candidate = candidate_t->next;
669 			} else {
670 				candidate_t = candidate;
671 				/* Continue with the rest of the list. */
672 				candidate = candidate->next;
673 			}
674 		}
675 
676 	/* Delete the descriptor if no one is waiting or active. */
677 	if (!lockDesc->granted && !lockDesc->waitersH) {
678 		RF_ASSERT(lockDesc->nWriters == 0);
679 		if (rf_stripeLockDebug) {
680 			Dprintf3("[%d] Last lock released (table 0x%lx):"
681 			    " deleting desc for stripeID %ld.\n", tid,
682 			    (unsigned long) lockTable, stripeID);
683 			FLUSH;
684 		}
685 		if (ld_t)
686 			ld_t->next = lockDesc->next;
687 		else {
688 			RF_ASSERT(lockDesc == lockTable[hashval].descList);
689 			lockTable[hashval].descList = lockDesc->next;
690 		}
691 		rf_FreeStripeLockDesc(lockDesc);
692 		lockDesc = NULL;	/* Only for the ASSERT below. */
693 	}
694 	RF_UNLOCK_MUTEX(lockTable[hashval].mutex);
695 
696 	/*
697 	 * Now that we've unlocked the mutex, invoke the callback on all the
698 	 * descriptors in the list.
699 	 */
700 	RF_ASSERT(!((callbacklist) && (!lockDesc)));	/*
701 							 * If we deleted the
702 							 * descriptor, we should
703 							 * have no callbacks to
704 							 * do.
705 							 */
706 	for (candidate = callbacklist; candidate;) {
707 		t = candidate;
708 		candidate = candidate->templink;
709 		t->templink = NULL;
710 		(t->cbFunc) (t->cbArg);
711 	}
712 }
713 
714 /* Must have the indicated lock table mutex upon entry. */
715 void
rf_AddToWaitersQueue(RF_LockTableEntry_t * lockTable,RF_StripeLockDesc_t * lockDesc,RF_LockReqDesc_t * lockReqDesc)716 rf_AddToWaitersQueue(RF_LockTableEntry_t *lockTable,
717     RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc)
718 {
719 	int tid;
720 
721 	if (rf_stripeLockDebug) {
722 		Dprintf3("[%d] Waiting on lock for stripe %ld table 0x%lx.\n",
723 		    tid, lockDesc->stripeID, (unsigned long) lockTable);
724 		FLUSH;
725 	}
726 	if (!lockDesc->waitersH) {
727 		lockDesc->waitersH = lockDesc->waitersT = lockReqDesc;
728 	} else {
729 		lockDesc->waitersT->next = lockReqDesc;
730 		lockDesc->waitersT = lockReqDesc;
731 	}
732 }
733 
734 RF_StripeLockDesc_t *
rf_AllocStripeLockDesc(RF_StripeNum_t stripeID)735 rf_AllocStripeLockDesc(RF_StripeNum_t stripeID)
736 {
737 	RF_StripeLockDesc_t *p;
738 
739 	RF_FREELIST_GET(rf_stripelock_freelist, p, next,
740 	    (RF_StripeLockDesc_t *));
741 	if (p) {
742 		p->stripeID = stripeID;
743 	}
744 	return (p);
745 }
746 
747 void
rf_FreeStripeLockDesc(RF_StripeLockDesc_t * p)748 rf_FreeStripeLockDesc(RF_StripeLockDesc_t *p)
749 {
750 	RF_FREELIST_FREE(rf_stripelock_freelist, p, next);
751 }
752 
753 void
rf_PrintLockedStripes(RF_LockTableEntry_t * lockTable)754 rf_PrintLockedStripes(RF_LockTableEntry_t *lockTable)
755 {
756 	int i, j, foundone = 0, did;
757 	RF_StripeLockDesc_t *p;
758 	RF_LockReqDesc_t *q;
759 
760 	RF_LOCK_MUTEX(rf_printf_mutex);
761 	printf("Locked stripes:\n");
762 	for (i = 0; i < rf_lockTableSize; i++)
763 		if (lockTable[i].descList) {
764 			foundone = 1;
765 			for (p = lockTable[i].descList; p; p = p->next) {
766 				printf("Stripe ID 0x%lx (%d) nWriters %d\n",
767 				    (long) p->stripeID, (int) p->stripeID,
768 				    p->nWriters);
769 
770 				if (!(p->granted))
771 					printf("Granted: (none)\n");
772 				else
773 					printf("Granted:\n");
774 				for (did = 1, j = 0, q = p->granted; q;
775 				     j++, q = q->next) {
776 					printf("  %c(%ld-%ld", q->type,
777 					    (long) q->start, (long) q->stop);
778 					if (q->start2 != -1)
779 						printf(",%ld-%ld) ",
780 						    (long) q->start2,
781 						    (long) q->stop2);
782 					else
783 						printf(") ");
784 					if (j && !(j % 4)) {
785 						printf("\n");
786 						did = 1;
787 					} else
788 						did = 0;
789 				}
790 				if (!did)
791 					printf("\n");
792 
793 				if (!(p->waitersH))
794 					printf("Waiting: (none)\n");
795 				else
796 					printf("Waiting:\n");
797 				for (did = 1, j = 0, q = p->waitersH; q;
798 				     j++, q = q->next) {
799 					printf("%c(%ld-%ld", q->type,
800 					    (long) q->start, (long) q->stop);
801 					if (q->start2 != -1)
802 						printf(",%ld-%ld) ",
803 						    (long) q->start2,
804 						    (long) q->stop2);
805 					else
806 						printf(") ");
807 					if (j && !(j % 4)) {
808 						printf("\n         ");
809 						did = 1;
810 					} else
811 						did = 0;
812 				}
813 				if (!did)
814 					printf("\n");
815 			}
816 		}
817 	if (!foundone)
818 		printf("(none)\n");
819 	else
820 		printf("\n");
821 	RF_UNLOCK_MUTEX(rf_printf_mutex);
822 }
823