xref: /dragonfly/sys/vfs/hammer/hammer_subs.c (revision 91ffdfc56308b48600d5b200bd03a4da6ae6e2e4)
1 /*
2  * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER structural locking
36  */
37 
38 #include "hammer.h"
39 
40 void
hammer_lock_ex_ident(struct hammer_lock * lock,const char * ident)41 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
42 {
43           thread_t td = curthread;
44           u_int lv;
45           u_int nlv;
46 
47           KKASSERT(lock->refs);
48           for (;;) {
49                     lv = lock->lockval;
50 
51                     if (lv == 0) {
52                               nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
53                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
54                                         lock->lowner = td;
55                                         break;
56                               }
57                     } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
58                                  lock->lowner == td) {
59                               nlv = (lv + 1);
60                               if (atomic_cmpset_int(&lock->lockval, lv, nlv))
61                                         break;
62                     } else {
63                               if (hammer_debug_locks) {
64                                         hdkprintf("held by %p\n", lock->lowner);
65                               }
66                               nlv = lv | HAMMER_LOCKF_WANTED;
67                               ++hammer_contention_count;
68                               tsleep_interlock(&lock->lockval, 0);
69                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
70                                         tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
71                                         if (hammer_debug_locks)
72                                                   hdkprintf("try again\n");
73                               }
74                     }
75           }
76 }
77 
78 /*
79  * Try to obtain an exclusive lock
80  */
81 int
hammer_lock_ex_try(struct hammer_lock * lock)82 hammer_lock_ex_try(struct hammer_lock *lock)
83 {
84           thread_t td = curthread;
85           int error;
86           u_int lv;
87           u_int nlv;
88 
89           KKASSERT(lock->refs);
90           for (;;) {
91                     lv = lock->lockval;
92 
93                     if (lv == 0) {
94                               nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
95                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
96                                         lock->lowner = td;
97                                         error = 0;
98                                         break;
99                               }
100                     } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
101                                  lock->lowner == td) {
102                               nlv = (lv + 1);
103                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
104                                         error = 0;
105                                         break;
106                               }
107                     } else {
108                               error = EAGAIN;
109                               break;
110                     }
111           }
112           return (error);
113 }
114 
115 /*
116  * Obtain a shared lock
117  *
118  * We do not give pending exclusive locks priority over shared locks as
119  * doing so could lead to a deadlock.
120  */
121 void
hammer_lock_sh(struct hammer_lock * lock)122 hammer_lock_sh(struct hammer_lock *lock)
123 {
124           thread_t td = curthread;
125           u_int lv;
126           u_int nlv;
127           const char *ident = "hmrlck";
128 
129           KKASSERT(lock->refs);
130           for (;;) {
131                     lv = lock->lockval;
132 
133                     if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
134                               nlv = (lv + 1);
135                               if (atomic_cmpset_int(&lock->lockval, lv, nlv))
136                                         break;
137                     } else if (lock->lowner == td) {
138                               /*
139                                * Disallowed case, drop into kernel debugger for
140                                * now.  A cont continues w/ an exclusive lock.
141                                */
142                               nlv = (lv + 1);
143                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
144                                         if (hammer_debug_critical)
145                                                   Debugger("hammer_lock_sh: holding ex");
146                                         break;
147                               }
148                     } else {
149                               nlv = lv | HAMMER_LOCKF_WANTED;
150                               ++hammer_contention_count;
151                               tsleep_interlock(&lock->lockval, 0);
152                               if (atomic_cmpset_int(&lock->lockval, lv, nlv))
153                                         tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
154                     }
155           }
156 }
157 
158 int
hammer_lock_sh_try(struct hammer_lock * lock)159 hammer_lock_sh_try(struct hammer_lock *lock)
160 {
161           thread_t td = curthread;
162           u_int lv;
163           u_int nlv;
164           int error;
165 
166           KKASSERT(lock->refs);
167           for (;;) {
168                     lv = lock->lockval;
169 
170                     if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
171                               nlv = (lv + 1);
172                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
173                                         error = 0;
174                                         break;
175                               }
176                     } else if (lock->lowner == td) {
177                               /*
178                                * Disallowed case, drop into kernel debugger for
179                                * now.  A cont continues w/ an exclusive lock.
180                                */
181                               nlv = (lv + 1);
182                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
183                                         if (hammer_debug_critical)
184                                                   Debugger("hammer_lock_sh: holding ex");
185                                         error = 0;
186                                         break;
187                               }
188                     } else {
189                               error = EAGAIN;
190                               break;
191                     }
192           }
193           return (error);
194 }
195 
196 /*
197  * Upgrade a shared lock to an exclusively held lock.  This function will
198  * return EDEADLK If there is more then one shared holder.
199  *
200  * No error occurs and no action is taken if the lock is already exclusively
201  * held by the caller.  If the lock is not held at all or held exclusively
202  * by someone else, this function will panic.
203  */
204 int
hammer_lock_upgrade(struct hammer_lock * lock,int shcount)205 hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
206 {
207           thread_t td = curthread;
208           u_int lv;
209           u_int nlv;
210           int error;
211 
212           for (;;) {
213                     lv = lock->lockval;
214 
215                     if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
216                               nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
217                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
218                                         lock->lowner = td;
219                                         error = 0;
220                                         break;
221                               }
222                     } else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
223                               if (lock->lowner != curthread)
224                                         hpanic("illegal state");
225                               error = 0;
226                               break;
227                     } else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
228                               hpanic("lock is not held");
229                               /* NOT REACHED */
230                               error = EDEADLK;
231                               break;
232                     } else {
233                               error = EDEADLK;
234                               break;
235                     }
236           }
237           return (error);
238 }
239 
240 /*
241  * Downgrade an exclusively held lock to a shared lock.
242  */
243 void
hammer_lock_downgrade(struct hammer_lock * lock,int shcount)244 hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
245 {
246           thread_t td __debugvar = curthread;
247           u_int lv;
248           u_int nlv;
249 
250           KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
251                      (HAMMER_LOCKF_EXCLUSIVE | shcount));
252           KKASSERT(lock->lowner == td);
253 
254           /*
255            * NOTE: Must clear owner before releasing exclusivity
256            */
257           lock->lowner = NULL;
258 
259           for (;;) {
260                     lv = lock->lockval;
261                     nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
262                     if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
263                               if (lv & HAMMER_LOCKF_WANTED)
264                                         wakeup(&lock->lockval);
265                               break;
266                     }
267           }
268 }
269 
270 void
hammer_unlock(struct hammer_lock * lock)271 hammer_unlock(struct hammer_lock *lock)
272 {
273           thread_t td __debugvar = curthread;
274           u_int lv;
275           u_int nlv;
276 
277           lv = lock->lockval;
278           KKASSERT(lv != 0);
279           if (lv & HAMMER_LOCKF_EXCLUSIVE)
280                     KKASSERT(lock->lowner == td);
281 
282           for (;;) {
283                     lv = lock->lockval;
284                     nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
285                     if (nlv > 1) {
286                               nlv = lv - 1;
287                               if (atomic_cmpset_int(&lock->lockval, lv, nlv))
288                                         break;
289                     } else if (nlv == 1) {
290                               nlv = 0;
291                               if (lv & HAMMER_LOCKF_EXCLUSIVE)
292                                         lock->lowner = NULL;
293                               if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
294                                         if (lv & HAMMER_LOCKF_WANTED)
295                                                   wakeup(&lock->lockval);
296                                         break;
297                               }
298                     } else {
299                               hpanic("lock %p is not held", lock);
300                     }
301           }
302 }
303 
304 /*
305  * The calling thread must be holding a shared or exclusive lock.
306  * Returns < 0 if lock is held shared, and > 0 if held exlusively.
307  */
308 int
hammer_lock_status(struct hammer_lock * lock)309 hammer_lock_status(struct hammer_lock *lock)
310 {
311           u_int lv = lock->lockval;
312 
313           if (lv & HAMMER_LOCKF_EXCLUSIVE)
314                     return(1);
315           else if (lv)
316                     return(-1);
317           hpanic("lock must be held: %p", lock);
318 }
319 
320 /*
321  * Bump the ref count for a lock (not the excl/share count, but a separate
322  * structural reference count).  The CHECK flag will be set on a 0->1
323  * transition.
324  *
325  * This function does nothing to serialize races between multple threads.
326  * The caller can interlock it later on to deal with serialization.
327  *
328  * MPSAFE
329  */
330 void
hammer_ref(struct hammer_lock * lock)331 hammer_ref(struct hammer_lock *lock)
332 {
333           u_int lv;
334           u_int nlv;
335 
336           for (;;) {
337                     lv = lock->refs;
338                     if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
339                               nlv = (lv + 1) | HAMMER_REFS_CHECK;
340                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
341                                         return;
342                     } else {
343                               nlv = (lv + 1);
344                               KKASSERT((int)nlv > 0);
345                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
346                                         return;
347                     }
348           }
349           /* not reached */
350 }
351 
352 /*
353  * Drop the ref count for a lock (not the excl/share count, but a separate
354  * structural reference count).  The CHECK flag will be cleared on a 1->0
355  * transition.
356  *
357  * This function does nothing to serialize races between multple threads.
358  *
359  * MPSAFE
360  */
361 void
hammer_rel(struct hammer_lock * lock)362 hammer_rel(struct hammer_lock *lock)
363 {
364           u_int lv;
365           u_int nlv;
366 
367           for (;;) {
368                     lv = lock->refs;
369                     if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
370                               nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
371                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
372                                         return;
373                     } else {
374                               KKASSERT((int)lv > 0);
375                               nlv = (lv - 1);
376                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
377                                         return;
378                     }
379           }
380           /* not reached */
381 }
382 
383 /*
384  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
385  * more sophisticated versions which handle MP transition races and block
386  * when necessary.
387  *
388  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
389  * the interlock for 0->1 transitions or if the CHECK is found to be set.
390  *
391  * This case will return 1, the interlock will be held, and the CHECK
392  * bit also set.  Other threads attempting to ref will see the CHECK bit
393  * and block until we clean up.
394  *
395  * 0 is returned for transitions other than 0->1 when the CHECK bit
396  * is not found to be set, or if the function loses the race with another
397  * thread.
398  *
399  * 1 is only returned to one thread and the others will block.
400  * Effectively a 1 indicator means 'someone transitioned 0->1
401  * and you are the first guy to successfully lock it after that, so you
402  * need to check'.  Due to races the ref-count may be greater than 1 upon
403  * return.
404  *
405  * MPSAFE
406  */
407 int
hammer_ref_interlock(struct hammer_lock * lock)408 hammer_ref_interlock(struct hammer_lock *lock)
409 {
410           u_int lv;
411           u_int nlv;
412 
413           /*
414            * Integrated reference count bump, lock, and check, with hot-path.
415            *
416            * (a) Return 1     (+LOCKED, +CHECK)   0->1 transition
417            * (b) Return 0 (-LOCKED, -CHECK)       N->N+1 transition
418            * (c) Break out (+CHECK)               Check condition and Cannot lock
419            * (d) Return 1 (+LOCKED, +CHECK)       Successfully locked
420            */
421           for (;;) {
422                     lv = lock->refs;
423                     if (lv == 0) {
424                               nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
425                               if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
426                                         lock->rowner = curthread;
427                                         return(1);
428                               }
429                     } else {
430                               nlv = (lv + 1);
431                               if ((lv & ~HAMMER_REFS_FLAGS) == 0)
432                                         nlv |= HAMMER_REFS_CHECK;
433                               if ((nlv & HAMMER_REFS_CHECK) == 0) {
434                                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
435                                                   return(0);
436                               } else if (lv & HAMMER_REFS_LOCKED) {
437                                         /* CHECK also set here */
438                                         if (atomic_cmpset_int(&lock->refs, lv, nlv))
439                                                   break;
440                               } else {
441                                         /* CHECK also set here */
442                                         nlv |= HAMMER_REFS_LOCKED;
443                                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
444                                                   lock->rowner = curthread;
445                                                   return(1);
446                                         }
447                               }
448                     }
449           }
450 
451           /*
452            * Defered check condition because we were unable to acquire the
453            * lock.  We must block until the check condition is cleared due
454            * to a race with another thread, or we are able to acquire the
455            * lock.
456            *
457            * (a) Return 0     (-CHECK)            Another thread handled it
458            * (b) Return 1 (+LOCKED, +CHECK)       We handled it.
459            */
460           for (;;) {
461                     lv = lock->refs;
462                     if ((lv & HAMMER_REFS_CHECK) == 0)
463                               return(0);
464                     if (lv & HAMMER_REFS_LOCKED) {
465                               tsleep_interlock(&lock->refs, 0);
466                               nlv = (lv | HAMMER_REFS_WANTED);
467                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
468                                         tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
469                     } else {
470                               /* CHECK also set here */
471                               nlv = lv | HAMMER_REFS_LOCKED;
472                               if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
473                                         lock->rowner = curthread;
474                                         return(1);
475                               }
476                     }
477           }
478           /* not reached */
479 }
480 
481 /*
482  * This is the same as hammer_ref_interlock() but asserts that the
483  * 0->1 transition is always true, thus the lock must have no references
484  * on entry or have CHECK set, and will have one reference with the
485  * interlock held on return.  It must also not be interlocked on entry
486  * by anyone.
487  *
488  * NOTE that CHECK will never be found set when the ref-count is 0.
489  *
490  * 1 is always returned to match the API for hammer_ref_interlock().
491  * This function returns with one ref, the lock held, and the CHECK bit set.
492  */
493 int
hammer_ref_interlock_true(struct hammer_lock * lock)494 hammer_ref_interlock_true(struct hammer_lock *lock)
495 {
496           u_int lv;
497           u_int nlv;
498 
499           for (;;) {
500                     lv = lock->refs;
501 
502                     if (lv) {
503                               hpanic("bad lock %p %08x", lock, lock->refs);
504                     }
505                     nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
506                     if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
507                               lock->rowner = curthread;
508                               return (1);
509                     }
510           }
511 }
512 
513 /*
514  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
515  * CHECK flag.  The ref-count remains unchanged.
516  *
517  * This routine is called in the load path when the load succeeds.
518  */
519 void
hammer_ref_interlock_done(struct hammer_lock * lock)520 hammer_ref_interlock_done(struct hammer_lock *lock)
521 {
522           u_int lv;
523           u_int nlv;
524 
525           for (;;) {
526                     lv = lock->refs;
527                     nlv = lv & ~HAMMER_REFS_FLAGS;
528                     if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
529                               if (lv & HAMMER_REFS_WANTED)
530                                         wakeup(&lock->refs);
531                               break;
532                     }
533           }
534 }
535 
536 /*
537  * hammer_rel_interlock() works a bit differently in that it must
538  * acquire the lock in tandem with a 1->0 transition.  CHECK is
539  * not used.
540  *
541  * 1 is returned on 1->0 transitions with the lock held on return
542  * and 0 is returned otherwise with the lock not held.
543  *
544  * It is important to note that the refs are not stable and may
545  * increase while we hold the lock, the 1 indication only means
546  * that we transitioned 1->0, not necessarily that we stayed at 0.
547  *
548  * Another thread bumping refs while we hold the lock will set CHECK,
549  * causing one of the competing hammer_ref_interlock() calls to
550  * return 1 after we release our lock.
551  *
552  * MPSAFE
553  */
554 int
hammer_rel_interlock(struct hammer_lock * lock,int locked)555 hammer_rel_interlock(struct hammer_lock *lock, int locked)
556 {
557           u_int lv;
558           u_int nlv;
559 
560           /*
561            * In locked mode (failure/unload path) we release the
562            * ref-count but leave it locked.
563            */
564           if (locked) {
565                     hammer_rel(lock);
566                     return(1);
567           }
568 
569           /*
570            * Integrated reference count drop with LOCKED, plus the hot-path
571            * returns.
572            */
573           for (;;) {
574                     lv = lock->refs;
575 
576                     if (lv == 1) {
577                               nlv = 0 | HAMMER_REFS_LOCKED;
578                               if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
579                                         lock->rowner = curthread;
580                                         return(1);
581                               }
582                     } else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
583                               if ((lv & HAMMER_REFS_LOCKED) == 0) {
584                                         nlv = (lv - 1) | HAMMER_REFS_LOCKED;
585                                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
586                                                   lock->rowner = curthread;
587                                                   return(1);
588                                         }
589                               } else {
590                                         nlv = lv | HAMMER_REFS_WANTED;
591                                         tsleep_interlock(&lock->refs, 0);
592                                         if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
593                                                   tsleep(&lock->refs, PINTERLOCKED,
594                                                          "h0lk", 0);
595                                         }
596                               }
597                     } else {
598                               nlv = (lv - 1);
599                               KKASSERT((int)nlv >= 0);
600                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
601                                         return(0);
602                     }
603           }
604           /* not reached */
605 }
606 
607 /*
608  * Unlock the interlock acquired by hammer_rel_interlock().
609  *
610  * If orig_locked is non-zero the interlock was originally held prior to
611  * the hammer_rel_interlock() call and passed through to us.  In this
612  * case we want to retain the CHECK error state if not transitioning
613  * to 0.
614  *
615  * The code is the same either way so we do not have to conditionalize
616  * on orig_locked.
617  */
618 void
hammer_rel_interlock_done(struct hammer_lock * lock,int orig_locked __unused)619 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
620 {
621           u_int lv;
622           u_int nlv;
623 
624           for (;;) {
625                     lv = lock->refs;
626                     nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
627                     if ((lv & ~HAMMER_REFS_FLAGS) == 0)
628                               nlv &= ~HAMMER_REFS_CHECK;
629                     if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
630                               if (lv & HAMMER_REFS_WANTED)
631                                         wakeup(&lock->refs);
632                               break;
633                     }
634           }
635 }
636 
637 /*
638  * Acquire the interlock on lock->refs.
639  *
640  * Return 1 if CHECK is currently set.  Note that CHECK will not
641  * be set if the reference count is 0, but can get set if this function
642  * is preceeded by, say, hammer_ref(), or through races with other
643  * threads.  The return value allows the caller to use the same logic
644  * as hammer_ref_interlock().
645  *
646  * MPSAFE
647  */
648 int
hammer_get_interlock(struct hammer_lock * lock)649 hammer_get_interlock(struct hammer_lock *lock)
650 {
651           u_int lv;
652           u_int nlv;
653 
654           for (;;) {
655                     lv = lock->refs;
656                     if (lv & HAMMER_REFS_LOCKED) {
657                               nlv = lv | HAMMER_REFS_WANTED;
658                               tsleep_interlock(&lock->refs, 0);
659                               if (atomic_cmpset_int(&lock->refs, lv, nlv))
660                                         tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
661                     } else {
662                               nlv = (lv | HAMMER_REFS_LOCKED);
663                               if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
664                                         lock->rowner = curthread;
665                                         return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
666                               }
667                     }
668           }
669 }
670 
671 /*
672  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
673  * cache callback code to disassociate or lock the bufs related to HAMMER
674  * structures.
675  *
676  * During teardown the related bp will be acquired by hammer_io_release()
677  * which interocks our test.
678  *
679  * Returns non-zero on success, zero on failure.
680  */
681 int
hammer_try_interlock_norefs(struct hammer_lock * lock)682 hammer_try_interlock_norefs(struct hammer_lock *lock)
683 {
684           u_int lv;
685           u_int nlv;
686 
687           for (;;) {
688                     lv = lock->refs;
689                     if (lv == 0) {
690                               nlv = lv | HAMMER_REFS_LOCKED;
691                               if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
692                                         lock->rowner = curthread;
693                                         return(1);
694                               }
695                     } else {
696                               return(0);
697                     }
698           }
699           /* not reached */
700 }
701 
702 /*
703  * Release the interlock on lock->refs.  This function will set
704  * CHECK if the refs is non-zero and error is non-zero, and clear
705  * CHECK otherwise.
706  *
707  * MPSAFE
708  */
709 void
hammer_put_interlock(struct hammer_lock * lock,int error)710 hammer_put_interlock(struct hammer_lock *lock, int error)
711 {
712           u_int lv;
713           u_int nlv;
714 
715           for (;;) {
716                     lv = lock->refs;
717                     KKASSERT(lv & HAMMER_REFS_LOCKED);
718                     nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
719 
720                     if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
721                               nlv &= ~HAMMER_REFS_CHECK;
722                     else
723                               nlv |= HAMMER_REFS_CHECK;
724 
725                     if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
726                               if (lv & HAMMER_REFS_WANTED)
727                                         wakeup(&lock->refs);
728                               return;
729                     }
730           }
731 }
732 
733 /*
734  * The sync_lock must be held when doing any modifying operations on
735  * meta-data.  It does not have to be held when modifying non-meta-data buffers
736  * (backend or frontend).
737  *
738  * The flusher holds the lock exclusively while all other consumers hold it
739  * shared.  All modifying operations made while holding the lock are atomic
740  * in that they will be made part of the same flush group.
741  *
742  * Due to the atomicy requirement deadlock recovery code CANNOT release the
743  * sync lock, nor can we give pending exclusive sync locks priority over
744  * a shared sync lock as this could lead to a 3-way deadlock.
745  */
746 void
hammer_sync_lock_ex(hammer_transaction_t trans)747 hammer_sync_lock_ex(hammer_transaction_t trans)
748 {
749           ++trans->sync_lock_refs;
750           hammer_lock_ex(&trans->hmp->sync_lock);
751 }
752 
753 void
hammer_sync_lock_sh(hammer_transaction_t trans)754 hammer_sync_lock_sh(hammer_transaction_t trans)
755 {
756           ++trans->sync_lock_refs;
757           hammer_lock_sh(&trans->hmp->sync_lock);
758 }
759 
760 int
hammer_sync_lock_sh_try(hammer_transaction_t trans)761 hammer_sync_lock_sh_try(hammer_transaction_t trans)
762 {
763           int error;
764 
765           ++trans->sync_lock_refs;
766           if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
767                     --trans->sync_lock_refs;
768           return (error);
769 }
770 
771 void
hammer_sync_unlock(hammer_transaction_t trans)772 hammer_sync_unlock(hammer_transaction_t trans)
773 {
774           --trans->sync_lock_refs;
775           hammer_unlock(&trans->hmp->sync_lock);
776 }
777 
778 /*
779  * Misc
780  */
781 uint32_t
hammer_to_unix_xid(hammer_uuid_t * uuid)782 hammer_to_unix_xid(hammer_uuid_t *uuid)
783 {
784           return(*(uint32_t *)&uuid->node[2]);
785 }
786 
787 void
hammer_guid_to_uuid(hammer_uuid_t * uuid,uint32_t guid)788 hammer_guid_to_uuid(hammer_uuid_t *uuid, uint32_t guid)
789 {
790           bzero(uuid, sizeof(*uuid));
791           *(uint32_t *)&uuid->node[2] = guid;
792 }
793 
794 void
hammer_time_to_timespec(uint64_t xtime,struct timespec * ts)795 hammer_time_to_timespec(uint64_t xtime, struct timespec *ts)
796 {
797           ts->tv_sec = (unsigned long)(xtime / 1000000);
798           ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
799 }
800 
801 uint64_t
hammer_timespec_to_time(struct timespec * ts)802 hammer_timespec_to_time(struct timespec *ts)
803 {
804           uint64_t xtime;
805 
806           xtime = (unsigned)(ts->tv_nsec / 1000) +
807                     (unsigned long)ts->tv_sec * 1000000ULL;
808           return(xtime);
809 }
810 
811 
812 /*
813  * Convert a HAMMER filesystem object type to a vnode type
814  */
815 enum vtype
hammer_get_vnode_type(uint8_t obj_type)816 hammer_get_vnode_type(uint8_t obj_type)
817 {
818           switch(obj_type) {
819           case HAMMER_OBJTYPE_DIRECTORY:
820                     return(VDIR);
821           case HAMMER_OBJTYPE_REGFILE:
822                     return(VREG);
823           case HAMMER_OBJTYPE_DBFILE:
824                     return(VDATABASE);
825           case HAMMER_OBJTYPE_FIFO:
826                     return(VFIFO);
827           case HAMMER_OBJTYPE_SOCKET:
828                     return(VSOCK);
829           case HAMMER_OBJTYPE_CDEV:
830                     return(VCHR);
831           case HAMMER_OBJTYPE_BDEV:
832                     return(VBLK);
833           case HAMMER_OBJTYPE_SOFTLINK:
834                     return(VLNK);
835           default:
836                     return(VBAD);
837           }
838           /* not reached */
839 }
840 
841 int
hammer_get_dtype(uint8_t obj_type)842 hammer_get_dtype(uint8_t obj_type)
843 {
844           switch(obj_type) {
845           case HAMMER_OBJTYPE_DIRECTORY:
846                     return(DT_DIR);
847           case HAMMER_OBJTYPE_REGFILE:
848                     return(DT_REG);
849           case HAMMER_OBJTYPE_DBFILE:
850                     return(DT_DBF);
851           case HAMMER_OBJTYPE_FIFO:
852                     return(DT_FIFO);
853           case HAMMER_OBJTYPE_SOCKET:
854                     return(DT_SOCK);
855           case HAMMER_OBJTYPE_CDEV:
856                     return(DT_CHR);
857           case HAMMER_OBJTYPE_BDEV:
858                     return(DT_BLK);
859           case HAMMER_OBJTYPE_SOFTLINK:
860                     return(DT_LNK);
861           default:
862                     return(DT_UNKNOWN);
863           }
864           /* not reached */
865 }
866 
867 uint8_t
hammer_get_obj_type(enum vtype vtype)868 hammer_get_obj_type(enum vtype vtype)
869 {
870           switch(vtype) {
871           case VDIR:
872                     return(HAMMER_OBJTYPE_DIRECTORY);
873           case VREG:
874                     return(HAMMER_OBJTYPE_REGFILE);
875           case VDATABASE:
876                     return(HAMMER_OBJTYPE_DBFILE);
877           case VFIFO:
878                     return(HAMMER_OBJTYPE_FIFO);
879           case VSOCK:
880                     return(HAMMER_OBJTYPE_SOCKET);
881           case VCHR:
882                     return(HAMMER_OBJTYPE_CDEV);
883           case VBLK:
884                     return(HAMMER_OBJTYPE_BDEV);
885           case VLNK:
886                     return(HAMMER_OBJTYPE_SOFTLINK);
887           default:
888                     return(HAMMER_OBJTYPE_UNKNOWN);
889           }
890           /* not reached */
891 }
892 
893 /*
894  * Return flags for hammer_delete_at_cursor()
895  */
896 int
hammer_nohistory(hammer_inode_t ip)897 hammer_nohistory(hammer_inode_t ip)
898 {
899           if (ip->hmp->hflags & HMNT_NOHISTORY)
900                     return(HAMMER_DELETE_DESTROY);
901           if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
902                     return(HAMMER_DELETE_DESTROY);
903           return(0);
904 }
905 
906 /*
907  * ALGORITHM VERSION 0:
908  *        Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
909  *        crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
910  *        to generate a unique key and will scan all entries with the same upper
911  *        32 bits when issuing a lookup.
912  *
913  *        0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
914  *
915  * ALGORITHM VERSION 1:
916  *
917  *        This algorithm breaks the filename down into a separate 32-bit crcs
918  *        for each filename segment separated by a special character (dot,
919  *        underscore, underline, or tilde).  The CRCs are then added together.
920  *        This allows temporary names.  A full-filename 16 bit crc is also
921  *        generated to deal with degenerate conditions.
922  *
923  *        The algorithm is designed to handle create/rename situations such
924  *        that a create with an extention to a rename without an extention
925  *        only shifts the key space rather than randomizes it.
926  *
927  *        NOTE: The inode allocator cache can only match 10 bits so we do
928  *              not really have any room for a partial sorted name, and
929  *              numbers don't sort well in that situation anyway.
930  *
931  *        0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
932  *
933  *
934  * We strip bit 63 in order to provide a positive key, this way a seek
935  * offset of 0 will represent the base of the directory.
936  *
937  * We usually strip bit 0 (set it to 0) in order to provide a consistent
938  * iteration space for collisions.
939  *
940  * This function can never return 0.  We use the MSB-0 space to synthesize
941  * artificial directory entries such as "." and "..".
942  */
943 int64_t
hammer_direntry_namekey(hammer_inode_t dip,const void * name,int len,uint32_t * max_iterationsp)944 hammer_direntry_namekey(hammer_inode_t dip, const void *name, int len,
945                                uint32_t *max_iterationsp)
946 {
947           const char *aname = name;
948           int32_t crcx;
949           int64_t key;
950           int i;
951           int j;
952 
953           switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
954           case HAMMER_INODE_CAP_DIRHASH_ALG0:
955                     /*
956                      * Original algorithm
957                      */
958                     key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
959                     if (key == 0)
960                               key |= 0x100000000LL;
961                     *max_iterationsp = 0xFFFFFFFFU;
962                     break;
963           case HAMMER_INODE_CAP_DIRHASH_ALG1:
964                     /*
965                      * Filesystem version 6 or better will create directories
966                      * using the ALG1 dirhash.  This hash breaks the filename
967                      * up into domains separated by special characters and
968                      * hashes each domain independently.
969                      *
970                      * We also do a simple sub-sort using the first character
971                      * of the filename in the top 5-bits.
972                      */
973                     key = 0;
974 
975                     /*
976                      * m32
977                      */
978                     crcx = 0;
979                     for (i = j = 0; i < len; ++i) {
980                               if (aname[i] == '.' ||
981                                   aname[i] == '-' ||
982                                   aname[i] == '_' ||
983                                   aname[i] == '~') {
984                                         if (i != j)
985                                                   crcx += crc32(aname + j, i - j);
986                                         j = i + 1;
987                               }
988                     }
989                     if (i != j)
990                               crcx += crc32(aname + j, i - j);
991 
992 #if 0
993                     /*
994                      * xor top 5 bits 0mmmm into low bits and steal the top 5
995                      * bits as a semi sub sort using the first character of
996                      * the filename.  bit 63 is always left as 0 so directory
997                      * keys are positive numbers.
998                      */
999                     crcx ^= (uint32_t)crcx >> (32 - 5);
1000                     crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
1001 #endif
1002                     crcx &= 0x7FFFFFFFU;
1003 
1004                     key |= (uint64_t)crcx << 32;
1005 
1006                     /*
1007                      * l16 - crc of entire filename
1008                      *
1009                      * This crc reduces degenerate hash collision conditions
1010                      */
1011                     crcx = crc32(aname, len);
1012                     crcx = crcx ^ (crcx << 16);
1013                     key |= crcx & 0xFFFF0000U;
1014 
1015                     /*
1016                      * Cleanup
1017                      */
1018                     if ((key & 0xFFFFFFFF00000000LL) == 0)
1019                               key |= 0x100000000LL;
1020                     if (hammer_debug_general & 0x0400) {
1021                               hdkprintf("0x%016jx %*.*s\n",
1022                                         (intmax_t)key, len, len, aname);
1023                     }
1024                     *max_iterationsp = 0x00FFFFFF;
1025                     break;
1026           case HAMMER_INODE_CAP_DIRHASH_ALG2:
1027           case HAMMER_INODE_CAP_DIRHASH_ALG3:
1028           default:
1029                     key = 0;                      /* compiler warning */
1030                     *max_iterationsp = 1;                   /* sanity */
1031                     hpanic("bad algorithm %p", dip);
1032                     break;
1033           }
1034           return(key);
1035 }
1036 
1037 /*
1038  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1039  * EINVAL on failure.
1040  *
1041  * If this function fails *ispfs, *tidp, and *localizationp will not
1042  * be modified.
1043  */
1044 int
hammer_str_to_tid(const char * str,int * ispfsp,hammer_tid_t * tidp,uint32_t * localizationp)1045 hammer_str_to_tid(const char *str, int *ispfsp,
1046                       hammer_tid_t *tidp, uint32_t *localizationp)
1047 {
1048           hammer_tid_t tid;
1049           uint32_t localization;
1050           char *ptr;
1051           int ispfs;
1052           int n;
1053 
1054           /*
1055            * Forms allowed for TID:  "0x%016llx"
1056            *                               "-1"
1057            */
1058           tid = strtouq(str, &ptr, 0);
1059           n = ptr - str;
1060           if (n == 2 && str[0] == '-' && str[1] == '1') {
1061                     /* ok */
1062           } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1063                     /* ok */
1064           } else {
1065                     return(EINVAL);
1066           }
1067 
1068           /*
1069            * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1070            */
1071           str = ptr;
1072           if (*str == ':') {
1073                     localization = pfs_to_lo(strtoul(str + 1, &ptr, 10));
1074                     if (ptr - str != 6)
1075                               return(EINVAL);
1076                     str = ptr;
1077                     ispfs = 1;
1078           } else {
1079                     localization = *localizationp;
1080                     ispfs = 0;
1081           }
1082 
1083           /*
1084            * Any trailing junk invalidates special extension handling.
1085            */
1086           if (*str)
1087                     return(EINVAL);
1088           *tidp = tid;
1089           *localizationp = localization;
1090           *ispfsp = ispfs;
1091           return(0);
1092 }
1093 
1094 /*
1095  * Return the block size at the specified file offset.
1096  */
1097 int
hammer_blocksize(int64_t file_offset)1098 hammer_blocksize(int64_t file_offset)
1099 {
1100           if (file_offset < HAMMER_XDEMARC)
1101                     return(HAMMER_BUFSIZE);
1102           else
1103                     return(HAMMER_XBUFSIZE);
1104 }
1105 
1106 int
hammer_blockoff(int64_t file_offset)1107 hammer_blockoff(int64_t file_offset)
1108 {
1109           if (file_offset < HAMMER_XDEMARC)
1110                     return((int)file_offset & HAMMER_BUFMASK);
1111           else
1112                     return((int)file_offset & HAMMER_XBUFMASK);
1113 }
1114 
1115 /*
1116  * Return the demarkation point between the two offsets where
1117  * the block size changes.
1118  */
1119 int64_t
hammer_blockdemarc(int64_t file_offset1,int64_t file_offset2)1120 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1121 {
1122           if (file_offset1 < HAMMER_XDEMARC) {
1123                     if (file_offset2 <= HAMMER_XDEMARC)
1124                               return(file_offset2);
1125                     return(HAMMER_XDEMARC);
1126           }
1127           hpanic("illegal range %jd %jd",
1128                 (intmax_t)file_offset1, (intmax_t)file_offset2);
1129 }
1130 
1131 dev_t
hammer_fsid_to_udev(hammer_uuid_t * uuid)1132 hammer_fsid_to_udev(hammer_uuid_t *uuid)
1133 {
1134           uint32_t crc;
1135 
1136           crc = crc32(uuid, sizeof(*uuid));
1137           return((dev_t)crc);
1138 }
1139 
1140