1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/lock.h>
33 #include <sys/mutex.h>
34 #include <sys/proc.h>
35 #include <sys/rangelock.h>
36 #include <sys/systm.h>
37
38 #include <vm/uma.h>
39
/*
 * One range lock request.  Entries are linked on the rl_waiters queue of
 * the rangelock they were issued against, both while waiting and after
 * being granted.
 */
struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;	/* rl_waiters linkage */
	off_t	rl_q_start;	/* first offset covered by the request */
	off_t	rl_q_end;	/* end offset; compared as exclusive */
	int	rl_q_flags;	/* RL_LOCK_* mode and granted bits */
};
45
46 static uma_zone_t rl_entry_zone;
47
48 static void
rangelock_sys_init(void)49 rangelock_sys_init(void)
50 {
51
52 rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
53 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
54 }
55 SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
56
57 static struct rl_q_entry *
rlqentry_alloc(void)58 rlqentry_alloc(void)
59 {
60
61 return (uma_zalloc(rl_entry_zone, M_WAITOK));
62 }
63
64 void
rlqentry_free(struct rl_q_entry * rleq)65 rlqentry_free(struct rl_q_entry *rleq)
66 {
67
68 uma_zfree(rl_entry_zone, rleq);
69 }
70
71 void
rangelock_init(struct rangelock * lock)72 rangelock_init(struct rangelock *lock)
73 {
74
75 TAILQ_INIT(&lock->rl_waiters);
76 lock->rl_currdep = NULL;
77 }
78
79 void
rangelock_destroy(struct rangelock * lock)80 rangelock_destroy(struct rangelock *lock)
81 {
82
83 KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
84 }
85
86 /*
87 * Two entries are compatible if their ranges do not overlap, or both
88 * entries are for read.
89 */
90 static int
ranges_overlap(const struct rl_q_entry * e1,const struct rl_q_entry * e2)91 ranges_overlap(const struct rl_q_entry *e1,
92 const struct rl_q_entry *e2)
93 {
94
95 if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
96 return (1);
97 return (0);
98 }
99
100 /*
101 * Recalculate the lock->rl_currdep after an unlock.
102 */
103 static void
rangelock_calc_block(struct rangelock * lock)104 rangelock_calc_block(struct rangelock *lock)
105 {
106 struct rl_q_entry *entry, *nextentry, *entry1;
107
108 for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
109 nextentry = TAILQ_NEXT(entry, rl_q_link);
110 if (entry->rl_q_flags & RL_LOCK_READ) {
111 /* Reads must not overlap with granted writes. */
112 for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
113 !(entry1->rl_q_flags & RL_LOCK_READ);
114 entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
115 if (ranges_overlap(entry, entry1))
116 goto out;
117 }
118 } else {
119 /* Write must not overlap with any granted locks. */
120 for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
121 entry1 != entry;
122 entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
123 if (ranges_overlap(entry, entry1))
124 goto out;
125 }
126
127 /* Move grantable write locks to the front. */
128 TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
129 TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
130 }
131
132 /* Grant this lock. */
133 entry->rl_q_flags |= RL_LOCK_GRANTED;
134 wakeup(entry);
135 }
136 out:
137 lock->rl_currdep = entry;
138 }
139
140 static void
rangelock_unlock_locked(struct rangelock * lock,struct rl_q_entry * entry,struct mtx * ilk,bool do_calc_block)141 rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
142 struct mtx *ilk, bool do_calc_block)
143 {
144
145 MPASS(lock != NULL && entry != NULL && ilk != NULL);
146 mtx_assert(ilk, MA_OWNED);
147
148 if (!do_calc_block) {
149 /*
150 * This is the case where rangelock_enqueue() has been called
151 * with trylock == true and just inserted this entry in the
152 * queue.
153 * If rl_currdep is this entry, rl_currdep needs to
154 * be set to the next entry in the rl_waiters list.
155 * However, since this entry is the last entry in the
156 * list, the next entry is NULL.
157 */
158 if (lock->rl_currdep == entry) {
159 KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
160 ("rangelock_enqueue: next entry not NULL"));
161 lock->rl_currdep = NULL;
162 }
163 } else
164 KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
165
166 TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
167 if (do_calc_block)
168 rangelock_calc_block(lock);
169 mtx_unlock(ilk);
170 if (curthread->td_rlqe == NULL)
171 curthread->td_rlqe = entry;
172 else
173 rlqentry_free(entry);
174 }
175
176 void
rangelock_unlock(struct rangelock * lock,void * cookie,struct mtx * ilk)177 rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
178 {
179
180 MPASS(lock != NULL && cookie != NULL && ilk != NULL);
181
182 mtx_lock(ilk);
183 rangelock_unlock_locked(lock, cookie, ilk, true);
184 }
185
186 /*
187 * Unlock the sub-range of granted lock.
188 */
189 void *
rangelock_unlock_range(struct rangelock * lock,void * cookie,off_t start,off_t end,struct mtx * ilk)190 rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
191 off_t end, struct mtx *ilk)
192 {
193 struct rl_q_entry *entry;
194
195 MPASS(lock != NULL && cookie != NULL && ilk != NULL);
196 entry = cookie;
197 KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
198 ("Unlocking non-granted lock"));
199 KASSERT(entry->rl_q_start == start, ("wrong start"));
200 KASSERT(entry->rl_q_end >= end, ("wrong end"));
201
202 mtx_lock(ilk);
203 if (entry->rl_q_end == end) {
204 rangelock_unlock_locked(lock, cookie, ilk, true);
205 return (NULL);
206 }
207 entry->rl_q_end = end;
208 rangelock_calc_block(lock);
209 mtx_unlock(ilk);
210 return (cookie);
211 }
212
213 /*
214 * Add the lock request to the queue of the pending requests for
215 * rangelock. Sleep until the request can be granted unless trylock == true.
216 */
217 static void *
rangelock_enqueue(struct rangelock * lock,off_t start,off_t end,int mode,struct mtx * ilk,bool trylock)218 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
219 struct mtx *ilk, bool trylock)
220 {
221 struct rl_q_entry *entry;
222 struct thread *td;
223
224 MPASS(lock != NULL && ilk != NULL);
225
226 td = curthread;
227 if (td->td_rlqe != NULL) {
228 entry = td->td_rlqe;
229 td->td_rlqe = NULL;
230 } else
231 entry = rlqentry_alloc();
232 MPASS(entry != NULL);
233 entry->rl_q_flags = mode;
234 entry->rl_q_start = start;
235 entry->rl_q_end = end;
236
237 mtx_lock(ilk);
238 /*
239 * XXXKIB TODO. Check that a thread does not try to enqueue a
240 * lock that is incompatible with another request from the same
241 * thread.
242 */
243
244 TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
245 /*
246 * If rl_currdep == NULL, there is no entry waiting for a conflicting
247 * range to be resolved, so set rl_currdep to this entry. If there is
248 * no conflicting entry for this entry, rl_currdep will be set back to
249 * NULL by rangelock_calc_block().
250 */
251 if (lock->rl_currdep == NULL)
252 lock->rl_currdep = entry;
253 rangelock_calc_block(lock);
254 while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
255 if (trylock) {
256 /*
257 * For this case, the range is not actually locked
258 * yet, but removal from the list requires the same
259 * steps, except for not doing a rangelock_calc_block()
260 * call, since rangelock_calc_block() was called above.
261 */
262 rangelock_unlock_locked(lock, entry, ilk, false);
263 return (NULL);
264 }
265 msleep(entry, ilk, 0, "range", 0);
266 }
267 mtx_unlock(ilk);
268 return (entry);
269 }
270
271 void *
rangelock_rlock(struct rangelock * lock,off_t start,off_t end,struct mtx * ilk)272 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
273 {
274
275 return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
276 }
277
278 void *
rangelock_tryrlock(struct rangelock * lock,off_t start,off_t end,struct mtx * ilk)279 rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
280 struct mtx *ilk)
281 {
282
283 return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
284 }
285
286 void *
rangelock_wlock(struct rangelock * lock,off_t start,off_t end,struct mtx * ilk)287 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
288 {
289
290 return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
291 }
292
293 void *
rangelock_trywlock(struct rangelock * lock,off_t start,off_t end,struct mtx * ilk)294 rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
295 struct mtx *ilk)
296 {
297
298 return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
299 }
300
#ifdef INVARIANT_SUPPORT
/*
 * Panic unless the lock behind cookie is in the state named by what:
 * RCA_LOCKED requires the granted bit, RCA_RLOCKED requires granted and
 * read, RCA_WLOCKED requires granted and write.  Any other value of what
 * is itself a panic.  file and line identify the caller in the message.
 */
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
	const int rmask = RL_LOCK_GRANTED | RL_LOCK_READ;
	const int wmask = RL_LOCK_GRANTED | RL_LOCK_WRITE;
	struct rl_q_entry *entry;
	int flags;

	MPASS(cookie != NULL);
	entry = cookie;
	flags = entry->rl_q_flags;

	if (what == RCA_LOCKED) {
		if ((flags & RL_LOCK_GRANTED) == 0)
			panic("rangelock not held @ %s:%d\n", file, line);
	} else if (what == RCA_RLOCKED) {
		if ((flags & rmask) != rmask)
			panic("rangelock not rlocked @ %s:%d\n", file, line);
	} else if (what == RCA_WLOCKED) {
		if ((flags & wmask) != wmask)
			panic("rangelock not wlocked @ %s:%d\n", file, line);
	} else {
		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */
332