1 /*-
2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: stable/9/sys/kern/kern_umtx.c 252292 2013-06-27 07:27:08Z kib $");
30 
31 #include "opt_compat.h"
32 #include "opt_umtx_profiling.h"
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/priv.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45 #include <sys/sysent.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/eventhandler.h>
50 #include <sys/umtx.h>
51 
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 
58 #include <machine/cpu.h>
59 
60 #ifdef COMPAT_FREEBSD32
61 #include <compat/freebsd32/freebsd32_proto.h>
62 #endif
63 
/*
 * Values for the "mode" argument of the umutex lock routines.  In
 * _UMUTEX_WAIT mode _do_lock_normal() only waits until the mutex is
 * unowned or merely contested and does not acquire it.
 */
#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2
66 
67 /* Priority inheritance mutex info. */
68 struct umtx_pi {
69 	/* Owner thread */
70 	struct thread		*pi_owner;
71 
72 	/* Reference count */
73 	int			pi_refcount;
74 
75  	/* List entry to link umtx holding by thread */
76 	TAILQ_ENTRY(umtx_pi)	pi_link;
77 
78 	/* List entry in hash */
79 	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
80 
81 	/* List for waiters */
82 	TAILQ_HEAD(,umtx_q)	pi_blocked;
83 
84 	/* Identify a userland lock object */
85 	struct umtx_key		pi_key;
86 };
87 
88 /* A userland synchronous object user. */
89 struct umtx_q {
90 	/* Linked list for the hash. */
91 	TAILQ_ENTRY(umtx_q)	uq_link;
92 
93 	/* Umtx key. */
94 	struct umtx_key		uq_key;
95 
96 	/* Umtx flags. */
97 	int			uq_flags;
98 #define UQF_UMTXQ	0x0001
99 
100 	/* The thread waits on. */
101 	struct thread		*uq_thread;
102 
103 	/*
104 	 * Blocked on PI mutex. read can use chain lock
105 	 * or umtx_lock, write must have both chain lock and
106 	 * umtx_lock being hold.
107 	 */
108 	struct umtx_pi		*uq_pi_blocked;
109 
110 	/* On blocked list */
111 	TAILQ_ENTRY(umtx_q)	uq_lockq;
112 
113 	/* Thread contending with us */
114 	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
115 
116 	/* Inherited priority from PP mutex */
117 	u_char			uq_inherited_pri;
118 
119 	/* Spare queue ready to be reused */
120 	struct umtxq_queue	*uq_spare_queue;
121 
122 	/* The queue we on */
123 	struct umtxq_queue	*uq_cur_queue;
124 };
125 
126 TAILQ_HEAD(umtxq_head, umtx_q);
127 
128 /* Per-key wait-queue */
129 struct umtxq_queue {
130 	struct umtxq_head	head;
131 	struct umtx_key		key;
132 	LIST_ENTRY(umtxq_queue)	link;
133 	int			length;
134 };
135 
136 LIST_HEAD(umtxq_list, umtxq_queue);
137 
138 /* Userland lock object's wait-queue chain */
139 struct umtxq_chain {
140 	/* Lock for this chain. */
141 	struct mtx		uc_lock;
142 
143 	/* List of sleep queues. */
144 	struct umtxq_list	uc_queue[2];
145 #define UMTX_SHARED_QUEUE	0
146 #define UMTX_EXCLUSIVE_QUEUE	1
147 
148 	LIST_HEAD(, umtxq_queue) uc_spare_queue;
149 
150 	/* Busy flag */
151 	char			uc_busy;
152 
153 	/* Chain lock waiters */
154 	int			uc_waiters;
155 
156 	/* All PI in the list */
157 	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
158 
159 #ifdef UMTX_PROFILING
160 	int 			length;
161 	int			max_length;
162 #endif
163 };
164 
/* Assert that the chain lock of "uc" is held by the current thread. */
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * Assert that the chain "uc" is marked busy.  The flag's value is
 * tested; testing its address would make the assertion always pass.
 */
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))
167 
/*
 * Do not propagate time-sharing priorities, for security reasons.
 * Otherwise a user could create a PI mutex, have thread A lock it and
 * thread B block on it: because B is sleeping its priority is boosted,
 * and through priority propagation A would be boosted too and never
 * lowered again, even while using 100% of the CPU.  That would be
 * unfair to other processes.
 *
 * UPRI() therefore maps any user priority inside the time-sharing
 * range to PRI_MAX_TIMESHARE and leaves all other priorities alone.
 */
#define	UPRI(td)							\
	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&			\
	    (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?			\
	    PRI_MAX_TIMESHARE : (td)->td_user_pri)
180 
/*
 * Hash parameters used by umtxq_hash(): the key is multiplied by
 * GOLDEN_RATIO_PRIME, shifted right by UMTX_SHIFTS and reduced modulo
 * UMTX_CHAINS (512 == 1 << 9, hence the "- 9").
 */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

/* Map the USYNC_PROCESS_SHARED flag bit onto a sharing mode. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) != 0 ? PROCESS_SHARE : THREAD_SHARE)

/* Number of spin iterations umtxq_busy() tries before sleeping. */
#define BUSY_SPINS		200
189 
190 static uma_zone_t		umtx_pi_zone;
191 static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
192 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
193 static int			umtx_pi_allocated;
194 
195 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
196 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
197     &umtx_pi_allocated, 0, "Allocated umtx_pi");
198 
199 #ifdef UMTX_PROFILING
200 static long max_length;
201 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
202 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
203 #endif
204 
205 static void umtxq_sysinit(void *);
206 static void umtxq_hash(struct umtx_key *key);
207 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
208 static void umtxq_lock(struct umtx_key *key);
209 static void umtxq_unlock(struct umtx_key *key);
210 static void umtxq_busy(struct umtx_key *key);
211 static void umtxq_unbusy(struct umtx_key *key);
212 static void umtxq_insert_queue(struct umtx_q *uq, int q);
213 static void umtxq_remove_queue(struct umtx_q *uq, int q);
214 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
215 static int umtxq_count(struct umtx_key *key);
216 static struct umtx_pi *umtx_pi_alloc(int);
217 static void umtx_pi_free(struct umtx_pi *pi);
218 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
219 static void umtx_thread_cleanup(struct thread *td);
220 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
221 	struct image_params *imgp __unused);
222 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
223 
224 #define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
225 #define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
226 #define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
227 
228 static struct mtx umtx_lock;
229 
#ifdef UMTX_PROFILING
/*
 * Create one sysctl node per chain index under debug.umtx.chains and
 * export the max_length counter of both chain banks beneath it.
 */
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *node;
	char name[10];
	int idx;

	for (idx = 0; idx < UMTX_CHAINS; idx++) {
		snprintf(name, sizeof(name), "%d", idx);
		node = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][idx].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][idx].max_length, 0, NULL);
	}
}
#endif
250 
251 static void
umtxq_sysinit(void * arg __unused)252 umtxq_sysinit(void *arg __unused)
253 {
254 	int i, j;
255 
256 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
257 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
258 	for (i = 0; i < 2; ++i) {
259 		for (j = 0; j < UMTX_CHAINS; ++j) {
260 			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
261 				 MTX_DEF | MTX_DUPOK);
262 			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
263 			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
264 			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
265 			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
266 			umtxq_chains[i][j].uc_busy = 0;
267 			umtxq_chains[i][j].uc_waiters = 0;
268 #ifdef UMTX_PROFILING
269 			umtxq_chains[i][j].length = 0;
270 			umtxq_chains[i][j].max_length = 0;
271 #endif
272 		}
273 	}
274 #ifdef UMTX_PROFILING
275 	umtx_init_profiling();
276 #endif
277 	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
278 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
279 	    EVENTHANDLER_PRI_ANY);
280 }
281 
282 struct umtx_q *
umtxq_alloc(void)283 umtxq_alloc(void)
284 {
285 	struct umtx_q *uq;
286 
287 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
288 	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
289 	TAILQ_INIT(&uq->uq_spare_queue->head);
290 	TAILQ_INIT(&uq->uq_pi_contested);
291 	uq->uq_inherited_pri = PRI_MAX;
292 	return (uq);
293 }
294 
295 void
umtxq_free(struct umtx_q * uq)296 umtxq_free(struct umtx_q *uq)
297 {
298 	MPASS(uq->uq_spare_queue != NULL);
299 	free(uq->uq_spare_queue, M_UMTX);
300 	free(uq, M_UMTX);
301 }
302 
303 static inline void
umtxq_hash(struct umtx_key * key)304 umtxq_hash(struct umtx_key *key)
305 {
306 	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
307 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
308 }
309 
310 static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key * key)311 umtxq_getchain(struct umtx_key *key)
312 {
313 	if (key->type <= TYPE_SEM)
314 		return (&umtxq_chains[1][key->hash]);
315 	return (&umtxq_chains[0][key->hash]);
316 }
317 
318 /*
319  * Lock a chain.
320  */
321 static inline void
umtxq_lock(struct umtx_key * key)322 umtxq_lock(struct umtx_key *key)
323 {
324 	struct umtxq_chain *uc;
325 
326 	uc = umtxq_getchain(key);
327 	mtx_lock(&uc->uc_lock);
328 }
329 
330 /*
331  * Unlock a chain.
332  */
333 static inline void
umtxq_unlock(struct umtx_key * key)334 umtxq_unlock(struct umtx_key *key)
335 {
336 	struct umtxq_chain *uc;
337 
338 	uc = umtxq_getchain(key);
339 	mtx_unlock(&uc->uc_lock);
340 }
341 
342 /*
343  * Set chain to busy state when following operation
344  * may be blocked (kernel mutex can not be used).
345  */
346 static inline void
umtxq_busy(struct umtx_key * key)347 umtxq_busy(struct umtx_key *key)
348 {
349 	struct umtxq_chain *uc;
350 
351 	uc = umtxq_getchain(key);
352 	mtx_assert(&uc->uc_lock, MA_OWNED);
353 	if (uc->uc_busy) {
354 #ifdef SMP
355 		if (smp_cpus > 1) {
356 			int count = BUSY_SPINS;
357 			if (count > 0) {
358 				umtxq_unlock(key);
359 				while (uc->uc_busy && --count > 0)
360 					cpu_spinwait();
361 				umtxq_lock(key);
362 			}
363 		}
364 #endif
365 		while (uc->uc_busy) {
366 			uc->uc_waiters++;
367 			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
368 			uc->uc_waiters--;
369 		}
370 	}
371 	uc->uc_busy = 1;
372 }
373 
374 /*
375  * Unbusy a chain.
376  */
377 static inline void
umtxq_unbusy(struct umtx_key * key)378 umtxq_unbusy(struct umtx_key *key)
379 {
380 	struct umtxq_chain *uc;
381 
382 	uc = umtxq_getchain(key);
383 	mtx_assert(&uc->uc_lock, MA_OWNED);
384 	KASSERT(uc->uc_busy != 0, ("not busy"));
385 	uc->uc_busy = 0;
386 	if (uc->uc_waiters)
387 		wakeup_one(uc);
388 }
389 
390 static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key * key,int q)391 umtxq_queue_lookup(struct umtx_key *key, int q)
392 {
393 	struct umtxq_queue *uh;
394 	struct umtxq_chain *uc;
395 
396 	uc = umtxq_getchain(key);
397 	UMTXQ_LOCKED_ASSERT(uc);
398 	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
399 		if (umtx_key_match(&uh->key, key))
400 			return (uh);
401 	}
402 
403 	return (NULL);
404 }
405 
406 static inline void
umtxq_insert_queue(struct umtx_q * uq,int q)407 umtxq_insert_queue(struct umtx_q *uq, int q)
408 {
409 	struct umtxq_queue *uh;
410 	struct umtxq_chain *uc;
411 
412 	uc = umtxq_getchain(&uq->uq_key);
413 	UMTXQ_LOCKED_ASSERT(uc);
414 	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
415 	uh = umtxq_queue_lookup(&uq->uq_key, q);
416 	if (uh != NULL) {
417 		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
418 	} else {
419 		uh = uq->uq_spare_queue;
420 		uh->key = uq->uq_key;
421 		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
422 	}
423 	uq->uq_spare_queue = NULL;
424 
425 	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
426 	uh->length++;
427 #ifdef UMTX_PROFILING
428 	uc->length++;
429 	if (uc->length > uc->max_length) {
430 		uc->max_length = uc->length;
431 		if (uc->max_length > max_length)
432 			max_length = uc->max_length;
433 	}
434 #endif
435 	uq->uq_flags |= UQF_UMTXQ;
436 	uq->uq_cur_queue = uh;
437 	return;
438 }
439 
440 static inline void
umtxq_remove_queue(struct umtx_q * uq,int q)441 umtxq_remove_queue(struct umtx_q *uq, int q)
442 {
443 	struct umtxq_chain *uc;
444 	struct umtxq_queue *uh;
445 
446 	uc = umtxq_getchain(&uq->uq_key);
447 	UMTXQ_LOCKED_ASSERT(uc);
448 	if (uq->uq_flags & UQF_UMTXQ) {
449 		uh = uq->uq_cur_queue;
450 		TAILQ_REMOVE(&uh->head, uq, uq_link);
451 		uh->length--;
452 #ifdef UMTX_PROFILING
453 		uc->length--;
454 #endif
455 		uq->uq_flags &= ~UQF_UMTXQ;
456 		if (TAILQ_EMPTY(&uh->head)) {
457 			KASSERT(uh->length == 0,
458 			    ("inconsistent umtxq_queue length"));
459 			LIST_REMOVE(uh, link);
460 		} else {
461 			uh = LIST_FIRST(&uc->uc_spare_queue);
462 			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
463 			LIST_REMOVE(uh, link);
464 		}
465 		uq->uq_spare_queue = uh;
466 		uq->uq_cur_queue = NULL;
467 	}
468 }
469 
470 /*
471  * Check if there are multiple waiters
472  */
473 static int
umtxq_count(struct umtx_key * key)474 umtxq_count(struct umtx_key *key)
475 {
476 	struct umtxq_chain *uc;
477 	struct umtxq_queue *uh;
478 
479 	uc = umtxq_getchain(key);
480 	UMTXQ_LOCKED_ASSERT(uc);
481 	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
482 	if (uh != NULL)
483 		return (uh->length);
484 	return (0);
485 }
486 
487 /*
488  * Check if there are multiple PI waiters and returns first
489  * waiter.
490  */
491 static int
umtxq_count_pi(struct umtx_key * key,struct umtx_q ** first)492 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
493 {
494 	struct umtxq_chain *uc;
495 	struct umtxq_queue *uh;
496 
497 	*first = NULL;
498 	uc = umtxq_getchain(key);
499 	UMTXQ_LOCKED_ASSERT(uc);
500 	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
501 	if (uh != NULL) {
502 		*first = TAILQ_FIRST(&uh->head);
503 		return (uh->length);
504 	}
505 	return (0);
506 }
507 
508 static int
umtxq_check_susp(struct thread * td)509 umtxq_check_susp(struct thread *td)
510 {
511 	struct proc *p;
512 	int error;
513 
514 	/*
515 	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
516 	 * eventually break the lockstep loop.
517 	 */
518 	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
519 		return (0);
520 	error = 0;
521 	p = td->td_proc;
522 	PROC_LOCK(p);
523 	if (P_SHOULDSTOP(p) ||
524 	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
525 		if (p->p_flag & P_SINGLE_EXIT)
526 			error = EINTR;
527 		else
528 			error = ERESTART;
529 	}
530 	PROC_UNLOCK(p);
531 	return (error);
532 }
533 
534 /*
535  * Wake up threads waiting on an userland object.
536  */
537 
538 static int
umtxq_signal_queue(struct umtx_key * key,int n_wake,int q)539 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
540 {
541 	struct umtxq_chain *uc;
542 	struct umtxq_queue *uh;
543 	struct umtx_q *uq;
544 	int ret;
545 
546 	ret = 0;
547 	uc = umtxq_getchain(key);
548 	UMTXQ_LOCKED_ASSERT(uc);
549 	uh = umtxq_queue_lookup(key, q);
550 	if (uh != NULL) {
551 		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
552 			umtxq_remove_queue(uq, q);
553 			wakeup(uq);
554 			if (++ret >= n_wake)
555 				return (ret);
556 		}
557 	}
558 	return (ret);
559 }
560 
561 
562 /*
563  * Wake up specified thread.
564  */
565 static inline void
umtxq_signal_thread(struct umtx_q * uq)566 umtxq_signal_thread(struct umtx_q *uq)
567 {
568 	struct umtxq_chain *uc;
569 
570 	uc = umtxq_getchain(&uq->uq_key);
571 	UMTXQ_LOCKED_ASSERT(uc);
572 	umtxq_remove(uq);
573 	wakeup(uq);
574 }
575 
576 /*
577  * Put thread into sleep state, before sleeping, check if
578  * thread was removed from umtx queue.
579  */
580 static inline int
umtxq_sleep(struct umtx_q * uq,const char * wmesg,int timo)581 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
582 {
583 	struct umtxq_chain *uc;
584 	int error;
585 
586 	uc = umtxq_getchain(&uq->uq_key);
587 	UMTXQ_LOCKED_ASSERT(uc);
588 	if (!(uq->uq_flags & UQF_UMTXQ))
589 		return (0);
590 	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
591 	if (error == EWOULDBLOCK)
592 		error = ETIMEDOUT;
593 	return (error);
594 }
595 
596 /*
597  * Convert userspace address into unique logical address.
598  */
599 int
umtx_key_get(void * addr,int type,int share,struct umtx_key * key)600 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
601 {
602 	struct thread *td = curthread;
603 	vm_map_t map;
604 	vm_map_entry_t entry;
605 	vm_pindex_t pindex;
606 	vm_prot_t prot;
607 	boolean_t wired;
608 
609 	key->type = type;
610 	if (share == THREAD_SHARE) {
611 		key->shared = 0;
612 		key->info.private.vs = td->td_proc->p_vmspace;
613 		key->info.private.addr = (uintptr_t)addr;
614 	} else {
615 		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
616 		map = &td->td_proc->p_vmspace->vm_map;
617 		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
618 		    &entry, &key->info.shared.object, &pindex, &prot,
619 		    &wired) != KERN_SUCCESS) {
620 			return EFAULT;
621 		}
622 
623 		if ((share == PROCESS_SHARE) ||
624 		    (share == AUTO_SHARE &&
625 		     VM_INHERIT_SHARE == entry->inheritance)) {
626 			key->shared = 1;
627 			key->info.shared.offset = entry->offset + entry->start -
628 				(vm_offset_t)addr;
629 			vm_object_reference(key->info.shared.object);
630 		} else {
631 			key->shared = 0;
632 			key->info.private.vs = td->td_proc->p_vmspace;
633 			key->info.private.addr = (uintptr_t)addr;
634 		}
635 		vm_map_lookup_done(map, entry);
636 	}
637 
638 	umtxq_hash(key);
639 	return (0);
640 }
641 
642 /*
643  * Release key.
644  */
645 void
umtx_key_release(struct umtx_key * key)646 umtx_key_release(struct umtx_key *key)
647 {
648 	if (key->shared)
649 		vm_object_deallocate(key->info.shared.object);
650 }
651 
652 /*
653  * Lock a umtx object.
654  */
655 static int
_do_lock_umtx(struct thread * td,struct umtx * umtx,u_long id,int timo)656 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
657 {
658 	struct umtx_q *uq;
659 	u_long owner;
660 	u_long old;
661 	int error = 0;
662 
663 	uq = td->td_umtxq;
664 
665 	/*
666 	 * Care must be exercised when dealing with umtx structure. It
667 	 * can fault on any access.
668 	 */
669 	for (;;) {
670 		/*
671 		 * Try the uncontested case.  This should be done in userland.
672 		 */
673 		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
674 
675 		/* The acquire succeeded. */
676 		if (owner == UMTX_UNOWNED)
677 			return (0);
678 
679 		/* The address was invalid. */
680 		if (owner == -1)
681 			return (EFAULT);
682 
683 		/* If no one owns it but it is contested try to acquire it. */
684 		if (owner == UMTX_CONTESTED) {
685 			owner = casuword(&umtx->u_owner,
686 			    UMTX_CONTESTED, id | UMTX_CONTESTED);
687 
688 			if (owner == UMTX_CONTESTED)
689 				return (0);
690 
691 			/* The address was invalid. */
692 			if (owner == -1)
693 				return (EFAULT);
694 
695 			error = umtxq_check_susp(td);
696 			if (error != 0)
697 				break;
698 
699 			/* If this failed the lock has changed, restart. */
700 			continue;
701 		}
702 
703 		/*
704 		 * If we caught a signal, we have retried and now
705 		 * exit immediately.
706 		 */
707 		if (error != 0)
708 			return (error);
709 
710 		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
711 			AUTO_SHARE, &uq->uq_key)) != 0)
712 			return (error);
713 
714 		umtxq_lock(&uq->uq_key);
715 		umtxq_busy(&uq->uq_key);
716 		umtxq_insert(uq);
717 		umtxq_unbusy(&uq->uq_key);
718 		umtxq_unlock(&uq->uq_key);
719 
720 		/*
721 		 * Set the contested bit so that a release in user space
722 		 * knows to use the system call for unlock.  If this fails
723 		 * either some one else has acquired the lock or it has been
724 		 * released.
725 		 */
726 		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
727 
728 		/* The address was invalid. */
729 		if (old == -1) {
730 			umtxq_lock(&uq->uq_key);
731 			umtxq_remove(uq);
732 			umtxq_unlock(&uq->uq_key);
733 			umtx_key_release(&uq->uq_key);
734 			return (EFAULT);
735 		}
736 
737 		/*
738 		 * We set the contested bit, sleep. Otherwise the lock changed
739 		 * and we need to retry or we lost a race to the thread
740 		 * unlocking the umtx.
741 		 */
742 		umtxq_lock(&uq->uq_key);
743 		if (old == owner)
744 			error = umtxq_sleep(uq, "umtx", timo);
745 		umtxq_remove(uq);
746 		umtxq_unlock(&uq->uq_key);
747 		umtx_key_release(&uq->uq_key);
748 
749 		if (error == 0)
750 			error = umtxq_check_susp(td);
751 	}
752 
753 	return (0);
754 }
755 
756 /*
757  * Lock a umtx object.
758  */
759 static int
do_lock_umtx(struct thread * td,struct umtx * umtx,u_long id,struct timespec * timeout)760 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
761 	struct timespec *timeout)
762 {
763 	struct timespec ts, ts2, ts3;
764 	struct timeval tv;
765 	int error;
766 
767 	if (timeout == NULL) {
768 		error = _do_lock_umtx(td, umtx, id, 0);
769 		/* Mutex locking is restarted if it is interrupted. */
770 		if (error == EINTR)
771 			error = ERESTART;
772 	} else {
773 		getnanouptime(&ts);
774 		timespecadd(&ts, timeout);
775 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
776 		for (;;) {
777 			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
778 			if (error != ETIMEDOUT)
779 				break;
780 			getnanouptime(&ts2);
781 			if (timespeccmp(&ts2, &ts, >=)) {
782 				error = ETIMEDOUT;
783 				break;
784 			}
785 			ts3 = ts;
786 			timespecsub(&ts3, &ts2);
787 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
788 		}
789 		/* Timed-locking is not restarted. */
790 		if (error == ERESTART)
791 			error = EINTR;
792 	}
793 	return (error);
794 }
795 
796 /*
797  * Unlock a umtx object.
798  */
799 static int
do_unlock_umtx(struct thread * td,struct umtx * umtx,u_long id)800 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
801 {
802 	struct umtx_key key;
803 	u_long owner;
804 	u_long old;
805 	int error;
806 	int count;
807 
808 	/*
809 	 * Make sure we own this mtx.
810 	 */
811 	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
812 	if (owner == -1)
813 		return (EFAULT);
814 
815 	if ((owner & ~UMTX_CONTESTED) != id)
816 		return (EPERM);
817 
818 	/* This should be done in userland */
819 	if ((owner & UMTX_CONTESTED) == 0) {
820 		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
821 		if (old == -1)
822 			return (EFAULT);
823 		if (old == owner)
824 			return (0);
825 		owner = old;
826 	}
827 
828 	/* We should only ever be in here for contested locks */
829 	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
830 		&key)) != 0)
831 		return (error);
832 
833 	umtxq_lock(&key);
834 	umtxq_busy(&key);
835 	count = umtxq_count(&key);
836 	umtxq_unlock(&key);
837 
838 	/*
839 	 * When unlocking the umtx, it must be marked as unowned if
840 	 * there is zero or one thread only waiting for it.
841 	 * Otherwise, it must be marked as contested.
842 	 */
843 	old = casuword(&umtx->u_owner, owner,
844 		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
845 	umtxq_lock(&key);
846 	umtxq_signal(&key,1);
847 	umtxq_unbusy(&key);
848 	umtxq_unlock(&key);
849 	umtx_key_release(&key);
850 	if (old == -1)
851 		return (EFAULT);
852 	if (old != owner)
853 		return (EINVAL);
854 	return (0);
855 }
856 
857 #ifdef COMPAT_FREEBSD32
858 
859 /*
860  * Lock a umtx object.
861  */
862 static int
_do_lock_umtx32(struct thread * td,uint32_t * m,uint32_t id,int timo)863 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
864 {
865 	struct umtx_q *uq;
866 	uint32_t owner;
867 	uint32_t old;
868 	int error = 0;
869 
870 	uq = td->td_umtxq;
871 
872 	/*
873 	 * Care must be exercised when dealing with umtx structure. It
874 	 * can fault on any access.
875 	 */
876 	for (;;) {
877 		/*
878 		 * Try the uncontested case.  This should be done in userland.
879 		 */
880 		owner = casuword32(m, UMUTEX_UNOWNED, id);
881 
882 		/* The acquire succeeded. */
883 		if (owner == UMUTEX_UNOWNED)
884 			return (0);
885 
886 		/* The address was invalid. */
887 		if (owner == -1)
888 			return (EFAULT);
889 
890 		/* If no one owns it but it is contested try to acquire it. */
891 		if (owner == UMUTEX_CONTESTED) {
892 			owner = casuword32(m,
893 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
894 			if (owner == UMUTEX_CONTESTED)
895 				return (0);
896 
897 			/* The address was invalid. */
898 			if (owner == -1)
899 				return (EFAULT);
900 
901 			error = umtxq_check_susp(td);
902 			if (error != 0)
903 				break;
904 
905 			/* If this failed the lock has changed, restart. */
906 			continue;
907 		}
908 
909 		/*
910 		 * If we caught a signal, we have retried and now
911 		 * exit immediately.
912 		 */
913 		if (error != 0)
914 			return (error);
915 
916 		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
917 			AUTO_SHARE, &uq->uq_key)) != 0)
918 			return (error);
919 
920 		umtxq_lock(&uq->uq_key);
921 		umtxq_busy(&uq->uq_key);
922 		umtxq_insert(uq);
923 		umtxq_unbusy(&uq->uq_key);
924 		umtxq_unlock(&uq->uq_key);
925 
926 		/*
927 		 * Set the contested bit so that a release in user space
928 		 * knows to use the system call for unlock.  If this fails
929 		 * either some one else has acquired the lock or it has been
930 		 * released.
931 		 */
932 		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
933 
934 		/* The address was invalid. */
935 		if (old == -1) {
936 			umtxq_lock(&uq->uq_key);
937 			umtxq_remove(uq);
938 			umtxq_unlock(&uq->uq_key);
939 			umtx_key_release(&uq->uq_key);
940 			return (EFAULT);
941 		}
942 
943 		/*
944 		 * We set the contested bit, sleep. Otherwise the lock changed
945 		 * and we need to retry or we lost a race to the thread
946 		 * unlocking the umtx.
947 		 */
948 		umtxq_lock(&uq->uq_key);
949 		if (old == owner)
950 			error = umtxq_sleep(uq, "umtx", timo);
951 		umtxq_remove(uq);
952 		umtxq_unlock(&uq->uq_key);
953 		umtx_key_release(&uq->uq_key);
954 
955 		if (error == 0)
956 			error = umtxq_check_susp(td);
957 	}
958 
959 	return (0);
960 }
961 
962 /*
963  * Lock a umtx object.
964  */
965 static int
do_lock_umtx32(struct thread * td,void * m,uint32_t id,struct timespec * timeout)966 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
967 	struct timespec *timeout)
968 {
969 	struct timespec ts, ts2, ts3;
970 	struct timeval tv;
971 	int error;
972 
973 	if (timeout == NULL) {
974 		error = _do_lock_umtx32(td, m, id, 0);
975 		/* Mutex locking is restarted if it is interrupted. */
976 		if (error == EINTR)
977 			error = ERESTART;
978 	} else {
979 		getnanouptime(&ts);
980 		timespecadd(&ts, timeout);
981 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
982 		for (;;) {
983 			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
984 			if (error != ETIMEDOUT)
985 				break;
986 			getnanouptime(&ts2);
987 			if (timespeccmp(&ts2, &ts, >=)) {
988 				error = ETIMEDOUT;
989 				break;
990 			}
991 			ts3 = ts;
992 			timespecsub(&ts3, &ts2);
993 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
994 		}
995 		/* Timed-locking is not restarted. */
996 		if (error == ERESTART)
997 			error = EINTR;
998 	}
999 	return (error);
1000 }
1001 
1002 /*
1003  * Unlock a umtx object.
1004  */
1005 static int
do_unlock_umtx32(struct thread * td,uint32_t * m,uint32_t id)1006 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
1007 {
1008 	struct umtx_key key;
1009 	uint32_t owner;
1010 	uint32_t old;
1011 	int error;
1012 	int count;
1013 
1014 	/*
1015 	 * Make sure we own this mtx.
1016 	 */
1017 	owner = fuword32(m);
1018 	if (owner == -1)
1019 		return (EFAULT);
1020 
1021 	if ((owner & ~UMUTEX_CONTESTED) != id)
1022 		return (EPERM);
1023 
1024 	/* This should be done in userland */
1025 	if ((owner & UMUTEX_CONTESTED) == 0) {
1026 		old = casuword32(m, owner, UMUTEX_UNOWNED);
1027 		if (old == -1)
1028 			return (EFAULT);
1029 		if (old == owner)
1030 			return (0);
1031 		owner = old;
1032 	}
1033 
1034 	/* We should only ever be in here for contested locks */
1035 	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
1036 		&key)) != 0)
1037 		return (error);
1038 
1039 	umtxq_lock(&key);
1040 	umtxq_busy(&key);
1041 	count = umtxq_count(&key);
1042 	umtxq_unlock(&key);
1043 
1044 	/*
1045 	 * When unlocking the umtx, it must be marked as unowned if
1046 	 * there is zero or one thread only waiting for it.
1047 	 * Otherwise, it must be marked as contested.
1048 	 */
1049 	old = casuword32(m, owner,
1050 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1051 	umtxq_lock(&key);
1052 	umtxq_signal(&key,1);
1053 	umtxq_unbusy(&key);
1054 	umtxq_unlock(&key);
1055 	umtx_key_release(&key);
1056 	if (old == -1)
1057 		return (EFAULT);
1058 	if (old != owner)
1059 		return (EINVAL);
1060 	return (0);
1061 }
1062 #endif
1063 
1064 /*
1065  * Fetch and compare value, sleep on the address if value is not changed.
1066  */
1067 static int
do_wait(struct thread * td,void * addr,u_long id,struct timespec * timeout,int compat32,int is_private)1068 do_wait(struct thread *td, void *addr, u_long id,
1069 	struct timespec *timeout, int compat32, int is_private)
1070 {
1071 	struct umtx_q *uq;
1072 	struct timespec ts, ts2, ts3;
1073 	struct timeval tv;
1074 	u_long tmp;
1075 	int error = 0;
1076 
1077 	uq = td->td_umtxq;
1078 	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1079 		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1080 		return (error);
1081 
1082 	umtxq_lock(&uq->uq_key);
1083 	umtxq_insert(uq);
1084 	umtxq_unlock(&uq->uq_key);
1085 	if (compat32 == 0)
1086 		tmp = fuword(addr);
1087         else
1088 		tmp = (unsigned int)fuword32(addr);
1089 	if (tmp != id) {
1090 		umtxq_lock(&uq->uq_key);
1091 		umtxq_remove(uq);
1092 		umtxq_unlock(&uq->uq_key);
1093 	} else if (timeout == NULL) {
1094 		umtxq_lock(&uq->uq_key);
1095 		error = umtxq_sleep(uq, "uwait", 0);
1096 		umtxq_remove(uq);
1097 		umtxq_unlock(&uq->uq_key);
1098 	} else {
1099 		getnanouptime(&ts);
1100 		timespecadd(&ts, timeout);
1101 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
1102 		umtxq_lock(&uq->uq_key);
1103 		for (;;) {
1104 			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1105 			if (!(uq->uq_flags & UQF_UMTXQ)) {
1106 				error = 0;
1107 				break;
1108 			}
1109 			if (error != ETIMEDOUT)
1110 				break;
1111 			umtxq_unlock(&uq->uq_key);
1112 			getnanouptime(&ts2);
1113 			if (timespeccmp(&ts2, &ts, >=)) {
1114 				error = ETIMEDOUT;
1115 				umtxq_lock(&uq->uq_key);
1116 				break;
1117 			}
1118 			ts3 = ts;
1119 			timespecsub(&ts3, &ts2);
1120 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1121 			umtxq_lock(&uq->uq_key);
1122 		}
1123 		umtxq_remove(uq);
1124 		umtxq_unlock(&uq->uq_key);
1125 	}
1126 	umtx_key_release(&uq->uq_key);
1127 	if (error == ERESTART)
1128 		error = EINTR;
1129 	return (error);
1130 }
1131 
1132 /*
1133  * Wake up threads sleeping on the specified address.
1134  */
1135 int
kern_umtx_wake(struct thread * td,void * uaddr,int n_wake,int is_private)1136 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1137 {
1138 	struct umtx_key key;
1139 	int ret;
1140 
1141 	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1142 		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1143 		return (ret);
1144 	umtxq_lock(&key);
1145 	ret = umtxq_signal(&key, n_wake);
1146 	umtxq_unlock(&key);
1147 	umtx_key_release(&key);
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1153  */
1154 static int
_do_lock_normal(struct thread * td,struct umutex * m,uint32_t flags,int timo,int mode)1155 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1156 	int mode)
1157 {
1158 	struct umtx_q *uq;
1159 	uint32_t owner, old, id;
1160 	int error = 0;
1161 
1162 	id = td->td_tid;
1163 	uq = td->td_umtxq;
1164 
1165 	/*
1166 	 * Care must be exercised when dealing with umtx structure. It
1167 	 * can fault on any access.
1168 	 */
1169 	for (;;) {
1170 		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1171 		if (mode == _UMUTEX_WAIT) {
1172 			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1173 				return (0);
1174 		} else {
1175 			/*
1176 			 * Try the uncontested case.  This should be done in userland.
1177 			 */
1178 			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1179 
1180 			/* The acquire succeeded. */
1181 			if (owner == UMUTEX_UNOWNED)
1182 				return (0);
1183 
1184 			/* The address was invalid. */
1185 			if (owner == -1)
1186 				return (EFAULT);
1187 
1188 			/* If no one owns it but it is contested try to acquire it. */
1189 			if (owner == UMUTEX_CONTESTED) {
1190 				owner = casuword32(&m->m_owner,
1191 				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1192 
1193 				if (owner == UMUTEX_CONTESTED)
1194 					return (0);
1195 
1196 				/* The address was invalid. */
1197 				if (owner == -1)
1198 					return (EFAULT);
1199 
1200 				error = umtxq_check_susp(td);
1201 				if (error != 0)
1202 					return (error);
1203 
1204 				/* If this failed the lock has changed, restart. */
1205 				continue;
1206 			}
1207 		}
1208 
1209 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1210 		    (owner & ~UMUTEX_CONTESTED) == id)
1211 			return (EDEADLK);
1212 
1213 		if (mode == _UMUTEX_TRY)
1214 			return (EBUSY);
1215 
1216 		/*
1217 		 * If we caught a signal, we have retried and now
1218 		 * exit immediately.
1219 		 */
1220 		if (error != 0)
1221 			return (error);
1222 
1223 		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1224 		    GET_SHARE(flags), &uq->uq_key)) != 0)
1225 			return (error);
1226 
1227 		umtxq_lock(&uq->uq_key);
1228 		umtxq_busy(&uq->uq_key);
1229 		umtxq_insert(uq);
1230 		umtxq_unlock(&uq->uq_key);
1231 
1232 		/*
1233 		 * Set the contested bit so that a release in user space
1234 		 * knows to use the system call for unlock.  If this fails
1235 		 * either some one else has acquired the lock or it has been
1236 		 * released.
1237 		 */
1238 		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1239 
1240 		/* The address was invalid. */
1241 		if (old == -1) {
1242 			umtxq_lock(&uq->uq_key);
1243 			umtxq_remove(uq);
1244 			umtxq_unbusy(&uq->uq_key);
1245 			umtxq_unlock(&uq->uq_key);
1246 			umtx_key_release(&uq->uq_key);
1247 			return (EFAULT);
1248 		}
1249 
1250 		/*
1251 		 * We set the contested bit, sleep. Otherwise the lock changed
1252 		 * and we need to retry or we lost a race to the thread
1253 		 * unlocking the umtx.
1254 		 */
1255 		umtxq_lock(&uq->uq_key);
1256 		umtxq_unbusy(&uq->uq_key);
1257 		if (old == owner)
1258 			error = umtxq_sleep(uq, "umtxn", timo);
1259 		umtxq_remove(uq);
1260 		umtxq_unlock(&uq->uq_key);
1261 		umtx_key_release(&uq->uq_key);
1262 
1263 		if (error == 0)
1264 			error = umtxq_check_susp(td);
1265 	}
1266 
1267 	return (0);
1268 }
1269 
1270 /*
1271  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1272  */
1273 /*
1274  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1275  */
1276 static int
do_unlock_normal(struct thread * td,struct umutex * m,uint32_t flags)1277 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1278 {
1279 	struct umtx_key key;
1280 	uint32_t owner, old, id;
1281 	int error;
1282 	int count;
1283 
1284 	id = td->td_tid;
1285 	/*
1286 	 * Make sure we own this mtx.
1287 	 */
1288 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1289 	if (owner == -1)
1290 		return (EFAULT);
1291 
1292 	if ((owner & ~UMUTEX_CONTESTED) != id)
1293 		return (EPERM);
1294 
1295 	if ((owner & UMUTEX_CONTESTED) == 0) {
1296 		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1297 		if (old == -1)
1298 			return (EFAULT);
1299 		if (old == owner)
1300 			return (0);
1301 		owner = old;
1302 	}
1303 
1304 	/* We should only ever be in here for contested locks */
1305 	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1306 	    &key)) != 0)
1307 		return (error);
1308 
1309 	umtxq_lock(&key);
1310 	umtxq_busy(&key);
1311 	count = umtxq_count(&key);
1312 	umtxq_unlock(&key);
1313 
1314 	/*
1315 	 * When unlocking the umtx, it must be marked as unowned if
1316 	 * there is zero or one thread only waiting for it.
1317 	 * Otherwise, it must be marked as contested.
1318 	 */
1319 	old = casuword32(&m->m_owner, owner,
1320 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1321 	umtxq_lock(&key);
1322 	umtxq_signal(&key,1);
1323 	umtxq_unbusy(&key);
1324 	umtxq_unlock(&key);
1325 	umtx_key_release(&key);
1326 	if (old == -1)
1327 		return (EFAULT);
1328 	if (old != owner)
1329 		return (EINVAL);
1330 	return (0);
1331 }
1332 
1333 /*
1334  * Check if the mutex is available and wake up a waiter,
1335  * only for simple mutex.
1336  */
1337 static int
do_wake_umutex(struct thread * td,struct umutex * m)1338 do_wake_umutex(struct thread *td, struct umutex *m)
1339 {
1340 	struct umtx_key key;
1341 	uint32_t owner;
1342 	uint32_t flags;
1343 	int error;
1344 	int count;
1345 
1346 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1347 	if (owner == -1)
1348 		return (EFAULT);
1349 
1350 	if ((owner & ~UMUTEX_CONTESTED) != 0)
1351 		return (0);
1352 
1353 	flags = fuword32(&m->m_flags);
1354 
1355 	/* We should only ever be in here for contested locks */
1356 	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1357 	    &key)) != 0)
1358 		return (error);
1359 
1360 	umtxq_lock(&key);
1361 	umtxq_busy(&key);
1362 	count = umtxq_count(&key);
1363 	umtxq_unlock(&key);
1364 
1365 	if (count <= 1)
1366 		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1367 
1368 	umtxq_lock(&key);
1369 	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1370 		umtxq_signal(&key, 1);
1371 	umtxq_unbusy(&key);
1372 	umtxq_unlock(&key);
1373 	umtx_key_release(&key);
1374 	return (0);
1375 }
1376 
1377 /*
1378  * Check if the mutex has waiters and tries to fix contention bit.
1379  */
1380 static int
do_wake2_umutex(struct thread * td,struct umutex * m,uint32_t flags)1381 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1382 {
1383 	struct umtx_key key;
1384 	uint32_t owner, old;
1385 	int type;
1386 	int error;
1387 	int count;
1388 
1389 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1390 	case 0:
1391 		type = TYPE_NORMAL_UMUTEX;
1392 		break;
1393 	case UMUTEX_PRIO_INHERIT:
1394 		type = TYPE_PI_UMUTEX;
1395 		break;
1396 	case UMUTEX_PRIO_PROTECT:
1397 		type = TYPE_PP_UMUTEX;
1398 		break;
1399 	default:
1400 		return (EINVAL);
1401 	}
1402 	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
1403 	    &key)) != 0)
1404 		return (error);
1405 
1406 	owner = 0;
1407 	umtxq_lock(&key);
1408 	umtxq_busy(&key);
1409 	count = umtxq_count(&key);
1410 	umtxq_unlock(&key);
1411 	/*
1412 	 * Only repair contention bit if there is a waiter, this means the mutex
1413 	 * is still being referenced by userland code, otherwise don't update
1414 	 * any memory.
1415 	 */
1416 	if (count > 1) {
1417 		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1418 		while ((owner & UMUTEX_CONTESTED) ==0) {
1419 			old = casuword32(&m->m_owner, owner,
1420 			    owner|UMUTEX_CONTESTED);
1421 			if (old == owner)
1422 				break;
1423 			owner = old;
1424 			if (old == -1)
1425 				break;
1426 			error = umtxq_check_susp(td);
1427 			if (error != 0)
1428 				break;
1429 		}
1430 	} else if (count == 1) {
1431 		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1432 		while ((owner & ~UMUTEX_CONTESTED) != 0 &&
1433 		       (owner & UMUTEX_CONTESTED) == 0) {
1434 			old = casuword32(&m->m_owner, owner,
1435 			    owner|UMUTEX_CONTESTED);
1436 			if (old == owner)
1437 				break;
1438 			owner = old;
1439 			if (old == -1)
1440 				break;
1441 			error = umtxq_check_susp(td);
1442 			if (error != 0)
1443 				break;
1444 		}
1445 	}
1446 	umtxq_lock(&key);
1447 	if (owner == -1) {
1448 		error = EFAULT;
1449 		umtxq_signal(&key, INT_MAX);
1450 	}
1451 	else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1452 		umtxq_signal(&key, 1);
1453 	umtxq_unbusy(&key);
1454 	umtxq_unlock(&key);
1455 	umtx_key_release(&key);
1456 	return (error);
1457 }
1458 
1459 static inline struct umtx_pi *
umtx_pi_alloc(int flags)1460 umtx_pi_alloc(int flags)
1461 {
1462 	struct umtx_pi *pi;
1463 
1464 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1465 	TAILQ_INIT(&pi->pi_blocked);
1466 	atomic_add_int(&umtx_pi_allocated, 1);
1467 	return (pi);
1468 }
1469 
1470 static inline void
umtx_pi_free(struct umtx_pi * pi)1471 umtx_pi_free(struct umtx_pi *pi)
1472 {
1473 	uma_zfree(umtx_pi_zone, pi);
1474 	atomic_add_int(&umtx_pi_allocated, -1);
1475 }
1476 
1477 /*
1478  * Adjust the thread's position on a pi_state after its priority has been
1479  * changed.
1480  */
1481 static int
umtx_pi_adjust_thread(struct umtx_pi * pi,struct thread * td)1482 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1483 {
1484 	struct umtx_q *uq, *uq1, *uq2;
1485 	struct thread *td1;
1486 
1487 	mtx_assert(&umtx_lock, MA_OWNED);
1488 	if (pi == NULL)
1489 		return (0);
1490 
1491 	uq = td->td_umtxq;
1492 
1493 	/*
1494 	 * Check if the thread needs to be moved on the blocked chain.
1495 	 * It needs to be moved if either its priority is lower than
1496 	 * the previous thread or higher than the next thread.
1497 	 */
1498 	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1499 	uq2 = TAILQ_NEXT(uq, uq_lockq);
1500 	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1501 	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1502 		/*
1503 		 * Remove thread from blocked chain and determine where
1504 		 * it should be moved to.
1505 		 */
1506 		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1507 		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1508 			td1 = uq1->uq_thread;
1509 			MPASS(td1->td_proc->p_magic == P_MAGIC);
1510 			if (UPRI(td1) > UPRI(td))
1511 				break;
1512 		}
1513 
1514 		if (uq1 == NULL)
1515 			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1516 		else
1517 			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1518 	}
1519 	return (1);
1520 }
1521 
1522 /*
1523  * Propagate priority when a thread is blocked on POSIX
1524  * PI mutex.
1525  */
1526 static void
umtx_propagate_priority(struct thread * td)1527 umtx_propagate_priority(struct thread *td)
1528 {
1529 	struct umtx_q *uq;
1530 	struct umtx_pi *pi;
1531 	int pri;
1532 
1533 	mtx_assert(&umtx_lock, MA_OWNED);
1534 	pri = UPRI(td);
1535 	uq = td->td_umtxq;
1536 	pi = uq->uq_pi_blocked;
1537 	if (pi == NULL)
1538 		return;
1539 
1540 	for (;;) {
1541 		td = pi->pi_owner;
1542 		if (td == NULL || td == curthread)
1543 			return;
1544 
1545 		MPASS(td->td_proc != NULL);
1546 		MPASS(td->td_proc->p_magic == P_MAGIC);
1547 
1548 		thread_lock(td);
1549 		if (td->td_lend_user_pri > pri)
1550 			sched_lend_user_prio(td, pri);
1551 		else {
1552 			thread_unlock(td);
1553 			break;
1554 		}
1555 		thread_unlock(td);
1556 
1557 		/*
1558 		 * Pick up the lock that td is blocked on.
1559 		 */
1560 		uq = td->td_umtxq;
1561 		pi = uq->uq_pi_blocked;
1562 		if (pi == NULL)
1563 			break;
1564 		/* Resort td on the list if needed. */
1565 		umtx_pi_adjust_thread(pi, td);
1566 	}
1567 }
1568 
1569 /*
1570  * Unpropagate priority for a PI mutex when a thread blocked on
1571  * it is interrupted by signal or resumed by others.
1572  */
1573 static void
umtx_repropagate_priority(struct umtx_pi * pi)1574 umtx_repropagate_priority(struct umtx_pi *pi)
1575 {
1576 	struct umtx_q *uq, *uq_owner;
1577 	struct umtx_pi *pi2;
1578 	int pri;
1579 
1580 	mtx_assert(&umtx_lock, MA_OWNED);
1581 
1582 	while (pi != NULL && pi->pi_owner != NULL) {
1583 		pri = PRI_MAX;
1584 		uq_owner = pi->pi_owner->td_umtxq;
1585 
1586 		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1587 			uq = TAILQ_FIRST(&pi2->pi_blocked);
1588 			if (uq != NULL) {
1589 				if (pri > UPRI(uq->uq_thread))
1590 					pri = UPRI(uq->uq_thread);
1591 			}
1592 		}
1593 
1594 		if (pri > uq_owner->uq_inherited_pri)
1595 			pri = uq_owner->uq_inherited_pri;
1596 		thread_lock(pi->pi_owner);
1597 		sched_lend_user_prio(pi->pi_owner, pri);
1598 		thread_unlock(pi->pi_owner);
1599 		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1600 			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1601 	}
1602 }
1603 
1604 /*
1605  * Insert a PI mutex into owned list.
1606  */
1607 static void
umtx_pi_setowner(struct umtx_pi * pi,struct thread * owner)1608 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1609 {
1610 	struct umtx_q *uq_owner;
1611 
1612 	uq_owner = owner->td_umtxq;
1613 	mtx_assert(&umtx_lock, MA_OWNED);
1614 	if (pi->pi_owner != NULL)
1615 		panic("pi_ower != NULL");
1616 	pi->pi_owner = owner;
1617 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1618 }
1619 
1620 /*
1621  * Claim ownership of a PI mutex.
1622  */
1623 static int
umtx_pi_claim(struct umtx_pi * pi,struct thread * owner)1624 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1625 {
1626 	struct umtx_q *uq, *uq_owner;
1627 
1628 	uq_owner = owner->td_umtxq;
1629 	mtx_lock_spin(&umtx_lock);
1630 	if (pi->pi_owner == owner) {
1631 		mtx_unlock_spin(&umtx_lock);
1632 		return (0);
1633 	}
1634 
1635 	if (pi->pi_owner != NULL) {
1636 		/*
1637 		 * userland may have already messed the mutex, sigh.
1638 		 */
1639 		mtx_unlock_spin(&umtx_lock);
1640 		return (EPERM);
1641 	}
1642 	umtx_pi_setowner(pi, owner);
1643 	uq = TAILQ_FIRST(&pi->pi_blocked);
1644 	if (uq != NULL) {
1645 		int pri;
1646 
1647 		pri = UPRI(uq->uq_thread);
1648 		thread_lock(owner);
1649 		if (pri < UPRI(owner))
1650 			sched_lend_user_prio(owner, pri);
1651 		thread_unlock(owner);
1652 	}
1653 	mtx_unlock_spin(&umtx_lock);
1654 	return (0);
1655 }
1656 
1657 /*
1658  * Adjust a thread's order position in its blocked PI mutex,
1659  * this may result new priority propagating process.
1660  */
1661 void
umtx_pi_adjust(struct thread * td,u_char oldpri)1662 umtx_pi_adjust(struct thread *td, u_char oldpri)
1663 {
1664 	struct umtx_q *uq;
1665 	struct umtx_pi *pi;
1666 
1667 	uq = td->td_umtxq;
1668 	mtx_lock_spin(&umtx_lock);
1669 	/*
1670 	 * Pick up the lock that td is blocked on.
1671 	 */
1672 	pi = uq->uq_pi_blocked;
1673 	if (pi != NULL) {
1674 		umtx_pi_adjust_thread(pi, td);
1675 		umtx_repropagate_priority(pi);
1676 	}
1677 	mtx_unlock_spin(&umtx_lock);
1678 }
1679 
1680 /*
1681  * Sleep on a PI mutex.
1682  */
1683 static int
umtxq_sleep_pi(struct umtx_q * uq,struct umtx_pi * pi,uint32_t owner,const char * wmesg,int timo)1684 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1685 	uint32_t owner, const char *wmesg, int timo)
1686 {
1687 	struct umtxq_chain *uc;
1688 	struct thread *td, *td1;
1689 	struct umtx_q *uq1;
1690 	int pri;
1691 	int error = 0;
1692 
1693 	td = uq->uq_thread;
1694 	KASSERT(td == curthread, ("inconsistent uq_thread"));
1695 	uc = umtxq_getchain(&uq->uq_key);
1696 	UMTXQ_LOCKED_ASSERT(uc);
1697 	UMTXQ_BUSY_ASSERT(uc);
1698 	umtxq_insert(uq);
1699 	mtx_lock_spin(&umtx_lock);
1700 	if (pi->pi_owner == NULL) {
1701 		mtx_unlock_spin(&umtx_lock);
1702 		/* XXX Only look up thread in current process. */
1703 		td1 = tdfind(owner, curproc->p_pid);
1704 		mtx_lock_spin(&umtx_lock);
1705 		if (td1 != NULL) {
1706 			if (pi->pi_owner == NULL)
1707 				umtx_pi_setowner(pi, td1);
1708 			PROC_UNLOCK(td1->td_proc);
1709 		}
1710 	}
1711 
1712 	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1713 		pri = UPRI(uq1->uq_thread);
1714 		if (pri > UPRI(td))
1715 			break;
1716 	}
1717 
1718 	if (uq1 != NULL)
1719 		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1720 	else
1721 		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1722 
1723 	uq->uq_pi_blocked = pi;
1724 	thread_lock(td);
1725 	td->td_flags |= TDF_UPIBLOCKED;
1726 	thread_unlock(td);
1727 	umtx_propagate_priority(td);
1728 	mtx_unlock_spin(&umtx_lock);
1729 	umtxq_unbusy(&uq->uq_key);
1730 
1731 	if (uq->uq_flags & UQF_UMTXQ) {
1732 		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1733 		if (error == EWOULDBLOCK)
1734 			error = ETIMEDOUT;
1735 		if (uq->uq_flags & UQF_UMTXQ) {
1736 			umtxq_remove(uq);
1737 		}
1738 	}
1739 	mtx_lock_spin(&umtx_lock);
1740 	uq->uq_pi_blocked = NULL;
1741 	thread_lock(td);
1742 	td->td_flags &= ~TDF_UPIBLOCKED;
1743 	thread_unlock(td);
1744 	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1745 	umtx_repropagate_priority(pi);
1746 	mtx_unlock_spin(&umtx_lock);
1747 	umtxq_unlock(&uq->uq_key);
1748 
1749 	return (error);
1750 }
1751 
1752 /*
1753  * Add reference count for a PI mutex.
1754  */
1755 static void
umtx_pi_ref(struct umtx_pi * pi)1756 umtx_pi_ref(struct umtx_pi *pi)
1757 {
1758 	struct umtxq_chain *uc;
1759 
1760 	uc = umtxq_getchain(&pi->pi_key);
1761 	UMTXQ_LOCKED_ASSERT(uc);
1762 	pi->pi_refcount++;
1763 }
1764 
1765 /*
1766  * Decrease reference count for a PI mutex, if the counter
1767  * is decreased to zero, its memory space is freed.
1768  */
1769 static void
umtx_pi_unref(struct umtx_pi * pi)1770 umtx_pi_unref(struct umtx_pi *pi)
1771 {
1772 	struct umtxq_chain *uc;
1773 
1774 	uc = umtxq_getchain(&pi->pi_key);
1775 	UMTXQ_LOCKED_ASSERT(uc);
1776 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1777 	if (--pi->pi_refcount == 0) {
1778 		mtx_lock_spin(&umtx_lock);
1779 		if (pi->pi_owner != NULL) {
1780 			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1781 				pi, pi_link);
1782 			pi->pi_owner = NULL;
1783 		}
1784 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1785 			("blocked queue not empty"));
1786 		mtx_unlock_spin(&umtx_lock);
1787 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1788 		umtx_pi_free(pi);
1789 	}
1790 }
1791 
1792 /*
1793  * Find a PI mutex in hash table.
1794  */
1795 static struct umtx_pi *
umtx_pi_lookup(struct umtx_key * key)1796 umtx_pi_lookup(struct umtx_key *key)
1797 {
1798 	struct umtxq_chain *uc;
1799 	struct umtx_pi *pi;
1800 
1801 	uc = umtxq_getchain(key);
1802 	UMTXQ_LOCKED_ASSERT(uc);
1803 
1804 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1805 		if (umtx_key_match(&pi->pi_key, key)) {
1806 			return (pi);
1807 		}
1808 	}
1809 	return (NULL);
1810 }
1811 
1812 /*
1813  * Insert a PI mutex into hash table.
1814  */
1815 static inline void
umtx_pi_insert(struct umtx_pi * pi)1816 umtx_pi_insert(struct umtx_pi *pi)
1817 {
1818 	struct umtxq_chain *uc;
1819 
1820 	uc = umtxq_getchain(&pi->pi_key);
1821 	UMTXQ_LOCKED_ASSERT(uc);
1822 	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1823 }
1824 
1825 /*
1826  * Lock a PI mutex.
1827  */
1828 static int
_do_lock_pi(struct thread * td,struct umutex * m,uint32_t flags,int timo,int try)1829 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1830 	int try)
1831 {
1832 	struct umtx_q *uq;
1833 	struct umtx_pi *pi, *new_pi;
1834 	uint32_t id, owner, old;
1835 	int error;
1836 
1837 	id = td->td_tid;
1838 	uq = td->td_umtxq;
1839 
1840 	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1841 	    &uq->uq_key)) != 0)
1842 		return (error);
1843 	umtxq_lock(&uq->uq_key);
1844 	pi = umtx_pi_lookup(&uq->uq_key);
1845 	if (pi == NULL) {
1846 		new_pi = umtx_pi_alloc(M_NOWAIT);
1847 		if (new_pi == NULL) {
1848 			umtxq_unlock(&uq->uq_key);
1849 			new_pi = umtx_pi_alloc(M_WAITOK);
1850 			umtxq_lock(&uq->uq_key);
1851 			pi = umtx_pi_lookup(&uq->uq_key);
1852 			if (pi != NULL) {
1853 				umtx_pi_free(new_pi);
1854 				new_pi = NULL;
1855 			}
1856 		}
1857 		if (new_pi != NULL) {
1858 			new_pi->pi_key = uq->uq_key;
1859 			umtx_pi_insert(new_pi);
1860 			pi = new_pi;
1861 		}
1862 	}
1863 	umtx_pi_ref(pi);
1864 	umtxq_unlock(&uq->uq_key);
1865 
1866 	/*
1867 	 * Care must be exercised when dealing with umtx structure.  It
1868 	 * can fault on any access.
1869 	 */
1870 	for (;;) {
1871 		/*
1872 		 * Try the uncontested case.  This should be done in userland.
1873 		 */
1874 		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1875 
1876 		/* The acquire succeeded. */
1877 		if (owner == UMUTEX_UNOWNED) {
1878 			error = 0;
1879 			break;
1880 		}
1881 
1882 		/* The address was invalid. */
1883 		if (owner == -1) {
1884 			error = EFAULT;
1885 			break;
1886 		}
1887 
1888 		/* If no one owns it but it is contested try to acquire it. */
1889 		if (owner == UMUTEX_CONTESTED) {
1890 			owner = casuword32(&m->m_owner,
1891 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1892 
1893 			if (owner == UMUTEX_CONTESTED) {
1894 				umtxq_lock(&uq->uq_key);
1895 				umtxq_busy(&uq->uq_key);
1896 				error = umtx_pi_claim(pi, td);
1897 				umtxq_unbusy(&uq->uq_key);
1898 				umtxq_unlock(&uq->uq_key);
1899 				break;
1900 			}
1901 
1902 			/* The address was invalid. */
1903 			if (owner == -1) {
1904 				error = EFAULT;
1905 				break;
1906 			}
1907 
1908 			error = umtxq_check_susp(td);
1909 			if (error != 0)
1910 				break;
1911 
1912 			/* If this failed the lock has changed, restart. */
1913 			continue;
1914 		}
1915 
1916 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1917 		    (owner & ~UMUTEX_CONTESTED) == id) {
1918 			error = EDEADLK;
1919 			break;
1920 		}
1921 
1922 		if (try != 0) {
1923 			error = EBUSY;
1924 			break;
1925 		}
1926 
1927 		/*
1928 		 * If we caught a signal, we have retried and now
1929 		 * exit immediately.
1930 		 */
1931 		if (error != 0)
1932 			break;
1933 
1934 		umtxq_lock(&uq->uq_key);
1935 		umtxq_busy(&uq->uq_key);
1936 		umtxq_unlock(&uq->uq_key);
1937 
1938 		/*
1939 		 * Set the contested bit so that a release in user space
1940 		 * knows to use the system call for unlock.  If this fails
1941 		 * either some one else has acquired the lock or it has been
1942 		 * released.
1943 		 */
1944 		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1945 
1946 		/* The address was invalid. */
1947 		if (old == -1) {
1948 			umtxq_lock(&uq->uq_key);
1949 			umtxq_unbusy(&uq->uq_key);
1950 			umtxq_unlock(&uq->uq_key);
1951 			error = EFAULT;
1952 			break;
1953 		}
1954 
1955 		umtxq_lock(&uq->uq_key);
1956 		/*
1957 		 * We set the contested bit, sleep. Otherwise the lock changed
1958 		 * and we need to retry or we lost a race to the thread
1959 		 * unlocking the umtx.
1960 		 */
1961 		if (old == owner)
1962 			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1963 				 "umtxpi", timo);
1964 		else {
1965 			umtxq_unbusy(&uq->uq_key);
1966 			umtxq_unlock(&uq->uq_key);
1967 		}
1968 
1969 		error = umtxq_check_susp(td);
1970 		if (error != 0)
1971 			break;
1972 	}
1973 
1974 	umtxq_lock(&uq->uq_key);
1975 	umtx_pi_unref(pi);
1976 	umtxq_unlock(&uq->uq_key);
1977 
1978 	umtx_key_release(&uq->uq_key);
1979 	return (error);
1980 }
1981 
1982 /*
1983  * Unlock a PI mutex.
1984  */
1985 static int
do_unlock_pi(struct thread * td,struct umutex * m,uint32_t flags)1986 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1987 {
1988 	struct umtx_key key;
1989 	struct umtx_q *uq_first, *uq_first2, *uq_me;
1990 	struct umtx_pi *pi, *pi2;
1991 	uint32_t owner, old, id;
1992 	int error;
1993 	int count;
1994 	int pri;
1995 
1996 	id = td->td_tid;
1997 	/*
1998 	 * Make sure we own this mtx.
1999 	 */
2000 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2001 	if (owner == -1)
2002 		return (EFAULT);
2003 
2004 	if ((owner & ~UMUTEX_CONTESTED) != id)
2005 		return (EPERM);
2006 
2007 	/* This should be done in userland */
2008 	if ((owner & UMUTEX_CONTESTED) == 0) {
2009 		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
2010 		if (old == -1)
2011 			return (EFAULT);
2012 		if (old == owner)
2013 			return (0);
2014 		owner = old;
2015 	}
2016 
2017 	/* We should only ever be in here for contested locks */
2018 	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
2019 	    &key)) != 0)
2020 		return (error);
2021 
2022 	umtxq_lock(&key);
2023 	umtxq_busy(&key);
2024 	count = umtxq_count_pi(&key, &uq_first);
2025 	if (uq_first != NULL) {
2026 		mtx_lock_spin(&umtx_lock);
2027 		pi = uq_first->uq_pi_blocked;
2028 		KASSERT(pi != NULL, ("pi == NULL?"));
2029 		if (pi->pi_owner != curthread) {
2030 			mtx_unlock_spin(&umtx_lock);
2031 			umtxq_unbusy(&key);
2032 			umtxq_unlock(&key);
2033 			umtx_key_release(&key);
2034 			/* userland messed the mutex */
2035 			return (EPERM);
2036 		}
2037 		uq_me = curthread->td_umtxq;
2038 		pi->pi_owner = NULL;
2039 		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
2040 		/* get highest priority thread which is still sleeping. */
2041 		uq_first = TAILQ_FIRST(&pi->pi_blocked);
2042 		while (uq_first != NULL &&
2043 		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2044 			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2045 		}
2046 		pri = PRI_MAX;
2047 		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2048 			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2049 			if (uq_first2 != NULL) {
2050 				if (pri > UPRI(uq_first2->uq_thread))
2051 					pri = UPRI(uq_first2->uq_thread);
2052 			}
2053 		}
2054 		thread_lock(curthread);
2055 		sched_lend_user_prio(curthread, pri);
2056 		thread_unlock(curthread);
2057 		mtx_unlock_spin(&umtx_lock);
2058 		if (uq_first)
2059 			umtxq_signal_thread(uq_first);
2060 	}
2061 	umtxq_unlock(&key);
2062 
2063 	/*
2064 	 * When unlocking the umtx, it must be marked as unowned if
2065 	 * there is zero or one thread only waiting for it.
2066 	 * Otherwise, it must be marked as contested.
2067 	 */
2068 	old = casuword32(&m->m_owner, owner,
2069 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
2070 
2071 	umtxq_lock(&key);
2072 	umtxq_unbusy(&key);
2073 	umtxq_unlock(&key);
2074 	umtx_key_release(&key);
2075 	if (old == -1)
2076 		return (EFAULT);
2077 	if (old != owner)
2078 		return (EINVAL);
2079 	return (0);
2080 }
2081 
2082 /*
2083  * Lock a PP mutex.
2084  */
2085 static int
_do_lock_pp(struct thread * td,struct umutex * m,uint32_t flags,int timo,int try)2086 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
2087 	int try)
2088 {
2089 	struct umtx_q *uq, *uq2;
2090 	struct umtx_pi *pi;
2091 	uint32_t ceiling;
2092 	uint32_t owner, id;
2093 	int error, pri, old_inherited_pri, su;
2094 
2095 	id = td->td_tid;
2096 	uq = td->td_umtxq;
2097 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2098 	    &uq->uq_key)) != 0)
2099 		return (error);
2100 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2101 	for (;;) {
2102 		old_inherited_pri = uq->uq_inherited_pri;
2103 		umtxq_lock(&uq->uq_key);
2104 		umtxq_busy(&uq->uq_key);
2105 		umtxq_unlock(&uq->uq_key);
2106 
2107 		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
2108 		if (ceiling > RTP_PRIO_MAX) {
2109 			error = EINVAL;
2110 			goto out;
2111 		}
2112 
2113 		mtx_lock_spin(&umtx_lock);
2114 		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2115 			mtx_unlock_spin(&umtx_lock);
2116 			error = EINVAL;
2117 			goto out;
2118 		}
2119 		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2120 			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2121 			thread_lock(td);
2122 			if (uq->uq_inherited_pri < UPRI(td))
2123 				sched_lend_user_prio(td, uq->uq_inherited_pri);
2124 			thread_unlock(td);
2125 		}
2126 		mtx_unlock_spin(&umtx_lock);
2127 
2128 		owner = casuword32(&m->m_owner,
2129 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2130 
2131 		if (owner == UMUTEX_CONTESTED) {
2132 			error = 0;
2133 			break;
2134 		}
2135 
2136 		/* The address was invalid. */
2137 		if (owner == -1) {
2138 			error = EFAULT;
2139 			break;
2140 		}
2141 
2142 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2143 		    (owner & ~UMUTEX_CONTESTED) == id) {
2144 			error = EDEADLK;
2145 			break;
2146 		}
2147 
2148 		if (try != 0) {
2149 			error = EBUSY;
2150 			break;
2151 		}
2152 
2153 		/*
2154 		 * If we caught a signal, we have retried and now
2155 		 * exit immediately.
2156 		 */
2157 		if (error != 0)
2158 			break;
2159 
2160 		umtxq_lock(&uq->uq_key);
2161 		umtxq_insert(uq);
2162 		umtxq_unbusy(&uq->uq_key);
2163 		error = umtxq_sleep(uq, "umtxpp", timo);
2164 		umtxq_remove(uq);
2165 		umtxq_unlock(&uq->uq_key);
2166 
2167 		mtx_lock_spin(&umtx_lock);
2168 		uq->uq_inherited_pri = old_inherited_pri;
2169 		pri = PRI_MAX;
2170 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2171 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2172 			if (uq2 != NULL) {
2173 				if (pri > UPRI(uq2->uq_thread))
2174 					pri = UPRI(uq2->uq_thread);
2175 			}
2176 		}
2177 		if (pri > uq->uq_inherited_pri)
2178 			pri = uq->uq_inherited_pri;
2179 		thread_lock(td);
2180 		sched_lend_user_prio(td, pri);
2181 		thread_unlock(td);
2182 		mtx_unlock_spin(&umtx_lock);
2183 	}
2184 
2185 	if (error != 0) {
2186 		mtx_lock_spin(&umtx_lock);
2187 		uq->uq_inherited_pri = old_inherited_pri;
2188 		pri = PRI_MAX;
2189 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2190 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2191 			if (uq2 != NULL) {
2192 				if (pri > UPRI(uq2->uq_thread))
2193 					pri = UPRI(uq2->uq_thread);
2194 			}
2195 		}
2196 		if (pri > uq->uq_inherited_pri)
2197 			pri = uq->uq_inherited_pri;
2198 		thread_lock(td);
2199 		sched_lend_user_prio(td, pri);
2200 		thread_unlock(td);
2201 		mtx_unlock_spin(&umtx_lock);
2202 	}
2203 
2204 out:
2205 	umtxq_lock(&uq->uq_key);
2206 	umtxq_unbusy(&uq->uq_key);
2207 	umtxq_unlock(&uq->uq_key);
2208 	umtx_key_release(&uq->uq_key);
2209 	return (error);
2210 }
2211 
2212 /*
2213  * Unlock a PP mutex.
2214  */
2215 static int
do_unlock_pp(struct thread * td,struct umutex * m,uint32_t flags)2216 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2217 {
2218 	struct umtx_key key;
2219 	struct umtx_q *uq, *uq2;
2220 	struct umtx_pi *pi;
2221 	uint32_t owner, id;
2222 	uint32_t rceiling;
2223 	int error, pri, new_inherited_pri, su;
2224 
2225 	id = td->td_tid;
2226 	uq = td->td_umtxq;
2227 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2228 
2229 	/*
2230 	 * Make sure we own this mtx.
2231 	 */
2232 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2233 	if (owner == -1)
2234 		return (EFAULT);
2235 
2236 	if ((owner & ~UMUTEX_CONTESTED) != id)
2237 		return (EPERM);
2238 
2239 	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2240 	if (error != 0)
2241 		return (error);
2242 
2243 	if (rceiling == -1)
2244 		new_inherited_pri = PRI_MAX;
2245 	else {
2246 		rceiling = RTP_PRIO_MAX - rceiling;
2247 		if (rceiling > RTP_PRIO_MAX)
2248 			return (EINVAL);
2249 		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2250 	}
2251 
2252 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2253 	    &key)) != 0)
2254 		return (error);
2255 	umtxq_lock(&key);
2256 	umtxq_busy(&key);
2257 	umtxq_unlock(&key);
2258 	/*
2259 	 * For priority protected mutex, always set unlocked state
2260 	 * to UMUTEX_CONTESTED, so that userland always enters kernel
2261 	 * to lock the mutex, it is necessary because thread priority
2262 	 * has to be adjusted for such mutex.
2263 	 */
2264 	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2265 		UMUTEX_CONTESTED);
2266 
2267 	umtxq_lock(&key);
2268 	if (error == 0)
2269 		umtxq_signal(&key, 1);
2270 	umtxq_unbusy(&key);
2271 	umtxq_unlock(&key);
2272 
2273 	if (error == -1)
2274 		error = EFAULT;
2275 	else {
2276 		mtx_lock_spin(&umtx_lock);
2277 		if (su != 0)
2278 			uq->uq_inherited_pri = new_inherited_pri;
2279 		pri = PRI_MAX;
2280 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2281 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2282 			if (uq2 != NULL) {
2283 				if (pri > UPRI(uq2->uq_thread))
2284 					pri = UPRI(uq2->uq_thread);
2285 			}
2286 		}
2287 		if (pri > uq->uq_inherited_pri)
2288 			pri = uq->uq_inherited_pri;
2289 		thread_lock(td);
2290 		sched_lend_user_prio(td, pri);
2291 		thread_unlock(td);
2292 		mtx_unlock_spin(&umtx_lock);
2293 	}
2294 	umtx_key_release(&key);
2295 	return (error);
2296 }
2297 
2298 static int
do_set_ceiling(struct thread * td,struct umutex * m,uint32_t ceiling,uint32_t * old_ceiling)2299 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2300 	uint32_t *old_ceiling)
2301 {
2302 	struct umtx_q *uq;
2303 	uint32_t save_ceiling;
2304 	uint32_t owner, id;
2305 	uint32_t flags;
2306 	int error;
2307 
2308 	flags = fuword32(&m->m_flags);
2309 	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2310 		return (EINVAL);
2311 	if (ceiling > RTP_PRIO_MAX)
2312 		return (EINVAL);
2313 	id = td->td_tid;
2314 	uq = td->td_umtxq;
2315 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2316 	   &uq->uq_key)) != 0)
2317 		return (error);
2318 	for (;;) {
2319 		umtxq_lock(&uq->uq_key);
2320 		umtxq_busy(&uq->uq_key);
2321 		umtxq_unlock(&uq->uq_key);
2322 
2323 		save_ceiling = fuword32(&m->m_ceilings[0]);
2324 
2325 		owner = casuword32(&m->m_owner,
2326 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2327 
2328 		if (owner == UMUTEX_CONTESTED) {
2329 			suword32(&m->m_ceilings[0], ceiling);
2330 			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2331 				UMUTEX_CONTESTED);
2332 			error = 0;
2333 			break;
2334 		}
2335 
2336 		/* The address was invalid. */
2337 		if (owner == -1) {
2338 			error = EFAULT;
2339 			break;
2340 		}
2341 
2342 		if ((owner & ~UMUTEX_CONTESTED) == id) {
2343 			suword32(&m->m_ceilings[0], ceiling);
2344 			error = 0;
2345 			break;
2346 		}
2347 
2348 		/*
2349 		 * If we caught a signal, we have retried and now
2350 		 * exit immediately.
2351 		 */
2352 		if (error != 0)
2353 			break;
2354 
2355 		/*
2356 		 * We set the contested bit, sleep. Otherwise the lock changed
2357 		 * and we need to retry or we lost a race to the thread
2358 		 * unlocking the umtx.
2359 		 */
2360 		umtxq_lock(&uq->uq_key);
2361 		umtxq_insert(uq);
2362 		umtxq_unbusy(&uq->uq_key);
2363 		error = umtxq_sleep(uq, "umtxpp", 0);
2364 		umtxq_remove(uq);
2365 		umtxq_unlock(&uq->uq_key);
2366 	}
2367 	umtxq_lock(&uq->uq_key);
2368 	if (error == 0)
2369 		umtxq_signal(&uq->uq_key, INT_MAX);
2370 	umtxq_unbusy(&uq->uq_key);
2371 	umtxq_unlock(&uq->uq_key);
2372 	umtx_key_release(&uq->uq_key);
2373 	if (error == 0 && old_ceiling != NULL)
2374 		suword32(old_ceiling, save_ceiling);
2375 	return (error);
2376 }
2377 
2378 static int
_do_lock_umutex(struct thread * td,struct umutex * m,int flags,int timo,int mode)2379 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2380 	int mode)
2381 {
2382 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2383 	case 0:
2384 		return (_do_lock_normal(td, m, flags, timo, mode));
2385 	case UMUTEX_PRIO_INHERIT:
2386 		return (_do_lock_pi(td, m, flags, timo, mode));
2387 	case UMUTEX_PRIO_PROTECT:
2388 		return (_do_lock_pp(td, m, flags, timo, mode));
2389 	}
2390 	return (EINVAL);
2391 }
2392 
2393 /*
2394  * Lock a userland POSIX mutex.
2395  */
2396 static int
do_lock_umutex(struct thread * td,struct umutex * m,struct timespec * timeout,int mode)2397 do_lock_umutex(struct thread *td, struct umutex *m,
2398 	struct timespec *timeout, int mode)
2399 {
2400 	struct timespec ts, ts2, ts3;
2401 	struct timeval tv;
2402 	uint32_t flags;
2403 	int error;
2404 
2405 	flags = fuword32(&m->m_flags);
2406 	if (flags == -1)
2407 		return (EFAULT);
2408 
2409 	if (timeout == NULL) {
2410 		error = _do_lock_umutex(td, m, flags, 0, mode);
2411 		/* Mutex locking is restarted if it is interrupted. */
2412 		if (error == EINTR && mode != _UMUTEX_WAIT)
2413 			error = ERESTART;
2414 	} else {
2415 		getnanouptime(&ts);
2416 		timespecadd(&ts, timeout);
2417 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2418 		for (;;) {
2419 			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2420 			if (error != ETIMEDOUT)
2421 				break;
2422 			getnanouptime(&ts2);
2423 			if (timespeccmp(&ts2, &ts, >=)) {
2424 				error = ETIMEDOUT;
2425 				break;
2426 			}
2427 			ts3 = ts;
2428 			timespecsub(&ts3, &ts2);
2429 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2430 		}
2431 		/* Timed-locking is not restarted. */
2432 		if (error == ERESTART)
2433 			error = EINTR;
2434 	}
2435 	return (error);
2436 }
2437 
2438 /*
2439  * Unlock a userland POSIX mutex.
2440  */
2441 static int
do_unlock_umutex(struct thread * td,struct umutex * m)2442 do_unlock_umutex(struct thread *td, struct umutex *m)
2443 {
2444 	uint32_t flags;
2445 
2446 	flags = fuword32(&m->m_flags);
2447 	if (flags == -1)
2448 		return (EFAULT);
2449 
2450 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2451 	case 0:
2452 		return (do_unlock_normal(td, m, flags));
2453 	case UMUTEX_PRIO_INHERIT:
2454 		return (do_unlock_pi(td, m, flags));
2455 	case UMUTEX_PRIO_PROTECT:
2456 		return (do_unlock_pp(td, m, flags));
2457 	}
2458 
2459 	return (EINVAL);
2460 }
2461 
2462 static int
do_cv_wait(struct thread * td,struct ucond * cv,struct umutex * m,struct timespec * timeout,u_long wflags)2463 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2464 	struct timespec *timeout, u_long wflags)
2465 {
2466 	struct umtx_q *uq;
2467 	struct timeval tv;
2468 	struct timespec cts, ets, tts;
2469 	uint32_t flags;
2470 	uint32_t clockid;
2471 	int error;
2472 
2473 	uq = td->td_umtxq;
2474 	flags = fuword32(&cv->c_flags);
2475 	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2476 	if (error != 0)
2477 		return (error);
2478 
2479 	if ((wflags & CVWAIT_CLOCKID) != 0) {
2480 		clockid = fuword32(&cv->c_clockid);
2481 		if (clockid < CLOCK_REALTIME ||
2482 		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2483 			/* hmm, only HW clock id will work. */
2484 			return (EINVAL);
2485 		}
2486 	} else {
2487 		clockid = CLOCK_REALTIME;
2488 	}
2489 
2490 	umtxq_lock(&uq->uq_key);
2491 	umtxq_busy(&uq->uq_key);
2492 	umtxq_insert(uq);
2493 	umtxq_unlock(&uq->uq_key);
2494 
2495 	/*
2496 	 * Set c_has_waiters to 1 before releasing user mutex, also
2497 	 * don't modify cache line when unnecessary.
2498 	 */
2499 	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2500 		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2501 
2502 	umtxq_lock(&uq->uq_key);
2503 	umtxq_unbusy(&uq->uq_key);
2504 	umtxq_unlock(&uq->uq_key);
2505 
2506 	error = do_unlock_umutex(td, m);
2507 
2508 	umtxq_lock(&uq->uq_key);
2509 	if (error == 0) {
2510 		if (timeout == NULL) {
2511 			error = umtxq_sleep(uq, "ucond", 0);
2512 		} else {
2513 			if ((wflags & CVWAIT_ABSTIME) == 0) {
2514 				kern_clock_gettime(td, clockid, &ets);
2515 				timespecadd(&ets, timeout);
2516 				tts = *timeout;
2517 			} else { /* absolute time */
2518 				ets = *timeout;
2519 				tts = *timeout;
2520 				kern_clock_gettime(td, clockid, &cts);
2521 				timespecsub(&tts, &cts);
2522 			}
2523 			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2524 			for (;;) {
2525 				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2526 				if (error != ETIMEDOUT)
2527 					break;
2528 				kern_clock_gettime(td, clockid, &cts);
2529 				if (timespeccmp(&cts, &ets, >=)) {
2530 					error = ETIMEDOUT;
2531 					break;
2532 				}
2533 				tts = ets;
2534 				timespecsub(&tts, &cts);
2535 				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2536 			}
2537 		}
2538 	}
2539 
2540 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2541 		error = 0;
2542 	else {
2543 		/*
2544 		 * This must be timeout,interrupted by signal or
2545 		 * surprious wakeup, clear c_has_waiter flag when
2546 		 * necessary.
2547 		 */
2548 		umtxq_busy(&uq->uq_key);
2549 		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2550 			int oldlen = uq->uq_cur_queue->length;
2551 			umtxq_remove(uq);
2552 			if (oldlen == 1) {
2553 				umtxq_unlock(&uq->uq_key);
2554 				suword32(
2555 				    __DEVOLATILE(uint32_t *,
2556 					 &cv->c_has_waiters), 0);
2557 				umtxq_lock(&uq->uq_key);
2558 			}
2559 		}
2560 		umtxq_unbusy(&uq->uq_key);
2561 		if (error == ERESTART)
2562 			error = EINTR;
2563 	}
2564 
2565 	umtxq_unlock(&uq->uq_key);
2566 	umtx_key_release(&uq->uq_key);
2567 	return (error);
2568 }
2569 
2570 /*
2571  * Signal a userland condition variable.
2572  */
2573 static int
do_cv_signal(struct thread * td,struct ucond * cv)2574 do_cv_signal(struct thread *td, struct ucond *cv)
2575 {
2576 	struct umtx_key key;
2577 	int error, cnt, nwake;
2578 	uint32_t flags;
2579 
2580 	flags = fuword32(&cv->c_flags);
2581 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2582 		return (error);
2583 	umtxq_lock(&key);
2584 	umtxq_busy(&key);
2585 	cnt = umtxq_count(&key);
2586 	nwake = umtxq_signal(&key, 1);
2587 	if (cnt <= nwake) {
2588 		umtxq_unlock(&key);
2589 		error = suword32(
2590 		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2591 		umtxq_lock(&key);
2592 	}
2593 	umtxq_unbusy(&key);
2594 	umtxq_unlock(&key);
2595 	umtx_key_release(&key);
2596 	return (error);
2597 }
2598 
2599 static int
do_cv_broadcast(struct thread * td,struct ucond * cv)2600 do_cv_broadcast(struct thread *td, struct ucond *cv)
2601 {
2602 	struct umtx_key key;
2603 	int error;
2604 	uint32_t flags;
2605 
2606 	flags = fuword32(&cv->c_flags);
2607 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2608 		return (error);
2609 
2610 	umtxq_lock(&key);
2611 	umtxq_busy(&key);
2612 	umtxq_signal(&key, INT_MAX);
2613 	umtxq_unlock(&key);
2614 
2615 	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2616 
2617 	umtxq_lock(&key);
2618 	umtxq_unbusy(&key);
2619 	umtxq_unlock(&key);
2620 
2621 	umtx_key_release(&key);
2622 	return (error);
2623 }
2624 
/*
 * Acquire a userland rwlock for reading.
 *
 * First tries to bump the reader count in rw_state with casuword32().  If
 * the lock is blocked for readers (see wrflags below) the thread sets
 * URWLOCK_READ_WAITERS, counts itself in rw_blocked_readers and sleeps on
 * the lock's queue; timo is passed unchanged to umtxq_sleep().
 *
 * fflag - caller-supplied flags; URWLOCK_PREFER_READER here or in the
 *         lock's own rw_flags lets readers ignore waiting writers.
 *
 * Returns 0 on success, EAGAIN when the reader count is already
 * URWLOCK_MAX_READERS, EFAULT when casuword32() reports -1, or the error
 * from umtx_key_get(), umtxq_check_susp() or umtxq_sleep().
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/*
	 * wrflags holds the state bits that make a reader block: always the
	 * write-owner bit, plus the write-waiters bit unless reader
	 * preference was requested.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			/* CAS succeeded: the read lock is held. */
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		/* Failure while setting the bit: drop the busy mark and bail. */
		if (error != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		/* Sleep until the blocking bits clear or the sleep fails. */
		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		/* We were the last blocked reader: clear URWLOCK_READ_WAITERS. */
		if (blocked_readers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
				if (error != 0)
					break;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		/* On success loop back and retry the try-lock at the top. */
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
2763 
2764 static int
do_rw_rdlock2(struct thread * td,void * obj,long val,struct timespec * timeout)2765 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2766 {
2767 	struct timespec ts, ts2, ts3;
2768 	struct timeval tv;
2769 	int error;
2770 
2771 	getnanouptime(&ts);
2772 	timespecadd(&ts, timeout);
2773 	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2774 	for (;;) {
2775 		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2776 		if (error != ETIMEDOUT)
2777 			break;
2778 		getnanouptime(&ts2);
2779 		if (timespeccmp(&ts2, &ts, >=)) {
2780 			error = ETIMEDOUT;
2781 			break;
2782 		}
2783 		ts3 = ts;
2784 		timespecsub(&ts3, &ts2);
2785 		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2786 	}
2787 	if (error == ERESTART)
2788 		error = EINTR;
2789 	return (error);
2790 }
2791 
/*
 * Acquire a userland rwlock for writing.
 *
 * First tries to set URWLOCK_WRITE_OWNER with casuword32() while the lock
 * has neither a write owner nor readers.  Otherwise the thread sets
 * URWLOCK_WRITE_WAITERS, counts itself in rw_blocked_writers and sleeps on
 * the exclusive queue; timo is passed unchanged to umtxq_sleep().
 *
 * Returns 0 on success, EFAULT when casuword32() reports -1, or the error
 * from umtx_key_get(), umtxq_check_susp() or umtxq_sleep().
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/*
	 * blocked_readers is only non-zero after an iteration in which this
	 * thread was the last blocked writer (see the bottom of the loop).
	 */
	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Try to take the lock while it is completely free. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			/* CAS succeeded: the write lock is held. */
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Giving up: if no writer owns or waits for the lock
			 * and readers were seen blocked, wake everything on
			 * the shared queue before leaving.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* Set the write contention bit while the lock is still held. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		/* Failure while setting the bit: drop the busy mark and bail. */
		if (error != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			break;
		}

		/* The lock became free in the meantime: restart the try-lock. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		/* Count ourselves as a blocked writer before sleeping. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		/* Sleep until the lock is free or the sleep fails. */
		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* Uncount ourselves; the last blocked writer clears the bit. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error != 0)
					break;
			}
			/* Remembered for the give-up path at the top of the loop. */
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2935 
2936 static int
do_rw_wrlock2(struct thread * td,void * obj,struct timespec * timeout)2937 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2938 {
2939 	struct timespec ts, ts2, ts3;
2940 	struct timeval tv;
2941 	int error;
2942 
2943 	getnanouptime(&ts);
2944 	timespecadd(&ts, timeout);
2945 	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2946 	for (;;) {
2947 		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2948 		if (error != ETIMEDOUT)
2949 			break;
2950 		getnanouptime(&ts2);
2951 		if (timespeccmp(&ts2, &ts, >=)) {
2952 			error = ETIMEDOUT;
2953 			break;
2954 		}
2955 		ts3 = ts;
2956 		timespecsub(&ts3, &ts2);
2957 		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2958 	}
2959 	if (error == ERESTART)
2960 		error = EINTR;
2961 	return (error);
2962 }
2963 
2964 static int
do_rw_unlock(struct thread * td,struct urwlock * rwlock)2965 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
2966 {
2967 	struct umtx_q *uq;
2968 	uint32_t flags;
2969 	int32_t state, oldstate;
2970 	int error, q, count;
2971 
2972 	uq = td->td_umtxq;
2973 	flags = fuword32(&rwlock->rw_flags);
2974 	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2975 	if (error != 0)
2976 		return (error);
2977 
2978 	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2979 	if (state & URWLOCK_WRITE_OWNER) {
2980 		for (;;) {
2981 			oldstate = casuword32(&rwlock->rw_state, state,
2982 				state & ~URWLOCK_WRITE_OWNER);
2983 			if (oldstate == -1) {
2984 				error = EFAULT;
2985 				goto out;
2986 			}
2987 			if (oldstate != state) {
2988 				state = oldstate;
2989 				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2990 					error = EPERM;
2991 					goto out;
2992 				}
2993 				error = umtxq_check_susp(td);
2994 				if (error != 0)
2995 					goto out;
2996 			} else
2997 				break;
2998 		}
2999 	} else if (URWLOCK_READER_COUNT(state) != 0) {
3000 		for (;;) {
3001 			oldstate = casuword32(&rwlock->rw_state, state,
3002 				state - 1);
3003 			if (oldstate == -1) {
3004 				error = EFAULT;
3005 				goto out;
3006 			}
3007 			if (oldstate != state) {
3008 				state = oldstate;
3009 				if (URWLOCK_READER_COUNT(oldstate) == 0) {
3010 					error = EPERM;
3011 					goto out;
3012 				}
3013 				error = umtxq_check_susp(td);
3014 				if (error != 0)
3015 					goto out;
3016 			} else
3017 				break;
3018 		}
3019 	} else {
3020 		error = EPERM;
3021 		goto out;
3022 	}
3023 
3024 	count = 0;
3025 
3026 	if (!(flags & URWLOCK_PREFER_READER)) {
3027 		if (state & URWLOCK_WRITE_WAITERS) {
3028 			count = 1;
3029 			q = UMTX_EXCLUSIVE_QUEUE;
3030 		} else if (state & URWLOCK_READ_WAITERS) {
3031 			count = INT_MAX;
3032 			q = UMTX_SHARED_QUEUE;
3033 		}
3034 	} else {
3035 		if (state & URWLOCK_READ_WAITERS) {
3036 			count = INT_MAX;
3037 			q = UMTX_SHARED_QUEUE;
3038 		} else if (state & URWLOCK_WRITE_WAITERS) {
3039 			count = 1;
3040 			q = UMTX_EXCLUSIVE_QUEUE;
3041 		}
3042 	}
3043 
3044 	if (count) {
3045 		umtxq_lock(&uq->uq_key);
3046 		umtxq_busy(&uq->uq_key);
3047 		umtxq_signal_queue(&uq->uq_key, count, q);
3048 		umtxq_unbusy(&uq->uq_key);
3049 		umtxq_unlock(&uq->uq_key);
3050 	}
3051 out:
3052 	umtx_key_release(&uq->uq_key);
3053 	return (error);
3054 }
3055 
3056 static int
do_sem_wait(struct thread * td,struct _usem * sem,struct timespec * timeout)3057 do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
3058 {
3059 	struct umtx_q *uq;
3060 	struct timeval tv;
3061 	struct timespec cts, ets, tts;
3062 	uint32_t flags, count;
3063 	int error;
3064 
3065 	uq = td->td_umtxq;
3066 	flags = fuword32(&sem->_flags);
3067 	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3068 	if (error != 0)
3069 		return (error);
3070 	umtxq_lock(&uq->uq_key);
3071 	umtxq_busy(&uq->uq_key);
3072 	umtxq_insert(uq);
3073 	umtxq_unlock(&uq->uq_key);
3074 
3075 	if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
3076 		casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
3077 
3078 	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
3079 	if (count != 0) {
3080 		umtxq_lock(&uq->uq_key);
3081 		umtxq_unbusy(&uq->uq_key);
3082 		umtxq_remove(uq);
3083 		umtxq_unlock(&uq->uq_key);
3084 		umtx_key_release(&uq->uq_key);
3085 		return (0);
3086 	}
3087 
3088 	umtxq_lock(&uq->uq_key);
3089 	umtxq_unbusy(&uq->uq_key);
3090 	umtxq_unlock(&uq->uq_key);
3091 
3092 	umtxq_lock(&uq->uq_key);
3093 	if (timeout == NULL) {
3094 		error = umtxq_sleep(uq, "usem", 0);
3095 	} else {
3096 		getnanouptime(&ets);
3097 		timespecadd(&ets, timeout);
3098 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
3099 		for (;;) {
3100 			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
3101 			if (error != ETIMEDOUT)
3102 				break;
3103 			getnanouptime(&cts);
3104 			if (timespeccmp(&cts, &ets, >=)) {
3105 				error = ETIMEDOUT;
3106 				break;
3107 			}
3108 			tts = ets;
3109 			timespecsub(&tts, &cts);
3110 			TIMESPEC_TO_TIMEVAL(&tv, &tts);
3111 		}
3112 	}
3113 
3114 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
3115 		error = 0;
3116 	else {
3117 		umtxq_remove(uq);
3118 		/* A relative timeout cannot be restarted. */
3119 		if (error == ERESTART && timeout != NULL)
3120 			error = EINTR;
3121 	}
3122 	umtxq_unlock(&uq->uq_key);
3123 	umtx_key_release(&uq->uq_key);
3124 	return (error);
3125 }
3126 
3127 /*
3128  * Signal a userland condition variable.
3129  */
3130 static int
do_sem_wake(struct thread * td,struct _usem * sem)3131 do_sem_wake(struct thread *td, struct _usem *sem)
3132 {
3133 	struct umtx_key key;
3134 	int error, cnt, nwake;
3135 	uint32_t flags;
3136 
3137 	flags = fuword32(&sem->_flags);
3138 	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3139 		return (error);
3140 	umtxq_lock(&key);
3141 	umtxq_busy(&key);
3142 	cnt = umtxq_count(&key);
3143 	nwake = umtxq_signal(&key, 1);
3144 	if (cnt <= nwake) {
3145 		umtxq_unlock(&key);
3146 		error = suword32(
3147 		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
3148 		umtxq_lock(&key);
3149 	}
3150 	umtxq_unbusy(&key);
3151 	umtxq_unlock(&key);
3152 	umtx_key_release(&key);
3153 	return (error);
3154 }
3155 
3156 int
sys__umtx_lock(struct thread * td,struct _umtx_lock_args * uap)3157 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
3158     /* struct umtx *umtx */
3159 {
3160 	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
3161 }
3162 
3163 int
sys__umtx_unlock(struct thread * td,struct _umtx_unlock_args * uap)3164 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
3165     /* struct umtx *umtx */
3166 {
3167 	return do_unlock_umtx(td, uap->umtx, td->td_tid);
3168 }
3169 
3170 inline int
umtx_copyin_timeout(const void * addr,struct timespec * tsp)3171 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
3172 {
3173 	int error;
3174 
3175 	error = copyin(addr, tsp, sizeof(struct timespec));
3176 	if (error == 0) {
3177 		if (tsp->tv_sec < 0 ||
3178 		    tsp->tv_nsec >= 1000000000 ||
3179 		    tsp->tv_nsec < 0)
3180 			error = EINVAL;
3181 	}
3182 	return (error);
3183 }
3184 
3185 static int
__umtx_op_lock_umtx(struct thread * td,struct _umtx_op_args * uap)3186 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
3187 {
3188 	struct timespec *ts, timeout;
3189 	int error;
3190 
3191 	/* Allow a null timespec (wait forever). */
3192 	if (uap->uaddr2 == NULL)
3193 		ts = NULL;
3194 	else {
3195 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3196 		if (error != 0)
3197 			return (error);
3198 		ts = &timeout;
3199 	}
3200 	return (do_lock_umtx(td, uap->obj, uap->val, ts));
3201 }
3202 
3203 static int
__umtx_op_unlock_umtx(struct thread * td,struct _umtx_op_args * uap)3204 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
3205 {
3206 	return (do_unlock_umtx(td, uap->obj, uap->val));
3207 }
3208 
3209 static int
__umtx_op_wait(struct thread * td,struct _umtx_op_args * uap)3210 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3211 {
3212 	struct timespec *ts, timeout;
3213 	int error;
3214 
3215 	if (uap->uaddr2 == NULL)
3216 		ts = NULL;
3217 	else {
3218 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3219 		if (error != 0)
3220 			return (error);
3221 		ts = &timeout;
3222 	}
3223 	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
3224 }
3225 
3226 static int
__umtx_op_wait_uint(struct thread * td,struct _umtx_op_args * uap)3227 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3228 {
3229 	struct timespec *ts, timeout;
3230 	int error;
3231 
3232 	if (uap->uaddr2 == NULL)
3233 		ts = NULL;
3234 	else {
3235 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3236 		if (error != 0)
3237 			return (error);
3238 		ts = &timeout;
3239 	}
3240 	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3241 }
3242 
3243 static int
__umtx_op_wait_uint_private(struct thread * td,struct _umtx_op_args * uap)3244 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3245 {
3246 	struct timespec *ts, timeout;
3247 	int error;
3248 
3249 	if (uap->uaddr2 == NULL)
3250 		ts = NULL;
3251 	else {
3252 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3253 		if (error != 0)
3254 			return (error);
3255 		ts = &timeout;
3256 	}
3257 	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3258 }
3259 
3260 static int
__umtx_op_wake(struct thread * td,struct _umtx_op_args * uap)3261 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3262 {
3263 	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3264 }
3265 
3266 #define BATCH_SIZE	128
3267 static int
__umtx_op_nwake_private(struct thread * td,struct _umtx_op_args * uap)3268 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3269 {
3270 	int count = uap->val;
3271 	void *uaddrs[BATCH_SIZE];
3272 	char **upp = (char **)uap->obj;
3273 	int tocopy;
3274 	int error = 0;
3275 	int i, pos = 0;
3276 
3277 	while (count > 0) {
3278 		tocopy = count;
3279 		if (tocopy > BATCH_SIZE)
3280 			tocopy = BATCH_SIZE;
3281 		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3282 		if (error != 0)
3283 			break;
3284 		for (i = 0; i < tocopy; ++i)
3285 			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3286 		count -= tocopy;
3287 		pos += tocopy;
3288 	}
3289 	return (error);
3290 }
3291 
3292 static int
__umtx_op_wake_private(struct thread * td,struct _umtx_op_args * uap)3293 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3294 {
3295 	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3296 }
3297 
3298 static int
__umtx_op_lock_umutex(struct thread * td,struct _umtx_op_args * uap)3299 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3300 {
3301 	struct timespec *ts, timeout;
3302 	int error;
3303 
3304 	/* Allow a null timespec (wait forever). */
3305 	if (uap->uaddr2 == NULL)
3306 		ts = NULL;
3307 	else {
3308 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3309 		if (error != 0)
3310 			return (error);
3311 		ts = &timeout;
3312 	}
3313 	return do_lock_umutex(td, uap->obj, ts, 0);
3314 }
3315 
3316 static int
__umtx_op_trylock_umutex(struct thread * td,struct _umtx_op_args * uap)3317 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3318 {
3319 	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3320 }
3321 
3322 static int
__umtx_op_wait_umutex(struct thread * td,struct _umtx_op_args * uap)3323 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3324 {
3325 	struct timespec *ts, timeout;
3326 	int error;
3327 
3328 	/* Allow a null timespec (wait forever). */
3329 	if (uap->uaddr2 == NULL)
3330 		ts = NULL;
3331 	else {
3332 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3333 		if (error != 0)
3334 			return (error);
3335 		ts = &timeout;
3336 	}
3337 	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3338 }
3339 
3340 static int
__umtx_op_wake_umutex(struct thread * td,struct _umtx_op_args * uap)3341 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3342 {
3343 	return do_wake_umutex(td, uap->obj);
3344 }
3345 
3346 static int
__umtx_op_unlock_umutex(struct thread * td,struct _umtx_op_args * uap)3347 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3348 {
3349 	return do_unlock_umutex(td, uap->obj);
3350 }
3351 
3352 static int
__umtx_op_set_ceiling(struct thread * td,struct _umtx_op_args * uap)3353 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3354 {
3355 	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3356 }
3357 
3358 static int
__umtx_op_cv_wait(struct thread * td,struct _umtx_op_args * uap)3359 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3360 {
3361 	struct timespec *ts, timeout;
3362 	int error;
3363 
3364 	/* Allow a null timespec (wait forever). */
3365 	if (uap->uaddr2 == NULL)
3366 		ts = NULL;
3367 	else {
3368 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3369 		if (error != 0)
3370 			return (error);
3371 		ts = &timeout;
3372 	}
3373 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3374 }
3375 
3376 static int
__umtx_op_cv_signal(struct thread * td,struct _umtx_op_args * uap)3377 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3378 {
3379 	return do_cv_signal(td, uap->obj);
3380 }
3381 
3382 static int
__umtx_op_cv_broadcast(struct thread * td,struct _umtx_op_args * uap)3383 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3384 {
3385 	return do_cv_broadcast(td, uap->obj);
3386 }
3387 
3388 static int
__umtx_op_rw_rdlock(struct thread * td,struct _umtx_op_args * uap)3389 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3390 {
3391 	struct timespec timeout;
3392 	int error;
3393 
3394 	/* Allow a null timespec (wait forever). */
3395 	if (uap->uaddr2 == NULL) {
3396 		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3397 	} else {
3398 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3399 		if (error != 0)
3400 			return (error);
3401 		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3402 	}
3403 	return (error);
3404 }
3405 
3406 static int
__umtx_op_rw_wrlock(struct thread * td,struct _umtx_op_args * uap)3407 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3408 {
3409 	struct timespec timeout;
3410 	int error;
3411 
3412 	/* Allow a null timespec (wait forever). */
3413 	if (uap->uaddr2 == NULL) {
3414 		error = do_rw_wrlock(td, uap->obj, 0);
3415 	} else {
3416 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3417 		if (error != 0)
3418 			return (error);
3419 
3420 		error = do_rw_wrlock2(td, uap->obj, &timeout);
3421 	}
3422 	return (error);
3423 }
3424 
3425 static int
__umtx_op_rw_unlock(struct thread * td,struct _umtx_op_args * uap)3426 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3427 {
3428 	return do_rw_unlock(td, uap->obj);
3429 }
3430 
3431 static int
__umtx_op_sem_wait(struct thread * td,struct _umtx_op_args * uap)3432 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3433 {
3434 	struct timespec *ts, timeout;
3435 	int error;
3436 
3437 	/* Allow a null timespec (wait forever). */
3438 	if (uap->uaddr2 == NULL)
3439 		ts = NULL;
3440 	else {
3441 		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3442 		if (error != 0)
3443 			return (error);
3444 		ts = &timeout;
3445 	}
3446 	return (do_sem_wait(td, uap->obj, ts));
3447 }
3448 
3449 static int
__umtx_op_sem_wake(struct thread * td,struct _umtx_op_args * uap)3450 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3451 {
3452 	return do_sem_wake(td, uap->obj);
3453 }
3454 
3455 static int
__umtx_op_wake2_umutex(struct thread * td,struct _umtx_op_args * uap)3456 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
3457 {
3458 	return do_wake2_umutex(td, uap->obj, uap->val);
3459 }
3460 
3461 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3462 
3463 static _umtx_op_func op_table[] = {
3464 	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3465 	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3466 	__umtx_op_wait,			/* UMTX_OP_WAIT */
3467 	__umtx_op_wake,			/* UMTX_OP_WAKE */
3468 	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3469 	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3470 	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3471 	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3472 	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
3473 	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3474 	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3475 	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3476 	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3477 	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3478 	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3479 	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3480 	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3481 	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3482 	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3483 	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3484 	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3485 	__umtx_op_nwake_private,	/* UMTX_OP_NWAKE_PRIVATE */
3486 	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
3487 };
3488 
3489 int
sys__umtx_op(struct thread * td,struct _umtx_op_args * uap)3490 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3491 {
3492 	if ((unsigned)uap->op < UMTX_OP_MAX)
3493 		return (*op_table[uap->op])(td, uap);
3494 	return (EINVAL);
3495 }
3496 
3497 #ifdef COMPAT_FREEBSD32
3498 int
freebsd32_umtx_lock(struct thread * td,struct freebsd32_umtx_lock_args * uap)3499 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3500     /* struct umtx *umtx */
3501 {
3502 	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3503 }
3504 
3505 int
freebsd32_umtx_unlock(struct thread * td,struct freebsd32_umtx_unlock_args * uap)3506 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3507     /* struct umtx *umtx */
3508 {
3509 	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3510 }
3511 
/*
 * Timespec layout with two 32-bit fields, used as the copyin() target in
 * umtx_copyin_timeout32().
 */
struct timespec32 {
	int32_t	tv_sec;		/* seconds */
	int32_t	tv_nsec;	/* nanoseconds */
};
3516 
3517 static inline int
umtx_copyin_timeout32(void * addr,struct timespec * tsp)3518 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
3519 {
3520 	struct timespec32 ts32;
3521 	int error;
3522 
3523 	error = copyin(addr, &ts32, sizeof(struct timespec32));
3524 	if (error == 0) {
3525 		if (ts32.tv_sec < 0 ||
3526 		    ts32.tv_nsec >= 1000000000 ||
3527 		    ts32.tv_nsec < 0)
3528 			error = EINVAL;
3529 		else {
3530 			tsp->tv_sec = ts32.tv_sec;
3531 			tsp->tv_nsec = ts32.tv_nsec;
3532 		}
3533 	}
3534 	return (error);
3535 }
3536 
3537 static int
__umtx_op_lock_umtx_compat32(struct thread * td,struct _umtx_op_args * uap)3538 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3539 {
3540 	struct timespec *ts, timeout;
3541 	int error;
3542 
3543 	/* Allow a null timespec (wait forever). */
3544 	if (uap->uaddr2 == NULL)
3545 		ts = NULL;
3546 	else {
3547 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3548 		if (error != 0)
3549 			return (error);
3550 		ts = &timeout;
3551 	}
3552 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3553 }
3554 
3555 static int
__umtx_op_unlock_umtx_compat32(struct thread * td,struct _umtx_op_args * uap)3556 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3557 {
3558 	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3559 }
3560 
3561 static int
__umtx_op_wait_compat32(struct thread * td,struct _umtx_op_args * uap)3562 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3563 {
3564 	struct timespec *ts, timeout;
3565 	int error;
3566 
3567 	if (uap->uaddr2 == NULL)
3568 		ts = NULL;
3569 	else {
3570 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3571 		if (error != 0)
3572 			return (error);
3573 		ts = &timeout;
3574 	}
3575 	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3576 }
3577 
3578 static int
__umtx_op_lock_umutex_compat32(struct thread * td,struct _umtx_op_args * uap)3579 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3580 {
3581 	struct timespec *ts, timeout;
3582 	int error;
3583 
3584 	/* Allow a null timespec (wait forever). */
3585 	if (uap->uaddr2 == NULL)
3586 		ts = NULL;
3587 	else {
3588 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3589 		if (error != 0)
3590 			return (error);
3591 		ts = &timeout;
3592 	}
3593 	return do_lock_umutex(td, uap->obj, ts, 0);
3594 }
3595 
3596 static int
__umtx_op_wait_umutex_compat32(struct thread * td,struct _umtx_op_args * uap)3597 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3598 {
3599 	struct timespec *ts, timeout;
3600 	int error;
3601 
3602 	/* Allow a null timespec (wait forever). */
3603 	if (uap->uaddr2 == NULL)
3604 		ts = NULL;
3605 	else {
3606 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3607 		if (error != 0)
3608 			return (error);
3609 		ts = &timeout;
3610 	}
3611 	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3612 }
3613 
3614 static int
__umtx_op_cv_wait_compat32(struct thread * td,struct _umtx_op_args * uap)3615 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3616 {
3617 	struct timespec *ts, timeout;
3618 	int error;
3619 
3620 	/* Allow a null timespec (wait forever). */
3621 	if (uap->uaddr2 == NULL)
3622 		ts = NULL;
3623 	else {
3624 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3625 		if (error != 0)
3626 			return (error);
3627 		ts = &timeout;
3628 	}
3629 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3630 }
3631 
3632 static int
__umtx_op_rw_rdlock_compat32(struct thread * td,struct _umtx_op_args * uap)3633 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3634 {
3635 	struct timespec timeout;
3636 	int error;
3637 
3638 	/* Allow a null timespec (wait forever). */
3639 	if (uap->uaddr2 == NULL) {
3640 		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3641 	} else {
3642 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3643 		if (error != 0)
3644 			return (error);
3645 		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3646 	}
3647 	return (error);
3648 }
3649 
3650 static int
__umtx_op_rw_wrlock_compat32(struct thread * td,struct _umtx_op_args * uap)3651 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3652 {
3653 	struct timespec timeout;
3654 	int error;
3655 
3656 	/* Allow a null timespec (wait forever). */
3657 	if (uap->uaddr2 == NULL) {
3658 		error = do_rw_wrlock(td, uap->obj, 0);
3659 	} else {
3660 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3661 		if (error != 0)
3662 			return (error);
3663 
3664 		error = do_rw_wrlock2(td, uap->obj, &timeout);
3665 	}
3666 	return (error);
3667 }
3668 
3669 static int
__umtx_op_wait_uint_private_compat32(struct thread * td,struct _umtx_op_args * uap)3670 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3671 {
3672 	struct timespec *ts, timeout;
3673 	int error;
3674 
3675 	if (uap->uaddr2 == NULL)
3676 		ts = NULL;
3677 	else {
3678 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3679 		if (error != 0)
3680 			return (error);
3681 		ts = &timeout;
3682 	}
3683 	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3684 }
3685 
3686 static int
__umtx_op_sem_wait_compat32(struct thread * td,struct _umtx_op_args * uap)3687 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3688 {
3689 	struct timespec *ts, timeout;
3690 	int error;
3691 
3692 	/* Allow a null timespec (wait forever). */
3693 	if (uap->uaddr2 == NULL)
3694 		ts = NULL;
3695 	else {
3696 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3697 		if (error != 0)
3698 			return (error);
3699 		ts = &timeout;
3700 	}
3701 	return (do_sem_wait(td, uap->obj, ts));
3702 }
3703 
3704 static int
__umtx_op_nwake_private32(struct thread * td,struct _umtx_op_args * uap)3705 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3706 {
3707 	int count = uap->val;
3708 	uint32_t uaddrs[BATCH_SIZE];
3709 	uint32_t **upp = (uint32_t **)uap->obj;
3710 	int tocopy;
3711 	int error = 0;
3712 	int i, pos = 0;
3713 
3714 	while (count > 0) {
3715 		tocopy = count;
3716 		if (tocopy > BATCH_SIZE)
3717 			tocopy = BATCH_SIZE;
3718 		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3719 		if (error != 0)
3720 			break;
3721 		for (i = 0; i < tocopy; ++i)
3722 			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3723 				INT_MAX, 1);
3724 		count -= tocopy;
3725 		pos += tocopy;
3726 	}
3727 	return (error);
3728 }
3729 
3730 static _umtx_op_func op_table_compat32[] = {
3731 	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3732 	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3733 	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3734 	__umtx_op_wake,			/* UMTX_OP_WAKE */
3735 	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_LOCK */
3736 	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_TRYLOCK */
3737 	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
3738 	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3739 	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
3740 	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3741 	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3742 	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3743 	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3744 	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3745 	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3746 	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3747 	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3748 	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3749 	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3750 	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3751 	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3752 	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
3753 	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
3754 };
3755 
3756 int
freebsd32_umtx_op(struct thread * td,struct freebsd32_umtx_op_args * uap)3757 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3758 {
3759 	if ((unsigned)uap->op < UMTX_OP_MAX)
3760 		return (*op_table_compat32[uap->op])(td,
3761 			(struct _umtx_op_args *)uap);
3762 	return (EINVAL);
3763 }
3764 #endif
3765 
3766 void
umtx_thread_init(struct thread * td)3767 umtx_thread_init(struct thread *td)
3768 {
3769 	td->td_umtxq = umtxq_alloc();
3770 	td->td_umtxq->uq_thread = td;
3771 }
3772 
3773 void
umtx_thread_fini(struct thread * td)3774 umtx_thread_fini(struct thread *td)
3775 {
3776 	umtxq_free(td->td_umtxq);
3777 }
3778 
3779 /*
3780  * It will be called when new thread is created, e.g fork().
3781  */
3782 void
umtx_thread_alloc(struct thread * td)3783 umtx_thread_alloc(struct thread *td)
3784 {
3785 	struct umtx_q *uq;
3786 
3787 	uq = td->td_umtxq;
3788 	uq->uq_inherited_pri = PRI_MAX;
3789 
3790 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3791 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3792 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3793 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3794 }
3795 
3796 /*
3797  * exec() hook.
3798  */
3799 static void
umtx_exec_hook(void * arg __unused,struct proc * p __unused,struct image_params * imgp __unused)3800 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3801 	struct image_params *imgp __unused)
3802 {
3803 	umtx_thread_cleanup(curthread);
3804 }
3805 
/*
 * thread_exit() hook: run the umtx cleanup for the exiting thread td.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}
3814 
3815 /*
3816  * clean up umtx data.
3817  */
3818 static void
umtx_thread_cleanup(struct thread * td)3819 umtx_thread_cleanup(struct thread *td)
3820 {
3821 	struct umtx_q *uq;
3822 	struct umtx_pi *pi;
3823 
3824 	if ((uq = td->td_umtxq) == NULL)
3825 		return;
3826 
3827 	mtx_lock_spin(&umtx_lock);
3828 	uq->uq_inherited_pri = PRI_MAX;
3829 	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3830 		pi->pi_owner = NULL;
3831 		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3832 	}
3833 	mtx_unlock_spin(&umtx_lock);
3834 	thread_lock(td);
3835 	sched_lend_user_prio(td, PRI_MAX);
3836 	thread_unlock(td);
3837 }
3838