/* Source: /freebsd-13-stable/sys/i386/include/atomic.h (revision b6a3bda9bc2eebbc7630ec7a9ccb27ca923ef596) */
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1998 Doug Rabson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 #ifndef _MACHINE_ATOMIC_H_
29 #define	_MACHINE_ATOMIC_H_
30 
31 #ifndef _SYS_CDEFS_H_
32 #error this file needs sys/cdefs.h as a prerequisite
33 #endif
34 
35 #include <sys/atomic_common.h>
36 
37 #ifdef _KERNEL
38 #include <machine/md_var.h>
39 #include <machine/specialreg.h>
40 #endif
41 
42 #ifndef __OFFSETOF_MONITORBUF
43 /*
44  * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
45  *
46  * The open-coded number is used instead of the symbolic expression to
47  * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
48  * An assertion in i386/vm_machdep.c ensures that the value is correct.
49  */
50 #define	__OFFSETOF_MONITORBUF	0x80
51 
52 static __inline void
__mbk(void)53 __mbk(void)
54 {
55 
56 	__asm __volatile("lock; addl $0,%%fs:%0"
57 	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
58 }
59 
/*
 * Userland Store/Load barrier.
 *
 * Issues a locked no-op add on the word %esp points at.  The "memory"
 * clobber additionally keeps the compiler from moving memory accesses
 * across the barrier.
 */
static __inline void
__mbu(void)
{

	__asm __volatile(
	    "lock; addl $0,(%%esp)"
	    : /* no outputs */
	    : /* no inputs */
	    : "memory", "cc");
}
66 #endif
67 
68 /*
69  * Various simple operations on memory, each of which is atomic in the
70  * presence of interrupts and multiple processors.
71  *
72  * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
73  * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
74  * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
75  * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
76  *
77  * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
78  * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
79  * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
80  * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
81  *
82  * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
83  * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
84  * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
85  * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
86  * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
87  * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
88  *
89  * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
90  * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
91  * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
92  * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
93  * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
94  * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
95  */
96 
97 #if !defined(__GNUCLIKE_ASM)
98 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
99 void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
100 void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
101 
102 int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
103 int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
104 int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
105 int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
106 int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
107 	    u_short src);
108 int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
109 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
110 int	atomic_testandset_int(volatile u_int *p, u_int v);
111 int	atomic_testandclear_int(volatile u_int *p, u_int v);
112 void	atomic_thread_fence_acq(void);
113 void	atomic_thread_fence_acq_rel(void);
114 void	atomic_thread_fence_rel(void);
115 void	atomic_thread_fence_seq_cst(void);
116 
117 #define	ATOMIC_LOAD(TYPE)					\
118 u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
119 #define	ATOMIC_STORE(TYPE)					\
120 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
121 
122 int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
123 int		atomic_fcmpset_64(volatile uint64_t *, uint64_t *, uint64_t);
124 uint64_t	atomic_load_acq_64(volatile uint64_t *);
125 void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
126 uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
127 uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);
128 void		atomic_add_64(volatile uint64_t *, uint64_t);
129 void		atomic_subtract_64(volatile uint64_t *, uint64_t);
130 
131 #else /* !__GNUCLIKE_ASM */
132 
133 /*
134  * Always use lock prefixes.  The result is slightly less optimal for
135  * UP systems, but it matters less now, and sometimes UP is emulated
136  * over SMP.
137  *
138  * The assembly is volatilized to avoid code chunk removal by the compiler.
139  * GCC aggressively reorders operations and memory clobbering is necessary
140  * in order to avoid that for memory barriers.
141  */
/*
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) emits two inline functions:
 *
 *	atomic_NAME_TYPE(p, v)		locked OP on *p; clobbers flags only
 *	atomic_NAME_barr_TYPE(p, v)	same, with an added "memory" clobber
 *
 * OP is the instruction template, CONS the constraint string for the
 * source operand and V the expression (in terms of v) passed as that
 * operand.  The trailing "struct __hack" absorbs the semicolon written
 * after each instantiation.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)				\
static __inline void							\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile("lock; " OP					\
	    : "+m" (*p)			/* 0: target */			\
	    : CONS (V)			/* 1: source */			\
	    : "cc");							\
}									\
									\
static __inline void							\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile("lock; " OP					\
	    : "+m" (*p)			/* 0: target */			\
	    : CONS (V)			/* 1: source */			\
	    : "memory", "cc");						\
}									\
struct __hack
161 
162 /*
163  * Atomic compare and set, used by the mutex functions.
164  *
165  * cmpset:
166  *	if (*dst == expect)
167  *		*dst = src
168  *
169  * fcmpset:
170  *	if (*dst == *expect)
171  *		*dst = src
172  *	else
173  *		*expect = *dst
174  *
175  * Returns 0 on failure, non-zero on success.
176  */
177 #define	ATOMIC_CMPSET(TYPE, CONS)			\
178 static __inline int					\
179 atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
180 {							\
181 	u_char res;					\
182 							\
183 	__asm __volatile(				\
184 	"	lock; cmpxchg	%3,%1 ;	"		\
185 	"	sete	%0 ;		"		\
186 	"# atomic_cmpset_" #TYPE "	"		\
187 	: "=q" (res),			/* 0 */		\
188 	  "+m" (*dst),			/* 1 */		\
189 	  "+a" (expect)			/* 2 */		\
190 	: CONS (src)			/* 3 */		\
191 	: "memory", "cc");				\
192 	return (res);					\
193 }							\
194 							\
195 static __inline int					\
196 atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
197 {							\
198 	u_char res;					\
199 							\
200 	__asm __volatile(				\
201 	"	lock; cmpxchg	%3,%1 ;	"		\
202 	"	sete	%0 ;		"		\
203 	"# atomic_fcmpset_" #TYPE "	"		\
204 	: "=q" (res),			/* 0 */		\
205 	  "+m" (*dst),			/* 1 */		\
206 	  "+a" (*expect)		/* 2 */		\
207 	: CONS (src)			/* 3 */		\
208 	: "memory", "cc");				\
209 	return (res);					\
210 }
211 
212 ATOMIC_CMPSET(char, "q");
213 ATOMIC_CMPSET(short, "r");
214 ATOMIC_CMPSET(int, "r");
215 
216 /*
217  * Atomically add the value of v to the integer pointed to by p and return
218  * the previous value of *p.
219  */
220 static __inline u_int
atomic_fetchadd_int(volatile u_int * p,u_int v)221 atomic_fetchadd_int(volatile u_int *p, u_int v)
222 {
223 
224 	__asm __volatile(
225 	"	lock; xaddl	%0,%1 ;	"
226 	"# atomic_fetchadd_int"
227 	: "+r" (v),			/* 0 */
228 	  "+m" (*p)			/* 1 */
229 	: : "cc");
230 	return (v);
231 }
232 
233 static __inline int
atomic_testandset_int(volatile u_int * p,u_int v)234 atomic_testandset_int(volatile u_int *p, u_int v)
235 {
236 	u_char res;
237 
238 	__asm __volatile(
239 	"	lock; btsl	%2,%1 ;	"
240 	"	setc	%0 ;		"
241 	"# atomic_testandset_int"
242 	: "=q" (res),			/* 0 */
243 	  "+m" (*p)			/* 1 */
244 	: "Ir" (v & 0x1f)		/* 2 */
245 	: "cc");
246 	return (res);
247 }
248 
249 static __inline int
atomic_testandclear_int(volatile u_int * p,u_int v)250 atomic_testandclear_int(volatile u_int *p, u_int v)
251 {
252 	u_char res;
253 
254 	__asm __volatile(
255 	"	lock; btrl	%2,%1 ;	"
256 	"	setc	%0 ;		"
257 	"# atomic_testandclear_int"
258 	: "=q" (res),			/* 0 */
259 	  "+m" (*p)			/* 1 */
260 	: "Ir" (v & 0x1f)		/* 2 */
261 	: "cc");
262 	return (res);
263 }
264 
265 /*
266  * We assume that a = b will do atomic loads and stores.  Due to the
267  * IA32 memory model, a simple store guarantees release semantics.
268  *
269  * However, a load may pass a store if they are performed on distinct
270  * addresses, so we need Store/Load barrier for sequentially
271  * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
272  * Store/Load barrier, as recommended by the AMD Software Optimization
273  * Guide, and not mfence.  In the kernel, we use a private per-cpu
274  * cache line for "mem", to avoid introducing false data
275  * dependencies.  In user space, we use the word at the top of the
276  * stack.
277  *
278  * For UP kernels, however, the memory of the single processor is
279  * always consistent, so we only need to stop the compiler from
280  * reordering accesses in a way that violates the semantics of acquire
281  * and release.
282  */
283 
/* Select the Store/Load barrier that matches the build environment. */
#ifdef _KERNEL
#define	__storeload_barrier()	__mbk()
#else /* !_KERNEL */
#define	__storeload_barrier()	__mbu()
#endif /* _KERNEL */
289 
/*
 * ATOMIC_LOAD(TYPE) emits atomic_load_acq_TYPE(): a plain load of *p
 * followed by a compiler barrier, so the compiler cannot hoist later
 * memory accesses above the load.
 */
#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(const volatile u_##TYPE *p)		\
{								\
	u_##TYPE val = *p;					\
								\
	__compiler_membar();					\
	return (val);						\
}								\
struct __hack
301 
/*
 * ATOMIC_STORE(TYPE) emits atomic_store_rel_TYPE(): a compiler barrier
 * followed by a plain store to *p, so the compiler cannot sink earlier
 * memory accesses below the store.
 */
#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
	__compiler_membar();					\
	*p = v;							\
}								\
struct __hack
311 
/*
 * Thread fences.  The acquire, release and acquire-release fences only
 * need to restrain the compiler; the sequentially consistent fence
 * additionally issues the Store/Load barrier.
 */
static __inline void
atomic_thread_fence_acq(void)
{
	__compiler_membar();
}

static __inline void
atomic_thread_fence_rel(void)
{
	__compiler_membar();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{
	__compiler_membar();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{
	__storeload_barrier();
}
339 
340 #ifdef _KERNEL
341 
342 /* I486 does not support SMP or CMPXCHG8B. */
343 static __inline int
atomic_cmpset_64_i386(volatile uint64_t * dst,uint64_t expect,uint64_t src)344 atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
345 {
346 	volatile uint32_t *p;
347 	u_char res;
348 
349 	p = (volatile uint32_t *)dst;
350 	__asm __volatile(
351 	"	pushfl ;		"
352 	"	cli ;			"
353 	"	xorl	%1,%%eax ;	"
354 	"	xorl	%2,%%edx ;	"
355 	"	orl	%%edx,%%eax ;	"
356 	"	jne	1f ;		"
357 	"	movl	%4,%1 ;		"
358 	"	movl	%5,%2 ;		"
359 	"1:				"
360 	"	sete	%3 ;		"
361 	"	popfl"
362 	: "+A" (expect),		/* 0 */
363 	  "+m" (*p),			/* 1 */
364 	  "+m" (*(p + 1)),		/* 2 */
365 	  "=q" (res)			/* 3 */
366 	: "r" ((uint32_t)src),		/* 4 */
367 	  "r" ((uint32_t)(src >> 32))	/* 5 */
368 	: "memory", "cc");
369 	return (res);
370 }
371 
/*
 * fcmpset for CPUs without CMPXCHG8B.
 *
 * Returns 1 if *dst equalled *expect and was replaced by src.
 * Otherwise returns 0 and stores the current value of *dst in *expect.
 */
static __inline int
atomic_fcmpset_64_i386(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{
	const volatile uint32_t *q;
	uint64_t cur;

	if (atomic_cmpset_64_i386(dst, *expect, src))
		return (1);

	/*
	 * A plain "*expect = *dst" is two separate 32-bit loads; an
	 * interrupt arriving between them could modify *dst and leave a
	 * torn value in *expect.  Read both halves with interrupts
	 * disabled instead, using the same sequence as
	 * atomic_load_acq_64_i386().
	 */
	q = (const volatile uint32_t *)dst;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (cur)			/* 0 */
	: "m" (*q),			/* 1 */
	  "m" (*(q + 1))		/* 2 */
	: "memory");
	*expect = cur;
	return (0);
}
383 
/*
 * 64-bit load built from two 32-bit loads, made atomic by running with
 * interrupts disabled.  The "memory" clobber keeps the compiler from
 * moving memory accesses across the load.
 */
static __inline uint64_t
atomic_load_acq_64_i386(const volatile uint64_t *p)
{
	const volatile uint32_t *half = (const volatile uint32_t *)p;
	uint64_t val;

	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (val)			/* 0 */
	: "m" (*half),			/* 1 */
	  "m" (*(half + 1))		/* 2 */
	: "memory");
	return (val);
}
403 
/*
 * 64-bit store built from two 32-bit stores, made atomic by running
 * with interrupts disabled.  The "memory" clobber keeps the compiler
 * from moving memory accesses across the store.
 */
static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *half = (volatile uint32_t *)p;

	/* v arrives in %edx:%eax through the "A" constraint. */
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (*half),			/* 0 */
	  "=m" (*(half + 1))		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}
421 
/*
 * 64-bit exchange built from 32-bit moves, made atomic by running with
 * interrupts disabled.  Returns the value *p held before v was stored.
 */
static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *half = (volatile uint32_t *)p;
	uint64_t old;

	/* Fetch both halves into %edx:%eax first, then overwrite them. */
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (old),			/* 0 */
	  "+m" (*half),			/* 1 */
	  "+m" (*(half + 1))		/* 2 */
	: "r" ((uint32_t)v),		/* 3 */
	  "r" ((uint32_t)(v >> 32)));	/* 4 */
	return (old);
}
444 
445 static __inline int
atomic_cmpset_64_i586(volatile uint64_t * dst,uint64_t expect,uint64_t src)446 atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
447 {
448 	u_char res;
449 
450 	__asm __volatile(
451 	"	lock; cmpxchg8b %1 ;	"
452 	"	sete	%0"
453 	: "=q" (res),			/* 0 */
454 	  "+m" (*dst),			/* 1 */
455 	  "+A" (expect)			/* 2 */
456 	: "b" ((uint32_t)src),		/* 3 */
457 	  "c" ((uint32_t)(src >> 32))	/* 4 */
458 	: "memory", "cc");
459 	return (res);
460 }
461 
462 static __inline int
atomic_fcmpset_64_i586(volatile uint64_t * dst,uint64_t * expect,uint64_t src)463 atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
464 {
465 	u_char res;
466 
467 	__asm __volatile(
468 	"	lock; cmpxchg8b %1 ;	"
469 	"	sete	%0"
470 	: "=q" (res),			/* 0 */
471 	  "+m" (*dst),			/* 1 */
472 	  "+A" (*expect)		/* 2 */
473 	: "b" ((uint32_t)src),		/* 3 */
474 	  "c" ((uint32_t)(src >> 32))	/* 4 */
475 	: "memory", "cc");
476 	return (res);
477 }
478 
479 /*
480  * Architecturally always writes back some value to '*p' so will trigger
481  * a #GP(0) on read-only mappings.
482  */
static __inline uint64_t
atomic_load_acq_64_i586(const volatile uint64_t *p)
{
	uint64_t val;

	/*
	 * Copy %ecx:%ebx into %edx:%eax so that compare and replacement
	 * values are identical: cmpxchg8b then either rewrites *p with
	 * the value it already holds or loads *p into %edx:%eax.  Either
	 * way %edx:%eax ends up holding *p and memory is unchanged.
	 */
	__asm __volatile(
	"	movl	%%ebx,%%eax ;	"
	"	movl	%%ecx,%%edx ;	"
	"	lock; cmpxchg8b %1"
	: "=&A" (val)			/* 0 */
	: "m" (*p)			/* 1 */
	: "memory", "cc");
	return (val);
}
497 
/*
 * 64-bit store using a "lock; cmpxchg8b" retry loop.
 */
static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{

	/*
	 * v arrives in %edx:%eax and is copied to %ecx:%ebx as the
	 * replacement value.  A failed cmpxchg8b reloads %edx:%eax from
	 * *p, so the loop retries against the value just observed until
	 * the store goes through.
	 */
	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	lock; cmpxchg8b %0 ;	"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
}
512 
/*
 * 64-bit exchange using a "lock; cmpxchg8b" retry loop.
 * Returns the value *p held before v was stored.
 */
static __inline uint64_t
atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
{
	uint64_t old = v;

	/*
	 * The new value is copied from %edx:%eax to %ecx:%ebx.  Each
	 * failed cmpxchg8b reloads %edx:%eax from *p; when it finally
	 * succeeds, %edx:%eax holds the value that was replaced.
	 */
	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	lock; cmpxchg8b %0 ;	"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (old)			/* 1 */
	: : "ebx", "ecx", "memory", "cc");
	return (old);
}
528 
529 static __inline int
atomic_cmpset_64(volatile uint64_t * dst,uint64_t expect,uint64_t src)530 atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
531 {
532 
533 	if ((cpu_feature & CPUID_CX8) == 0)
534 		return (atomic_cmpset_64_i386(dst, expect, src));
535 	else
536 		return (atomic_cmpset_64_i586(dst, expect, src));
537 }
538 
539 static __inline int
atomic_fcmpset_64(volatile uint64_t * dst,uint64_t * expect,uint64_t src)540 atomic_fcmpset_64(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
541 {
542 
543   	if ((cpu_feature & CPUID_CX8) == 0)
544 		return (atomic_fcmpset_64_i386(dst, expect, src));
545 	else
546 		return (atomic_fcmpset_64_i586(dst, expect, src));
547 }
548 
549 static __inline uint64_t
atomic_load_acq_64(const volatile uint64_t * p)550 atomic_load_acq_64(const volatile uint64_t *p)
551 {
552 
553 	if ((cpu_feature & CPUID_CX8) == 0)
554 		return (atomic_load_acq_64_i386(p));
555 	else
556 		return (atomic_load_acq_64_i586(p));
557 }
558 
559 static __inline void
atomic_store_rel_64(volatile uint64_t * p,uint64_t v)560 atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
561 {
562 
563 	if ((cpu_feature & CPUID_CX8) == 0)
564 		atomic_store_rel_64_i386(p, v);
565 	else
566 		atomic_store_rel_64_i586(p, v);
567 }
568 
569 static __inline uint64_t
atomic_swap_64(volatile uint64_t * p,uint64_t v)570 atomic_swap_64(volatile uint64_t *p, uint64_t v)
571 {
572 
573 	if ((cpu_feature & CPUID_CX8) == 0)
574 		return (atomic_swap_64_i386(p, v));
575 	else
576 		return (atomic_swap_64_i586(p, v));
577 }
578 
/*
 * Atomically add v to *p and return the value *p held beforehand.
 * Implemented as a compare-and-set retry loop.
 */
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	/* Re-read and retry until no other update slips in between. */
	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old + v));
	return (old);
}
589 
/*
 * Atomically add v to *p, using a compare-and-set retry loop.
 */
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	/* Re-read and retry until no other update slips in between. */
	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old + v));
}
601 
/*
 * Atomically subtract v from *p, using a compare-and-set retry loop.
 */
static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	/* Re-read and retry until no other update slips in between. */
	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old - v));
}
613 
614 #endif /* _KERNEL */
615 
616 #endif /* !__GNUCLIKE_ASM */
617 
618 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
619 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
620 ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
621 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
622 
623 ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
624 ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
625 ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
626 ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
627 
628 ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
629 ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
630 ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
631 ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
632 
633 ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
634 ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
635 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
636 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
637 
638 #define	ATOMIC_LOADSTORE(TYPE)				\
639 	ATOMIC_LOAD(TYPE);				\
640 	ATOMIC_STORE(TYPE)
641 
642 ATOMIC_LOADSTORE(char);
643 ATOMIC_LOADSTORE(short);
644 ATOMIC_LOADSTORE(int);
645 ATOMIC_LOADSTORE(long);
646 
647 #undef ATOMIC_ASM
648 #undef ATOMIC_LOAD
649 #undef ATOMIC_STORE
650 #undef ATOMIC_LOADSTORE
651 
652 static __inline int
atomic_cmpset_long(volatile u_long * dst,u_long expect,u_long src)653 atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
654 {
655 
656 	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
657 	    (u_int)src));
658 }
659 
660 static __inline int
atomic_fcmpset_long(volatile u_long * dst,u_long * expect,u_long src)661 atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src)
662 {
663 
664 	return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect,
665 	    (u_int)src));
666 }
667 
668 static __inline u_long
atomic_fetchadd_long(volatile u_long * p,u_long v)669 atomic_fetchadd_long(volatile u_long *p, u_long v)
670 {
671 
672 	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
673 }
674 
675 static __inline int
atomic_testandset_long(volatile u_long * p,u_int v)676 atomic_testandset_long(volatile u_long *p, u_int v)
677 {
678 
679 	return (atomic_testandset_int((volatile u_int *)p, v));
680 }
681 
682 static __inline int
atomic_testandclear_long(volatile u_long * p,u_int v)683 atomic_testandclear_long(volatile u_long *p, u_int v)
684 {
685 
686 	return (atomic_testandclear_int((volatile u_int *)p, v));
687 }
688 
689 /* Read the current value and store a new value in the destination. */
690 #ifdef __GNUCLIKE_ASM
691 
692 static __inline u_int
atomic_swap_int(volatile u_int * p,u_int v)693 atomic_swap_int(volatile u_int *p, u_int v)
694 {
695 
696 	__asm __volatile(
697 	"	xchgl	%1,%0 ;		"
698 	"# atomic_swap_int"
699 	: "+r" (v),			/* 0 */
700 	  "+m" (*p));			/* 1 */
701 	return (v);
702 }
703 
704 static __inline u_long
atomic_swap_long(volatile u_long * p,u_long v)705 atomic_swap_long(volatile u_long *p, u_long v)
706 {
707 
708 	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
709 }
710 
711 #else /* !__GNUCLIKE_ASM */
712 
713 u_int	atomic_swap_int(volatile u_int *p, u_int v);
714 u_long	atomic_swap_long(volatile u_long *p, u_long v);
715 
716 #endif /* __GNUCLIKE_ASM */
717 
/*
 * Acquire/release variants.  For set/clear/add/subtract both map to the
 * _barr function, which carries the "memory" clobber; for cmpset and
 * fcmpset both map to the plain function.
 */

/* u_char */
#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char
#define	atomic_cmpset_acq_char		atomic_cmpset_char
#define	atomic_cmpset_rel_char		atomic_cmpset_char
#define	atomic_fcmpset_acq_char		atomic_fcmpset_char
#define	atomic_fcmpset_rel_char		atomic_fcmpset_char

/* u_short */
#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short
#define	atomic_cmpset_acq_short		atomic_cmpset_short
#define	atomic_cmpset_rel_short		atomic_cmpset_short
#define	atomic_fcmpset_acq_short	atomic_fcmpset_short
#define	atomic_fcmpset_rel_short	atomic_fcmpset_short

/* u_int */
#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

/* u_long */
#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long

/* Read-and-clear is an exchange with zero. */
#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
#define	atomic_testandset_acq_long	atomic_testandset_long
773 
/* Fixed-width names for 8-bit operands; each maps to the u_char function. */
#define	atomic_set_8			atomic_set_char
#define	atomic_set_acq_8		atomic_set_acq_char
#define	atomic_set_rel_8		atomic_set_rel_char
#define	atomic_clear_8			atomic_clear_char
#define	atomic_clear_acq_8		atomic_clear_acq_char
#define	atomic_clear_rel_8		atomic_clear_rel_char
#define	atomic_add_8			atomic_add_char
#define	atomic_add_acq_8		atomic_add_acq_char
#define	atomic_add_rel_8		atomic_add_rel_char
#define	atomic_subtract_8		atomic_subtract_char
#define	atomic_subtract_acq_8		atomic_subtract_acq_char
#define	atomic_subtract_rel_8		atomic_subtract_rel_char
#define	atomic_load_acq_8		atomic_load_acq_char
#define	atomic_store_rel_8		atomic_store_rel_char
#define	atomic_cmpset_8			atomic_cmpset_char
#define	atomic_cmpset_acq_8		atomic_cmpset_acq_char
#define	atomic_cmpset_rel_8		atomic_cmpset_rel_char
#define	atomic_fcmpset_8		atomic_fcmpset_char
#define	atomic_fcmpset_acq_8		atomic_fcmpset_acq_char
#define	atomic_fcmpset_rel_8		atomic_fcmpset_rel_char
795 
/* Fixed-width names for 16-bit operands; each maps to the u_short function. */
#define	atomic_set_16			atomic_set_short
#define	atomic_set_acq_16		atomic_set_acq_short
#define	atomic_set_rel_16		atomic_set_rel_short
#define	atomic_clear_16			atomic_clear_short
#define	atomic_clear_acq_16		atomic_clear_acq_short
#define	atomic_clear_rel_16		atomic_clear_rel_short
#define	atomic_add_16			atomic_add_short
#define	atomic_add_acq_16		atomic_add_acq_short
#define	atomic_add_rel_16		atomic_add_rel_short
#define	atomic_subtract_16		atomic_subtract_short
#define	atomic_subtract_acq_16		atomic_subtract_acq_short
#define	atomic_subtract_rel_16		atomic_subtract_rel_short
#define	atomic_load_acq_16		atomic_load_acq_short
#define	atomic_store_rel_16		atomic_store_rel_short
#define	atomic_cmpset_16		atomic_cmpset_short
#define	atomic_cmpset_acq_16		atomic_cmpset_acq_short
#define	atomic_cmpset_rel_16		atomic_cmpset_rel_short
#define	atomic_fcmpset_16		atomic_fcmpset_short
#define	atomic_fcmpset_acq_16		atomic_fcmpset_acq_short
#define	atomic_fcmpset_rel_16		atomic_fcmpset_rel_short
817 
/* Fixed-width names for 32-bit operands; each maps to the u_int function. */
#define	atomic_set_32			atomic_set_int
#define	atomic_set_acq_32		atomic_set_acq_int
#define	atomic_set_rel_32		atomic_set_rel_int
#define	atomic_clear_32			atomic_clear_int
#define	atomic_clear_acq_32		atomic_clear_acq_int
#define	atomic_clear_rel_32		atomic_clear_rel_int
#define	atomic_add_32			atomic_add_int
#define	atomic_add_acq_32		atomic_add_acq_int
#define	atomic_add_rel_32		atomic_add_rel_int
#define	atomic_subtract_32		atomic_subtract_int
#define	atomic_subtract_acq_32		atomic_subtract_acq_int
#define	atomic_subtract_rel_32		atomic_subtract_rel_int
#define	atomic_load_acq_32		atomic_load_acq_int
#define	atomic_store_rel_32		atomic_store_rel_int
#define	atomic_cmpset_32		atomic_cmpset_int
#define	atomic_cmpset_acq_32		atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32		atomic_cmpset_rel_int
#define	atomic_fcmpset_32		atomic_fcmpset_int
#define	atomic_fcmpset_acq_32		atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32		atomic_fcmpset_rel_int
#define	atomic_swap_32			atomic_swap_int
#define	atomic_readandclear_32		atomic_readandclear_int
#define	atomic_fetchadd_32		atomic_fetchadd_int
#define	atomic_testandset_32		atomic_testandset_int
#define	atomic_testandclear_32		atomic_testandclear_int
844 
#ifdef _KERNEL
/*
 * 64-bit operations are available in the kernel only.  The acquire and
 * release names map to the plain functions, and the unqualified
 * load/store names map to the acquire load and release store.
 */
#define	atomic_cmpset_acq_64		atomic_cmpset_64
#define	atomic_cmpset_rel_64		atomic_cmpset_64
#define	atomic_fcmpset_acq_64		atomic_fcmpset_64
#define	atomic_fcmpset_rel_64		atomic_fcmpset_64
#define	atomic_fetchadd_acq_64		atomic_fetchadd_64
#define	atomic_fetchadd_rel_64		atomic_fetchadd_64
#define	atomic_add_acq_64		atomic_add_64
#define	atomic_add_rel_64		atomic_add_64
#define	atomic_subtract_acq_64		atomic_subtract_64
#define	atomic_subtract_rel_64		atomic_subtract_64
#define	atomic_load_64			atomic_load_acq_64
#define	atomic_store_64			atomic_store_rel_64
#endif /* _KERNEL */
860 
/*
 * Operations on pointers.
 *
 * Every wrapper forwards to the u_int function, casting the target to a
 * (const) volatile u_int pointer and every value argument to u_int, so
 * callers may pass either pointers or integers.  atomic_store_rel_ptr()
 * casts its value like the rest of the family.
 */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((const volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
910 
/*
 * Full, write and read memory barriers.  All three expand to the same
 * locked-add barrier: __mbk() in the kernel, __mbu() in userland.
 */
#ifdef _KERNEL
#define	mb()	__mbk()
#define	wmb()	__mbk()
#define	rmb()	__mbk()
#else /* !_KERNEL */
#define	mb()	__mbu()
#define	wmb()	__mbu()
#define	rmb()	__mbu()
#endif /* _KERNEL */
920 
921 #endif /* !_MACHINE_ATOMIC_H_ */
922