xref: /dragonfly/sys/cpu/x86_64/include/atomic.h (revision b272101acc636ac635f83d03265ef6a44a3ba51a)
1 /*-
2  * Copyright (c) 1998 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
27  */
28 #ifndef _CPU_ATOMIC_H_
29 #define _CPU_ATOMIC_H_
30 
31 #include <sys/types.h>
32 #include <sys/atomic_common.h>
33 
34 /*
35  * Various simple arithmetic on memory which is atomic in the presence
36  * of interrupts and multiple processors.
37  *
38  * atomic_set_char(P, V)      (*(u_char*)(P) |= (V))
39  * atomic_clear_char(P, V)    (*(u_char*)(P) &= ~(V))
40  * atomic_add_char(P, V)      (*(u_char*)(P) += (V))
41  * atomic_subtract_char(P, V) (*(u_char*)(P) -= (V))
42  *
43  * atomic_set_short(P, V)     (*(u_short*)(P) |= (V))
44  * atomic_clear_short(P, V)   (*(u_short*)(P) &= ~(V))
45  * atomic_add_short(P, V)     (*(u_short*)(P) += (V))
46  * atomic_subtract_short(P, V)          (*(u_short*)(P) -= (V))
47  *
48  * atomic_set_int(P, V)                 (*(u_int*)(P) |= (V))
49  * atomic_clear_int(P, V)     (*(u_int*)(P) &= ~(V))
50  * atomic_add_int(P, V)                 (*(u_int*)(P) += (V))
51  * atomic_subtract_int(P, V)  (*(u_int*)(P) -= (V))
52  *
53  * atomic_set_long(P, V)      (*(u_long*)(P) |= (V))
54  * atomic_clear_long(P, V)    (*(u_long*)(P) &= ~(V))
55  * atomic_add_long(P, V)      (*(u_long*)(P) += (V))
56  * atomic_subtract_long(P, V) (*(u_long*)(P) -= (V))
57  * atomic_readandclear_long(P)          (return (*(u_long*)(P)); *(u_long*)(P) = 0;)
58  * atomic_readandclear_int(P) (return (*(u_int*)(P)); *(u_int*)(P) = 0;)
59  */
60 
61 /*
/*
 * Instruction prefixes pasted in front of the asm templates below.
 *
 * MPLOCKED	locked bus cycle
 * XACQUIRE	lock elision prefix (backwards compatible encoding)
 * XRELEASE	lock elision prefix (backwards compatible encoding)
 */
#define	MPLOCKED	"lock ; "
#define	XACQUIRE	"repne; "	/* lock elision */
#define	XRELEASE	"repe; "	/* lock elision */
68 
69 /*
70  * The assembly is volatilized to demark potential before-and-after side
71  * effects if an interrupt or SMP collision were to occur.  The primary
72  * atomic instructions are MP safe, the nonlocked instructions are
73  * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
74  * atomic instruction).
75  *
76  * +m - memory is read and written (=m - memory is only written)
77  * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
78  *        (Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
79  *        is good enough for our needs so don't get fancy.
80  * r  - any register.
81  *
82  * NOTE: 64-bit immediate values are not supported for most x86-64
83  *         instructions so we have to use "r".
84  */
85 
86 /* egcs 1.1.2+ version */
/*
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) expands to four inline functions
 * that apply the read-modify-write instruction OP to *p:
 *
 *	atomic_NAME_TYPE_nonlocked()	OP with no prefix
 *	atomic_NAME_TYPE()		MPLOCKED OP
 *	atomic_NAME_TYPE_xacquire()	XACQUIRE MPLOCKED OP
 *	atomic_NAME_TYPE_xrelease()	XRELEASE MPLOCKED OP
 *
 * CONS is the constraint used for the source operand and V is the
 * expression handed to the asm.  V is written in terms of the generated
 * function's parameter `v', so that parameter name must not change.
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)				\
static __inline void							\
atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)	\
{									\
	__asm __volatile(OP : "+m" (*p) : CONS (V));			\
}									\
static __inline void							\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile(MPLOCKED OP : "+m" (*p) : CONS (V));		\
}									\
static __inline void							\
atomic_##NAME##_##TYPE##_xacquire(volatile u_##TYPE *p, u_##TYPE v)	\
{									\
	__asm __volatile(XACQUIRE MPLOCKED OP : "+m" (*p) : CONS (V));	\
}									\
static __inline void							\
atomic_##NAME##_##TYPE##_xrelease(volatile u_##TYPE *p, u_##TYPE v)	\
{									\
	__asm __volatile(XRELEASE MPLOCKED OP : "+m" (*p) : CONS (V));	\
}
116 
117 /* egcs 1.1.2+ version */
118 ATOMIC_ASM(set,      char,  "orb %b1,%0",  "iq",   v)
119 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq",  ~v)
120 ATOMIC_ASM(add,      char,  "addb %b1,%0", "iq",   v)
121 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",   v)
122 
123 ATOMIC_ASM(set,      short, "orw %w1,%0",  "iq",   v)
124 ATOMIC_ASM(clear,    short, "andw %w1,%0", "iq",  ~v)
125 ATOMIC_ASM(add,      short, "addw %w1,%0", "iq",   v)
126 ATOMIC_ASM(subtract, short, "subw %w1,%0", "iq",   v)
127 
128 ATOMIC_ASM(set,      int,   "orl %1,%0",  "iq",   v)
129 ATOMIC_ASM(clear,    int,   "andl %1,%0", "iq",  ~v)
130 ATOMIC_ASM(add,      int,   "addl %1,%0", "iq",   v)
131 ATOMIC_ASM(subtract, int,   "subl %1,%0", "iq",   v)
132 
133 ATOMIC_ASM(set,      long,  "orq %1,%0",  "r",   v)
134 ATOMIC_ASM(clear,    long,  "andq %1,%0", "r",  ~v)
135 ATOMIC_ASM(add,      long,  "addq %1,%0", "r",   v)
136 ATOMIC_ASM(subtract, long,  "subq %1,%0", "r",   v)
137 
138 static __inline u_long
atomic_readandclear_long(volatile u_long * addr)139 atomic_readandclear_long(volatile u_long *addr)
140 {
141           u_long res;
142 
143           res = 0;
144           __asm __volatile(
145           "         xchgq     %1,%0 ;             "
146           "# atomic_readandclear_long"
147           : "+r" (res),                           /* 0 */
148             "=m" (*addr)                          /* 1 */
149           : "m" (*addr));
150 
151           return (res);
152 }
153 
154 static __inline u_int
atomic_readandclear_int(volatile u_int * addr)155 atomic_readandclear_int(volatile u_int *addr)
156 {
157           u_int res;
158 
159           res = 0;
160           __asm __volatile(
161           "         xchgl     %1,%0 ;             "
162           "# atomic_readandclear_int"
163           : "+r" (res),                           /* 0 */
164             "=m" (*addr)                          /* 1 */
165           : "m" (*addr));
166 
167           return (res);
168 }
169 
170 /*
171  * atomic_poll_acquire_int(P) Returns non-zero on success, 0 if the lock
172  *                                      has already been acquired.
173  * atomic_poll_release_int(P)
174  *
175  * These are used for IPIQ interlocks between CPUs.
176  * Both the acquisition and release must be cache-synchronizing instructions.
177  */
178 
/*
 * Atomically store `value' into *addr and return what *addr held before.
 */
static __inline int
atomic_swap_int(volatile int *addr, int value)
{
	__asm __volatile(
	    "xchgl %0, %1"
	    : "=r" (value),	/* 0: new value in, previous value out */
	      "=m" (*addr)	/* 1 */
	    : "0" (value)
	    : "memory");

	return (value);
}
186 
/*
 * Atomically store `value' into *addr and return what *addr held before.
 */
static __inline long
atomic_swap_long(volatile long *addr, long value)
{
	__asm __volatile(
	    "xchgq %0, %1"
	    : "=r" (value),	/* 0: new value in, previous value out */
	      "=m" (*addr)	/* 1 */
	    : "0" (value)
	    : "memory");

	return (value);
}
194 
/*
 * Atomically store the pointer `value' into *addr and return the pointer
 * that *addr held before.
 */
static __inline void *
atomic_swap_ptr(volatile void **addr, void *value)
{
	__asm __volatile(
	    "xchgq %0, %1"
	    : "=r" (value),	/* 0: new pointer in, previous pointer out */
	      "=m" (*addr)	/* 1 */
	    : "0" (value)
	    : "memory");

	return (value);
}
202 
203 static __inline int
atomic_poll_acquire_int(volatile u_int * p)204 atomic_poll_acquire_int(volatile u_int *p)
205 {
206           u_int data;
207 
208           __asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax" : "+m" (*p), "=a" (data));
209           return(data);
210 }
211 
212 static __inline void
atomic_poll_release_int(volatile u_int * p)213 atomic_poll_release_int(volatile u_int *p)
214 {
215           __asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p));
216 }
217 
218 /*
219  * These functions operate on a 32 bit interrupt interlock which is defined
220  * as follows:
221  *
222  *        bit 0-29  interrupt handler wait counter
223  *        bit 30              interrupt handler disabled bit
224  *        bit 31              interrupt handler currently running bit (1 = run)
225  *
226  * atomic_intr_cond_test(P)   Determine if the interlock is in an
227  *                                      acquired state.  Returns 0 if it is not
228  *                                      acquired, non-zero if it is. (not MPLOCKed)
229  *
230  * atomic_intr_cond_try(P)    Attempt to set bit 31 to acquire the
231  *                                      interlock.  If we are unable to set bit 31
232  *                                      we return 1, otherwise we return 0.
233  *
234  * atomic_intr_cond_enter(P, func, arg)
235  *                                      Attempt to set bit 31 to acquire the
236  *                                      interlock.  If we are unable to set bit 31,
237  *                                      the wait counter is incremented and func(arg)
238  *                                      is called in a loop until we are able to set
239  *                                      bit 31.  Once we set bit 31, wait counter
240  *                                      is decremented.
241  *
242  * atomic_intr_cond_exit(P, func, arg)
243  *                                      Clear bit 31.  If the wait counter is still
244  *                                      non-zero call func(arg) once.
245  *
246  * atomic_intr_handler_disable(P)
247  *                                      Set bit 30, indicating that the interrupt
248  *                                      handler has been disabled.  Must be called
249  *                                      after the hardware is disabled.
250  *
251  *                                      Returns bit 31 indicating whether a serialized
252  *                                      accessor is active (typically the interrupt
253  *                                      handler is running).  0 == not active,
254  *                                      non-zero == active.
255  *
256  * atomic_intr_handler_enable(P)
257  *                                      Clear bit 30, indicating that the interrupt
258  *                                      handler has been enabled.  Must be called
259  *                                      before the hardware is actually enabled.
260  *
261  * atomic_intr_handler_is_enabled(P)
262  *                                      Returns bit 30, 0 indicates that the handler
263  *                                      is enabled, non-zero indicates that it is
264  *                                      disabled.  The request counter portion of
265  *                                      the field is ignored. (not MPLOCKed)
266  *
267  * atomic_intr_cond_inc(P)    Increment wait counter by 1.
268  * atomic_intr_cond_dec(P)    Decrement wait counter by 1.
269  */
270 
271 static __inline void
atomic_intr_init(__atomic_intr_t * p)272 atomic_intr_init(__atomic_intr_t *p)
273 {
274           *p = 0;
275 }
276 
277 static __inline int
atomic_intr_handler_disable(__atomic_intr_t * p)278 atomic_intr_handler_disable(__atomic_intr_t *p)
279 {
280           int data;
281 
282           __asm __volatile(MPLOCKED "orl $0x40000000,%1; movl %1,%%eax; " \
283                                           "andl $0x80000000,%%eax" \
284                                           : "=a"(data) , "+m"(*p));
285           return(data);
286 }
287 
288 static __inline void
atomic_intr_handler_enable(__atomic_intr_t * p)289 atomic_intr_handler_enable(__atomic_intr_t *p)
290 {
291           __asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
292 }
293 
294 static __inline int
atomic_intr_handler_is_enabled(__atomic_intr_t * p)295 atomic_intr_handler_is_enabled(__atomic_intr_t *p)
296 {
297           int data;
298 
299           __asm __volatile("movl %1,%%eax; andl $0x40000000,%%eax" \
300                                : "=a"(data) : "m"(*p));
301           return(data);
302 }
303 
304 static __inline void
atomic_intr_cond_inc(__atomic_intr_t * p)305 atomic_intr_cond_inc(__atomic_intr_t *p)
306 {
307           __asm __volatile(MPLOCKED "incl %0" : "+m" (*p));
308 }
309 
310 static __inline void
atomic_intr_cond_dec(__atomic_intr_t * p)311 atomic_intr_cond_dec(__atomic_intr_t *p)
312 {
313           __asm __volatile(MPLOCKED "decl %0" : "+m" (*p));
314 }
315 
316 static __inline void
atomic_intr_cond_enter(__atomic_intr_t * p,void (* func)(void *),void * arg)317 atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
318 {
319           __asm __volatile(MPLOCKED "btsl $31,%0; jnc 3f; " \
320                                MPLOCKED "incl %0; " \
321                                "1: ;" \
322                                MPLOCKED "btsl $31,%0; jnc 2f; " \
323                                "movq %2,%%rdi; call *%1; " \
324                                "jmp 1b; " \
325                                "2: ;" \
326                                MPLOCKED "decl %0; " \
327                                "3: ;" \
328                                : "+m" (*p) \
329                                : "r"(func), "m"(arg) \
330                                : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
331                     /* YYY the function call may clobber even more registers? */
332 }
333 
334 /*
335  * Attempt to enter the interrupt condition variable.  Returns zero on
336  * success, 1 on failure.
337  */
338 static __inline int
atomic_intr_cond_try(__atomic_intr_t * p)339 atomic_intr_cond_try(__atomic_intr_t *p)
340 {
341           int ret;
342 
343           __asm __volatile("subl %%eax,%%eax; "                       \
344                                MPLOCKED "btsl $31,%0; jnc 2f; "       \
345                                "movl $1,%%eax;"                       \
346                                "2: ;"
347                                : "+m" (*p), "=&a"(ret)
348                          : : "cx", "dx");
349           return (ret);
350 }
351 
352 
353 static __inline int
atomic_intr_cond_test(__atomic_intr_t * p)354 atomic_intr_cond_test(__atomic_intr_t *p)
355 {
356           return((int)(*p & 0x80000000));
357 }
358 
359 static __inline void
atomic_intr_cond_exit(__atomic_intr_t * p,void (* func)(void *),void * arg)360 atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
361 {
362           __asm __volatile(MPLOCKED "btrl $31,%0; " \
363                                "testl $0x3FFFFFFF,%0; jz 1f; " \
364                                "movq %2,%%rdi; call *%1; " \
365                                "1: ;" \
366                                : "+m" (*p) \
367                                : "r"(func), "m"(arg) \
368                                : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
369                     /* YYY the function call may clobber even more registers? */
370 }
371 
372 /*
373  * Atomic compare and set
374  *
375  * if (*_dst == _old) *_dst = _new (all 32 bit words)
376  *
377  * Returns 0 on failure, non-zero on success.  The inline is designed to
378  * allow the compiler to optimize the common case where the caller calls
379  * these functions from inside a conditional.
380  */
381 
382 static __inline int
atomic_cmpxchg_int(volatile u_int * _dst,u_int _old,u_int _new)383 atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new)
384 {
385           u_int res = _old;
386 
387           __asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
388                                : "+a" (res), "=m" (*_dst) \
389                                : "r" (_new), "m" (*_dst) \
390                                : "memory");
391           return (res);
392 }
393 
394 static __inline int
atomic_cmpxchg_long_test(volatile u_long * _dst,u_long _old,u_long _new)395 atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new)
396 {
397           u_int res = _old;
398 
399           __asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
400                                           " setz %%al;"
401                                           " movsbq %%al,%%rax" \
402                                : "+a" (res), "=m" (*_dst) \
403                                : "r" (_new), "m" (*_dst) \
404                                : "memory");
405           return (res);
406 }
407 
408 static __inline int
atomic_cmpset_short(volatile u_short * _dst,u_short _old,u_short _new)409 atomic_cmpset_short(volatile u_short *_dst, u_short _old, u_short _new)
410 {
411           u_short res = _old;
412 
413           __asm __volatile(MPLOCKED "cmpxchgw %w2,%1; " \
414                                : "+a" (res), "=m" (*_dst) \
415                                : "r" (_new), "m" (*_dst) \
416                                : "memory");
417           return (res == _old);
418 }
419 
420 static __inline int
atomic_fcmpset_char(volatile u_char * _dst,u_char * _old,u_char _new)421 atomic_fcmpset_char(volatile u_char *_dst, u_char *_old, u_char _new)
422 {
423           u_char res = *_old;
424 
425           __asm __volatile(MPLOCKED "cmpxchgb %2,%0; " \
426                                : "+m" (*_dst),              /* 0 */
427                                  "+a" (*_old)                         /* 1 */
428                                : "r" (_new)                           /* 2 */
429                                : "memory", "cc");
430           return (res == *_old);
431 }
432 
433 static __inline int
atomic_fcmpset_short(volatile u_short * _dst,u_short * _old,u_short _new)434 atomic_fcmpset_short(volatile u_short *_dst, u_short *_old, u_short _new)
435 {
436           u_short res = *_old;
437 
438           __asm __volatile(MPLOCKED "cmpxchgw %2,%0; " \
439                                : "+m" (*_dst),              /* 0 */
440                                  "+a" (*_old)                         /* 1 */
441                                : "r" (_new)                           /* 2 */
442                                : "memory", "cc");
443           return (res == *_old);
444 }
445 
446 static __inline int
atomic_cmpset_int(volatile u_int * _dst,u_int _old,u_int _new)447 atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
448 {
449           u_int res = _old;
450 
451           __asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
452                                : "+a" (res), "=m" (*_dst) \
453                                : "r" (_new), "m" (*_dst) \
454                                : "memory");
455           return (res == _old);
456 }
457 
458 static __inline int
atomic_fcmpset_int(volatile u_int * _dst,u_int * _old,u_int _new)459 atomic_fcmpset_int(volatile u_int *_dst, u_int *_old, u_int _new)
460 {
461           u_int res = *_old;
462 
463           __asm __volatile(MPLOCKED "cmpxchgl %2,%0; " \
464                                : "+m" (*_dst),              /* 0 */
465                                  "+a" (*_old)                         /* 1 */
466                                : "r" (_new)                           /* 2 */
467                                : "memory", "cc");
468           return (res == *_old);
469 }
470 
471 static __inline int
atomic_cmpset_int_xacquire(volatile u_int * _dst,u_int _old,u_int _new)472 atomic_cmpset_int_xacquire(volatile u_int *_dst, u_int _old, u_int _new)
473 {
474           u_int res = _old;
475 
476           __asm __volatile(XACQUIRE MPLOCKED "cmpxchgl %2,%1; " \
477                                : "+a" (res), "=m" (*_dst) \
478                                : "r" (_new), "m" (*_dst) \
479                                : "memory");
480           return (res == _old);
481 }
482 
483 static __inline int
atomic_cmpset_int_xrelease(volatile u_int * _dst,u_int _old,u_int _new)484 atomic_cmpset_int_xrelease(volatile u_int *_dst, u_int _old, u_int _new)
485 {
486           u_int res = _old;
487 
488           __asm __volatile(XRELEASE MPLOCKED "cmpxchgl %2,%1; " \
489                                : "+a" (res), "=m" (*_dst) \
490                                : "r" (_new), "m" (*_dst) \
491                                : "memory");
492           return (res == _old);
493 }
494 
495 static __inline int
atomic_cmpset_long(volatile u_long * _dst,u_long _old,u_long _new)496 atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new)
497 {
498           u_long res = _old;
499 
500           __asm __volatile(MPLOCKED "cmpxchgq %2,%1; " \
501                                : "+a" (res), "=m" (*_dst) \
502                                : "r" (_new), "m" (*_dst) \
503                                : "memory");
504           return (res == _old);
505 }
506 
507 static __inline int
atomic_fcmpset_long(volatile u_long * _dst,u_long * _old,u_long _new)508 atomic_fcmpset_long(volatile u_long *_dst, u_long *_old, u_long _new)
509 {
510           u_long res = *_old;
511 
512           __asm __volatile(MPLOCKED "cmpxchgq %2,%0; " \
513                                : "+m" (*_dst),              /* 0 */
514                                  "+a" (*_old)                         /* 1 */
515                                : "r" (_new)                           /* 2 */
516                                : "memory", "cc");
517           return (res == *_old);
518 }
519 
520 static __inline int
atomic_cmpset_long_xacquire(volatile u_long * _dst,u_long _old,u_long _new)521 atomic_cmpset_long_xacquire(volatile u_long *_dst, u_long _old, u_long _new)
522 {
523           u_long res = _old;
524 
525           __asm __volatile(XACQUIRE MPLOCKED "cmpxchgq %2,%1; " \
526                                : "+a" (res), "=m" (*_dst) \
527                                : "r" (_new), "m" (*_dst) \
528                                : "memory");
529           return (res == _old);
530 }
531 
532 static __inline int
atomic_cmpset_long_xrelease(volatile u_long * _dst,u_long _old,u_long _new)533 atomic_cmpset_long_xrelease(volatile u_long *_dst, u_long _old, u_long _new)
534 {
535           u_long res = _old;
536 
537           __asm __volatile(XRELEASE MPLOCKED "cmpxchgq %2,%1; " \
538                                : "+a" (res), "=m" (*_dst) \
539                                : "r" (_new), "m" (*_dst) \
540                                : "memory");
541           return (res == _old);
542 }
543 
544 static inline void *
atomic_cas_ptr(volatile void * p,void * e,void * n)545 atomic_cas_ptr(volatile void *p, void *e, void *n)
546 {
547           __asm volatile(MPLOCKED " cmpxchgq %2, %1"
548               : "=a" (n), "=m" (*(volatile unsigned long *)p)
549               : "r" (n), "a" (e), "m" (*(volatile unsigned long *)p));
550 
551           return (n);
552 }
553 
554 /*
555  * Atomically add the value of v to the integer pointed to by p and return
556  * the previous value of *p.
557  */
558 static __inline u_int
atomic_fetchadd_int(volatile u_int * _p,u_int _v)559 atomic_fetchadd_int(volatile u_int *_p, u_int _v)
560 {
561           __asm __volatile(MPLOCKED "xaddl %0,%1; " \
562                                : "+r" (_v), "=m" (*_p)      \
563                                : "m" (*_p)                  \
564                                : "memory");
565           return (_v);
566 }
567 
568 static __inline u_int
atomic_fetchadd_int_xacquire(volatile u_int * _p,u_int _v)569 atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v)
570 {
571           __asm __volatile(XACQUIRE MPLOCKED "xaddl %0,%1; " \
572                                : "+r" (_v), "=m" (*_p)      \
573                                : "m" (*_p)                  \
574                                : "memory");
575           return (_v);
576 }
577 
578 static __inline u_int
atomic_fetchadd_int_xrelease(volatile u_int * _p,u_int _v)579 atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v)
580 {
581           __asm __volatile(XRELEASE MPLOCKED "xaddl %0,%1; " \
582                                : "+r" (_v), "=m" (*_p)      \
583                                : "m" (*_p)                  \
584                                : "memory");
585           return (_v);
586 }
587 
588 static __inline u_long
atomic_fetchadd_long(volatile u_long * _p,u_long _v)589 atomic_fetchadd_long(volatile u_long *_p, u_long _v)
590 {
591           __asm __volatile(MPLOCKED "xaddq %0,%1; " \
592                                : "+r" (_v), "=m" (*_p)      \
593                                : "m" (*_p)                  \
594                                : "memory");
595           return (_v);
596 }
597 
598 static __inline u_long
atomic_fetchadd_long_xacquire(volatile u_long * _p,u_long _v)599 atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v)
600 {
601           __asm __volatile(XACQUIRE MPLOCKED "xaddq %0,%1; " \
602                                : "+r" (_v), "=m" (*_p)      \
603                                : "m" (*_p)                  \
604                                : "memory");
605           return (_v);
606 }
607 
608 static __inline u_long
atomic_fetchadd_long_xrelease(volatile u_long * _p,u_long _v)609 atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v)
610 {
611           __asm __volatile(XRELEASE MPLOCKED "xaddq %0,%1; " \
612                                : "+r" (_v), "=m" (*_p)      \
613                                : "m" (*_p)                  \
614                                : "memory");
615           return (_v);
616 }
617 
618 static __inline int
atomic_testandset_int(volatile u_int * p,u_int v)619 atomic_testandset_int(volatile u_int *p, u_int v)
620 {
621           u_char res;
622 
623           __asm __volatile(
624           "         " MPLOCKED "                  "
625           "         btsl      %2,%1 ;             "
626           "         setc      %0 ;                "
627           "# atomic_testandset_int"
628           : "=q" (res),                           /* 0 */
629             "+m" (*p)                             /* 1 */
630           : "Ir" (v & 0x1f)             /* 2 */
631           : "cc");
632           return (res);
633 }
634 
635 static __inline int
atomic_testandset_long(volatile u_long * p,u_long v)636 atomic_testandset_long(volatile u_long *p, u_long v)
637 {
638           u_char res;
639 
640           __asm __volatile(
641           "         " MPLOCKED "                  "
642           "         btsq      %2,%1 ;             "
643           "         setc      %0 ;                "
644           "# atomic_testandset_long"
645           : "=q" (res),                           /* 0 */
646             "+m" (*p)                             /* 1 */
647           : "Ir" (v & 0x3f)             /* 2 */
648           : "cc");
649           return (res);
650 }
651 
652 static __inline int
atomic_testandclear_int(volatile u_int * p,u_int v)653 atomic_testandclear_int(volatile u_int *p, u_int v)
654 {
655           u_char res;
656 
657           __asm __volatile(
658           "         " MPLOCKED "                  "
659           "         btrl      %2,%1 ;             "
660           "         setc      %0 ;                "
661           "# atomic_testandclear_int"
662           : "=q" (res),                           /* 0 */
663             "+m" (*p)                             /* 1 */
664           : "Ir" (v & 0x1f)             /* 2 */
665           : "cc");
666           return (res);
667 }
668 
669 static __inline int
atomic_testandclear_long(volatile u_long * p,u_long v)670 atomic_testandclear_long(volatile u_long *p, u_long v)
671 {
672           u_char res;
673 
674           __asm __volatile(
675           "         " MPLOCKED "                  "
676           "         btrq      %2,%1 ;             "
677           "         setc      %0 ;                "
678           "# atomic_testandclear_long"
679           : "=q" (res),                           /* 0 */
680             "+m" (*p)                             /* 1 */
681           : "Ir" (v & 0x3f)             /* 2 */
682           : "cc");
683           return (res);
684 }
685 
/*
 * ATOMIC_STORE_LOAD(TYPE, LOP, SOP) expands to two inline functions:
 *
 *	atomic_store_rel_TYPE(p, v)	runs SOP on *p and v.  SOP is an
 *					XCHG, which asserts LOCK
 *					automagically, so no MPLOCKED
 *					prefix is added.
 *	atomic_load_acq_TYPE(p)		runs MPLOCKED LOP and returns the
 *					accumulator.  The accumulator is
 *					left uninitialized on entry: it
 *					can be anything.
 *
 * The expansion ends in `struct __hack' so that each use of the macro can
 * be terminated with a semicolon.
 */
#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)				\
static __inline void							\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile(SOP						\
	    : "=m" (*p),		/* 0 */				\
	      "+r" (v)			/* 1 */				\
	    : "m" (*p));		/* 2 */				\
}									\
									\
static __inline u_##TYPE						\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)				\
{									\
	u_##TYPE res;							\
									\
	__asm __volatile(MPLOCKED LOP					\
	    : "=a" (res),		/* 0 */				\
	      "=m" (*p)			/* 1 */				\
	    : "m" (*p)			/* 2 */				\
	    : "memory");						\
									\
	return (res);							\
}									\
struct __hack
713 
714 ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0");
715 ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
716 ATOMIC_STORE_LOAD(int,  "cmpxchgl %0,%1",  "xchgl %1,%0");
717 ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1",  "xchgq %1,%0");
718 
719 #undef ATOMIC_ASM
720 #undef ATOMIC_STORE_LOAD
721 
/*
 * Acquire/release flavours of the read-modify-write operations.
 * Every _acq and _rel name below is a direct alias for the unsuffixed
 * operation on the same type.
 */

/* u_char */
#define	atomic_set_acq_char		atomic_set_char
#define	atomic_clear_acq_char		atomic_clear_char
#define	atomic_add_acq_char		atomic_add_char
#define	atomic_subtract_acq_char	atomic_subtract_char
#define	atomic_set_rel_char		atomic_set_char
#define	atomic_clear_rel_char		atomic_clear_char
#define	atomic_add_rel_char		atomic_add_char
#define	atomic_subtract_rel_char	atomic_subtract_char

/* u_short */
#define	atomic_set_acq_short		atomic_set_short
#define	atomic_clear_acq_short		atomic_clear_short
#define	atomic_add_acq_short		atomic_add_short
#define	atomic_subtract_acq_short	atomic_subtract_short
#define	atomic_set_rel_short		atomic_set_short
#define	atomic_clear_rel_short		atomic_clear_short
#define	atomic_add_rel_short		atomic_add_short
#define	atomic_subtract_rel_short	atomic_subtract_short

/* u_int */
#define	atomic_set_acq_int		atomic_set_int
#define	atomic_clear_acq_int		atomic_clear_int
#define	atomic_add_acq_int		atomic_add_int
#define	atomic_subtract_acq_int		atomic_subtract_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_set_rel_int		atomic_set_int
#define	atomic_clear_rel_int		atomic_clear_int
#define	atomic_add_rel_int		atomic_add_int
#define	atomic_subtract_rel_int		atomic_subtract_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

/* u_long */
#define	atomic_set_acq_long		atomic_set_long
#define	atomic_clear_acq_long		atomic_clear_long
#define	atomic_add_acq_long		atomic_add_long
#define	atomic_subtract_acq_long	atomic_subtract_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_set_rel_long		atomic_set_long
#define	atomic_clear_rel_long		atomic_clear_long
#define	atomic_add_rel_long		atomic_add_long
#define	atomic_subtract_rel_long	atomic_subtract_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
762 
/*
 * cpumask operations are carried out with the long primitives
 * (cpumask_t is 64-bits on x86-64).
 */
#define	atomic_load_acq_cpumask		atomic_load_acq_long
#define	atomic_store_rel_cpumask	atomic_store_rel_long
#define	atomic_set_cpumask		atomic_set_long
#define	atomic_clear_cpumask		atomic_clear_long
#define	atomic_cmpset_cpumask		atomic_cmpset_long
769 
/*
 * Operations on 8-bit bytes.
 * Fixed-width names that alias the corresponding char operations.
 */
#define	atomic_set_8			atomic_set_char
#define	atomic_clear_8			atomic_clear_char
#define	atomic_add_8			atomic_add_char
#define	atomic_subtract_8		atomic_subtract_char
#define	atomic_fcmpset_8		atomic_fcmpset_char

#define	atomic_load_acq_8		atomic_load_acq_char
#define	atomic_set_acq_8		atomic_set_acq_char
#define	atomic_clear_acq_8		atomic_clear_acq_char
#define	atomic_add_acq_8		atomic_add_acq_char
#define	atomic_subtract_acq_8		atomic_subtract_acq_char

#define	atomic_store_rel_8		atomic_store_rel_char
#define	atomic_set_rel_8		atomic_set_rel_char
#define	atomic_clear_rel_8		atomic_clear_rel_char
#define	atomic_add_rel_8		atomic_add_rel_char
#define	atomic_subtract_rel_8		atomic_subtract_rel_char
786 
/*
 * Operations on 16-bit words.
 * Fixed-width names that alias the corresponding short operations.
 */
#define	atomic_set_16			atomic_set_short
#define	atomic_clear_16			atomic_clear_short
#define	atomic_add_16			atomic_add_short
#define	atomic_subtract_16		atomic_subtract_short
#define	atomic_fcmpset_16		atomic_fcmpset_short

#define	atomic_load_acq_16		atomic_load_acq_short
#define	atomic_set_acq_16		atomic_set_acq_short
#define	atomic_clear_acq_16		atomic_clear_acq_short
#define	atomic_add_acq_16		atomic_add_acq_short
#define	atomic_subtract_acq_16		atomic_subtract_acq_short

#define	atomic_store_rel_16		atomic_store_rel_short
#define	atomic_set_rel_16		atomic_set_rel_short
#define	atomic_clear_rel_16		atomic_clear_rel_short
#define	atomic_add_rel_16		atomic_add_rel_short
#define	atomic_subtract_rel_16		atomic_subtract_rel_short
803 
/*
 * Operations on 32-bit double words.
 * Fixed-width names that alias the corresponding int operations.
 */
#define	atomic_set_32			atomic_set_int
#define	atomic_clear_32			atomic_clear_int
#define	atomic_add_32			atomic_add_int
#define	atomic_subtract_32		atomic_subtract_int
#define	atomic_cmpset_32		atomic_cmpset_int
#define	atomic_fcmpset_32		atomic_fcmpset_int
#define	atomic_readandclear_32		atomic_readandclear_int
#define	atomic_fetchadd_32		atomic_fetchadd_int

#define	atomic_load_acq_32		atomic_load_acq_int
#define	atomic_set_acq_32		atomic_set_acq_int
#define	atomic_clear_acq_32		atomic_clear_acq_int
#define	atomic_add_acq_32		atomic_add_acq_int
#define	atomic_subtract_acq_32		atomic_subtract_acq_int
#define	atomic_cmpset_acq_32		atomic_cmpset_acq_int

#define	atomic_store_rel_32		atomic_store_rel_int
#define	atomic_set_rel_32		atomic_set_rel_int
#define	atomic_clear_rel_32		atomic_clear_rel_int
#define	atomic_add_rel_32		atomic_add_rel_int
#define	atomic_subtract_rel_32		atomic_subtract_rel_int
#define	atomic_cmpset_rel_32		atomic_cmpset_rel_int
825 
/*
 * Operations on 64-bit quad words.
 * Fixed-width names that alias the corresponding long operations.
 */
#define	atomic_set_64			atomic_set_long
#define	atomic_clear_64			atomic_clear_long
#define	atomic_add_64			atomic_add_long
#define	atomic_fetchadd_64		atomic_fetchadd_long
#define	atomic_swap_64			atomic_swap_long
#define	atomic_cmpset_64		atomic_cmpset_long
#define	atomic_fcmpset_64		atomic_fcmpset_long
#define	atomic_load_acq_64		atomic_load_acq_long
#define	atomic_store_rel_64		atomic_store_rel_long
836 
/*
 * Operations on pointers.
 *
 * Each macro forwards to the u_long operation of the same name.  The
 * target address is cast to (volatile u_long *) and every value operand
 * is cast to u_long, so pointer-typed arguments can be passed directly.
 * For atomic_fcmpset_ptr() the `old' argument is an address and is cast
 * to (u_long *) instead.  No cast back to a pointer type is applied to
 * the result; whatever the underlying long operation yields is what the
 * macro yields.
 */
#define atomic_set_ptr(p, v) \
	atomic_set_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_acq_ptr(p, v) \
	atomic_set_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_rel_ptr(p, v) \
	atomic_set_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_ptr(p, v) \
	atomic_clear_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_ptr(p, v) \
	atomic_add_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_acq_ptr(p, v) \
	atomic_add_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_rel_ptr(p, v) \
	atomic_add_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_ptr(p, v) \
	atomic_subtract_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_load_acq_ptr(p) \
	atomic_load_acq_long((volatile u_long *)(p))
/* The value is cast like in every other macro of this group. */
#define atomic_store_rel_ptr(p, v) \
	atomic_store_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_cmpset_ptr(dst, old, new)				\
	atomic_cmpset_long((volatile u_long *)(dst), (u_long)(old),	\
				(u_long)(new))
#define atomic_fcmpset_ptr(dst, old, new)				\
	atomic_fcmpset_long((volatile u_long *)(dst), (u_long *)(old),	\
				(u_long)(new))
#define atomic_cmpset_acq_ptr(dst, old, new)				\
	atomic_cmpset_acq_long((volatile u_long *)(dst), (u_long)(old),	\
				(u_long)(new))
#define atomic_cmpset_rel_ptr(dst, old, new)				\
	atomic_cmpset_rel_long((volatile u_long *)(dst), (u_long)(old),	\
				(u_long)(new))
#define atomic_readandclear_ptr(p)					\
	atomic_readandclear_long((volatile u_long *)(p))
880 
881 #endif /* ! _CPU_ATOMIC_H_ */
882