xref: /NextBSD/lib/libdispatch/src/once.c (revision 33da5adc555b3bc29986eeadca03829e4ad06b1e)
1 /*
2  * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 #include "internal.h"
22 
23 #undef dispatch_once
24 #undef dispatch_once_f
25 
26 
27 typedef struct _dispatch_once_waiter_s {
28 	volatile struct _dispatch_once_waiter_s *volatile dow_next;
29 	_dispatch_thread_semaphore_t dow_sema;
30 	mach_port_t dow_thread;
31 } *_dispatch_once_waiter_t;
32 
33 #define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)
34 
35 #ifdef __BLOCKS__
36 void
dispatch_once(dispatch_once_t * val,dispatch_block_t block)37 dispatch_once(dispatch_once_t *val, dispatch_block_t block)
38 {
39 	dispatch_once_f(val, block, _dispatch_Block_invoke(block));
40 }
41 #endif
42 
43 DISPATCH_NOINLINE
44 void
dispatch_once_f(dispatch_once_t * val,void * ctxt,dispatch_function_t func)45 dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
46 {
47 	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
48 	struct _dispatch_once_waiter_s dow = { NULL, 0, MACH_PORT_NULL };
49 	_dispatch_once_waiter_t tail = &dow, next, tmp;
50 	_dispatch_thread_semaphore_t sema;
51 
52 	if (dispatch_atomic_cmpxchg(vval, NULL, tail, acquire)) {
53 		dow.dow_thread = _dispatch_thread_port();
54 		_dispatch_client_callout(ctxt, func);
55 
56 		// The next barrier must be long and strong.
57 		//
58 		// The scenario: SMP systems with weakly ordered memory models
59 		// and aggressive out-of-order instruction execution.
60 		//
61 		// The problem:
62 		//
63 		// The dispatch_once*() wrapper macro causes the callee's
64 		// instruction stream to look like this (pseudo-RISC):
65 		//
66 		//      load r5, pred-addr
67 		//      cmpi r5, -1
68 		//      beq  1f
69 		//      call dispatch_once*()
70 		//      1f:
71 		//      load r6, data-addr
72 		//
73 		// May be re-ordered like so:
74 		//
75 		//      load r6, data-addr
76 		//      load r5, pred-addr
77 		//      cmpi r5, -1
78 		//      beq  1f
79 		//      call dispatch_once*()
80 		//      1f:
81 		//
82 		// Normally, a barrier on the read side is used to workaround
83 		// the weakly ordered memory model. But barriers are expensive
84 		// and we only need to synchronize once! After func(ctxt)
85 		// completes, the predicate will be marked as "done" and the
86 		// branch predictor will correctly skip the call to
87 		// dispatch_once*().
88 		//
89 		// A far faster alternative solution: Defeat the speculative
90 		// read-ahead of peer CPUs.
91 		//
92 		// Modern architectures will throw away speculative results
93 		// once a branch mis-prediction occurs. Therefore, if we can
94 		// ensure that the predicate is not marked as being complete
95 		// until long after the last store by func(ctxt), then we have
96 		// defeated the read-ahead of peer CPUs.
97 		//
98 		// In other words, the last "store" by func(ctxt) must complete
99 		// and then N cycles must elapse before ~0l is stored to *val.
100 		// The value of N is whatever is sufficient to defeat the
101 		// read-ahead mechanism of peer CPUs.
102 		//
103 		// On some CPUs, the most fully synchronizing instruction might
104 		// need to be issued.
105 
106 		dispatch_atomic_maximally_synchronizing_barrier();
107 		// above assumed to contain release barrier
108 		next = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE, relaxed);
109 		while (next != tail) {
110 			_dispatch_wait_until(tmp = (_dispatch_once_waiter_t)next->dow_next);
111 			sema = next->dow_sema;
112 			next = tmp;
113 			_dispatch_thread_semaphore_signal(sema);
114 		}
115 	} else {
116 		dow.dow_sema = _dispatch_get_thread_semaphore();
117 		next = *vval;
118 		for (;;) {
119 			if (next == DISPATCH_ONCE_DONE) {
120 				break;
121 			}
122 			if (dispatch_atomic_cmpxchgvw(vval, next, tail, &next, release)) {
123 				dow.dow_thread = next->dow_thread;
124 				dow.dow_next = next;
125 				if (dow.dow_thread) {
126 					pthread_priority_t pp = _dispatch_get_priority();
127 					_dispatch_thread_override_start(dow.dow_thread, pp);
128 				}
129 				_dispatch_thread_semaphore_wait(dow.dow_sema);
130 				if (dow.dow_thread) {
131 					_dispatch_thread_override_end(dow.dow_thread);
132 				}
133 				break;
134 			}
135 		}
136 		_dispatch_put_thread_semaphore(dow.dow_sema);
137 	}
138 }
139