1 /*
2 * kmp_affinity.h -- header for affinity management
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15
16 #include "kmp.h"
17 #include "kmp_os.h"
18
19 #if KMP_AFFINITY_SUPPORTED
20 #if KMP_USE_HWLOC
21 class KMPHwlocAffinity : public KMPAffinity {
22 public:
23 class Mask : public KMPAffinity::Mask {
24 hwloc_cpuset_t mask;
25
26 public:
Mask()27 Mask() {
28 mask = hwloc_bitmap_alloc();
29 this->zero();
30 }
~Mask()31 ~Mask() { hwloc_bitmap_free(mask); }
set(int i)32 void set(int i) override { hwloc_bitmap_set(mask, i); }
is_set(int i)33 bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
clear(int i)34 void clear(int i) override { hwloc_bitmap_clr(mask, i); }
zero()35 void zero() override { hwloc_bitmap_zero(mask); }
copy(const KMPAffinity::Mask * src)36 void copy(const KMPAffinity::Mask *src) override {
37 const Mask *convert = static_cast<const Mask *>(src);
38 hwloc_bitmap_copy(mask, convert->mask);
39 }
bitwise_and(const KMPAffinity::Mask * rhs)40 void bitwise_and(const KMPAffinity::Mask *rhs) override {
41 const Mask *convert = static_cast<const Mask *>(rhs);
42 hwloc_bitmap_and(mask, mask, convert->mask);
43 }
bitwise_or(const KMPAffinity::Mask * rhs)44 void bitwise_or(const KMPAffinity::Mask *rhs) override {
45 const Mask *convert = static_cast<const Mask *>(rhs);
46 hwloc_bitmap_or(mask, mask, convert->mask);
47 }
bitwise_not()48 void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
begin()49 int begin() const override { return hwloc_bitmap_first(mask); }
end()50 int end() const override { return -1; }
next(int previous)51 int next(int previous) const override {
52 return hwloc_bitmap_next(mask, previous);
53 }
get_system_affinity(bool abort_on_error)54 int get_system_affinity(bool abort_on_error) override {
55 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
56 "Illegal get affinity operation when not capable");
57 int retval =
58 hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
59 if (retval >= 0) {
60 return 0;
61 }
62 int error = errno;
63 if (abort_on_error) {
64 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
65 }
66 return error;
67 }
set_system_affinity(bool abort_on_error)68 int set_system_affinity(bool abort_on_error) const override {
69 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70 "Illegal get affinity operation when not capable");
71 int retval =
72 hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
73 if (retval >= 0) {
74 return 0;
75 }
76 int error = errno;
77 if (abort_on_error) {
78 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
79 }
80 return error;
81 }
get_proc_group()82 int get_proc_group() const override {
83 int group = -1;
84 #if KMP_OS_WINDOWS
85 if (__kmp_num_proc_groups == 1) {
86 return 1;
87 }
88 for (int i = 0; i < __kmp_num_proc_groups; i++) {
89 // On windows, the long type is always 32 bits
90 unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
91 unsigned long second_32_bits =
92 hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
93 if (first_32_bits == 0 && second_32_bits == 0) {
94 continue;
95 }
96 if (group >= 0) {
97 return -1;
98 }
99 group = i;
100 }
101 #endif /* KMP_OS_WINDOWS */
102 return group;
103 }
104 };
determine_capable(const char * var)105 void determine_capable(const char *var) override {
106 const hwloc_topology_support *topology_support;
107 if (__kmp_hwloc_topology == NULL) {
108 if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
109 __kmp_hwloc_error = TRUE;
110 if (__kmp_affinity_verbose)
111 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
112 }
113 if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
114 __kmp_hwloc_error = TRUE;
115 if (__kmp_affinity_verbose)
116 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
117 }
118 }
119 topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
120 // Is the system capable of setting/getting this thread's affinity?
121 // Also, is topology discovery possible? (pu indicates ability to discover
122 // processing units). And finally, were there no errors when calling any
123 // hwloc_* API functions?
124 if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
125 topology_support->cpubind->get_thisthread_cpubind &&
126 topology_support->discovery->pu && !__kmp_hwloc_error) {
127 // enables affinity according to KMP_AFFINITY_CAPABLE() macro
128 KMP_AFFINITY_ENABLE(TRUE);
129 } else {
130 // indicate that hwloc didn't work and disable affinity
131 __kmp_hwloc_error = TRUE;
132 KMP_AFFINITY_DISABLE();
133 }
134 }
bind_thread(int which)135 void bind_thread(int which) override {
136 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
137 "Illegal set affinity operation when not capable");
138 KMPAffinity::Mask *mask;
139 KMP_CPU_ALLOC_ON_STACK(mask);
140 KMP_CPU_ZERO(mask);
141 KMP_CPU_SET(which, mask);
142 __kmp_set_system_affinity(mask, TRUE);
143 KMP_CPU_FREE_FROM_STACK(mask);
144 }
allocate_mask()145 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
deallocate_mask(KMPAffinity::Mask * m)146 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
allocate_mask_array(int num)147 KMPAffinity::Mask *allocate_mask_array(int num) override {
148 return new Mask[num];
149 }
deallocate_mask_array(KMPAffinity::Mask * array)150 void deallocate_mask_array(KMPAffinity::Mask *array) override {
151 Mask *hwloc_array = static_cast<Mask *>(array);
152 delete[] hwloc_array;
153 }
index_mask_array(KMPAffinity::Mask * array,int index)154 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
155 int index) override {
156 Mask *hwloc_array = static_cast<Mask *>(array);
157 return &(hwloc_array[index]);
158 }
get_api_type()159 api_type get_api_type() const override { return HWLOC; }
160 };
161 #endif /* KMP_USE_HWLOC */
162
163 #if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever.
   For each architecture below, a missing constant is defined here and an
   existing one is checked against the expected value. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
/* None of the architectures above matched. Without this #else the #error
   would belong to the MIPS64 branch and fire on every MIPS64 build. */
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
242 class KMPNativeAffinity : public KMPAffinity {
243 class Mask : public KMPAffinity::Mask {
244 typedef unsigned char mask_t;
245 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
246
247 public:
248 mask_t *mask;
Mask()249 Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
~Mask()250 ~Mask() {
251 if (mask)
252 __kmp_free(mask);
253 }
set(int i)254 void set(int i) override {
255 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
256 }
is_set(int i)257 bool is_set(int i) const override {
258 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
259 }
clear(int i)260 void clear(int i) override {
261 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
262 }
zero()263 void zero() override {
264 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
265 mask[i] = 0;
266 }
copy(const KMPAffinity::Mask * src)267 void copy(const KMPAffinity::Mask *src) override {
268 const Mask *convert = static_cast<const Mask *>(src);
269 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
270 mask[i] = convert->mask[i];
271 }
bitwise_and(const KMPAffinity::Mask * rhs)272 void bitwise_and(const KMPAffinity::Mask *rhs) override {
273 const Mask *convert = static_cast<const Mask *>(rhs);
274 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
275 mask[i] &= convert->mask[i];
276 }
bitwise_or(const KMPAffinity::Mask * rhs)277 void bitwise_or(const KMPAffinity::Mask *rhs) override {
278 const Mask *convert = static_cast<const Mask *>(rhs);
279 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
280 mask[i] |= convert->mask[i];
281 }
bitwise_not()282 void bitwise_not() override {
283 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
284 mask[i] = ~(mask[i]);
285 }
begin()286 int begin() const override {
287 int retval = 0;
288 while (retval < end() && !is_set(retval))
289 ++retval;
290 return retval;
291 }
end()292 int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
next(int previous)293 int next(int previous) const override {
294 int retval = previous + 1;
295 while (retval < end() && !is_set(retval))
296 ++retval;
297 return retval;
298 }
get_system_affinity(bool abort_on_error)299 int get_system_affinity(bool abort_on_error) override {
300 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
301 "Illegal get affinity operation when not capable");
302 #if KMP_OS_LINUX
303 int retval =
304 syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
305 #elif KMP_OS_FREEBSD
306 int retval =
307 pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
308 #endif
309 if (retval >= 0) {
310 return 0;
311 }
312 int error = errno;
313 if (abort_on_error) {
314 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
315 }
316 return error;
317 }
set_system_affinity(bool abort_on_error)318 int set_system_affinity(bool abort_on_error) const override {
319 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
320 "Illegal get affinity operation when not capable");
321 #if KMP_OS_LINUX
322 int retval =
323 syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
324 #elif KMP_OS_FREEBSD
325 int retval =
326 pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
327 #endif
328 if (retval >= 0) {
329 return 0;
330 }
331 int error = errno;
332 if (abort_on_error) {
333 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
334 }
335 return error;
336 }
337 };
determine_capable(const char * env_var)338 void determine_capable(const char *env_var) override {
339 __kmp_affinity_determine_capable(env_var);
340 }
bind_thread(int which)341 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
allocate_mask()342 KMPAffinity::Mask *allocate_mask() override {
343 KMPNativeAffinity::Mask *retval = new Mask();
344 return retval;
345 }
deallocate_mask(KMPAffinity::Mask * m)346 void deallocate_mask(KMPAffinity::Mask *m) override {
347 KMPNativeAffinity::Mask *native_mask =
348 static_cast<KMPNativeAffinity::Mask *>(m);
349 delete native_mask;
350 }
allocate_mask_array(int num)351 KMPAffinity::Mask *allocate_mask_array(int num) override {
352 return new Mask[num];
353 }
deallocate_mask_array(KMPAffinity::Mask * array)354 void deallocate_mask_array(KMPAffinity::Mask *array) override {
355 Mask *linux_array = static_cast<Mask *>(array);
356 delete[] linux_array;
357 }
index_mask_array(KMPAffinity::Mask * array,int index)358 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
359 int index) override {
360 Mask *linux_array = static_cast<Mask *>(array);
361 return &(linux_array[index]);
362 }
get_api_type()363 api_type get_api_type() const override { return NATIVE_OS; }
364 };
365 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
366
367 #if KMP_OS_WINDOWS
368 class KMPNativeAffinity : public KMPAffinity {
369 class Mask : public KMPAffinity::Mask {
370 typedef ULONG_PTR mask_t;
371 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
372 mask_t *mask;
373
374 public:
Mask()375 Mask() {
376 mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
377 }
~Mask()378 ~Mask() {
379 if (mask)
380 __kmp_free(mask);
381 }
set(int i)382 void set(int i) override {
383 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
384 }
is_set(int i)385 bool is_set(int i) const override {
386 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
387 }
clear(int i)388 void clear(int i) override {
389 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
390 }
zero()391 void zero() override {
392 for (int i = 0; i < __kmp_num_proc_groups; ++i)
393 mask[i] = 0;
394 }
copy(const KMPAffinity::Mask * src)395 void copy(const KMPAffinity::Mask *src) override {
396 const Mask *convert = static_cast<const Mask *>(src);
397 for (int i = 0; i < __kmp_num_proc_groups; ++i)
398 mask[i] = convert->mask[i];
399 }
bitwise_and(const KMPAffinity::Mask * rhs)400 void bitwise_and(const KMPAffinity::Mask *rhs) override {
401 const Mask *convert = static_cast<const Mask *>(rhs);
402 for (int i = 0; i < __kmp_num_proc_groups; ++i)
403 mask[i] &= convert->mask[i];
404 }
bitwise_or(const KMPAffinity::Mask * rhs)405 void bitwise_or(const KMPAffinity::Mask *rhs) override {
406 const Mask *convert = static_cast<const Mask *>(rhs);
407 for (int i = 0; i < __kmp_num_proc_groups; ++i)
408 mask[i] |= convert->mask[i];
409 }
bitwise_not()410 void bitwise_not() override {
411 for (int i = 0; i < __kmp_num_proc_groups; ++i)
412 mask[i] = ~(mask[i]);
413 }
begin()414 int begin() const override {
415 int retval = 0;
416 while (retval < end() && !is_set(retval))
417 ++retval;
418 return retval;
419 }
end()420 int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
next(int previous)421 int next(int previous) const override {
422 int retval = previous + 1;
423 while (retval < end() && !is_set(retval))
424 ++retval;
425 return retval;
426 }
set_system_affinity(bool abort_on_error)427 int set_system_affinity(bool abort_on_error) const override {
428 if (__kmp_num_proc_groups > 1) {
429 // Check for a valid mask.
430 GROUP_AFFINITY ga;
431 int group = get_proc_group();
432 if (group < 0) {
433 if (abort_on_error) {
434 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
435 }
436 return -1;
437 }
438 // Transform the bit vector into a GROUP_AFFINITY struct
439 // and make the system call to set affinity.
440 ga.Group = group;
441 ga.Mask = mask[group];
442 ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
443
444 KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
445 if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
446 DWORD error = GetLastError();
447 if (abort_on_error) {
448 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
449 __kmp_msg_null);
450 }
451 return error;
452 }
453 } else {
454 if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
455 DWORD error = GetLastError();
456 if (abort_on_error) {
457 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
458 __kmp_msg_null);
459 }
460 return error;
461 }
462 }
463 return 0;
464 }
get_system_affinity(bool abort_on_error)465 int get_system_affinity(bool abort_on_error) override {
466 if (__kmp_num_proc_groups > 1) {
467 this->zero();
468 GROUP_AFFINITY ga;
469 KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
470 if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
471 DWORD error = GetLastError();
472 if (abort_on_error) {
473 __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
474 KMP_ERR(error), __kmp_msg_null);
475 }
476 return error;
477 }
478 if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
479 (ga.Mask == 0)) {
480 return -1;
481 }
482 mask[ga.Group] = ga.Mask;
483 } else {
484 mask_t newMask, sysMask, retval;
485 if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
486 DWORD error = GetLastError();
487 if (abort_on_error) {
488 __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
489 KMP_ERR(error), __kmp_msg_null);
490 }
491 return error;
492 }
493 retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
494 if (!retval) {
495 DWORD error = GetLastError();
496 if (abort_on_error) {
497 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
498 KMP_ERR(error), __kmp_msg_null);
499 }
500 return error;
501 }
502 newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
503 if (!newMask) {
504 DWORD error = GetLastError();
505 if (abort_on_error) {
506 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
507 KMP_ERR(error), __kmp_msg_null);
508 }
509 }
510 *mask = retval;
511 }
512 return 0;
513 }
get_proc_group()514 int get_proc_group() const override {
515 int group = -1;
516 if (__kmp_num_proc_groups == 1) {
517 return 1;
518 }
519 for (int i = 0; i < __kmp_num_proc_groups; i++) {
520 if (mask[i] == 0)
521 continue;
522 if (group >= 0)
523 return -1;
524 group = i;
525 }
526 return group;
527 }
528 };
determine_capable(const char * env_var)529 void determine_capable(const char *env_var) override {
530 __kmp_affinity_determine_capable(env_var);
531 }
bind_thread(int which)532 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
allocate_mask()533 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
deallocate_mask(KMPAffinity::Mask * m)534 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
allocate_mask_array(int num)535 KMPAffinity::Mask *allocate_mask_array(int num) override {
536 return new Mask[num];
537 }
deallocate_mask_array(KMPAffinity::Mask * array)538 void deallocate_mask_array(KMPAffinity::Mask *array) override {
539 Mask *windows_array = static_cast<Mask *>(array);
540 delete[] windows_array;
541 }
index_mask_array(KMPAffinity::Mask * array,int index)542 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
543 int index) override {
544 Mask *windows_array = static_cast<Mask *>(array);
545 return &(windows_array[index]);
546 }
get_api_type()547 api_type get_api_type() const override { return NATIVE_OS; }
548 };
549 #endif /* KMP_OS_WINDOWS */
550 #endif /* KMP_AFFINITY_SUPPORTED */
551
552 class Address {
553 public:
554 static const unsigned maxDepth = 32;
555 unsigned labels[maxDepth];
556 unsigned childNums[maxDepth];
557 unsigned depth;
558 unsigned leader;
Address(unsigned _depth)559 Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
560 Address &operator=(const Address &b) {
561 depth = b.depth;
562 for (unsigned i = 0; i < depth; i++) {
563 labels[i] = b.labels[i];
564 childNums[i] = b.childNums[i];
565 }
566 leader = FALSE;
567 return *this;
568 }
569 bool operator==(const Address &b) const {
570 if (depth != b.depth)
571 return false;
572 for (unsigned i = 0; i < depth; i++)
573 if (labels[i] != b.labels[i])
574 return false;
575 return true;
576 }
isClose(const Address & b,int level)577 bool isClose(const Address &b, int level) const {
578 if (depth != b.depth)
579 return false;
580 if ((unsigned)level >= depth)
581 return true;
582 for (unsigned i = 0; i < (depth - level); i++)
583 if (labels[i] != b.labels[i])
584 return false;
585 return true;
586 }
587 bool operator!=(const Address &b) const { return !operator==(b); }
print()588 void print() const {
589 unsigned i;
590 printf("Depth: %u --- ", depth);
591 for (i = 0; i < depth; i++) {
592 printf("%u ", labels[i]);
593 }
594 }
595 };
596
597 class AddrUnsPair {
598 public:
599 Address first;
600 unsigned second;
AddrUnsPair(Address _first,unsigned _second)601 AddrUnsPair(Address _first, unsigned _second)
602 : first(_first), second(_second) {}
603 AddrUnsPair &operator=(const AddrUnsPair &b) {
604 first = b.first;
605 second = b.second;
606 return *this;
607 }
print()608 void print() const {
609 printf("first = ");
610 first.print();
611 printf(" --- second = %u", second);
612 }
613 bool operator==(const AddrUnsPair &b) const {
614 if (first != b.first)
615 return false;
616 if (second != b.second)
617 return false;
618 return true;
619 }
620 bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
621 };
622
__kmp_affinity_cmp_Address_labels(const void * a,const void * b)623 static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
624 const Address *aa = &(((const AddrUnsPair *)a)->first);
625 const Address *bb = &(((const AddrUnsPair *)b)->first);
626 unsigned depth = aa->depth;
627 unsigned i;
628 KMP_DEBUG_ASSERT(depth == bb->depth);
629 for (i = 0; i < depth; i++) {
630 if (aa->labels[i] < bb->labels[i])
631 return -1;
632 if (aa->labels[i] > bb->labels[i])
633 return 1;
634 }
635 return 0;
636 }
637
638 /* A structure for holding machine-specific hierarchy info to be computed once
639 at init. This structure represents a mapping of threads to the actual machine
640 hierarchy, or to our best guess at what the hierarchy might be, for the
641 purpose of performing an efficient barrier. In the worst case, when there is
642 no machine hierarchy information, it produces a tree suitable for a barrier,
643 similar to the tree used in the hyper barrier. */
644 class hierarchy_info {
645 public:
646 /* Good default values for number of leaves and branching factor, given no
647 affinity information. Behaves a bit like hyper barrier. */
648 static const kmp_uint32 maxLeaves = 4;
649 static const kmp_uint32 minBranch = 4;
650 /** Number of levels in the hierarchy. Typical levels are threads/core,
651 cores/package or socket, packages/node, nodes/machine, etc. We don't want
652 to get specific with nomenclature. When the machine is oversubscribed we
653 add levels to duplicate the hierarchy, doubling the thread capacity of the
654 hierarchy each time we add a level. */
655 kmp_uint32 maxLevels;
656
657 /** This is specifically the depth of the machine configuration hierarchy, in
658 terms of the number of levels along the longest path from root to any
659 leaf. It corresponds to the number of entries in numPerLevel if we exclude
660 all but one trailing 1. */
661 kmp_uint32 depth;
662 kmp_uint32 base_num_threads;
663 enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
664 volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
665 // 2=initialization in progress
666 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
667
668 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
669 the parent of a node at level i has. For example, if we have a machine
670 with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
671 {2, 4, 4, 1, 1}. All empty levels are set to 1. */
672 kmp_uint32 *numPerLevel;
673 kmp_uint32 *skipPerLevel;
674
deriveLevels(AddrUnsPair * adr2os,int num_addrs)675 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
676 int hier_depth = adr2os[0].first.depth;
677 int level = 0;
678 for (int i = hier_depth - 1; i >= 0; --i) {
679 int max = -1;
680 for (int j = 0; j < num_addrs; ++j) {
681 int next = adr2os[j].first.childNums[i];
682 if (next > max)
683 max = next;
684 }
685 numPerLevel[level] = max + 1;
686 ++level;
687 }
688 }
689
hierarchy_info()690 hierarchy_info()
691 : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
692
fini()693 void fini() {
694 if (!uninitialized && numPerLevel) {
695 __kmp_free(numPerLevel);
696 numPerLevel = NULL;
697 uninitialized = not_initialized;
698 }
699 }
700
init(AddrUnsPair * adr2os,int num_addrs)701 void init(AddrUnsPair *adr2os, int num_addrs) {
702 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
703 &uninitialized, not_initialized, initializing);
704 if (bool_result == 0) { // Wait for initialization
705 while (TCR_1(uninitialized) != initialized)
706 KMP_CPU_PAUSE();
707 return;
708 }
709 KMP_DEBUG_ASSERT(bool_result == 1);
710
711 /* Added explicit initialization of the data fields here to prevent usage of
712 dirty value observed when static library is re-initialized multiple times
713 (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
714 OpenMP). */
715 depth = 1;
716 resizing = 0;
717 maxLevels = 7;
718 numPerLevel =
719 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
720 skipPerLevel = &(numPerLevel[maxLevels]);
721 for (kmp_uint32 i = 0; i < maxLevels;
722 ++i) { // init numPerLevel[*] to 1 item per level
723 numPerLevel[i] = 1;
724 skipPerLevel[i] = 1;
725 }
726
727 // Sort table by physical ID
728 if (adr2os) {
729 qsort(adr2os, num_addrs, sizeof(*adr2os),
730 __kmp_affinity_cmp_Address_labels);
731 deriveLevels(adr2os, num_addrs);
732 } else {
733 numPerLevel[0] = maxLeaves;
734 numPerLevel[1] = num_addrs / maxLeaves;
735 if (num_addrs % maxLeaves)
736 numPerLevel[1]++;
737 }
738
739 base_num_threads = num_addrs;
740 for (int i = maxLevels - 1; i >= 0;
741 --i) // count non-empty levels to get depth
742 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
743 depth++;
744
745 kmp_uint32 branch = minBranch;
746 if (numPerLevel[0] == 1)
747 branch = num_addrs / maxLeaves;
748 if (branch < minBranch)
749 branch = minBranch;
750 for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
751 while (numPerLevel[d] > branch ||
752 (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
753 if (numPerLevel[d] & 1)
754 numPerLevel[d]++;
755 numPerLevel[d] = numPerLevel[d] >> 1;
756 if (numPerLevel[d + 1] == 1)
757 depth++;
758 numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
759 }
760 if (numPerLevel[0] == 1) {
761 branch = branch >> 1;
762 if (branch < 4)
763 branch = minBranch;
764 }
765 }
766
767 for (kmp_uint32 i = 1; i < depth; ++i)
768 skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
769 // Fill in hierarchy in the case of oversubscription
770 for (kmp_uint32 i = depth; i < maxLevels; ++i)
771 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
772
773 uninitialized = initialized; // One writer
774 }
775
776 // Resize the hierarchy if nproc changes to something larger than before
resize(kmp_uint32 nproc)777 void resize(kmp_uint32 nproc) {
778 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
779 while (bool_result == 0) { // someone else is trying to resize
780 KMP_CPU_PAUSE();
781 if (nproc <= base_num_threads) // happy with other thread's resize
782 return;
783 else // try to resize
784 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
785 }
786 KMP_DEBUG_ASSERT(bool_result != 0);
787 if (nproc <= base_num_threads)
788 return; // happy with other thread's resize
789
790 // Calculate new maxLevels
791 kmp_uint32 old_sz = skipPerLevel[depth - 1];
792 kmp_uint32 incs = 0, old_maxLevels = maxLevels;
793 // First see if old maxLevels is enough to contain new size
794 for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
795 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
796 numPerLevel[i - 1] *= 2;
797 old_sz *= 2;
798 depth++;
799 }
800 if (nproc > old_sz) { // Not enough space, need to expand hierarchy
801 while (nproc > old_sz) {
802 old_sz *= 2;
803 incs++;
804 depth++;
805 }
806 maxLevels += incs;
807
808 // Resize arrays
809 kmp_uint32 *old_numPerLevel = numPerLevel;
810 kmp_uint32 *old_skipPerLevel = skipPerLevel;
811 numPerLevel = skipPerLevel = NULL;
812 numPerLevel =
813 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
814 skipPerLevel = &(numPerLevel[maxLevels]);
815
816 // Copy old elements from old arrays
817 for (kmp_uint32 i = 0; i < old_maxLevels;
818 ++i) { // init numPerLevel[*] to 1 item per level
819 numPerLevel[i] = old_numPerLevel[i];
820 skipPerLevel[i] = old_skipPerLevel[i];
821 }
822
823 // Init new elements in arrays to 1
824 for (kmp_uint32 i = old_maxLevels; i < maxLevels;
825 ++i) { // init numPerLevel[*] to 1 item per level
826 numPerLevel[i] = 1;
827 skipPerLevel[i] = 1;
828 }
829
830 // Free old arrays
831 __kmp_free(old_numPerLevel);
832 }
833
834 // Fill in oversubscription levels of hierarchy
835 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
836 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
837
838 base_num_threads = nproc;
839 resizing = 0; // One writer
840 }
841 };
842 #endif // KMP_AFFINITY_H
843