1 /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the maintenance of threads in response to team
27    creation and termination.  */
28 
29 #include "libgomp.h"
30 #include "pool.h"
31 #include <stdlib.h>
32 #include <string.h>
33 
34 #ifdef LIBGOMP_USE_PTHREADS
/* Thread attributes that gomp_team_start uses as the starting point
   when it launches new threads.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* The libgomp per-thread data structure.  Without TLS support (native
   or emulated) each thread's record is reached through a pthread key;
   otherwise it is a thread-local object.  */
#if !defined (HAVE_TLS) && !defined (USE_EMUTLS)
pthread_key_t gomp_tls_key;
#else
__thread struct gomp_thread gomp_tls_data;
#endif
47 
48 
49 /* This structure is used to communicate across pthread_create.  */
50 
51 struct gomp_thread_start_data
52 {
53   void (*fn) (void *);
54   void *fn_data;
55   struct gomp_team_state ts;
56   struct gomp_task *task;
57   struct gomp_thread_pool *thread_pool;
58   unsigned int place;
59   unsigned int num_teams;
60   unsigned int team_num;
61   bool nested;
62   pthread_t handle;
63 };
64 
65 
66 /* This function is a pthread_create entry point.  This contains the idle
67    loop in which a thread waits to be called up to become part of a team.  */
68 
69 static void *
gomp_thread_start(void * xdata)70 gomp_thread_start (void *xdata)
71 {
72   struct gomp_thread_start_data *data = xdata;
73   struct gomp_thread *thr;
74   struct gomp_thread_pool *pool;
75   void (*local_fn) (void *);
76   void *local_data;
77 
78 #if defined HAVE_TLS || defined USE_EMUTLS
79   thr = &gomp_tls_data;
80 #else
81   struct gomp_thread local_thr;
82   thr = &local_thr;
83 #endif
84   gomp_sem_init (&thr->release, 0);
85 
86   /* Extract what we need from data.  */
87   local_fn = data->fn;
88   local_data = data->fn_data;
89   thr->thread_pool = data->thread_pool;
90   thr->ts = data->ts;
91   thr->task = data->task;
92   thr->place = data->place;
93   thr->num_teams = data->num_teams;
94   thr->team_num = data->team_num;
95 #ifdef GOMP_NEEDS_THREAD_HANDLE
96   thr->handle = data->handle;
97 #endif
98 #if !(defined HAVE_TLS || defined USE_EMUTLS)
99   pthread_setspecific (gomp_tls_key, thr);
100 #endif
101 
102   thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
103 
104   /* Make thread pool local. */
105   pool = thr->thread_pool;
106 
107   if (data->nested)
108     {
109       struct gomp_team *team = thr->ts.team;
110       struct gomp_task *task = thr->task;
111 
112       gomp_barrier_wait (&team->barrier);
113 
114       local_fn (local_data);
115       gomp_team_barrier_wait_final (&team->barrier);
116       gomp_finish_task (task);
117       gomp_barrier_wait_last (&team->barrier);
118     }
119   else
120     {
121       pool->threads[thr->ts.team_id] = thr;
122 
123       gomp_simple_barrier_wait (&pool->threads_dock);
124       do
125           {
126             struct gomp_team *team = thr->ts.team;
127             struct gomp_task *task = thr->task;
128 
129             local_fn (local_data);
130             gomp_team_barrier_wait_final (&team->barrier);
131             gomp_finish_task (task);
132 
133             gomp_simple_barrier_wait (&pool->threads_dock);
134 
135             local_fn = thr->fn;
136             local_data = thr->data;
137             thr->fn = NULL;
138           }
139       while (local_fn);
140     }
141 
142   gomp_sem_destroy (&thr->release);
143   pthread_detach (pthread_self ());
144   thr->thread_pool = NULL;
145   thr->task = NULL;
146   return NULL;
147 }
148 #endif
149 
150 static inline struct gomp_team *
get_last_team(unsigned nthreads)151 get_last_team (unsigned nthreads)
152 {
153   struct gomp_thread *thr = gomp_thread ();
154   if (thr->ts.team == NULL)
155     {
156       struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
157       struct gomp_team *last_team = pool->last_team;
158       if (last_team != NULL && last_team->nthreads == nthreads)
159         {
160           pool->last_team = NULL;
161           return last_team;
162         }
163     }
164   return NULL;
165 }
166 
167 /* Create a new team data structure.  */
168 
169 struct gomp_team *
gomp_new_team(unsigned nthreads)170 gomp_new_team (unsigned nthreads)
171 {
172   struct gomp_team *team;
173   int i;
174 
175   team = get_last_team (nthreads);
176   if (team == NULL)
177     {
178       size_t extra = sizeof (team->ordered_release[0])
179                          + sizeof (team->implicit_task[0]);
180 #ifdef GOMP_USE_ALIGNED_WORK_SHARES
181       team = gomp_aligned_alloc (__alignof (struct gomp_team),
182                                          sizeof (*team) + nthreads * extra);
183 #else
184       team = team_malloc (sizeof (*team) + nthreads * extra);
185 #endif
186 
187 #ifndef HAVE_SYNC_BUILTINS
188       gomp_mutex_init (&team->work_share_list_free_lock);
189 #endif
190       gomp_barrier_init (&team->barrier, nthreads);
191       gomp_mutex_init (&team->task_lock);
192 
193       team->nthreads = nthreads;
194     }
195 
196   team->work_share_chunk = 8;
197 #ifdef HAVE_SYNC_BUILTINS
198   team->single_count = 0;
199 #endif
200   team->work_shares_to_free = &team->work_shares[0];
201   gomp_init_work_share (&team->work_shares[0], 0, nthreads);
202   team->work_shares[0].next_alloc = NULL;
203   team->work_share_list_free = NULL;
204   team->work_share_list_alloc = &team->work_shares[1];
205   for (i = 1; i < 7; i++)
206     team->work_shares[i].next_free = &team->work_shares[i + 1];
207   team->work_shares[i].next_free = NULL;
208 
209   gomp_sem_init (&team->master_release, 0);
210   team->ordered_release = (void *) &team->implicit_task[nthreads];
211   team->ordered_release[0] = &team->master_release;
212 
213   priority_queue_init (&team->task_queue);
214   team->task_count = 0;
215   team->task_queued_count = 0;
216   team->task_running_count = 0;
217   team->work_share_cancelled = 0;
218   team->team_cancelled = 0;
219 
220   team->task_detach_count = 0;
221 
222   return team;
223 }
224 
225 
226 /* Free a team data structure.  */
227 
228 static void
free_team(struct gomp_team * team)229 free_team (struct gomp_team *team)
230 {
231 #ifndef HAVE_SYNC_BUILTINS
232   gomp_mutex_destroy (&team->work_share_list_free_lock);
233 #endif
234   gomp_barrier_destroy (&team->barrier);
235   gomp_mutex_destroy (&team->task_lock);
236   priority_queue_free (&team->task_queue);
237   team_free (team);
238 }
239 
240 static void
gomp_free_pool_helper(void * thread_pool)241 gomp_free_pool_helper (void *thread_pool)
242 {
243   struct gomp_thread *thr = gomp_thread ();
244   struct gomp_thread_pool *pool
245     = (struct gomp_thread_pool *) thread_pool;
246   gomp_simple_barrier_wait_last (&pool->threads_dock);
247   gomp_sem_destroy (&thr->release);
248   thr->thread_pool = NULL;
249   thr->task = NULL;
250 #ifdef LIBGOMP_USE_PTHREADS
251   pthread_detach (pthread_self ());
252   pthread_exit (NULL);
253 #elif defined(__nvptx__)
254   asm ("exit;");
255 #elif defined(__AMDGCN__)
256   asm ("s_dcache_wb\n\t"
257        "s_endpgm");
258 #else
259 #error gomp_free_pool_helper must terminate the thread
260 #endif
261 }
262 
263 /* Free a thread pool and release its threads. */
264 
265 void
gomp_free_thread(void * arg)266 gomp_free_thread (void *arg __attribute__((unused)))
267 {
268   struct gomp_thread *thr = gomp_thread ();
269   struct gomp_thread_pool *pool = thr->thread_pool;
270   if (pool)
271     {
272       if (pool->threads_used > 0)
273           {
274             int i;
275             for (i = 1; i < pool->threads_used; i++)
276               {
277                 struct gomp_thread *nthr = pool->threads[i];
278                 nthr->fn = gomp_free_pool_helper;
279                 nthr->data = pool;
280               }
281             /* This barrier undocks threads docked on pool->threads_dock.  */
282             gomp_simple_barrier_wait (&pool->threads_dock);
283             /* And this waits till all threads have called gomp_barrier_wait_last
284                in gomp_free_pool_helper.  */
285             gomp_simple_barrier_wait (&pool->threads_dock);
286             /* Now it is safe to destroy the barrier and free the pool.  */
287             gomp_simple_barrier_destroy (&pool->threads_dock);
288 
289 #ifdef HAVE_SYNC_BUILTINS
290             __sync_fetch_and_add (&gomp_managed_threads,
291                                         1L - pool->threads_used);
292 #else
293             gomp_mutex_lock (&gomp_managed_threads_lock);
294             gomp_managed_threads -= pool->threads_used - 1L;
295             gomp_mutex_unlock (&gomp_managed_threads_lock);
296 #endif
297           }
298       if (pool->last_team)
299           free_team (pool->last_team);
300 #ifndef __nvptx__
301       team_free (pool->threads);
302       team_free (pool);
303 #endif
304       thr->thread_pool = NULL;
305     }
306   if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
307     gomp_team_end ();
308   if (thr->task != NULL)
309     {
310       struct gomp_task *task = thr->task;
311       gomp_end_task ();
312       free (task);
313     }
314 }
315 
316 /* Launch a team.  */
317 
318 #ifdef LIBGOMP_USE_PTHREADS
319 void
gomp_team_start(void (* fn)(void *),void * data,unsigned nthreads,unsigned flags,struct gomp_team * team,struct gomp_taskgroup * taskgroup)320 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
321                      unsigned flags, struct gomp_team *team,
322                      struct gomp_taskgroup *taskgroup)
323 {
324   struct gomp_thread_start_data *start_data = NULL;
325   struct gomp_thread *thr, *nthr;
326   struct gomp_task *task;
327   struct gomp_task_icv *icv;
328   bool nested;
329   struct gomp_thread_pool *pool;
330   unsigned i, n, old_threads_used = 0;
331   pthread_attr_t thread_attr, *attr;
332   unsigned long nthreads_var;
333   char bind, bind_var;
334   unsigned int s = 0, rest = 0, p = 0, k = 0;
335   unsigned int affinity_count = 0;
336   struct gomp_thread **affinity_thr = NULL;
337   bool force_display = false;
338 
339   thr = gomp_thread ();
340   nested = thr->ts.level;
341   pool = thr->thread_pool;
342   task = thr->task;
343   icv = task ? &task->icv : &gomp_global_icv;
344   if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
345     {
346       gomp_init_affinity ();
347       if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
348           gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
349                                               thr->place);
350     }
351 
352   /* Always save the previous state, even if this isn't a nested team.
353      In particular, we should save any work share state from an outer
354      orphaned work share construct.  */
355   team->prev_ts = thr->ts;
356 
357   thr->ts.team = team;
358   thr->ts.team_id = 0;
359   ++thr->ts.level;
360   if (nthreads > 1)
361     ++thr->ts.active_level;
362   thr->ts.work_share = &team->work_shares[0];
363   thr->ts.last_work_share = NULL;
364 #ifdef HAVE_SYNC_BUILTINS
365   thr->ts.single_count = 0;
366 #endif
367   thr->ts.static_trip = 0;
368   thr->task = &team->implicit_task[0];
369 #ifdef GOMP_NEEDS_THREAD_HANDLE
370   thr->handle = pthread_self ();
371 #endif
372   nthreads_var = icv->nthreads_var;
373   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
374       && thr->ts.level < gomp_nthreads_var_list_len)
375     nthreads_var = gomp_nthreads_var_list[thr->ts.level];
376   bind_var = icv->bind_var;
377   if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
378     bind_var = flags & 7;
379   bind = bind_var;
380   if (__builtin_expect (gomp_bind_var_list != NULL, 0)
381       && thr->ts.level < gomp_bind_var_list_len)
382     bind_var = gomp_bind_var_list[thr->ts.level];
383   gomp_init_task (thr->task, task, icv);
384   thr->task->taskgroup = taskgroup;
385   team->implicit_task[0].icv.nthreads_var = nthreads_var;
386   team->implicit_task[0].icv.bind_var = bind_var;
387 
388   if (nthreads == 1)
389     return;
390 
391   i = 1;
392 
393   if (__builtin_expect (gomp_places_list != NULL, 0))
394     {
395       /* Depending on chosen proc_bind model, set subpartition
396            for the master thread and initialize helper variables
397            P and optionally S, K and/or REST used by later place
398            computation for each additional thread.  */
399       p = thr->place - 1;
400       switch (bind)
401           {
402           case omp_proc_bind_true:
403           case omp_proc_bind_close:
404             if (nthreads > thr->ts.place_partition_len)
405               {
406                 /* T > P.  S threads will be placed in each place,
407                      and the final REM threads placed one by one
408                      into the already occupied places.  */
409                 s = nthreads / thr->ts.place_partition_len;
410                 rest = nthreads % thr->ts.place_partition_len;
411               }
412             else
413               s = 1;
414             k = 1;
415             break;
416           case omp_proc_bind_master:
417             /* Each thread will be bound to master's place.  */
418             break;
419           case omp_proc_bind_spread:
420             if (nthreads <= thr->ts.place_partition_len)
421               {
422                 /* T <= P.  Each subpartition will have in between s
423                      and s+1 places (subpartitions starting at or
424                      after rest will have s places, earlier s+1 places),
425                      each thread will be bound to the first place in
426                      its subpartition (except for the master thread
427                      that can be bound to another place in its
428                      subpartition).  */
429                 s = thr->ts.place_partition_len / nthreads;
430                 rest = thr->ts.place_partition_len % nthreads;
431                 rest = (s + 1) * rest + thr->ts.place_partition_off;
432                 if (p < rest)
433                     {
434                       p -= (p - thr->ts.place_partition_off) % (s + 1);
435                       thr->ts.place_partition_len = s + 1;
436                     }
437                 else
438                     {
439                       p -= (p - rest) % s;
440                       thr->ts.place_partition_len = s;
441                     }
442                 thr->ts.place_partition_off = p;
443               }
444             else
445               {
446                 /* T > P.  Each subpartition will have just a single
447                      place and we'll place between s and s+1
448                      threads into each subpartition.  */
449                 s = nthreads / thr->ts.place_partition_len;
450                 rest = nthreads % thr->ts.place_partition_len;
451                 thr->ts.place_partition_off = p;
452                 thr->ts.place_partition_len = 1;
453                 k = 1;
454               }
455             break;
456           }
457     }
458   else
459     bind = omp_proc_bind_false;
460 
461   /* We only allow the reuse of idle threads for non-nested PARALLEL
462      regions.  This appears to be implied by the semantics of
463      threadprivate variables, but perhaps that's reading too much into
464      things.  Certainly it does prevent any locking problems, since
465      only the initial program thread will modify gomp_threads.  */
466   if (!nested)
467     {
468       old_threads_used = pool->threads_used;
469 
470       if (nthreads <= old_threads_used)
471           n = nthreads;
472       else if (old_threads_used == 0)
473           {
474             n = 0;
475             gomp_simple_barrier_init (&pool->threads_dock, nthreads);
476           }
477       else
478           {
479             n = old_threads_used;
480 
481             /* Increase the barrier threshold to make sure all new
482                threads arrive before the team is released.  */
483             gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
484           }
485 
486       /* Not true yet, but soon will be.  We're going to release all
487            threads from the dock, and those that aren't part of the
488            team will exit.  */
489       pool->threads_used = nthreads;
490 
491       /* If necessary, expand the size of the gomp_threads array.  It is
492            expected that changes in the number of threads are rare, thus we
493            make no effort to expand gomp_threads_size geometrically.  */
494       if (nthreads >= pool->threads_size)
495           {
496             pool->threads_size = nthreads + 1;
497             pool->threads
498               = gomp_realloc (pool->threads,
499                                   pool->threads_size
500                                   * sizeof (struct gomp_thread *));
501             /* Add current (master) thread to threads[].  */
502             pool->threads[0] = thr;
503           }
504 
505       /* Release existing idle threads.  */
506       for (; i < n; ++i)
507           {
508             unsigned int place_partition_off = thr->ts.place_partition_off;
509             unsigned int place_partition_len = thr->ts.place_partition_len;
510             unsigned int place = 0;
511             if (__builtin_expect (gomp_places_list != NULL, 0))
512               {
513                 switch (bind)
514                     {
515                     case omp_proc_bind_true:
516                     case omp_proc_bind_close:
517                       if (k == s)
518                         {
519                           ++p;
520                           if (p == (team->prev_ts.place_partition_off
521                                         + team->prev_ts.place_partition_len))
522                               p = team->prev_ts.place_partition_off;
523                           k = 1;
524                           if (i == nthreads - rest)
525                               s = 1;
526                         }
527                       else
528                         ++k;
529                       break;
530                     case omp_proc_bind_master:
531                       break;
532                     case omp_proc_bind_spread:
533                       if (k == 0)
534                         {
535                           /* T <= P.  */
536                           if (p < rest)
537                               p += s + 1;
538                           else
539                               p += s;
540                           if (p == (team->prev_ts.place_partition_off
541                                         + team->prev_ts.place_partition_len))
542                               p = team->prev_ts.place_partition_off;
543                           place_partition_off = p;
544                           if (p < rest)
545                               place_partition_len = s + 1;
546                           else
547                               place_partition_len = s;
548                         }
549                       else
550                         {
551                           /* T > P.  */
552                           if (k == s)
553                               {
554                                 ++p;
555                                 if (p == (team->prev_ts.place_partition_off
556                                             + team->prev_ts.place_partition_len))
557                                   p = team->prev_ts.place_partition_off;
558                                 k = 1;
559                                 if (i == nthreads - rest)
560                                   s = 1;
561                               }
562                           else
563                               ++k;
564                           place_partition_off = p;
565                           place_partition_len = 1;
566                         }
567                       break;
568                     }
569                 if (affinity_thr != NULL
570                       || (bind != omp_proc_bind_true
571                           && pool->threads[i]->place != p + 1)
572                       || pool->threads[i]->place <= place_partition_off
573                       || pool->threads[i]->place > (place_partition_off
574                                                             + place_partition_len))
575                     {
576                       unsigned int l;
577                       force_display = true;
578                       if (affinity_thr == NULL)
579                         {
580                           unsigned int j;
581 
582                           if (team->prev_ts.place_partition_len > 64)
583                               affinity_thr
584                                 = gomp_malloc (team->prev_ts.place_partition_len
585                                                    * sizeof (struct gomp_thread *));
586                           else
587                               affinity_thr
588                                 = gomp_alloca (team->prev_ts.place_partition_len
589                                                    * sizeof (struct gomp_thread *));
590                           memset (affinity_thr, '\0',
591                                     team->prev_ts.place_partition_len
592                                     * sizeof (struct gomp_thread *));
593                           for (j = i; j < old_threads_used; j++)
594                               {
595                                 if (pool->threads[j]->place
596                                     > team->prev_ts.place_partition_off
597                                     && (pool->threads[j]->place
598                                           <= (team->prev_ts.place_partition_off
599                                               + team->prev_ts.place_partition_len)))
600                                   {
601                                     l = pool->threads[j]->place - 1
602                                           - team->prev_ts.place_partition_off;
603                                     pool->threads[j]->data = affinity_thr[l];
604                                     affinity_thr[l] = pool->threads[j];
605                                   }
606                                 pool->threads[j] = NULL;
607                               }
608                           if (nthreads > old_threads_used)
609                               memset (&pool->threads[old_threads_used],
610                                         '\0', ((nthreads - old_threads_used)
611                                                * sizeof (struct gomp_thread *)));
612                           n = nthreads;
613                           affinity_count = old_threads_used - i;
614                         }
615                       if (affinity_count == 0)
616                         break;
617                       l = p;
618                       if (affinity_thr[l - team->prev_ts.place_partition_off]
619                           == NULL)
620                         {
621                           if (bind != omp_proc_bind_true)
622                               continue;
623                           for (l = place_partition_off;
624                                  l < place_partition_off + place_partition_len;
625                                  l++)
626                               if (affinity_thr[l - team->prev_ts.place_partition_off]
627                                   != NULL)
628                                 break;
629                           if (l == place_partition_off + place_partition_len)
630                               continue;
631                         }
632                       nthr = affinity_thr[l - team->prev_ts.place_partition_off];
633                       affinity_thr[l - team->prev_ts.place_partition_off]
634                         = (struct gomp_thread *) nthr->data;
635                       affinity_count--;
636                       pool->threads[i] = nthr;
637                     }
638                 else
639                     nthr = pool->threads[i];
640                 place = p + 1;
641               }
642             else
643               nthr = pool->threads[i];
644             nthr->ts.team = team;
645             nthr->ts.work_share = &team->work_shares[0];
646             nthr->ts.last_work_share = NULL;
647             nthr->ts.team_id = i;
648             nthr->ts.level = team->prev_ts.level + 1;
649             nthr->ts.active_level = thr->ts.active_level;
650             nthr->ts.place_partition_off = place_partition_off;
651             nthr->ts.place_partition_len = place_partition_len;
652             nthr->ts.def_allocator = thr->ts.def_allocator;
653 #ifdef HAVE_SYNC_BUILTINS
654             nthr->ts.single_count = 0;
655 #endif
656             nthr->ts.static_trip = 0;
657             nthr->num_teams = thr->num_teams;
658             nthr->team_num = thr->team_num;
659             nthr->task = &team->implicit_task[i];
660             nthr->place = place;
661             gomp_init_task (nthr->task, task, icv);
662             team->implicit_task[i].icv.nthreads_var = nthreads_var;
663             team->implicit_task[i].icv.bind_var = bind_var;
664             nthr->task->taskgroup = taskgroup;
665             nthr->fn = fn;
666             nthr->data = data;
667             team->ordered_release[i] = &nthr->release;
668           }
669 
670       if (__builtin_expect (affinity_thr != NULL, 0))
671           {
672             /* If AFFINITY_THR is non-NULL just because we had to
673                permute some threads in the pool, but we've managed
674                to find exactly as many old threads as we'd find
675                without affinity, we don't need to handle this
676                specially anymore.  */
677             if (nthreads <= old_threads_used
678                 ? (affinity_count == old_threads_used - nthreads)
679                 : (i == old_threads_used))
680               {
681                 if (team->prev_ts.place_partition_len > 64)
682                     free (affinity_thr);
683                 affinity_thr = NULL;
684                 affinity_count = 0;
685               }
686             else
687               {
688                 i = 1;
689                 /* We are going to compute the places/subpartitions
690                      again from the beginning.  So, we need to reinitialize
691                      vars modified by the switch (bind) above inside
692                      of the loop, to the state they had after the initial
693                      switch (bind).  */
694                 switch (bind)
695                     {
696                     case omp_proc_bind_true:
697                     case omp_proc_bind_close:
698                       if (nthreads > thr->ts.place_partition_len)
699                         /* T > P.  S has been changed, so needs
700                            to be recomputed.  */
701                         s = nthreads / thr->ts.place_partition_len;
702                       k = 1;
703                       p = thr->place - 1;
704                       break;
705                     case omp_proc_bind_master:
706                       /* No vars have been changed.  */
707                       break;
708                     case omp_proc_bind_spread:
709                       p = thr->ts.place_partition_off;
710                       if (k != 0)
711                         {
712                           /* T > P.  */
713                           s = nthreads / team->prev_ts.place_partition_len;
714                           k = 1;
715                         }
716                       break;
717                     }
718 
719                 /* Increase the barrier threshold to make sure all new
720                      threads and all the threads we're going to let die
721                      arrive before the team is released.  */
722                 if (affinity_count)
723                     gomp_simple_barrier_reinit (&pool->threads_dock,
724                                                       nthreads + affinity_count);
725               }
726           }
727 
728       if (i == nthreads)
729           goto do_release;
730 
731     }
732 
733   if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
734     {
735       long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
736 
737       if (old_threads_used == 0)
738           --diff;
739 
740 #ifdef HAVE_SYNC_BUILTINS
741       __sync_fetch_and_add (&gomp_managed_threads, diff);
742 #else
743       gomp_mutex_lock (&gomp_managed_threads_lock);
744       gomp_managed_threads += diff;
745       gomp_mutex_unlock (&gomp_managed_threads_lock);
746 #endif
747     }
748 
749   attr = &gomp_thread_attr;
750   if (__builtin_expect (gomp_places_list != NULL, 0))
751     {
752       size_t stacksize;
753       pthread_attr_init (&thread_attr);
754       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
755           pthread_attr_setstacksize (&thread_attr, stacksize);
756       attr = &thread_attr;
757     }
758 
759   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
760                                   * (nthreads - i));
761 
762   /* Launch new threads.  */
763   for (; i < nthreads; ++i)
764     {
765       int err;
766 
767       start_data->ts.place_partition_off = thr->ts.place_partition_off;
768       start_data->ts.place_partition_len = thr->ts.place_partition_len;
769       start_data->place = 0;
770       if (__builtin_expect (gomp_places_list != NULL, 0))
771           {
772             switch (bind)
773               {
774               case omp_proc_bind_true:
775               case omp_proc_bind_close:
776                 if (k == s)
777                     {
778                       ++p;
779                       if (p == (team->prev_ts.place_partition_off
780                                   + team->prev_ts.place_partition_len))
781                         p = team->prev_ts.place_partition_off;
782                       k = 1;
783                       if (i == nthreads - rest)
784                         s = 1;
785                     }
786                 else
787                     ++k;
788                 break;
789               case omp_proc_bind_master:
790                 break;
791               case omp_proc_bind_spread:
792                 if (k == 0)
793                     {
794                       /* T <= P.  */
795                       if (p < rest)
796                         p += s + 1;
797                       else
798                         p += s;
799                       if (p == (team->prev_ts.place_partition_off
800                                   + team->prev_ts.place_partition_len))
801                         p = team->prev_ts.place_partition_off;
802                       start_data->ts.place_partition_off = p;
803                       if (p < rest)
804                         start_data->ts.place_partition_len = s + 1;
805                       else
806                         start_data->ts.place_partition_len = s;
807                     }
808                 else
809                     {
810                       /* T > P.  */
811                       if (k == s)
812                         {
813                           ++p;
814                           if (p == (team->prev_ts.place_partition_off
815                                         + team->prev_ts.place_partition_len))
816                               p = team->prev_ts.place_partition_off;
817                           k = 1;
818                           if (i == nthreads - rest)
819                               s = 1;
820                         }
821                       else
822                         ++k;
823                       start_data->ts.place_partition_off = p;
824                       start_data->ts.place_partition_len = 1;
825                     }
826                 break;
827               }
828             start_data->place = p + 1;
829             if (affinity_thr != NULL && pool->threads[i] != NULL)
830               continue;
831             gomp_init_thread_affinity (attr, p);
832           }
833 
834       start_data->fn = fn;
835       start_data->fn_data = data;
836       start_data->ts.team = team;
837       start_data->ts.work_share = &team->work_shares[0];
838       start_data->ts.last_work_share = NULL;
839       start_data->ts.team_id = i;
840       start_data->ts.level = team->prev_ts.level + 1;
841       start_data->ts.active_level = thr->ts.active_level;
842       start_data->ts.def_allocator = thr->ts.def_allocator;
843 #ifdef HAVE_SYNC_BUILTINS
844       start_data->ts.single_count = 0;
845 #endif
846       start_data->ts.static_trip = 0;
847       start_data->num_teams = thr->num_teams;
848       start_data->team_num = thr->team_num;
849       start_data->task = &team->implicit_task[i];
850       gomp_init_task (start_data->task, task, icv);
851       team->implicit_task[i].icv.nthreads_var = nthreads_var;
852       team->implicit_task[i].icv.bind_var = bind_var;
853       start_data->task->taskgroup = taskgroup;
854       start_data->thread_pool = pool;
855       start_data->nested = nested;
856 
857       attr = gomp_adjust_thread_attr (attr, &thread_attr);
858       err = pthread_create (&start_data->handle, attr, gomp_thread_start,
859                                   start_data);
860       start_data++;
861       if (err != 0)
862           gomp_fatal ("Thread creation failed: %s", strerror (err));
863     }
864 
865   if (__builtin_expect (attr == &thread_attr, 0))
866     pthread_attr_destroy (&thread_attr);
867 
868  do_release:
869   if (nested)
870     gomp_barrier_wait (&team->barrier);
871   else
872     gomp_simple_barrier_wait (&pool->threads_dock);
873 
874   /* Decrease the barrier threshold to match the number of threads
875      that should arrive back at the end of this team.  The extra
876      threads should be exiting.  Note that we arrange for this test
877      to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
878      the barrier as well as gomp_managed_threads was temporarily
879      set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
880      AFFINITY_COUNT if non-zero will be always at least
881      OLD_THREADS_COUNT - NTHREADS.  */
882   if (__builtin_expect (nthreads < old_threads_used, 0)
883       || __builtin_expect (affinity_count, 0))
884     {
885       long diff = (long) nthreads - (long) old_threads_used;
886 
887       if (affinity_count)
888           diff = -affinity_count;
889 
890       gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
891 
892 #ifdef HAVE_SYNC_BUILTINS
893       __sync_fetch_and_add (&gomp_managed_threads, diff);
894 #else
895       gomp_mutex_lock (&gomp_managed_threads_lock);
896       gomp_managed_threads += diff;
897       gomp_mutex_unlock (&gomp_managed_threads_lock);
898 #endif
899     }
900   if (__builtin_expect (gomp_display_affinity_var, 0))
901     {
902       if (nested
903             || nthreads != old_threads_used
904             || force_display)
905           {
906             gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
907                                                   thr->place);
908             if (nested)
909               {
910                 start_data -= nthreads - 1;
911                 for (i = 1; i < nthreads; ++i)
912                     {
913                       gomp_display_affinity_thread (
914 #ifdef LIBGOMP_USE_PTHREADS
915                                                             start_data->handle,
916 #else
917                                                             gomp_thread_self (),
918 #endif
919                                                             &start_data->ts,
920                                                             start_data->place);
921                       start_data++;
922                     }
923               }
924             else
925               {
926                 for (i = 1; i < nthreads; ++i)
927                     {
928                       gomp_thread_handle handle
929                         = gomp_thread_to_pthread_t (pool->threads[i]);
930                       gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
931                                                             pool->threads[i]->place);
932                     }
933               }
934           }
935     }
936   if (__builtin_expect (affinity_thr != NULL, 0)
937       && team->prev_ts.place_partition_len > 64)
938     free (affinity_thr);
939 }
940 #endif
941 
942 
943 /* Terminate the current team.  This is only to be called by the master
944    thread.  We assume that we must wait for the other threads.  */
945 
946 void
gomp_team_end(void)947 gomp_team_end (void)
948 {
949   struct gomp_thread *thr = gomp_thread ();
950   struct gomp_team *team = thr->ts.team;
951 
952   /* This barrier handles all pending explicit threads.
953      As #pragma omp cancel parallel might get awaited count in
954      team->barrier in a inconsistent state, we need to use a different
955      counter here.  */
956   gomp_team_barrier_wait_final (&team->barrier);
957   if (__builtin_expect (team->team_cancelled, 0))
958     {
959       struct gomp_work_share *ws = team->work_shares_to_free;
960       do
961           {
962             struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
963             if (next_ws == NULL)
964               gomp_ptrlock_set (&ws->next_ws, ws);
965             gomp_fini_work_share (ws);
966             ws = next_ws;
967           }
968       while (ws != NULL);
969     }
970   else
971     gomp_fini_work_share (thr->ts.work_share);
972 
973   gomp_end_task ();
974   thr->ts = team->prev_ts;
975 
976   if (__builtin_expect (thr->ts.level != 0, 0))
977     {
978 #ifdef HAVE_SYNC_BUILTINS
979       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
980 #else
981       gomp_mutex_lock (&gomp_managed_threads_lock);
982       gomp_managed_threads -= team->nthreads - 1L;
983       gomp_mutex_unlock (&gomp_managed_threads_lock);
984 #endif
985       /* This barrier has gomp_barrier_wait_last counterparts
986            and ensures the team can be safely destroyed.  */
987       gomp_barrier_wait (&team->barrier);
988     }
989 
990   if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
991     {
992       struct gomp_work_share *ws = team->work_shares[0].next_alloc;
993       do
994           {
995             struct gomp_work_share *next_ws = ws->next_alloc;
996             free (ws);
997             ws = next_ws;
998           }
999       while (ws != NULL);
1000     }
1001   gomp_sem_destroy (&team->master_release);
1002 
1003   if (__builtin_expect (thr->ts.team != NULL, 0)
1004       || __builtin_expect (team->nthreads == 1, 0))
1005     free_team (team);
1006   else
1007     {
1008       struct gomp_thread_pool *pool = thr->thread_pool;
1009       if (pool->last_team)
1010           free_team (pool->last_team);
1011       pool->last_team = team;
1012       gomp_release_thread_pool (pool);
1013     }
1014 }
1015 
1016 #ifdef LIBGOMP_USE_PTHREADS
1017 
1018 /* Constructors for this file.  */
1019 
1020 static void __attribute__((constructor))
initialize_team(void)1021 initialize_team (void)
1022 {
1023 #if !defined HAVE_TLS && !defined USE_EMUTLS
1024   static struct gomp_thread initial_thread_tls_data;
1025 
1026   pthread_key_create (&gomp_tls_key, NULL);
1027   pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
1028 #endif
1029 
1030   if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
1031     gomp_fatal ("could not create thread pool destructor.");
1032 }
1033 
1034 static void __attribute__((destructor))
team_destructor(void)1035 team_destructor (void)
1036 {
1037   /* Without this dlclose on libgomp could lead to subsequent
1038      crashes.  */
1039   pthread_key_delete (gomp_thread_destructor);
1040 }
1041 
1042 /* Similar to gomp_free_pool_helper, but don't detach itself,
1043    gomp_pause_host will pthread_join those threads.  */
1044 
1045 static void
gomp_pause_pool_helper(void * thread_pool)1046 gomp_pause_pool_helper (void *thread_pool)
1047 {
1048   struct gomp_thread *thr = gomp_thread ();
1049   struct gomp_thread_pool *pool
1050     = (struct gomp_thread_pool *) thread_pool;
1051   gomp_simple_barrier_wait_last (&pool->threads_dock);
1052   gomp_sem_destroy (&thr->release);
1053   thr->thread_pool = NULL;
1054   thr->task = NULL;
1055   pthread_exit (NULL);
1056 }
1057 
1058 /* Free a thread pool and release its threads.  Return non-zero on
1059    failure.  */
1060 
1061 int
gomp_pause_host(void)1062 gomp_pause_host (void)
1063 {
1064   struct gomp_thread *thr = gomp_thread ();
1065   struct gomp_thread_pool *pool = thr->thread_pool;
1066   if (thr->ts.level)
1067     return -1;
1068   if (pool)
1069     {
1070       if (pool->threads_used > 0)
1071           {
1072             int i;
1073             pthread_t *thrs
1074               = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
1075             for (i = 1; i < pool->threads_used; i++)
1076               {
1077                 struct gomp_thread *nthr = pool->threads[i];
1078                 nthr->fn = gomp_pause_pool_helper;
1079                 nthr->data = pool;
1080                 thrs[i] = gomp_thread_to_pthread_t (nthr);
1081               }
1082             /* This barrier undocks threads docked on pool->threads_dock.  */
1083             gomp_simple_barrier_wait (&pool->threads_dock);
1084             /* And this waits till all threads have called gomp_barrier_wait_last
1085                in gomp_pause_pool_helper.  */
1086             gomp_simple_barrier_wait (&pool->threads_dock);
1087             /* Now it is safe to destroy the barrier and free the pool.  */
1088             gomp_simple_barrier_destroy (&pool->threads_dock);
1089 
1090 #ifdef HAVE_SYNC_BUILTINS
1091             __sync_fetch_and_add (&gomp_managed_threads,
1092                                         1L - pool->threads_used);
1093 #else
1094             gomp_mutex_lock (&gomp_managed_threads_lock);
1095             gomp_managed_threads -= pool->threads_used - 1L;
1096             gomp_mutex_unlock (&gomp_managed_threads_lock);
1097 #endif
1098             for (i = 1; i < pool->threads_used; i++)
1099               pthread_join (thrs[i], NULL);
1100           }
1101       if (pool->last_team)
1102           free_team (pool->last_team);
1103 #ifndef __nvptx__
1104       team_free (pool->threads);
1105       team_free (pool);
1106 #endif
1107       thr->thread_pool = NULL;
1108     }
1109   return 0;
1110 }
1111 #endif
1112 
1113 struct gomp_task_icv *
gomp_new_icv(void)1114 gomp_new_icv (void)
1115 {
1116   struct gomp_thread *thr = gomp_thread ();
1117   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
1118   gomp_init_task (task, NULL, &gomp_global_icv);
1119   thr->task = task;
1120 #ifdef LIBGOMP_USE_PTHREADS
1121   pthread_setspecific (gomp_thread_destructor, thr);
1122 #endif
1123   return &task->icv;
1124 }
1125