1 /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the taskloop construct.  It is included twice, once
27    for the long and once for unsigned long long variant.  */
28 
29 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
30    false, then we must not delay in executing the task.  If UNTIED is true,
31    then the task may be executed by any member of the team.  */
32 
33 void
GOMP_taskloop(void (* fn)(void *),void * data,void (* cpyfn)(void *,void *),long arg_size,long arg_align,unsigned flags,unsigned long num_tasks,int priority,TYPE start,TYPE end,TYPE step)34 GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
35                  long arg_size, long arg_align, unsigned flags,
36                  unsigned long num_tasks, int priority,
37                  TYPE start, TYPE end, TYPE step)
38 {
39   struct gomp_thread *thr = gomp_thread ();
40   struct gomp_team *team = thr->ts.team;
41 
42 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
43   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
44      tied to one thread all the time.  This means UNTIED tasks must be
45      tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
46      might be running on different thread than FN.  */
47   if (cpyfn)
48     flags &= ~GOMP_TASK_FLAG_IF;
49   flags &= ~GOMP_TASK_FLAG_UNTIED;
50 #endif
51 
52   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
53   if (team && gomp_team_barrier_cancelled (&team->barrier))
54     {
55     early_return:
56       if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
57             == GOMP_TASK_FLAG_REDUCTION)
58           {
59             struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
60             uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
61             /* Tell callers GOMP_taskgroup_reduction_register has not been
62                called.  */
63             ptr[2] = 0;
64           }
65       return;
66     }
67 
68 #ifdef TYPE_is_long
69   TYPE s = step;
70   if (step > 0)
71     {
72       if (start >= end)
73           goto early_return;
74       s--;
75     }
76   else
77     {
78       if (start <= end)
79           goto early_return;
80       s++;
81     }
82   UTYPE n = (end - start + s) / step;
83 #else
84   UTYPE n;
85   if (flags & GOMP_TASK_FLAG_UP)
86     {
87       if (start >= end)
88           goto early_return;
89       n = (end - start + step - 1) / step;
90     }
91   else
92     {
93       if (start <= end)
94           goto early_return;
95       n = (start - end - step - 1) / -step;
96     }
97 #endif
98 
99   TYPE task_step = step;
100   TYPE nfirst_task_step = step;
101   unsigned long nfirst = n;
102   if (flags & GOMP_TASK_FLAG_GRAINSIZE)
103     {
104       unsigned long grainsize = num_tasks;
105 #ifdef TYPE_is_long
106       num_tasks = n / grainsize;
107 #else
108       UTYPE ndiv = n / grainsize;
109       num_tasks = ndiv;
110       if (num_tasks != ndiv)
111           num_tasks = ~0UL;
112 #endif
113       if ((flags & GOMP_TASK_FLAG_STRICT)
114             && num_tasks != ~0ULL)
115           {
116             UTYPE mod = n % grainsize;
117             task_step = (TYPE) grainsize * step;
118             if (mod)
119               {
120                 num_tasks++;
121                 nfirst_task_step = (TYPE) mod * step;
122                 if (num_tasks == 1)
123                     task_step = nfirst_task_step;
124                 else
125                     nfirst = num_tasks - 2;
126               }
127           }
128       else if (num_tasks <= 1)
129           {
130             num_tasks = 1;
131             task_step = end - start;
132           }
133       else if (num_tasks >= grainsize
134 #ifndef TYPE_is_long
135                  && num_tasks != ~0UL
136 #endif
137                 )
138           {
139             UTYPE mul = num_tasks * grainsize;
140             task_step = (TYPE) grainsize * step;
141             if (mul != n)
142               {
143                 nfirst_task_step = task_step;
144                 task_step += step;
145                 nfirst = n - mul - 1;
146               }
147           }
148       else
149           {
150             UTYPE div = n / num_tasks;
151             UTYPE mod = n % num_tasks;
152             task_step = (TYPE) div * step;
153             if (mod)
154               {
155                 nfirst_task_step = task_step;
156                 task_step += step;
157                 nfirst = mod - 1;
158               }
159           }
160     }
161   else
162     {
163       if (num_tasks == 0)
164           num_tasks = team ? team->nthreads : 1;
165       if (num_tasks >= n)
166           num_tasks = n;
167       else
168           {
169             UTYPE div = n / num_tasks;
170             UTYPE mod = n % num_tasks;
171             task_step = (TYPE) div * step;
172             if (mod)
173               {
174                 nfirst_task_step = task_step;
175                 task_step += step;
176                 nfirst = mod - 1;
177               }
178           }
179     }
180 
181   if (flags & GOMP_TASK_FLAG_NOGROUP)
182     {
183       if (__builtin_expect (gomp_cancel_var, 0)
184             && thr->task
185             && thr->task->taskgroup)
186           {
187             if (thr->task->taskgroup->cancelled)
188               return;
189             if (thr->task->taskgroup->workshare
190                 && thr->task->taskgroup->prev
191                 && thr->task->taskgroup->prev->cancelled)
192               return;
193           }
194     }
195   else
196     {
197       ialias_call (GOMP_taskgroup_start) ();
198       if (flags & GOMP_TASK_FLAG_REDUCTION)
199           {
200             struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
201             uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
202             ialias_call (GOMP_taskgroup_reduction_register) (ptr);
203           }
204     }
205 
206   if (priority > gomp_max_task_priority_var)
207     priority = gomp_max_task_priority_var;
208 
209   if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
210       || (thr->task && thr->task->final_task)
211       || team->task_count + num_tasks > 64 * team->nthreads)
212     {
213       unsigned long i;
214       if (__builtin_expect (cpyfn != NULL, 0))
215           {
216             struct gomp_task task[num_tasks];
217             struct gomp_task *parent = thr->task;
218             arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
219             char buf[num_tasks * arg_size + arg_align - 1];
220             char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
221                                         & ~(uintptr_t) (arg_align - 1));
222             char *orig_arg = arg;
223             for (i = 0; i < num_tasks; i++)
224               {
225                 gomp_init_task (&task[i], parent, gomp_icv (false));
226                 task[i].priority = priority;
227                 task[i].kind = GOMP_TASK_UNDEFERRED;
228                 task[i].final_task = (thr->task && thr->task->final_task)
229                                            || (flags & GOMP_TASK_FLAG_FINAL);
230                 if (thr->task)
231                     {
232                       task[i].in_tied_task = thr->task->in_tied_task;
233                       task[i].taskgroup = thr->task->taskgroup;
234                     }
235                 thr->task = &task[i];
236                 cpyfn (arg, data);
237                 arg += arg_size;
238               }
239             arg = orig_arg;
240             for (i = 0; i < num_tasks; i++)
241               {
242                 thr->task = &task[i];
243                 ((TYPE *)arg)[0] = start;
244                 start += task_step;
245                 ((TYPE *)arg)[1] = start;
246                 if (i == nfirst)
247                     task_step = nfirst_task_step;
248                 fn (arg);
249                 arg += arg_size;
250                 if (!priority_queue_empty_p (&task[i].children_queue,
251                                                      MEMMODEL_RELAXED))
252                     {
253                       gomp_mutex_lock (&team->task_lock);
254                       gomp_clear_parent (&task[i].children_queue);
255                       gomp_mutex_unlock (&team->task_lock);
256                     }
257                 gomp_end_task ();
258               }
259           }
260       else
261           for (i = 0; i < num_tasks; i++)
262             {
263               struct gomp_task task;
264 
265               gomp_init_task (&task, thr->task, gomp_icv (false));
266               task.priority = priority;
267               task.kind = GOMP_TASK_UNDEFERRED;
268               task.final_task = (thr->task && thr->task->final_task)
269                                     || (flags & GOMP_TASK_FLAG_FINAL);
270               if (thr->task)
271                 {
272                     task.in_tied_task = thr->task->in_tied_task;
273                     task.taskgroup = thr->task->taskgroup;
274                 }
275               thr->task = &task;
276               ((TYPE *)data)[0] = start;
277               start += task_step;
278               ((TYPE *)data)[1] = start;
279               if (i == nfirst)
280                 task_step = nfirst_task_step;
281               fn (data);
282               if (!priority_queue_empty_p (&task.children_queue,
283                                                    MEMMODEL_RELAXED))
284                 {
285                     gomp_mutex_lock (&team->task_lock);
286                     gomp_clear_parent (&task.children_queue);
287                     gomp_mutex_unlock (&team->task_lock);
288                 }
289               gomp_end_task ();
290             }
291     }
292   else
293     {
294       struct gomp_task *tasks[num_tasks];
295       struct gomp_task *parent = thr->task;
296       struct gomp_taskgroup *taskgroup = parent->taskgroup;
297       char *arg;
298       int do_wake;
299       unsigned long i;
300 
301       for (i = 0; i < num_tasks; i++)
302           {
303             struct gomp_task *task
304               = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
305             tasks[i] = task;
306             arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
307                                 & ~(uintptr_t) (arg_align - 1));
308             gomp_init_task (task, parent, gomp_icv (false));
309             task->priority = priority;
310             task->kind = GOMP_TASK_UNDEFERRED;
311             task->in_tied_task = parent->in_tied_task;
312             task->taskgroup = taskgroup;
313             thr->task = task;
314             if (cpyfn)
315               {
316                 cpyfn (arg, data);
317                 task->copy_ctors_done = true;
318               }
319             else
320               memcpy (arg, data, arg_size);
321             ((TYPE *)arg)[0] = start;
322             start += task_step;
323             ((TYPE *)arg)[1] = start;
324             if (i == nfirst)
325               task_step = nfirst_task_step;
326             thr->task = parent;
327             task->kind = GOMP_TASK_WAITING;
328             task->fn = fn;
329             task->fn_data = arg;
330             task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
331           }
332       gomp_mutex_lock (&team->task_lock);
333       /* If parallel or taskgroup has been cancelled, don't start new
334            tasks.  */
335       if (__builtin_expect (gomp_cancel_var, 0)
336             && cpyfn == NULL)
337           {
338             if (gomp_team_barrier_cancelled (&team->barrier))
339               {
340               do_cancel:
341                 gomp_mutex_unlock (&team->task_lock);
342                 for (i = 0; i < num_tasks; i++)
343                     {
344                       gomp_finish_task (tasks[i]);
345                       free (tasks[i]);
346                     }
347                 if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
348                     ialias_call (GOMP_taskgroup_end) ();
349                 return;
350               }
351             if (taskgroup)
352               {
353                 if (taskgroup->cancelled)
354                     goto do_cancel;
355                 if (taskgroup->workshare
356                       && taskgroup->prev
357                       && taskgroup->prev->cancelled)
358                     goto do_cancel;
359               }
360           }
361       if (taskgroup)
362           taskgroup->num_children += num_tasks;
363       for (i = 0; i < num_tasks; i++)
364           {
365             struct gomp_task *task = tasks[i];
366             priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
367                                          task, priority,
368                                          PRIORITY_INSERT_BEGIN,
369                                          /*last_parent_depends_on=*/false,
370                                          task->parent_depends_on);
371             if (taskgroup)
372               priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
373                                            task, priority, PRIORITY_INSERT_BEGIN,
374                                            /*last_parent_depends_on=*/false,
375                                            task->parent_depends_on);
376             priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
377                                          PRIORITY_INSERT_END,
378                                          /*last_parent_depends_on=*/false,
379                                          task->parent_depends_on);
380             ++team->task_count;
381             ++team->task_queued_count;
382           }
383       gomp_team_barrier_set_task_pending (&team->barrier);
384       if (team->task_running_count + !parent->in_tied_task
385             < team->nthreads)
386           {
387             do_wake = team->nthreads - team->task_running_count
388                         - !parent->in_tied_task;
389             if ((unsigned long) do_wake > num_tasks)
390               do_wake = num_tasks;
391           }
392       else
393           do_wake = 0;
394       gomp_mutex_unlock (&team->task_lock);
395       if (do_wake)
396           gomp_team_barrier_wake (&team->barrier, do_wake);
397     }
398   if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
399     ialias_call (GOMP_taskgroup_end) ();
400 }
401