xref: /dragonfly/contrib/gcc-8.0/gcc/omp-expand.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth
4 
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 
62 /* OMP region information.  Every parallel and workshare
63    directive is enclosed between two markers, the OMP_* directive
64    and a corresponding GIMPLE_OMP_RETURN statement.
   Regions form a tree linked through OUTER, INNER and NEXT.  They are
   allocated with XCNEW by new_omp_region and released by
   omp_free_regions.  */
65 
66 struct omp_region
67 {
68   /* The enclosing region, or NULL for a toplevel region.  */
69   struct omp_region *outer;
70 
71   /* First child region.  new_omp_region links every new child in
     front of the existing ones, so this is the most recently created
     child.  */
72   struct omp_region *inner;
73 
74   /* Next peer region: the next entry in the parent's list of children,
     or in the toplevel list headed by root_omp_region.  */
75   struct omp_region *next;
76 
77   /* Block containing the omp directive as its last stmt.  */
78   basic_block entry;
79 
80   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
81   basic_block exit;
82 
83   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
84   basic_block cont;
85 
86   /* If this is a combined parallel+workshare region, this is a list
87      of additional arguments needed by the combined parallel+workshare
88      library call.  It is filled in by determine_parallel_type using
     get_ws_args_for.  */
89   vec<tree, va_gc> *ws_args;
90 
91   /* The code for the omp directive of this region.  */
92   enum gimple_code type;
93 
94   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
95   enum omp_clause_schedule_kind sched_kind;
96 
97   /* Schedule modifiers.  expand_parallel_call tests this against
     OMP_CLAUSE_SCHEDULE_NONMONOTONIC.  */
98   unsigned char sched_modifiers;
99 
100   /* True if this is a combined parallel+workshare region.
     determine_parallel_type sets it on both the parallel region and
     its inner workshare region.  */
101   bool is_combined_parallel;
102 
103   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104      a depend clause.  */
105   gomp_ordered *ord_stmt;
106 };
107 
/* Head of the list of toplevel regions.  new_omp_region links each new
   toplevel region in front of it and omp_free_regions resets it to
   NULL after releasing the whole tree.  */
108 static struct omp_region *root_omp_region;
/* NOTE(review): not referenced in the part of the file reviewed here;
   presumably records whether any outlined child function has been
   dumped -- confirm at its uses.  */
109 static bool omp_any_child_fn_dumped;
110 
/* Forward declarations of static helpers.  expand_omp_build_assign is
   called by expand_parallel_call before this point in the file defines
   it; its last parameter defaults to false.  */
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112                                              bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
115 
116 /* Return true if REGION is a combined parallel+workshare region.  */
117 
118 static inline bool
is_combined_parallel(struct omp_region * region)119 is_combined_parallel (struct omp_region *region)
120 {
121   return region->is_combined_parallel;
122 }
123 
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125    is the immediate dominator of PAR_ENTRY_BB, return true if there
126    are no data dependencies that would prevent expanding the parallel
127    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128 
129    When expanding a combined parallel+workshare region, the call to
130    the child function may need additional arguments in the case of
131    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
132    computed out of variables passed in from the parent to the child
133    via 'struct .omp_data_s'.  For instance:
134 
135           #pragma omp parallel for schedule (guided, i * 4)
136           for (j ...)
137 
138    Is lowered into:
139 
140           # BLOCK 2 (PAR_ENTRY_BB)
141           .omp_data_o.i = i;
142           #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
143 
144           # BLOCK 3 (WS_ENTRY_BB)
145           .omp_data_i = &.omp_data_o;
146           D.1667 = .omp_data_i->i;
147           D.1598 = D.1667 * 4;
148           #pragma omp for schedule (guided, D.1598)
149 
150    When we outline the parallel region, the call to the child function
151    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152    that value is computed *after* the call site.  So, in principle we
153    cannot do the transformation.
154 
155    To see whether the code in WS_ENTRY_BB blocks the combined
156    parallel+workshare call, we collect all the variables used in the
157    GIMPLE_OMP_FOR header check whether they appear on the LHS of any
158    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
159    call.
160 
161    FIXME.  If we had the SSA form built at this point, we could merely
162    hoist the code in block 3 into block 2 and be done with it.  But at
163    this point we don't have dataflow information and though we could
164    hack something up here, it is really not worth the aggravation.  */
165 
166 static bool
workshare_safe_to_combine_p(basic_block ws_entry_bb)167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
168 {
169   struct omp_for_data fd;
170   gimple *ws_stmt = last_stmt (ws_entry_bb);
171 
172   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173     return true;
174 
175   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 
177   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
178 
179   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180     return false;
181   if (fd.iter_type != long_integer_type_node)
182     return false;
183 
184   /* FIXME.  We give up too easily here.  If any of these arguments
185      are not constants, they will likely involve variables that have
186      been mapped into fields of .omp_data_s for sharing with the child
187      function.  With appropriate data flow, it would be possible to
188      see through this.  */
189   if (!is_gimple_min_invariant (fd.loop.n1)
190       || !is_gimple_min_invariant (fd.loop.n2)
191       || !is_gimple_min_invariant (fd.loop.step)
192       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193     return false;
194 
195   return true;
196 }
197 
198 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
199    presence (SIMD_SCHEDULE).  */
200 
201 static tree
omp_adjust_chunk_size(tree chunk_size,bool simd_schedule)202 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
203 {
204   if (!simd_schedule)
205     return chunk_size;
206 
207   poly_uint64 vf = omp_max_vf ();
208   if (known_eq (vf, 1U))
209     return chunk_size;
210 
211   tree type = TREE_TYPE (chunk_size);
212   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213                                   build_int_cst (type, vf - 1));
214   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215                           build_int_cst (type, -vf));
216 }
217 
218 /* Collect additional arguments needed to emit a combined
219    parallel+workshare call.  WS_STMT is the workshare directive being
220    expanded.  */
221 
222 static vec<tree, va_gc> *
get_ws_args_for(gimple * par_stmt,gimple * ws_stmt)223 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
224 {
225   tree t;
226   location_t loc = gimple_location (ws_stmt);
227   vec<tree, va_gc> *ws_args;
228 
229   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
230     {
231       struct omp_for_data fd;
232       tree n1, n2;
233 
234       omp_extract_for_data (for_stmt, &fd, NULL);
235       n1 = fd.loop.n1;
236       n2 = fd.loop.n2;
237 
238       if (gimple_omp_for_combined_into_p (for_stmt))
239           {
240             tree innerc
241               = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242                                      OMP_CLAUSE__LOOPTEMP_);
243             gcc_assert (innerc);
244             n1 = OMP_CLAUSE_DECL (innerc);
245             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246                                             OMP_CLAUSE__LOOPTEMP_);
247             gcc_assert (innerc);
248             n2 = OMP_CLAUSE_DECL (innerc);
249           }
250 
251       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
252 
253       t = fold_convert_loc (loc, long_integer_type_node, n1);
254       ws_args->quick_push (t);
255 
256       t = fold_convert_loc (loc, long_integer_type_node, n2);
257       ws_args->quick_push (t);
258 
259       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260       ws_args->quick_push (t);
261 
262       if (fd.chunk_size)
263           {
264             t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265             t = omp_adjust_chunk_size (t, fd.simd_schedule);
266             ws_args->quick_push (t);
267           }
268 
269       return ws_args;
270     }
271   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
272     {
273       /* Number of sections is equal to the number of edges from the
274            GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275            the exit of the sections region.  */
276       basic_block bb = single_succ (gimple_bb (ws_stmt));
277       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278       vec_alloc (ws_args, 1);
279       ws_args->quick_push (t);
280       return ws_args;
281     }
282 
283   gcc_unreachable ();
284 }
285 
286 /* Discover whether REGION is a combined parallel+workshare region.  */
287 
288 static void
determine_parallel_type(struct omp_region * region)289 determine_parallel_type (struct omp_region *region)
290 {
291   basic_block par_entry_bb, par_exit_bb;
292   basic_block ws_entry_bb, ws_exit_bb;
293 
294   if (region == NULL || region->inner == NULL
295       || region->exit == NULL || region->inner->exit == NULL
296       || region->inner->cont == NULL)
297     return;
298 
299   /* We only support parallel+for and parallel+sections.  */
300   if (region->type != GIMPLE_OMP_PARALLEL
301       || (region->inner->type != GIMPLE_OMP_FOR
302             && region->inner->type != GIMPLE_OMP_SECTIONS))
303     return;
304 
305   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306      WS_EXIT_BB -> PAR_EXIT_BB.  */
307   par_entry_bb = region->entry;
308   par_exit_bb = region->exit;
309   ws_entry_bb = region->inner->entry;
310   ws_exit_bb = region->inner->exit;
311 
312   if (single_succ (par_entry_bb) == ws_entry_bb
313       && single_succ (ws_exit_bb) == par_exit_bb
314       && workshare_safe_to_combine_p (ws_entry_bb)
315       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316             || (last_and_only_stmt (ws_entry_bb)
317                 && last_and_only_stmt (par_exit_bb))))
318     {
319       gimple *par_stmt = last_stmt (par_entry_bb);
320       gimple *ws_stmt = last_stmt (ws_entry_bb);
321 
322       if (region->inner->type == GIMPLE_OMP_FOR)
323           {
324             /* If this is a combined parallel loop, we need to determine
325                whether or not to use the combined library calls.  There
326                are two cases where we do not apply the transformation:
327                static loops and any kind of ordered loop.  In the first
328                case, we already open code the loop so there is no need
329                to do anything else.  In the latter case, the combined
330                parallel loop call would still need extra synchronization
331                to implement ordered semantics, so there would not be any
332                gain in using the combined call.  */
333             tree clauses = gimple_omp_for_clauses (ws_stmt);
334             tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335             if (c == NULL
336                 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337                       == OMP_CLAUSE_SCHEDULE_STATIC)
338                 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
339               {
340                 region->is_combined_parallel = false;
341                 region->inner->is_combined_parallel = false;
342                 return;
343               }
344           }
345 
346       region->is_combined_parallel = true;
347       region->inner->is_combined_parallel = true;
348       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
349     }
350 }
351 
352 /* Debugging dumps for parallel regions.  These are declared without
   'static', so they have external linkage.  dump_omp_region writes to
   the given FILE, while the two debug_* functions write to stderr.  */
353 void dump_omp_region (FILE *, struct omp_region *, int);
354 void debug_omp_region (struct omp_region *);
355 void debug_all_omp_regions (void);
356 
357 /* Dump the parallel region tree rooted at REGION.  */
358 
359 void
dump_omp_region(FILE * file,struct omp_region * region,int indent)360 dump_omp_region (FILE *file, struct omp_region *region, int indent)
361 {
362   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363              gimple_code_name[region->type]);
364 
365   if (region->inner)
366     dump_omp_region (file, region->inner, indent + 4);
367 
368   if (region->cont)
369     {
370       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371                  region->cont->index);
372     }
373 
374   if (region->exit)
375     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376                region->exit->index);
377   else
378     fprintf (file, "%*s[no exit marker]\n", indent, "");
379 
380   if (region->next)
381     dump_omp_region (file, region->next, indent);
382 }
383 
384 DEBUG_FUNCTION void
debug_omp_region(struct omp_region * region)385 debug_omp_region (struct omp_region *region)
386 {
387   dump_omp_region (stderr, region, 0);
388 }
389 
390 DEBUG_FUNCTION void
debug_all_omp_regions(void)391 debug_all_omp_regions (void)
392 {
393   dump_omp_region (stderr, root_omp_region, 0);
394 }
395 
396 /* Create a new parallel region starting at STMT inside region PARENT.  */
397 
398 static struct omp_region *
new_omp_region(basic_block bb,enum gimple_code type,struct omp_region * parent)399 new_omp_region (basic_block bb, enum gimple_code type,
400                     struct omp_region *parent)
401 {
402   struct omp_region *region = XCNEW (struct omp_region);
403 
404   region->outer = parent;
405   region->entry = bb;
406   region->type = type;
407 
408   if (parent)
409     {
410       /* This is a nested region.  Add it to the list of inner
411            regions in PARENT.  */
412       region->next = parent->inner;
413       parent->inner = region;
414     }
415   else
416     {
417       /* This is a toplevel region.  Add it to the list of toplevel
418            regions in ROOT_OMP_REGION.  */
419       region->next = root_omp_region;
420       root_omp_region = region;
421     }
422 
423   return region;
424 }
425 
426 /* Release the memory associated with the region tree rooted at REGION.  */
427 
428 static void
free_omp_region_1(struct omp_region * region)429 free_omp_region_1 (struct omp_region *region)
430 {
431   struct omp_region *i, *n;
432 
433   for (i = region->inner; i ; i = n)
434     {
435       n = i->next;
436       free_omp_region_1 (i);
437     }
438 
439   free (region);
440 }
441 
442 /* Release the memory for the entire omp region tree.  */
443 
444 void
omp_free_regions(void)445 omp_free_regions (void)
446 {
447   struct omp_region *r, *n;
448   for (r = root_omp_region; r ; r = n)
449     {
450       n = r->next;
451       free_omp_region_1 (r);
452     }
453   root_omp_region = NULL;
454 }
455 
456 /* A convenience function to build an empty GIMPLE_COND with just the
457    condition.  */
458 
459 static gcond *
gimple_build_cond_empty(tree cond)460 gimple_build_cond_empty (tree cond)
461 {
462   enum tree_code pred_code;
463   tree lhs, rhs;
464 
465   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
467 }
468 
469 /* Return true if a parallel REGION is within a declare target function or
470    within a target region and is not a part of a gridified target.  */
471 
472 static bool
parallel_needs_hsa_kernel_p(struct omp_region * region)473 parallel_needs_hsa_kernel_p (struct omp_region *region)
474 {
475   bool indirect = false;
476   for (region = region->outer; region; region = region->outer)
477     {
478       if (region->type == GIMPLE_OMP_PARALLEL)
479           indirect = true;
480       else if (region->type == GIMPLE_OMP_TARGET)
481           {
482             gomp_target *tgt_stmt
483               = as_a <gomp_target *> (last_stmt (region->entry));
484 
485             if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486                                      OMP_CLAUSE__GRIDDIM_))
487               return indirect;
488             else
489               return true;
490           }
491     }
492 
493   if (lookup_attribute ("omp declare target",
494                               DECL_ATTRIBUTES (current_function_decl)))
495     return true;
496 
497   return false;
498 }
499 
500 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
501    Add CHILD_FNDECL to decl chain of the supercontext of the block
502    ENTRY_BLOCK - this is the block which originally contained the
503    code from which CHILD_FNDECL was created.
504 
505    Together, these actions ensure that the debug info for the outlined
506    function will be emitted with the correct lexical scope.  */
507 
508 static void
adjust_context_and_scope(struct omp_region * region,tree entry_block,tree child_fndecl)509 adjust_context_and_scope (struct omp_region *region, tree entry_block,
510                                 tree child_fndecl)
511 {
512   tree parent_fndecl = NULL_TREE;
513   gimple *entry_stmt;
514   /* OMP expansion expands inner regions before outer ones, so if
515      we e.g. have explicit task region nested in parallel region, when
516      expanding the task region current_function_decl will be the original
517      source function, but we actually want to use as context the child
518      function of the parallel.  */
519   for (region = region->outer;
520        region && parent_fndecl == NULL_TREE; region = region->outer)
521     switch (region->type)
522       {
523       case GIMPLE_OMP_PARALLEL:
524       case GIMPLE_OMP_TASK:
525           entry_stmt = last_stmt (region->entry);
526           parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
527           break;
528       case GIMPLE_OMP_TARGET:
529           entry_stmt = last_stmt (region->entry);
530           parent_fndecl
531             = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
532           break;
533       default:
534           break;
535       }
536 
537   if (parent_fndecl == NULL_TREE)
538     parent_fndecl = current_function_decl;
539   DECL_CONTEXT (child_fndecl) = parent_fndecl;
540 
541   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
542     {
543       tree b = BLOCK_SUPERCONTEXT (entry_block);
544       if (TREE_CODE (b) == BLOCK)
545         {
546             DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
547             BLOCK_VARS (b) = child_fndecl;
548           }
549     }
550 }
551 
552 /* Build the call to the BUILT_IN_GOMP_PARALLEL builtin, or to one of
   the combined BUILT_IN_GOMP_PARALLEL_LOOP_* and
   BUILT_IN_GOMP_PARALLEL_SECTIONS builtins, to actually
553    generate the parallel operation.  REGION is the parallel region
554    being expanded.  BB is the block where to insert the code.
   ENTRY_STMT is the parallel directive; its clauses, data argument and
   child function are read from it.  WS_ARGS
555    will be set if this is a call to a combined parallel+workshare
556    construct, it contains the list of additional arguments needed by
557    the workshare construct.

   The emitted call receives, in order: the address of the child
   function, the address of the data argument (or a null pointer), the
   number of threads, the WS_ARGS (if any) and the flags built from the
   proc_bind clause.  */
558 
559 static void
expand_parallel_call(struct omp_region * region,basic_block bb,gomp_parallel * entry_stmt,vec<tree,va_gc> * ws_args)560 expand_parallel_call (struct omp_region *region, basic_block bb,
561                           gomp_parallel *entry_stmt,
562                           vec<tree, va_gc> *ws_args)
563 {
564   tree t, t1, t2, val, cond, c, clauses, flags;
565   gimple_stmt_iterator gsi;
566   gimple *stmt;
567   enum built_in_function start_ix;
568   int start_ix2;
569   location_t clause_loc;
570   vec<tree, va_gc> *args;
571 
572   clauses = gimple_omp_parallel_clauses (entry_stmt);
573 
574   /* Determine what flavor of GOMP_parallel we will be
575      emitting.  */
576   start_ix = BUILT_IN_GOMP_PARALLEL;
577   if (is_combined_parallel (region))
578     {
579       switch (region->inner->type)
580           {
581           case GIMPLE_OMP_FOR:
582             gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
            /* START_IX2 is an offset that is added to
               BUILT_IN_GOMP_PARALLEL_LOOP_STATIC below: 3 for runtime
               schedules, 3 + SCHED_KIND for nonmonotonic dynamic or
               guided schedules, and SCHED_KIND itself otherwise.
               NOTE(review): this presumably relies on the declaration
               order of the BUILT_IN_GOMP_PARALLEL_LOOP_* builtins and
               on the numeric values of the schedule kinds -- confirm
               against their definitions.  */
583             switch (region->inner->sched_kind)
584               {
585               case OMP_CLAUSE_SCHEDULE_RUNTIME:
586                 start_ix2 = 3;
587                 break;
588               case OMP_CLAUSE_SCHEDULE_DYNAMIC:
589               case OMP_CLAUSE_SCHEDULE_GUIDED:
590                 if (region->inner->sched_modifiers
591                       & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
592                     {
593                       start_ix2 = 3 + region->inner->sched_kind;
594                       break;
595                     }
596                 /* FALLTHRU */
597               default:
598                 start_ix2 = region->inner->sched_kind;
599                 break;
600               }
601             start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
602             start_ix = (enum built_in_function) start_ix2;
603             break;
604           case GIMPLE_OMP_SECTIONS:
605             start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
606             break;
607           default:
608             gcc_unreachable ();
609           }
610     }
611 
612   /* By default, the value of NUM_THREADS is zero (selected at run time)
613      and there is no conditional.  */
614   cond = NULL_TREE;
615   val = build_int_cst (unsigned_type_node, 0);
616   flags = build_int_cst (unsigned_type_node, 0);
617 
618   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
619   if (c)
620     cond = OMP_CLAUSE_IF_EXPR (c);
621 
622   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
623   if (c)
624     {
625       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
626       clause_loc = OMP_CLAUSE_LOCATION (c);
627     }
628   else
629     clause_loc = gimple_location (entry_stmt);
630 
631   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
632   if (c)
633     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
634 
635   /* Ensure 'val' is of the correct type.  */
636   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
637 
638   /* If we found the clause 'if (cond)', build either
639      (cond == 0) or (cond ? val : 1u).  The first form is used when VAL
     is the constant zero: it evaluates to 1 when COND is false and
     keeps the run-time-selected default of 0 when COND is true.  */
640   if (cond)
641     {
642       cond = gimple_boolify (cond);
643 
644       if (integer_zerop (val))
645           val = fold_build2_loc (clause_loc,
646                                  EQ_EXPR, unsigned_type_node, cond,
647                                  build_int_cst (TREE_TYPE (cond), 0));
648       else
649           {
650             basic_block cond_bb, then_bb, else_bb;
651             edge e, e_then, e_else;
652             tree tmp_then, tmp_else, tmp_join, tmp_var;
653 
            /* In SSA form the two arms and the join each get their own
               SSA name; otherwise all three are the plain variable
               TMP_VAR.  */
654             tmp_var = create_tmp_var (TREE_TYPE (val));
655             if (gimple_in_ssa_p (cfun))
656               {
657                 tmp_then = make_ssa_name (tmp_var);
658                 tmp_else = make_ssa_name (tmp_var);
659                 tmp_join = make_ssa_name (tmp_var);
660               }
661             else
662               {
663                 tmp_then = tmp_var;
664                 tmp_else = tmp_var;
665                 tmp_join = tmp_var;
666               }
667 
            /* Split BB after its labels and build a diamond: COND_BB
               branches to THEN_BB (TMP_THEN = VAL) and to ELSE_BB
               (TMP_ELSE = 1), and both fall through to the remainder
               of BB.  */
668             e = split_block_after_labels (bb);
669             cond_bb = e->src;
670             bb = e->dest;
671             remove_edge (e);
672 
673             then_bb = create_empty_bb (cond_bb);
674             else_bb = create_empty_bb (then_bb);
675             set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
676             set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
677 
678             stmt = gimple_build_cond_empty (cond);
679             gsi = gsi_start_bb (cond_bb);
680             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
681 
682             gsi = gsi_start_bb (then_bb);
683             expand_omp_build_assign (&gsi, tmp_then, val, true);
684 
685             gsi = gsi_start_bb (else_bb);
686             expand_omp_build_assign (&gsi, tmp_else,
687                                            build_int_cst (unsigned_type_node, 1),
688                                            true);
689 
690             make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
691             make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
692             add_bb_to_loop (then_bb, cond_bb->loop_father);
693             add_bb_to_loop (else_bb, cond_bb->loop_father);
694             e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
695             e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
696 
            /* In SSA form the two definitions are merged by a PHI node
               at the start of BB.  */
697             if (gimple_in_ssa_p (cfun))
698               {
699                 gphi *phi = create_phi_node (tmp_join, bb);
700                 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
701                 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
702               }
703 
704             val = tmp_join;
705           }
706 
707       gsi = gsi_start_bb (bb);
708       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
709                                               false, GSI_CONTINUE_LINKING);
710     }
711 
  /* Assemble the call arguments: T2 is the address of the child
     function and T1 the address of the data argument, or a null
     pointer if there is none.  */
712   gsi = gsi_last_nondebug_bb (bb);
713   t = gimple_omp_parallel_data_arg (entry_stmt);
714   if (t == NULL)
715     t1 = null_pointer_node;
716   else
717     t1 = build_fold_addr_expr (t);
718   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
719   t2 = build_fold_addr_expr (child_fndecl);
720 
721   vec_alloc (args, 4 + vec_safe_length (ws_args));
722   args->quick_push (t2);
723   args->quick_push (t1);
724   args->quick_push (val);
725   if (ws_args)
726     args->splice (*ws_args);
727   args->quick_push (flags);
728 
729   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
730                                      builtin_decl_explicit (start_ix), args);
731 
732   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
733                                   false, GSI_CONTINUE_LINKING);
734 
  /* When HSA code generation is requested and the region qualifies,
     register the child function as an HSA kernel.  */
735   if (hsa_gen_requested_p ()
736       && parallel_needs_hsa_kernel_p (region))
737     {
738       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
739       hsa_register_kernel (child_cnode);
740     }
741 }
742 
743 /* Build the function call to GOMP_task to actually
744    generate the task operation.  BB is the block where to insert the code.
   ENTRY_STMT is the task directive.  When ENTRY_STMT is a taskloop
   task, a call to BUILT_IN_GOMP_TASKLOOP or BUILT_IN_GOMP_TASKLOOP_ULL
   is built instead, and the entry block of the region enclosing REGION
   must end in the taskloop GIMPLE_OMP_FOR.  */
745 
746 static void
expand_task_call(struct omp_region * region,basic_block bb,gomp_task * entry_stmt)747 expand_task_call (struct omp_region *region, basic_block bb,
748                       gomp_task *entry_stmt)
749 {
750   tree t1, t2, t3;
751   gimple_stmt_iterator gsi;
752   location_t loc = gimple_location (entry_stmt);
753 
754   tree clauses = gimple_omp_task_clauses (entry_stmt);
755 
756   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
757   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
758   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
759   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
760   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
761   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
762 
  /* IFLAGS collects the flag bits that are known at compile time.  The
     FINAL flag and, for taskloops with an if clause, the IF flag depend
     on run-time expressions and are added to FLAGS further down.  */
763   unsigned int iflags
764     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
765       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
766       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
767 
768   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
769   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
770   tree num_tasks = NULL_TREE;
771   bool ull = false;
772   if (taskloop_p)
773     {
774       gimple *g = last_stmt (region->outer->entry);
775       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
776                       && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
777       struct omp_for_data fd;
778       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      /* The start and end variables are the decls of the first two
         _looptemp_ clauses of the task.  The second lookup starts at
         the chain of the first result without checking it for NULL.  */
779       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
780       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
781                                         OMP_CLAUSE__LOOPTEMP_);
782       startvar = OMP_CLAUSE_DECL (startvar);
783       endvar = OMP_CLAUSE_DECL (endvar);
784       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
785       if (fd.loop.cond_code == LT_EXPR)
786           iflags |= GOMP_TASK_FLAG_UP;
      /* NUM_TASKS is taken from the num_tasks clause of the taskloop,
         else from its grainsize clause (also setting the GRAINSIZE
         flag), else it is zero.  */
787       tree tclauses = gimple_omp_for_clauses (g);
788       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
789       if (num_tasks)
790           num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
791       else
792           {
793             num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
794             if (num_tasks)
795               {
796                 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
797                 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
798               }
799             else
800               num_tasks = integer_zero_node;
801           }
802       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      /* Without an if clause the IF flag is set unconditionally; with
         one it is added conditionally when FLAGS is built below.  */
803       if (ifc == NULL_TREE)
804           iflags |= GOMP_TASK_FLAG_IF;
805       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
806           iflags |= GOMP_TASK_FLAG_NOGROUP;
807       ull = fd.iter_type == long_long_unsigned_type_node;
808     }
809   else if (priority)
810     iflags |= GOMP_TASK_FLAG_PRIORITY;
811 
812   tree flags = build_int_cst (unsigned_type_node, iflags);
813 
  /* For a plain task the if clause becomes the COND argument of the
     call (true when there is no clause).  For a taskloop it instead
     contributes GOMP_TASK_FLAG_IF to FLAGS when the expression is
     true.  */
814   tree cond = boolean_true_node;
815   if (ifc)
816     {
817       if (taskloop_p)
818           {
819             tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
820             t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
821                                      build_int_cst (unsigned_type_node,
822                                                         GOMP_TASK_FLAG_IF),
823                                      build_int_cst (unsigned_type_node, 0));
824             flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
825                                            flags, t);
826           }
827       else
828           cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
829     }
830 
  /* A final clause adds GOMP_TASK_FLAG_FINAL to FLAGS when its
     expression is true.  */
831   if (finalc)
832     {
833       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
834       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
835                                  build_int_cst (unsigned_type_node,
836                                                     GOMP_TASK_FLAG_FINAL),
837                                  build_int_cst (unsigned_type_node, 0));
838       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
839     }
  /* From here on DEPEND and PRIORITY hold the argument values to pass
     rather than the clauses: the clause decl or a null pointer, and the
     priority expression converted to int or zero.  */
840   if (depend)
841     depend = OMP_CLAUSE_DECL (depend);
842   else
843     depend = build_int_cst (ptr_type_node, 0);
844   if (priority)
845     priority = fold_convert (integer_type_node,
846                                    OMP_CLAUSE_PRIORITY_EXPR (priority));
847   else
848     priority = integer_zero_node;
849 
  /* T1 is the address of the child function, T2 the address of the
     data argument and T3 the address of the copy function; T2 and T3
     are null pointers when the task has none.  */
850   gsi = gsi_last_nondebug_bb (bb);
851   tree t = gimple_omp_task_data_arg (entry_stmt);
852   if (t == NULL)
853     t2 = null_pointer_node;
854   else
855     t2 = build_fold_addr_expr_loc (loc, t);
856   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
857   t = gimple_omp_task_copy_fn (entry_stmt);
858   if (t == NULL)
859     t3 = null_pointer_node;
860   else
861     t3 = build_fold_addr_expr_loc (loc, t);
862 
  /* The taskloop builtins take 11 arguments and the task builtin 9.
     The ULL taskloop variant is chosen when the iteration type is
     unsigned long long.  */
863   if (taskloop_p)
864     t = build_call_expr (ull
865                                ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
866                                : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
867                                11, t1, t2, t3,
868                                gimple_omp_task_arg_size (entry_stmt),
869                                gimple_omp_task_arg_align (entry_stmt), flags,
870                                num_tasks, priority, startvar, endvar, step);
871   else
872     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
873                                9, t1, t2, t3,
874                                gimple_omp_task_arg_size (entry_stmt),
875                                gimple_omp_task_arg_align (entry_stmt), cond, flags,
876                                depend, priority);
877 
878   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
879                                   false, GSI_CONTINUE_LINKING);
880 }
881 
882 /* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
883 
884 static tree
vec2chain(vec<tree,va_gc> * v)885 vec2chain (vec<tree, va_gc> *v)
886 {
887   tree chain = NULL_TREE, t;
888   unsigned ix;
889 
890   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
891     {
892       DECL_CHAIN (t) = chain;
893       chain = t;
894     }
895 
896   return chain;
897 }
898 
899 /* Remove barriers in REGION->EXIT's block.  Note that this is only
900    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
901    is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
902    left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
903    removed.  */
904 
905 static void
remove_exit_barrier(struct omp_region * region)906 remove_exit_barrier (struct omp_region *region)
907 {
908   gimple_stmt_iterator gsi;
909   basic_block exit_bb;
910   edge_iterator ei;
911   edge e;
912   gimple *stmt;
913   int any_addressable_vars = -1;
914 
915   exit_bb = region->exit;
916 
917   /* If the parallel region doesn't return, we don't have REGION->EXIT
918      block at all.  */
919   if (! exit_bb)
920     return;
921 
922   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
923      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
924      statements that can appear in between are extremely limited -- no
925      memory operations at all.  Here, we allow nothing at all, so the
926      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
927   gsi = gsi_last_nondebug_bb (exit_bb);
928   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
929   gsi_prev_nondebug (&gsi);
930   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
931     return;
932 
933   FOR_EACH_EDGE (e, ei, exit_bb->preds)
934     {
935       gsi = gsi_last_nondebug_bb (e->src);
936       if (gsi_end_p (gsi))
937           continue;
938       stmt = gsi_stmt (gsi);
939       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
940             && !gimple_omp_return_nowait_p (stmt))
941           {
942             /* OpenMP 3.0 tasks unfortunately prevent this optimization
943                in many cases.  If there could be tasks queued, the barrier
944                might be needed to let the tasks run before some local
945                variable of the parallel that the task uses as shared
946                runs out of scope.  The task can be spawned either
947                from within current function (this would be easy to check)
948                or from some function it calls and gets passed an address
949                of such a variable.  */
950             if (any_addressable_vars < 0)
951               {
952                 gomp_parallel *parallel_stmt
953                     = as_a <gomp_parallel *> (last_stmt (region->entry));
954                 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
955                 tree local_decls, block, decl;
956                 unsigned ix;
957 
958                 any_addressable_vars = 0;
959                 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
960                     if (TREE_ADDRESSABLE (decl))
961                       {
962                         any_addressable_vars = 1;
963                         break;
964                       }
965                 for (block = gimple_block (stmt);
966                        !any_addressable_vars
967                        && block
968                        && TREE_CODE (block) == BLOCK;
969                        block = BLOCK_SUPERCONTEXT (block))
970                     {
971                       for (local_decls = BLOCK_VARS (block);
972                            local_decls;
973                            local_decls = DECL_CHAIN (local_decls))
974                         if (TREE_ADDRESSABLE (local_decls))
975                           {
976                               any_addressable_vars = 1;
977                               break;
978                           }
979                       if (block == gimple_block (parallel_stmt))
980                         break;
981                     }
982               }
983             if (!any_addressable_vars)
984               gimple_omp_return_set_nowait (stmt);
985           }
986     }
987 }
988 
989 static void
remove_exit_barriers(struct omp_region * region)990 remove_exit_barriers (struct omp_region *region)
991 {
992   if (region->type == GIMPLE_OMP_PARALLEL)
993     remove_exit_barrier (region);
994 
995   if (region->inner)
996     {
997       region = region->inner;
998       remove_exit_barriers (region);
999       while (region->next)
1000           {
1001             region = region->next;
1002             remove_exit_barriers (region);
1003           }
1004     }
1005 }
1006 
1007 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1008    calls.  These can't be declared as const functions, but
1009    within one parallel body they are constant, so they can be
1010    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1011    which are declared const.  Similarly for task body, except
1012    that in untied task omp_get_thread_num () can change at any task
1013    scheduling point.  */
1014 
1015 static void
optimize_omp_library_calls(gimple * entry_stmt)1016 optimize_omp_library_calls (gimple *entry_stmt)
1017 {
1018   basic_block bb;
1019   gimple_stmt_iterator gsi;
1020   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1021   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1022   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1023   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1024   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1025                           && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1026                                                     OMP_CLAUSE_UNTIED) != NULL);
1027 
1028   FOR_EACH_BB_FN (bb, cfun)
1029     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1030       {
1031           gimple *call = gsi_stmt (gsi);
1032           tree decl;
1033 
1034           if (is_gimple_call (call)
1035               && (decl = gimple_call_fndecl (call))
1036               && DECL_EXTERNAL (decl)
1037               && TREE_PUBLIC (decl)
1038               && DECL_INITIAL (decl) == NULL)
1039             {
1040               tree built_in;
1041 
1042               if (DECL_NAME (decl) == thr_num_id)
1043                 {
1044                     /* In #pragma omp task untied omp_get_thread_num () can change
1045                        during the execution of the task region.  */
1046                     if (untied_task)
1047                       continue;
1048                     built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1049                 }
1050               else if (DECL_NAME (decl) == num_thr_id)
1051                 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1052               else
1053                 continue;
1054 
1055               if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1056                     || gimple_call_num_args (call) != 0)
1057                 continue;
1058 
1059               if (flag_exceptions && !TREE_NOTHROW (decl))
1060                 continue;
1061 
1062               if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1063                     || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1064                                                   TREE_TYPE (TREE_TYPE (built_in))))
1065                 continue;
1066 
1067               gimple_call_set_fndecl (call, built_in);
1068             }
1069       }
1070 }
1071 
1072 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1073    regimplified.  */
1074 
1075 static tree
expand_omp_regimplify_p(tree * tp,int * walk_subtrees,void *)1076 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1077 {
1078   tree t = *tp;
1079 
1080   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1081   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1082     return t;
1083 
1084   if (TREE_CODE (t) == ADDR_EXPR)
1085     recompute_tree_invariant_for_addr_expr (t);
1086 
1087   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1088   return NULL_TREE;
1089 }
1090 
1091 /* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1092 
1093 static void
expand_omp_build_assign(gimple_stmt_iterator * gsi_p,tree to,tree from,bool after)1094 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1095                                bool after)
1096 {
1097   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1098   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1099                                            !after, after ? GSI_CONTINUE_LINKING
1100                                                              : GSI_SAME_STMT);
1101   gimple *stmt = gimple_build_assign (to, from);
1102   if (after)
1103     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1104   else
1105     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1106   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1107       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1108     {
1109       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1110       gimple_regimplify_operands (stmt, &gsi);
1111     }
1112 }
1113 
1114 /* Expand the OpenMP parallel or task directive starting at REGION.  */
1115 
1116 static void
expand_omp_taskreg(struct omp_region * region)1117 expand_omp_taskreg (struct omp_region *region)
1118 {
1119   basic_block entry_bb, exit_bb, new_bb;
1120   struct function *child_cfun;
1121   tree child_fn, block, t;
1122   gimple_stmt_iterator gsi;
1123   gimple *entry_stmt, *stmt;
1124   edge e;
1125   vec<tree, va_gc> *ws_args;
1126 
1127   entry_stmt = last_stmt (region->entry);
1128   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1129   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1130 
1131   entry_bb = region->entry;
1132   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1133     exit_bb = region->cont;
1134   else
1135     exit_bb = region->exit;
1136 
1137   if (is_combined_parallel (region))
1138     ws_args = region->ws_args;
1139   else
1140     ws_args = NULL;
1141 
1142   if (child_cfun->cfg)
1143     {
1144       /* Due to inlining, it may happen that we have already outlined
1145            the region, in which case all we need to do is make the
1146            sub-graph unreachable and emit the parallel call.  */
1147       edge entry_succ_e, exit_succ_e;
1148 
1149       entry_succ_e = single_succ_edge (entry_bb);
1150 
1151       gsi = gsi_last_nondebug_bb (entry_bb);
1152       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153                       || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154       gsi_remove (&gsi, true);
1155 
1156       new_bb = entry_bb;
1157       if (exit_bb)
1158           {
1159             exit_succ_e = single_succ_edge (exit_bb);
1160             make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1161           }
1162       remove_edge_and_dominated_blocks (entry_succ_e);
1163     }
1164   else
1165     {
1166       unsigned srcidx, dstidx, num;
1167 
1168       /* If the parallel region needs data sent from the parent
1169            function, then the very first statement (except possible
1170            tree profile counter updates) of the parallel body
1171            is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1172            &.OMP_DATA_O is passed as an argument to the child function,
1173            we need to replace it with the argument as seen by the child
1174            function.
1175 
1176            In most cases, this will end up being the identity assignment
1177            .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1178            a function call that has been inlined, the original PARM_DECL
1179            .OMP_DATA_I may have been converted into a different local
1180            variable.  In which case, we need to keep the assignment.  */
1181       if (gimple_omp_taskreg_data_arg (entry_stmt))
1182           {
1183             basic_block entry_succ_bb
1184               = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185                                                : FALLTHRU_EDGE (entry_bb)->dest;
1186             tree arg;
1187             gimple *parcopy_stmt = NULL;
1188 
1189             for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1190               {
1191                 gimple *stmt;
1192 
1193                 gcc_assert (!gsi_end_p (gsi));
1194                 stmt = gsi_stmt (gsi);
1195                 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196                     continue;
1197 
1198                 if (gimple_num_ops (stmt) == 2)
1199                     {
1200                       tree arg = gimple_assign_rhs1 (stmt);
1201 
1202                       /* We're ignore the subcode because we're
1203                          effectively doing a STRIP_NOPS.  */
1204 
1205                       if (TREE_CODE (arg) == ADDR_EXPR
1206                           && TREE_OPERAND (arg, 0)
1207                               == gimple_omp_taskreg_data_arg (entry_stmt))
1208                         {
1209                           parcopy_stmt = stmt;
1210                           break;
1211                         }
1212                     }
1213               }
1214 
1215             gcc_assert (parcopy_stmt != NULL);
1216             arg = DECL_ARGUMENTS (child_fn);
1217 
1218             if (!gimple_in_ssa_p (cfun))
1219               {
1220                 if (gimple_assign_lhs (parcopy_stmt) == arg)
1221                     gsi_remove (&gsi, true);
1222                 else
1223                     {
1224                       /* ?? Is setting the subcode really necessary ??  */
1225                       gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226                       gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227                     }
1228               }
1229             else
1230               {
1231                 tree lhs = gimple_assign_lhs (parcopy_stmt);
1232                 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233                 /* We'd like to set the rhs to the default def in the child_fn,
1234                      but it's too early to create ssa names in the child_fn.
1235                      Instead, we set the rhs to the parm.  In
1236                      move_sese_region_to_fn, we introduce a default def for the
1237                      parm, map the parm to it's default def, and once we encounter
1238                      this stmt, replace the parm with the default def.  */
1239                 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240                 update_stmt (parcopy_stmt);
1241               }
1242           }
1243 
1244       /* Declare local variables needed in CHILD_CFUN.  */
1245       block = DECL_INITIAL (child_fn);
1246       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247       /* The gimplifier could record temporaries in parallel/task block
1248            rather than in containing function's local_decls chain,
1249            which would mean cgraph missed finalizing them.  Do it now.  */
1250       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251           if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252             varpool_node::finalize_decl (t);
1253       DECL_SAVED_TREE (child_fn) = NULL;
1254       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1255       gimple_set_body (child_fn, NULL);
1256       TREE_USED (block) = 1;
1257 
1258       /* Reset DECL_CONTEXT on function arguments.  */
1259       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260           DECL_CONTEXT (t) = child_fn;
1261 
1262       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263            so that it can be moved to the child function.  */
1264       gsi = gsi_last_nondebug_bb (entry_bb);
1265       stmt = gsi_stmt (gsi);
1266       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267                                  || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268       e = split_block (entry_bb, stmt);
1269       gsi_remove (&gsi, true);
1270       entry_bb = e->dest;
1271       edge e2 = NULL;
1272       if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273           single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274       else
1275           {
1276             e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277             gcc_assert (e2->dest == region->exit);
1278             remove_edge (BRANCH_EDGE (entry_bb));
1279             set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280             gsi = gsi_last_nondebug_bb (region->exit);
1281             gcc_assert (!gsi_end_p (gsi)
1282                           && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283             gsi_remove (&gsi, true);
1284           }
1285 
1286       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1287       if (exit_bb)
1288           {
1289             gsi = gsi_last_nondebug_bb (exit_bb);
1290             gcc_assert (!gsi_end_p (gsi)
1291                           && (gimple_code (gsi_stmt (gsi))
1292                                 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293             stmt = gimple_build_return (NULL);
1294             gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295             gsi_remove (&gsi, true);
1296           }
1297 
1298       /* Move the parallel region into CHILD_CFUN.  */
1299 
1300       if (gimple_in_ssa_p (cfun))
1301           {
1302             init_tree_ssa (child_cfun);
1303             init_ssa_operands (child_cfun);
1304             child_cfun->gimple_df->in_ssa_p = true;
1305             block = NULL_TREE;
1306           }
1307       else
1308           block = gimple_block (entry_stmt);
1309 
1310       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1311       if (exit_bb)
1312           single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1313       if (e2)
1314           {
1315             basic_block dest_bb = e2->dest;
1316             if (!exit_bb)
1317               make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1318             remove_edge (e2);
1319             set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1320           }
1321       /* When the OMP expansion process cannot guarantee an up-to-date
1322            loop tree arrange for the child function to fixup loops.  */
1323       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1324           child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1325 
1326       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1327       num = vec_safe_length (child_cfun->local_decls);
1328       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1329           {
1330             t = (*child_cfun->local_decls)[srcidx];
1331             if (DECL_CONTEXT (t) == cfun->decl)
1332               continue;
1333             if (srcidx != dstidx)
1334               (*child_cfun->local_decls)[dstidx] = t;
1335             dstidx++;
1336           }
1337       if (dstidx != num)
1338           vec_safe_truncate (child_cfun->local_decls, dstidx);
1339 
1340       /* Inform the callgraph about the new function.  */
1341       child_cfun->curr_properties = cfun->curr_properties;
1342       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1343       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1344       cgraph_node *node = cgraph_node::get_create (child_fn);
1345       node->parallelized_function = 1;
1346       cgraph_node::add_new_function (child_fn, true);
1347 
1348       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1349                           && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1350 
1351       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1352            fixed in a following pass.  */
1353       push_cfun (child_cfun);
1354       if (need_asm)
1355           assign_assembler_name_if_needed (child_fn);
1356 
1357       if (optimize)
1358           optimize_omp_library_calls (entry_stmt);
1359       update_max_bb_count ();
1360       cgraph_edge::rebuild_edges ();
1361 
1362       /* Some EH regions might become dead, see PR34608.  If
1363            pass_cleanup_cfg isn't the first pass to happen with the
1364            new child, these dead EH edges might cause problems.
1365            Clean them up now.  */
1366       if (flag_exceptions)
1367           {
1368             basic_block bb;
1369             bool changed = false;
1370 
1371             FOR_EACH_BB_FN (bb, cfun)
1372               changed |= gimple_purge_dead_eh_edges (bb);
1373             if (changed)
1374               cleanup_tree_cfg ();
1375           }
1376       if (gimple_in_ssa_p (cfun))
1377           update_ssa (TODO_update_ssa);
1378       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1379           verify_loop_structure ();
1380       pop_cfun ();
1381 
1382       if (dump_file && !gimple_in_ssa_p (cfun))
1383           {
1384             omp_any_child_fn_dumped = true;
1385             dump_function_header (dump_file, child_fn, dump_flags);
1386             dump_function_to_file (child_fn, dump_file, dump_flags);
1387           }
1388     }
1389 
1390   adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1391 
1392   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1393     expand_parallel_call (region, new_bb,
1394                                 as_a <gomp_parallel *> (entry_stmt), ws_args);
1395   else
1396     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1397   if (gimple_in_ssa_p (cfun))
1398     update_ssa (TODO_update_ssa_only_virtuals);
1399 }
1400 
/* Information about members of an OpenACC collapsed loop nest.  There is
   one element per collapsed loop; the elements are filled in by
   expand_oacc_collapse_init and read by expand_oacc_collapse_vars.  */

struct oacc_collapse
{
  tree base;  /* Base value: the gimplified lower bound of the loop.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size, converted to the loop's signed difference
                 type.  */
  tree tile;  /* Tile increment (if tiled): the variable receiving the
                 IFN_GOACC_TILE result, NULL otherwise.  */
  tree outer; /* Tile iterator var: a ".outer" temporary when tiling,
                 otherwise the loop's own iteration variable.  */
};
1411 
1412 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1413    Fill in COUNTS array.  Emit any initialization code before GSI.
1414    Return the calculated outer loop bound of BOUND_TYPE.  */
1415 
1416 static tree
expand_oacc_collapse_init(const struct omp_for_data * fd,gimple_stmt_iterator * gsi,oacc_collapse * counts,tree bound_type,location_t loc)1417 expand_oacc_collapse_init (const struct omp_for_data *fd,
1418                                  gimple_stmt_iterator *gsi,
1419                                  oacc_collapse *counts, tree bound_type,
1420                                  location_t loc)
1421 {
1422   tree tiling = fd->tiling;
1423   tree total = build_int_cst (bound_type, 1);
1424   int ix;
1425 
1426   gcc_assert (integer_onep (fd->loop.step));
1427   gcc_assert (integer_zerop (fd->loop.n1));
1428 
1429   /* When tiling, the first operand of the tile clause applies to the
1430      innermost loop, and we work outwards from there.  Seems
1431      backwards, but whatever.  */
1432   for (ix = fd->collapse; ix--;)
1433     {
1434       const omp_for_data_loop *loop = &fd->loops[ix];
1435 
1436       tree iter_type = TREE_TYPE (loop->v);
1437       tree diff_type = iter_type;
1438       tree plus_type = iter_type;
1439 
1440       gcc_assert (loop->cond_code == fd->loop.cond_code);
1441 
1442       if (POINTER_TYPE_P (iter_type))
1443           plus_type = sizetype;
1444       if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1445           diff_type = signed_type_for (diff_type);
1446       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1447           diff_type = integer_type_node;
1448 
1449       if (tiling)
1450           {
1451             tree num = build_int_cst (integer_type_node, fd->collapse);
1452             tree loop_no = build_int_cst (integer_type_node, ix);
1453             tree tile = TREE_VALUE (tiling);
1454             gcall *call
1455               = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1456                                                     /* gwv-outer=*/integer_zero_node,
1457                                                     /* gwv-inner=*/integer_zero_node);
1458 
1459             counts[ix].outer = create_tmp_var (iter_type, ".outer");
1460             counts[ix].tile = create_tmp_var (diff_type, ".tile");
1461             gimple_call_set_lhs (call, counts[ix].tile);
1462             gimple_set_location (call, loc);
1463             gsi_insert_before (gsi, call, GSI_SAME_STMT);
1464 
1465             tiling = TREE_CHAIN (tiling);
1466           }
1467       else
1468           {
1469             counts[ix].tile = NULL;
1470             counts[ix].outer = loop->v;
1471           }
1472 
1473       tree b = loop->n1;
1474       tree e = loop->n2;
1475       tree s = loop->step;
1476       bool up = loop->cond_code == LT_EXPR;
1477       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1478       bool negating;
1479       tree expr;
1480 
1481       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1482                                             true, GSI_SAME_STMT);
1483       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1484                                             true, GSI_SAME_STMT);
1485 
1486       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1487       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1488       if (negating)
1489           s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1490       s = fold_convert (diff_type, s);
1491       if (negating)
1492           s = fold_build1 (NEGATE_EXPR, diff_type, s);
1493       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1494                                             true, GSI_SAME_STMT);
1495 
1496       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1497       negating = !up && TYPE_UNSIGNED (iter_type);
1498       expr = fold_build2 (MINUS_EXPR, plus_type,
1499                                 fold_convert (plus_type, negating ? b : e),
1500                                 fold_convert (plus_type, negating ? e : b));
1501       expr = fold_convert (diff_type, expr);
1502       if (negating)
1503           expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1504       tree range = force_gimple_operand_gsi
1505           (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1506 
1507       /* Determine number of iterations.  */
1508       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1509       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1510       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1511 
1512       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1513                                                        true, GSI_SAME_STMT);
1514 
1515       counts[ix].base = b;
1516       counts[ix].iters = iters;
1517       counts[ix].step = s;
1518 
1519       total = fold_build2 (MULT_EXPR, bound_type, total,
1520                                  fold_convert (bound_type, iters));
1521     }
1522 
1523   return total;
1524 }
1525 
1526 /* Emit initializers for collapsed loop members.  INNER is true if
1527    this is for the element loop of a TILE.  IVAR is the outer
1528    loop iteration variable, from which collapsed loop iteration values
1529    are  calculated.  COUNTS array has been initialized by
1530    expand_oacc_collapse_inits.  */
1531 
1532 static void
expand_oacc_collapse_vars(const struct omp_for_data * fd,bool inner,gimple_stmt_iterator * gsi,const oacc_collapse * counts,tree ivar)1533 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1534                                  gimple_stmt_iterator *gsi,
1535                                  const oacc_collapse *counts, tree ivar)
1536 {
1537   tree ivar_type = TREE_TYPE (ivar);
1538 
1539   /*  The most rapidly changing iteration variable is the innermost
1540       one.  */
1541   for (int ix = fd->collapse; ix--;)
1542     {
1543       const omp_for_data_loop *loop = &fd->loops[ix];
1544       const oacc_collapse *collapse = &counts[ix];
1545       tree v = inner ? loop->v : collapse->outer;
1546       tree iter_type = TREE_TYPE (v);
1547       tree diff_type = TREE_TYPE (collapse->step);
1548       tree plus_type = iter_type;
1549       enum tree_code plus_code = PLUS_EXPR;
1550       tree expr;
1551 
1552       if (POINTER_TYPE_P (iter_type))
1553           {
1554             plus_code = POINTER_PLUS_EXPR;
1555             plus_type = sizetype;
1556           }
1557 
1558       expr = ivar;
1559       if (ix)
1560           {
1561             tree mod = fold_convert (ivar_type, collapse->iters);
1562             ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1563             expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1564             ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1565                                                      true, GSI_SAME_STMT);
1566           }
1567 
1568       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1569                                 collapse->step);
1570       expr = fold_build2 (plus_code, iter_type,
1571                                 inner ? collapse->outer : collapse->base,
1572                                 fold_convert (plus_type, expr));
1573       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1574                                                true, GSI_SAME_STMT);
1575       gassign *ass = gimple_build_assign (v, expr);
1576       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1577     }
1578 }
1579 
1580 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1581    of the combined collapse > 1 loop constructs, generate code like:
1582           if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1583           if (cond3 is <)
1584             adj = STEP3 - 1;
1585           else
1586             adj = STEP3 + 1;
1587           count3 = (adj + N32 - N31) / STEP3;
1588           if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1589           if (cond2 is <)
1590             adj = STEP2 - 1;
1591           else
1592             adj = STEP2 + 1;
1593           count2 = (adj + N22 - N21) / STEP2;
1594           if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1595           if (cond1 is <)
1596             adj = STEP1 - 1;
1597           else
1598             adj = STEP1 + 1;
1599           count1 = (adj + N12 - N11) / STEP1;
1600           count = count1 * count2 * count3;
1601    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1602           count = 0;
1603    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1604    of the combined loop constructs, just initialize COUNTS array
1605    from the _looptemp_ clauses.  */
1606 
1607 /* NOTE: It *could* be better to moosh all of the BBs together,
1608    creating one larger BB with all the computation and the unexpected
1609    jump at the end.  I.e.
1610 
1611    bool zero3, zero2, zero1, zero;
1612 
1613    zero3 = N32 c3 N31;
1614    count3 = (N32 - N31) /[cl] STEP3;
1615    zero2 = N22 c2 N21;
1616    count2 = (N22 - N21) /[cl] STEP2;
1617    zero1 = N12 c1 N11;
1618    count1 = (N12 - N11) /[cl] STEP1;
1619    zero = zero3 || zero2 || zero1;
1620    count = count1 * count2 * count3;
1621    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1622 
1623    After all, we expect the zero=false, and thus we expect to have to
1624    evaluate all of the comparison expressions, so short-circuiting
1625    oughtn't be a win.  Since the condition isn't protecting a
1626    denominator, we're not concerned about divide-by-zero, so we can
1627    fully evaluate count even if a numerator turned out to be wrong.
1628 
1629    It seems like putting this all together would create much better
1630    scheduling opportunities, and less pressure on the chip's branch
1631    predictor.  */
1632 
/* Parameters, as used below:  FD describes the loop nest; GSI is the
   insertion point and is repositioned whenever a block is split or a
   zero-iterations block is created; ENTRY_BB is advanced to the
   fall-through successor after every emitted bound test; COUNTS
   receives the per-loop iteration counts.  ZERO_ITER1_BB and
   FIRST_ZERO_ITER1 (for the collapsed loops) and ZERO_ITER2_BB and
   FIRST_ZERO_ITER2 (for the loops from fd->collapse up to fd->ordered)
   are filled in the first time a bound test needs a zero-iterations
   target; L2_DOM_BB, if still NULL, is set to the block that holds the
   first such test.  */
1633 static void
expand_omp_for_init_counts(struct omp_for_data * fd,gimple_stmt_iterator * gsi,basic_block & entry_bb,tree * counts,basic_block & zero_iter1_bb,int & first_zero_iter1,basic_block & zero_iter2_bb,int & first_zero_iter2,basic_block & l2_dom_bb)1634 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1635                                   basic_block &entry_bb, tree *counts,
1636                                   basic_block &zero_iter1_bb, int &first_zero_iter1,
1637                                   basic_block &zero_iter2_bb, int &first_zero_iter2,
1638                                   basic_block &l2_dom_bb)
1639 {
1640   tree t, type = TREE_TYPE (fd->loop.v);
1641   edge e, ne;
1642   int i;
1643 
1644   /* Collapsed loops need work for expansion into SSA form.  */
1645   gcc_assert (!gimple_in_ssa_p (cfun));
1646 
       /* Loop combined into an enclosing construct and total count not a
          compile-time constant: no code is emitted here, the counts are
          simply read back from the _looptemp_ clauses.  */
1647   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1648       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1649     {
1650       gcc_assert (fd->ordered == 0);
1651       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1652            isn't supposed to be handled, as the inner loop doesn't
1653            use it.  */
1654       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1655                                              OMP_CLAUSE__LOOPTEMP_);
1656       gcc_assert (innerc);
1657       for (i = 0; i < fd->collapse; i++)
1658           {
1659             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1660                                             OMP_CLAUSE__LOOPTEMP_);
1661             gcc_assert (innerc);
1662             if (i)
1663               counts[i] = OMP_CLAUSE_DECL (innerc);
1664             else
1665               counts[0] = NULL_TREE;
1666           }
1667       return;
1668     }
1669 
       /* Pre-pass over the loops with index fd->collapse .. fd->ordered - 1:
          clear their counts, and if the bound test of any of them folds
          to constant false, give every one of them a constant zero
          count.  */
1670   for (i = fd->collapse; i < fd->ordered; i++)
1671     {
1672       tree itype = TREE_TYPE (fd->loops[i].v);
1673       counts[i] = NULL_TREE;
1674       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1675                            fold_convert (itype, fd->loops[i].n1),
1676                            fold_convert (itype, fd->loops[i].n2));
1677       if (t && integer_zerop (t))
1678           {
                 /* The inner loop reuses I; that is harmless because the
                    break right after it leaves the outer loop too.  */
1679             for (i = fd->collapse; i < fd->ordered; i++)
1680               counts[i] = build_int_cst (type, 0);
1681             break;
1682           }
1683     }
1684   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1685     {
1686       tree itype = TREE_TYPE (fd->loops[i].v);
1687 
           /* A non-collapsed loop whose count was already set by the
              pre-pass above needs nothing more.  */
1688       if (i >= fd->collapse && counts[i])
1689           continue;
           /* Emit a run-time "N1 cond N2" test unless it folds to constant
              true.  For collapsed loops this is only done when
              fd->loop.n2 is a variable (SSA_VAR_P).  */
1690       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1691             && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1692                                         fold_convert (itype, fd->loops[i].n1),
1693                                         fold_convert (itype, fd->loops[i].n2)))
1694                 == NULL_TREE || !integer_onep (t)))
1695           {
1696             gcond *cond_stmt;
1697             tree n1, n2;
1698             n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1699             n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1700                                                    true, GSI_SAME_STMT);
1701             n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1702             n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1703                                                    true, GSI_SAME_STMT);
1704             cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1705                                                    NULL_TREE, NULL_TREE);
1706             gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
                 /* Regimplify the operands if expand_omp_regimplify_p flags
                    either side of the condition.  */
1707             if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1708                                expand_omp_regimplify_p, NULL, NULL)
1709                 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1710                                   expand_omp_regimplify_p, NULL, NULL))
1711               {
1712                 *gsi = gsi_for_stmt (cond_stmt);
1713                 gimple_regimplify_operands (cond_stmt, gsi);
1714               }
                 /* End the block at the condition; E is the edge to the
                    remainder of the original block.  */
1715             e = split_block (entry_bb, cond_stmt);
                 /* Collapsed and non-collapsed loops use separate
                    zero-iterations blocks and first-index records.  */
1716             basic_block &zero_iter_bb
1717               = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1718             int &first_zero_iter
1719               = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1720             if (zero_iter_bb == NULL)
1721               {
                     /* First test of this kind: create the zero-iterations
                        block.  It stores zero into fd->loop.n2 for a
                        collapsed loop, or into a fresh counts[i] temporary
                        otherwise.  */
1722                 gassign *assign_stmt;
1723                 first_zero_iter = i;
1724                 zero_iter_bb = create_empty_bb (entry_bb);
1725                 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1726                 *gsi = gsi_after_labels (zero_iter_bb);
1727                 if (i < fd->collapse)
1728                     assign_stmt = gimple_build_assign (fd->loop.n2,
1729                                                                build_zero_cst (type));
1730                 else
1731                     {
1732                       counts[i] = create_tmp_reg (type, ".count");
1733                       assign_stmt
1734                         = gimple_build_assign (counts[i], build_zero_cst (type));
1735                     }
1736                 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1737                 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1738                                                entry_bb);
1739               }
                 /* Condition false: jump to the zero-iterations block
                    (marked very unlikely).  Condition true: continue along
                    E with the complementary probability.  */
1740             ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1741             ne->probability = profile_probability::very_unlikely ();
1742             e->flags = EDGE_TRUE_VALUE;
1743             e->probability = ne->probability.invert ();
1744             if (l2_dom_bb == NULL)
1745               l2_dom_bb = entry_bb;
                 /* Continue emitting code in the block after the test.  */
1746             entry_bb = e->dest;
1747             *gsi = gsi_last_nondebug_bb (entry_bb);
1748           }
1749 
           /* Build count = (STEP + adj + N2 - N1) / STEP, with adj being -1
              for a '<' loop and +1 otherwise; pointer iterators are
              computed in the corresponding signed integer type.  */
1750       if (POINTER_TYPE_P (itype))
1751           itype = signed_type_for (itype);
1752       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1753                                          ? -1 : 1));
1754       t = fold_build2 (PLUS_EXPR, itype,
1755                            fold_convert (itype, fd->loops[i].step), t);
1756       t = fold_build2 (PLUS_EXPR, itype, t,
1757                            fold_convert (itype, fd->loops[i].n2));
1758       t = fold_build2 (MINUS_EXPR, itype, t,
1759                            fold_convert (itype, fd->loops[i].n1));
1760       /* ?? We could probably use CEIL_DIV_EXPR instead of
1761            TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1762            generate the same code in the end because generically we
1763            don't know that the values involved must be negative for
1764            GT??  */
           /* For an unsigned type and a '>' loop, negate both the
              numerator and the step before dividing.  */
1765       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1766           t = fold_build2 (TRUNC_DIV_EXPR, itype,
1767                                fold_build1 (NEGATE_EXPR, itype, t),
1768                                fold_build1 (NEGATE_EXPR, itype,
1769                                               fold_convert (itype,
1770                                                                 fd->loops[i].step)));
1771       else
1772           t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1773                                fold_convert (itype, fd->loops[i].step));
1774       t = fold_convert (type, t);
1775       if (TREE_CODE (t) == INTEGER_CST)
1776           counts[i] = t;
1777       else
1778           {
                 /* A non-collapsed loop with index first_zero_iter2 already
                    got its counts[i] temporary when the zero-iterations
                    block was created; every other loop needs a fresh
                    one.  */
1779             if (i < fd->collapse || i != first_zero_iter2)
1780               counts[i] = create_tmp_reg (type, ".count");
1781             expand_omp_build_assign (gsi, counts[i], t);
1782           }
           /* Accumulate the running product of the collapsed loops' counts
              in fd->loop.n2.  */
1783       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1784           {
1785             if (i == 0)
1786               t = counts[0];
1787             else
1788               t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1789             expand_omp_build_assign (gsi, fd->loop.n2, t);
1790           }
1791     }
1792 }
1793 
1794 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1795           T = V;
1796           V3 = N31 + (T % count3) * STEP3;
1797           T = T / count3;
1798           V2 = N21 + (T % count2) * STEP2;
1799           T = T / count2;
1800           V1 = N11 + T * STEP1;
1801    if this loop doesn't have an inner loop construct combined with it.
1802    If it does have an inner loop construct combined with it and the
1803    iteration count isn't known constant, store values from counts array
1804    into its _looptemp_ temporaries instead.  */
1805 
1806 static void
expand_omp_for_init_vars(struct omp_for_data * fd,gimple_stmt_iterator * gsi,tree * counts,gimple * inner_stmt,tree startvar)1807 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1808                                 tree *counts, gimple *inner_stmt, tree startvar)
1809 {
1810   int i;
1811   if (gimple_omp_for_combined_p (fd->for_stmt))
1812     {
1813       /* If fd->loop.n2 is constant, then no propagation of the counts
1814            is needed, they are constant.  */
1815       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1816           return;
1817 
1818       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1819                          ? gimple_omp_taskreg_clauses (inner_stmt)
1820                          : gimple_omp_for_clauses (inner_stmt);
1821       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1822            isn't supposed to be handled, as the inner loop doesn't
1823            use it.  */
1824       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1825       gcc_assert (innerc);
1826       for (i = 0; i < fd->collapse; i++)
1827           {
1828             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1829                                             OMP_CLAUSE__LOOPTEMP_);
1830             gcc_assert (innerc);
1831             if (i)
1832               {
1833                 tree tem = OMP_CLAUSE_DECL (innerc);
1834                 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1835                 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1836                                                       false, GSI_CONTINUE_LINKING);
1837                 gassign *stmt = gimple_build_assign (tem, t);
1838                 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1839               }
1840           }
1841       return;
1842     }
1843 
1844   tree type = TREE_TYPE (fd->loop.v);
1845   tree tem = create_tmp_reg (type, ".tem");
1846   gassign *stmt = gimple_build_assign (tem, startvar);
1847   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1848 
1849   for (i = fd->collapse - 1; i >= 0; i--)
1850     {
1851       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1852       itype = vtype;
1853       if (POINTER_TYPE_P (vtype))
1854           itype = signed_type_for (vtype);
1855       if (i != 0)
1856           t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1857       else
1858           t = tem;
1859       t = fold_convert (itype, t);
1860       t = fold_build2 (MULT_EXPR, itype, t,
1861                            fold_convert (itype, fd->loops[i].step));
1862       if (POINTER_TYPE_P (vtype))
1863           t = fold_build_pointer_plus (fd->loops[i].n1, t);
1864       else
1865           t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1866       t = force_gimple_operand_gsi (gsi, t,
1867                                             DECL_P (fd->loops[i].v)
1868                                             && TREE_ADDRESSABLE (fd->loops[i].v),
1869                                             NULL_TREE, false,
1870                                             GSI_CONTINUE_LINKING);
1871       stmt = gimple_build_assign (fd->loops[i].v, t);
1872       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1873       if (i != 0)
1874           {
1875             t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1876             t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1877                                                   false, GSI_CONTINUE_LINKING);
1878             stmt = gimple_build_assign (tem, t);
1879             gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1880           }
1881     }
1882 }
1883 
1884 /* Helper function for expand_omp_for_*.  Generate code like:
1885     L10:
1886           V3 += STEP3;
1887           if (V3 cond3 N32) goto BODY_BB; else goto L11;
1888     L11:
1889           V3 = N31;
1890           V2 += STEP2;
1891           if (V2 cond2 N22) goto BODY_BB; else goto L12;
1892     L12:
1893           V2 = N21;
1894           V1 += STEP1;
1895           goto BODY_BB;  */
1896 
/* FD describes the collapsed nest, CONT_BB is the block the chain of
   new update blocks is attached after, and BODY_BB is the target every
   update block branches to.  Returns the block created for the
   innermost loop (index fd->collapse - 1), or NULL if fd->collapse
   is not positive.  */
1897 static basic_block
extract_omp_for_update_vars(struct omp_for_data * fd,basic_block cont_bb,basic_block body_bb)1898 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1899                                    basic_block body_bb)
1900 {
1901   basic_block last_bb, bb, collapse_bb = NULL;
1902   int i;
1903   gimple_stmt_iterator gsi;
1904   edge e;
1905   tree t;
1906   gimple *stmt;
1907 
1908   last_bb = cont_bb;
       /* Walk from the innermost loop outwards, creating one update block
          per loop.  */
1909   for (i = fd->collapse - 1; i >= 0; i--)
1910     {
1911       tree vtype = TREE_TYPE (fd->loops[i].v);
1912 
1913       bb = create_empty_bb (last_bb);
1914       add_bb_to_loop (bb, last_bb->loop_father);
1915       gsi = gsi_start_bb (bb);
1916 
1917       if (i < fd->collapse - 1)
1918           {
                 /* Reached when the test at the end of the previous (more
                    inner) update block fails; the edge is given 1/8 of
                    guessed_always.  */
1919             e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1920             e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1921 
                 /* Reset the next inner iteration variable to its lower
                    bound: V(i+1) = N(i+1)1.  */
1922             t = fd->loops[i + 1].n1;
1923             t = force_gimple_operand_gsi (&gsi, t,
1924                                                   DECL_P (fd->loops[i + 1].v)
1925                                                   && TREE_ADDRESSABLE (fd->loops[i
1926                                                                                        + 1].v),
1927                                                   NULL_TREE, false,
1928                                                   GSI_CONTINUE_LINKING);
1929             stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1930             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1931           }
1932       else
                 /* Innermost loop: this block is the function's result.  */
1933           collapse_bb = bb;
1934 
1935       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1936 
           /* V(i) += STEP(i), using pointer arithmetic for pointer
              iterators.  */
1937       if (POINTER_TYPE_P (vtype))
1938           t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1939       else
1940           t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1941       t = force_gimple_operand_gsi (&gsi, t,
1942                                             DECL_P (fd->loops[i].v)
1943                                             && TREE_ADDRESSABLE (fd->loops[i].v),
1944                                             NULL_TREE, false, GSI_CONTINUE_LINKING);
1945       stmt = gimple_build_assign (fd->loops[i].v, t);
1946       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1947 
1948       if (i > 0)
1949           {
                 /* Every loop but the outermost ends its update block with
                    "if (V(i) cond N(i)2)"; the true edge goes back to
                    BODY_BB with 7/8 of guessed_always.  The false edge is
                    added by the next iteration of this loop.  */
1950             t = fd->loops[i].n2;
1951             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1952                                                   false, GSI_CONTINUE_LINKING);
1953             tree v = fd->loops[i].v;
                 /* An addressable iteration variable is forced into a
                    gimple operand before being used in the condition.  */
1954             if (DECL_P (v) && TREE_ADDRESSABLE (v))
1955               v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1956                                                     false, GSI_CONTINUE_LINKING);
1957             t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1958             stmt = gimple_build_cond_empty (t);
1959             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1960             if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
1961                                expand_omp_regimplify_p, NULL, NULL)
1962                 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
1963                                   expand_omp_regimplify_p, NULL, NULL))
1964               gimple_regimplify_operands (stmt, &gsi);
1965             e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1966             e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1967           }
1968       else
                 /* The outermost loop's update block has no test and falls
                    through to BODY_BB.  */
1969           make_edge (bb, body_bb, EDGE_FALLTHRU);
1970       last_bb = bb;
1971     }
1972 
1973   return collapse_bb;
1974 }
1975 
1976 /* Expand #pragma omp ordered depend(source).  */
1977 
1978 static void
expand_omp_ordered_source(gimple_stmt_iterator * gsi,struct omp_for_data * fd,tree * counts,location_t loc)1979 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1980                                  tree *counts, location_t loc)
1981 {
1982   enum built_in_function source_ix
1983     = fd->iter_type == long_integer_type_node
1984       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1985   gimple *g
1986     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1987                                build_fold_addr_expr (counts[fd->ordered]));
1988   gimple_set_location (g, loc);
1989   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1990 }
1991 
1992 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
1993 
/* C is the depend(sink) clause; its OMP_CLAUSE_DECL is walked as a
   chain with one offset (TREE_PURPOSE) per loop of the FD->ordered
   nest.  The code emitted around GSI has the shape
        if (COND) wait (ARGS...);
   where COND is the conjunction of the per-loop checks built below and
   ARGS holds one iteration number per non-collapsed loop plus a single
   combined one for the collapsed loops.  On return *GSI is placed
   after the labels of the block that follows the call.  COUNTS is read
   as described at its uses below; LOC is used for the diagnostics, the
   built expressions and the call.  */
1994 static void
expand_omp_ordered_sink(gimple_stmt_iterator * gsi,struct omp_for_data * fd,tree * counts,tree c,location_t loc)1995 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1996                                tree *counts, tree c, location_t loc)
1997 {
1998   auto_vec<tree, 10> args;
1999   enum built_in_function sink_ix
2000     = fd->iter_type == long_integer_type_node
2001       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2002   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2003   int i;
2004   gimple_stmt_iterator gsi2 = *gsi;
2005   bool warned_step = false;
2006 
       /* Diagnostic pass only: find the first loop with a non-zero offset
          and warn if that offset points at a lexically later iteration.
          No code is emitted here.  */
2007   for (i = 0; i < fd->ordered; i++)
2008     {
2009       tree step = NULL_TREE;
2010       off = TREE_PURPOSE (deps);
           /* An offset of the form OFF / STEP carries an explicit step.  */
2011       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2012           {
2013             step = TREE_OPERAND (off, 1);
2014             off = TREE_OPERAND (off, 0);
2015           }
2016       if (!integer_zerop (off))
2017           {
2018             gcc_assert (fd->loops[i].cond_code == LT_EXPR
2019                           || fd->loops[i].cond_code == GT_EXPR);
2020             bool forward = fd->loops[i].cond_code == LT_EXPR;
2021             if (step)
2022               {
2023                 /* Non-simple Fortran DO loops.  If step is variable,
2024                      we don't know at compile even the direction, so can't
2025                      warn.  */
2026                 if (TREE_CODE (step) != INTEGER_CST)
2027                     break;
2028                 forward = tree_int_cst_sgn (step) != -1;
2029               }
2030             if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2031               warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2032                                         "lexically later iteration");
2033             break;
2034           }
2035       deps = TREE_CHAIN (deps);
2036     }
2037   /* If all offsets corresponding to the collapsed loops are zero,
2038      this depend clause can be ignored.  FIXME: but there is still a
2039      flush needed.  We need to emit one __sync_synchronize () for it
2040      though (perhaps conditionally)?  Solve this together with the
2041      conservative dependence folding optimization.
2042   if (i >= fd->collapse)
2043     return;  */
2044 
       /* Restart from the head of the offset list and carve out an empty
          block: E1 splits after the statement preceding *GSI, and E2
          splits E1->dest right after its labels.  The wait call goes
          into E1->dest (through GSI2) and the guarding condition at the
          end of E1->src (through *GSI).  */
2045   deps = OMP_CLAUSE_DECL (c);
2046   gsi_prev (&gsi2);
2047   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2048   edge e2 = split_block_after_labels (e1->dest);
2049 
2050   gsi2 = gsi_after_labels (e1->dest);
2051   *gsi = gsi_last_bb (e1->src);
2052   for (i = 0; i < fd->ordered; i++)
2053     {
2054       tree itype = TREE_TYPE (fd->loops[i].v);
2055       tree step = NULL_TREE;
2056       tree orig_off = NULL_TREE;
2057       if (POINTER_TYPE_P (itype))
2058           itype = sizetype;
2059       if (i)
2060           deps = TREE_CHAIN (deps);
2061       off = TREE_PURPOSE (deps);
2062       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2063           {
2064             step = TREE_OPERAND (off, 1);
2065             off = TREE_OPERAND (off, 0);
                 /* An explicit step is only expected on a '<' loop with
                    unit step and a non-pointer iteration variable.  */
2066             gcc_assert (fd->loops[i].cond_code == LT_EXPR
2067                           && integer_onep (fd->loops[i].step)
2068                           && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2069           }
           /* S is the step to divide by: the explicit one if present,
              the loop's own otherwise.  */
2070       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2071       if (step)
2072           {
                 /* Remember the undivided offset for the divisibility check
                    further down, then divide by the step right away.  */
2073             off = fold_convert_loc (loc, itype, off);
2074             orig_off = off;
2075             off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2076           }
2077 
           /* T becomes this loop's check that the referenced iteration
              lies inside the loop bounds; a zero offset is trivially
              fine.  */
2078       if (integer_zerop (off))
2079           t = boolean_true_node;
2080       else
2081           {
                 /* A is the iteration variable moved by the offset
                    (V - OFF for a negative sink, V + OFF otherwise).  */
2082             tree a;
2083             tree co = fold_convert_loc (loc, itype, off);
2084             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2085               {
2086                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087                     co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2088                 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2089                                            TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2090                                            co);
2091               }
2092             else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093               a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2094                                          fd->loops[i].v, co);
2095             else
2096               a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2097                                          fd->loops[i].v, co);
2098             if (step)
2099               {
                     /* With an explicit step its sign picks the check:
                        T1 is used when STEP >= 0 and T2 when STEP < 0,
                        via the COND_EXPR built below.  */
2100                 tree t1, t2;
2101                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2102                     t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2103                                               fd->loops[i].n1);
2104                 else
2105                     t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2106                                               fd->loops[i].n2);
2107                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2108                     t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2109                                               fd->loops[i].n2);
2110                 else
2111                     t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2112                                               fd->loops[i].n1);
2113                 t = fold_build3_loc (loc, LT_EXPR, boolean_type_node,
2114                                            step, build_int_cst (TREE_TYPE (step), 0));
                     /* For a non-constant step both alternatives are
                        gimplified up front, at the end of E1->src.  */
2115                 if (TREE_CODE (step) != INTEGER_CST)
2116                     {
2117                       t1 = unshare_expr (t1);
2118                       t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2119                                                              false, GSI_CONTINUE_LINKING);
2120                       t2 = unshare_expr (t2);
2121                       t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2122                                                              false, GSI_CONTINUE_LINKING);
2123                     }
2124                 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2125                                            t, t2, t1);
2126               }
2127             else if (fd->loops[i].cond_code == LT_EXPR)
2128               {
                     /* '<' loop: A >= N1 for a negative sink, A < N2
                        otherwise.  */
2129                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2130                     t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2131                                              fd->loops[i].n1);
2132                 else
2133                     t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2134                                              fd->loops[i].n2);
2135               }
                 /* Otherwise: A > N2 for a negative sink, A <= N1 for a
                    positive one.  */
2136             else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2137               t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2138                                          fd->loops[i].n2);
2139             else
2140               t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2141                                          fd->loops[i].n1);
2142           }
           /* AND this loop's check into the overall condition.  */
2143       if (cond)
2144           cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2145       else
2146           cond = t;
2147 
2148       off = fold_convert_loc (loc, itype, off);
2149 
           /* Unless the loop has no explicit step and its own step is 1
              ('<' loop) or -1 (otherwise), also require the offset to be
              a multiple of the step.  */
2150       if (step
2151             || (fd->loops[i].cond_code == LT_EXPR
2152                 ? !integer_onep (fd->loops[i].step)
2153                 : !integer_minus_onep (fd->loops[i].step)))
2154           {
2155             if (step == NULL_TREE
2156                 && TYPE_UNSIGNED (itype)
2157                 && fd->loops[i].cond_code == GT_EXPR)
2158               t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2159                                          fold_build1_loc (loc, NEGATE_EXPR, itype,
2160                                                               s));
2161             else
2162               t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2163                                          orig_off ? orig_off : off, s);
2164             t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2165                                      build_int_cst (itype, 0));
                 /* A check that folds to constant false is diagnosed, at
                    most once per clause.  */
2166             if (integer_zerop (t) && !warned_step)
2167               {
2168                 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2169                                           "in the iteration space");
2170                 warned_step = true;
2171               }
2172             cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2173                                           cond, t);
2174           }
2175 
           /* T now becomes the base of the argument: the combined
              iteration variable for collapsed loops (when more than one
              is collapsed), else counts[i] if it is set, else V - N1.  */
2176       if (i <= fd->collapse - 1 && fd->collapse > 1)
2177           t = fd->loop.v;
2178       else if (counts[i])
2179           t = counts[i];
2180       else
2181           {
2182             t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2183                                      fd->loops[i].v, fd->loops[i].n1);
2184             t = fold_convert_loc (loc, fd->iter_type, t);
2185           }
           /* Express the offset in iterations (OFF / S), negated for a
              negative sink.  */
2186       if (step)
2187           /* We have divided off by step already earlier.  */;
2188       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2189           off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2190                                      fold_build1_loc (loc, NEGATE_EXPR, itype,
2191                                                             s));
2192       else
2193           off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2194       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2195           off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2196       off = fold_convert_loc (loc, fd->iter_type, off);
           /* Collapsed loops contribute one argument in total: the running
              offset COFF is added in, multiplied by counts[i] and carried
              to the next loop, and only the last collapsed loop falls
              through to push an argument.  */
2197       if (i <= fd->collapse - 1 && fd->collapse > 1)
2198           {
2199             if (i)
2200               off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2201                                            off);
2202             if (i < fd->collapse - 1)
2203               {
2204                 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2205                                               counts[i]);
2206                 continue;
2207               }
2208           }
           /* Argument = base + offset, gimplified in front of the place
              where the call will be inserted.  */
2209       off = unshare_expr (off);
2210       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2211       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2212                                             true, GSI_SAME_STMT);
2213       args.safe_push (t);
2214     }
       /* Emit the wait call into the carved-out block.  */
2215   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2216   gimple_set_location (g, loc);
2217   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2218 
       /* End E1->src with "if (COND)".  The true edge E1 leads to the
          call; the new false edge E3 skips it and is given 1/8 of
          guessed_always.  */
2219   cond = unshare_expr (cond);
2220   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2221                                            GSI_CONTINUE_LINKING);
2222   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2223   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2224   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2225   e1->probability = e3->probability.invert ();
2226   e1->flags = EDGE_TRUE_VALUE;
2227   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2228 
2229   *gsi = gsi_after_labels (e2->dest);
2230 }
2231 
2232 /* Expand all #pragma omp ordered depend(source) and
2233    #pragma omp ordered depend(sink:...) constructs in the current
2234    #pragma omp for ordered(n) region.  */
2235 
2236 static void
expand_omp_ordered_source_sink(struct omp_region * region,struct omp_for_data * fd,tree * counts,basic_block cont_bb)2237 expand_omp_ordered_source_sink (struct omp_region *region,
2238                                         struct omp_for_data *fd, tree *counts,
2239                                         basic_block cont_bb)
2240 {
2241   struct omp_region *inner;
2242   int i;
2243   for (i = fd->collapse - 1; i < fd->ordered; i++)
2244     if (i == fd->collapse - 1 && fd->collapse > 1)
2245       counts[i] = NULL_TREE;
2246     else if (i >= fd->collapse && !cont_bb)
2247       counts[i] = build_zero_cst (fd->iter_type);
2248     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2249                && integer_onep (fd->loops[i].step))
2250       counts[i] = NULL_TREE;
2251     else
2252       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2253   tree atype
2254     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2255   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2256   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2257 
2258   for (inner = region->inner; inner; inner = inner->next)
2259     if (inner->type == GIMPLE_OMP_ORDERED)
2260       {
2261           gomp_ordered *ord_stmt = inner->ord_stmt;
2262           gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2263           location_t loc = gimple_location (ord_stmt);
2264           tree c;
2265           for (c = gimple_omp_ordered_clauses (ord_stmt);
2266                c; c = OMP_CLAUSE_CHAIN (c))
2267             if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2268               break;
2269           if (c)
2270             expand_omp_ordered_source (&gsi, fd, counts, loc);
2271           for (c = gimple_omp_ordered_clauses (ord_stmt);
2272                c; c = OMP_CLAUSE_CHAIN (c))
2273             if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2274               expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2275           gsi_remove (&gsi, true);
2276       }
2277 }
2278 
2279 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2280    collapsed.  */
2281 
2282 static basic_block
expand_omp_for_ordered_loops(struct omp_for_data * fd,tree * counts,basic_block cont_bb,basic_block body_bb,bool ordered_lastprivate)2283 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2284                                     basic_block cont_bb, basic_block body_bb,
2285                                     bool ordered_lastprivate)
2286 {
2287   if (fd->ordered == fd->collapse)
2288     return cont_bb;
2289 
2290   if (!cont_bb)
2291     {
2292       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2293       for (int i = fd->collapse; i < fd->ordered; i++)
2294           {
2295             tree type = TREE_TYPE (fd->loops[i].v);
2296             tree n1 = fold_convert (type, fd->loops[i].n1);
2297             expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2298             tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2299                                     size_int (i - fd->collapse + 1),
2300                                     NULL_TREE, NULL_TREE);
2301             expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302           }
2303       return NULL;
2304     }
2305 
2306   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2307     {
2308       tree t, type = TREE_TYPE (fd->loops[i].v);
2309       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2310       expand_omp_build_assign (&gsi, fd->loops[i].v,
2311                                      fold_convert (type, fd->loops[i].n1));
2312       if (counts[i])
2313           expand_omp_build_assign (&gsi, counts[i],
2314                                          build_zero_cst (fd->iter_type));
2315       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2316                                 size_int (i - fd->collapse + 1),
2317                                 NULL_TREE, NULL_TREE);
2318       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2319       if (!gsi_end_p (gsi))
2320           gsi_prev (&gsi);
2321       else
2322           gsi = gsi_last_bb (body_bb);
2323       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2324       basic_block new_body = e1->dest;
2325       if (body_bb == cont_bb)
2326           cont_bb = new_body;
2327       edge e2 = NULL;
2328       basic_block new_header;
2329       if (EDGE_COUNT (cont_bb->preds) > 0)
2330           {
2331             gsi = gsi_last_bb (cont_bb);
2332             if (POINTER_TYPE_P (type))
2333               t = fold_build_pointer_plus (fd->loops[i].v,
2334                                                    fold_convert (sizetype,
2335                                                                    fd->loops[i].step));
2336             else
2337               t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2338                                    fold_convert (type, fd->loops[i].step));
2339             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2340             if (counts[i])
2341               {
2342                 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2343                                      build_int_cst (fd->iter_type, 1));
2344                 expand_omp_build_assign (&gsi, counts[i], t);
2345                 t = counts[i];
2346               }
2347             else
2348               {
2349                 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2350                                      fd->loops[i].v, fd->loops[i].n1);
2351                 t = fold_convert (fd->iter_type, t);
2352                 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2353                                                       true, GSI_SAME_STMT);
2354               }
2355             aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2356                                size_int (i - fd->collapse + 1),
2357                                NULL_TREE, NULL_TREE);
2358             expand_omp_build_assign (&gsi, aref, t);
2359             gsi_prev (&gsi);
2360             e2 = split_block (cont_bb, gsi_stmt (gsi));
2361             new_header = e2->dest;
2362           }
2363       else
2364           new_header = cont_bb;
2365       gsi = gsi_after_labels (new_header);
2366       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2367                                                    true, GSI_SAME_STMT);
2368       tree n2
2369           = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2370                                             true, NULL_TREE, true, GSI_SAME_STMT);
2371       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2372       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2373       edge e3 = split_block (new_header, gsi_stmt (gsi));
2374       cont_bb = e3->dest;
2375       remove_edge (e1);
2376       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2377       e3->flags = EDGE_FALSE_VALUE;
2378       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2379       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2380       e1->probability = e3->probability.invert ();
2381 
2382       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2383       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2384 
2385       if (e2)
2386           {
2387             struct loop *loop = alloc_loop ();
2388             loop->header = new_header;
2389             loop->latch = e2->src;
2390             add_loop (loop, body_bb->loop_father);
2391           }
2392     }
2393 
2394   /* If there are any lastprivate clauses and it is possible some loops
2395      might have zero iterations, ensure all the decls are initialized,
2396      otherwise we could crash evaluating C++ class iterators with lastprivate
2397      clauses.  */
2398   bool need_inits = false;
2399   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2400     if (need_inits)
2401       {
2402           tree type = TREE_TYPE (fd->loops[i].v);
2403           gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2404           expand_omp_build_assign (&gsi, fd->loops[i].v,
2405                                          fold_convert (type, fd->loops[i].n1));
2406       }
2407     else
2408       {
2409           tree type = TREE_TYPE (fd->loops[i].v);
2410           tree this_cond = fold_build2 (fd->loops[i].cond_code,
2411                                               boolean_type_node,
2412                                               fold_convert (type, fd->loops[i].n1),
2413                                               fold_convert (type, fd->loops[i].n2));
2414           if (!integer_onep (this_cond))
2415             need_inits = true;
2416       }
2417 
2418   return cont_bb;
2419 }
2420 
2421 /* A subroutine of expand_omp_for.  Generate code for a parallel
2422    loop with any schedule.  Given parameters:
2423 
2424           for (V = N1; V cond N2; V += STEP) BODY;
2425 
2426    where COND is "<" or ">", we generate pseudocode
2427 
2428           more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2429           if (more) goto L0; else goto L3;
2430     L0:
2431           V = istart0;
2432           iend = iend0;
2433     L1:
2434           BODY;
2435           V += STEP;
2436           if (V cond iend) goto L1; else goto L2;
2437     L2:
2438           if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2439     L3:
2440 
2441     If this is a combined omp parallel loop, instead of the call to
2442     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2443     If this is gimple_omp_for_combined_p loop, then instead of assigning
2444     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2445     inner GIMPLE_OMP_FOR and V += STEP; and
2446     if (V cond iend) goto L1; else goto L2; are removed.
2447 
2448     For collapsed loops, given parameters:
2449       collapse(3)
2450       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2451           for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2452             for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2453               BODY;
2454 
2455     we generate pseudocode
2456 
2457           if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2458           if (cond3 is <)
2459             adj = STEP3 - 1;
2460           else
2461             adj = STEP3 + 1;
2462           count3 = (adj + N32 - N31) / STEP3;
2463           if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2464           if (cond2 is <)
2465             adj = STEP2 - 1;
2466           else
2467             adj = STEP2 + 1;
2468           count2 = (adj + N22 - N21) / STEP2;
2469           if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2470           if (cond1 is <)
2471             adj = STEP1 - 1;
2472           else
2473             adj = STEP1 + 1;
2474           count1 = (adj + N12 - N11) / STEP1;
2475           count = count1 * count2 * count3;
2476           goto Z1;
2477     Z0:
2478           count = 0;
2479     Z1:
2480           more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2481           if (more) goto L0; else goto L3;
2482     L0:
2483           V = istart0;
2484           T = V;
2485           V3 = N31 + (T % count3) * STEP3;
2486           T = T / count3;
2487           V2 = N21 + (T % count2) * STEP2;
2488           T = T / count2;
2489           V1 = N11 + T * STEP1;
2490           iend = iend0;
2491     L1:
2492           BODY;
2493           V += 1;
2494           if (V < iend) goto L10; else goto L2;
2495     L10:
2496           V3 += STEP3;
2497           if (V3 cond3 N32) goto L1; else goto L11;
2498     L11:
2499           V3 = N31;
2500           V2 += STEP2;
2501           if (V2 cond2 N22) goto L1; else goto L12;
2502     L12:
2503           V2 = N21;
2504           V1 += STEP1;
2505           goto L1;
2506     L2:
2507           if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2508     L3:
2509 
2510       */
2511 
2512 static void
expand_omp_for_generic(struct omp_region * region,struct omp_for_data * fd,enum built_in_function start_fn,enum built_in_function next_fn,gimple * inner_stmt)2513 expand_omp_for_generic (struct omp_region *region,
2514                               struct omp_for_data *fd,
2515                               enum built_in_function start_fn,
2516                               enum built_in_function next_fn,
2517                               gimple *inner_stmt)
2518 {
2519   tree type, istart0, iend0, iend;
2520   tree t, vmain, vback, bias = NULL_TREE;
2521   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2522   basic_block l2_bb = NULL, l3_bb = NULL;
2523   gimple_stmt_iterator gsi;
2524   gassign *assign_stmt;
2525   bool in_combined_parallel = is_combined_parallel (region);
2526   bool broken_loop = region->cont == NULL;
2527   edge e, ne;
2528   tree *counts = NULL;
2529   int i;
2530   bool ordered_lastprivate = false;
2531 
2532   gcc_assert (!broken_loop || !in_combined_parallel);
2533   gcc_assert (fd->iter_type == long_integer_type_node
2534                 || !in_combined_parallel);
2535 
2536   entry_bb = region->entry;
2537   cont_bb = region->cont;
2538   collapse_bb = NULL;
2539   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2540   gcc_assert (broken_loop
2541                 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2542   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2543   l1_bb = single_succ (l0_bb);
2544   if (!broken_loop)
2545     {
2546       l2_bb = create_empty_bb (cont_bb);
2547       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2548                       || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2549                           == l1_bb));
2550       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2551     }
2552   else
2553     l2_bb = NULL;
2554   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2555   exit_bb = region->exit;
2556 
2557   gsi = gsi_last_nondebug_bb (entry_bb);
2558 
2559   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2560   if (fd->ordered
2561       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2562                                 OMP_CLAUSE_LASTPRIVATE))
2563     ordered_lastprivate = false;
2564   if (fd->collapse > 1 || fd->ordered)
2565     {
2566       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2567       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2568 
2569       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2570       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2571                                           zero_iter1_bb, first_zero_iter1,
2572                                           zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2573 
2574       if (zero_iter1_bb)
2575           {
2576             /* Some counts[i] vars might be uninitialized if
2577                some loop has zero iterations.  But the body shouldn't
2578                be executed in that case, so just avoid uninit warnings.  */
2579             for (i = first_zero_iter1;
2580                  i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2581               if (SSA_VAR_P (counts[i]))
2582                 TREE_NO_WARNING (counts[i]) = 1;
2583             gsi_prev (&gsi);
2584             e = split_block (entry_bb, gsi_stmt (gsi));
2585             entry_bb = e->dest;
2586             make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2587             gsi = gsi_last_nondebug_bb (entry_bb);
2588             set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2589                                            get_immediate_dominator (CDI_DOMINATORS,
2590                                                                           zero_iter1_bb));
2591           }
2592       if (zero_iter2_bb)
2593           {
2594             /* Some counts[i] vars might be uninitialized if
2595                some loop has zero iterations.  But the body shouldn't
2596                be executed in that case, so just avoid uninit warnings.  */
2597             for (i = first_zero_iter2; i < fd->ordered; i++)
2598               if (SSA_VAR_P (counts[i]))
2599                 TREE_NO_WARNING (counts[i]) = 1;
2600             if (zero_iter1_bb)
2601               make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2602             else
2603               {
2604                 gsi_prev (&gsi);
2605                 e = split_block (entry_bb, gsi_stmt (gsi));
2606                 entry_bb = e->dest;
2607                 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2608                 gsi = gsi_last_nondebug_bb (entry_bb);
2609                 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2610                                                get_immediate_dominator
2611                                                    (CDI_DOMINATORS, zero_iter2_bb));
2612               }
2613           }
2614       if (fd->collapse == 1)
2615           {
2616             counts[0] = fd->loop.n2;
2617             fd->loop = fd->loops[0];
2618           }
2619     }
2620 
2621   type = TREE_TYPE (fd->loop.v);
2622   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2623   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2624   TREE_ADDRESSABLE (istart0) = 1;
2625   TREE_ADDRESSABLE (iend0) = 1;
2626 
2627   /* See if we need to bias by LLONG_MIN.  */
2628   if (fd->iter_type == long_long_unsigned_type_node
2629       && TREE_CODE (type) == INTEGER_TYPE
2630       && !TYPE_UNSIGNED (type)
2631       && fd->ordered == 0)
2632     {
2633       tree n1, n2;
2634 
2635       if (fd->loop.cond_code == LT_EXPR)
2636           {
2637             n1 = fd->loop.n1;
2638             n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2639           }
2640       else
2641           {
2642             n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2643             n2 = fd->loop.n1;
2644           }
2645       if (TREE_CODE (n1) != INTEGER_CST
2646             || TREE_CODE (n2) != INTEGER_CST
2647             || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2648           bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2649     }
2650 
2651   gimple_stmt_iterator gsif = gsi;
2652   gsi_prev (&gsif);
2653 
2654   tree arr = NULL_TREE;
2655   if (in_combined_parallel)
2656     {
2657       gcc_assert (fd->ordered == 0);
2658       /* In a combined parallel loop, emit a call to
2659            GOMP_loop_foo_next.  */
2660       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2661                                  build_fold_addr_expr (istart0),
2662                                  build_fold_addr_expr (iend0));
2663     }
2664   else
2665     {
2666       tree t0, t1, t2, t3, t4;
2667       /* If this is not a combined parallel loop, emit a call to
2668            GOMP_loop_foo_start in ENTRY_BB.  */
2669       t4 = build_fold_addr_expr (iend0);
2670       t3 = build_fold_addr_expr (istart0);
2671       if (fd->ordered)
2672           {
2673             t0 = build_int_cst (unsigned_type_node,
2674                                     fd->ordered - fd->collapse + 1);
2675             arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2676                                                                       fd->ordered
2677                                                                       - fd->collapse + 1),
2678                                         ".omp_counts");
2679             DECL_NAMELESS (arr) = 1;
2680             TREE_ADDRESSABLE (arr) = 1;
2681             TREE_STATIC (arr) = 1;
2682             vec<constructor_elt, va_gc> *v;
2683             vec_alloc (v, fd->ordered - fd->collapse + 1);
2684             int idx;
2685 
2686             for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2687               {
2688                 tree c;
2689                 if (idx == 0 && fd->collapse > 1)
2690                     c = fd->loop.n2;
2691                 else
2692                     c = counts[idx + fd->collapse - 1];
2693                 tree purpose = size_int (idx);
2694                 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2695                 if (TREE_CODE (c) != INTEGER_CST)
2696                     TREE_STATIC (arr) = 0;
2697               }
2698 
2699             DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2700             if (!TREE_STATIC (arr))
2701               force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2702                                                                 void_type_node, arr),
2703                                               true, NULL_TREE, true, GSI_SAME_STMT);
2704             t1 = build_fold_addr_expr (arr);
2705             t2 = NULL_TREE;
2706           }
2707       else
2708           {
2709             t2 = fold_convert (fd->iter_type, fd->loop.step);
2710             t1 = fd->loop.n2;
2711             t0 = fd->loop.n1;
2712             if (gimple_omp_for_combined_into_p (fd->for_stmt))
2713               {
2714                 tree innerc
2715                     = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2716                                            OMP_CLAUSE__LOOPTEMP_);
2717                 gcc_assert (innerc);
2718                 t0 = OMP_CLAUSE_DECL (innerc);
2719                 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2720                                                   OMP_CLAUSE__LOOPTEMP_);
2721                 gcc_assert (innerc);
2722                 t1 = OMP_CLAUSE_DECL (innerc);
2723               }
2724             if (POINTER_TYPE_P (TREE_TYPE (t0))
2725                 && TYPE_PRECISION (TREE_TYPE (t0))
2726                      != TYPE_PRECISION (fd->iter_type))
2727               {
2728                 /* Avoid casting pointers to integer of a different size.  */
2729                 tree itype = signed_type_for (type);
2730                 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2731                 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2732               }
2733             else
2734               {
2735                 t1 = fold_convert (fd->iter_type, t1);
2736                 t0 = fold_convert (fd->iter_type, t0);
2737               }
2738             if (bias)
2739               {
2740                 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2741                 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2742               }
2743           }
2744       if (fd->iter_type == long_integer_type_node || fd->ordered)
2745           {
2746             if (fd->chunk_size)
2747               {
2748                 t = fold_convert (fd->iter_type, fd->chunk_size);
2749                 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2750                 if (fd->ordered)
2751                     t = build_call_expr (builtin_decl_explicit (start_fn),
2752                                              5, t0, t1, t, t3, t4);
2753                 else
2754                     t = build_call_expr (builtin_decl_explicit (start_fn),
2755                                              6, t0, t1, t2, t, t3, t4);
2756               }
2757             else if (fd->ordered)
2758               t = build_call_expr (builtin_decl_explicit (start_fn),
2759                                          4, t0, t1, t3, t4);
2760             else
2761               t = build_call_expr (builtin_decl_explicit (start_fn),
2762                                          5, t0, t1, t2, t3, t4);
2763           }
2764       else
2765           {
2766             tree t5;
2767             tree c_bool_type;
2768             tree bfn_decl;
2769 
2770             /* The GOMP_loop_ull_*start functions have additional boolean
2771                argument, true for < loops and false for > loops.
2772                In Fortran, the C bool type can be different from
2773                boolean_type_node.  */
2774             bfn_decl = builtin_decl_explicit (start_fn);
2775             c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2776             t5 = build_int_cst (c_bool_type,
2777                                     fd->loop.cond_code == LT_EXPR ? 1 : 0);
2778             if (fd->chunk_size)
2779               {
2780                 tree bfn_decl = builtin_decl_explicit (start_fn);
2781                 t = fold_convert (fd->iter_type, fd->chunk_size);
2782                 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2783                 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2784               }
2785             else
2786               t = build_call_expr (builtin_decl_explicit (start_fn),
2787                                          6, t5, t0, t1, t2, t3, t4);
2788           }
2789     }
2790   if (TREE_TYPE (t) != boolean_type_node)
2791     t = fold_build2 (NE_EXPR, boolean_type_node,
2792                          t, build_int_cst (TREE_TYPE (t), 0));
2793   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2794                                         true, GSI_SAME_STMT);
2795   if (arr && !TREE_STATIC (arr))
2796     {
2797       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2798       TREE_THIS_VOLATILE (clobber) = 1;
2799       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2800                                GSI_SAME_STMT);
2801     }
2802   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2803 
2804   /* Remove the GIMPLE_OMP_FOR statement.  */
2805   gsi_remove (&gsi, true);
2806 
2807   if (gsi_end_p (gsif))
2808     gsif = gsi_after_labels (gsi_bb (gsif));
2809   gsi_next (&gsif);
2810 
2811   /* Iteration setup for sequential loop goes in L0_BB.  */
2812   tree startvar = fd->loop.v;
2813   tree endvar = NULL_TREE;
2814 
2815   if (gimple_omp_for_combined_p (fd->for_stmt))
2816     {
2817       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2818                       && gimple_omp_for_kind (inner_stmt)
2819                          == GF_OMP_FOR_KIND_SIMD);
2820       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2821                                              OMP_CLAUSE__LOOPTEMP_);
2822       gcc_assert (innerc);
2823       startvar = OMP_CLAUSE_DECL (innerc);
2824       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2825                                         OMP_CLAUSE__LOOPTEMP_);
2826       gcc_assert (innerc);
2827       endvar = OMP_CLAUSE_DECL (innerc);
2828     }
2829 
2830   gsi = gsi_start_bb (l0_bb);
2831   t = istart0;
2832   if (fd->ordered && fd->collapse == 1)
2833     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2834                          fold_convert (fd->iter_type, fd->loop.step));
2835   else if (bias)
2836     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2837   if (fd->ordered && fd->collapse == 1)
2838     {
2839       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2840           t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2841                                fd->loop.n1, fold_convert (sizetype, t));
2842       else
2843           {
2844             t = fold_convert (TREE_TYPE (startvar), t);
2845             t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2846                                  fd->loop.n1, t);
2847           }
2848     }
2849   else
2850     {
2851       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2852           t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2853       t = fold_convert (TREE_TYPE (startvar), t);
2854     }
2855   t = force_gimple_operand_gsi (&gsi, t,
2856                                         DECL_P (startvar)
2857                                         && TREE_ADDRESSABLE (startvar),
2858                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
2859   assign_stmt = gimple_build_assign (startvar, t);
2860   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2861 
2862   t = iend0;
2863   if (fd->ordered && fd->collapse == 1)
2864     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2865                          fold_convert (fd->iter_type, fd->loop.step));
2866   else if (bias)
2867     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2868   if (fd->ordered && fd->collapse == 1)
2869     {
2870       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2871           t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2872                                fd->loop.n1, fold_convert (sizetype, t));
2873       else
2874           {
2875             t = fold_convert (TREE_TYPE (startvar), t);
2876             t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2877                                  fd->loop.n1, t);
2878           }
2879     }
2880   else
2881     {
2882       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2883           t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2884       t = fold_convert (TREE_TYPE (startvar), t);
2885     }
2886   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2887                                            false, GSI_CONTINUE_LINKING);
2888   if (endvar)
2889     {
2890       assign_stmt = gimple_build_assign (endvar, iend);
2891       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2892       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2893           assign_stmt = gimple_build_assign (fd->loop.v, iend);
2894       else
2895           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2896       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2897     }
2898   /* Handle linear clause adjustments.  */
2899   tree itercnt = NULL_TREE;
2900   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2901     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2902            c; c = OMP_CLAUSE_CHAIN (c))
2903       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2904             && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2905           {
2906             tree d = OMP_CLAUSE_DECL (c);
2907             bool is_ref = omp_is_reference (d);
2908             tree t = d, a, dest;
2909             if (is_ref)
2910               t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2911             tree type = TREE_TYPE (t);
2912             if (POINTER_TYPE_P (type))
2913               type = sizetype;
2914             dest = unshare_expr (t);
2915             tree v = create_tmp_var (TREE_TYPE (t), NULL);
2916             expand_omp_build_assign (&gsif, v, t);
2917             if (itercnt == NULL_TREE)
2918               {
2919                 itercnt = startvar;
2920                 tree n1 = fd->loop.n1;
2921                 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2922                     {
2923                       itercnt
2924                         = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2925                                             itercnt);
2926                       n1 = fold_convert (TREE_TYPE (itercnt), n1);
2927                     }
2928                 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2929                                              itercnt, n1);
2930                 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2931                                              itercnt, fd->loop.step);
2932                 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2933                                                               NULL_TREE, false,
2934                                                               GSI_CONTINUE_LINKING);
2935               }
2936             a = fold_build2 (MULT_EXPR, type,
2937                                  fold_convert (type, itercnt),
2938                                  fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2939             t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2940                                  : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2941             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2942                                                   false, GSI_CONTINUE_LINKING);
2943             assign_stmt = gimple_build_assign (dest, t);
2944             gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2945           }
2946   if (fd->collapse > 1)
2947     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2948 
2949   if (fd->ordered)
2950     {
2951       /* Until now, counts array contained number of iterations or
2952            variable containing it for ith loop.  From now on, we need
2953            those counts only for collapsed loops, and only for the 2nd
2954            till the last collapsed one.  Move those one element earlier,
2955            we'll use counts[fd->collapse - 1] for the first source/sink
2956            iteration counter and so on and counts[fd->ordered]
2957            as the array holding the current counter values for
2958            depend(source).  */
2959       if (fd->collapse > 1)
2960           memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2961       if (broken_loop)
2962           {
2963             int i;
2964             for (i = fd->collapse; i < fd->ordered; i++)
2965               {
2966                 tree type = TREE_TYPE (fd->loops[i].v);
2967                 tree this_cond
2968                     = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2969                                      fold_convert (type, fd->loops[i].n1),
2970                                      fold_convert (type, fd->loops[i].n2));
2971                 if (!integer_onep (this_cond))
2972                     break;
2973               }
2974             if (i < fd->ordered)
2975               {
2976                 cont_bb
2977                     = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2978                 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2979                 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2980                 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2981                 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2982                 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2983                 make_edge (cont_bb, l1_bb, 0);
2984                 l2_bb = create_empty_bb (cont_bb);
2985                 broken_loop = false;
2986               }
2987           }
2988       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2989       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2990                                                         ordered_lastprivate);
2991       if (counts[fd->collapse - 1])
2992           {
2993             gcc_assert (fd->collapse == 1);
2994             gsi = gsi_last_bb (l0_bb);
2995             expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2996                                            istart0, true);
2997             gsi = gsi_last_bb (cont_bb);
2998             t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2999                                  build_int_cst (fd->iter_type, 1));
3000             expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3001             tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3002                                     size_zero_node, NULL_TREE, NULL_TREE);
3003             expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3004             t = counts[fd->collapse - 1];
3005           }
3006       else if (fd->collapse > 1)
3007           t = fd->loop.v;
3008       else
3009           {
3010             t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3011                                  fd->loops[0].v, fd->loops[0].n1);
3012             t = fold_convert (fd->iter_type, t);
3013           }
3014       gsi = gsi_last_bb (l0_bb);
3015       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3016                                 size_zero_node, NULL_TREE, NULL_TREE);
3017       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3018                                             false, GSI_CONTINUE_LINKING);
3019       expand_omp_build_assign (&gsi, aref, t, true);
3020     }
3021 
3022   if (!broken_loop)
3023     {
3024       /* Code to control the increment and predicate for the sequential
3025            loop goes in the CONT_BB.  */
3026       gsi = gsi_last_nondebug_bb (cont_bb);
3027       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3028       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3029       vmain = gimple_omp_continue_control_use (cont_stmt);
3030       vback = gimple_omp_continue_control_def (cont_stmt);
3031 
3032       if (!gimple_omp_for_combined_p (fd->for_stmt))
3033           {
3034             if (POINTER_TYPE_P (type))
3035               t = fold_build_pointer_plus (vmain, fd->loop.step);
3036             else
3037               t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3038             t = force_gimple_operand_gsi (&gsi, t,
3039                                                   DECL_P (vback)
3040                                                   && TREE_ADDRESSABLE (vback),
3041                                                   NULL_TREE, true, GSI_SAME_STMT);
3042             assign_stmt = gimple_build_assign (vback, t);
3043             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3044 
3045             if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3046               {
3047                 tree tem;
3048                 if (fd->collapse > 1)
3049                     tem = fd->loop.v;
3050                 else
3051                     {
3052                       tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3053                                              fd->loops[0].v, fd->loops[0].n1);
3054                       tem = fold_convert (fd->iter_type, tem);
3055                     }
3056                 tree aref = build4 (ARRAY_REF, fd->iter_type,
3057                                           counts[fd->ordered], size_zero_node,
3058                                           NULL_TREE, NULL_TREE);
3059                 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3060                                                         true, GSI_SAME_STMT);
3061                 expand_omp_build_assign (&gsi, aref, tem);
3062               }
3063 
3064             t = build2 (fd->loop.cond_code, boolean_type_node,
3065                           DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3066                           iend);
3067             gcond *cond_stmt = gimple_build_cond_empty (t);
3068             gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3069           }
3070 
3071       /* Remove GIMPLE_OMP_CONTINUE.  */
3072       gsi_remove (&gsi, true);
3073 
3074       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3075           collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3076 
3077       /* Emit code to get the next parallel iteration in L2_BB.  */
3078       gsi = gsi_start_bb (l2_bb);
3079 
3080       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3081                                  build_fold_addr_expr (istart0),
3082                                  build_fold_addr_expr (iend0));
3083       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3084                                             false, GSI_CONTINUE_LINKING);
3085       if (TREE_TYPE (t) != boolean_type_node)
3086           t = fold_build2 (NE_EXPR, boolean_type_node,
3087                                t, build_int_cst (TREE_TYPE (t), 0));
3088       gcond *cond_stmt = gimple_build_cond_empty (t);
3089       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3090     }
3091 
3092   /* Add the loop cleanup function.  */
3093   gsi = gsi_last_nondebug_bb (exit_bb);
3094   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3095     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3096   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3097     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3098   else
3099     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3100   gcall *call_stmt = gimple_build_call (t, 0);
3101   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3102     gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3103   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3104   if (fd->ordered)
3105     {
3106       tree arr = counts[fd->ordered];
3107       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3108       TREE_THIS_VOLATILE (clobber) = 1;
3109       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3110                               GSI_SAME_STMT);
3111     }
3112   gsi_remove (&gsi, true);
3113 
3114   /* Connect the new blocks.  */
3115   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3116   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3117 
3118   if (!broken_loop)
3119     {
3120       gimple_seq phis;
3121 
3122       e = find_edge (cont_bb, l3_bb);
3123       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3124 
3125       phis = phi_nodes (l3_bb);
3126       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3127           {
3128             gimple *phi = gsi_stmt (gsi);
3129             SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3130                        PHI_ARG_DEF_FROM_EDGE (phi, e));
3131           }
3132       remove_edge (e);
3133 
3134       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3135       e = find_edge (cont_bb, l1_bb);
3136       if (e == NULL)
3137           {
3138             e = BRANCH_EDGE (cont_bb);
3139             gcc_assert (single_succ (e->dest) == l1_bb);
3140           }
3141       if (gimple_omp_for_combined_p (fd->for_stmt))
3142           {
3143             remove_edge (e);
3144             e = NULL;
3145           }
3146       else if (fd->collapse > 1)
3147           {
3148             remove_edge (e);
3149             e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3150           }
3151       else
3152           e->flags = EDGE_TRUE_VALUE;
3153       if (e)
3154           {
3155             e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3156             find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3157           }
3158       else
3159           {
3160             e = find_edge (cont_bb, l2_bb);
3161             e->flags = EDGE_FALLTHRU;
3162           }
3163       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3164 
3165       if (gimple_in_ssa_p (cfun))
3166           {
3167             /* Add phis to the outer loop that connect to the phis in the inner,
3168                original loop, and move the loop entry value of the inner phi to
3169                the loop entry value of the outer phi.  */
3170             gphi_iterator psi;
3171             for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3172               {
3173                 source_location locus;
3174                 gphi *nphi;
3175                 gphi *exit_phi = psi.phi ();
3176 
3177                 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3178                     continue;
3179 
3180                 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3181                 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3182 
3183                 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3184                 edge latch_to_l1 = find_edge (latch, l1_bb);
3185                 gphi *inner_phi
3186                     = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3187 
3188                 tree t = gimple_phi_result (exit_phi);
3189                 tree new_res = copy_ssa_name (t, NULL);
3190                 nphi = create_phi_node (new_res, l0_bb);
3191 
3192                 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3193                 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3194                 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3195                 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3196                 add_phi_arg (nphi, t, entry_to_l0, locus);
3197 
3198                 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3199                 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3200 
3201                 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3202               }
3203           }
3204 
3205       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3206                                      recompute_dominator (CDI_DOMINATORS, l2_bb));
3207       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3208                                      recompute_dominator (CDI_DOMINATORS, l3_bb));
3209       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3210                                      recompute_dominator (CDI_DOMINATORS, l0_bb));
3211       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3212                                      recompute_dominator (CDI_DOMINATORS, l1_bb));
3213 
3214       /* We enter expand_omp_for_generic with a loop.  This original loop may
3215            have its own loop struct, or it may be part of an outer loop struct
3216            (which may be the fake loop).  */
3217       struct loop *outer_loop = entry_bb->loop_father;
3218       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3219 
3220       add_bb_to_loop (l2_bb, outer_loop);
3221 
3222       /* We've added a new loop around the original loop.  Allocate the
3223            corresponding loop struct.  */
3224       struct loop *new_loop = alloc_loop ();
3225       new_loop->header = l0_bb;
3226       new_loop->latch = l2_bb;
3227       add_loop (new_loop, outer_loop);
3228 
3229       /* Allocate a loop structure for the original loop unless we already
3230            had one.  */
3231       if (!orig_loop_has_loop_struct
3232             && !gimple_omp_for_combined_p (fd->for_stmt))
3233           {
3234             struct loop *orig_loop = alloc_loop ();
3235             orig_loop->header = l1_bb;
3236             /* The loop may have multiple latches.  */
3237             add_loop (orig_loop, new_loop);
3238           }
3239     }
3240 }
3241 
3242 /* A subroutine of expand_omp_for.  Generate code for a parallel
3243    loop with static schedule and no specified chunk size.  Given
3244    parameters:
3245 
3246           for (V = N1; V cond N2; V += STEP) BODY;
3247 
3248    where COND is "<" or ">", we generate pseudocode
3249 
3250           if ((__typeof (V)) -1 > 0 && !(N1 cond N2)) goto L2;
3251           if (cond is <)
3252             adj = STEP - 1;
3253           else
3254             adj = STEP + 1;
3255           if ((__typeof (V)) -1 > 0 && cond is >)
3256             n = -(adj + N2 - N1) / -STEP;
3257           else
3258             n = (adj + N2 - N1) / STEP;
3259           q = n / nthreads;
3260           tt = n % nthreads;
3261           if (threadid < tt) goto L3; else goto L4;
3262     L3:
3263           tt = 0;
3264           q = q + 1;
3265     L4:
3266           s0 = q * threadid + tt;
3267           e0 = s0 + q;
3268           V = s0 * STEP + N1;
3269           if (s0 >= e0) goto L2; else goto L0;
3270     L0:
3271           e = e0 * STEP + N1;
3272     L1:
3273           BODY;
3274           V += STEP;
3275           if (V cond e) goto L1;
3276     L2:
3277 */
3278 
3279 static void
expand_omp_for_static_nochunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)3280 expand_omp_for_static_nochunk (struct omp_region *region,
3281                                      struct omp_for_data *fd,
3282                                      gimple *inner_stmt)
3283 {
3284   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3285   tree type, itype, vmain, vback;
3286   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3287   basic_block body_bb, cont_bb, collapse_bb = NULL;
3288   basic_block fin_bb;
3289   gimple_stmt_iterator gsi;
3290   edge ep;
3291   bool broken_loop = region->cont == NULL;
3292   tree *counts = NULL;
3293   tree n1, n2, step;
3294 
3295   itype = type = TREE_TYPE (fd->loop.v);
3296   if (POINTER_TYPE_P (type))
3297     itype = signed_type_for (type);
3298 
3299   entry_bb = region->entry;
3300   cont_bb = region->cont;
3301   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3302   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3303   gcc_assert (broken_loop
3304                 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3305   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3306   body_bb = single_succ (seq_start_bb);
3307   if (!broken_loop)
3308     {
3309       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3310                       || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3311       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3312     }
3313   exit_bb = region->exit;
3314 
3315   /* Iteration space partitioning goes in ENTRY_BB.  */
3316   gsi = gsi_last_nondebug_bb (entry_bb);
3317   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3318 
3319   if (fd->collapse > 1)
3320     {
3321       int first_zero_iter = -1, dummy = -1;
3322       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3323 
3324       counts = XALLOCAVEC (tree, fd->collapse);
3325       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3326                                           fin_bb, first_zero_iter,
3327                                           dummy_bb, dummy, l2_dom_bb);
3328       t = NULL_TREE;
3329     }
3330   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3331     t = integer_one_node;
3332   else
3333     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3334                          fold_convert (type, fd->loop.n1),
3335                          fold_convert (type, fd->loop.n2));
3336   if (fd->collapse == 1
3337       && TYPE_UNSIGNED (type)
3338       && (t == NULL_TREE || !integer_onep (t)))
3339     {
3340       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3341       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3342                                              true, GSI_SAME_STMT);
3343       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3344       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3345                                              true, GSI_SAME_STMT);
3346       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3347                                                              NULL_TREE, NULL_TREE);
3348       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3349       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3350                          expand_omp_regimplify_p, NULL, NULL)
3351             || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3352                               expand_omp_regimplify_p, NULL, NULL))
3353           {
3354             gsi = gsi_for_stmt (cond_stmt);
3355             gimple_regimplify_operands (cond_stmt, &gsi);
3356           }
3357       ep = split_block (entry_bb, cond_stmt);
3358       ep->flags = EDGE_TRUE_VALUE;
3359       entry_bb = ep->dest;
3360       ep->probability = profile_probability::very_likely ();
3361       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3362       ep->probability = profile_probability::very_unlikely ();
3363       if (gimple_in_ssa_p (cfun))
3364           {
3365             int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3366             for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3367                  !gsi_end_p (gpi); gsi_next (&gpi))
3368               {
3369                 gphi *phi = gpi.phi ();
3370                 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3371                                  ep, UNKNOWN_LOCATION);
3372               }
3373           }
3374       gsi = gsi_last_bb (entry_bb);
3375     }
3376 
3377   switch (gimple_omp_for_kind (fd->for_stmt))
3378     {
3379     case GF_OMP_FOR_KIND_FOR:
3380       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3381       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3382       break;
3383     case GF_OMP_FOR_KIND_DISTRIBUTE:
3384       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3385       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3386       break;
3387     default:
3388       gcc_unreachable ();
3389     }
3390   nthreads = build_call_expr (nthreads, 0);
3391   nthreads = fold_convert (itype, nthreads);
3392   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3393                                                true, GSI_SAME_STMT);
3394   threadid = build_call_expr (threadid, 0);
3395   threadid = fold_convert (itype, threadid);
3396   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3397                                                true, GSI_SAME_STMT);
3398 
3399   n1 = fd->loop.n1;
3400   n2 = fd->loop.n2;
3401   step = fd->loop.step;
3402   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3403     {
3404       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3405                                              OMP_CLAUSE__LOOPTEMP_);
3406       gcc_assert (innerc);
3407       n1 = OMP_CLAUSE_DECL (innerc);
3408       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3409                                         OMP_CLAUSE__LOOPTEMP_);
3410       gcc_assert (innerc);
3411       n2 = OMP_CLAUSE_DECL (innerc);
3412     }
3413   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3414                                          true, NULL_TREE, true, GSI_SAME_STMT);
3415   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3416                                          true, NULL_TREE, true, GSI_SAME_STMT);
3417   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3418                                            true, NULL_TREE, true, GSI_SAME_STMT);
3419 
3420   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3421   t = fold_build2 (PLUS_EXPR, itype, step, t);
3422   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3423   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3424   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3425     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3426                          fold_build1 (NEGATE_EXPR, itype, t),
3427                          fold_build1 (NEGATE_EXPR, itype, step));
3428   else
3429     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3430   t = fold_convert (itype, t);
3431   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3432 
3433   q = create_tmp_reg (itype, "q");
3434   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3435   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3437 
3438   tt = create_tmp_reg (itype, "tt");
3439   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3440   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3441   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3442 
3443   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3444   gcond *cond_stmt = gimple_build_cond_empty (t);
3445   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3446 
3447   second_bb = split_block (entry_bb, cond_stmt)->dest;
3448   gsi = gsi_last_nondebug_bb (second_bb);
3449   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3450 
3451   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3452                          GSI_SAME_STMT);
3453   gassign *assign_stmt
3454     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3455   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3456 
3457   third_bb = split_block (second_bb, assign_stmt)->dest;
3458   gsi = gsi_last_nondebug_bb (third_bb);
3459   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3460 
3461   t = build2 (MULT_EXPR, itype, q, threadid);
3462   t = build2 (PLUS_EXPR, itype, t, tt);
3463   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3464 
3465   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3466   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3467 
3468   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3469   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3470 
3471   /* Remove the GIMPLE_OMP_FOR statement.  */
3472   gsi_remove (&gsi, true);
3473 
3474   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3475   gsi = gsi_start_bb (seq_start_bb);
3476 
3477   tree startvar = fd->loop.v;
3478   tree endvar = NULL_TREE;
3479 
3480   if (gimple_omp_for_combined_p (fd->for_stmt))
3481     {
3482       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3483                          ? gimple_omp_parallel_clauses (inner_stmt)
3484                          : gimple_omp_for_clauses (inner_stmt);
3485       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3486       gcc_assert (innerc);
3487       startvar = OMP_CLAUSE_DECL (innerc);
3488       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3489                                         OMP_CLAUSE__LOOPTEMP_);
3490       gcc_assert (innerc);
3491       endvar = OMP_CLAUSE_DECL (innerc);
3492       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3493             && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3494           {
3495             int i;
3496             for (i = 1; i < fd->collapse; i++)
3497               {
3498                 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3499                                                   OMP_CLAUSE__LOOPTEMP_);
3500                 gcc_assert (innerc);
3501               }
3502             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3503                                             OMP_CLAUSE__LOOPTEMP_);
3504             if (innerc)
3505               {
3506                 /* If needed (distribute parallel for with lastprivate),
3507                      propagate down the total number of iterations.  */
3508                 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3509                                              fd->loop.n2);
3510                 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3511                                                       GSI_CONTINUE_LINKING);
3512                 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3513                 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3514               }
3515           }
3516     }
3517   t = fold_convert (itype, s0);
3518   t = fold_build2 (MULT_EXPR, itype, t, step);
3519   if (POINTER_TYPE_P (type))
3520     t = fold_build_pointer_plus (n1, t);
3521   else
3522     t = fold_build2 (PLUS_EXPR, type, t, n1);
3523   t = fold_convert (TREE_TYPE (startvar), t);
3524   t = force_gimple_operand_gsi (&gsi, t,
3525                                         DECL_P (startvar)
3526                                         && TREE_ADDRESSABLE (startvar),
3527                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
3528   assign_stmt = gimple_build_assign (startvar, t);
3529   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3530 
3531   t = fold_convert (itype, e0);
3532   t = fold_build2 (MULT_EXPR, itype, t, step);
3533   if (POINTER_TYPE_P (type))
3534     t = fold_build_pointer_plus (n1, t);
3535   else
3536     t = fold_build2 (PLUS_EXPR, type, t, n1);
3537   t = fold_convert (TREE_TYPE (startvar), t);
3538   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3539                                         false, GSI_CONTINUE_LINKING);
3540   if (endvar)
3541     {
3542       assign_stmt = gimple_build_assign (endvar, e);
3543       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3545           assign_stmt = gimple_build_assign (fd->loop.v, e);
3546       else
3547           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3548       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3549     }
3550   /* Handle linear clause adjustments.  */
3551   tree itercnt = NULL_TREE;
3552   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3553     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3554            c; c = OMP_CLAUSE_CHAIN (c))
3555       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3556             && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3557           {
3558             tree d = OMP_CLAUSE_DECL (c);
3559             bool is_ref = omp_is_reference (d);
3560             tree t = d, a, dest;
3561             if (is_ref)
3562               t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3563             if (itercnt == NULL_TREE)
3564               {
3565                 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3566                     {
3567                       itercnt = fold_build2 (MINUS_EXPR, itype,
3568                                                    fold_convert (itype, n1),
3569                                                    fold_convert (itype, fd->loop.n1));
3570                       itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3571                       itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3572                       itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3573                                                                   NULL_TREE, false,
3574                                                                   GSI_CONTINUE_LINKING);
3575                     }
3576                 else
3577                     itercnt = s0;
3578               }
3579             tree type = TREE_TYPE (t);
3580             if (POINTER_TYPE_P (type))
3581               type = sizetype;
3582             a = fold_build2 (MULT_EXPR, type,
3583                                  fold_convert (type, itercnt),
3584                                  fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3585             dest = unshare_expr (t);
3586             t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3587                                  : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3588             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3589                                                   false, GSI_CONTINUE_LINKING);
3590             assign_stmt = gimple_build_assign (dest, t);
3591             gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3592           }
3593   if (fd->collapse > 1)
3594     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3595 
3596   if (!broken_loop)
3597     {
3598       /* The code controlling the sequential loop replaces the
3599            GIMPLE_OMP_CONTINUE.  */
3600       gsi = gsi_last_nondebug_bb (cont_bb);
3601       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3602       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3603       vmain = gimple_omp_continue_control_use (cont_stmt);
3604       vback = gimple_omp_continue_control_def (cont_stmt);
3605 
3606       if (!gimple_omp_for_combined_p (fd->for_stmt))
3607           {
3608             if (POINTER_TYPE_P (type))
3609               t = fold_build_pointer_plus (vmain, step);
3610             else
3611               t = fold_build2 (PLUS_EXPR, type, vmain, step);
3612             t = force_gimple_operand_gsi (&gsi, t,
3613                                                   DECL_P (vback)
3614                                                   && TREE_ADDRESSABLE (vback),
3615                                                   NULL_TREE, true, GSI_SAME_STMT);
3616             assign_stmt = gimple_build_assign (vback, t);
3617             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3618 
3619             t = build2 (fd->loop.cond_code, boolean_type_node,
3620                           DECL_P (vback) && TREE_ADDRESSABLE (vback)
3621                           ? t : vback, e);
3622             gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3623           }
3624 
3625       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3626       gsi_remove (&gsi, true);
3627 
3628       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3629           collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3630     }
3631 
3632   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3633   gsi = gsi_last_nondebug_bb (exit_bb);
3634   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3635     {
3636       t = gimple_omp_return_lhs (gsi_stmt (gsi));
3637       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3638     }
3639   gsi_remove (&gsi, true);
3640 
3641   /* Connect all the blocks.  */
3642   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3643   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3644   ep = find_edge (entry_bb, second_bb);
3645   ep->flags = EDGE_TRUE_VALUE;
3646   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3647   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3648   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3649 
3650   if (!broken_loop)
3651     {
3652       ep = find_edge (cont_bb, body_bb);
3653       if (ep == NULL)
3654           {
3655             ep = BRANCH_EDGE (cont_bb);
3656             gcc_assert (single_succ (ep->dest) == body_bb);
3657           }
3658       if (gimple_omp_for_combined_p (fd->for_stmt))
3659           {
3660             remove_edge (ep);
3661             ep = NULL;
3662           }
3663       else if (fd->collapse > 1)
3664           {
3665             remove_edge (ep);
3666             ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3667           }
3668       else
3669           ep->flags = EDGE_TRUE_VALUE;
3670       find_edge (cont_bb, fin_bb)->flags
3671           = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3672     }
3673 
3674   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3675   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3676   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3677 
3678   set_immediate_dominator (CDI_DOMINATORS, body_bb,
3679                                  recompute_dominator (CDI_DOMINATORS, body_bb));
3680   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3681                                  recompute_dominator (CDI_DOMINATORS, fin_bb));
3682 
3683   struct loop *loop = body_bb->loop_father;
3684   if (loop != entry_bb->loop_father)
3685     {
3686       gcc_assert (broken_loop || loop->header == body_bb);
3687       gcc_assert (broken_loop
3688                       || loop->latch == region->cont
3689                       || single_pred (loop->latch) == region->cont);
3690       return;
3691     }
3692 
3693   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3694     {
3695       loop = alloc_loop ();
3696       loop->header = body_bb;
3697       if (collapse_bb == NULL)
3698           loop->latch = cont_bb;
3699       add_loop (loop, body_bb->loop_father);
3700     }
3701 }
3702 
3703 /* Return phi in E->DEST with ARG on edge E.  */
3704 
3705 static gphi *
find_phi_with_arg_on_edge(tree arg,edge e)3706 find_phi_with_arg_on_edge (tree arg, edge e)
3707 {
3708   basic_block bb = e->dest;
3709 
3710   for (gphi_iterator gpi = gsi_start_phis (bb);
3711        !gsi_end_p (gpi);
3712        gsi_next (&gpi))
3713     {
3714       gphi *phi = gpi.phi ();
3715       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3716           return phi;
3717     }
3718 
3719   return NULL;
3720 }
3721 
3722 /* A subroutine of expand_omp_for.  Generate code for a parallel
3723    loop with static schedule and a specified chunk size.  Given
3724    parameters:
3725 
3726           for (V = N1; V cond N2; V += STEP) BODY;
3727 
3728    where COND is "<" or ">", we generate pseudocode
3729 
3730           if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
3731           if (cond is <)
3732             adj = STEP - 1;
3733           else
3734             adj = STEP + 1;
3735           if ((__typeof (V)) -1 > 0 && cond is >)
3736             n = -(adj + N2 - N1) / -STEP;
3737           else
3738             n = (adj + N2 - N1) / STEP;
3739           trip = 0;
3740           V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3741                                                         here so that V is defined
3742                                                         if the loop is not entered
3743     L0:
3744           s0 = (trip * nthreads + threadid) * CHUNK;
3745           e0 = min (s0 + CHUNK, n);
3746           if (s0 < n) goto L1; else goto L4;
3747     L1:
3748           V = s0 * STEP + N1;
3749           e = e0 * STEP + N1;
3750     L2:
3751           BODY;
3752           V += STEP;
3753           if (V cond e) goto L2; else goto L3;
3754     L3:
3755           trip += 1;
3756           goto L0;
3757     L4:
3758 */
3759 
/* Parameters of expand_omp_for_static_chunk:
   REGION supplies the entry, continue and exit blocks of the construct;
   region->cont may be NULL, in which case the loop is treated as broken
   and no back-edge code is generated.
   FD describes the loop: iteration variable, bounds, step, condition
   code, chunk size and collapse depth.
   INNER_STMT is the inner construct whose _looptemp_ clauses are read
   when the loop is combined; it is also forwarded to
   expand_omp_for_init_vars for collapsed loops.  */
3760 static void
expand_omp_for_static_chunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)3761 expand_omp_for_static_chunk (struct omp_region *region,
3762                                    struct omp_for_data *fd, gimple *inner_stmt)
3763 {
3764   tree n, s0, e0, e, t;
3765   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3766   tree type, itype, vmain, vback, vextra;
3767   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3768   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3769   gimple_stmt_iterator gsi;
3770   edge se;
       /* No continue block means there is no loop back edge to expand.  */
3771   bool broken_loop = region->cont == NULL;
3772   tree *counts = NULL;
3773   tree n1, n2, step;
3774 
       /* ITYPE is the type used for iteration-count arithmetic; for a
          pointer iteration variable it is the signed integer type
          obtained from signed_type_for.  */
3775   itype = type = TREE_TYPE (fd->loop.v);
3776   if (POINTER_TYPE_P (type))
3777     itype = signed_type_for (type);
3778 
       /* Split ENTRY_BB at its last statement: the new successor block
          becomes ITER_PART_BB (L0 in the pseudocode), whose branch edge
          leads to FIN_BB and whose fallthru edge is split to create
          SEQ_START_BB (L1).  */
3779   entry_bb = region->entry;
3780   se = split_block (entry_bb, last_stmt (entry_bb));
3781   entry_bb = se->src;
3782   iter_part_bb = se->dest;
3783   cont_bb = region->cont;
3784   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3785   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3786   gcc_assert (broken_loop
3787                 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3788   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3789   body_bb = single_succ (seq_start_bb);
       /* The fallthru edge out of CONT_BB is split to make room for the
          trip-count update (L3 in the pseudocode).  */
3790   if (!broken_loop)
3791     {
3792       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3793                       || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3794       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3795       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3796     }
3797   exit_bb = region->exit;
3798 
3799   /* Trip and adjustment setup goes in ENTRY_BB.  */
3800   gsi = gsi_last_nondebug_bb (entry_bb);
3801   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3802 
3803   if (fd->collapse > 1)
3804     {
3805       int first_zero_iter = -1, dummy = -1;
3806       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3807 
           /* COUNTS has one slot per collapsed loop and is filled in by
              expand_omp_for_init_counts (presumably with the per-loop
              iteration counts -- confirm against that function).  */
3808       counts = XALLOCAVEC (tree, fd->collapse);
3809       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3810                                           fin_bb, first_zero_iter,
3811                                           dummy_bb, dummy, l2_dom_bb);
3812       t = NULL_TREE;
3813     }
3814   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3815     t = integer_one_node;
3816   else
3817     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3818                          fold_convert (type, fd->loop.n1),
3819                          fold_convert (type, fd->loop.n2));
       /* T is now either the folded value of "N1 cond N2", constant one
          for a combined-into loop, or NULL_TREE.  For a non-collapsed
          loop with an unsigned iteration type where T is not known to be
          one, emit a run-time "N1 cond N2" test whose false edge goes
          straight to FIN_BB.  */
3820   if (fd->collapse == 1
3821       && TYPE_UNSIGNED (type)
3822       && (t == NULL_TREE || !integer_onep (t)))
3823     {
3824       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3825       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3826                                              true, GSI_SAME_STMT);
3827       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3828       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3829                                              true, GSI_SAME_STMT);
3830       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3831                                                              NULL_TREE, NULL_TREE);
3832       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3833       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3834                          expand_omp_regimplify_p, NULL, NULL)
3835             || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3836                               expand_omp_regimplify_p, NULL, NULL))
3837           {
3838             gsi = gsi_for_stmt (cond_stmt);
3839             gimple_regimplify_operands (cond_stmt, &gsi);
3840           }
           /* The condition ends its own block; the true edge continues
              into the (new) ENTRY_BB, the false edge bypasses the loop.  */
3841       se = split_block (entry_bb, cond_stmt);
3842       se->flags = EDGE_TRUE_VALUE;
3843       entry_bb = se->dest;
3844       se->probability = profile_probability::very_likely ();
3845       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3846       se->probability = profile_probability::very_unlikely ();
3847       if (gimple_in_ssa_p (cfun))
3848           {
                 /* The new edge into FIN_BB needs phi arguments; copy the
                    ones already present for the ITER_PART_BB -> FIN_BB
                    edge.  */
3849             int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3850             for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3851                  !gsi_end_p (gpi); gsi_next (&gpi))
3852               {
3853                 gphi *phi = gpi.phi ();
3854                 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3855                                  se, UNKNOWN_LOCATION);
3856               }
3857           }
3858       gsi = gsi_last_bb (entry_bb);
3859     }
3860 
       /* Select the runtime queries: thread count/number for a
          worksharing loop, team count/number for distribute.  */
3861   switch (gimple_omp_for_kind (fd->for_stmt))
3862     {
3863     case GF_OMP_FOR_KIND_FOR:
3864       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3865       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3866       break;
3867     case GF_OMP_FOR_KIND_DISTRIBUTE:
3868       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3869       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3870       break;
3871     default:
3872       gcc_unreachable ();
3873     }
3874   nthreads = build_call_expr (nthreads, 0);
3875   nthreads = fold_convert (itype, nthreads);
3876   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3877                                                true, GSI_SAME_STMT);
3878   threadid = build_call_expr (threadid, 0);
3879   threadid = fold_convert (itype, threadid);
3880   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3881                                                true, GSI_SAME_STMT);
3882 
3883   n1 = fd->loop.n1;
3884   n2 = fd->loop.n2;
3885   step = fd->loop.step;
       /* When this loop is combined into an outer construct, the bounds
          are taken from the first two _looptemp_ clauses instead of FD.  */
3886   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3887     {
3888       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3889                                              OMP_CLAUSE__LOOPTEMP_);
3890       gcc_assert (innerc);
3891       n1 = OMP_CLAUSE_DECL (innerc);
3892       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3893                                         OMP_CLAUSE__LOOPTEMP_);
3894       gcc_assert (innerc);
3895       n2 = OMP_CLAUSE_DECL (innerc);
3896     }
       /* Note that N1 is kept in TYPE (it is later used as a pointer
          base), whereas N2, STEP and the chunk size are converted to
          ITYPE.  */
3897   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3898                                          true, NULL_TREE, true, GSI_SAME_STMT);
3899   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3900                                          true, NULL_TREE, true, GSI_SAME_STMT);
3901   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3902                                            true, NULL_TREE, true, GSI_SAME_STMT);
3903   tree chunk_size = fold_convert (itype, fd->chunk_size);
3904   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3905   chunk_size
3906     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3907                                         GSI_SAME_STMT);
3908 
       /* n = (STEP + adj + N2 - N1) / STEP, with adj = -1 for "<" and +1
          otherwise; for unsigned ITYPE with ">" both the dividend and
          the divisor are negated before dividing.  */
3909   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3910   t = fold_build2 (PLUS_EXPR, itype, step, t);
3911   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3912   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3913   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3914     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3915                          fold_build1 (NEGATE_EXPR, itype, t),
3916                          fold_build1 (NEGATE_EXPR, itype, step));
3917   else
3918     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3919   t = fold_convert (itype, t);
3920   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921                                         true, GSI_SAME_STMT);
3922 
       /* In SSA form the trip counter gets distinct names for its
          initial value, its value in ITER_PART_BB and its updated value;
          otherwise all three are the same temporary.  */
3923   trip_var = create_tmp_reg (itype, ".trip");
3924   if (gimple_in_ssa_p (cfun))
3925     {
3926       trip_init = make_ssa_name (trip_var);
3927       trip_main = make_ssa_name (trip_var);
3928       trip_back = make_ssa_name (trip_var);
3929     }
3930   else
3931     {
3932       trip_init = trip_var;
3933       trip_main = trip_var;
3934       trip_back = trip_var;
3935     }
3936 
       /* trip = 0;  */
3937   gassign *assign_stmt
3938     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3939   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3940 
       /* VEXTRA = threadid * CHUNK * STEP + N1, the extra definition of
          V described in the function's header comment.  It is only
          consumed by the SSA phi fix-up further down.  */
3941   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3942   t = fold_build2 (MULT_EXPR, itype, t, step);
3943   if (POINTER_TYPE_P (type))
3944     t = fold_build_pointer_plus (n1, t);
3945   else
3946     t = fold_build2 (PLUS_EXPR, type, t, n1);
3947   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948                                              true, GSI_SAME_STMT);
3949 
3950   /* Remove the GIMPLE_OMP_FOR.  */
3951   gsi_remove (&gsi, true);
3952 
       /* Keep an iterator at the spot in ENTRY_BB where the
          GIMPLE_OMP_FOR was; the linear-clause handling below inserts
          statements there.  */
3953   gimple_stmt_iterator gsif = gsi;
3954 
3955   /* Iteration space partitioning goes in ITER_PART_BB.  */
3956   gsi = gsi_last_bb (iter_part_bb);
3957 
       /* s0 = (trip * nthreads + threadid) * CHUNK;  */
3958   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3959   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3960   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3961   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962                                          false, GSI_CONTINUE_LINKING);
3963 
       /* e0 = min (s0 + CHUNK, n);  */
3964   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3965   t = fold_build2 (MIN_EXPR, itype, t, n);
3966   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3967                                          false, GSI_CONTINUE_LINKING);
3968 
       /* if (s0 < n) goto L1; else goto L4;  */
3969   t = build2 (LT_EXPR, boolean_type_node, s0, n);
3970   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3971 
3972   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3973   gsi = gsi_start_bb (seq_start_bb);
3974 
3975   tree startvar = fd->loop.v;
3976   tree endvar = NULL_TREE;
3977 
       /* For a combined construct, STARTVAR and ENDVAR become the inner
          construct's first two _looptemp_ variables.  */
3978   if (gimple_omp_for_combined_p (fd->for_stmt))
3979     {
3980       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3981                          ? gimple_omp_parallel_clauses (inner_stmt)
3982                          : gimple_omp_for_clauses (inner_stmt);
3983       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3984       gcc_assert (innerc);
3985       startvar = OMP_CLAUSE_DECL (innerc);
3986       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3987                                         OMP_CLAUSE__LOOPTEMP_);
3988       gcc_assert (innerc);
3989       endvar = OMP_CLAUSE_DECL (innerc);
3990       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3991             && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3992           {
3993             int i;
                 /* Step over one further _looptemp_ clause per additional
                    collapsed loop; an optional clause after those receives
                    the total iteration count.  */
3994             for (i = 1; i < fd->collapse; i++)
3995               {
3996                 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3997                                                   OMP_CLAUSE__LOOPTEMP_);
3998                 gcc_assert (innerc);
3999               }
4000             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4001                                             OMP_CLAUSE__LOOPTEMP_);
4002             if (innerc)
4003               {
4004                 /* If needed (distribute parallel for with lastprivate),
4005                      propagate down the total number of iterations.  */
4006                 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4007                                              fd->loop.n2);
4008                 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4009                                                       GSI_CONTINUE_LINKING);
4010                 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4011                 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4012               }
4013           }
4014     }
4015 
       /* STARTVAR = s0 * STEP + N1;  */
4016   t = fold_convert (itype, s0);
4017   t = fold_build2 (MULT_EXPR, itype, t, step);
4018   if (POINTER_TYPE_P (type))
4019     t = fold_build_pointer_plus (n1, t);
4020   else
4021     t = fold_build2 (PLUS_EXPR, type, t, n1);
4022   t = fold_convert (TREE_TYPE (startvar), t);
4023   t = force_gimple_operand_gsi (&gsi, t,
4024                                         DECL_P (startvar)
4025                                         && TREE_ADDRESSABLE (startvar),
4026                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
4027   assign_stmt = gimple_build_assign (startvar, t);
4028   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4029 
       /* e = e0 * STEP + N1;  */
4030   t = fold_convert (itype, e0);
4031   t = fold_build2 (MULT_EXPR, itype, t, step);
4032   if (POINTER_TYPE_P (type))
4033     t = fold_build_pointer_plus (n1, t);
4034   else
4035     t = fold_build2 (PLUS_EXPR, type, t, n1);
4036   t = fold_convert (TREE_TYPE (startvar), t);
4037   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4038                                         false, GSI_CONTINUE_LINKING);
       /* In the combined case, publish E through ENDVAR and also store
          it in the iteration variable, converting when the types
          differ.  */
4039   if (endvar)
4040     {
4041       assign_stmt = gimple_build_assign (endvar, e);
4042       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4044           assign_stmt = gimple_build_assign (fd->loop.v, e);
4045       else
4046           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4047       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4048     }
4049   /* Handle linear clause adjustments.  */
4050   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4051   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4052     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4053            c; c = OMP_CLAUSE_CHAIN (c))
4054       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4055             && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4056           {
4057             tree d = OMP_CLAUSE_DECL (c);
4058             bool is_ref = omp_is_reference (d);
4059             tree t = d, a, dest;
4060             if (is_ref)
4061               t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4062             tree type = TREE_TYPE (t);
4063             if (POINTER_TYPE_P (type))
4064               type = sizetype;
4065             dest = unshare_expr (t);
                 /* Copy the variable's value into the temporary V at GSIF
                    (the saved ENTRY_BB position); each chunk then sets
                    DEST = V + ITERCNT * linear-step.  */
4066             tree v = create_tmp_var (TREE_TYPE (t), NULL);
4067             expand_omp_build_assign (&gsif, v, t);
                 /* ITERCNT is computed once and shared by all linear
                    clauses.  */
4068             if (itercnt == NULL_TREE)
4069               {
4070                 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071                     {
                           /* ITERCNT = s0 + (N1 - fd->loop.n1) / STEP: bias
                              s0 by the distance of this construct's lower
                              bound from the bound recorded in FD.  */
4072                       itercntbias
4073                         = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4074                                            fold_convert (itype, fd->loop.n1));
4075                       itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4076                                                        itercntbias, step);
4077                       itercntbias
4078                         = force_gimple_operand_gsi (&gsif, itercntbias, true,
4079                                                             NULL_TREE, true,
4080                                                             GSI_SAME_STMT);
4081                       itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4082                       itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4083                                                                   NULL_TREE, false,
4084                                                                   GSI_CONTINUE_LINKING);
4085                     }
4086                 else
4087                     itercnt = s0;
4088               }
4089             a = fold_build2 (MULT_EXPR, type,
4090                                  fold_convert (type, itercnt),
4091                                  fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4092             t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4093                                  : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4094             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4095                                                   false, GSI_CONTINUE_LINKING);
4096             assign_stmt = gimple_build_assign (dest, t);
4097             gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4098           }
4099   if (fd->collapse > 1)
4100     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4101 
4102   if (!broken_loop)
4103     {
4104       /* The code controlling the sequential loop goes in CONT_BB,
4105            replacing the GIMPLE_OMP_CONTINUE.  */
4106       gsi = gsi_last_nondebug_bb (cont_bb);
4107       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4108       vmain = gimple_omp_continue_control_use (cont_stmt);
4109       vback = gimple_omp_continue_control_def (cont_stmt);
4110 
4111       if (!gimple_omp_for_combined_p (fd->for_stmt))
4112           {
                 /* V += STEP;  */
4113             if (POINTER_TYPE_P (type))
4114               t = fold_build_pointer_plus (vmain, step);
4115             else
4116               t = fold_build2 (PLUS_EXPR, type, vmain, step);
                 /* For an addressable VBACK the new value is first forced
                    into a gimple operand, which is also what the loop
                    condition below tests instead of VBACK itself.  */
4117             if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4118               t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4119                                                     true, GSI_SAME_STMT);
4120             assign_stmt = gimple_build_assign (vback, t);
4121             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4122 
                 /* With a chunk size of exactly one the back-edge test is
                    replaced by the constant-false comparison 0 == 1;
                    otherwise emit "V cond e".  */
4123             if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4124               t = build2 (EQ_EXPR, boolean_type_node,
4125                               build_int_cst (itype, 0),
4126                               build_int_cst (itype, 1));
4127             else
4128               t = build2 (fd->loop.cond_code, boolean_type_node,
4129                               DECL_P (vback) && TREE_ADDRESSABLE (vback)
4130                               ? t : vback, e);
4131             gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4132           }
4133 
4134       /* Remove GIMPLE_OMP_CONTINUE.  */
4135       gsi_remove (&gsi, true);
4136 
4137       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4138           collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4139 
4140       /* Trip update code goes into TRIP_UPDATE_BB.  */
4141       gsi = gsi_start_bb (trip_update_bb);
4142 
4143       t = build_int_cst (itype, 1);
4144       t = build2 (PLUS_EXPR, itype, trip_main, t);
4145       assign_stmt = gimple_build_assign (trip_back, t);
4146       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147     }
4148 
4149   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4150   gsi = gsi_last_nondebug_bb (exit_bb);
4151   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4152     {
4153       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4154       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4155     }
4156   gsi_remove (&gsi, true);
4157 
4158   /* Connect the new blocks.  */
4159   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4160   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4161 
4162   if (!broken_loop)
4163     {
           /* SE is the back edge from CONT_BB towards the body, possibly
              through one intermediate block.  */
4164       se = find_edge (cont_bb, body_bb);
4165       if (se == NULL)
4166           {
4167             se = BRANCH_EDGE (cont_bb);
4168             gcc_assert (single_succ (se->dest) == body_bb);
4169           }
           /* Combined loops get no back edge here; collapsed loops route
              it through COLLAPSE_BB; otherwise it is the true edge of the
              loop condition.  */
4170       if (gimple_omp_for_combined_p (fd->for_stmt))
4171           {
4172             remove_edge (se);
4173             se = NULL;
4174           }
4175       else if (fd->collapse > 1)
4176           {
4177             remove_edge (se);
4178             se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4179           }
4180       else
4181           se->flags = EDGE_TRUE_VALUE;
4182       find_edge (cont_bb, trip_update_bb)->flags
4183           = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4184 
           /* Close the outer loop: TRIP_UPDATE_BB now jumps back to
              ITER_PART_BB instead of continuing to FIN_BB.  */
4185       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4186                                         iter_part_bb);
4187     }
4188 
4189   if (gimple_in_ssa_p (cfun))
4190     {
4191       gphi_iterator psi;
4192       gphi *phi;
4193       edge re, ene;
4194       edge_var_map *vm;
4195       size_t i;
4196 
4197       gcc_assert (fd->collapse == 1 && !broken_loop);
4198 
4199       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200            remove arguments of the phi nodes in fin_bb.  We need to create
4201            appropriate phi nodes in iter_part_bb instead.  */
4202       se = find_edge (iter_part_bb, fin_bb);
4203       re = single_succ_edge (trip_update_bb);
4204       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4205       ene = single_succ_edge (entry_bb);
4206 
           /* Walk the phis of FIN_BB in lock step with the variable map
              recorded for the redirected edge RE.  */
4207       psi = gsi_start_phis (fin_bb);
4208       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4209              gsi_next (&psi), ++i)
4210           {
4211             gphi *nphi;
4212             source_location locus;
4213 
4214             phi = psi.phi ();
                 /* Nothing to do when the phi's first argument already
                    equals the value recorded for the redirected edge.  */
4215             if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4216                                      redirect_edge_var_map_def (vm), 0))
4217               continue;
4218 
4219             t = gimple_phi_result (phi);
4220             gcc_assert (t == redirect_edge_var_map_result (vm));
4221 
                 /* If FIN_BB keeps other predecessors its phi stays, so
                    the new phi in ITER_PART_BB needs a fresh SSA name.  */
4222             if (!single_pred_p (fin_bb))
4223               t = copy_ssa_name (t, phi);
4224 
4225             nphi = create_phi_node (t, iter_part_bb);
4226 
4227             t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4228             locus = gimple_phi_arg_location_from_edge (phi, se);
4229 
4230             /* A special case -- fd->loop.v is not yet computed in
4231                iter_part_bb, we need to use vextra instead.  */
4232             if (t == fd->loop.v)
4233               t = vextra;
4234             add_phi_arg (nphi, t, ene, locus);
4235             locus = redirect_edge_var_map_location (vm);
4236             tree back_arg = redirect_edge_var_map_def (vm);
4237             add_phi_arg (nphi, back_arg, re, locus);
4238             edge ce = find_edge (cont_bb, body_bb);
4239             if (ce == NULL)
4240               {
4241                 ce = BRANCH_EDGE (cont_bb);
4242                 gcc_assert (single_succ (ce->dest) == body_bb);
4243                 ce = single_succ_edge (ce->dest);
4244               }
                 /* The body block must have a phi receiving BACK_ARG on
                    its back edge; give that phi the new phi's result on
                    the entry edge from SEQ_START_BB.  */
4245             gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4246             gcc_assert (inner_loop_phi != NULL);
4247             add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4248                            find_edge (seq_start_bb, body_bb), locus);
4249 
4250             if (!single_pred_p (fin_bb))
4251               add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4252           }
4253       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4254       redirect_edge_var_map_clear (re);
           /* With a single predecessor FIN_BB no longer needs phi nodes;
              remove them all (without releasing their results).  */
4255       if (single_pred_p (fin_bb))
4256           while (1)
4257             {
4258               psi = gsi_start_phis (fin_bb);
4259               if (gsi_end_p (psi))
4260                 break;
4261               remove_phi_node (&psi, false);
4262             }
4263 
4264       /* Make phi node for trip.  */
4265       phi = create_phi_node (trip_main, iter_part_bb);
4266       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4267                        UNKNOWN_LOCATION);
4268       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4269                        UNKNOWN_LOCATION);
4270     }
4271 
       /* Refresh immediate dominators for the blocks whose predecessors
          were changed above.  */
4272   if (!broken_loop)
4273     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4274   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4275                                  recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4276   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4277                                  recompute_dominator (CDI_DOMINATORS, fin_bb));
4278   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4279                                  recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4280   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4281                                  recompute_dominator (CDI_DOMINATORS, body_bb));
4282 
4283   if (!broken_loop)
4284     {
           /* Register the outer chunk-dispatch loop: header ITER_PART_BB,
              latch TRIP_UPDATE_BB.  */
4285       struct loop *loop = body_bb->loop_father;
4286       struct loop *trip_loop = alloc_loop ();
4287       trip_loop->header = iter_part_bb;
4288       trip_loop->latch = trip_update_bb;
4289       add_loop (trip_loop, iter_part_bb->loop_father);
4290 
           /* The body already belongs to a recorded loop distinct from
              the entry block's: check its shape and hang it under
              TRIP_LOOP.  */
4291       if (loop != entry_bb->loop_father)
4292           {
4293             gcc_assert (loop->header == body_bb);
4294             gcc_assert (loop->latch == region->cont
4295                           || single_pred (loop->latch) == region->cont);
4296             trip_loop->inner = loop;
4297             return;
4298           }
4299 
           /* Otherwise create the inner sequential loop, unless the body
              is a combined construct.  The latch is only set when no
              collapse block was inserted.  */
4300       if (!gimple_omp_for_combined_p (fd->for_stmt))
4301           {
4302             loop = alloc_loop ();
4303             loop->header = body_bb;
4304             if (collapse_bb == NULL)
4305               loop->latch = cont_bb;
4306             add_loop (loop, trip_loop);
4307           }
4308     }
4309 }
4310 
4311 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4312    loop.  Given parameters:
4313 
4314           for (V = N1; V cond N2; V += STEP) BODY;
4315 
4316    where COND is "<" or ">", we generate pseudocode
4317 
4318           V = N1;
4319           goto L1;
4320     L0:
4321           BODY;
4322           V += STEP;
4323     L1:
4324           if (V cond N2) goto L0; else goto L2;
4325     L2:
4326 
4327     For collapsed loops, given parameters:
4328       collapse(3)
4329       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4330           for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4331             for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4332               BODY;
4333 
4334     we generate pseudocode
4335 
4336           if (cond3 is <)
4337             adj = STEP3 - 1;
4338           else
4339             adj = STEP3 + 1;
4340           count3 = (adj + N32 - N31) / STEP3;
4341           if (cond2 is <)
4342             adj = STEP2 - 1;
4343           else
4344             adj = STEP2 + 1;
4345           count2 = (adj + N22 - N21) / STEP2;
4346           if (cond1 is <)
4347             adj = STEP1 - 1;
4348           else
4349             adj = STEP1 + 1;
4350           count1 = (adj + N12 - N11) / STEP1;
4351           count = count1 * count2 * count3;
4352           V = 0;
4353           V1 = N11;
4354           V2 = N21;
4355           V3 = N31;
4356           goto L1;
4357     L0:
4358           BODY;
4359           V += 1;
4360           V3 += STEP3;
4361           V2 += (V3 cond3 N32) ? 0 : STEP2;
4362           V3 = (V3 cond3 N32) ? V3 : N31;
4363           V1 += (V2 cond2 N22) ? 0 : STEP1;
4364           V2 = (V2 cond2 N22) ? V2 : N21;
4365     L1:
4366           if (V < count) goto L0; else goto L2;
4367     L2:
4368 
4369       */
4370 
4371 static void
expand_omp_simd(struct omp_region * region,struct omp_for_data * fd)4372 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4373 {
4374   tree type, t;
4375   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4376   gimple_stmt_iterator gsi;
4377   gimple *stmt;
4378   gcond *cond_stmt;
4379   bool broken_loop = region->cont == NULL;
4380   edge e, ne;
4381   tree *counts = NULL;
4382   int i;
4383   int safelen_int = INT_MAX;
4384   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4385                                           OMP_CLAUSE_SAFELEN);
4386   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4387                                           OMP_CLAUSE__SIMDUID_);
4388   tree n1, n2;
4389 
4390   if (safelen)
4391     {
4392       poly_uint64 val;
4393       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4394       if (!poly_int_tree_p (safelen, &val))
4395           safelen_int = 0;
4396       else
4397           safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4398       if (safelen_int == 1)
4399           safelen_int = 0;
4400     }
4401   type = TREE_TYPE (fd->loop.v);
4402   entry_bb = region->entry;
4403   cont_bb = region->cont;
4404   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4405   gcc_assert (broken_loop
4406                 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4407   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4408   if (!broken_loop)
4409     {
4410       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4411       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4412       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4413       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4414     }
4415   else
4416     {
4417       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4418       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4419       l2_bb = single_succ (l1_bb);
4420     }
4421   exit_bb = region->exit;
4422   l2_dom_bb = NULL;
4423 
4424   gsi = gsi_last_nondebug_bb (entry_bb);
4425 
4426   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4427   /* Not needed in SSA form right now.  */
4428   gcc_assert (!gimple_in_ssa_p (cfun));
4429   if (fd->collapse > 1)
4430     {
4431       int first_zero_iter = -1, dummy = -1;
4432       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4433 
4434       counts = XALLOCAVEC (tree, fd->collapse);
4435       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4436                                           zero_iter_bb, first_zero_iter,
4437                                           dummy_bb, dummy, l2_dom_bb);
4438     }
4439   if (l2_dom_bb == NULL)
4440     l2_dom_bb = l1_bb;
4441 
4442   n1 = fd->loop.n1;
4443   n2 = fd->loop.n2;
4444   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4445     {
4446       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4447                                              OMP_CLAUSE__LOOPTEMP_);
4448       gcc_assert (innerc);
4449       n1 = OMP_CLAUSE_DECL (innerc);
4450       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4451                                         OMP_CLAUSE__LOOPTEMP_);
4452       gcc_assert (innerc);
4453       n2 = OMP_CLAUSE_DECL (innerc);
4454     }
4455   tree step = fd->loop.step;
4456 
4457   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4458                                           OMP_CLAUSE__SIMT_);
4459   if (is_simt)
4460     {
4461       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4462       is_simt = safelen_int > 1;
4463     }
4464   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4465   if (is_simt)
4466     {
4467       simt_lane = create_tmp_var (unsigned_type_node);
4468       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4469       gimple_call_set_lhs (g, simt_lane);
4470       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4471       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4472                                          fold_convert (TREE_TYPE (step), simt_lane));
4473       n1 = fold_convert (type, n1);
4474       if (POINTER_TYPE_P (type))
4475           n1 = fold_build_pointer_plus (n1, offset);
4476       else
4477           n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4478 
4479       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4480       if (fd->collapse > 1)
4481           simt_maxlane = build_one_cst (unsigned_type_node);
4482       else if (safelen_int < omp_max_simt_vf ())
4483           simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4484       tree vf
4485           = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4486                                                   unsigned_type_node, 0);
4487       if (simt_maxlane)
4488           vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4489       vf = fold_convert (TREE_TYPE (step), vf);
4490       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4491     }
4492 
4493   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4494   if (fd->collapse > 1)
4495     {
4496       if (gimple_omp_for_combined_into_p (fd->for_stmt))
4497           {
4498             gsi_prev (&gsi);
4499             expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4500             gsi_next (&gsi);
4501           }
4502       else
4503           for (i = 0; i < fd->collapse; i++)
4504             {
4505               tree itype = TREE_TYPE (fd->loops[i].v);
4506               if (POINTER_TYPE_P (itype))
4507                 itype = signed_type_for (itype);
4508               t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4509               expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4510             }
4511     }
4512 
4513   /* Remove the GIMPLE_OMP_FOR statement.  */
4514   gsi_remove (&gsi, true);
4515 
4516   if (!broken_loop)
4517     {
4518       /* Code to control the increment goes in the CONT_BB.  */
4519       gsi = gsi_last_nondebug_bb (cont_bb);
4520       stmt = gsi_stmt (gsi);
4521       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4522 
4523       if (POINTER_TYPE_P (type))
4524           t = fold_build_pointer_plus (fd->loop.v, step);
4525       else
4526           t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4527       expand_omp_build_assign (&gsi, fd->loop.v, t);
4528 
4529       if (fd->collapse > 1)
4530           {
4531             i = fd->collapse - 1;
4532             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4533               {
4534                 t = fold_convert (sizetype, fd->loops[i].step);
4535                 t = fold_build_pointer_plus (fd->loops[i].v, t);
4536               }
4537             else
4538               {
4539                 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4540                                         fd->loops[i].step);
4541                 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4542                                      fd->loops[i].v, t);
4543               }
4544             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4545 
4546             for (i = fd->collapse - 1; i > 0; i--)
4547               {
4548                 tree itype = TREE_TYPE (fd->loops[i].v);
4549                 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4550                 if (POINTER_TYPE_P (itype2))
4551                     itype2 = signed_type_for (itype2);
4552                 t = fold_convert (itype2, fd->loops[i - 1].step);
4553                 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4554                                                       GSI_SAME_STMT);
4555                 t = build3 (COND_EXPR, itype2,
4556                                 build2 (fd->loops[i].cond_code, boolean_type_node,
4557                                           fd->loops[i].v,
4558                                           fold_convert (itype, fd->loops[i].n2)),
4559                                 build_int_cst (itype2, 0), t);
4560                 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4561                     t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4562                 else
4563                     t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4564                 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4565 
4566                 t = fold_convert (itype, fd->loops[i].n1);
4567                 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4568                                                       GSI_SAME_STMT);
4569                 t = build3 (COND_EXPR, itype,
4570                                 build2 (fd->loops[i].cond_code, boolean_type_node,
4571                                           fd->loops[i].v,
4572                                           fold_convert (itype, fd->loops[i].n2)),
4573                                 fd->loops[i].v, t);
4574                 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4575               }
4576           }
4577 
4578       /* Remove GIMPLE_OMP_CONTINUE.  */
4579       gsi_remove (&gsi, true);
4580     }
4581 
4582   /* Emit the condition in L1_BB.  */
4583   gsi = gsi_start_bb (l1_bb);
4584 
4585   t = fold_convert (type, n2);
4586   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4587                                         false, GSI_CONTINUE_LINKING);
4588   tree v = fd->loop.v;
4589   if (DECL_P (v) && TREE_ADDRESSABLE (v))
4590     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4591                                           false, GSI_CONTINUE_LINKING);
4592   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4593   cond_stmt = gimple_build_cond_empty (t);
4594   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4595   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4596                      NULL, NULL)
4597       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4598                         NULL, NULL))
4599     {
4600       gsi = gsi_for_stmt (cond_stmt);
4601       gimple_regimplify_operands (cond_stmt, &gsi);
4602     }
4603 
4604   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4605   if (is_simt)
4606     {
4607       gsi = gsi_start_bb (l2_bb);
4608       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4609       if (POINTER_TYPE_P (type))
4610           t = fold_build_pointer_plus (fd->loop.v, step);
4611       else
4612           t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4613       expand_omp_build_assign (&gsi, fd->loop.v, t);
4614     }
4615 
4616   /* Remove GIMPLE_OMP_RETURN.  */
4617   gsi = gsi_last_nondebug_bb (exit_bb);
4618   gsi_remove (&gsi, true);
4619 
4620   /* Connect the new blocks.  */
4621   remove_edge (FALLTHRU_EDGE (entry_bb));
4622 
4623   if (!broken_loop)
4624     {
4625       remove_edge (BRANCH_EDGE (entry_bb));
4626       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4627 
4628       e = BRANCH_EDGE (l1_bb);
4629       ne = FALLTHRU_EDGE (l1_bb);
4630       e->flags = EDGE_TRUE_VALUE;
4631     }
4632   else
4633     {
4634       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4635 
4636       ne = single_succ_edge (l1_bb);
4637       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4638 
4639     }
4640   ne->flags = EDGE_FALSE_VALUE;
4641   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4642   ne->probability = e->probability.invert ();
4643 
4644   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4645   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4646 
4647   if (simt_maxlane)
4648     {
4649       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4650                                              NULL_TREE, NULL_TREE);
4651       gsi = gsi_last_bb (entry_bb);
4652       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4653       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4654       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4655       FALLTHRU_EDGE (entry_bb)->probability
4656            = profile_probability::guessed_always ().apply_scale (7, 8);
4657       BRANCH_EDGE (entry_bb)->probability
4658            = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4659       l2_dom_bb = entry_bb;
4660     }
4661   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4662 
4663   if (!broken_loop)
4664     {
4665       struct loop *loop = alloc_loop ();
4666       loop->header = l1_bb;
4667       loop->latch = cont_bb;
4668       add_loop (loop, l1_bb->loop_father);
4669       loop->safelen = safelen_int;
4670       if (simduid)
4671           {
4672             loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4673             cfun->has_simduid_loops = true;
4674           }
4675       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4676            the loop.  */
4677       if ((flag_tree_loop_vectorize
4678              || !global_options_set.x_flag_tree_loop_vectorize)
4679             && flag_tree_loop_optimize
4680             && loop->safelen > 1)
4681           {
4682             loop->force_vectorize = true;
4683             cfun->has_force_vectorize_loops = true;
4684           }
4685     }
4686   else if (simduid)
4687     cfun->has_simduid_loops = true;
4688 }
4689 
4690 /* Taskloop construct is represented after gimplification with
4691    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4692    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4693    which should just compute all the needed loop temporaries
4694    for GIMPLE_OMP_TASK.  */
4695 
4696 static void
expand_omp_taskloop_for_outer(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)4697 expand_omp_taskloop_for_outer (struct omp_region *region,
4698                                      struct omp_for_data *fd,
4699                                      gimple *inner_stmt)
4700 {
4701   tree type, bias = NULL_TREE;
4702   basic_block entry_bb, cont_bb, exit_bb;
4703   gimple_stmt_iterator gsi;
4704   gassign *assign_stmt;
4705   tree *counts = NULL;
4706   int i;
4707 
4708   gcc_assert (inner_stmt);
4709   gcc_assert (region->cont);
4710   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4711                 && gimple_omp_task_taskloop_p (inner_stmt));
4712   type = TREE_TYPE (fd->loop.v);
4713 
4714   /* See if we need to bias by LLONG_MIN.  */
4715   if (fd->iter_type == long_long_unsigned_type_node
4716       && TREE_CODE (type) == INTEGER_TYPE
4717       && !TYPE_UNSIGNED (type))
4718     {
4719       tree n1, n2;
4720 
4721       if (fd->loop.cond_code == LT_EXPR)
4722           {
4723             n1 = fd->loop.n1;
4724             n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4725           }
4726       else
4727           {
4728             n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4729             n2 = fd->loop.n1;
4730           }
4731       if (TREE_CODE (n1) != INTEGER_CST
4732             || TREE_CODE (n2) != INTEGER_CST
4733             || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4734           bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4735     }
4736 
4737   entry_bb = region->entry;
4738   cont_bb = region->cont;
4739   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4740   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4741   exit_bb = region->exit;
4742 
4743   gsi = gsi_last_nondebug_bb (entry_bb);
4744   gimple *for_stmt = gsi_stmt (gsi);
4745   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4746   if (fd->collapse > 1)
4747     {
4748       int first_zero_iter = -1, dummy = -1;
4749       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4750 
4751       counts = XALLOCAVEC (tree, fd->collapse);
4752       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4753                                           zero_iter_bb, first_zero_iter,
4754                                           dummy_bb, dummy, l2_dom_bb);
4755 
4756       if (zero_iter_bb)
4757           {
4758             /* Some counts[i] vars might be uninitialized if
4759                some loop has zero iterations.  But the body shouldn't
4760                be executed in that case, so just avoid uninit warnings.  */
4761             for (i = first_zero_iter; i < fd->collapse; i++)
4762               if (SSA_VAR_P (counts[i]))
4763                 TREE_NO_WARNING (counts[i]) = 1;
4764             gsi_prev (&gsi);
4765             edge e = split_block (entry_bb, gsi_stmt (gsi));
4766             entry_bb = e->dest;
4767             make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4768             gsi = gsi_last_bb (entry_bb);
4769             set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4770                                            get_immediate_dominator (CDI_DOMINATORS,
4771                                                                           zero_iter_bb));
4772           }
4773     }
4774 
4775   tree t0, t1;
4776   t1 = fd->loop.n2;
4777   t0 = fd->loop.n1;
4778   if (POINTER_TYPE_P (TREE_TYPE (t0))
4779       && TYPE_PRECISION (TREE_TYPE (t0))
4780            != TYPE_PRECISION (fd->iter_type))
4781     {
4782       /* Avoid casting pointers to integer of a different size.  */
4783       tree itype = signed_type_for (type);
4784       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4785       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4786     }
4787   else
4788     {
4789       t1 = fold_convert (fd->iter_type, t1);
4790       t0 = fold_convert (fd->iter_type, t0);
4791     }
4792   if (bias)
4793     {
4794       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4795       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4796     }
4797 
4798   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4799                                          OMP_CLAUSE__LOOPTEMP_);
4800   gcc_assert (innerc);
4801   tree startvar = OMP_CLAUSE_DECL (innerc);
4802   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4803   gcc_assert (innerc);
4804   tree endvar = OMP_CLAUSE_DECL (innerc);
4805   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4806     {
4807       gcc_assert (innerc);
4808       for (i = 1; i < fd->collapse; i++)
4809           {
4810             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4811                                             OMP_CLAUSE__LOOPTEMP_);
4812             gcc_assert (innerc);
4813           }
4814       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4815                                         OMP_CLAUSE__LOOPTEMP_);
4816       if (innerc)
4817           {
4818             /* If needed (inner taskloop has lastprivate clause), propagate
4819                down the total number of iterations.  */
4820             tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4821                                                        NULL_TREE, false,
4822                                                        GSI_CONTINUE_LINKING);
4823             assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4824             gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4825           }
4826     }
4827 
4828   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4829                                          GSI_CONTINUE_LINKING);
4830   assign_stmt = gimple_build_assign (startvar, t0);
4831   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4832 
4833   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4834                                          GSI_CONTINUE_LINKING);
4835   assign_stmt = gimple_build_assign (endvar, t1);
4836   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4837   if (fd->collapse > 1)
4838     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4839 
4840   /* Remove the GIMPLE_OMP_FOR statement.  */
4841   gsi = gsi_for_stmt (for_stmt);
4842   gsi_remove (&gsi, true);
4843 
4844   gsi = gsi_last_nondebug_bb (cont_bb);
4845   gsi_remove (&gsi, true);
4846 
4847   gsi = gsi_last_nondebug_bb (exit_bb);
4848   gsi_remove (&gsi, true);
4849 
4850   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4851   remove_edge (BRANCH_EDGE (entry_bb));
4852   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4853   remove_edge (BRANCH_EDGE (cont_bb));
4854   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4855   set_immediate_dominator (CDI_DOMINATORS, region->entry,
4856                                  recompute_dominator (CDI_DOMINATORS, region->entry));
4857 }
4858 
4859 /* Taskloop construct is represented after gimplification with
4860    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4861    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
4862    GOMP_taskloop{,_ull} function arranges for each task to be given just
4863    a single range of iterations.  */
4864 
4865 static void
expand_omp_taskloop_for_inner(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)4866 expand_omp_taskloop_for_inner (struct omp_region *region,
4867                                      struct omp_for_data *fd,
4868                                      gimple *inner_stmt)
4869 {
4870   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4871   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4872   basic_block fin_bb;
4873   gimple_stmt_iterator gsi;
4874   edge ep;
4875   bool broken_loop = region->cont == NULL;
4876   tree *counts = NULL;
4877   tree n1, n2, step;
4878 
4879   itype = type = TREE_TYPE (fd->loop.v);
4880   if (POINTER_TYPE_P (type))
4881     itype = signed_type_for (type);
4882 
4883   /* See if we need to bias by LLONG_MIN.  */
4884   if (fd->iter_type == long_long_unsigned_type_node
4885       && TREE_CODE (type) == INTEGER_TYPE
4886       && !TYPE_UNSIGNED (type))
4887     {
4888       tree n1, n2;
4889 
4890       if (fd->loop.cond_code == LT_EXPR)
4891           {
4892             n1 = fd->loop.n1;
4893             n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4894           }
4895       else
4896           {
4897             n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4898             n2 = fd->loop.n1;
4899           }
4900       if (TREE_CODE (n1) != INTEGER_CST
4901             || TREE_CODE (n2) != INTEGER_CST
4902             || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4903           bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4904     }
4905 
4906   entry_bb = region->entry;
4907   cont_bb = region->cont;
4908   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4909   fin_bb = BRANCH_EDGE (entry_bb)->dest;
4910   gcc_assert (broken_loop
4911                 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4912   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4913   if (!broken_loop)
4914     {
4915       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4916       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4917     }
4918   exit_bb = region->exit;
4919 
4920   /* Iteration space partitioning goes in ENTRY_BB.  */
4921   gsi = gsi_last_nondebug_bb (entry_bb);
4922   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4923 
4924   if (fd->collapse > 1)
4925     {
4926       int first_zero_iter = -1, dummy = -1;
4927       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4928 
4929       counts = XALLOCAVEC (tree, fd->collapse);
4930       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4931                                           fin_bb, first_zero_iter,
4932                                           dummy_bb, dummy, l2_dom_bb);
4933       t = NULL_TREE;
4934     }
4935   else
4936     t = integer_one_node;
4937 
4938   step = fd->loop.step;
4939   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4940                                          OMP_CLAUSE__LOOPTEMP_);
4941   gcc_assert (innerc);
4942   n1 = OMP_CLAUSE_DECL (innerc);
4943   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4944   gcc_assert (innerc);
4945   n2 = OMP_CLAUSE_DECL (innerc);
4946   if (bias)
4947     {
4948       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4949       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4950     }
4951   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4952                                          true, NULL_TREE, true, GSI_SAME_STMT);
4953   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4954                                          true, NULL_TREE, true, GSI_SAME_STMT);
4955   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4956                                            true, NULL_TREE, true, GSI_SAME_STMT);
4957 
4958   tree startvar = fd->loop.v;
4959   tree endvar = NULL_TREE;
4960 
4961   if (gimple_omp_for_combined_p (fd->for_stmt))
4962     {
4963       tree clauses = gimple_omp_for_clauses (inner_stmt);
4964       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4965       gcc_assert (innerc);
4966       startvar = OMP_CLAUSE_DECL (innerc);
4967       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4968                                         OMP_CLAUSE__LOOPTEMP_);
4969       gcc_assert (innerc);
4970       endvar = OMP_CLAUSE_DECL (innerc);
4971     }
4972   t = fold_convert (TREE_TYPE (startvar), n1);
4973   t = force_gimple_operand_gsi (&gsi, t,
4974                                         DECL_P (startvar)
4975                                         && TREE_ADDRESSABLE (startvar),
4976                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
4977   gimple *assign_stmt = gimple_build_assign (startvar, t);
4978   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4979 
4980   t = fold_convert (TREE_TYPE (startvar), n2);
4981   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4982                                         false, GSI_CONTINUE_LINKING);
4983   if (endvar)
4984     {
4985       assign_stmt = gimple_build_assign (endvar, e);
4986       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4987       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4988           assign_stmt = gimple_build_assign (fd->loop.v, e);
4989       else
4990           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4991       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4992     }
4993   if (fd->collapse > 1)
4994     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4995 
4996   if (!broken_loop)
4997     {
4998       /* The code controlling the sequential loop replaces the
4999            GIMPLE_OMP_CONTINUE.  */
5000       gsi = gsi_last_nondebug_bb (cont_bb);
5001       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5002       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5003       vmain = gimple_omp_continue_control_use (cont_stmt);
5004       vback = gimple_omp_continue_control_def (cont_stmt);
5005 
5006       if (!gimple_omp_for_combined_p (fd->for_stmt))
5007           {
5008             if (POINTER_TYPE_P (type))
5009               t = fold_build_pointer_plus (vmain, step);
5010             else
5011               t = fold_build2 (PLUS_EXPR, type, vmain, step);
5012             t = force_gimple_operand_gsi (&gsi, t,
5013                                                   DECL_P (vback)
5014                                                   && TREE_ADDRESSABLE (vback),
5015                                                   NULL_TREE, true, GSI_SAME_STMT);
5016             assign_stmt = gimple_build_assign (vback, t);
5017             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5018 
5019             t = build2 (fd->loop.cond_code, boolean_type_node,
5020                           DECL_P (vback) && TREE_ADDRESSABLE (vback)
5021                           ? t : vback, e);
5022             gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5023           }
5024 
5025       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5026       gsi_remove (&gsi, true);
5027 
5028       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5029           collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5030     }
5031 
5032   /* Remove the GIMPLE_OMP_FOR statement.  */
5033   gsi = gsi_for_stmt (fd->for_stmt);
5034   gsi_remove (&gsi, true);
5035 
5036   /* Remove the GIMPLE_OMP_RETURN statement.  */
5037   gsi = gsi_last_nondebug_bb (exit_bb);
5038   gsi_remove (&gsi, true);
5039 
5040   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5041   if (!broken_loop)
5042     remove_edge (BRANCH_EDGE (entry_bb));
5043   else
5044     {
5045       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5046       region->outer->cont = NULL;
5047     }
5048 
5049   /* Connect all the blocks.  */
5050   if (!broken_loop)
5051     {
5052       ep = find_edge (cont_bb, body_bb);
5053       if (gimple_omp_for_combined_p (fd->for_stmt))
5054           {
5055             remove_edge (ep);
5056             ep = NULL;
5057           }
5058       else if (fd->collapse > 1)
5059           {
5060             remove_edge (ep);
5061             ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5062           }
5063       else
5064           ep->flags = EDGE_TRUE_VALUE;
5065       find_edge (cont_bb, fin_bb)->flags
5066           = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5067     }
5068 
5069   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5070                                  recompute_dominator (CDI_DOMINATORS, body_bb));
5071   if (!broken_loop)
5072     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5073                                    recompute_dominator (CDI_DOMINATORS, fin_bb));
5074 
5075   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5076     {
5077       struct loop *loop = alloc_loop ();
5078       loop->header = body_bb;
5079       if (collapse_bb == NULL)
5080           loop->latch = cont_bb;
5081       add_loop (loop, body_bb->loop_father);
5082     }
5083 }
5084 
5085 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5086    partitioned loop.  The lowering here is abstracted, in that the
5087    loop parameters are passed through internal functions, which are
5088    further lowered by oacc_device_lower, once we get to the target
5089    compiler.  The loop is of the form:
5090 
5091    for (V = B; V LTGT E; V += S) {BODY}
5092 
5093    where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
5094    (constant 0 for no chunking) and we will have a GWV partitioning
5095    mask, specifying dimensions over which the loop is to be
5096    partitioned (see note below).  We generate code that looks like
5097    (this ignores tiling):
5098 
5099    <entry_bb> [incoming FALL->body, BRANCH->exit]
5100      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5101      T range = E - B;
5102      T chunk_no = 0;
5103      T dir = LTGT == '<' ? +1 : -1;
5104      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5105      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5106 
5107    <head_bb> [created by splitting end of entry_bb]
5108      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5109      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5110      if (!(offset LTGT bound)) goto bottom_bb;
5111 
5112    <body_bb> [incoming]
5113      V = B + offset;
5114      {BODY}
5115 
5116    <cont_bb> [incoming, may == body_bb, FALL->exit_bb, BRANCH->body_bb]
5117      offset += step;
5118      if (offset LTGT bound) goto body_bb; [*]
5119 
5120    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5121      chunk_no++;
5122      if (chunk_no < chunk_max) goto head_bb;
5123 
5124    <exit_bb> [incoming]
5125      V = B + ((range -/+ 1) / S +/- 1) * S [*]
5126 
5127    [*] Needed if V live at end of loop.

        REGION supplies the entry, continue and exit blocks of the loop
        (REGION->entry, REGION->cont, REGION->exit); REGION->cont may be
        NULL when not in SSA form.  FD is the decoded loop description:
        its loop.v, loop.n1, loop.n2, loop.step and loop.cond_code give
        V, B, E, S and LTGT above, and its collapse and tiling fields
        select the collapsed/tiled expansion.  The chunking loop
        (chunk_no, chunk_max, bottom_bb) is only generated when not in
        SSA form.  */
5128 
5129 static void
expand_oacc_for(struct omp_region * region,struct omp_for_data * fd)5130 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5131 {
5132   tree v = fd->loop.v;
5133   enum tree_code cond_code = fd->loop.cond_code;
5134   enum tree_code plus_code = PLUS_EXPR;
5135 
5136   tree chunk_size = integer_minus_one_node;
5137   tree gwv = integer_zero_node;
5138   tree iter_type = TREE_TYPE (v);
5139   tree diff_type = iter_type;
5140   tree plus_type = iter_type;
5141   struct oacc_collapse *counts = NULL;
5142 
5143   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5144                            == GF_OMP_FOR_KIND_OACC_LOOP);
5145   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5146   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5147 
       /* A pointer iterator is advanced with POINTER_PLUS_EXPR and a
          sizetype offset.  */
5148   if (POINTER_TYPE_P (iter_type))
5149     {
5150       plus_code = POINTER_PLUS_EXPR;
5151       plus_type = sizetype;
5152     }
       /* Use a signed difference type that is at least as wide as int.  */
5153   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5154     diff_type = signed_type_for (diff_type);
5155   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5156     diff_type = integer_type_node;
5157 
5158   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5159   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5160   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5161   basic_block bottom_bb = NULL;
5162 
5163   /* entry_bb has two successors; the branch edge is to the exit
5164      block,  fallthrough edge to body.  */
5165   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5166                 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5167 
5168   /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
5169      body_bb, or to a block whose only successor is the body_bb.  Its
5170      fallthrough successor is the final block (same as the branch
5171      successor of the entry_bb).  */
5172   if (cont_bb)
5173     {
5174       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5175       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5176 
5177       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5178       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5179     }
5180   else
5181     gcc_assert (!gimple_in_ssa_p (cfun));
5182 
5183   /* The exit block only has entry_bb and cont_bb as predecessors.  */
5184   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5185 
5186   tree chunk_no;
5187   tree chunk_max = NULL_TREE;
5188   tree bound, offset;
5189   tree step = create_tmp_var (diff_type, ".step");
5190   bool up = cond_code == LT_EXPR;
5191   tree dir = build_int_cst (diff_type, up ? +1 : -1);
       /* The chunking loop is only generated when not in SSA form.  */
5192   bool chunking = !gimple_in_ssa_p (cfun);
5193   bool negating;
5194 
5195   /* Tiling vars.  */
5196   tree tile_size = NULL_TREE;
5197   tree element_s = NULL_TREE;
5198   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5199   basic_block elem_body_bb = NULL;
5200   basic_block elem_cont_bb = NULL;
5201 
5202   /* SSA instances.  */
5203   tree offset_incr = NULL_TREE;
5204   tree offset_init = NULL_TREE;
5205 
5206   gimple_stmt_iterator gsi;
5207   gassign *ass;
5208   gcall *call;
5209   gimple *stmt;
5210   tree expr;
5211   location_t loc;
5212   edge split, be, fte;
5213 
5214   /* Split the end of entry_bb to create head_bb.  */
5215   split = split_block (entry_bb, last_stmt (entry_bb));
5216   basic_block head_bb = split->dest;
5217   entry_bb = split->src;
5218 
5219   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5220   gsi = gsi_last_nondebug_bb (entry_bb);
5221   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5222   loc = gimple_location (for_stmt);
5223 
5224   if (gimple_in_ssa_p (cfun))
5225     {
5226       offset_init = gimple_omp_for_index (for_stmt, 0);
5227       gcc_assert (integer_zerop (fd->loop.n1));
5228       /* The SSA parallelizer does gang parallelism.  */
5229       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5230     }
5231 
5232   if (fd->collapse > 1 || fd->tiling)
5233     {
5234       gcc_assert (!gimple_in_ssa_p (cfun) && up);
5235       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5236       tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5237                                                         TREE_TYPE (fd->loop.n2), loc);
5238 
5239       if (SSA_VAR_P (fd->loop.n2))
5240           {
5241             total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5242                                                       true, GSI_SAME_STMT);
5243             ass = gimple_build_assign (fd->loop.n2, total);
5244             gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5245           }
5246     }
5247 
5248   tree b = fd->loop.n1;
5249   tree e = fd->loop.n2;
5250   tree s = fd->loop.step;
5251 
5252   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5253   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5254 
5255   /* Convert the step, avoiding possible unsigned->signed overflow.  */
5256   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5257   if (negating)
5258     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5259   s = fold_convert (diff_type, s);
5260   if (negating)
5261     s = fold_build1 (NEGATE_EXPR, diff_type, s);
5262   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5263 
5264   if (!chunking)
5265     chunk_size = integer_zero_node;
5266   expr = fold_convert (diff_type, chunk_size);
5267   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5268                                                    NULL_TREE, true, GSI_SAME_STMT);
5269 
5270   if (fd->tiling)
5271     {
5272       /* Determine the tile size and element step,
5273            modify the outer loop step size.  */
5274       tile_size = create_tmp_var (diff_type, ".tile_size");
5275       expr = build_int_cst (diff_type, 1);
5276       for (int ix = 0; ix < fd->collapse; ix++)
5277           expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5278       expr = force_gimple_operand_gsi (&gsi, expr, true,
5279                                                NULL_TREE, true, GSI_SAME_STMT);
5280       ass = gimple_build_assign (tile_size, expr);
5281       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5282 
5283       element_s = create_tmp_var (diff_type, ".element_s");
5284       ass = gimple_build_assign (element_s, s);
5285       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5286 
5287       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5288       s = force_gimple_operand_gsi (&gsi, expr, true,
5289                                             NULL_TREE, true, GSI_SAME_STMT);
5290     }
5291 
5292   /* Determine the range, avoiding possible unsigned->signed overflow.  */
5293   negating = !up && TYPE_UNSIGNED (iter_type);
5294   expr = fold_build2 (MINUS_EXPR, plus_type,
5295                           fold_convert (plus_type, negating ? b : e),
5296                           fold_convert (plus_type, negating ? e : b));
5297   expr = fold_convert (diff_type, expr);
5298   if (negating)
5299     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5300   tree range = force_gimple_operand_gsi (&gsi, expr, true,
5301                                                    NULL_TREE, true, GSI_SAME_STMT);
5302 
5303   chunk_no = build_int_cst (diff_type, 0);
5304   if (chunking)
5305     {
5306       gcc_assert (!gimple_in_ssa_p (cfun));
5307 
           /* Replace the constant chunk_no by a variable initialized to
              that constant, and compute the number of chunks.  */
5308       expr = chunk_no;
5309       chunk_max = create_tmp_var (diff_type, ".chunk_max");
5310       chunk_no = create_tmp_var (diff_type, ".chunk_no");
5311 
5312       ass = gimple_build_assign (chunk_no, expr);
5313       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5314 
5315       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5316                                                    build_int_cst (integer_type_node,
5317                                                                       IFN_GOACC_LOOP_CHUNKS),
5318                                                    dir, range, s, chunk_size, gwv);
5319       gimple_call_set_lhs (call, chunk_max);
5320       gimple_set_location (call, loc);
5321       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5322     }
       /* Without chunking, the chunk size passed to the internal calls
          below is the zero constant just built for chunk_no.  */
5323   else
5324     chunk_size = chunk_no;
5325 
5326   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5327                                              build_int_cst (integer_type_node,
5328                                                                 IFN_GOACC_LOOP_STEP),
5329                                              dir, range, s, chunk_size, gwv);
5330   gimple_call_set_lhs (call, step);
5331   gimple_set_location (call, loc);
5332   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5333 
5334   /* Remove the GIMPLE_OMP_FOR.  */
5335   gsi_remove (&gsi, true);
5336 
5337   /* Fixup edges from head_bb.  */
5338   be = BRANCH_EDGE (head_bb);
5339   fte = FALLTHRU_EDGE (head_bb);
5340   be->flags |= EDGE_FALSE_VALUE;
5341   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5342 
5343   basic_block body_bb = fte->dest;
5344 
       /* In SSA form the offset use/def come from the GIMPLE_OMP_CONTINUE
          statement; otherwise one temporary serves as initial value,
          use and incremented value.  */
5345   if (gimple_in_ssa_p (cfun))
5346     {
5347       gsi = gsi_last_nondebug_bb (cont_bb);
5348       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5349 
5350       offset = gimple_omp_continue_control_use (cont_stmt);
5351       offset_incr = gimple_omp_continue_control_def (cont_stmt);
5352     }
5353   else
5354     {
5355       offset = create_tmp_var (diff_type, ".offset");
5356       offset_init = offset_incr = offset;
5357     }
5358   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5359 
5360   /* Loop offset & bound go into head_bb.  */
5361   gsi = gsi_start_bb (head_bb);
5362 
5363   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5364                                              build_int_cst (integer_type_node,
5365                                                                 IFN_GOACC_LOOP_OFFSET),
5366                                              dir, range, s,
5367                                              chunk_size, gwv, chunk_no);
5368   gimple_call_set_lhs (call, offset_init);
5369   gimple_set_location (call, loc);
5370   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5371 
5372   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5373                                              build_int_cst (integer_type_node,
5374                                                                 IFN_GOACC_LOOP_BOUND),
5375                                              dir, range, s,
5376                                              chunk_size, gwv, offset_init);
5377   gimple_call_set_lhs (call, bound);
5378   gimple_set_location (call, loc);
5379   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5380 
5381   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5382   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5383                         GSI_CONTINUE_LINKING);
5384 
5385   /* V assignment goes into body_bb.  */
5386   if (!gimple_in_ssa_p (cfun))
5387     {
5388       gsi = gsi_start_bb (body_bb);
5389 
5390       expr = build2 (plus_code, iter_type, b,
5391                          fold_convert (plus_type, offset));
5392       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5393                                                true, GSI_SAME_STMT);
5394       ass = gimple_build_assign (v, expr);
5395       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5396 
5397       if (fd->collapse > 1 || fd->tiling)
5398           expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5399 
5400       if (fd->tiling)
5401           {
5402             /* Determine the range of the element loop -- usually simply
5403                the tile_size, but could be smaller if the final
5404                iteration of the outer loop is a partial tile.  */
5405             tree e_range = create_tmp_var (diff_type, ".e_range");
5406 
5407             expr = build2 (MIN_EXPR, diff_type,
5408                                build2 (MINUS_EXPR, diff_type, bound, offset),
5409                                build2 (MULT_EXPR, diff_type, tile_size,
5410                                          element_s));
5411             expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5412                                                      true, GSI_SAME_STMT);
5413             ass = gimple_build_assign (e_range, expr);
5414             gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5415 
5416             /* Determine bound, offset & step of inner loop. */
5417             e_bound = create_tmp_var (diff_type, ".e_bound");
5418             e_offset = create_tmp_var (diff_type, ".e_offset");
5419             e_step = create_tmp_var (diff_type, ".e_step");
5420 
5421             /* Mark these as element loops.  */
5422             tree t, e_gwv = integer_minus_one_node;
5423             tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5424 
5425             t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5426             call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5427                                                        element_s, chunk, e_gwv, chunk);
5428             gimple_call_set_lhs (call, e_offset);
5429             gimple_set_location (call, loc);
5430             gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5431 
5432             t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5433             call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5434                                                        element_s, chunk, e_gwv, e_offset);
5435             gimple_call_set_lhs (call, e_bound);
5436             gimple_set_location (call, loc);
5437             gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5438 
5439             t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5440             call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5441                                                        element_s, chunk, e_gwv);
5442             gimple_call_set_lhs (call, e_step);
5443             gimple_set_location (call, loc);
5444             gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5445 
5446             /* Add test and split block.  */
5447             expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5448             stmt = gimple_build_cond_empty (expr);
5449             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5450             split = split_block (body_bb, stmt);
5451             elem_body_bb = split->dest;
                 /* If the body block also held the continue statement, that
                    statement now lives in the new element body block.  */
5452             if (cont_bb == body_bb)
5453               cont_bb = elem_body_bb;
5454             body_bb = split->src;
5455 
5456             split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5457 
5458             /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5459             if (cont_bb == NULL)
5460               {
5461                 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5462                 e->probability = profile_probability::even ();
5463                 split->probability = profile_probability::even ();
5464               }
5465 
5466             /* Initialize the user's loop vars.  */
5467             gsi = gsi_start_bb (elem_body_bb);
5468             expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5469           }
5470     }
5471 
5472   /* Loop increment goes into cont_bb.  If this is not a loop, we
5473      will have spawned threads as if it was, and each one will
5474      execute one iteration.  The specification is not explicit about
5475      whether such constructs are ill-formed or not, and they can
5476      occur, especially when noreturn routines are involved.  */
5477   if (cont_bb)
5478     {
5479       gsi = gsi_last_nondebug_bb (cont_bb);
5480       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5481       loc = gimple_location (cont_stmt);
5482 
5483       if (fd->tiling)
5484           {
5485             /* Insert element loop increment and test.  */
5486             expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5487             expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5488                                                      true, GSI_SAME_STMT);
5489             ass = gimple_build_assign (e_offset, expr);
5490             gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5491             expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5492 
5493             stmt = gimple_build_cond_empty (expr);
5494             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5495             split = split_block (cont_bb, stmt);
5496             elem_cont_bb = split->src;
5497             cont_bb = split->dest;
5498 
5499             split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5500             split->probability = profile_probability::unlikely ().guessed ();
5501             edge latch_edge
5502               = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5503             latch_edge->probability = profile_probability::likely ().guessed ();
5504 
5505             edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5506             skip_edge->probability = profile_probability::unlikely ().guessed ();
                 /* NOTE(review): this picks a successor of body_bb using
                    skip_edge->dest_idx; confirm that this is the intended
                    index for selecting the non-skip successor.  */
5507             edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5508             loop_entry_edge->probability
5509               = profile_probability::likely ().guessed ();
5510 
                 /* The split moved the continue statement; re-seat the
                    iterator on it.  */
5511             gsi = gsi_for_stmt (cont_stmt);
5512           }
5513 
5514       /* Increment offset.  */
5515       if (gimple_in_ssa_p (cfun))
5516           expr = build2 (plus_code, iter_type, offset,
5517                            fold_convert (plus_type, step));
5518       else
5519           expr = build2 (PLUS_EXPR, diff_type, offset, step);
5520       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5521                                                true, GSI_SAME_STMT);
5522       ass = gimple_build_assign (offset_incr, expr);
5523       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5524       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5525       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5526 
5527       /*  Remove the GIMPLE_OMP_CONTINUE.  */
5528       gsi_remove (&gsi, true);
5529 
5530       /* Fixup edges from cont_bb.  */
5531       be = BRANCH_EDGE (cont_bb);
5532       fte = FALLTHRU_EDGE (cont_bb);
5533       be->flags |= EDGE_TRUE_VALUE;
5534       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5535 
5536       if (chunking)
5537           {
5538             /* Split the beginning of exit_bb to make bottom_bb.  We
5539                need to insert a nop at the start, because splitting is
5540                after a stmt, not before.  */
5541             gsi = gsi_start_bb (exit_bb);
5542             stmt = gimple_build_nop ();
5543             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5544             split = split_block (exit_bb, stmt);
5545             bottom_bb = split->src;
5546             exit_bb = split->dest;
5547             gsi = gsi_last_bb (bottom_bb);
5548 
5549             /* Chunk increment and test goes into bottom_bb.  */
5550             expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5551                                build_int_cst (diff_type, 1));
5552             ass = gimple_build_assign (chunk_no, expr);
5553             gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5554 
5555             /* Chunk test at end of bottom_bb.  */
5556             expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5557             gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5558                                   GSI_CONTINUE_LINKING);
5559 
5560             /* Fixup edges from bottom_bb.  */
5561             split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5562             split->probability = profile_probability::unlikely ().guessed ();
5563             edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5564             latch_edge->probability = profile_probability::likely ().guessed ();
5565           }
5566     }
5567 
5568   gsi = gsi_last_nondebug_bb (exit_bb);
5569   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5570   loc = gimple_location (gsi_stmt (gsi));
5571 
5572   if (!gimple_in_ssa_p (cfun))
5573     {
5574       /* Insert the final value of V, in case it is live.  This is the
5575            value for the only thread that survives past the join.  */
5576       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5577       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5578       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5579       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5580       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5581       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5582                                                true, GSI_SAME_STMT);
5583       ass = gimple_build_assign (v, expr);
5584       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5585     }
5586 
5587   /* Remove the OMP_RETURN.  */
5588   gsi_remove (&gsi, true);
5589 
5590   if (cont_bb)
5591     {
5592       /* We now have one, two or three nested loops.  Update the loop
5593            structures.  */
5594       struct loop *parent = entry_bb->loop_father;
5595       struct loop *body = body_bb->loop_father;
5596 
5597       if (chunking)
5598           {
5599             struct loop *chunk_loop = alloc_loop ();
5600             chunk_loop->header = head_bb;
5601             chunk_loop->latch = bottom_bb;
5602             add_loop (chunk_loop, parent);
5603             parent = chunk_loop;
5604           }
5605       else if (parent != body)
5606           {
                 /* body_bb already sits in a loop distinct from entry_bb's;
                    check that loop's header and latch and create no new
                    loop structures.  */
5607             gcc_assert (body->header == body_bb);
5608             gcc_assert (body->latch == cont_bb
5609                           || single_pred (body->latch) == cont_bb);
5610             parent = NULL;
5611           }
5612 
5613       if (parent)
5614           {
5615             struct loop *body_loop = alloc_loop ();
5616             body_loop->header = body_bb;
5617             body_loop->latch = cont_bb;
5618             add_loop (body_loop, parent);
5619 
5620             if (fd->tiling)
5621               {
5622                 /* Insert tiling's element loop.  */
5623                 struct loop *inner_loop = alloc_loop ();
5624                 inner_loop->header = elem_body_bb;
5625                 inner_loop->latch = elem_cont_bb;
5626                 add_loop (inner_loop, body_loop);
5627               }
5628           }
5629     }
5630 }
5631 
5632 /* Expand the OMP loop defined by REGION.  */
5633 
5634 static void
expand_omp_for(struct omp_region * region,gimple * inner_stmt)5635 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5636 {
5637   struct omp_for_data fd;
5638   struct omp_for_data_loop *loops;
5639 
5640   loops
5641     = (struct omp_for_data_loop *)
5642       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5643                 * sizeof (struct omp_for_data_loop));
5644   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5645                               &fd, loops);
5646   region->sched_kind = fd.sched_kind;
5647   region->sched_modifiers = fd.sched_modifiers;
5648 
5649   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5650   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5651   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5652   if (region->cont)
5653     {
5654       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5655       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5656       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5657     }
5658   else
5659     /* If there isn't a continue then this is a degerate case where
5660        the introduction of abnormal edges during lowering will prevent
5661        original loops from being detected.  Fix that up.  */
5662     loops_state_set (LOOPS_NEED_FIXUP);
5663 
5664   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5665     expand_omp_simd (region, &fd);
5666   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5667     {
5668       gcc_assert (!inner_stmt);
5669       expand_oacc_for (region, &fd);
5670     }
5671   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5672     {
5673       if (gimple_omp_for_combined_into_p (fd.for_stmt))
5674           expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5675       else
5676           expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5677     }
5678   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5679              && !fd.have_ordered)
5680     {
5681       if (fd.chunk_size == NULL)
5682           expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5683       else
5684           expand_omp_for_static_chunk (region, &fd, inner_stmt);
5685     }
5686   else
5687     {
5688       int fn_index, start_ix, next_ix;
5689 
5690       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5691                       == GF_OMP_FOR_KIND_FOR);
5692       if (fd.chunk_size == NULL
5693             && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5694           fd.chunk_size = integer_zero_node;
5695       gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5696       switch (fd.sched_kind)
5697           {
5698           case OMP_CLAUSE_SCHEDULE_RUNTIME:
5699             fn_index = 3;
5700             break;
5701           case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5702           case OMP_CLAUSE_SCHEDULE_GUIDED:
5703             if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5704                 && !fd.ordered
5705                 && !fd.have_ordered)
5706               {
5707                 fn_index = 3 + fd.sched_kind;
5708                 break;
5709               }
5710             /* FALLTHRU */
5711           default:
5712             fn_index = fd.sched_kind;
5713             break;
5714           }
5715       if (!fd.ordered)
5716           fn_index += fd.have_ordered * 6;
5717       if (fd.ordered)
5718           start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5719       else
5720           start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5721       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5722       if (fd.iter_type == long_long_unsigned_type_node)
5723           {
5724             start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5725                               - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5726             next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5727                           - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5728           }
5729       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5730                                     (enum built_in_function) next_ix, inner_stmt);
5731     }
5732 
5733   if (gimple_in_ssa_p (cfun))
5734     update_ssa (TODO_update_ssa_only_virtuals);
5735 }
5736 
5737 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
5738 
5739           v = GOMP_sections_start (n);
5740     L0:
5741           switch (v)
5742             {
5743             case 0:
5744               goto L2;
5745             case 1:
5746               section 1;
5747               goto L1;
5748             case 2:
5749               ...
5750             case n:
5751               ...
5752             default:
5753               abort ();
5754             }
5755     L1:
5756           v = GOMP_sections_next ();
5757           goto L0;
5758     L2:
5759           reduction;
5760 
5761     If this is a combined parallel sections, replace the call to
5762     GOMP_sections_start with call to GOMP_sections_next.  */
5763 
5764 static void
expand_omp_sections(struct omp_region * region)5765 expand_omp_sections (struct omp_region *region)
5766 {
5767   tree t, u, vin = NULL, vmain, vnext, l2;
5768   unsigned len;
5769   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5770   gimple_stmt_iterator si, switch_si;
5771   gomp_sections *sections_stmt;
5772   gimple *stmt;
5773   gomp_continue *cont;
5774   edge_iterator ei;
5775   edge e;
5776   struct omp_region *inner;
5777   unsigned i, casei;
5778   bool exit_reachable = region->cont != NULL;
5779 
5780   gcc_assert (region->exit != NULL);
5781   entry_bb = region->entry;
5782   l0_bb = single_succ (entry_bb);
5783   l1_bb = region->cont;
5784   l2_bb = region->exit;
5785   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5786     l2 = gimple_block_label (l2_bb);
5787   else
5788     {
5789       /* This can happen if there are reductions.  */
5790       len = EDGE_COUNT (l0_bb->succs);
5791       gcc_assert (len > 0);
5792       e = EDGE_SUCC (l0_bb, len - 1);
5793       si = gsi_last_nondebug_bb (e->dest);
5794       l2 = NULL_TREE;
5795       if (gsi_end_p (si)
5796             || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5797           l2 = gimple_block_label (e->dest);
5798       else
5799           FOR_EACH_EDGE (e, ei, l0_bb->succs)
5800             {
5801               si = gsi_last_nondebug_bb (e->dest);
5802               if (gsi_end_p (si)
5803                     || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5804                 {
5805                     l2 = gimple_block_label (e->dest);
5806                     break;
5807                 }
5808             }
5809     }
5810   if (exit_reachable)
5811     default_bb = create_empty_bb (l1_bb->prev_bb);
5812   else
5813     default_bb = create_empty_bb (l0_bb);
5814 
5815   /* We will build a switch() with enough cases for all the
5816      GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5817      and a default case to abort if something goes wrong.  */
5818   len = EDGE_COUNT (l0_bb->succs);
5819 
5820   /* Use vec::quick_push on label_vec throughout, since we know the size
5821      in advance.  */
5822   auto_vec<tree> label_vec (len);
5823 
5824   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5825      GIMPLE_OMP_SECTIONS statement.  */
5826   si = gsi_last_nondebug_bb (entry_bb);
5827   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5828   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5829   vin = gimple_omp_sections_control (sections_stmt);
5830   if (!is_combined_parallel (region))
5831     {
5832       /* If we are not inside a combined parallel+sections region,
5833            call GOMP_sections_start.  */
5834       t = build_int_cst (unsigned_type_node, len - 1);
5835       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5836       stmt = gimple_build_call (u, 1, t);
5837     }
5838   else
5839     {
5840       /* Otherwise, call GOMP_sections_next.  */
5841       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5842       stmt = gimple_build_call (u, 0);
5843     }
5844   gimple_call_set_lhs (stmt, vin);
5845   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5846   gsi_remove (&si, true);
5847 
5848   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5849      L0_BB.  */
5850   switch_si = gsi_last_nondebug_bb (l0_bb);
5851   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5852   if (exit_reachable)
5853     {
5854       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5855       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5856       vmain = gimple_omp_continue_control_use (cont);
5857       vnext = gimple_omp_continue_control_def (cont);
5858     }
5859   else
5860     {
5861       vmain = vin;
5862       vnext = NULL_TREE;
5863     }
5864 
5865   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5866   label_vec.quick_push (t);
5867   i = 1;
5868 
5869   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
5870   for (inner = region->inner, casei = 1;
5871        inner;
5872        inner = inner->next, i++, casei++)
5873     {
5874       basic_block s_entry_bb, s_exit_bb;
5875 
5876       /* Skip optional reduction region.  */
5877       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5878           {
5879             --i;
5880             --casei;
5881             continue;
5882           }
5883 
5884       s_entry_bb = inner->entry;
5885       s_exit_bb = inner->exit;
5886 
5887       t = gimple_block_label (s_entry_bb);
5888       u = build_int_cst (unsigned_type_node, casei);
5889       u = build_case_label (u, NULL, t);
5890       label_vec.quick_push (u);
5891 
5892       si = gsi_last_nondebug_bb (s_entry_bb);
5893       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5894       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5895       gsi_remove (&si, true);
5896       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5897 
5898       if (s_exit_bb == NULL)
5899           continue;
5900 
5901       si = gsi_last_nondebug_bb (s_exit_bb);
5902       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5903       gsi_remove (&si, true);
5904 
5905       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5906     }
5907 
5908   /* Error handling code goes in DEFAULT_BB.  */
5909   t = gimple_block_label (default_bb);
5910   u = build_case_label (NULL, NULL, t);
5911   make_edge (l0_bb, default_bb, 0);
5912   add_bb_to_loop (default_bb, current_loops->tree_root);
5913 
5914   stmt = gimple_build_switch (vmain, u, label_vec);
5915   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5916   gsi_remove (&switch_si, true);
5917 
5918   si = gsi_start_bb (default_bb);
5919   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5920   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5921 
5922   if (exit_reachable)
5923     {
5924       tree bfn_decl;
5925 
5926       /* Code to get the next section goes in L1_BB.  */
5927       si = gsi_last_nondebug_bb (l1_bb);
5928       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5929 
5930       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5931       stmt = gimple_build_call (bfn_decl, 0);
5932       gimple_call_set_lhs (stmt, vnext);
5933       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5934       gsi_remove (&si, true);
5935 
5936       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5937     }
5938 
5939   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
5940   si = gsi_last_nondebug_bb (l2_bb);
5941   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5942     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5943   else if (gimple_omp_return_lhs (gsi_stmt (si)))
5944     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5945   else
5946     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5947   stmt = gimple_build_call (t, 0);
5948   if (gimple_omp_return_lhs (gsi_stmt (si)))
5949     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5950   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5951   gsi_remove (&si, true);
5952 
5953   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5954 }
5955 
5956 /* Expand code for an OpenMP single directive.  We've already expanded
5957    much of the code, here we simply place the GOMP_barrier call.  */
5958 
5959 static void
expand_omp_single(struct omp_region * region)5960 expand_omp_single (struct omp_region *region)
5961 {
5962   basic_block entry_bb, exit_bb;
5963   gimple_stmt_iterator si;
5964 
5965   entry_bb = region->entry;
5966   exit_bb = region->exit;
5967 
5968   si = gsi_last_nondebug_bb (entry_bb);
5969   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5970   gsi_remove (&si, true);
5971   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5972 
5973   si = gsi_last_nondebug_bb (exit_bb);
5974   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5975     {
5976       tree t = gimple_omp_return_lhs (gsi_stmt (si));
5977       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5978     }
5979   gsi_remove (&si, true);
5980   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5981 }
5982 
5983 /* Generic expansion for OpenMP synchronization directives: master,
5984    ordered and critical.  All we need to do here is remove the entry
5985    and exit markers for REGION.  */
5986 
5987 static void
expand_omp_synch(struct omp_region * region)5988 expand_omp_synch (struct omp_region *region)
5989 {
5990   basic_block entry_bb, exit_bb;
5991   gimple_stmt_iterator si;
5992 
5993   entry_bb = region->entry;
5994   exit_bb = region->exit;
5995 
5996   si = gsi_last_nondebug_bb (entry_bb);
5997   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5998                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5999                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6000                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6001                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6002                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6003   gsi_remove (&si, true);
6004   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6005 
6006   if (exit_bb)
6007     {
6008       si = gsi_last_nondebug_bb (exit_bb);
6009       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6010       gsi_remove (&si, true);
6011       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6012     }
6013 }
6014 
6015 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6016    operation as a normal volatile load.  */
6017 
6018 static bool
expand_omp_atomic_load(basic_block load_bb,tree addr,tree loaded_val,int index)6019 expand_omp_atomic_load (basic_block load_bb, tree addr,
6020                               tree loaded_val, int index)
6021 {
6022   enum built_in_function tmpbase;
6023   gimple_stmt_iterator gsi;
6024   basic_block store_bb;
6025   location_t loc;
6026   gimple *stmt;
6027   tree decl, call, type, itype;
6028 
6029   gsi = gsi_last_nondebug_bb (load_bb);
6030   stmt = gsi_stmt (gsi);
6031   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6032   loc = gimple_location (stmt);
6033 
6034   /* ??? If the target does not implement atomic_load_optab[mode], and mode
6035      is smaller than word size, then expand_atomic_load assumes that the load
6036      is atomic.  We could avoid the builtin entirely in this case.  */
6037 
6038   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6039   decl = builtin_decl_explicit (tmpbase);
6040   if (decl == NULL_TREE)
6041     return false;
6042 
6043   type = TREE_TYPE (loaded_val);
6044   itype = TREE_TYPE (TREE_TYPE (decl));
6045 
6046   call = build_call_expr_loc (loc, decl, 2, addr,
6047                                     build_int_cst (NULL,
6048                                                        gimple_omp_atomic_seq_cst_p (stmt)
6049                                                        ? MEMMODEL_SEQ_CST
6050                                                        : MEMMODEL_RELAXED));
6051   if (!useless_type_conversion_p (type, itype))
6052     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6053   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6054 
6055   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6056   gsi_remove (&gsi, true);
6057 
6058   store_bb = single_succ (load_bb);
6059   gsi = gsi_last_nondebug_bb (store_bb);
6060   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6061   gsi_remove (&gsi, true);
6062 
6063   if (gimple_in_ssa_p (cfun))
6064     update_ssa (TODO_update_ssa_no_phi);
6065 
6066   return true;
6067 }
6068 
6069 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6070    operation as a normal volatile store.  */
6071 
6072 static bool
expand_omp_atomic_store(basic_block load_bb,tree addr,tree loaded_val,tree stored_val,int index)6073 expand_omp_atomic_store (basic_block load_bb, tree addr,
6074                                tree loaded_val, tree stored_val, int index)
6075 {
6076   enum built_in_function tmpbase;
6077   gimple_stmt_iterator gsi;
6078   basic_block store_bb = single_succ (load_bb);
6079   location_t loc;
6080   gimple *stmt;
6081   tree decl, call, type, itype;
6082   machine_mode imode;
6083   bool exchange;
6084 
6085   gsi = gsi_last_nondebug_bb (load_bb);
6086   stmt = gsi_stmt (gsi);
6087   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6088 
6089   /* If the load value is needed, then this isn't a store but an exchange.  */
6090   exchange = gimple_omp_atomic_need_value_p (stmt);
6091 
6092   gsi = gsi_last_nondebug_bb (store_bb);
6093   stmt = gsi_stmt (gsi);
6094   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6095   loc = gimple_location (stmt);
6096 
6097   /* ??? If the target does not implement atomic_store_optab[mode], and mode
6098      is smaller than word size, then expand_atomic_store assumes that the store
6099      is atomic.  We could avoid the builtin entirely in this case.  */
6100 
6101   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6102   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6103   decl = builtin_decl_explicit (tmpbase);
6104   if (decl == NULL_TREE)
6105     return false;
6106 
6107   type = TREE_TYPE (stored_val);
6108 
6109   /* Dig out the type of the function's second argument.  */
6110   itype = TREE_TYPE (decl);
6111   itype = TYPE_ARG_TYPES (itype);
6112   itype = TREE_CHAIN (itype);
6113   itype = TREE_VALUE (itype);
6114   imode = TYPE_MODE (itype);
6115 
6116   if (exchange && !can_atomic_exchange_p (imode, true))
6117     return false;
6118 
6119   if (!useless_type_conversion_p (itype, type))
6120     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6121   call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6122                                     build_int_cst (NULL,
6123                                                        gimple_omp_atomic_seq_cst_p (stmt)
6124                                                        ? MEMMODEL_SEQ_CST
6125                                                        : MEMMODEL_RELAXED));
6126   if (exchange)
6127     {
6128       if (!useless_type_conversion_p (type, itype))
6129           call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6130       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6131     }
6132 
6133   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6134   gsi_remove (&gsi, true);
6135 
6136   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6137   gsi = gsi_last_nondebug_bb (load_bb);
6138   gsi_remove (&gsi, true);
6139 
6140   if (gimple_in_ssa_p (cfun))
6141     update_ssa (TODO_update_ssa_no_phi);
6142 
6143   return true;
6144 }
6145 
6146 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6147    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6148    size of the data type, and thus usable to find the index of the builtin
6149    decl.  Returns false if the expression is not of the proper form.  */
6150 
6151 static bool
expand_omp_atomic_fetch_op(basic_block load_bb,tree addr,tree loaded_val,tree stored_val,int index)6152 expand_omp_atomic_fetch_op (basic_block load_bb,
6153                                   tree addr, tree loaded_val,
6154                                   tree stored_val, int index)
6155 {
6156   enum built_in_function oldbase, newbase, tmpbase;
6157   tree decl, itype, call;
6158   tree lhs, rhs;
6159   basic_block store_bb = single_succ (load_bb);
6160   gimple_stmt_iterator gsi;
6161   gimple *stmt;
6162   location_t loc;
6163   enum tree_code code;
6164   bool need_old, need_new;
6165   machine_mode imode;
6166   bool seq_cst;
6167 
6168   /* We expect to find the following sequences:
6169 
6170    load_bb:
6171        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6172 
6173    store_bb:
6174        val = tmp OP something; (or: something OP tmp)
6175        GIMPLE_OMP_STORE (val)
6176 
6177   ???FIXME: Allow a more flexible sequence.
6178   Perhaps use data flow to pick the statements.
6179 
6180   */
6181 
6182   gsi = gsi_after_labels (store_bb);
6183   stmt = gsi_stmt (gsi);
6184   if (is_gimple_debug (stmt))
6185     {
6186       gsi_next_nondebug (&gsi);
6187       if (gsi_end_p (gsi))
6188           return false;
6189       stmt = gsi_stmt (gsi);
6190     }
6191   loc = gimple_location (stmt);
6192   if (!is_gimple_assign (stmt))
6193     return false;
6194   gsi_next_nondebug (&gsi);
6195   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6196     return false;
6197   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6198   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6199   seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6200   gcc_checking_assert (!need_old || !need_new);
6201 
6202   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6203     return false;
6204 
6205   /* Check for one of the supported fetch-op operations.  */
6206   code = gimple_assign_rhs_code (stmt);
6207   switch (code)
6208     {
6209     case PLUS_EXPR:
6210     case POINTER_PLUS_EXPR:
6211       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6212       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6213       break;
6214     case MINUS_EXPR:
6215       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6216       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6217       break;
6218     case BIT_AND_EXPR:
6219       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6220       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6221       break;
6222     case BIT_IOR_EXPR:
6223       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6224       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6225       break;
6226     case BIT_XOR_EXPR:
6227       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6228       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6229       break;
6230     default:
6231       return false;
6232     }
6233 
6234   /* Make sure the expression is of the proper form.  */
6235   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6236     rhs = gimple_assign_rhs2 (stmt);
6237   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6238              && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6239     rhs = gimple_assign_rhs1 (stmt);
6240   else
6241     return false;
6242 
6243   tmpbase = ((enum built_in_function)
6244                ((need_new ? newbase : oldbase) + index + 1));
6245   decl = builtin_decl_explicit (tmpbase);
6246   if (decl == NULL_TREE)
6247     return false;
6248   itype = TREE_TYPE (TREE_TYPE (decl));
6249   imode = TYPE_MODE (itype);
6250 
6251   /* We could test all of the various optabs involved, but the fact of the
6252      matter is that (with the exception of i486 vs i586 and xadd) all targets
6253      that support any atomic operaton optab also implements compare-and-swap.
6254      Let optabs.c take care of expanding any compare-and-swap loop.  */
6255   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6256     return false;
6257 
6258   gsi = gsi_last_nondebug_bb (load_bb);
6259   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6260 
6261   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6262      It only requires that the operation happen atomically.  Thus we can
6263      use the RELAXED memory model.  */
6264   call = build_call_expr_loc (loc, decl, 3, addr,
6265                                     fold_convert_loc (loc, itype, rhs),
6266                                     build_int_cst (NULL,
6267                                                        seq_cst ? MEMMODEL_SEQ_CST
6268                                                                  : MEMMODEL_RELAXED));
6269 
6270   if (need_old || need_new)
6271     {
6272       lhs = need_old ? loaded_val : stored_val;
6273       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6274       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6275     }
6276   else
6277     call = fold_convert_loc (loc, void_type_node, call);
6278   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6279   gsi_remove (&gsi, true);
6280 
6281   gsi = gsi_last_nondebug_bb (store_bb);
6282   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6283   gsi_remove (&gsi, true);
6284   gsi = gsi_last_nondebug_bb (store_bb);
6285   stmt = gsi_stmt (gsi);
6286   gsi_remove (&gsi, true);
6287 
6288   if (gimple_in_ssa_p (cfun))
6289     {
6290       release_defs (stmt);
6291       update_ssa (TODO_update_ssa_no_phi);
6292     }
6293 
6294   return true;
6295 }
6296 
6297 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6298 
6299       oldval = *addr;
6300       repeat:
6301           newval = rhs;        // with oldval replacing *addr in rhs
6302           oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6303           if (oldval != newval)
6304             goto repeat;
6305 
6306    INDEX is log2 of the size of the data type, and thus usable to find the
6307    index of the builtin decl.  */
6308 
6309 static bool
expand_omp_atomic_pipeline(basic_block load_bb,basic_block store_bb,tree addr,tree loaded_val,tree stored_val,int index)6310 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6311                                   tree addr, tree loaded_val, tree stored_val,
6312                                   int index)
6313 {
6314   tree loadedi, storedi, initial, new_storedi, old_vali;
6315   tree type, itype, cmpxchg, iaddr, atype;
6316   gimple_stmt_iterator si;
6317   basic_block loop_header = single_succ (load_bb);
6318   gimple *phi, *stmt;
6319   edge e;
6320   enum built_in_function fncode;
6321 
6322   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6323      order to use the RELAXED memory model effectively.  */
6324   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6325                                             + index + 1);
6326   cmpxchg = builtin_decl_explicit (fncode);
6327   if (cmpxchg == NULL_TREE)
6328     return false;
6329   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6330   atype = type;
6331   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6332 
6333   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6334       || !can_atomic_load_p (TYPE_MODE (itype)))
6335     return false;
6336 
6337   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6338   si = gsi_last_nondebug_bb (load_bb);
6339   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6340 
6341   /* For floating-point values, we'll need to view-convert them to integers
6342      so that we can perform the atomic compare and swap.  Simplify the
6343      following code by always setting up the "i"ntegral variables.  */
6344   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6345     {
6346       tree iaddr_val;
6347 
6348       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6349                                                                          true));
6350       atype = itype;
6351       iaddr_val
6352           = force_gimple_operand_gsi (&si,
6353                                             fold_convert (TREE_TYPE (iaddr), addr),
6354                                             false, NULL_TREE, true, GSI_SAME_STMT);
6355       stmt = gimple_build_assign (iaddr, iaddr_val);
6356       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6357       loadedi = create_tmp_var (itype);
6358       if (gimple_in_ssa_p (cfun))
6359           loadedi = make_ssa_name (loadedi);
6360     }
6361   else
6362     {
6363       iaddr = addr;
6364       loadedi = loaded_val;
6365     }
6366 
6367   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6368   tree loaddecl = builtin_decl_explicit (fncode);
6369   if (loaddecl)
6370     initial
6371       = fold_convert (atype,
6372                           build_call_expr (loaddecl, 2, iaddr,
6373                                                build_int_cst (NULL_TREE,
6374                                                                   MEMMODEL_RELAXED)));
6375   else
6376     {
6377       tree off
6378           = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6379                                                                   true), 0);
6380       initial = build2 (MEM_REF, atype, iaddr, off);
6381     }
6382 
6383   initial
6384     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6385                                         GSI_SAME_STMT);
6386 
6387   /* Move the value to the LOADEDI temporary.  */
6388   if (gimple_in_ssa_p (cfun))
6389     {
6390       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6391       phi = create_phi_node (loadedi, loop_header);
6392       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6393                  initial);
6394     }
6395   else
6396     gsi_insert_before (&si,
6397                            gimple_build_assign (loadedi, initial),
6398                            GSI_SAME_STMT);
6399   if (loadedi != loaded_val)
6400     {
6401       gimple_stmt_iterator gsi2;
6402       tree x;
6403 
6404       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6405       gsi2 = gsi_start_bb (loop_header);
6406       if (gimple_in_ssa_p (cfun))
6407           {
6408             gassign *stmt;
6409             x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6410                                                   true, GSI_SAME_STMT);
6411             stmt = gimple_build_assign (loaded_val, x);
6412             gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6413           }
6414       else
6415           {
6416             x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6417             force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6418                                             true, GSI_SAME_STMT);
6419           }
6420     }
6421   gsi_remove (&si, true);
6422 
6423   si = gsi_last_nondebug_bb (store_bb);
6424   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6425 
6426   if (iaddr == addr)
6427     storedi = stored_val;
6428   else
6429     storedi
6430       = force_gimple_operand_gsi (&si,
6431                                           build1 (VIEW_CONVERT_EXPR, itype,
6432                                                     stored_val), true, NULL_TREE, true,
6433                                           GSI_SAME_STMT);
6434 
6435   /* Build the compare&swap statement.  */
6436   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6437   new_storedi = force_gimple_operand_gsi (&si,
6438                                                     fold_convert (TREE_TYPE (loadedi),
6439                                                                       new_storedi),
6440                                                     true, NULL_TREE,
6441                                                     true, GSI_SAME_STMT);
6442 
6443   if (gimple_in_ssa_p (cfun))
6444     old_vali = loadedi;
6445   else
6446     {
6447       old_vali = create_tmp_var (TREE_TYPE (loadedi));
6448       stmt = gimple_build_assign (old_vali, loadedi);
6449       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6450 
6451       stmt = gimple_build_assign (loadedi, new_storedi);
6452       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6453     }
6454 
6455   /* Note that we always perform the comparison as an integer, even for
6456      floating point.  This allows the atomic operation to properly
6457      succeed even with NaNs and -0.0.  */
6458   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6459   stmt = gimple_build_cond_empty (ne);
6460   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6461 
6462   /* Update cfg.  */
6463   e = single_succ_edge (store_bb);
6464   e->flags &= ~EDGE_FALLTHRU;
6465   e->flags |= EDGE_FALSE_VALUE;
6466   /* Expect no looping.  */
6467   e->probability = profile_probability::guessed_always ();
6468 
6469   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6470   e->probability = profile_probability::guessed_never ();
6471 
6472   /* Copy the new value to loadedi (we already did that before the condition
6473      if we are not in SSA).  */
6474   if (gimple_in_ssa_p (cfun))
6475     {
6476       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6477       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6478     }
6479 
6480   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6481   gsi_remove (&si, true);
6482 
6483   struct loop *loop = alloc_loop ();
6484   loop->header = loop_header;
6485   loop->latch = store_bb;
6486   add_loop (loop, loop_header->loop_father);
6487 
6488   if (gimple_in_ssa_p (cfun))
6489     update_ssa (TODO_update_ssa_no_phi);
6490 
6491   return true;
6492 }
6493 
6494 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6495 
6496                                           GOMP_atomic_start ();
6497                                           *addr = rhs;
6498                                           GOMP_atomic_end ();
6499 
6500    The result is not globally atomic, but works so long as all parallel
6501    references are within #pragma omp atomic directives.  According to
6502    responses received from omp@openmp.org, appears to be within spec.
6503    Which makes sense, since that's how several other compilers handle
6504    this situation as well.
6505    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6506    expanding.  STORED_VAL is the operand of the matching
6507    GIMPLE_OMP_ATOMIC_STORE.
6508 
6509    We replace
6510    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6511    loaded_val = *addr;
6512 
6513    and replace
6514    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6515    *addr = stored_val;
6516 */
6517 
6518 static bool
expand_omp_atomic_mutex(basic_block load_bb,basic_block store_bb,tree addr,tree loaded_val,tree stored_val)6519 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6520                                tree addr, tree loaded_val, tree stored_val)
6521 {
6522   gimple_stmt_iterator si;
6523   gassign *stmt;
6524   tree t;
6525 
6526   si = gsi_last_nondebug_bb (load_bb);
6527   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6528 
6529   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6530   t = build_call_expr (t, 0);
6531   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6532 
6533   tree mem = build_simple_mem_ref (addr);
6534   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6535   TREE_OPERAND (mem, 1)
6536     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6537                                                              true),
6538                         TREE_OPERAND (mem, 1));
6539   stmt = gimple_build_assign (loaded_val, mem);
6540   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6541   gsi_remove (&si, true);
6542 
6543   si = gsi_last_nondebug_bb (store_bb);
6544   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6545 
6546   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6547   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6548 
6549   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6550   t = build_call_expr (t, 0);
6551   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6552   gsi_remove (&si, true);
6553 
6554   if (gimple_in_ssa_p (cfun))
6555     update_ssa (TODO_update_ssa_no_phi);
6556   return true;
6557 }
6558 
6559 /* Expand an GIMPLE_OMP_ATOMIC statement.  We try to expand
6560    using expand_omp_atomic_fetch_op.  If it failed, we try to
6561    call expand_omp_atomic_pipeline, and if it fails too, the
6562    ultimate fallback is wrapping the operation in a mutex
6563    (expand_omp_atomic_mutex).  REGION is the atomic region built
6564    by build_omp_regions_1().  */
6565 
6566 static void
expand_omp_atomic(struct omp_region * region)6567 expand_omp_atomic (struct omp_region *region)
6568 {
6569   basic_block load_bb = region->entry, store_bb = region->exit;
6570   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6571   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6572   tree loaded_val = gimple_omp_atomic_load_lhs (load);
6573   tree addr = gimple_omp_atomic_load_rhs (load);
6574   tree stored_val = gimple_omp_atomic_store_val (store);
6575   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6576   HOST_WIDE_INT index;
6577 
6578   /* Make sure the type is one of the supported sizes.  */
6579   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6580   index = exact_log2 (index);
6581   if (index >= 0 && index <= 4)
6582     {
6583       unsigned int align = TYPE_ALIGN_UNIT (type);
6584 
6585       /* __sync builtins require strict data alignment.  */
6586       if (exact_log2 (align) >= index)
6587           {
6588             /* Atomic load.  */
6589             scalar_mode smode;
6590             if (loaded_val == stored_val
6591                 && (is_int_mode (TYPE_MODE (type), &smode)
6592                       || is_float_mode (TYPE_MODE (type), &smode))
6593                 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6594                 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6595               return;
6596 
6597             /* Atomic store.  */
6598             if ((is_int_mode (TYPE_MODE (type), &smode)
6599                  || is_float_mode (TYPE_MODE (type), &smode))
6600                 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6601                 && store_bb == single_succ (load_bb)
6602                 && first_stmt (store_bb) == store
6603                 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6604                                                     stored_val, index))
6605               return;
6606 
6607             /* When possible, use specialized atomic update functions.  */
6608             if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6609                 && store_bb == single_succ (load_bb)
6610                 && expand_omp_atomic_fetch_op (load_bb, addr,
6611                                                        loaded_val, stored_val, index))
6612               return;
6613 
6614             /* If we don't have specialized __sync builtins, try and implement
6615                as a compare and swap loop.  */
6616             if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6617                                                     loaded_val, stored_val, index))
6618               return;
6619           }
6620     }
6621 
6622   /* The ultimate fallback is wrapping the operation in a mutex.  */
6623   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6624 }
6625 
6626 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6627    at REGION_EXIT.  */
6628 
6629 static void
mark_loops_in_oacc_kernels_region(basic_block region_entry,basic_block region_exit)6630 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6631                                            basic_block region_exit)
6632 {
6633   struct loop *outer = region_entry->loop_father;
6634   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6635 
6636   /* Don't parallelize the kernels region if it contains more than one outer
6637      loop.  */
6638   unsigned int nr_outer_loops = 0;
6639   struct loop *single_outer = NULL;
6640   for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6641     {
6642       gcc_assert (loop_outer (loop) == outer);
6643 
6644       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6645           continue;
6646 
6647       if (region_exit != NULL
6648             && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6649           continue;
6650 
6651       nr_outer_loops++;
6652       single_outer = loop;
6653     }
6654   if (nr_outer_loops != 1)
6655     return;
6656 
6657   for (struct loop *loop = single_outer->inner;
6658        loop != NULL;
6659        loop = loop->inner)
6660     if (loop->next)
6661       return;
6662 
6663   /* Mark the loops in the region.  */
6664   for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6665     loop->in_oacc_kernels_region = true;
6666 }
6667 
6668 /* Types used to pass grid and wortkgroup sizes to kernel invocation.  */
6669 
6670 struct GTY(()) grid_launch_attributes_trees
6671 {
6672   tree kernel_dim_array_type;
6673   tree kernel_lattrs_dimnum_decl;
6674   tree kernel_lattrs_grid_decl;
6675   tree kernel_lattrs_group_decl;
6676   tree kernel_launch_attributes_type;
6677 };
6678 
6679 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6680 
6681 /* Create types used to pass kernel launch attributes to target.  */
6682 
6683 static void
grid_create_kernel_launch_attr_types(void)6684 grid_create_kernel_launch_attr_types (void)
6685 {
6686   if (grid_attr_trees)
6687     return;
6688   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6689 
6690   tree dim_arr_index_type
6691     = build_index_type (build_int_cst (integer_type_node, 2));
6692   grid_attr_trees->kernel_dim_array_type
6693     = build_array_type (uint32_type_node, dim_arr_index_type);
6694 
6695   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6696   grid_attr_trees->kernel_lattrs_dimnum_decl
6697     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6698                       uint32_type_node);
6699   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6700 
6701   grid_attr_trees->kernel_lattrs_grid_decl
6702     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6703                       grid_attr_trees->kernel_dim_array_type);
6704   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6705     = grid_attr_trees->kernel_lattrs_dimnum_decl;
6706   grid_attr_trees->kernel_lattrs_group_decl
6707     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6708                       grid_attr_trees->kernel_dim_array_type);
6709   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6710     = grid_attr_trees->kernel_lattrs_grid_decl;
6711   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6712                                "__gomp_kernel_launch_attributes",
6713                                grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6714 }
6715 
6716 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6717    array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
6718    of type uint32_type_node.  */
6719 
6720 static void
grid_insert_store_range_dim(gimple_stmt_iterator * gsi,tree range_var,tree fld_decl,int index,tree value)6721 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6722                                    tree fld_decl, int index, tree value)
6723 {
6724   tree ref = build4 (ARRAY_REF, uint32_type_node,
6725                          build3 (COMPONENT_REF,
6726                                    grid_attr_trees->kernel_dim_array_type,
6727                                    range_var, fld_decl, NULL_TREE),
6728                          build_int_cst (integer_type_node, index),
6729                          NULL_TREE, NULL_TREE);
6730   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6731 }
6732 
6733 /* Return a tree representation of a pointer to a structure with grid and
6734    work-group size information.  Statements filling that information will be
6735    inserted before GSI, TGT_STMT is the target statement which has the
6736    necessary information in it.  */
6737 
6738 static tree
grid_get_kernel_launch_attributes(gimple_stmt_iterator * gsi,gomp_target * tgt_stmt)6739 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6740                                                gomp_target *tgt_stmt)
6741 {
6742   grid_create_kernel_launch_attr_types ();
6743   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6744                                         "__kernel_launch_attrs");
6745 
6746   unsigned max_dim = 0;
6747   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6748        clause;
6749        clause = OMP_CLAUSE_CHAIN (clause))
6750     {
6751       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6752           continue;
6753 
6754       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6755       max_dim = MAX (dim, max_dim);
6756 
6757       grid_insert_store_range_dim (gsi, lattrs,
6758                                            grid_attr_trees->kernel_lattrs_grid_decl,
6759                                            dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6760       grid_insert_store_range_dim (gsi, lattrs,
6761                                            grid_attr_trees->kernel_lattrs_group_decl,
6762                                            dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6763     }
6764 
6765   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6766                               grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6767   gcc_checking_assert (max_dim <= 2);
6768   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6769   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6770                          GSI_SAME_STMT);
6771   TREE_ADDRESSABLE (lattrs) = 1;
6772   return build_fold_addr_expr (lattrs);
6773 }
6774 
6775 /* Build target argument identifier from the DEVICE identifier, value
6776    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6777 
6778 static tree
get_target_argument_identifier_1(int device,bool subseqent_param,int id)6779 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6780 {
6781   tree t = build_int_cst (integer_type_node, device);
6782   if (subseqent_param)
6783     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6784                          build_int_cst (integer_type_node,
6785                                             GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6786   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6787                        build_int_cst (integer_type_node, id));
6788   return t;
6789 }
6790 
6791 /* Like above but return it in type that can be directly stored as an element
6792    of the argument array.  */
6793 
6794 static tree
get_target_argument_identifier(int device,bool subseqent_param,int id)6795 get_target_argument_identifier (int device, bool subseqent_param, int id)
6796 {
6797   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6798   return fold_convert (ptr_type_node, t);
6799 }
6800 
6801 /* Return a target argument consisting of DEVICE identifier, value identifier
6802    ID, and the actual VALUE.  */
6803 
6804 static tree
get_target_argument_value(gimple_stmt_iterator * gsi,int device,int id,tree value)6805 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6806                                  tree value)
6807 {
6808   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6809                               fold_convert (integer_type_node, value),
6810                               build_int_cst (unsigned_type_node,
6811                                                GOMP_TARGET_ARG_VALUE_SHIFT));
6812   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6813                        get_target_argument_identifier_1 (device, false, id));
6814   t = fold_convert (ptr_type_node, t);
6815   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6816 }
6817 
6818 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6819    push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
6820    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6821    arguments.  */
6822 
6823 static void
push_target_argument_according_to_value(gimple_stmt_iterator * gsi,int device,int id,tree value,vec<tree> * args)6824 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6825                                                    int id, tree value, vec <tree> *args)
6826 {
6827   if (tree_fits_shwi_p (value)
6828       && tree_to_shwi (value) > -(1 << 15)
6829       && tree_to_shwi (value) < (1 << 15))
6830     args->quick_push (get_target_argument_value (gsi, device, id, value));
6831   else
6832     {
6833       args->quick_push (get_target_argument_identifier (device, true, id));
6834       value = fold_convert (ptr_type_node, value);
6835       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6836                                                   GSI_SAME_STMT);
6837       args->quick_push (value);
6838     }
6839 }
6840 
6841 /* Create an array of arguments that is then passed to GOMP_target.  */
6842 
6843 static tree
get_target_arguments(gimple_stmt_iterator * gsi,gomp_target * tgt_stmt)6844 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6845 {
6846   auto_vec <tree, 6> args;
6847   tree clauses = gimple_omp_target_clauses (tgt_stmt);
6848   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6849   if (c)
6850     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6851   else
6852     t = integer_minus_one_node;
6853   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6854                                                      GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6855 
6856   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6857   if (c)
6858     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6859   else
6860     t = integer_minus_one_node;
6861   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6862                                                      GOMP_TARGET_ARG_THREAD_LIMIT, t,
6863                                                      &args);
6864 
6865   /* Add HSA-specific grid sizes, if available.  */
6866   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6867                            OMP_CLAUSE__GRIDDIM_))
6868     {
6869       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6870       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6871       args.quick_push (t);
6872       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6873     }
6874 
6875   /* Produce more, perhaps device specific, arguments here.  */
6876 
6877   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6878                                                                         args.length () + 1),
6879                                           ".omp_target_args");
6880   for (unsigned i = 0; i < args.length (); i++)
6881     {
6882       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6883                                build_int_cst (integer_type_node, i),
6884                                NULL_TREE, NULL_TREE);
6885       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6886                                GSI_SAME_STMT);
6887     }
6888   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6889                          build_int_cst (integer_type_node, args.length ()),
6890                          NULL_TREE, NULL_TREE);
6891   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6892                          GSI_SAME_STMT);
6893   TREE_ADDRESSABLE (argarray) = 1;
6894   return build_fold_addr_expr (argarray);
6895 }
6896 
6897 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
6898 
6899 static void
expand_omp_target(struct omp_region * region)6900 expand_omp_target (struct omp_region *region)
6901 {
6902   basic_block entry_bb, exit_bb, new_bb;
6903   struct function *child_cfun;
6904   tree child_fn, block, t;
6905   gimple_stmt_iterator gsi;
6906   gomp_target *entry_stmt;
6907   gimple *stmt;
6908   edge e;
6909   bool offloaded, data_region;
6910 
6911   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6912   new_bb = region->entry;
6913 
6914   offloaded = is_gimple_omp_offloaded (entry_stmt);
6915   switch (gimple_omp_target_kind (entry_stmt))
6916     {
6917     case GF_OMP_TARGET_KIND_REGION:
6918     case GF_OMP_TARGET_KIND_UPDATE:
6919     case GF_OMP_TARGET_KIND_ENTER_DATA:
6920     case GF_OMP_TARGET_KIND_EXIT_DATA:
6921     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6922     case GF_OMP_TARGET_KIND_OACC_KERNELS:
6923     case GF_OMP_TARGET_KIND_OACC_UPDATE:
6924     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6925     case GF_OMP_TARGET_KIND_OACC_DECLARE:
6926       data_region = false;
6927       break;
6928     case GF_OMP_TARGET_KIND_DATA:
6929     case GF_OMP_TARGET_KIND_OACC_DATA:
6930     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6931       data_region = true;
6932       break;
6933     default:
6934       gcc_unreachable ();
6935     }
6936 
6937   child_fn = NULL_TREE;
6938   child_cfun = NULL;
6939   if (offloaded)
6940     {
6941       child_fn = gimple_omp_target_child_fn (entry_stmt);
6942       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6943     }
6944 
6945   /* Supported by expand_omp_taskreg, but not here.  */
6946   if (child_cfun != NULL)
6947     gcc_checking_assert (!child_cfun->cfg);
6948   gcc_checking_assert (!gimple_in_ssa_p (cfun));
6949 
6950   entry_bb = region->entry;
6951   exit_bb = region->exit;
6952 
6953   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6954     {
6955       mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6956 
6957       /* Further down, both OpenACC kernels and OpenACC parallel constructs
6958            will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6959            two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
6960       DECL_ATTRIBUTES (child_fn)
6961           = tree_cons (get_identifier ("oacc kernels"),
6962                          NULL_TREE, DECL_ATTRIBUTES (child_fn));
6963     }
6964 
6965   if (offloaded)
6966     {
6967       unsigned srcidx, dstidx, num;
6968 
6969       /* If the offloading region needs data sent from the parent
6970            function, then the very first statement (except possible
6971            tree profile counter updates) of the offloading body
6972            is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
6973            &.OMP_DATA_O is passed as an argument to the child function,
6974            we need to replace it with the argument as seen by the child
6975            function.
6976 
6977            In most cases, this will end up being the identity assignment
6978            .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
6979            a function call that has been inlined, the original PARM_DECL
6980            .OMP_DATA_I may have been converted into a different local
6981            variable.  In which case, we need to keep the assignment.  */
6982       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6983       if (data_arg)
6984           {
6985             basic_block entry_succ_bb = single_succ (entry_bb);
6986             gimple_stmt_iterator gsi;
6987             tree arg;
6988             gimple *tgtcopy_stmt = NULL;
6989             tree sender = TREE_VEC_ELT (data_arg, 0);
6990 
6991             for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6992               {
6993                 gcc_assert (!gsi_end_p (gsi));
6994                 stmt = gsi_stmt (gsi);
6995                 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6996                     continue;
6997 
6998                 if (gimple_num_ops (stmt) == 2)
6999                     {
7000                       tree arg = gimple_assign_rhs1 (stmt);
7001 
7002                       /* We're ignoring the subcode because we're
7003                          effectively doing a STRIP_NOPS.  */
7004 
7005                       if (TREE_CODE (arg) == ADDR_EXPR
7006                           && TREE_OPERAND (arg, 0) == sender)
7007                         {
7008                           tgtcopy_stmt = stmt;
7009                           break;
7010                         }
7011                     }
7012               }
7013 
7014             gcc_assert (tgtcopy_stmt != NULL);
7015             arg = DECL_ARGUMENTS (child_fn);
7016 
7017             gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7018             gsi_remove (&gsi, true);
7019           }
7020 
7021       /* Declare local variables needed in CHILD_CFUN.  */
7022       block = DECL_INITIAL (child_fn);
7023       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7024       /* The gimplifier could record temporaries in the offloading block
7025            rather than in containing function's local_decls chain,
7026            which would mean cgraph missed finalizing them.  Do it now.  */
7027       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7028           if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7029             varpool_node::finalize_decl (t);
7030       DECL_SAVED_TREE (child_fn) = NULL;
7031       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7032       gimple_set_body (child_fn, NULL);
7033       TREE_USED (block) = 1;
7034 
7035       /* Reset DECL_CONTEXT on function arguments.  */
7036       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7037           DECL_CONTEXT (t) = child_fn;
7038 
7039       /* Split ENTRY_BB at GIMPLE_*,
7040            so that it can be moved to the child function.  */
7041       gsi = gsi_last_nondebug_bb (entry_bb);
7042       stmt = gsi_stmt (gsi);
7043       gcc_assert (stmt
7044                       && gimple_code (stmt) == gimple_code (entry_stmt));
7045       e = split_block (entry_bb, stmt);
7046       gsi_remove (&gsi, true);
7047       entry_bb = e->dest;
7048       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7049 
7050       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7051       if (exit_bb)
7052           {
7053             gsi = gsi_last_nondebug_bb (exit_bb);
7054             gcc_assert (!gsi_end_p (gsi)
7055                           && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7056             stmt = gimple_build_return (NULL);
7057             gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7058             gsi_remove (&gsi, true);
7059           }
7060 
7061       /* Move the offloading region into CHILD_CFUN.  */
7062 
7063       block = gimple_block (entry_stmt);
7064 
7065       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7066       if (exit_bb)
7067           single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7068       /* When the OMP expansion process cannot guarantee an up-to-date
7069            loop tree arrange for the child function to fixup loops.  */
7070       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7071           child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7072 
7073       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7074       num = vec_safe_length (child_cfun->local_decls);
7075       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7076           {
7077             t = (*child_cfun->local_decls)[srcidx];
7078             if (DECL_CONTEXT (t) == cfun->decl)
7079               continue;
7080             if (srcidx != dstidx)
7081               (*child_cfun->local_decls)[dstidx] = t;
7082             dstidx++;
7083           }
7084       if (dstidx != num)
7085           vec_safe_truncate (child_cfun->local_decls, dstidx);
7086 
7087       /* Inform the callgraph about the new function.  */
7088       child_cfun->curr_properties = cfun->curr_properties;
7089       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7090       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7091       cgraph_node *node = cgraph_node::get_create (child_fn);
7092       node->parallelized_function = 1;
7093       cgraph_node::add_new_function (child_fn, true);
7094 
7095       /* Add the new function to the offload table.  */
7096       if (ENABLE_OFFLOADING)
7097           {
7098             if (in_lto_p)
7099               DECL_PRESERVE_P (child_fn) = 1;
7100             vec_safe_push (offload_funcs, child_fn);
7101           }
7102 
7103       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7104                           && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7105 
7106       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7107            fixed in a following pass.  */
7108       push_cfun (child_cfun);
7109       if (need_asm)
7110           assign_assembler_name_if_needed (child_fn);
7111       cgraph_edge::rebuild_edges ();
7112 
7113       /* Some EH regions might become dead, see PR34608.  If
7114            pass_cleanup_cfg isn't the first pass to happen with the
7115            new child, these dead EH edges might cause problems.
7116            Clean them up now.  */
7117       if (flag_exceptions)
7118           {
7119             basic_block bb;
7120             bool changed = false;
7121 
7122             FOR_EACH_BB_FN (bb, cfun)
7123               changed |= gimple_purge_dead_eh_edges (bb);
7124             if (changed)
7125               cleanup_tree_cfg ();
7126           }
7127       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7128           verify_loop_structure ();
7129       pop_cfun ();
7130 
7131       if (dump_file && !gimple_in_ssa_p (cfun))
7132           {
7133             omp_any_child_fn_dumped = true;
7134             dump_function_header (dump_file, child_fn, dump_flags);
7135             dump_function_to_file (child_fn, dump_file, dump_flags);
7136           }
7137 
7138       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7139     }
7140 
7141   /* Emit a library call to launch the offloading region, or do data
7142      transfers.  */
7143   tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7144   enum built_in_function start_ix;
7145   location_t clause_loc;
7146   unsigned int flags_i = 0;
7147 
7148   switch (gimple_omp_target_kind (entry_stmt))
7149     {
7150     case GF_OMP_TARGET_KIND_REGION:
7151       start_ix = BUILT_IN_GOMP_TARGET;
7152       break;
7153     case GF_OMP_TARGET_KIND_DATA:
7154       start_ix = BUILT_IN_GOMP_TARGET_DATA;
7155       break;
7156     case GF_OMP_TARGET_KIND_UPDATE:
7157       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7158       break;
7159     case GF_OMP_TARGET_KIND_ENTER_DATA:
7160       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7161       break;
7162     case GF_OMP_TARGET_KIND_EXIT_DATA:
7163       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7164       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7165       break;
7166     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7167     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7168       start_ix = BUILT_IN_GOACC_PARALLEL;
7169       break;
7170     case GF_OMP_TARGET_KIND_OACC_DATA:
7171     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7172       start_ix = BUILT_IN_GOACC_DATA_START;
7173       break;
7174     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7175       start_ix = BUILT_IN_GOACC_UPDATE;
7176       break;
7177     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7178       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7179       break;
7180     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7181       start_ix = BUILT_IN_GOACC_DECLARE;
7182       break;
7183     default:
7184       gcc_unreachable ();
7185     }
7186 
7187   clauses = gimple_omp_target_clauses (entry_stmt);
7188 
7189   /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7190      library choose) and there is no conditional.  */
7191   cond = NULL_TREE;
7192   device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7193 
7194   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7195   if (c)
7196     cond = OMP_CLAUSE_IF_EXPR (c);
7197 
7198   c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7199   if (c)
7200     {
7201       /* Even if we pass it to all library function calls, it is currently only
7202            defined/used for the OpenMP target ones.  */
7203       gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7204                                  || start_ix == BUILT_IN_GOMP_TARGET_DATA
7205                                  || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7206                                  || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7207 
7208       device = OMP_CLAUSE_DEVICE_ID (c);
7209       clause_loc = OMP_CLAUSE_LOCATION (c);
7210     }
7211   else
7212     clause_loc = gimple_location (entry_stmt);
7213 
7214   c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7215   if (c)
7216     flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7217 
7218   /* Ensure 'device' is of the correct type.  */
7219   device = fold_convert_loc (clause_loc, integer_type_node, device);
7220 
7221   /* If we found the clause 'if (cond)', build
7222      (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7223   if (cond)
7224     {
7225       cond = gimple_boolify (cond);
7226 
7227       basic_block cond_bb, then_bb, else_bb;
7228       edge e;
7229       tree tmp_var;
7230 
7231       tmp_var = create_tmp_var (TREE_TYPE (device));
7232       if (offloaded)
7233           e = split_block_after_labels (new_bb);
7234       else
7235           {
7236             gsi = gsi_last_nondebug_bb (new_bb);
7237             gsi_prev (&gsi);
7238             e = split_block (new_bb, gsi_stmt (gsi));
7239           }
7240       cond_bb = e->src;
7241       new_bb = e->dest;
7242       remove_edge (e);
7243 
7244       then_bb = create_empty_bb (cond_bb);
7245       else_bb = create_empty_bb (then_bb);
7246       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7247       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7248 
7249       stmt = gimple_build_cond_empty (cond);
7250       gsi = gsi_last_bb (cond_bb);
7251       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7252 
7253       gsi = gsi_start_bb (then_bb);
7254       stmt = gimple_build_assign (tmp_var, device);
7255       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7256 
7257       gsi = gsi_start_bb (else_bb);
7258       stmt = gimple_build_assign (tmp_var,
7259                                           build_int_cst (integer_type_node,
7260                                                              GOMP_DEVICE_HOST_FALLBACK));
7261       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7262 
7263       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7264       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7265       add_bb_to_loop (then_bb, cond_bb->loop_father);
7266       add_bb_to_loop (else_bb, cond_bb->loop_father);
7267       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7268       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7269 
7270       device = tmp_var;
7271       gsi = gsi_last_nondebug_bb (new_bb);
7272     }
7273   else
7274     {
7275       gsi = gsi_last_nondebug_bb (new_bb);
7276       device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7277                                                    true, GSI_SAME_STMT);
7278     }
7279 
7280   t = gimple_omp_target_data_arg (entry_stmt);
7281   if (t == NULL)
7282     {
7283       t1 = size_zero_node;
7284       t2 = build_zero_cst (ptr_type_node);
7285       t3 = t2;
7286       t4 = t2;
7287     }
7288   else
7289     {
7290       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7291       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7292       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7293       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7294       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7295     }
7296 
7297   gimple *g;
7298   bool tagging = false;
7299   /* The maximum number used by any start_ix, without varargs.  */
7300   auto_vec<tree, 11> args;
7301   args.quick_push (device);
7302   if (offloaded)
7303     args.quick_push (build_fold_addr_expr (child_fn));
7304   args.quick_push (t1);
7305   args.quick_push (t2);
7306   args.quick_push (t3);
7307   args.quick_push (t4);
7308   switch (start_ix)
7309     {
7310     case BUILT_IN_GOACC_DATA_START:
7311     case BUILT_IN_GOACC_DECLARE:
7312     case BUILT_IN_GOMP_TARGET_DATA:
7313       break;
7314     case BUILT_IN_GOMP_TARGET:
7315     case BUILT_IN_GOMP_TARGET_UPDATE:
7316     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7317       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7318       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7319       if (c)
7320           depend = OMP_CLAUSE_DECL (c);
7321       else
7322           depend = build_int_cst (ptr_type_node, 0);
7323       args.quick_push (depend);
7324       if (start_ix == BUILT_IN_GOMP_TARGET)
7325           args.quick_push (get_target_arguments (&gsi, entry_stmt));
7326       break;
7327     case BUILT_IN_GOACC_PARALLEL:
7328       oacc_set_fn_attrib (child_fn, clauses, &args);
7329       tagging = true;
7330       /* FALLTHRU */
7331     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7332     case BUILT_IN_GOACC_UPDATE:
7333       {
7334           tree t_async = NULL_TREE;
7335 
7336           /* If present, use the value specified by the respective
7337              clause, making sure that is of the correct type.  */
7338           c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7339           if (c)
7340             t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7341                                               integer_type_node,
7342                                               OMP_CLAUSE_ASYNC_EXPR (c));
7343           else if (!tagging)
7344             /* Default values for t_async.  */
7345             t_async = fold_convert_loc (gimple_location (entry_stmt),
7346                                               integer_type_node,
7347                                               build_int_cst (integer_type_node,
7348                                                                  GOMP_ASYNC_SYNC));
7349           if (tagging && t_async)
7350             {
7351               unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7352 
7353               if (TREE_CODE (t_async) == INTEGER_CST)
7354                 {
7355                     /* See if we can pack the async arg in to the tag's
7356                        operand.  */
7357                     i_async = TREE_INT_CST_LOW (t_async);
7358                     if (i_async < GOMP_LAUNCH_OP_MAX)
7359                       t_async = NULL_TREE;
7360                     else
7361                       i_async = GOMP_LAUNCH_OP_MAX;
7362                 }
7363               args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7364                                                         i_async));
7365             }
7366           if (t_async)
7367             args.safe_push (t_async);
7368 
7369           /* Save the argument index, and ... */
7370           unsigned t_wait_idx = args.length ();
7371           unsigned num_waits = 0;
7372           c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7373           if (!tagging || c)
7374             /* ... push a placeholder.  */
7375             args.safe_push (integer_zero_node);
7376 
7377           for (; c; c = OMP_CLAUSE_CHAIN (c))
7378             if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7379               {
7380                 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7381                                                             integer_type_node,
7382                                                             OMP_CLAUSE_WAIT_EXPR (c)));
7383                 num_waits++;
7384               }
7385 
7386           if (!tagging || num_waits)
7387             {
7388               tree len;
7389 
7390               /* Now that we know the number, update the placeholder.  */
7391               if (tagging)
7392                 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7393               else
7394                 len = build_int_cst (integer_type_node, num_waits);
7395               len = fold_convert_loc (gimple_location (entry_stmt),
7396                                             unsigned_type_node, len);
7397               args[t_wait_idx] = len;
7398             }
7399       }
7400       break;
7401     default:
7402       gcc_unreachable ();
7403     }
7404   if (tagging)
7405     /*  Push terminal marker - zero.  */
7406     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7407 
7408   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7409   gimple_set_location (g, gimple_location (entry_stmt));
7410   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7411   if (!offloaded)
7412     {
7413       g = gsi_stmt (gsi);
7414       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7415       gsi_remove (&gsi, true);
7416     }
7417   if (data_region && region->exit)
7418     {
7419       gsi = gsi_last_nondebug_bb (region->exit);
7420       g = gsi_stmt (gsi);
7421       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7422       gsi_remove (&gsi, true);
7423     }
7424 }
7425 
7426 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7427    iteration variable derived from the thread number.  INTRA_GROUP means this
7428    is an expansion of a loop iterating over work-items within a separate
7429    iteration over groups.  */
7430 
7431 static void
grid_expand_omp_for_loop(struct omp_region * kfor,bool intra_group)7432 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7433 {
7434   gimple_stmt_iterator gsi;
7435   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7436   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7437                            == GF_OMP_FOR_KIND_GRID_LOOP);
7438   size_t collapse = gimple_omp_for_collapse (for_stmt);
7439   struct omp_for_data_loop *loops
7440     = XALLOCAVEC (struct omp_for_data_loop,
7441                       gimple_omp_for_collapse (for_stmt));
7442   struct omp_for_data fd;
7443 
7444   remove_edge (BRANCH_EDGE (kfor->entry));
7445   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7446 
7447   gcc_assert (kfor->cont);
7448   omp_extract_for_data (for_stmt, &fd, loops);
7449 
7450   gsi = gsi_start_bb (body_bb);
7451 
7452   for (size_t dim = 0; dim < collapse; dim++)
7453     {
7454       tree type, itype;
7455       itype = type = TREE_TYPE (fd.loops[dim].v);
7456       if (POINTER_TYPE_P (type))
7457           itype = signed_type_for (type);
7458 
7459       tree n1 = fd.loops[dim].n1;
7460       tree step = fd.loops[dim].step;
7461       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7462                                              true, NULL_TREE, true, GSI_SAME_STMT);
7463       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7464                                                true, NULL_TREE, true, GSI_SAME_STMT);
7465       tree threadid;
7466       if (gimple_omp_for_grid_group_iter (for_stmt))
7467           {
7468             gcc_checking_assert (!intra_group);
7469             threadid = build_call_expr (builtin_decl_explicit
7470                                               (BUILT_IN_HSA_WORKGROUPID), 1,
7471                                               build_int_cstu (unsigned_type_node, dim));
7472           }
7473       else if (intra_group)
7474           threadid = build_call_expr (builtin_decl_explicit
7475                                             (BUILT_IN_HSA_WORKITEMID), 1,
7476                                             build_int_cstu (unsigned_type_node, dim));
7477       else
7478           threadid = build_call_expr (builtin_decl_explicit
7479                                             (BUILT_IN_HSA_WORKITEMABSID), 1,
7480                                             build_int_cstu (unsigned_type_node, dim));
7481       threadid = fold_convert (itype, threadid);
7482       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7483                                                      true, GSI_SAME_STMT);
7484 
7485       tree startvar = fd.loops[dim].v;
7486       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7487       if (POINTER_TYPE_P (type))
7488           t = fold_build_pointer_plus (n1, t);
7489       else
7490           t = fold_build2 (PLUS_EXPR, type, t, n1);
7491       t = fold_convert (type, t);
7492       t = force_gimple_operand_gsi (&gsi, t,
7493                                             DECL_P (startvar)
7494                                             && TREE_ADDRESSABLE (startvar),
7495                                             NULL_TREE, true, GSI_SAME_STMT);
7496       gassign *assign_stmt = gimple_build_assign (startvar, t);
7497       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7498     }
7499   /* Remove the omp for statement.  */
7500   gsi = gsi_last_nondebug_bb (kfor->entry);
7501   gsi_remove (&gsi, true);
7502 
7503   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7504   gsi = gsi_last_nondebug_bb (kfor->cont);
7505   gcc_assert (!gsi_end_p (gsi)
7506                 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7507   gsi_remove (&gsi, true);
7508 
7509   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7510   gsi = gsi_last_nondebug_bb (kfor->exit);
7511   gcc_assert (!gsi_end_p (gsi)
7512                 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7513   if (intra_group)
7514     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7515   gsi_remove (&gsi, true);
7516 
7517   /* Fixup the much simpler CFG.  */
7518   remove_edge (find_edge (kfor->cont, body_bb));
7519 
7520   if (kfor->cont != body_bb)
7521     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7522   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7523 }
7524 
7525 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7526    argument_decls.  */
7527 
7528 struct grid_arg_decl_map
7529 {
7530   tree old_arg;
7531   tree new_arg;
7532 };
7533 
7534 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7535    pertaining to kernel function.  */
7536 
7537 static tree
grid_remap_kernel_arg_accesses(tree * tp,int * walk_subtrees,void * data)7538 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7539 {
7540   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7541   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7542   tree t = *tp;
7543 
7544   if (t == adm->old_arg)
7545     *tp = adm->new_arg;
7546   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7547   return NULL_TREE;
7548 }
7549 
7550 /* If TARGET region contains a kernel body for loop, remove its region from the
7551    TARGET and expand it in HSA gridified kernel fashion.  */
7552 
7553 static void
grid_expand_target_grid_body(struct omp_region * target)7554 grid_expand_target_grid_body (struct omp_region *target)
7555 {
7556   if (!hsa_gen_requested_p ())
7557     return;
7558 
7559   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7560   struct omp_region **pp;
7561 
7562   for (pp = &target->inner; *pp; pp = &(*pp)->next)
7563     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7564       break;
7565 
7566   struct omp_region *gpukernel = *pp;
7567 
7568   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7569   if (!gpukernel)
7570     {
7571       /* HSA cannot handle OACC stuff.  */
7572       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7573           return;
7574       gcc_checking_assert (orig_child_fndecl);
7575       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7576                                             OMP_CLAUSE__GRIDDIM_));
7577       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7578 
7579       hsa_register_kernel (n);
7580       return;
7581     }
7582 
7583   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7584                                      OMP_CLAUSE__GRIDDIM_));
7585   tree inside_block
7586     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7587   *pp = gpukernel->next;
7588   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7589     if ((*pp)->type == GIMPLE_OMP_FOR)
7590       break;
7591 
7592   struct omp_region *kfor = *pp;
7593   gcc_assert (kfor);
7594   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7595   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7596   *pp = kfor->next;
7597   if (kfor->inner)
7598     {
7599       if (gimple_omp_for_grid_group_iter (for_stmt))
7600           {
7601             struct omp_region **next_pp;
7602             for (pp = &kfor->inner; *pp; pp = next_pp)
7603               {
7604                 next_pp = &(*pp)->next;
7605                 if ((*pp)->type != GIMPLE_OMP_FOR)
7606                     continue;
7607                 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7608                 gcc_assert (gimple_omp_for_kind (inner)
7609                                 == GF_OMP_FOR_KIND_GRID_LOOP);
7610                 grid_expand_omp_for_loop (*pp, true);
7611                 *pp = (*pp)->next;
7612                 next_pp = pp;
7613               }
7614           }
7615       expand_omp (kfor->inner);
7616     }
7617   if (gpukernel->inner)
7618     expand_omp (gpukernel->inner);
7619 
7620   tree kern_fndecl = copy_node (orig_child_fndecl);
7621   DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7622   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7623   tree tgtblock = gimple_block (tgt_stmt);
7624   tree fniniblock = make_node (BLOCK);
7625   BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7626   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7627   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7628   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7629   DECL_INITIAL (kern_fndecl) = fniniblock;
7630   push_struct_function (kern_fndecl);
7631   cfun->function_end_locus = gimple_location (tgt_stmt);
7632   init_tree_ssa (cfun);
7633   pop_cfun ();
7634 
7635   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7636   gcc_assert (!DECL_CHAIN (old_parm_decl));
7637   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7638   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7639   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7640   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7641   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7642   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7643   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7644   kern_cfun->curr_properties = cfun->curr_properties;
7645 
7646   grid_expand_omp_for_loop (kfor, false);
7647 
7648   /* Remove the omp for statement.  */
7649   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7650   gsi_remove (&gsi, true);
7651   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7652      return.  */
7653   gsi = gsi_last_nondebug_bb (gpukernel->exit);
7654   gcc_assert (!gsi_end_p (gsi)
7655                 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7656   gimple *ret_stmt = gimple_build_return (NULL);
7657   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7658   gsi_remove (&gsi, true);
7659 
7660   /* Statements in the first BB in the target construct have been produced by
7661      target lowering and must be copied inside the GPUKERNEL, with the two
7662      exceptions of the first OMP statement and the OMP_DATA assignment
7663      statement.  */
7664   gsi = gsi_start_bb (single_succ (gpukernel->entry));
7665   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7666   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7667   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7668        !gsi_end_p (tsi); gsi_next (&tsi))
7669     {
7670       gimple *stmt = gsi_stmt (tsi);
7671       if (is_gimple_omp (stmt))
7672           break;
7673       if (sender
7674             && is_gimple_assign (stmt)
7675             && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7676             && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7677           continue;
7678       gimple *copy = gimple_copy (stmt);
7679       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7680       gimple_set_block (copy, fniniblock);
7681     }
7682 
7683   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7684                                 gpukernel->exit, inside_block);
7685 
7686   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7687   kcn->mark_force_output ();
7688   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7689 
7690   hsa_register_kernel (kcn, orig_child);
7691 
7692   cgraph_node::add_new_function (kern_fndecl, true);
7693   push_cfun (kern_cfun);
7694   cgraph_edge::rebuild_edges ();
7695 
7696   /* Re-map any mention of the PARM_DECL of the original function to the
7697      PARM_DECL of the new one.
7698 
7699      TODO: It would be great if lowering produced references into the GPU
7700      kernel decl straight away and we did not have to do this.  */
7701   struct grid_arg_decl_map adm;
7702   adm.old_arg = old_parm_decl;
7703   adm.new_arg = new_parm_decl;
7704   basic_block bb;
7705   FOR_EACH_BB_FN (bb, kern_cfun)
7706     {
7707       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7708           {
7709             gimple *stmt = gsi_stmt (gsi);
7710             struct walk_stmt_info wi;
7711             memset (&wi, 0, sizeof (wi));
7712             wi.info = &adm;
7713             walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7714           }
7715     }
7716   pop_cfun ();
7717 
7718   return;
7719 }
7720 
7721 /* Expand the parallel region tree rooted at REGION.  Expansion
7722    proceeds in depth-first order.  Innermost regions are expanded
7723    first.  This way, parallel regions that require a new function to
7724    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7725    internal dependencies in their body.  */
7726 
7727 static void
expand_omp(struct omp_region * region)7728 expand_omp (struct omp_region *region)
7729 {
7730   omp_any_child_fn_dumped = false;
7731   while (region)
7732     {
7733       location_t saved_location;
7734       gimple *inner_stmt = NULL;
7735 
7736       /* First, determine whether this is a combined parallel+workshare
7737            region.  */
7738       if (region->type == GIMPLE_OMP_PARALLEL)
7739           determine_parallel_type (region);
7740       else if (region->type == GIMPLE_OMP_TARGET)
7741           grid_expand_target_grid_body (region);
7742 
7743       if (region->type == GIMPLE_OMP_FOR
7744             && gimple_omp_for_combined_p (last_stmt (region->entry)))
7745           inner_stmt = last_stmt (region->inner->entry);
7746 
7747       if (region->inner)
7748           expand_omp (region->inner);
7749 
7750       saved_location = input_location;
7751       if (gimple_has_location (last_stmt (region->entry)))
7752           input_location = gimple_location (last_stmt (region->entry));
7753 
7754       switch (region->type)
7755           {
7756           case GIMPLE_OMP_PARALLEL:
7757           case GIMPLE_OMP_TASK:
7758             expand_omp_taskreg (region);
7759             break;
7760 
7761           case GIMPLE_OMP_FOR:
7762             expand_omp_for (region, inner_stmt);
7763             break;
7764 
7765           case GIMPLE_OMP_SECTIONS:
7766             expand_omp_sections (region);
7767             break;
7768 
7769           case GIMPLE_OMP_SECTION:
7770             /* Individual omp sections are handled together with their
7771                parent GIMPLE_OMP_SECTIONS region.  */
7772             break;
7773 
7774           case GIMPLE_OMP_SINGLE:
7775             expand_omp_single (region);
7776             break;
7777 
7778           case GIMPLE_OMP_ORDERED:
7779             {
7780               gomp_ordered *ord_stmt
7781                 = as_a <gomp_ordered *> (last_stmt (region->entry));
7782               if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7783                                          OMP_CLAUSE_DEPEND))
7784                 {
7785                     /* We'll expand these when expanding corresponding
7786                        worksharing region with ordered(n) clause.  */
7787                     gcc_assert (region->outer
7788                                   && region->outer->type == GIMPLE_OMP_FOR);
7789                     region->ord_stmt = ord_stmt;
7790                     break;
7791                 }
7792             }
7793             /* FALLTHRU */
7794           case GIMPLE_OMP_MASTER:
7795           case GIMPLE_OMP_TASKGROUP:
7796           case GIMPLE_OMP_CRITICAL:
7797           case GIMPLE_OMP_TEAMS:
7798             expand_omp_synch (region);
7799             break;
7800 
7801           case GIMPLE_OMP_ATOMIC_LOAD:
7802             expand_omp_atomic (region);
7803             break;
7804 
7805           case GIMPLE_OMP_TARGET:
7806             expand_omp_target (region);
7807             break;
7808 
7809           default:
7810             gcc_unreachable ();
7811           }
7812 
7813       input_location = saved_location;
7814       region = region->next;
7815     }
7816   if (omp_any_child_fn_dumped)
7817     {
7818       if (dump_file)
7819           dump_function_header (dump_file, current_function_decl, dump_flags);
7820       omp_any_child_fn_dumped = false;
7821     }
7822 }
7823 
7824 /* Helper for build_omp_regions.  Scan the dominator tree starting at
7825    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7826    true, the function ends once a single tree is built (otherwise, whole
7827    forest of OMP constructs may be built).  */
7828 
7829 static void
build_omp_regions_1(basic_block bb,struct omp_region * parent,bool single_tree)7830 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7831                          bool single_tree)
7832 {
7833   gimple_stmt_iterator gsi;
7834   gimple *stmt;
7835   basic_block son;
7836 
7837   gsi = gsi_last_nondebug_bb (bb);
7838   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7839     {
7840       struct omp_region *region;
7841       enum gimple_code code;
7842 
7843       stmt = gsi_stmt (gsi);
7844       code = gimple_code (stmt);
7845       if (code == GIMPLE_OMP_RETURN)
7846           {
7847             /* STMT is the return point out of region PARENT.  Mark it
7848                as the exit point and make PARENT the immediately
7849                enclosing region.  */
7850             gcc_assert (parent);
7851             region = parent;
7852             region->exit = bb;
7853             parent = parent->outer;
7854           }
7855       else if (code == GIMPLE_OMP_ATOMIC_STORE)
7856           {
7857             /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7858                GIMPLE_OMP_RETURN, but matches with
7859                GIMPLE_OMP_ATOMIC_LOAD.  */
7860             gcc_assert (parent);
7861             gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7862             region = parent;
7863             region->exit = bb;
7864             parent = parent->outer;
7865           }
7866       else if (code == GIMPLE_OMP_CONTINUE)
7867           {
7868             gcc_assert (parent);
7869             parent->cont = bb;
7870           }
7871       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7872           {
7873             /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7874                GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
7875           }
7876       else
7877           {
7878             region = new_omp_region (bb, code, parent);
7879             /* Otherwise...  */
7880             if (code == GIMPLE_OMP_TARGET)
7881               {
7882                 switch (gimple_omp_target_kind (stmt))
7883                     {
7884                     case GF_OMP_TARGET_KIND_REGION:
7885                     case GF_OMP_TARGET_KIND_DATA:
7886                     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7887                     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7888                     case GF_OMP_TARGET_KIND_OACC_DATA:
7889                     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7890                       break;
7891                     case GF_OMP_TARGET_KIND_UPDATE:
7892                     case GF_OMP_TARGET_KIND_ENTER_DATA:
7893                     case GF_OMP_TARGET_KIND_EXIT_DATA:
7894                     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7895                     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7896                     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7897                       /* ..., other than for those stand-alone directives...  */
7898                       region = NULL;
7899                       break;
7900                     default:
7901                       gcc_unreachable ();
7902                     }
7903               }
7904             else if (code == GIMPLE_OMP_ORDERED
7905                        && omp_find_clause (gimple_omp_ordered_clauses
7906                                                    (as_a <gomp_ordered *> (stmt)),
7907                                                OMP_CLAUSE_DEPEND))
7908               /* #pragma omp ordered depend is also just a stand-alone
7909                  directive.  */
7910               region = NULL;
7911             /* ..., this directive becomes the parent for a new region.  */
7912             if (region)
7913               parent = region;
7914           }
7915     }
7916 
7917   if (single_tree && !parent)
7918     return;
7919 
7920   for (son = first_dom_son (CDI_DOMINATORS, bb);
7921        son;
7922        son = next_dom_son (CDI_DOMINATORS, son))
7923     build_omp_regions_1 (son, parent, single_tree);
7924 }
7925 
7926 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7927    root_omp_region.  */
7928 
7929 static void
build_omp_regions_root(basic_block root)7930 build_omp_regions_root (basic_block root)
7931 {
7932   gcc_assert (root_omp_region == NULL);
7933   build_omp_regions_1 (root, NULL, true);
7934   gcc_assert (root_omp_region != NULL);
7935 }
7936 
7937 /* Expands omp construct (and its subconstructs) starting in HEAD.  */
7938 
7939 void
omp_expand_local(basic_block head)7940 omp_expand_local (basic_block head)
7941 {
7942   build_omp_regions_root (head);
7943   if (dump_file && (dump_flags & TDF_DETAILS))
7944     {
7945       fprintf (dump_file, "\nOMP region tree\n\n");
7946       dump_omp_region (dump_file, root_omp_region, 0);
7947       fprintf (dump_file, "\n");
7948     }
7949 
7950   remove_exit_barriers (root_omp_region);
7951   expand_omp (root_omp_region);
7952 
7953   omp_free_regions ();
7954 }
7955 
7956 /* Scan the CFG and build a tree of OMP regions.  Return the root of
7957    the OMP region tree.  */
7958 
7959 static void
build_omp_regions(void)7960 build_omp_regions (void)
7961 {
7962   gcc_assert (root_omp_region == NULL);
7963   calculate_dominance_info (CDI_DOMINATORS);
7964   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7965 }
7966 
7967 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
7968 
7969 static unsigned int
execute_expand_omp(void)7970 execute_expand_omp (void)
7971 {
7972   build_omp_regions ();
7973 
7974   if (!root_omp_region)
7975     return 0;
7976 
7977   if (dump_file)
7978     {
7979       fprintf (dump_file, "\nOMP region tree\n\n");
7980       dump_omp_region (dump_file, root_omp_region, 0);
7981       fprintf (dump_file, "\n");
7982     }
7983 
7984   remove_exit_barriers (root_omp_region);
7985 
7986   expand_omp (root_omp_region);
7987 
7988   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7989     verify_loop_structure ();
7990   cleanup_tree_cfg ();
7991 
7992   omp_free_regions ();
7993 
7994   return 0;
7995 }
7996 
7997 /* OMP expansion -- the default pass, run before creation of SSA form.  */
7998 
7999 namespace {
8000 
8001 const pass_data pass_data_expand_omp =
8002 {
8003   GIMPLE_PASS, /* type */
8004   "ompexp", /* name */
8005   OPTGROUP_OMP, /* optinfo_flags */
8006   TV_NONE, /* tv_id */
8007   PROP_gimple_any, /* properties_required */
8008   PROP_gimple_eomp, /* properties_provided */
8009   0, /* properties_destroyed */
8010   0, /* todo_flags_start */
8011   0, /* todo_flags_finish */
8012 };
8013 
8014 class pass_expand_omp : public gimple_opt_pass
8015 {
8016 public:
pass_expand_omp(gcc::context * ctxt)8017   pass_expand_omp (gcc::context *ctxt)
8018     : gimple_opt_pass (pass_data_expand_omp, ctxt)
8019   {}
8020 
8021   /* opt_pass methods: */
execute(function *)8022   virtual unsigned int execute (function *)
8023     {
8024       bool gate = ((flag_openacc != 0 || flag_openmp != 0
8025                         || flag_openmp_simd != 0)
8026                        && !seen_error ());
8027 
8028       /* This pass always runs, to provide PROP_gimple_eomp.
8029            But often, there is nothing to do.  */
8030       if (!gate)
8031           return 0;
8032 
8033       return execute_expand_omp ();
8034     }
8035 
8036 }; // class pass_expand_omp
8037 
8038 } // anon namespace
8039 
8040 gimple_opt_pass *
make_pass_expand_omp(gcc::context * ctxt)8041 make_pass_expand_omp (gcc::context *ctxt)
8042 {
8043   return new pass_expand_omp (ctxt);
8044 }
8045 
8046 namespace {
8047 
8048 const pass_data pass_data_expand_omp_ssa =
8049 {
8050   GIMPLE_PASS, /* type */
8051   "ompexpssa", /* name */
8052   OPTGROUP_OMP, /* optinfo_flags */
8053   TV_NONE, /* tv_id */
8054   PROP_cfg | PROP_ssa, /* properties_required */
8055   PROP_gimple_eomp, /* properties_provided */
8056   0, /* properties_destroyed */
8057   0, /* todo_flags_start */
8058   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8059 };
8060 
8061 class pass_expand_omp_ssa : public gimple_opt_pass
8062 {
8063 public:
pass_expand_omp_ssa(gcc::context * ctxt)8064   pass_expand_omp_ssa (gcc::context *ctxt)
8065     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8066   {}
8067 
8068   /* opt_pass methods: */
gate(function * fun)8069   virtual bool gate (function *fun)
8070     {
8071       return !(fun->curr_properties & PROP_gimple_eomp);
8072     }
execute(function *)8073   virtual unsigned int execute (function *) { return execute_expand_omp (); }
clone()8074   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8075 
8076 }; // class pass_expand_omp_ssa
8077 
8078 } // anon namespace
8079 
8080 gimple_opt_pass *
make_pass_expand_omp_ssa(gcc::context * ctxt)8081 make_pass_expand_omp_ssa (gcc::context *ctxt)
8082 {
8083   return new pass_expand_omp_ssa (ctxt);
8084 }
8085 
8086 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8087    GIMPLE_* codes.  */
8088 
8089 bool
omp_make_gimple_edges(basic_block bb,struct omp_region ** region,int * region_idx)8090 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8091                            int *region_idx)
8092 {
8093   gimple *last = last_stmt (bb);
8094   enum gimple_code code = gimple_code (last);
8095   struct omp_region *cur_region = *region;
8096   bool fallthru = false;
8097 
8098   switch (code)
8099     {
8100     case GIMPLE_OMP_PARALLEL:
8101     case GIMPLE_OMP_TASK:
8102     case GIMPLE_OMP_FOR:
8103     case GIMPLE_OMP_SINGLE:
8104     case GIMPLE_OMP_TEAMS:
8105     case GIMPLE_OMP_MASTER:
8106     case GIMPLE_OMP_TASKGROUP:
8107     case GIMPLE_OMP_CRITICAL:
8108     case GIMPLE_OMP_SECTION:
8109     case GIMPLE_OMP_GRID_BODY:
8110       cur_region = new_omp_region (bb, code, cur_region);
8111       fallthru = true;
8112       break;
8113 
8114     case GIMPLE_OMP_ORDERED:
8115       cur_region = new_omp_region (bb, code, cur_region);
8116       fallthru = true;
8117       if (omp_find_clause (gimple_omp_ordered_clauses
8118                                    (as_a <gomp_ordered *> (last)),
8119                                  OMP_CLAUSE_DEPEND))
8120           cur_region = cur_region->outer;
8121       break;
8122 
8123     case GIMPLE_OMP_TARGET:
8124       cur_region = new_omp_region (bb, code, cur_region);
8125       fallthru = true;
8126       switch (gimple_omp_target_kind (last))
8127           {
8128           case GF_OMP_TARGET_KIND_REGION:
8129           case GF_OMP_TARGET_KIND_DATA:
8130           case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8131           case GF_OMP_TARGET_KIND_OACC_KERNELS:
8132           case GF_OMP_TARGET_KIND_OACC_DATA:
8133           case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8134             break;
8135           case GF_OMP_TARGET_KIND_UPDATE:
8136           case GF_OMP_TARGET_KIND_ENTER_DATA:
8137           case GF_OMP_TARGET_KIND_EXIT_DATA:
8138           case GF_OMP_TARGET_KIND_OACC_UPDATE:
8139           case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8140           case GF_OMP_TARGET_KIND_OACC_DECLARE:
8141             cur_region = cur_region->outer;
8142             break;
8143           default:
8144             gcc_unreachable ();
8145           }
8146       break;
8147 
8148     case GIMPLE_OMP_SECTIONS:
8149       cur_region = new_omp_region (bb, code, cur_region);
8150       fallthru = true;
8151       break;
8152 
8153     case GIMPLE_OMP_SECTIONS_SWITCH:
8154       fallthru = false;
8155       break;
8156 
8157     case GIMPLE_OMP_ATOMIC_LOAD:
8158     case GIMPLE_OMP_ATOMIC_STORE:
8159        fallthru = true;
8160        break;
8161 
8162     case GIMPLE_OMP_RETURN:
8163       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8164            somewhere other than the next block.  This will be
8165            created later.  */
8166       cur_region->exit = bb;
8167       if (cur_region->type == GIMPLE_OMP_TASK)
8168           /* Add an edge corresponding to not scheduling the task
8169              immediately.  */
8170           make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8171       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8172       cur_region = cur_region->outer;
8173       break;
8174 
8175     case GIMPLE_OMP_CONTINUE:
8176       cur_region->cont = bb;
8177       switch (cur_region->type)
8178           {
8179           case GIMPLE_OMP_FOR:
8180             /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8181                succs edges as abnormal to prevent splitting
8182                them.  */
8183             single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8184             /* Make the loopback edge.  */
8185             make_edge (bb, single_succ (cur_region->entry),
8186                          EDGE_ABNORMAL);
8187 
8188             /* Create an edge from GIMPLE_OMP_FOR to exit, which
8189                corresponds to the case that the body of the loop
8190                is not executed at all.  */
8191             make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8192             make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8193             fallthru = false;
8194             break;
8195 
8196           case GIMPLE_OMP_SECTIONS:
8197             /* Wire up the edges into and out of the nested sections.  */
8198             {
8199               basic_block switch_bb = single_succ (cur_region->entry);
8200 
8201               struct omp_region *i;
8202               for (i = cur_region->inner; i ; i = i->next)
8203                 {
8204                     gcc_assert (i->type == GIMPLE_OMP_SECTION);
8205                     make_edge (switch_bb, i->entry, 0);
8206                     make_edge (i->exit, bb, EDGE_FALLTHRU);
8207                 }
8208 
8209               /* Make the loopback edge to the block with
8210                  GIMPLE_OMP_SECTIONS_SWITCH.  */
8211               make_edge (bb, switch_bb, 0);
8212 
8213               /* Make the edge from the switch to exit.  */
8214               make_edge (switch_bb, bb->next_bb, 0);
8215               fallthru = false;
8216             }
8217             break;
8218 
8219           case GIMPLE_OMP_TASK:
8220             fallthru = true;
8221             break;
8222 
8223           default:
8224             gcc_unreachable ();
8225           }
8226       break;
8227 
8228     default:
8229       gcc_unreachable ();
8230     }
8231 
8232   if (*region != cur_region)
8233     {
8234       *region = cur_region;
8235       if (cur_region)
8236           *region_idx = cur_region->entry->index;
8237       else
8238           *region_idx = 0;
8239     }
8240 
8241   return fallthru;
8242 }
8243 
8244 #include "gt-omp-expand.h"
8245