1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth
4 
5 Copyright (C) 2005-2022 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
63 
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.

   Regions are linked into a tree through OUTER, INNER and NEXT.
   new_omp_region pushes every new region at the head of its parent's
   INNER list (or of ROOT_OMP_REGION when it has no parent), so INNER
   is the most recently created child and NEXT chains the siblings.  */

struct omp_region
{
  /* The enclosing region.  NULL for a toplevel region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.
     May be NULL; dump_omp_region then prints "[no exit marker]".  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.
     May be NULL.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  Filled in by determine_parallel_type from
     get_ws_args_for: lower bound, upper bound, step and an optional
     chunk size for loops, or the section count for sections.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.
     expand_parallel_call reads it to pick the combined loop builtin.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  expand_parallel_call tests the
     OMP_CLAUSE_SCHEDULE_MONOTONIC and OMP_CLAUSE_SCHEDULE_NONMONOTONIC
     bits of this field.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.
     determine_parallel_type sets it on both the parallel region and
     its inner workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  When set,
     expand_parallel_call sticks to the monotonic flavor of the
     combined loop call.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
112 
/* Head of the list of toplevel OMP regions.  new_omp_region pushes
   regions without a parent here and omp_free_regions resets it to
   NULL after releasing the whole tree.  */
static struct omp_region *root_omp_region;

/* NOTE(review): not referenced in this part of the file; presumably
   records whether any outlined child function has been written to the
   dump file -- confirm against its users.  */
static bool omp_any_child_fn_dumped;

/* Forward declarations of helpers whose definitions are not in this
   part of the file.  */
static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
120 
121 /* Return true if REGION is a combined parallel+workshare region.  */
122 
123 static inline bool
is_combined_parallel(struct omp_region * region)124 is_combined_parallel (struct omp_region *region)
125 {
126   return region->is_combined_parallel;
127 }
128 
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130    is the immediate dominator of PAR_ENTRY_BB, return true if there
131    are no data dependencies that would prevent expanding the parallel
132    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133 
134    When expanding a combined parallel+workshare region, the call to
135    the child function may need additional arguments in the case of
136    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
137    computed out of variables passed in from the parent to the child
138    via 'struct .omp_data_s'.  For instance:
139 
140           #pragma omp parallel for schedule (guided, i * 4)
141           for (j ...)
142 
143    Is lowered into:
144 
145           # BLOCK 2 (PAR_ENTRY_BB)
146           .omp_data_o.i = i;
147           #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
148 
149           # BLOCK 3 (WS_ENTRY_BB)
150           .omp_data_i = &.omp_data_o;
151           D.1667 = .omp_data_i->i;
152           D.1598 = D.1667 * 4;
153           #pragma omp for schedule (guided, D.1598)
154 
155    When we outline the parallel region, the call to the child function
156    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157    that value is computed *after* the call site.  So, in principle we
158    cannot do the transformation.
159 
160    To see whether the code in WS_ENTRY_BB blocks the combined
161    parallel+workshare call, we collect all the variables used in the
162    GIMPLE_OMP_FOR header check whether they appear on the LHS of any
163    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
164    call.
165 
166    FIXME.  If we had the SSA form built at this point, we could merely
167    hoist the code in block 3 into block 2 and be done with it.  But at
168    this point we don't have dataflow information and though we could
169    hack something up here, it is really not worth the aggravation.  */
170 
171 static bool
workshare_safe_to_combine_p(basic_block ws_entry_bb)172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
173 {
174   struct omp_for_data fd;
175   gimple *ws_stmt = last_stmt (ws_entry_bb);
176 
177   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178     return true;
179 
180   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181   if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182     return false;
183 
184   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
185 
186   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187     return false;
188   if (fd.iter_type != long_integer_type_node)
189     return false;
190 
191   /* FIXME.  We give up too easily here.  If any of these arguments
192      are not constants, they will likely involve variables that have
193      been mapped into fields of .omp_data_s for sharing with the child
194      function.  With appropriate data flow, it would be possible to
195      see through this.  */
196   if (!is_gimple_min_invariant (fd.loop.n1)
197       || !is_gimple_min_invariant (fd.loop.n2)
198       || !is_gimple_min_invariant (fd.loop.step)
199       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200     return false;
201 
202   return true;
203 }
204 
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206    presence (SIMD_SCHEDULE).  */
207 
208 static tree
omp_adjust_chunk_size(tree chunk_size,bool simd_schedule)209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210 {
211   if (!simd_schedule || integer_zerop (chunk_size))
212     return chunk_size;
213 
214   poly_uint64 vf = omp_max_vf ();
215   if (known_eq (vf, 1U))
216     return chunk_size;
217 
218   tree type = TREE_TYPE (chunk_size);
219   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220                                   build_int_cst (type, vf - 1));
221   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222                           build_int_cst (type, -vf));
223 }
224 
225 /* Collect additional arguments needed to emit a combined
226    parallel+workshare call.  WS_STMT is the workshare directive being
227    expanded.  */
228 
229 static vec<tree, va_gc> *
get_ws_args_for(gimple * par_stmt,gimple * ws_stmt)230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
231 {
232   tree t;
233   location_t loc = gimple_location (ws_stmt);
234   vec<tree, va_gc> *ws_args;
235 
236   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
237     {
238       struct omp_for_data fd;
239       tree n1, n2;
240 
241       omp_extract_for_data (for_stmt, &fd, NULL);
242       n1 = fd.loop.n1;
243       n2 = fd.loop.n2;
244 
245       if (gimple_omp_for_combined_into_p (for_stmt))
246           {
247             tree innerc
248               = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249                                      OMP_CLAUSE__LOOPTEMP_);
250             gcc_assert (innerc);
251             n1 = OMP_CLAUSE_DECL (innerc);
252             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253                                             OMP_CLAUSE__LOOPTEMP_);
254             gcc_assert (innerc);
255             n2 = OMP_CLAUSE_DECL (innerc);
256           }
257 
258       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
259 
260       t = fold_convert_loc (loc, long_integer_type_node, n1);
261       ws_args->quick_push (t);
262 
263       t = fold_convert_loc (loc, long_integer_type_node, n2);
264       ws_args->quick_push (t);
265 
266       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267       ws_args->quick_push (t);
268 
269       if (fd.chunk_size)
270           {
271             t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272             t = omp_adjust_chunk_size (t, fd.simd_schedule);
273             ws_args->quick_push (t);
274           }
275 
276       return ws_args;
277     }
278   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
279     {
280       /* Number of sections is equal to the number of edges from the
281            GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282            the exit of the sections region.  */
283       basic_block bb = single_succ (gimple_bb (ws_stmt));
284       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285       vec_alloc (ws_args, 1);
286       ws_args->quick_push (t);
287       return ws_args;
288     }
289 
290   gcc_unreachable ();
291 }
292 
293 /* Discover whether REGION is a combined parallel+workshare region.  */
294 
295 static void
determine_parallel_type(struct omp_region * region)296 determine_parallel_type (struct omp_region *region)
297 {
298   basic_block par_entry_bb, par_exit_bb;
299   basic_block ws_entry_bb, ws_exit_bb;
300 
301   if (region == NULL || region->inner == NULL
302       || region->exit == NULL || region->inner->exit == NULL
303       || region->inner->cont == NULL)
304     return;
305 
306   /* We only support parallel+for and parallel+sections.  */
307   if (region->type != GIMPLE_OMP_PARALLEL
308       || (region->inner->type != GIMPLE_OMP_FOR
309             && region->inner->type != GIMPLE_OMP_SECTIONS))
310     return;
311 
312   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313      WS_EXIT_BB -> PAR_EXIT_BB.  */
314   par_entry_bb = region->entry;
315   par_exit_bb = region->exit;
316   ws_entry_bb = region->inner->entry;
317   ws_exit_bb = region->inner->exit;
318 
319   /* Give up for task reductions on the parallel, while it is implementable,
320      adding another big set of APIs or slowing down the normal paths is
321      not acceptable.  */
322   tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323   if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324     return;
325 
326   if (single_succ (par_entry_bb) == ws_entry_bb
327       && single_succ (ws_exit_bb) == par_exit_bb
328       && workshare_safe_to_combine_p (ws_entry_bb)
329       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330             || (last_and_only_stmt (ws_entry_bb)
331                 && last_and_only_stmt (par_exit_bb))))
332     {
333       gimple *par_stmt = last_stmt (par_entry_bb);
334       gimple *ws_stmt = last_stmt (ws_entry_bb);
335 
336       if (region->inner->type == GIMPLE_OMP_FOR)
337           {
338             /* If this is a combined parallel loop, we need to determine
339                whether or not to use the combined library calls.  There
340                are two cases where we do not apply the transformation:
341                static loops and any kind of ordered loop.  In the first
342                case, we already open code the loop so there is no need
343                to do anything else.  In the latter case, the combined
344                parallel loop call would still need extra synchronization
345                to implement ordered semantics, so there would not be any
346                gain in using the combined call.  */
347             tree clauses = gimple_omp_for_clauses (ws_stmt);
348             tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349             if (c == NULL
350                 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351                       == OMP_CLAUSE_SCHEDULE_STATIC)
352                 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353                 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354                 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355                       && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356               return;
357           }
358       else if (region->inner->type == GIMPLE_OMP_SECTIONS
359                  && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360                                             OMP_CLAUSE__REDUCTEMP_)
361                        || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362                                                OMP_CLAUSE__CONDTEMP_)))
363           return;
364 
365       region->is_combined_parallel = true;
366       region->inner->is_combined_parallel = true;
367       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
368     }
369 }
370 
/* Debugging dumps for parallel regions.  These three functions have
   external linkage; dump_omp_region writes to an arbitrary FILE while
   the two debug_* entry points write to stderr.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);
375 
376 /* Dump the parallel region tree rooted at REGION.  */
377 
378 void
dump_omp_region(FILE * file,struct omp_region * region,int indent)379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
380 {
381   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382              gimple_code_name[region->type]);
383 
384   if (region->inner)
385     dump_omp_region (file, region->inner, indent + 4);
386 
387   if (region->cont)
388     {
389       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390                  region->cont->index);
391     }
392 
393   if (region->exit)
394     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395                region->exit->index);
396   else
397     fprintf (file, "%*s[no exit marker]\n", indent, "");
398 
399   if (region->next)
400     dump_omp_region (file, region->next, indent);
401 }
402 
403 DEBUG_FUNCTION void
debug_omp_region(struct omp_region * region)404 debug_omp_region (struct omp_region *region)
405 {
406   dump_omp_region (stderr, region, 0);
407 }
408 
409 DEBUG_FUNCTION void
debug_all_omp_regions(void)410 debug_all_omp_regions (void)
411 {
412   dump_omp_region (stderr, root_omp_region, 0);
413 }
414 
415 /* Create a new parallel region starting at STMT inside region PARENT.  */
416 
417 static struct omp_region *
new_omp_region(basic_block bb,enum gimple_code type,struct omp_region * parent)418 new_omp_region (basic_block bb, enum gimple_code type,
419                     struct omp_region *parent)
420 {
421   struct omp_region *region = XCNEW (struct omp_region);
422 
423   region->outer = parent;
424   region->entry = bb;
425   region->type = type;
426 
427   if (parent)
428     {
429       /* This is a nested region.  Add it to the list of inner
430            regions in PARENT.  */
431       region->next = parent->inner;
432       parent->inner = region;
433     }
434   else
435     {
436       /* This is a toplevel region.  Add it to the list of toplevel
437            regions in ROOT_OMP_REGION.  */
438       region->next = root_omp_region;
439       root_omp_region = region;
440     }
441 
442   return region;
443 }
444 
445 /* Release the memory associated with the region tree rooted at REGION.  */
446 
447 static void
free_omp_region_1(struct omp_region * region)448 free_omp_region_1 (struct omp_region *region)
449 {
450   struct omp_region *i, *n;
451 
452   for (i = region->inner; i ; i = n)
453     {
454       n = i->next;
455       free_omp_region_1 (i);
456     }
457 
458   free (region);
459 }
460 
461 /* Release the memory for the entire omp region tree.  */
462 
463 void
omp_free_regions(void)464 omp_free_regions (void)
465 {
466   struct omp_region *r, *n;
467   for (r = root_omp_region; r ; r = n)
468     {
469       n = r->next;
470       free_omp_region_1 (r);
471     }
472   root_omp_region = NULL;
473 }
474 
475 /* A convenience function to build an empty GIMPLE_COND with just the
476    condition.  */
477 
478 static gcond *
gimple_build_cond_empty(tree cond)479 gimple_build_cond_empty (tree cond)
480 {
481   enum tree_code pred_code;
482   tree lhs, rhs;
483 
484   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
486 }
487 
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489    Add CHILD_FNDECL to decl chain of the supercontext of the block
490    ENTRY_BLOCK - this is the block which originally contained the
491    code from which CHILD_FNDECL was created.
492 
493    Together, these actions ensure that the debug info for the outlined
494    function will be emitted with the correct lexical scope.  */
495 
496 static void
adjust_context_and_scope(struct omp_region * region,tree entry_block,tree child_fndecl)497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498                                 tree child_fndecl)
499 {
500   tree parent_fndecl = NULL_TREE;
501   gimple *entry_stmt;
502   /* OMP expansion expands inner regions before outer ones, so if
503      we e.g. have explicit task region nested in parallel region, when
504      expanding the task region current_function_decl will be the original
505      source function, but we actually want to use as context the child
506      function of the parallel.  */
507   for (region = region->outer;
508        region && parent_fndecl == NULL_TREE; region = region->outer)
509     switch (region->type)
510       {
511       case GIMPLE_OMP_PARALLEL:
512       case GIMPLE_OMP_TASK:
513       case GIMPLE_OMP_TEAMS:
514           entry_stmt = last_stmt (region->entry);
515           parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516           break;
517       case GIMPLE_OMP_TARGET:
518           entry_stmt = last_stmt (region->entry);
519           parent_fndecl
520             = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521           break;
522       default:
523           break;
524       }
525 
526   if (parent_fndecl == NULL_TREE)
527     parent_fndecl = current_function_decl;
528   DECL_CONTEXT (child_fndecl) = parent_fndecl;
529 
530   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
531     {
532       tree b = BLOCK_SUPERCONTEXT (entry_block);
533       if (TREE_CODE (b) == BLOCK)
534         {
535             DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536             BLOCK_VARS (b) = child_fndecl;
537           }
538     }
539 }
540 
541 /* Build the function calls to GOMP_parallel etc to actually
542    generate the parallel operation.  REGION is the parallel region
543    being expanded.  BB is the block where to insert the code.  WS_ARGS
544    will be set if this is a call to a combined parallel+workshare
545    construct, it contains the list of additional arguments needed by
546    the workshare construct.  */
547 
548 static void
expand_parallel_call(struct omp_region * region,basic_block bb,gomp_parallel * entry_stmt,vec<tree,va_gc> * ws_args)549 expand_parallel_call (struct omp_region *region, basic_block bb,
550                           gomp_parallel *entry_stmt,
551                           vec<tree, va_gc> *ws_args)
552 {
553   tree t, t1, t2, val, cond, c, clauses, flags;
554   gimple_stmt_iterator gsi;
555   gimple *stmt;
556   enum built_in_function start_ix;
557   int start_ix2;
558   location_t clause_loc;
559   vec<tree, va_gc> *args;
560 
561   clauses = gimple_omp_parallel_clauses (entry_stmt);
562 
563   /* Determine what flavor of GOMP_parallel we will be
564      emitting.  */
565   start_ix = BUILT_IN_GOMP_PARALLEL;
566   tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567   if (rtmp)
568     start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569   else if (is_combined_parallel (region))
570     {
571       switch (region->inner->type)
572           {
573           case GIMPLE_OMP_FOR:
574             gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575             switch (region->inner->sched_kind)
576               {
577               case OMP_CLAUSE_SCHEDULE_RUNTIME:
578                 /* For lastprivate(conditional:), our implementation
579                      requires monotonic behavior.  */
580                 if (region->inner->has_lastprivate_conditional != 0)
581                     start_ix2 = 3;
582                 else if ((region->inner->sched_modifiers
583                            & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584                     start_ix2 = 6;
585                 else if ((region->inner->sched_modifiers
586                               & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587                     start_ix2 = 7;
588                 else
589                     start_ix2 = 3;
590                 break;
591               case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592               case OMP_CLAUSE_SCHEDULE_GUIDED:
593                 if ((region->inner->sched_modifiers
594                        & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595                       && !region->inner->has_lastprivate_conditional)
596                     {
597                       start_ix2 = 3 + region->inner->sched_kind;
598                       break;
599                     }
600                 /* FALLTHRU */
601               default:
602                 start_ix2 = region->inner->sched_kind;
603                 break;
604               }
605             start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606             start_ix = (enum built_in_function) start_ix2;
607             break;
608           case GIMPLE_OMP_SECTIONS:
609             start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610             break;
611           default:
612             gcc_unreachable ();
613           }
614     }
615 
616   /* By default, the value of NUM_THREADS is zero (selected at run time)
617      and there is no conditional.  */
618   cond = NULL_TREE;
619   val = build_int_cst (unsigned_type_node, 0);
620   flags = build_int_cst (unsigned_type_node, 0);
621 
622   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623   if (c)
624     cond = OMP_CLAUSE_IF_EXPR (c);
625 
626   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627   if (c)
628     {
629       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630       clause_loc = OMP_CLAUSE_LOCATION (c);
631     }
632   else
633     clause_loc = gimple_location (entry_stmt);
634 
635   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636   if (c)
637     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
638 
639   /* Ensure 'val' is of the correct type.  */
640   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
641 
642   /* If we found the clause 'if (cond)', build either
643      (cond != 0) or (cond ? val : 1u).  */
644   if (cond)
645     {
646       cond = gimple_boolify (cond);
647 
648       if (integer_zerop (val))
649           val = fold_build2_loc (clause_loc,
650                                  EQ_EXPR, unsigned_type_node, cond,
651                                  build_int_cst (TREE_TYPE (cond), 0));
652       else
653           {
654             basic_block cond_bb, then_bb, else_bb;
655             edge e, e_then, e_else;
656             tree tmp_then, tmp_else, tmp_join, tmp_var;
657 
658             tmp_var = create_tmp_var (TREE_TYPE (val));
659             if (gimple_in_ssa_p (cfun))
660               {
661                 tmp_then = make_ssa_name (tmp_var);
662                 tmp_else = make_ssa_name (tmp_var);
663                 tmp_join = make_ssa_name (tmp_var);
664               }
665             else
666               {
667                 tmp_then = tmp_var;
668                 tmp_else = tmp_var;
669                 tmp_join = tmp_var;
670               }
671 
672             e = split_block_after_labels (bb);
673             cond_bb = e->src;
674             bb = e->dest;
675             remove_edge (e);
676 
677             then_bb = create_empty_bb (cond_bb);
678             else_bb = create_empty_bb (then_bb);
679             set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680             set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
681 
682             stmt = gimple_build_cond_empty (cond);
683             gsi = gsi_start_bb (cond_bb);
684             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
685 
686             gsi = gsi_start_bb (then_bb);
687             expand_omp_build_assign (&gsi, tmp_then, val, true);
688 
689             gsi = gsi_start_bb (else_bb);
690             expand_omp_build_assign (&gsi, tmp_else,
691                                            build_int_cst (unsigned_type_node, 1),
692                                            true);
693 
694             make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695             make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696             add_bb_to_loop (then_bb, cond_bb->loop_father);
697             add_bb_to_loop (else_bb, cond_bb->loop_father);
698             e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699             e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
700 
701             if (gimple_in_ssa_p (cfun))
702               {
703                 gphi *phi = create_phi_node (tmp_join, bb);
704                 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705                 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
706               }
707 
708             val = tmp_join;
709           }
710 
711       gsi = gsi_start_bb (bb);
712       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713                                               false, GSI_CONTINUE_LINKING);
714     }
715 
716   gsi = gsi_last_nondebug_bb (bb);
717   t = gimple_omp_parallel_data_arg (entry_stmt);
718   if (t == NULL)
719     t1 = null_pointer_node;
720   else
721     t1 = build_fold_addr_expr (t);
722   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723   t2 = build_fold_addr_expr (child_fndecl);
724 
725   vec_alloc (args, 4 + vec_safe_length (ws_args));
726   args->quick_push (t2);
727   args->quick_push (t1);
728   args->quick_push (val);
729   if (ws_args)
730     args->splice (*ws_args);
731   args->quick_push (flags);
732 
733   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734                                      builtin_decl_explicit (start_ix), args);
735 
736   if (rtmp)
737     {
738       tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739       t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740                       fold_convert (type,
741                                         fold_convert (pointer_sized_int_node, t)));
742     }
743   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744                                   false, GSI_CONTINUE_LINKING);
745 }
746 
747 /* Build the function call to GOMP_task to actually
748    generate the task operation.  BB is the block where to insert the code.  */
749 
750 static void
expand_task_call(struct omp_region * region,basic_block bb,gomp_task * entry_stmt)751 expand_task_call (struct omp_region *region, basic_block bb,
752                       gomp_task *entry_stmt)
753 {
754   tree t1, t2, t3;
755   gimple_stmt_iterator gsi;
756   location_t loc = gimple_location (entry_stmt);
757 
758   tree clauses = gimple_omp_task_clauses (entry_stmt);
759 
760   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766   tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
767 
768   unsigned int iflags
769     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
772 
773   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775   tree num_tasks = NULL_TREE;
776   bool ull = false;
777   if (taskloop_p)
778     {
779       gimple *g = last_stmt (region->outer->entry);
780       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781                       && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782       struct omp_for_data fd;
783       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786                                         OMP_CLAUSE__LOOPTEMP_);
787       startvar = OMP_CLAUSE_DECL (startvar);
788       endvar = OMP_CLAUSE_DECL (endvar);
789       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790       if (fd.loop.cond_code == LT_EXPR)
791           iflags |= GOMP_TASK_FLAG_UP;
792       tree tclauses = gimple_omp_for_clauses (g);
793       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794       if (num_tasks)
795           {
796             if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797               iflags |= GOMP_TASK_FLAG_STRICT;
798             num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
799           }
800       else
801           {
802             num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803             if (num_tasks)
804               {
805                 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806                 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807                     iflags |= GOMP_TASK_FLAG_STRICT;
808                 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
809               }
810             else
811               num_tasks = integer_zero_node;
812           }
813       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814       if (ifc == NULL_TREE)
815           iflags |= GOMP_TASK_FLAG_IF;
816       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817           iflags |= GOMP_TASK_FLAG_NOGROUP;
818       ull = fd.iter_type == long_long_unsigned_type_node;
819       if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820           iflags |= GOMP_TASK_FLAG_REDUCTION;
821     }
822   else
823     {
824       if (priority)
825           iflags |= GOMP_TASK_FLAG_PRIORITY;
826       if (detach)
827           iflags |= GOMP_TASK_FLAG_DETACH;
828     }
829 
830   tree flags = build_int_cst (unsigned_type_node, iflags);
831 
832   tree cond = boolean_true_node;
833   if (ifc)
834     {
835       if (taskloop_p)
836           {
837             tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838             t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839                                      build_int_cst (unsigned_type_node,
840                                                         GOMP_TASK_FLAG_IF),
841                                      build_int_cst (unsigned_type_node, 0));
842             flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843                                            flags, t);
844           }
845       else
846           cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847     }
848 
849   if (finalc)
850     {
851       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853                                  build_int_cst (unsigned_type_node,
854                                                     GOMP_TASK_FLAG_FINAL),
855                                  build_int_cst (unsigned_type_node, 0));
856       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
857     }
858   if (depend)
859     depend = OMP_CLAUSE_DECL (depend);
860   else
861     depend = build_int_cst (ptr_type_node, 0);
862   if (priority)
863     priority = fold_convert (integer_type_node,
864                                    OMP_CLAUSE_PRIORITY_EXPR (priority));
865   else
866     priority = integer_zero_node;
867 
868   gsi = gsi_last_nondebug_bb (bb);
869 
870   detach = (detach
871               ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872               : null_pointer_node);
873 
874   tree t = gimple_omp_task_data_arg (entry_stmt);
875   if (t == NULL)
876     t2 = null_pointer_node;
877   else
878     t2 = build_fold_addr_expr_loc (loc, t);
879   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880   t = gimple_omp_task_copy_fn (entry_stmt);
881   if (t == NULL)
882     t3 = null_pointer_node;
883   else
884     t3 = build_fold_addr_expr_loc (loc, t);
885 
886   if (taskloop_p)
887     t = build_call_expr (ull
888                                ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889                                : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890                                11, t1, t2, t3,
891                                gimple_omp_task_arg_size (entry_stmt),
892                                gimple_omp_task_arg_align (entry_stmt), flags,
893                                num_tasks, priority, startvar, endvar, step);
894   else
895     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896                                10, t1, t2, t3,
897                                gimple_omp_task_arg_size (entry_stmt),
898                                gimple_omp_task_arg_align (entry_stmt), cond, flags,
899                                depend, priority, detach);
900 
901   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902                                   false, GSI_CONTINUE_LINKING);
903 }
904 
905 /* Build the function call to GOMP_taskwait_depend to actually
906    generate the taskwait operation.  BB is the block where to insert the
907    code.  */
908 
909 static void
expand_taskwait_call(basic_block bb,gomp_task * entry_stmt)910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
911 {
912   tree clauses = gimple_omp_task_clauses (entry_stmt);
913   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914   if (depend == NULL_TREE)
915     return;
916 
917   depend = OMP_CLAUSE_DECL (depend);
918 
919   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920   tree t
921     = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922                            1, depend);
923 
924   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925                                   false, GSI_CONTINUE_LINKING);
926 }
927 
928 /* Build the function call to GOMP_teams_reg to actually
929    generate the host teams operation.  REGION is the teams region
930    being expanded.  BB is the block where to insert the code.  */
931 
932 static void
expand_teams_call(basic_block bb,gomp_teams * entry_stmt)933 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
934 {
935   tree clauses = gimple_omp_teams_clauses (entry_stmt);
936   tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937   if (num_teams == NULL_TREE)
938     num_teams = build_int_cst (unsigned_type_node, 0);
939   else
940     {
941       num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
942       num_teams = fold_convert (unsigned_type_node, num_teams);
943     }
944   tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945   if (thread_limit == NULL_TREE)
946     thread_limit = build_int_cst (unsigned_type_node, 0);
947   else
948     {
949       thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950       thread_limit = fold_convert (unsigned_type_node, thread_limit);
951     }
952 
953   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954   tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955   if (t == NULL)
956     t1 = null_pointer_node;
957   else
958     t1 = build_fold_addr_expr (t);
959   tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960   tree t2 = build_fold_addr_expr (child_fndecl);
961 
962   vec<tree, va_gc> *args;
963   vec_alloc (args, 5);
964   args->quick_push (t2);
965   args->quick_push (t1);
966   args->quick_push (num_teams);
967   args->quick_push (thread_limit);
968   /* For future extensibility.  */
969   args->quick_push (build_zero_cst (unsigned_type_node));
970 
971   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972                                      builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973                                      args);
974 
975   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976                                   false, GSI_CONTINUE_LINKING);
977 }
978 
979 /* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
980 
981 static tree
vec2chain(vec<tree,va_gc> * v)982 vec2chain (vec<tree, va_gc> *v)
983 {
984   tree chain = NULL_TREE, t;
985   unsigned ix;
986 
987   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
988     {
989       DECL_CHAIN (t) = chain;
990       chain = t;
991     }
992 
993   return chain;
994 }
995 
996 /* Remove barriers in REGION->EXIT's block.  Note that this is only
997    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
998    is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
999    left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1000    removed.  */
1001 
1002 static void
remove_exit_barrier(struct omp_region * region)1003 remove_exit_barrier (struct omp_region *region)
1004 {
1005   gimple_stmt_iterator gsi;
1006   basic_block exit_bb;
1007   edge_iterator ei;
1008   edge e;
1009   gimple *stmt;
1010   int any_addressable_vars = -1;
1011 
1012   exit_bb = region->exit;
1013 
1014   /* If the parallel region doesn't return, we don't have REGION->EXIT
1015      block at all.  */
1016   if (! exit_bb)
1017     return;
1018 
1019   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
1020      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
1021      statements that can appear in between are extremely limited -- no
1022      memory operations at all.  Here, we allow nothing at all, so the
1023      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
1024   gsi = gsi_last_nondebug_bb (exit_bb);
1025   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026   gsi_prev_nondebug (&gsi);
1027   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028     return;
1029 
1030   FOR_EACH_EDGE (e, ei, exit_bb->preds)
1031     {
1032       gsi = gsi_last_nondebug_bb (e->src);
1033       if (gsi_end_p (gsi))
1034           continue;
1035       stmt = gsi_stmt (gsi);
1036       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037             && !gimple_omp_return_nowait_p (stmt))
1038           {
1039             /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040                in many cases.  If there could be tasks queued, the barrier
1041                might be needed to let the tasks run before some local
1042                variable of the parallel that the task uses as shared
1043                runs out of scope.  The task can be spawned either
1044                from within current function (this would be easy to check)
1045                or from some function it calls and gets passed an address
1046                of such a variable.  */
1047             if (any_addressable_vars < 0)
1048               {
1049                 gomp_parallel *parallel_stmt
1050                     = as_a <gomp_parallel *> (last_stmt (region->entry));
1051                 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052                 tree local_decls, block, decl;
1053                 unsigned ix;
1054 
1055                 any_addressable_vars = 0;
1056                 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057                     if (TREE_ADDRESSABLE (decl))
1058                       {
1059                         any_addressable_vars = 1;
1060                         break;
1061                       }
1062                 for (block = gimple_block (stmt);
1063                        !any_addressable_vars
1064                        && block
1065                        && TREE_CODE (block) == BLOCK;
1066                        block = BLOCK_SUPERCONTEXT (block))
1067                     {
1068                       for (local_decls = BLOCK_VARS (block);
1069                            local_decls;
1070                            local_decls = DECL_CHAIN (local_decls))
1071                         if (TREE_ADDRESSABLE (local_decls))
1072                           {
1073                               any_addressable_vars = 1;
1074                               break;
1075                           }
1076                       if (block == gimple_block (parallel_stmt))
1077                         break;
1078                     }
1079               }
1080             if (!any_addressable_vars)
1081               gimple_omp_return_set_nowait (stmt);
1082           }
1083     }
1084 }
1085 
1086 static void
remove_exit_barriers(struct omp_region * region)1087 remove_exit_barriers (struct omp_region *region)
1088 {
1089   if (region->type == GIMPLE_OMP_PARALLEL)
1090     remove_exit_barrier (region);
1091 
1092   if (region->inner)
1093     {
1094       region = region->inner;
1095       remove_exit_barriers (region);
1096       while (region->next)
1097           {
1098             region = region->next;
1099             remove_exit_barriers (region);
1100           }
1101     }
1102 }
1103 
1104 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105    calls.  These can't be declared as const functions, but
1106    within one parallel body they are constant, so they can be
1107    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108    which are declared const.  Similarly for task body, except
1109    that in untied task omp_get_thread_num () can change at any task
1110    scheduling point.  */
1111 
1112 static void
optimize_omp_library_calls(gimple * entry_stmt)1113 optimize_omp_library_calls (gimple *entry_stmt)
1114 {
1115   basic_block bb;
1116   gimple_stmt_iterator gsi;
1117   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122                           && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123                                                     OMP_CLAUSE_UNTIED) != NULL);
1124 
1125   FOR_EACH_BB_FN (bb, cfun)
1126     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1127       {
1128           gimple *call = gsi_stmt (gsi);
1129           tree decl;
1130 
1131           if (is_gimple_call (call)
1132               && (decl = gimple_call_fndecl (call))
1133               && DECL_EXTERNAL (decl)
1134               && TREE_PUBLIC (decl)
1135               && DECL_INITIAL (decl) == NULL)
1136             {
1137               tree built_in;
1138 
1139               if (DECL_NAME (decl) == thr_num_id)
1140                 {
1141                     /* In #pragma omp task untied omp_get_thread_num () can change
1142                        during the execution of the task region.  */
1143                     if (untied_task)
1144                       continue;
1145                     built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1146                 }
1147               else if (DECL_NAME (decl) == num_thr_id)
1148                 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149               else
1150                 continue;
1151 
1152               if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153                     || gimple_call_num_args (call) != 0)
1154                 continue;
1155 
1156               if (flag_exceptions && !TREE_NOTHROW (decl))
1157                 continue;
1158 
1159               if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160                     || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161                                                   TREE_TYPE (TREE_TYPE (built_in))))
1162                 continue;
1163 
1164               gimple_call_set_fndecl (call, built_in);
1165             }
1166       }
1167 }
1168 
1169 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1170    regimplified.  */
1171 
1172 static tree
expand_omp_regimplify_p(tree * tp,int * walk_subtrees,void *)1173 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1174 {
1175   tree t = *tp;
1176 
1177   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1178   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179     return t;
1180 
1181   if (TREE_CODE (t) == ADDR_EXPR)
1182     recompute_tree_invariant_for_addr_expr (t);
1183 
1184   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185   return NULL_TREE;
1186 }
1187 
1188 /* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1189 
1190 static void
expand_omp_build_assign(gimple_stmt_iterator * gsi_p,tree to,tree from,bool after)1191 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192                                bool after)
1193 {
1194   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196                                            !after, after ? GSI_CONTINUE_LINKING
1197                                                              : GSI_SAME_STMT);
1198   gimple *stmt = gimple_build_assign (to, from);
1199   if (after)
1200     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201   else
1202     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1205     {
1206       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207       gimple_regimplify_operands (stmt, &gsi);
1208     }
1209 }
1210 
1211 /* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */
1212 
1213 static gcond *
expand_omp_build_cond(gimple_stmt_iterator * gsi_p,enum tree_code code,tree lhs,tree rhs,bool after=false)1214 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1215                            tree lhs, tree rhs, bool after = false)
1216 {
1217   gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1218   if (after)
1219     gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1220   else
1221     gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1222   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1223                      NULL, NULL)
1224       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225                         NULL, NULL))
1226     {
1227       gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1228       gimple_regimplify_operands (cond_stmt, &gsi);
1229     }
1230   return cond_stmt;
1231 }
1232 
1233 /* Expand the OpenMP parallel or task directive starting at REGION.  */
1234 
1235 static void
expand_omp_taskreg(struct omp_region * region)1236 expand_omp_taskreg (struct omp_region *region)
1237 {
1238   basic_block entry_bb, exit_bb, new_bb;
1239   struct function *child_cfun;
1240   tree child_fn, block, t;
1241   gimple_stmt_iterator gsi;
1242   gimple *entry_stmt, *stmt;
1243   edge e;
1244   vec<tree, va_gc> *ws_args;
1245 
1246   entry_stmt = last_stmt (region->entry);
1247   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1248       && gimple_omp_task_taskwait_p (entry_stmt))
1249     {
1250       new_bb = region->entry;
1251       gsi = gsi_last_nondebug_bb (region->entry);
1252       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1253       gsi_remove (&gsi, true);
1254       expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1255       return;
1256     }
1257 
1258   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1259   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1260 
1261   entry_bb = region->entry;
1262   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1263     exit_bb = region->cont;
1264   else
1265     exit_bb = region->exit;
1266 
1267   if (is_combined_parallel (region))
1268     ws_args = region->ws_args;
1269   else
1270     ws_args = NULL;
1271 
1272   if (child_cfun->cfg)
1273     {
1274       /* Due to inlining, it may happen that we have already outlined
1275            the region, in which case all we need to do is make the
1276            sub-graph unreachable and emit the parallel call.  */
1277       edge entry_succ_e, exit_succ_e;
1278 
1279       entry_succ_e = single_succ_edge (entry_bb);
1280 
1281       gsi = gsi_last_nondebug_bb (entry_bb);
1282       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1283                       || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1284                       || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1285       gsi_remove (&gsi, true);
1286 
1287       new_bb = entry_bb;
1288       if (exit_bb)
1289           {
1290             exit_succ_e = single_succ_edge (exit_bb);
1291             make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1292           }
1293       remove_edge_and_dominated_blocks (entry_succ_e);
1294     }
1295   else
1296     {
1297       unsigned srcidx, dstidx, num;
1298 
1299       /* If the parallel region needs data sent from the parent
1300            function, then the very first statement (except possible
1301            tree profile counter updates) of the parallel body
1302            is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1303            &.OMP_DATA_O is passed as an argument to the child function,
1304            we need to replace it with the argument as seen by the child
1305            function.
1306 
1307            In most cases, this will end up being the identity assignment
1308            .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1309            a function call that has been inlined, the original PARM_DECL
1310            .OMP_DATA_I may have been converted into a different local
1311            variable.  In which case, we need to keep the assignment.  */
1312       if (gimple_omp_taskreg_data_arg (entry_stmt))
1313           {
1314             basic_block entry_succ_bb
1315               = single_succ_p (entry_bb) ? single_succ (entry_bb)
1316                                                : FALLTHRU_EDGE (entry_bb)->dest;
1317             tree arg;
1318             gimple *parcopy_stmt = NULL;
1319 
1320             for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1321               {
1322                 gimple *stmt;
1323 
1324                 gcc_assert (!gsi_end_p (gsi));
1325                 stmt = gsi_stmt (gsi);
1326                 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1327                     continue;
1328 
1329                 if (gimple_num_ops (stmt) == 2)
1330                     {
1331                       tree arg = gimple_assign_rhs1 (stmt);
1332 
1333                       /* We're ignore the subcode because we're
1334                          effectively doing a STRIP_NOPS.  */
1335 
1336                       if (TREE_CODE (arg) == ADDR_EXPR
1337                           && (TREE_OPERAND (arg, 0)
1338                                 == gimple_omp_taskreg_data_arg (entry_stmt)))
1339                         {
1340                           parcopy_stmt = stmt;
1341                           break;
1342                         }
1343                     }
1344               }
1345 
1346             gcc_assert (parcopy_stmt != NULL);
1347             arg = DECL_ARGUMENTS (child_fn);
1348 
1349             if (!gimple_in_ssa_p (cfun))
1350               {
1351                 if (gimple_assign_lhs (parcopy_stmt) == arg)
1352                     gsi_remove (&gsi, true);
1353                 else
1354                     {
1355                       /* ?? Is setting the subcode really necessary ??  */
1356                       gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1357                       gimple_assign_set_rhs1 (parcopy_stmt, arg);
1358                     }
1359               }
1360             else
1361               {
1362                 tree lhs = gimple_assign_lhs (parcopy_stmt);
1363                 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1364                 /* We'd like to set the rhs to the default def in the child_fn,
1365                      but it's too early to create ssa names in the child_fn.
1366                      Instead, we set the rhs to the parm.  In
1367                      move_sese_region_to_fn, we introduce a default def for the
1368                      parm, map the parm to it's default def, and once we encounter
1369                      this stmt, replace the parm with the default def.  */
1370                 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1371                 update_stmt (parcopy_stmt);
1372               }
1373           }
1374 
1375       /* Declare local variables needed in CHILD_CFUN.  */
1376       block = DECL_INITIAL (child_fn);
1377       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1378       /* The gimplifier could record temporaries in parallel/task block
1379            rather than in containing function's local_decls chain,
1380            which would mean cgraph missed finalizing them.  Do it now.  */
1381       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1382           if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1383             varpool_node::finalize_decl (t);
1384       DECL_SAVED_TREE (child_fn) = NULL;
1385       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1386       gimple_set_body (child_fn, NULL);
1387       TREE_USED (block) = 1;
1388 
1389       /* Reset DECL_CONTEXT on function arguments.  */
1390       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1391           DECL_CONTEXT (t) = child_fn;
1392 
1393       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1394            so that it can be moved to the child function.  */
1395       gsi = gsi_last_nondebug_bb (entry_bb);
1396       stmt = gsi_stmt (gsi);
1397       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1398                                  || gimple_code (stmt) == GIMPLE_OMP_TASK
1399                                  || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1400       e = split_block (entry_bb, stmt);
1401       gsi_remove (&gsi, true);
1402       entry_bb = e->dest;
1403       edge e2 = NULL;
1404       if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1405           single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1406       else
1407           {
1408             e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1409             gcc_assert (e2->dest == region->exit);
1410             remove_edge (BRANCH_EDGE (entry_bb));
1411             set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1412             gsi = gsi_last_nondebug_bb (region->exit);
1413             gcc_assert (!gsi_end_p (gsi)
1414                           && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1415             gsi_remove (&gsi, true);
1416           }
1417 
1418       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1419       if (exit_bb)
1420           {
1421             gsi = gsi_last_nondebug_bb (exit_bb);
1422             gcc_assert (!gsi_end_p (gsi)
1423                           && (gimple_code (gsi_stmt (gsi))
1424                                 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1425             stmt = gimple_build_return (NULL);
1426             gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1427             gsi_remove (&gsi, true);
1428           }
1429 
1430       /* Move the parallel region into CHILD_CFUN.  */
1431 
1432       if (gimple_in_ssa_p (cfun))
1433           {
1434             init_tree_ssa (child_cfun);
1435             init_ssa_operands (child_cfun);
1436             child_cfun->gimple_df->in_ssa_p = true;
1437             block = NULL_TREE;
1438           }
1439       else
1440           block = gimple_block (entry_stmt);
1441 
1442       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1443       if (exit_bb)
1444           single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1445       if (e2)
1446           {
1447             basic_block dest_bb = e2->dest;
1448             if (!exit_bb)
1449               make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1450             remove_edge (e2);
1451             set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1452           }
1453       /* When the OMP expansion process cannot guarantee an up-to-date
1454            loop tree arrange for the child function to fixup loops.  */
1455       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1456           child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1457 
1458       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1459       num = vec_safe_length (child_cfun->local_decls);
1460       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1461           {
1462             t = (*child_cfun->local_decls)[srcidx];
1463             if (DECL_CONTEXT (t) == cfun->decl)
1464               continue;
1465             if (srcidx != dstidx)
1466               (*child_cfun->local_decls)[dstidx] = t;
1467             dstidx++;
1468           }
1469       if (dstidx != num)
1470           vec_safe_truncate (child_cfun->local_decls, dstidx);
1471 
1472       /* Inform the callgraph about the new function.  */
1473       child_cfun->curr_properties = cfun->curr_properties;
1474       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1475       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1476       cgraph_node *node = cgraph_node::get_create (child_fn);
1477       node->parallelized_function = 1;
1478       cgraph_node::add_new_function (child_fn, true);
1479 
1480       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1481                           && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1482 
1483       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1484            fixed in a following pass.  */
1485       push_cfun (child_cfun);
1486       if (need_asm)
1487           assign_assembler_name_if_needed (child_fn);
1488 
1489       if (optimize)
1490           optimize_omp_library_calls (entry_stmt);
1491       update_max_bb_count ();
1492       cgraph_edge::rebuild_edges ();
1493 
1494       /* Some EH regions might become dead, see PR34608.  If
1495            pass_cleanup_cfg isn't the first pass to happen with the
1496            new child, these dead EH edges might cause problems.
1497            Clean them up now.  */
1498       if (flag_exceptions)
1499           {
1500             basic_block bb;
1501             bool changed = false;
1502 
1503             FOR_EACH_BB_FN (bb, cfun)
1504               changed |= gimple_purge_dead_eh_edges (bb);
1505             if (changed)
1506               cleanup_tree_cfg ();
1507           }
1508       if (gimple_in_ssa_p (cfun))
1509           update_ssa (TODO_update_ssa);
1510       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1511           verify_loop_structure ();
1512       pop_cfun ();
1513 
1514       if (dump_file && !gimple_in_ssa_p (cfun))
1515           {
1516             omp_any_child_fn_dumped = true;
1517             dump_function_header (dump_file, child_fn, dump_flags);
1518             dump_function_to_file (child_fn, dump_file, dump_flags);
1519           }
1520     }
1521 
1522   adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1523 
1524   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1525     expand_parallel_call (region, new_bb,
1526                                 as_a <gomp_parallel *> (entry_stmt), ws_args);
1527   else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1528     expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1529   else
1530     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1531   if (gimple_in_ssa_p (cfun))
1532     update_ssa (TODO_update_ssa_only_virtuals);
1533 }
1534 
1535 /* Information about members of an OpenACC collapsed loop nest.  */
1536 
1537 struct oacc_collapse
1538 {
1539   tree base;  /* Base value.  */
1540   tree iters; /* Number of steps.  */
1541   tree step;  /* Step size.  */
1542   tree tile;  /* Tile increment (if tiled).  */
1543   tree outer; /* Tile iterator var. */
1544 };
1545 
1546 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1547    Fill in COUNTS array.  Emit any initialization code before GSI.
1548    Return the calculated outer loop bound of BOUND_TYPE.  */
1549 
1550 static tree
expand_oacc_collapse_init(const struct omp_for_data * fd,gimple_stmt_iterator * gsi,oacc_collapse * counts,tree diff_type,tree bound_type,location_t loc)1551 expand_oacc_collapse_init (const struct omp_for_data *fd,
1552                                  gimple_stmt_iterator *gsi,
1553                                  oacc_collapse *counts, tree diff_type,
1554                                  tree bound_type, location_t loc)
1555 {
1556   tree tiling = fd->tiling;
1557   tree total = build_int_cst (bound_type, 1);
1558   int ix;
1559 
1560   gcc_assert (integer_onep (fd->loop.step));
1561   gcc_assert (integer_zerop (fd->loop.n1));
1562 
1563   /* When tiling, the first operand of the tile clause applies to the
1564      innermost loop, and we work outwards from there.  Seems
1565      backwards, but whatever.  */
1566   for (ix = fd->collapse; ix--;)
1567     {
1568       const omp_for_data_loop *loop = &fd->loops[ix];
1569 
1570       tree iter_type = TREE_TYPE (loop->v);
1571       tree plus_type = iter_type;
1572 
1573       gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1574 
1575       if (POINTER_TYPE_P (iter_type))
1576           plus_type = sizetype;
1577 
1578       if (tiling)
1579           {
1580             tree num = build_int_cst (integer_type_node, fd->collapse);
1581             tree loop_no = build_int_cst (integer_type_node, ix);
1582             tree tile = TREE_VALUE (tiling);
1583             gcall *call
1584               = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585                                                     /* gwv-outer=*/integer_zero_node,
1586                                                     /* gwv-inner=*/integer_zero_node);
1587 
1588             counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589             counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590             gimple_call_set_lhs (call, counts[ix].tile);
1591             gimple_set_location (call, loc);
1592             gsi_insert_before (gsi, call, GSI_SAME_STMT);
1593 
1594             tiling = TREE_CHAIN (tiling);
1595           }
1596       else
1597           {
1598             counts[ix].tile = NULL;
1599             counts[ix].outer = loop->v;
1600           }
1601 
1602       tree b = loop->n1;
1603       tree e = loop->n2;
1604       tree s = loop->step;
1605       bool up = loop->cond_code == LT_EXPR;
1606       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607       bool negating;
1608       tree expr;
1609 
1610       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611                                             true, GSI_SAME_STMT);
1612       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613                                             true, GSI_SAME_STMT);
1614 
1615       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1616       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617       if (negating)
1618           s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619       s = fold_convert (diff_type, s);
1620       if (negating)
1621           s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623                                             true, GSI_SAME_STMT);
1624 
1625       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1626       negating = !up && TYPE_UNSIGNED (iter_type);
1627       expr = fold_build2 (MINUS_EXPR, plus_type,
1628                                 fold_convert (plus_type, negating ? b : e),
1629                                 fold_convert (plus_type, negating ? e : b));
1630       expr = fold_convert (diff_type, expr);
1631       if (negating)
1632           expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633       tree range = force_gimple_operand_gsi
1634           (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1635 
1636       /* Determine number of iterations.  */
1637       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1640 
1641       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642                                                        true, GSI_SAME_STMT);
1643 
1644       counts[ix].base = b;
1645       counts[ix].iters = iters;
1646       counts[ix].step = s;
1647 
1648       total = fold_build2 (MULT_EXPR, bound_type, total,
1649                                  fold_convert (bound_type, iters));
1650     }
1651 
1652   return total;
1653 }
1654 
1655 /* Emit initializers for collapsed loop members.  INNER is true if
1656    this is for the element loop of a TILE.  IVAR is the outer
1657    loop iteration variable, from which collapsed loop iteration values
1658    are  calculated.  COUNTS array has been initialized by
1659    expand_oacc_collapse_inits.  */
1660 
1661 static void
expand_oacc_collapse_vars(const struct omp_for_data * fd,bool inner,gimple_stmt_iterator * gsi,const oacc_collapse * counts,tree ivar,tree diff_type)1662 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1663                                  gimple_stmt_iterator *gsi,
1664                                  const oacc_collapse *counts, tree ivar,
1665                                  tree diff_type)
1666 {
1667   tree ivar_type = TREE_TYPE (ivar);
1668 
1669   /*  The most rapidly changing iteration variable is the innermost
1670       one.  */
1671   for (int ix = fd->collapse; ix--;)
1672     {
1673       const omp_for_data_loop *loop = &fd->loops[ix];
1674       const oacc_collapse *collapse = &counts[ix];
1675       tree v = inner ? loop->v : collapse->outer;
1676       tree iter_type = TREE_TYPE (v);
1677       tree plus_type = iter_type;
1678       enum tree_code plus_code = PLUS_EXPR;
1679       tree expr;
1680 
1681       if (POINTER_TYPE_P (iter_type))
1682           {
1683             plus_code = POINTER_PLUS_EXPR;
1684             plus_type = sizetype;
1685           }
1686 
1687       expr = ivar;
1688       if (ix)
1689           {
1690             tree mod = fold_convert (ivar_type, collapse->iters);
1691             ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1692             expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1693             ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1694                                                      true, GSI_SAME_STMT);
1695           }
1696 
1697       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1698                                 fold_convert (diff_type, collapse->step));
1699       expr = fold_build2 (plus_code, iter_type,
1700                                 inner ? collapse->outer : collapse->base,
1701                                 fold_convert (plus_type, expr));
1702       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1703                                                true, GSI_SAME_STMT);
1704       gassign *ass = gimple_build_assign (v, expr);
1705       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1706     }
1707 }
1708 
1709 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1710    of the combined collapse > 1 loop constructs, generate code like:
1711           if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712           if (cond3 is <)
1713             adj = STEP3 - 1;
1714           else
1715             adj = STEP3 + 1;
1716           count3 = (adj + N32 - N31) / STEP3;
1717           if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718           if (cond2 is <)
1719             adj = STEP2 - 1;
1720           else
1721             adj = STEP2 + 1;
1722           count2 = (adj + N22 - N21) / STEP2;
1723           if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724           if (cond1 is <)
1725             adj = STEP1 - 1;
1726           else
1727             adj = STEP1 + 1;
1728           count1 = (adj + N12 - N11) / STEP1;
1729           count = count1 * count2 * count3;
1730    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731           count = 0;
1732    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1733    of the combined loop constructs, just initialize COUNTS array
1734    from the _looptemp_ clauses.  For loop nests with non-rectangular
1735    loops, do this only for the rectangular loops.  Then pick
1736    the loops which reference outer vars in their bound expressions
1737    and the loops which they refer to and for this sub-nest compute
1738    number of iterations.  For triangular loops use Faulhaber's formula,
1739    otherwise as a fallback, compute by iterating the loops.
1740    If e.g. the sub-nest is
1741           for (I = N11; I COND1 N12; I += STEP1)
1742           for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743           for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1744    do:
1745           COUNT = 0;
1746           for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747           for (tmpj = M21 * tmpi + N21;
1748                tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1749             {
1750               int tmpk1 = M31 * tmpj + N31;
1751               int tmpk2 = M32 * tmpj + N32;
1752               if (tmpk1 COND3 tmpk2)
1753                 {
1754                     if (COND3 is <)
1755                       adj = STEP3 - 1;
1756                     else
1757                       adj = STEP3 + 1;
1758                     COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1759                 }
1760             }
1761    and finally multiply the counts of the rectangular loops not
1762    in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
1763    store number of iterations of the loops from fd->first_nonrect
1764    to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765    by the counts of rectangular loops not referenced in any non-rectangular
1766    loops sandwiched in between those.  */
1767 
1768 /* NOTE: It *could* be better to moosh all of the BBs together,
1769    creating one larger BB with all the computation and the unexpected
1770    jump at the end.  I.e.
1771 
1772    bool zero3, zero2, zero1, zero;
1773 
1774    zero3 = N32 c3 N31;
1775    count3 = (N32 - N31) /[cl] STEP3;
1776    zero2 = N22 c2 N21;
1777    count2 = (N22 - N21) /[cl] STEP2;
1778    zero1 = N12 c1 N11;
1779    count1 = (N12 - N11) /[cl] STEP1;
1780    zero = zero3 || zero2 || zero1;
1781    count = count1 * count2 * count3;
1782    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1783 
1784    After all, we expect the zero=false, and thus we expect to have to
1785    evaluate all of the comparison expressions, so short-circuiting
1786    oughtn't be a win.  Since the condition isn't protecting a
1787    denominator, we're not concerned about divide-by-zero, so we can
1788    fully evaluate count even if a numerator turned out to be wrong.
1789 
1790    It seems like putting this all together would create much better
1791    scheduling opportunities, and less pressure on the chip's branch
1792    predictor.  */
1793 
1794 static void
expand_omp_for_init_counts(struct omp_for_data * fd,gimple_stmt_iterator * gsi,basic_block & entry_bb,tree * counts,basic_block & zero_iter1_bb,int & first_zero_iter1,basic_block & zero_iter2_bb,int & first_zero_iter2,basic_block & l2_dom_bb)1795 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796                                   basic_block &entry_bb, tree *counts,
1797                                   basic_block &zero_iter1_bb, int &first_zero_iter1,
1798                                   basic_block &zero_iter2_bb, int &first_zero_iter2,
1799                                   basic_block &l2_dom_bb)
1800 {
1801   tree t, type = TREE_TYPE (fd->loop.v);
1802   edge e, ne;
1803   int i;
1804 
1805   /* Collapsed loops need work for expansion into SSA form.  */
1806   gcc_assert (!gimple_in_ssa_p (cfun));
1807 
1808   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1810     {
1811       gcc_assert (fd->ordered == 0);
1812       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813            isn't supposed to be handled, as the inner loop doesn't
1814            use it.  */
1815       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816                                              OMP_CLAUSE__LOOPTEMP_);
1817       gcc_assert (innerc);
1818       for (i = 0; i < fd->collapse; i++)
1819           {
1820             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821                                             OMP_CLAUSE__LOOPTEMP_);
1822             gcc_assert (innerc);
1823             if (i)
1824               counts[i] = OMP_CLAUSE_DECL (innerc);
1825             else
1826               counts[0] = NULL_TREE;
1827           }
1828       if (fd->non_rect
1829             && fd->last_nonrect == fd->first_nonrect + 1
1830             && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1831           {
1832             tree c[4];
1833             for (i = 0; i < 4; i++)
1834               {
1835                 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836                                                   OMP_CLAUSE__LOOPTEMP_);
1837                 gcc_assert (innerc);
1838                 c[i] = OMP_CLAUSE_DECL (innerc);
1839               }
1840             counts[0] = c[0];
1841             fd->first_inner_iterations = c[1];
1842             fd->factor = c[2];
1843             fd->adjn1 = c[3];
1844           }
1845       return;
1846     }
1847 
1848   for (i = fd->collapse; i < fd->ordered; i++)
1849     {
1850       tree itype = TREE_TYPE (fd->loops[i].v);
1851       counts[i] = NULL_TREE;
1852       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853                            fold_convert (itype, fd->loops[i].n1),
1854                            fold_convert (itype, fd->loops[i].n2));
1855       if (t && integer_zerop (t))
1856           {
1857             for (i = fd->collapse; i < fd->ordered; i++)
1858               counts[i] = build_int_cst (type, 0);
1859             break;
1860           }
1861     }
1862   bool rect_count_seen = false;
1863   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1864     {
1865       tree itype = TREE_TYPE (fd->loops[i].v);
1866 
1867       if (i >= fd->collapse && counts[i])
1868           continue;
1869       if (fd->non_rect)
1870           {
1871             /* Skip loops that use outer iterators in their expressions
1872                during this phase.  */
1873             if (fd->loops[i].m1 || fd->loops[i].m2)
1874               {
1875                 counts[i] = build_zero_cst (type);
1876                 continue;
1877               }
1878           }
1879       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880             && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881                                         fold_convert (itype, fd->loops[i].n1),
1882                                         fold_convert (itype, fd->loops[i].n2)))
1883                 == NULL_TREE || !integer_onep (t)))
1884           {
1885             gcond *cond_stmt;
1886             tree n1, n2;
1887             n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888             n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889                                                    true, GSI_SAME_STMT);
1890             n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891             n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892                                                    true, GSI_SAME_STMT);
1893             cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894                                                        n1, n2);
1895             e = split_block (entry_bb, cond_stmt);
1896             basic_block &zero_iter_bb
1897               = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898             int &first_zero_iter
1899               = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900             if (zero_iter_bb == NULL)
1901               {
1902                 gassign *assign_stmt;
1903                 first_zero_iter = i;
1904                 zero_iter_bb = create_empty_bb (entry_bb);
1905                 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906                 *gsi = gsi_after_labels (zero_iter_bb);
1907                 if (i < fd->collapse)
1908                     assign_stmt = gimple_build_assign (fd->loop.n2,
1909                                                                build_zero_cst (type));
1910                 else
1911                     {
1912                       counts[i] = create_tmp_reg (type, ".count");
1913                       assign_stmt
1914                         = gimple_build_assign (counts[i], build_zero_cst (type));
1915                     }
1916                 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917                 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918                                                entry_bb);
1919               }
1920             ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921             ne->probability = profile_probability::very_unlikely ();
1922             e->flags = EDGE_TRUE_VALUE;
1923             e->probability = ne->probability.invert ();
1924             if (l2_dom_bb == NULL)
1925               l2_dom_bb = entry_bb;
1926             entry_bb = e->dest;
1927             *gsi = gsi_last_nondebug_bb (entry_bb);
1928           }
1929 
1930       if (POINTER_TYPE_P (itype))
1931           itype = signed_type_for (itype);
1932       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933                                          ? -1 : 1));
1934       t = fold_build2 (PLUS_EXPR, itype,
1935                            fold_convert (itype, fd->loops[i].step), t);
1936       t = fold_build2 (PLUS_EXPR, itype, t,
1937                            fold_convert (itype, fd->loops[i].n2));
1938       t = fold_build2 (MINUS_EXPR, itype, t,
1939                            fold_convert (itype, fd->loops[i].n1));
1940       /* ?? We could probably use CEIL_DIV_EXPR instead of
1941            TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1942            generate the same code in the end because generically we
1943            don't know that the values involved must be negative for
1944            GT??  */
1945       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946           t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947                                fold_build1 (NEGATE_EXPR, itype, t),
1948                                fold_build1 (NEGATE_EXPR, itype,
1949                                               fold_convert (itype,
1950                                                                 fd->loops[i].step)));
1951       else
1952           t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953                                fold_convert (itype, fd->loops[i].step));
1954       t = fold_convert (type, t);
1955       if (TREE_CODE (t) == INTEGER_CST)
1956           counts[i] = t;
1957       else
1958           {
1959             if (i < fd->collapse || i != first_zero_iter2)
1960               counts[i] = create_tmp_reg (type, ".count");
1961             expand_omp_build_assign (gsi, counts[i], t);
1962           }
1963       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1964           {
1965             if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966               continue;
1967             if (!rect_count_seen)
1968               {
1969                 t = counts[i];
1970                 rect_count_seen = true;
1971               }
1972             else
1973               t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974             expand_omp_build_assign (gsi, fd->loop.n2, t);
1975           }
1976     }
1977   if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1978     {
1979       gcc_assert (fd->last_nonrect != -1);
1980 
1981       counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982       expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983                                      build_zero_cst (type));
1984       for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985           if (fd->loops[i].m1
1986               || fd->loops[i].m2
1987               || fd->loops[i].non_rect_referenced)
1988             break;
1989       if (i == fd->last_nonrect
1990             && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991             && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992             && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1993           {
1994             int o = fd->first_nonrect;
1995             tree itype = TREE_TYPE (fd->loops[o].v);
1996             tree n1o = create_tmp_reg (itype, ".n1o");
1997             t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998             expand_omp_build_assign (gsi, n1o, t);
1999             tree n2o = create_tmp_reg (itype, ".n2o");
2000             t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001             expand_omp_build_assign (gsi, n2o, t);
2002             if (fd->loops[i].m1 && fd->loops[i].m2)
2003               t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004                                    unshare_expr (fd->loops[i].m1));
2005             else if (fd->loops[i].m1)
2006               t = fold_build1 (NEGATE_EXPR, itype,
2007                                    unshare_expr (fd->loops[i].m1));
2008             else
2009               t = unshare_expr (fd->loops[i].m2);
2010             tree m2minusm1
2011               = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012                                                   true, GSI_SAME_STMT);
2013 
2014             gimple_stmt_iterator gsi2 = *gsi;
2015             gsi_prev (&gsi2);
2016             e = split_block (entry_bb, gsi_stmt (gsi2));
2017             e = split_block (e->dest, (gimple *) NULL);
2018             basic_block bb1 = e->src;
2019             entry_bb = e->dest;
2020             *gsi = gsi_after_labels (entry_bb);
2021 
2022             gsi2 = gsi_after_labels (bb1);
2023             tree ostep = fold_convert (itype, fd->loops[o].step);
2024             t = build_int_cst (itype, (fd->loops[o].cond_code
2025                                              == LT_EXPR ? -1 : 1));
2026             t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027             t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028             t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029             if (TYPE_UNSIGNED (itype)
2030                 && fd->loops[o].cond_code == GT_EXPR)
2031               t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032                                    fold_build1 (NEGATE_EXPR, itype, t),
2033                                    fold_build1 (NEGATE_EXPR, itype, ostep));
2034             else
2035               t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036             tree outer_niters
2037               = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038                                                   true, GSI_SAME_STMT);
2039             t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040                                  build_one_cst (itype));
2041             t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042             t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043             tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044                                                             true, GSI_SAME_STMT);
2045             tree n1, n2, n1e, n2e;
2046             t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047             if (fd->loops[i].m1)
2048               {
2049                 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050                 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051                 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2052               }
2053             else
2054               n1 = t;
2055             n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056                                                    true, GSI_SAME_STMT);
2057             t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058             if (fd->loops[i].m2)
2059               {
2060                 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061                 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062                 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2063               }
2064             else
2065               n2 = t;
2066             n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067                                                    true, GSI_SAME_STMT);
2068             t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069             if (fd->loops[i].m1)
2070               {
2071                 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072                 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073                 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2074               }
2075             else
2076               n1e = t;
2077             n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078                                                     true, GSI_SAME_STMT);
2079             t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080             if (fd->loops[i].m2)
2081               {
2082                 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083                 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084                 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2085               }
2086             else
2087               n2e = t;
2088             n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089                                                     true, GSI_SAME_STMT);
2090             gcond *cond_stmt
2091               = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092                                              n1, n2);
2093             e = split_block (bb1, cond_stmt);
2094             e->flags = EDGE_TRUE_VALUE;
2095             e->probability = profile_probability::likely ().guessed ();
2096             basic_block bb2 = e->dest;
2097             gsi2 = gsi_after_labels (bb2);
2098 
2099             cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100                                                        n1e, n2e);
2101             e = split_block (bb2, cond_stmt);
2102             e->flags = EDGE_TRUE_VALUE;
2103             e->probability = profile_probability::likely ().guessed ();
2104             gsi2 = gsi_after_labels (e->dest);
2105 
2106             tree step = fold_convert (itype, fd->loops[i].step);
2107             t = build_int_cst (itype, (fd->loops[i].cond_code
2108                                              == LT_EXPR ? -1 : 1));
2109             t = fold_build2 (PLUS_EXPR, itype, step, t);
2110             t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111             t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112             if (TYPE_UNSIGNED (itype)
2113                 && fd->loops[i].cond_code == GT_EXPR)
2114               t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115                                    fold_build1 (NEGATE_EXPR, itype, t),
2116                                    fold_build1 (NEGATE_EXPR, itype, step));
2117             else
2118               t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119             tree first_inner_iterations
2120               = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121                                                   true, GSI_SAME_STMT);
2122             t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123             if (TYPE_UNSIGNED (itype)
2124                 && fd->loops[i].cond_code == GT_EXPR)
2125               t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126                                    fold_build1 (NEGATE_EXPR, itype, t),
2127                                    fold_build1 (NEGATE_EXPR, itype, step));
2128             else
2129               t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130             tree factor
2131               = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132                                                   true, GSI_SAME_STMT);
2133             t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134                                  build_one_cst (itype));
2135             t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136             t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137             t = fold_build2 (MULT_EXPR, itype, factor, t);
2138             t = fold_build2 (PLUS_EXPR, itype,
2139                                  fold_build2 (MULT_EXPR, itype, outer_niters,
2140                                                   first_inner_iterations), t);
2141             expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142                                            fold_convert (type, t));
2143 
2144             basic_block bb3 = create_empty_bb (bb1);
2145             add_bb_to_loop (bb3, bb1->loop_father);
2146 
2147             e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148             e->probability = profile_probability::unlikely ().guessed ();
2149 
2150             gsi2 = gsi_after_labels (bb3);
2151             cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152                                                        n1e, n2e);
2153             e = split_block (bb3, cond_stmt);
2154             e->flags = EDGE_TRUE_VALUE;
2155             e->probability = profile_probability::likely ().guessed ();
2156             basic_block bb4 = e->dest;
2157 
2158             ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159             ne->probability = e->probability.invert ();
2160 
2161             basic_block bb5 = create_empty_bb (bb2);
2162             add_bb_to_loop (bb5, bb2->loop_father);
2163 
2164             ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165             ne->probability = profile_probability::unlikely ().guessed ();
2166 
2167             for (int j = 0; j < 2; j++)
2168               {
2169                 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170                 t = fold_build2 (MINUS_EXPR, itype,
2171                                      unshare_expr (fd->loops[i].n1),
2172                                      unshare_expr (fd->loops[i].n2));
2173                 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174                 tree tem
2175                     = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176                                                       true, GSI_SAME_STMT);
2177                 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178                 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179                 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180                 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181                                                         true, GSI_SAME_STMT);
2182                 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183                 if (fd->loops[i].m1)
2184                     {
2185                       n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186                       n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187                       n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2188                     }
2189                 else
2190                     n1 = t;
2191                 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192                                                        true, GSI_SAME_STMT);
2193                 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194                 if (fd->loops[i].m2)
2195                     {
2196                       n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197                       n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198                       n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2199                     }
2200                 else
2201                     n2 = t;
2202                 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203                                                        true, GSI_SAME_STMT);
2204                 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2205 
2206                 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207                                                              n1, n2);
2208                 e = split_block (gsi_bb (gsi2), cond_stmt);
2209                 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210                 e->probability = profile_probability::unlikely ().guessed ();
2211                 ne = make_edge (e->src, bb1,
2212                                     j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213                 ne->probability = e->probability.invert ();
2214                 gsi2 = gsi_after_labels (e->dest);
2215 
2216                 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217                 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2218 
2219                 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2220               }
2221 
2222             set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223             set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224             set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2225 
2226             if (fd->first_nonrect + 1 == fd->last_nonrect)
2227               {
2228                 fd->first_inner_iterations = first_inner_iterations;
2229                 fd->factor = factor;
2230                 fd->adjn1 = n1o;
2231               }
2232           }
2233       else
2234           {
2235             /* Fallback implementation.  Evaluate the loops with m1/m2
2236                non-NULL as well as their outer loops at runtime using temporaries
2237                instead of the original iteration variables, and in the
2238                body just bump the counter.  */
2239             gimple_stmt_iterator gsi2 = *gsi;
2240             gsi_prev (&gsi2);
2241             e = split_block (entry_bb, gsi_stmt (gsi2));
2242             e = split_block (e->dest, (gimple *) NULL);
2243             basic_block cur_bb = e->src;
2244             basic_block next_bb = e->dest;
2245             entry_bb = e->dest;
2246             *gsi = gsi_after_labels (entry_bb);
2247 
2248             tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249             memset (vs, 0, fd->last_nonrect * sizeof (tree));
2250 
2251             for (i = 0; i <= fd->last_nonrect; i++)
2252               {
2253                 if (fd->loops[i].m1 == NULL_TREE
2254                       && fd->loops[i].m2 == NULL_TREE
2255                       && !fd->loops[i].non_rect_referenced)
2256                     continue;
2257 
2258                 tree itype = TREE_TYPE (fd->loops[i].v);
2259 
2260                 gsi2 = gsi_after_labels (cur_bb);
2261                 tree n1, n2;
2262                 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263                 if (fd->loops[i].m1 == NULL_TREE)
2264                     n1 = t;
2265                 else if (POINTER_TYPE_P (itype))
2266                     {
2267                       gcc_assert (integer_onep (fd->loops[i].m1));
2268                       t = fold_convert (sizetype,
2269                                             unshare_expr (fd->loops[i].n1));
2270                       n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271                     }
2272                 else
2273                     {
2274                       n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275                       n1 = fold_build2 (MULT_EXPR, itype,
2276                                             vs[i - fd->loops[i].outer], n1);
2277                       n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278                     }
2279                 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280                                                        true, GSI_SAME_STMT);
2281                 if (i < fd->last_nonrect)
2282                     {
2283                       vs[i] = create_tmp_reg (itype, ".it");
2284                       expand_omp_build_assign (&gsi2, vs[i], n1);
2285                     }
2286                 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287                 if (fd->loops[i].m2 == NULL_TREE)
2288                     n2 = t;
2289                 else if (POINTER_TYPE_P (itype))
2290                     {
2291                       gcc_assert (integer_onep (fd->loops[i].m2));
2292                       t = fold_convert (sizetype,
2293                                             unshare_expr (fd->loops[i].n2));
2294                       n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2295                     }
2296                 else
2297                     {
2298                       n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2299                       n2 = fold_build2 (MULT_EXPR, itype,
2300                                             vs[i - fd->loops[i].outer], n2);
2301                       n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2302                     }
2303                 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2304                                                        true, GSI_SAME_STMT);
2305                 if (POINTER_TYPE_P (itype))
2306                     itype = signed_type_for (itype);
2307                 if (i == fd->last_nonrect)
2308                     {
2309                       gcond *cond_stmt
2310                         = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2311                                                        n1, n2);
2312                       e = split_block (cur_bb, cond_stmt);
2313                       e->flags = EDGE_TRUE_VALUE;
2314                       ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2315                       e->probability = profile_probability::likely ().guessed ();
2316                       ne->probability = e->probability.invert ();
2317                       gsi2 = gsi_after_labels (e->dest);
2318 
2319                       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2320                                                        ? -1 : 1));
2321                       t = fold_build2 (PLUS_EXPR, itype,
2322                                            fold_convert (itype, fd->loops[i].step), t);
2323                       t = fold_build2 (PLUS_EXPR, itype, t,
2324                                            fold_convert (itype, n2));
2325                       t = fold_build2 (MINUS_EXPR, itype, t,
2326                                            fold_convert (itype, n1));
2327                       tree step = fold_convert (itype, fd->loops[i].step);
2328                       if (TYPE_UNSIGNED (itype)
2329                           && fd->loops[i].cond_code == GT_EXPR)
2330                         t = fold_build2 (TRUNC_DIV_EXPR, itype,
2331                                              fold_build1 (NEGATE_EXPR, itype, t),
2332                                              fold_build1 (NEGATE_EXPR, itype, step));
2333                       else
2334                         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2335                       t = fold_convert (type, t);
2336                       t = fold_build2 (PLUS_EXPR, type,
2337                                            counts[fd->last_nonrect], t);
2338                       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2339                                                             true, GSI_SAME_STMT);
2340                       expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2341                       e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2342                       set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2343                       break;
2344                     }
2345                 e = split_block (cur_bb, last_stmt (cur_bb));
2346 
2347                 basic_block new_cur_bb = create_empty_bb (cur_bb);
2348                 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2349 
2350                 gsi2 = gsi_after_labels (e->dest);
2351                 tree step = fold_convert (itype,
2352                                                   unshare_expr (fd->loops[i].step));
2353                 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2354                     t = fold_build_pointer_plus (vs[i],
2355                                                        fold_convert (sizetype, step));
2356                 else
2357                     t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2358                 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2359                                                       true, GSI_SAME_STMT);
2360                 expand_omp_build_assign (&gsi2, vs[i], t);
2361 
2362                 ne = split_block (e->dest, last_stmt (e->dest));
2363                 gsi2 = gsi_after_labels (ne->dest);
2364 
2365                 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2366                 edge e3, e4;
2367                 if (next_bb == entry_bb)
2368                     {
2369                       e3 = find_edge (ne->dest, next_bb);
2370                       e3->flags = EDGE_FALSE_VALUE;
2371                     }
2372                 else
2373                     e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2374                 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2375                 e4->probability = profile_probability::likely ().guessed ();
2376                 e3->probability = e4->probability.invert ();
2377                 basic_block esrc = e->src;
2378                 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2379                 cur_bb = new_cur_bb;
2380                 basic_block latch_bb = next_bb;
2381                 next_bb = e->dest;
2382                 remove_edge (e);
2383                 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2384                 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2385                 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2386               }
2387           }
2388       t = NULL_TREE;
2389       for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2390           if (!fd->loops[i].non_rect_referenced
2391               && fd->loops[i].m1 == NULL_TREE
2392               && fd->loops[i].m2 == NULL_TREE)
2393             {
2394               if (t == NULL_TREE)
2395                 t = counts[i];
2396               else
2397                 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2398             }
2399       if (t)
2400           {
2401             t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2402             expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2403           }
2404       if (!rect_count_seen)
2405           t = counts[fd->last_nonrect];
2406       else
2407           t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2408                                counts[fd->last_nonrect]);
2409       expand_omp_build_assign (gsi, fd->loop.n2, t);
2410     }
2411   else if (fd->non_rect)
2412     {
2413       tree t = fd->loop.n2;
2414       gcc_assert (TREE_CODE (t) == INTEGER_CST);
2415       int non_rect_referenced = 0, non_rect = 0;
2416       for (i = 0; i < fd->collapse; i++)
2417           {
2418             if ((i < fd->first_nonrect || i > fd->last_nonrect)
2419                 && !integer_zerop (counts[i]))
2420               t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2421             if (fd->loops[i].non_rect_referenced)
2422               non_rect_referenced++;
2423             if (fd->loops[i].m1 || fd->loops[i].m2)
2424               non_rect++;
2425           }
2426       gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2427       counts[fd->last_nonrect] = t;
2428     }
2429 }
2430 
2431 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
2432           T = V;
2433           V3 = N31 + (T % count3) * STEP3;
2434           T = T / count3;
2435           V2 = N21 + (T % count2) * STEP2;
2436           T = T / count2;
2437           V1 = N11 + T * STEP1;
2438    if this loop doesn't have an inner loop construct combined with it.
2439    If it does have an inner loop construct combined with it and the
2440    iteration count isn't known constant, store values from counts array
2441    into its _looptemp_ temporaries instead.
2442    For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2443    inclusive), use the count of all those loops together, and either
2444    find quadratic etc. equation roots, or as a fallback, do:
2445           COUNT = 0;
2446           for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2447           for (tmpj = M21 * tmpi + N21;
2448                tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2449             {
2450               int tmpk1 = M31 * tmpj + N31;
2451               int tmpk2 = M32 * tmpj + N32;
2452               if (tmpk1 COND3 tmpk2)
2453                 {
2454                     if (COND3 is <)
2455                       adj = STEP3 - 1;
2456                     else
2457                       adj = STEP3 + 1;
2458                     int temp = (adj + tmpk2 - tmpk1) / STEP3;
2459                     if (COUNT + temp > T)
2460                       {
2461                         V1 = tmpi;
2462                         V2 = tmpj;
2463                         V3 = tmpk1 + (T - COUNT) * STEP3;
2464                         goto done;
2465                       }
2466                     else
2467                       COUNT += temp;
2468                 }
2469             }
2470           done:;
2471    but for optional innermost or outermost rectangular loops that aren't
2472    referenced by other loop expressions keep doing the division/modulo.  */
2473 
2474 static void
expand_omp_for_init_vars(struct omp_for_data * fd,gimple_stmt_iterator * gsi,tree * counts,tree * nonrect_bounds,gimple * inner_stmt,tree startvar)2475 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2476                                 tree *counts, tree *nonrect_bounds,
2477                                 gimple *inner_stmt, tree startvar)
2478 {
2479   int i;
2480   if (gimple_omp_for_combined_p (fd->for_stmt))
2481     {
2482       /* If fd->loop.n2 is constant, then no propagation of the counts
2483            is needed, they are constant.  */
2484       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2485           return;
2486 
2487       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2488                          ? gimple_omp_taskreg_clauses (inner_stmt)
2489                          : gimple_omp_for_clauses (inner_stmt);
2490       /* First two _looptemp_ clauses are for istart/iend, counts[0]
2491            isn't supposed to be handled, as the inner loop doesn't
2492            use it.  */
2493       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2494       gcc_assert (innerc);
2495       int count = 0;
2496       if (fd->non_rect
2497             && fd->last_nonrect == fd->first_nonrect + 1
2498             && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2499           count = 4;
2500       for (i = 0; i < fd->collapse + count; i++)
2501           {
2502             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2503                                             OMP_CLAUSE__LOOPTEMP_);
2504             gcc_assert (innerc);
2505             if (i)
2506               {
2507                 tree tem = OMP_CLAUSE_DECL (innerc);
2508                 tree t;
2509                 if (i < fd->collapse)
2510                     t = counts[i];
2511                 else
2512                     switch (i - fd->collapse)
2513                       {
2514                       case 0: t = counts[0]; break;
2515                       case 1: t = fd->first_inner_iterations; break;
2516                       case 2: t = fd->factor; break;
2517                       case 3: t = fd->adjn1; break;
2518                       default: gcc_unreachable ();
2519                       }
2520                 t = fold_convert (TREE_TYPE (tem), t);
2521                 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2522                                                       false, GSI_CONTINUE_LINKING);
2523                 gassign *stmt = gimple_build_assign (tem, t);
2524                 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2525               }
2526           }
2527       return;
2528     }
2529 
2530   tree type = TREE_TYPE (fd->loop.v);
2531   tree tem = create_tmp_reg (type, ".tem");
2532   gassign *stmt = gimple_build_assign (tem, startvar);
2533   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2534 
2535   for (i = fd->collapse - 1; i >= 0; i--)
2536     {
2537       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2538       itype = vtype;
2539       if (POINTER_TYPE_P (vtype))
2540           itype = signed_type_for (vtype);
2541       if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2542           t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2543       else
2544           t = tem;
2545       if (i == fd->last_nonrect)
2546           {
2547             t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2548                                                   false, GSI_CONTINUE_LINKING);
2549             tree stopval = t;
2550             tree idx = create_tmp_reg (type, ".count");
2551             expand_omp_build_assign (gsi, idx,
2552                                            build_zero_cst (type), true);
2553             basic_block bb_triang = NULL, bb_triang_dom = NULL;
2554             if (fd->first_nonrect + 1 == fd->last_nonrect
2555                 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2556                       || fd->first_inner_iterations)
2557                 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2558                       != CODE_FOR_nothing)
2559                 && !integer_zerop (fd->loop.n2))
2560               {
2561                 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2562                 tree itype = TREE_TYPE (fd->loops[i].v);
2563                 tree first_inner_iterations = fd->first_inner_iterations;
2564                 tree factor = fd->factor;
2565                 gcond *cond_stmt
2566                     = expand_omp_build_cond (gsi, NE_EXPR, factor,
2567                                                    build_zero_cst (TREE_TYPE (factor)),
2568                                                    true);
2569                 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2570                 basic_block bb0 = e->src;
2571                 e->flags = EDGE_TRUE_VALUE;
2572                 e->probability = profile_probability::likely ();
2573                 bb_triang_dom = bb0;
2574                 *gsi = gsi_after_labels (e->dest);
2575                 tree slltype = long_long_integer_type_node;
2576                 tree ulltype = long_long_unsigned_type_node;
2577                 tree stopvalull = fold_convert (ulltype, stopval);
2578                 stopvalull
2579                     = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2580                                                       false, GSI_CONTINUE_LINKING);
2581                 first_inner_iterations
2582                     = fold_convert (slltype, first_inner_iterations);
2583                 first_inner_iterations
2584                     = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2585                                                       NULL_TREE, false,
2586                                                       GSI_CONTINUE_LINKING);
2587                 factor = fold_convert (slltype, factor);
2588                 factor
2589                     = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2590                                                       false, GSI_CONTINUE_LINKING);
2591                 tree first_inner_iterationsd
2592                     = fold_build1 (FLOAT_EXPR, double_type_node,
2593                                      first_inner_iterations);
2594                 first_inner_iterationsd
2595                     = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2596                                                       NULL_TREE, false,
2597                                                       GSI_CONTINUE_LINKING);
2598                 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2599                                                     factor);
2600                 factord = force_gimple_operand_gsi (gsi, factord, true,
2601                                                               NULL_TREE, false,
2602                                                               GSI_CONTINUE_LINKING);
2603                 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2604                                                      stopvalull);
2605                 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2606                                                                NULL_TREE, false,
2607                                                                GSI_CONTINUE_LINKING);
2608                 /* Temporarily disable flag_rounding_math, values will be
2609                      decimal numbers divided by 2 and worst case imprecisions
2610                      due to too large values ought to be caught later by the
2611                      checks for fallback.  */
2612                 int save_flag_rounding_math = flag_rounding_math;
2613                 flag_rounding_math = 0;
2614                 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2615                                      build_real (double_type_node, dconst2));
2616                 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2617                                              first_inner_iterationsd, t);
2618                 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2619                                                        GSI_CONTINUE_LINKING);
2620                 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2621                                      build_real (double_type_node, dconst2));
2622                 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2623                 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2624                                      fold_build2 (MULT_EXPR, double_type_node,
2625                                                       t3, t3));
2626                 flag_rounding_math = save_flag_rounding_math;
2627                 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2628                                                       GSI_CONTINUE_LINKING);
2629                 if (flag_exceptions
2630                       && cfun->can_throw_non_call_exceptions
2631                       && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2632                     {
2633                       tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2634                                                     build_zero_cst (double_type_node));
2635                       tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2636                                                               false, GSI_CONTINUE_LINKING);
2637                       cond_stmt = gimple_build_cond (NE_EXPR, tem,
2638                                                              boolean_false_node,
2639                                                              NULL_TREE, NULL_TREE);
2640                     }
2641                 else
2642                     cond_stmt
2643                       = gimple_build_cond (LT_EXPR, t,
2644                                                build_zero_cst (double_type_node),
2645                                                NULL_TREE, NULL_TREE);
2646                 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2647                 e = split_block (gsi_bb (*gsi), cond_stmt);
2648                 basic_block bb1 = e->src;
2649                 e->flags = EDGE_FALSE_VALUE;
2650                 e->probability = profile_probability::very_likely ();
2651                 *gsi = gsi_after_labels (e->dest);
2652                 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2653                 tree sqrtr = create_tmp_var (double_type_node);
2654                 gimple_call_set_lhs (call, sqrtr);
2655                 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2656                 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2657                 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2658                 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2659                 tree c = create_tmp_var (ulltype);
2660                 tree d = create_tmp_var (ulltype);
2661                 expand_omp_build_assign (gsi, c, t, true);
2662                 t = fold_build2 (MINUS_EXPR, ulltype, c,
2663                                      build_one_cst (ulltype));
2664                 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2665                 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2666                 t = fold_build2 (MULT_EXPR, ulltype,
2667                                      fold_convert (ulltype, fd->factor), t);
2668                 tree t2
2669                     = fold_build2 (MULT_EXPR, ulltype, c,
2670                                      fold_convert (ulltype,
2671                                                        fd->first_inner_iterations));
2672                 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2673                 expand_omp_build_assign (gsi, d, t, true);
2674                 t = fold_build2 (MULT_EXPR, ulltype,
2675                                      fold_convert (ulltype, fd->factor), c);
2676                 t = fold_build2 (PLUS_EXPR, ulltype,
2677                                      t, fold_convert (ulltype,
2678                                                             fd->first_inner_iterations));
2679                 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2680                                                        GSI_CONTINUE_LINKING);
2681                 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2682                                                        NULL_TREE, NULL_TREE);
2683                 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2684                 e = split_block (gsi_bb (*gsi), cond_stmt);
2685                 basic_block bb2 = e->src;
2686                 e->flags = EDGE_TRUE_VALUE;
2687                 e->probability = profile_probability::very_likely ();
2688                 *gsi = gsi_after_labels (e->dest);
2689                 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2690                 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2691                                                       GSI_CONTINUE_LINKING);
2692                 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2693                                                        NULL_TREE, NULL_TREE);
2694                 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2695                 e = split_block (gsi_bb (*gsi), cond_stmt);
2696                 basic_block bb3 = e->src;
2697                 e->flags = EDGE_FALSE_VALUE;
2698                 e->probability = profile_probability::very_likely ();
2699                 *gsi = gsi_after_labels (e->dest);
2700                 t = fold_convert (itype, c);
2701                 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2702                 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2703                 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2704                                                       GSI_CONTINUE_LINKING);
2705                 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2706                 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2707                 t2 = fold_convert (itype, t2);
2708                 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2709                 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2710                 if (fd->loops[i].m1)
2711                     {
2712                       t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2713                       t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2714                     }
2715                 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2716                 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2717                 bb_triang = e->src;
2718                 *gsi = gsi_after_labels (e->dest);
2719                 remove_edge (e);
2720                 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2721                 e->probability = profile_probability::very_unlikely ();
2722                 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2723                 e->probability = profile_probability::very_unlikely ();
2724                 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2725                 e->probability = profile_probability::very_unlikely ();
2726 
2727                 basic_block bb4 = create_empty_bb (bb0);
2728                 add_bb_to_loop (bb4, bb0->loop_father);
2729                 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2730                 e->probability = profile_probability::unlikely ();
2731                 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2732                 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2733                 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2734                 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2735                 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2736                                         counts[i], counts[i - 1]);
2737                 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2738                                                        GSI_CONTINUE_LINKING);
2739                 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2740                 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2741                 t = fold_convert (itype, t);
2742                 t2 = fold_convert (itype, t2);
2743                 t = fold_build2 (MULT_EXPR, itype, t,
2744                                      fold_convert (itype, fd->loops[i].step));
2745                 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2746                 t2 = fold_build2 (MULT_EXPR, itype, t2,
2747                                         fold_convert (itype, fd->loops[i - 1].step));
2748                 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2749                 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2750                                                        false, GSI_CONTINUE_LINKING);
2751                 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2752                 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2753                 if (fd->loops[i].m1)
2754                     {
2755                       t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2756                                             fd->loops[i - 1].v);
2757                       t = fold_build2 (PLUS_EXPR, itype, t, t2);
2758                     }
2759                 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2760                                                       false, GSI_CONTINUE_LINKING);
2761                 stmt = gimple_build_assign (fd->loops[i].v, t);
2762                 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2763               }
2764             /* Fallback implementation.  Evaluate the loops in between
2765                (inclusive) fd->first_nonrect and fd->last_nonrect at
2766                runtime using temporaries instead of the original iteration
2767                variables, in the body just bump the counter and compare
2768                with the desired value.  */
2769             gimple_stmt_iterator gsi2 = *gsi;
2770             basic_block entry_bb = gsi_bb (gsi2);
2771             edge e = split_block (entry_bb, gsi_stmt (gsi2));
2772             e = split_block (e->dest, (gimple *) NULL);
2773             basic_block dom_bb = NULL;
2774             basic_block cur_bb = e->src;
2775             basic_block next_bb = e->dest;
2776             entry_bb = e->dest;
2777             *gsi = gsi_after_labels (entry_bb);
2778 
2779             tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2780             tree n1 = NULL_TREE, n2 = NULL_TREE;
2781             memset (vs, 0, fd->last_nonrect * sizeof (tree));
2782 
2783             for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2784               {
2785                 tree itype = TREE_TYPE (fd->loops[j].v);
2786                 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2787                                    && fd->loops[j].m2 == NULL_TREE
2788                                    && !fd->loops[j].non_rect_referenced);
2789                 gsi2 = gsi_after_labels (cur_bb);
2790                 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2791                 if (fd->loops[j].m1 == NULL_TREE)
2792                     n1 = rect_p ? build_zero_cst (type) : t;
2793                 else if (POINTER_TYPE_P (itype))
2794                     {
2795                       gcc_assert (integer_onep (fd->loops[j].m1));
2796                       t = fold_convert (sizetype,
2797                                             unshare_expr (fd->loops[j].n1));
2798                       n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2799                     }
2800                 else
2801                     {
2802                       n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2803                       n1 = fold_build2 (MULT_EXPR, itype,
2804                                             vs[j - fd->loops[j].outer], n1);
2805                       n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2806                     }
2807                 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2808                                                        true, GSI_SAME_STMT);
2809                 if (j < fd->last_nonrect)
2810                     {
2811                       vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2812                       expand_omp_build_assign (&gsi2, vs[j], n1);
2813                     }
2814                 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2815                 if (fd->loops[j].m2 == NULL_TREE)
2816                     n2 = rect_p ? counts[j] : t;
2817                 else if (POINTER_TYPE_P (itype))
2818                     {
2819                       gcc_assert (integer_onep (fd->loops[j].m2));
2820                       t = fold_convert (sizetype,
2821                                             unshare_expr (fd->loops[j].n2));
2822                       n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2823                     }
2824                 else
2825                     {
2826                       n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2827                       n2 = fold_build2 (MULT_EXPR, itype,
2828                                             vs[j - fd->loops[j].outer], n2);
2829                       n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2830                     }
2831                 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2832                                                        true, GSI_SAME_STMT);
2833                 if (POINTER_TYPE_P (itype))
2834                     itype = signed_type_for (itype);
2835                 if (j == fd->last_nonrect)
2836                     {
2837                       gcond *cond_stmt
2838                         = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2839                                                        n1, n2);
2840                       e = split_block (cur_bb, cond_stmt);
2841                       e->flags = EDGE_TRUE_VALUE;
2842                       edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2843                       e->probability = profile_probability::likely ().guessed ();
2844                       ne->probability = e->probability.invert ();
2845                       gsi2 = gsi_after_labels (e->dest);
2846 
2847                       t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2848                                                        ? -1 : 1));
2849                       t = fold_build2 (PLUS_EXPR, itype,
2850                                            fold_convert (itype, fd->loops[j].step), t);
2851                       t = fold_build2 (PLUS_EXPR, itype, t,
2852                                            fold_convert (itype, n2));
2853                       t = fold_build2 (MINUS_EXPR, itype, t,
2854                                            fold_convert (itype, n1));
2855                       tree step = fold_convert (itype, fd->loops[j].step);
2856                       if (TYPE_UNSIGNED (itype)
2857                           && fd->loops[j].cond_code == GT_EXPR)
2858                         t = fold_build2 (TRUNC_DIV_EXPR, itype,
2859                                              fold_build1 (NEGATE_EXPR, itype, t),
2860                                              fold_build1 (NEGATE_EXPR, itype, step));
2861                       else
2862                         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2863                       t = fold_convert (type, t);
2864                       t = fold_build2 (PLUS_EXPR, type, idx, t);
2865                       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2866                                                             true, GSI_SAME_STMT);
2867                       e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2868                       set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2869                       cond_stmt
2870                         = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2871                                                    NULL_TREE);
2872                       gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2873                       e = split_block (gsi_bb (gsi2), cond_stmt);
2874                       e->flags = EDGE_TRUE_VALUE;
2875                       e->probability = profile_probability::likely ().guessed ();
2876                       ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2877                       ne->probability = e->probability.invert ();
2878                       gsi2 = gsi_after_labels (e->dest);
2879                       expand_omp_build_assign (&gsi2, idx, t);
2880                       set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2881                       break;
2882                     }
2883                 e = split_block (cur_bb, last_stmt (cur_bb));
2884 
2885                 basic_block new_cur_bb = create_empty_bb (cur_bb);
2886                 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2887 
2888                 gsi2 = gsi_after_labels (e->dest);
2889                 if (rect_p)
2890                     t = fold_build2 (PLUS_EXPR, type, vs[j],
2891                                          build_one_cst (type));
2892                 else
2893                     {
2894                       tree step
2895                         = fold_convert (itype, unshare_expr (fd->loops[j].step));
2896                       if (POINTER_TYPE_P (vtype))
2897                         t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2898                                                                                       step));
2899                       else
2900                         t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2901                     }
2902                 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2903                                                       true, GSI_SAME_STMT);
2904                 expand_omp_build_assign (&gsi2, vs[j], t);
2905 
2906                 edge ne = split_block (e->dest, last_stmt (e->dest));
2907                 gsi2 = gsi_after_labels (ne->dest);
2908 
2909                 gcond *cond_stmt;
2910                 if (next_bb == entry_bb)
2911                     /* No need to actually check the outermost condition.  */
2912                     cond_stmt
2913                       = gimple_build_cond (EQ_EXPR, boolean_true_node,
2914                                                boolean_true_node,
2915                                                NULL_TREE, NULL_TREE);
2916                 else
2917                     cond_stmt
2918                       = gimple_build_cond (rect_p ? LT_EXPR
2919                                                         : fd->loops[j].cond_code,
2920                                                vs[j], n2, NULL_TREE, NULL_TREE);
2921                 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2922                 edge e3, e4;
2923                 if (next_bb == entry_bb)
2924                     {
2925                       e3 = find_edge (ne->dest, next_bb);
2926                       e3->flags = EDGE_FALSE_VALUE;
2927                       dom_bb = ne->dest;
2928                     }
2929                 else
2930                     e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2931                 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2932                 e4->probability = profile_probability::likely ().guessed ();
2933                 e3->probability = e4->probability.invert ();
2934                 basic_block esrc = e->src;
2935                 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2936                 cur_bb = new_cur_bb;
2937                 basic_block latch_bb = next_bb;
2938                 next_bb = e->dest;
2939                 remove_edge (e);
2940                 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2941                 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2942                 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2943               }
2944             for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2945               {
2946                 tree vtype = TREE_TYPE (fd->loops[j].v);
2947                 tree itype = vtype;
2948                 if (POINTER_TYPE_P (itype))
2949                     itype = signed_type_for (itype);
2950                 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2951                                    && fd->loops[j].m2 == NULL_TREE
2952                                    && !fd->loops[j].non_rect_referenced);
2953                 if (j == fd->last_nonrect)
2954                     {
2955                       t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2956                       t = fold_convert (itype, t);
2957                       tree t2
2958                         = fold_convert (itype, unshare_expr (fd->loops[j].step));
2959                       t = fold_build2 (MULT_EXPR, itype, t, t2);
2960                       if (POINTER_TYPE_P (vtype))
2961                         t = fold_build_pointer_plus (n1,
2962                                                              fold_convert (sizetype, t));
2963                       else
2964                         t = fold_build2 (PLUS_EXPR, itype, n1, t);
2965                     }
2966                 else if (rect_p)
2967                     {
2968                       t = fold_convert (itype, vs[j]);
2969                       t = fold_build2 (MULT_EXPR, itype, t,
2970                                            fold_convert (itype, fd->loops[j].step));
2971                       if (POINTER_TYPE_P (vtype))
2972                         t = fold_build_pointer_plus (fd->loops[j].n1,
2973                                                              fold_convert (sizetype, t));
2974                       else
2975                         t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2976                     }
2977                 else
2978                     t = vs[j];
2979                 t = force_gimple_operand_gsi (gsi, t, false,
2980                                                       NULL_TREE, true,
2981                                                       GSI_SAME_STMT);
2982                 stmt = gimple_build_assign (fd->loops[j].v, t);
2983                 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2984               }
2985             if (gsi_end_p (*gsi))
2986               *gsi = gsi_last_bb (gsi_bb (*gsi));
2987             else
2988               gsi_prev (gsi);
2989             if (bb_triang)
2990               {
2991                 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2992                 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2993                 *gsi = gsi_after_labels (e->dest);
2994                 if (!gsi_end_p (*gsi))
2995                     gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2996                 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2997               }
2998           }
2999       else
3000           {
3001             t = fold_convert (itype, t);
3002             t = fold_build2 (MULT_EXPR, itype, t,
3003                                  fold_convert (itype, fd->loops[i].step));
3004             if (POINTER_TYPE_P (vtype))
3005               t = fold_build_pointer_plus (fd->loops[i].n1, t);
3006             else
3007               t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3008             t = force_gimple_operand_gsi (gsi, t,
3009                                                   DECL_P (fd->loops[i].v)
3010                                                   && TREE_ADDRESSABLE (fd->loops[i].v),
3011                                                   NULL_TREE, false,
3012                                                   GSI_CONTINUE_LINKING);
3013             stmt = gimple_build_assign (fd->loops[i].v, t);
3014             gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3015           }
3016       if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3017           {
3018             t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3019             t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3020                                                   false, GSI_CONTINUE_LINKING);
3021             stmt = gimple_build_assign (tem, t);
3022             gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3023           }
3024       if (i == fd->last_nonrect)
3025           i = fd->first_nonrect;
3026     }
3027   if (fd->non_rect)
3028     for (i = 0; i <= fd->last_nonrect; i++)
3029       if (fd->loops[i].m2)
3030           {
3031             tree itype = TREE_TYPE (fd->loops[i].v);
3032 
3033             tree t;
3034             if (POINTER_TYPE_P (itype))
3035               {
3036                 gcc_assert (integer_onep (fd->loops[i].m2));
3037                 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3038                 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3039                                                      t);
3040               }
3041             else
3042               {
3043                 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3044                 t = fold_build2 (MULT_EXPR, itype,
3045                                      fd->loops[i - fd->loops[i].outer].v, t);
3046                 t = fold_build2 (PLUS_EXPR, itype, t,
3047                                      fold_convert (itype,
3048                                                        unshare_expr (fd->loops[i].n2)));
3049               }
3050             nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3051             t = force_gimple_operand_gsi (gsi, t, false,
3052                                                   NULL_TREE, false,
3053                                                   GSI_CONTINUE_LINKING);
3054             stmt = gimple_build_assign (nonrect_bounds[i], t);
3055             gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3056           }
3057 }
3058 
3059 /* Helper function for expand_omp_for_*.  Generate code like:
3060     L10:
3061           V3 += STEP3;
3062           if (V3 cond3 N32) goto BODY_BB; else goto L11;
3063     L11:
3064           V3 = N31;
3065           V2 += STEP2;
3066           if (V2 cond2 N22) goto BODY_BB; else goto L12;
3067     L12:
3068           V2 = N21;
3069           V1 += STEP1;
3070           goto BODY_BB;
3071    For non-rectangular loops, use temporaries stored in nonrect_bounds
3072    for the upper bounds if M?2 multiplier is present.  Given e.g.
3073    for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3074    for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3075    for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3076    for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3077    do:
3078     L10:
3079           V4 += STEP4;
3080           if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3081     L11:
3082           V4 = N41 + M41 * V2; // This can be left out if the loop
3083                                    // refers to the immediate parent loop
3084           V3 += STEP3;
3085           if (V3 cond3 N32) goto BODY_BB; else goto L12;
3086     L12:
3087           V3 = N31;
3088           V2 += STEP2;
3089           if (V2 cond2 N22) goto L120; else goto L13;
3090     L120:
3091           V4 = N41 + M41 * V2;
3092           NONRECT_BOUND4 = N42 + M42 * V2;
3093           if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3094     L13:
3095           V2 = N21;
3096           V1 += STEP1;
3097           goto L120;  */
3098 
3099 static basic_block
extract_omp_for_update_vars(struct omp_for_data * fd,tree * nonrect_bounds,basic_block cont_bb,basic_block body_bb)3100 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3101                                    basic_block cont_bb, basic_block body_bb)
3102 {
3103   basic_block last_bb, bb, collapse_bb = NULL;
3104   int i;
3105   gimple_stmt_iterator gsi;
3106   edge e;
3107   tree t;
3108   gimple *stmt;
3109 
3110   last_bb = cont_bb;
3111   for (i = fd->collapse - 1; i >= 0; i--)
3112     {
3113       tree vtype = TREE_TYPE (fd->loops[i].v);
3114 
3115       bb = create_empty_bb (last_bb);
3116       add_bb_to_loop (bb, last_bb->loop_father);
3117       gsi = gsi_start_bb (bb);
3118 
3119       if (i < fd->collapse - 1)
3120           {
3121             e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3122             e->probability
3123               = profile_probability::guessed_always ().apply_scale (1, 8);
3124 
3125             struct omp_for_data_loop *l = &fd->loops[i + 1];
3126             if (l->m1 == NULL_TREE || l->outer != 1)
3127               {
3128                 t = l->n1;
3129                 if (l->m1)
3130                     {
3131                       if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3132                         t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3133                                                              fold_convert (sizetype, t));
3134                       else
3135                         {
3136                           tree t2
3137                               = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3138                                                fd->loops[i + 1 - l->outer].v, l->m1);
3139                           t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3140                         }
3141                     }
3142                 t = force_gimple_operand_gsi (&gsi, t,
3143                                                       DECL_P (l->v)
3144                                                       && TREE_ADDRESSABLE (l->v),
3145                                                       NULL_TREE, false,
3146                                                       GSI_CONTINUE_LINKING);
3147                 stmt = gimple_build_assign (l->v, t);
3148                 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3149               }
3150           }
3151       else
3152           collapse_bb = bb;
3153 
3154       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3155 
3156       if (POINTER_TYPE_P (vtype))
3157           t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3158       else
3159           t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3160       t = force_gimple_operand_gsi (&gsi, t,
3161                                             DECL_P (fd->loops[i].v)
3162                                             && TREE_ADDRESSABLE (fd->loops[i].v),
3163                                             NULL_TREE, false, GSI_CONTINUE_LINKING);
3164       stmt = gimple_build_assign (fd->loops[i].v, t);
3165       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3166 
3167       if (fd->loops[i].non_rect_referenced)
3168           {
3169             basic_block update_bb = NULL, prev_bb = NULL;
3170             for (int j = i + 1; j <= fd->last_nonrect; j++)
3171               if (j - fd->loops[j].outer == i)
3172                 {
3173                     tree n1, n2;
3174                     struct omp_for_data_loop *l = &fd->loops[j];
3175                     basic_block this_bb = create_empty_bb (last_bb);
3176                     add_bb_to_loop (this_bb, last_bb->loop_father);
3177                     gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3178                     if (prev_bb)
3179                       {
3180                         e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3181                         e->probability
3182                           = profile_probability::guessed_always ().apply_scale (7,
3183                                                                                               8);
3184                         set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3185                       }
3186                     if (l->m1)
3187                       {
3188                         if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3189                           t = fold_build_pointer_plus (fd->loops[i].v,
3190                                                                fold_convert (sizetype,
3191                                                                                  l->n1));
3192                         else
3193                           {
3194                               t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3195                                                    fd->loops[i].v);
3196                               t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3197                                                    t, l->n1);
3198                           }
3199                         n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3200                                                                false,
3201                                                                GSI_CONTINUE_LINKING);
3202                         stmt = gimple_build_assign (l->v, n1);
3203                         gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3204                         n1 = l->v;
3205                       }
3206                     else
3207                       n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3208                                                              NULL_TREE, false,
3209                                                              GSI_CONTINUE_LINKING);
3210                     if (l->m2)
3211                       {
3212                         if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3213                           t = fold_build_pointer_plus (fd->loops[i].v,
3214                                                                fold_convert (sizetype,
3215                                                                                  l->n2));
3216                         else
3217                           {
3218                               t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3219                                                    fd->loops[i].v);
3220                               t = fold_build2 (PLUS_EXPR,
3221                                                    TREE_TYPE (nonrect_bounds[j]),
3222                                                    t, unshare_expr (l->n2));
3223                           }
3224                         n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3225                                                                false,
3226                                                                GSI_CONTINUE_LINKING);
3227                         stmt = gimple_build_assign (nonrect_bounds[j], n2);
3228                         gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3229                         n2 = nonrect_bounds[j];
3230                       }
3231                     else
3232                       n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3233                                                              true, NULL_TREE, false,
3234                                                              GSI_CONTINUE_LINKING);
3235                     gcond *cond_stmt
3236                       = gimple_build_cond (l->cond_code, n1, n2,
3237                                                NULL_TREE, NULL_TREE);
3238                     gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3239                     if (update_bb == NULL)
3240                       update_bb = this_bb;
3241                     e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3242                     e->probability
3243                       = profile_probability::guessed_always ().apply_scale (1, 8);
3244                     if (prev_bb == NULL)
3245                       set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3246                     prev_bb = this_bb;
3247                 }
3248             e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3249             e->probability
3250               = profile_probability::guessed_always ().apply_scale (7, 8);
3251             body_bb = update_bb;
3252           }
3253 
3254       if (i > 0)
3255           {
3256             if (fd->loops[i].m2)
3257               t = nonrect_bounds[i];
3258             else
3259               t = unshare_expr (fd->loops[i].n2);
3260             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3261                                                   false, GSI_CONTINUE_LINKING);
3262             tree v = fd->loops[i].v;
3263             if (DECL_P (v) && TREE_ADDRESSABLE (v))
3264               v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3265                                                     false, GSI_CONTINUE_LINKING);
3266             t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3267             stmt = gimple_build_cond_empty (t);
3268             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3269             if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3270                                expand_omp_regimplify_p, NULL, NULL)
3271                 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3272                                   expand_omp_regimplify_p, NULL, NULL))
3273               gimple_regimplify_operands (stmt, &gsi);
3274             e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3275             e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3276           }
3277       else
3278           make_edge (bb, body_bb, EDGE_FALLTHRU);
3279       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3280       last_bb = bb;
3281     }
3282 
3283   return collapse_bb;
3284 }
3285 
3286 /* Expand #pragma omp ordered depend(source).  */
3287 
3288 static void
expand_omp_ordered_source(gimple_stmt_iterator * gsi,struct omp_for_data * fd,tree * counts,location_t loc)3289 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3290                                  tree *counts, location_t loc)
3291 {
3292   enum built_in_function source_ix
3293     = fd->iter_type == long_integer_type_node
3294       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3295   gimple *g
3296     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3297                                build_fold_addr_expr (counts[fd->ordered]));
3298   gimple_set_location (g, loc);
3299   gsi_insert_before (gsi, g, GSI_SAME_STMT);
3300 }
3301 
3302 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
3303 
3304 static void
expand_omp_ordered_sink(gimple_stmt_iterator * gsi,struct omp_for_data * fd,tree * counts,tree c,location_t loc)3305 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3306                                tree *counts, tree c, location_t loc)
3307 {
3308   auto_vec<tree, 10> args;
3309   enum built_in_function sink_ix
3310     = fd->iter_type == long_integer_type_node
3311       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3312   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3313   int i;
3314   gimple_stmt_iterator gsi2 = *gsi;
3315   bool warned_step = false;
3316 
3317   for (i = 0; i < fd->ordered; i++)
3318     {
3319       tree step = NULL_TREE;
3320       off = TREE_PURPOSE (deps);
3321       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3322           {
3323             step = TREE_OPERAND (off, 1);
3324             off = TREE_OPERAND (off, 0);
3325           }
3326       if (!integer_zerop (off))
3327           {
3328             gcc_assert (fd->loops[i].cond_code == LT_EXPR
3329                           || fd->loops[i].cond_code == GT_EXPR);
3330             bool forward = fd->loops[i].cond_code == LT_EXPR;
3331             if (step)
3332               {
                /* Non-simple Fortran DO loops.  If step is variable,
                   we don't know even the direction at compile time, so
                   can't warn.  */
3336                 if (TREE_CODE (step) != INTEGER_CST)
3337                     break;
3338                 forward = tree_int_cst_sgn (step) != -1;
3339               }
3340             if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3341               warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3342                                         "waiting for lexically later iteration");
3343             break;
3344           }
3345       deps = TREE_CHAIN (deps);
3346     }
3347   /* If all offsets corresponding to the collapsed loops are zero,
3348      this depend clause can be ignored.  FIXME: but there is still a
3349      flush needed.  We need to emit one __sync_synchronize () for it
3350      though (perhaps conditionally)?  Solve this together with the
3351      conservative dependence folding optimization.
3352   if (i >= fd->collapse)
3353     return;  */
3354 
3355   deps = OMP_CLAUSE_DECL (c);
3356   gsi_prev (&gsi2);
3357   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3358   edge e2 = split_block_after_labels (e1->dest);
3359 
3360   gsi2 = gsi_after_labels (e1->dest);
3361   *gsi = gsi_last_bb (e1->src);
3362   for (i = 0; i < fd->ordered; i++)
3363     {
3364       tree itype = TREE_TYPE (fd->loops[i].v);
3365       tree step = NULL_TREE;
3366       tree orig_off = NULL_TREE;
3367       if (POINTER_TYPE_P (itype))
3368           itype = sizetype;
3369       if (i)
3370           deps = TREE_CHAIN (deps);
3371       off = TREE_PURPOSE (deps);
3372       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3373           {
3374             step = TREE_OPERAND (off, 1);
3375             off = TREE_OPERAND (off, 0);
3376             gcc_assert (fd->loops[i].cond_code == LT_EXPR
3377                           && integer_onep (fd->loops[i].step)
3378                           && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3379           }
3380       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3381       if (step)
3382           {
3383             off = fold_convert_loc (loc, itype, off);
3384             orig_off = off;
3385             off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3386           }
3387 
3388       if (integer_zerop (off))
3389           t = boolean_true_node;
3390       else
3391           {
3392             tree a;
3393             tree co = fold_convert_loc (loc, itype, off);
3394             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3395               {
3396                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3397                     co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3398                 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3399                                            TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3400                                            co);
3401               }
3402             else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3403               a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3404                                          fd->loops[i].v, co);
3405             else
3406               a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3407                                          fd->loops[i].v, co);
3408             if (step)
3409               {
3410                 tree t1, t2;
3411                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3412                     t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3413                                               fd->loops[i].n1);
3414                 else
3415                     t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3416                                               fd->loops[i].n2);
3417                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3418                     t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3419                                               fd->loops[i].n2);
3420                 else
3421                     t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3422                                               fd->loops[i].n1);
3423                 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3424                                            step, build_int_cst (TREE_TYPE (step), 0));
3425                 if (TREE_CODE (step) != INTEGER_CST)
3426                     {
3427                       t1 = unshare_expr (t1);
3428                       t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3429                                                              false, GSI_CONTINUE_LINKING);
3430                       t2 = unshare_expr (t2);
3431                       t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3432                                                              false, GSI_CONTINUE_LINKING);
3433                     }
3434                 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3435                                            t, t2, t1);
3436               }
3437             else if (fd->loops[i].cond_code == LT_EXPR)
3438               {
3439                 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3440                     t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3441                                              fd->loops[i].n1);
3442                 else
3443                     t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3444                                              fd->loops[i].n2);
3445               }
3446             else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3447               t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3448                                          fd->loops[i].n2);
3449             else
3450               t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3451                                          fd->loops[i].n1);
3452           }
3453       if (cond)
3454           cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3455       else
3456           cond = t;
3457 
3458       off = fold_convert_loc (loc, itype, off);
3459 
3460       if (step
3461             || (fd->loops[i].cond_code == LT_EXPR
3462                 ? !integer_onep (fd->loops[i].step)
3463                 : !integer_minus_onep (fd->loops[i].step)))
3464           {
3465             if (step == NULL_TREE
3466                 && TYPE_UNSIGNED (itype)
3467                 && fd->loops[i].cond_code == GT_EXPR)
3468               t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3469                                          fold_build1_loc (loc, NEGATE_EXPR, itype,
3470                                                               s));
3471             else
3472               t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3473                                          orig_off ? orig_off : off, s);
3474             t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3475                                      build_int_cst (itype, 0));
3476             if (integer_zerop (t) && !warned_step)
3477               {
3478                 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3479                                           "refers to iteration never in the iteration "
3480                                           "space");
3481                 warned_step = true;
3482               }
3483             cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3484                                           cond, t);
3485           }
3486 
3487       if (i <= fd->collapse - 1 && fd->collapse > 1)
3488           t = fd->loop.v;
3489       else if (counts[i])
3490           t = counts[i];
3491       else
3492           {
3493             t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3494                                      fd->loops[i].v, fd->loops[i].n1);
3495             t = fold_convert_loc (loc, fd->iter_type, t);
3496           }
3497       if (step)
3498           /* We have divided off by step already earlier.  */;
3499       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3500           off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3501                                      fold_build1_loc (loc, NEGATE_EXPR, itype,
3502                                                             s));
3503       else
3504           off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3505       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3506           off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3507       off = fold_convert_loc (loc, fd->iter_type, off);
3508       if (i <= fd->collapse - 1 && fd->collapse > 1)
3509           {
3510             if (i)
3511               off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3512                                            off);
3513             if (i < fd->collapse - 1)
3514               {
3515                 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3516                                               counts[i]);
3517                 continue;
3518               }
3519           }
3520       off = unshare_expr (off);
3521       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3522       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3523                                             true, GSI_SAME_STMT);
3524       args.safe_push (t);
3525     }
3526   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3527   gimple_set_location (g, loc);
3528   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3529 
3530   cond = unshare_expr (cond);
3531   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3532                                            GSI_CONTINUE_LINKING);
3533   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3534   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3535   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3536   e1->probability = e3->probability.invert ();
3537   e1->flags = EDGE_TRUE_VALUE;
3538   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3539 
3540   *gsi = gsi_after_labels (e2->dest);
3541 }
3542 
3543 /* Expand all #pragma omp ordered depend(source) and
3544    #pragma omp ordered depend(sink:...) constructs in the current
3545    #pragma omp for ordered(n) region.  */
3546 
3547 static void
expand_omp_ordered_source_sink(struct omp_region * region,struct omp_for_data * fd,tree * counts,basic_block cont_bb)3548 expand_omp_ordered_source_sink (struct omp_region *region,
3549                                         struct omp_for_data *fd, tree *counts,
3550                                         basic_block cont_bb)
3551 {
3552   struct omp_region *inner;
3553   int i;
3554   for (i = fd->collapse - 1; i < fd->ordered; i++)
3555     if (i == fd->collapse - 1 && fd->collapse > 1)
3556       counts[i] = NULL_TREE;
3557     else if (i >= fd->collapse && !cont_bb)
3558       counts[i] = build_zero_cst (fd->iter_type);
3559     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3560                && integer_onep (fd->loops[i].step))
3561       counts[i] = NULL_TREE;
3562     else
3563       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3564   tree atype
3565     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3566   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3567   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3568 
3569   for (inner = region->inner; inner; inner = inner->next)
3570     if (inner->type == GIMPLE_OMP_ORDERED)
3571       {
3572           gomp_ordered *ord_stmt = inner->ord_stmt;
3573           gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3574           location_t loc = gimple_location (ord_stmt);
3575           tree c;
3576           for (c = gimple_omp_ordered_clauses (ord_stmt);
3577                c; c = OMP_CLAUSE_CHAIN (c))
3578             if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3579               break;
3580           if (c)
3581             expand_omp_ordered_source (&gsi, fd, counts, loc);
3582           for (c = gimple_omp_ordered_clauses (ord_stmt);
3583                c; c = OMP_CLAUSE_CHAIN (c))
3584             if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3585               expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3586           gsi_remove (&gsi, true);
3587       }
3588 }
3589 
3590 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3591    collapsed.  */
3592 
3593 static basic_block
expand_omp_for_ordered_loops(struct omp_for_data * fd,tree * counts,basic_block cont_bb,basic_block body_bb,basic_block l0_bb,bool ordered_lastprivate)3594 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3595                                     basic_block cont_bb, basic_block body_bb,
3596                                     basic_block l0_bb, bool ordered_lastprivate)
3597 {
3598   if (fd->ordered == fd->collapse)
3599     return cont_bb;
3600 
3601   if (!cont_bb)
3602     {
3603       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3604       for (int i = fd->collapse; i < fd->ordered; i++)
3605           {
3606             tree type = TREE_TYPE (fd->loops[i].v);
3607             tree n1 = fold_convert (type, fd->loops[i].n1);
3608             expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3609             tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3610                                     size_int (i - fd->collapse + 1),
3611                                     NULL_TREE, NULL_TREE);
3612             expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3613           }
3614       return NULL;
3615     }
3616 
3617   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3618     {
3619       tree t, type = TREE_TYPE (fd->loops[i].v);
3620       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3621       expand_omp_build_assign (&gsi, fd->loops[i].v,
3622                                      fold_convert (type, fd->loops[i].n1));
3623       if (counts[i])
3624           expand_omp_build_assign (&gsi, counts[i],
3625                                          build_zero_cst (fd->iter_type));
3626       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3627                                 size_int (i - fd->collapse + 1),
3628                                 NULL_TREE, NULL_TREE);
3629       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3630       if (!gsi_end_p (gsi))
3631           gsi_prev (&gsi);
3632       else
3633           gsi = gsi_last_bb (body_bb);
3634       edge e1 = split_block (body_bb, gsi_stmt (gsi));
3635       basic_block new_body = e1->dest;
3636       if (body_bb == cont_bb)
3637           cont_bb = new_body;
3638       edge e2 = NULL;
3639       basic_block new_header;
3640       if (EDGE_COUNT (cont_bb->preds) > 0)
3641           {
3642             gsi = gsi_last_bb (cont_bb);
3643             if (POINTER_TYPE_P (type))
3644               t = fold_build_pointer_plus (fd->loops[i].v,
3645                                                    fold_convert (sizetype,
3646                                                                    fd->loops[i].step));
3647             else
3648               t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3649                                    fold_convert (type, fd->loops[i].step));
3650             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3651             if (counts[i])
3652               {
3653                 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3654                                      build_int_cst (fd->iter_type, 1));
3655                 expand_omp_build_assign (&gsi, counts[i], t);
3656                 t = counts[i];
3657               }
3658             else
3659               {
3660                 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3661                                      fd->loops[i].v, fd->loops[i].n1);
3662                 t = fold_convert (fd->iter_type, t);
3663                 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3664                                                       true, GSI_SAME_STMT);
3665               }
3666             aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3667                                size_int (i - fd->collapse + 1),
3668                                NULL_TREE, NULL_TREE);
3669             expand_omp_build_assign (&gsi, aref, t);
3670             gsi_prev (&gsi);
3671             e2 = split_block (cont_bb, gsi_stmt (gsi));
3672             new_header = e2->dest;
3673           }
3674       else
3675           new_header = cont_bb;
3676       gsi = gsi_after_labels (new_header);
3677       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3678                                                    true, GSI_SAME_STMT);
3679       tree n2
3680           = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3681                                             true, NULL_TREE, true, GSI_SAME_STMT);
3682       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3683       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3684       edge e3 = split_block (new_header, gsi_stmt (gsi));
3685       cont_bb = e3->dest;
3686       remove_edge (e1);
3687       make_edge (body_bb, new_header, EDGE_FALLTHRU);
3688       e3->flags = EDGE_FALSE_VALUE;
3689       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3690       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3691       e1->probability = e3->probability.invert ();
3692 
3693       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3694       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3695 
3696       if (e2)
3697           {
3698             class loop *loop = alloc_loop ();
3699             loop->header = new_header;
3700             loop->latch = e2->src;
3701             add_loop (loop, l0_bb->loop_father);
3702           }
3703     }
3704 
3705   /* If there are any lastprivate clauses and it is possible some loops
3706      might have zero iterations, ensure all the decls are initialized,
3707      otherwise we could crash evaluating C++ class iterators with lastprivate
3708      clauses.  */
3709   bool need_inits = false;
3710   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3711     if (need_inits)
3712       {
3713           tree type = TREE_TYPE (fd->loops[i].v);
3714           gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3715           expand_omp_build_assign (&gsi, fd->loops[i].v,
3716                                          fold_convert (type, fd->loops[i].n1));
3717       }
3718     else
3719       {
3720           tree type = TREE_TYPE (fd->loops[i].v);
3721           tree this_cond = fold_build2 (fd->loops[i].cond_code,
3722                                               boolean_type_node,
3723                                               fold_convert (type, fd->loops[i].n1),
3724                                               fold_convert (type, fd->loops[i].n2));
3725           if (!integer_onep (this_cond))
3726             need_inits = true;
3727       }
3728 
3729   return cont_bb;
3730 }
3731 
3732 /* A subroutine of expand_omp_for.  Generate code for a parallel
3733    loop with any schedule.  Given parameters:
3734 
3735           for (V = N1; V cond N2; V += STEP) BODY;
3736 
3737    where COND is "<" or ">", we generate pseudocode
3738 
3739           more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3740           if (more) goto L0; else goto L3;
3741     L0:
3742           V = istart0;
3743           iend = iend0;
3744     L1:
3745           BODY;
3746           V += STEP;
3747           if (V cond iend) goto L1; else goto L2;
3748     L2:
3749           if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3750     L3:
3751 
3752     If this is a combined omp parallel loop, instead of the call to
3753     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3754     If this is gimple_omp_for_combined_p loop, then instead of assigning
3755     V and iend in L0 we assign the first two _looptemp_ clause decls of the
3756     inner GIMPLE_OMP_FOR and V += STEP; and
3757     if (V cond iend) goto L1; else goto L2; are removed.
3758 
3759     For collapsed loops, given parameters:
3760       collapse(3)
3761       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3762           for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3763             for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3764               BODY;
3765 
3766     we generate pseudocode
3767 
3768           if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3769           if (cond3 is <)
3770             adj = STEP3 - 1;
3771           else
3772             adj = STEP3 + 1;
3773           count3 = (adj + N32 - N31) / STEP3;
3774           if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3775           if (cond2 is <)
3776             adj = STEP2 - 1;
3777           else
3778             adj = STEP2 + 1;
3779           count2 = (adj + N22 - N21) / STEP2;
3780           if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3781           if (cond1 is <)
3782             adj = STEP1 - 1;
3783           else
3784             adj = STEP1 + 1;
3785           count1 = (adj + N12 - N11) / STEP1;
3786           count = count1 * count2 * count3;
3787           goto Z1;
3788     Z0:
3789           count = 0;
3790     Z1:
3791           more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3792           if (more) goto L0; else goto L3;
3793     L0:
3794           V = istart0;
3795           T = V;
3796           V3 = N31 + (T % count3) * STEP3;
3797           T = T / count3;
3798           V2 = N21 + (T % count2) * STEP2;
3799           T = T / count2;
3800           V1 = N11 + T * STEP1;
3801           iend = iend0;
3802     L1:
3803           BODY;
3804           V += 1;
3805           if (V < iend) goto L10; else goto L2;
3806     L10:
3807           V3 += STEP3;
3808           if (V3 cond3 N32) goto L1; else goto L11;
3809     L11:
3810           V3 = N31;
3811           V2 += STEP2;
3812           if (V2 cond2 N22) goto L1; else goto L12;
3813     L12:
3814           V2 = N21;
3815           V1 += STEP1;
3816           goto L1;
3817     L2:
3818           if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3819     L3:
3820 
3821       */
3822 
3823 static void
expand_omp_for_generic(struct omp_region * region,struct omp_for_data * fd,enum built_in_function start_fn,enum built_in_function next_fn,tree sched_arg,gimple * inner_stmt)3824 expand_omp_for_generic (struct omp_region *region,
3825                               struct omp_for_data *fd,
3826                               enum built_in_function start_fn,
3827                               enum built_in_function next_fn,
3828                               tree sched_arg,
3829                               gimple *inner_stmt)
3830 {
3831   tree type, istart0, iend0, iend;
3832   tree t, vmain, vback, bias = NULL_TREE;
3833   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3834   basic_block l2_bb = NULL, l3_bb = NULL;
3835   gimple_stmt_iterator gsi;
3836   gassign *assign_stmt;
3837   bool in_combined_parallel = is_combined_parallel (region);
3838   bool broken_loop = region->cont == NULL;
3839   edge e, ne;
3840   tree *counts = NULL;
3841   int i;
3842   bool ordered_lastprivate = false;
3843 
3844   gcc_assert (!broken_loop || !in_combined_parallel);
3845   gcc_assert (fd->iter_type == long_integer_type_node
3846                 || !in_combined_parallel);
3847 
3848   entry_bb = region->entry;
3849   cont_bb = region->cont;
3850   collapse_bb = NULL;
3851   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3852   gcc_assert (broken_loop
3853                 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3854   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3855   l1_bb = single_succ (l0_bb);
3856   if (!broken_loop)
3857     {
3858       l2_bb = create_empty_bb (cont_bb);
3859       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3860                       || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3861                           == l1_bb));
3862       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3863     }
3864   else
3865     l2_bb = NULL;
3866   l3_bb = BRANCH_EDGE (entry_bb)->dest;
3867   exit_bb = region->exit;
3868 
3869   gsi = gsi_last_nondebug_bb (entry_bb);
3870 
3871   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3872   if (fd->ordered
3873       && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3874                                 OMP_CLAUSE_LASTPRIVATE))
3875     ordered_lastprivate = false;
3876   tree reductions = NULL_TREE;
3877   tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3878   tree memv = NULL_TREE;
3879   if (fd->lastprivate_conditional)
3880     {
3881       tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3882                                         OMP_CLAUSE__CONDTEMP_);
3883       if (fd->have_pointer_condtemp)
3884           condtemp = OMP_CLAUSE_DECL (c);
3885       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3886       cond_var = OMP_CLAUSE_DECL (c);
3887     }
3888   if (sched_arg)
3889     {
3890       if (fd->have_reductemp)
3891           {
3892             tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3893                                             OMP_CLAUSE__REDUCTEMP_);
3894             reductions = OMP_CLAUSE_DECL (c);
3895             gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3896             gimple *g = SSA_NAME_DEF_STMT (reductions);
3897             reductions = gimple_assign_rhs1 (g);
3898             OMP_CLAUSE_DECL (c) = reductions;
3899             entry_bb = gimple_bb (g);
3900             edge e = split_block (entry_bb, g);
3901             if (region->entry == entry_bb)
3902               region->entry = e->dest;
3903             gsi = gsi_last_bb (entry_bb);
3904           }
3905       else
3906           reductions = null_pointer_node;
3907       if (fd->have_pointer_condtemp)
3908           {
3909             tree type = TREE_TYPE (condtemp);
3910             memv = create_tmp_var (type);
3911             TREE_ADDRESSABLE (memv) = 1;
3912             unsigned HOST_WIDE_INT sz
3913               = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3914             sz *= fd->lastprivate_conditional;
3915             expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3916                                            false);
3917             mem = build_fold_addr_expr (memv);
3918           }
3919       else
3920           mem = null_pointer_node;
3921     }
3922   if (fd->collapse > 1 || fd->ordered)
3923     {
3924       int first_zero_iter1 = -1, first_zero_iter2 = -1;
3925       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3926 
3927       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3928       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3929                                           zero_iter1_bb, first_zero_iter1,
3930                                           zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3931 
3932       if (zero_iter1_bb)
3933           {
3934             /* Some counts[i] vars might be uninitialized if
3935                some loop has zero iterations.  But the body shouldn't
3936                be executed in that case, so just avoid uninit warnings.  */
3937             for (i = first_zero_iter1;
3938                  i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3939               if (SSA_VAR_P (counts[i]))
3940                 suppress_warning (counts[i], OPT_Wuninitialized);
3941             gsi_prev (&gsi);
3942             e = split_block (entry_bb, gsi_stmt (gsi));
3943             entry_bb = e->dest;
3944             make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3945             gsi = gsi_last_nondebug_bb (entry_bb);
3946             set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3947                                            get_immediate_dominator (CDI_DOMINATORS,
3948                                                                           zero_iter1_bb));
3949           }
3950       if (zero_iter2_bb)
3951           {
3952             /* Some counts[i] vars might be uninitialized if
3953                some loop has zero iterations.  But the body shouldn't
3954                be executed in that case, so just avoid uninit warnings.  */
3955             for (i = first_zero_iter2; i < fd->ordered; i++)
3956               if (SSA_VAR_P (counts[i]))
3957                 suppress_warning (counts[i], OPT_Wuninitialized);
3958             if (zero_iter1_bb)
3959               make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3960             else
3961               {
3962                 gsi_prev (&gsi);
3963                 e = split_block (entry_bb, gsi_stmt (gsi));
3964                 entry_bb = e->dest;
3965                 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3966                 gsi = gsi_last_nondebug_bb (entry_bb);
3967                 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3968                                                get_immediate_dominator
3969                                                    (CDI_DOMINATORS, zero_iter2_bb));
3970               }
3971           }
3972       if (fd->collapse == 1)
3973           {
3974             counts[0] = fd->loop.n2;
3975             fd->loop = fd->loops[0];
3976           }
3977     }
3978 
3979   type = TREE_TYPE (fd->loop.v);
3980   istart0 = create_tmp_var (fd->iter_type, ".istart0");
3981   iend0 = create_tmp_var (fd->iter_type, ".iend0");
3982   TREE_ADDRESSABLE (istart0) = 1;
3983   TREE_ADDRESSABLE (iend0) = 1;
3984 
3985   /* See if we need to bias by LLONG_MIN.  */
3986   if (fd->iter_type == long_long_unsigned_type_node
3987       && TREE_CODE (type) == INTEGER_TYPE
3988       && !TYPE_UNSIGNED (type)
3989       && fd->ordered == 0)
3990     {
3991       tree n1, n2;
3992 
3993       if (fd->loop.cond_code == LT_EXPR)
3994           {
3995             n1 = fd->loop.n1;
3996             n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3997           }
3998       else
3999           {
4000             n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4001             n2 = fd->loop.n1;
4002           }
4003       if (TREE_CODE (n1) != INTEGER_CST
4004             || TREE_CODE (n2) != INTEGER_CST
4005             || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4006           bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4007     }
4008 
4009   gimple_stmt_iterator gsif = gsi;
4010   gsi_prev (&gsif);
4011 
4012   tree arr = NULL_TREE;
4013   if (in_combined_parallel)
4014     {
4015       gcc_assert (fd->ordered == 0);
4016       /* In a combined parallel loop, emit a call to
4017            GOMP_loop_foo_next.  */
4018       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4019                                  build_fold_addr_expr (istart0),
4020                                  build_fold_addr_expr (iend0));
4021     }
4022   else
4023     {
4024       tree t0, t1, t2, t3, t4;
4025       /* If this is not a combined parallel loop, emit a call to
4026            GOMP_loop_foo_start in ENTRY_BB.  */
4027       t4 = build_fold_addr_expr (iend0);
4028       t3 = build_fold_addr_expr (istart0);
4029       if (fd->ordered)
4030           {
4031             t0 = build_int_cst (unsigned_type_node,
4032                                     fd->ordered - fd->collapse + 1);
4033             arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4034                                                                       fd->ordered
4035                                                                       - fd->collapse + 1),
4036                                         ".omp_counts");
4037             DECL_NAMELESS (arr) = 1;
4038             TREE_ADDRESSABLE (arr) = 1;
4039             TREE_STATIC (arr) = 1;
4040             vec<constructor_elt, va_gc> *v;
4041             vec_alloc (v, fd->ordered - fd->collapse + 1);
4042             int idx;
4043 
4044             for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4045               {
4046                 tree c;
4047                 if (idx == 0 && fd->collapse > 1)
4048                     c = fd->loop.n2;
4049                 else
4050                     c = counts[idx + fd->collapse - 1];
4051                 tree purpose = size_int (idx);
4052                 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4053                 if (TREE_CODE (c) != INTEGER_CST)
4054                     TREE_STATIC (arr) = 0;
4055               }
4056 
4057             DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4058             if (!TREE_STATIC (arr))
4059               force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4060                                                                 void_type_node, arr),
4061                                               true, NULL_TREE, true, GSI_SAME_STMT);
4062             t1 = build_fold_addr_expr (arr);
4063             t2 = NULL_TREE;
4064           }
4065       else
4066           {
4067             t2 = fold_convert (fd->iter_type, fd->loop.step);
4068             t1 = fd->loop.n2;
4069             t0 = fd->loop.n1;
4070             if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071               {
4072                 tree innerc
4073                     = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4074                                            OMP_CLAUSE__LOOPTEMP_);
4075                 gcc_assert (innerc);
4076                 t0 = OMP_CLAUSE_DECL (innerc);
4077                 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4078                                                   OMP_CLAUSE__LOOPTEMP_);
4079                 gcc_assert (innerc);
4080                 t1 = OMP_CLAUSE_DECL (innerc);
4081               }
4082             if (POINTER_TYPE_P (TREE_TYPE (t0))
4083                 && TYPE_PRECISION (TREE_TYPE (t0))
4084                      != TYPE_PRECISION (fd->iter_type))
4085               {
4086                 /* Avoid casting pointers to integer of a different size.  */
4087                 tree itype = signed_type_for (type);
4088                 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4089                 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4090               }
4091             else
4092               {
4093                 t1 = fold_convert (fd->iter_type, t1);
4094                 t0 = fold_convert (fd->iter_type, t0);
4095               }
4096             if (bias)
4097               {
4098                 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4099                 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4100               }
4101           }
4102       if (fd->iter_type == long_integer_type_node || fd->ordered)
4103           {
4104             if (fd->chunk_size)
4105               {
4106                 t = fold_convert (fd->iter_type, fd->chunk_size);
4107                 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4108                 if (sched_arg)
4109                     {
4110                       if (fd->ordered)
4111                         t = build_call_expr (builtin_decl_explicit (start_fn),
4112                                                    8, t0, t1, sched_arg, t, t3, t4,
4113                                                    reductions, mem);
4114                       else
4115                         t = build_call_expr (builtin_decl_explicit (start_fn),
4116                                                    9, t0, t1, t2, sched_arg, t, t3, t4,
4117                                                    reductions, mem);
4118                     }
4119                 else if (fd->ordered)
4120                     t = build_call_expr (builtin_decl_explicit (start_fn),
4121                                              5, t0, t1, t, t3, t4);
4122                 else
4123                     t = build_call_expr (builtin_decl_explicit (start_fn),
4124                                              6, t0, t1, t2, t, t3, t4);
4125               }
4126             else if (fd->ordered)
4127               t = build_call_expr (builtin_decl_explicit (start_fn),
4128                                          4, t0, t1, t3, t4);
4129             else
4130               t = build_call_expr (builtin_decl_explicit (start_fn),
4131                                          5, t0, t1, t2, t3, t4);
4132           }
4133       else
4134           {
4135             tree t5;
4136             tree c_bool_type;
4137             tree bfn_decl;
4138 
4139             /* The GOMP_loop_ull_*start functions have additional boolean
4140                argument, true for < loops and false for > loops.
4141                In Fortran, the C bool type can be different from
4142                boolean_type_node.  */
4143             bfn_decl = builtin_decl_explicit (start_fn);
4144             c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4145             t5 = build_int_cst (c_bool_type,
4146                                     fd->loop.cond_code == LT_EXPR ? 1 : 0);
4147             if (fd->chunk_size)
4148               {
4149                 tree bfn_decl = builtin_decl_explicit (start_fn);
4150                 t = fold_convert (fd->iter_type, fd->chunk_size);
4151                 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4152                 if (sched_arg)
4153                     t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4154                                              t, t3, t4, reductions, mem);
4155                 else
4156                     t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4157               }
4158             else
4159               t = build_call_expr (builtin_decl_explicit (start_fn),
4160                                          6, t5, t0, t1, t2, t3, t4);
4161           }
4162     }
4163   if (TREE_TYPE (t) != boolean_type_node)
4164     t = fold_build2 (NE_EXPR, boolean_type_node,
4165                          t, build_int_cst (TREE_TYPE (t), 0));
4166   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4167                                         true, GSI_SAME_STMT);
4168   if (arr && !TREE_STATIC (arr))
4169     {
4170       tree clobber = build_clobber (TREE_TYPE (arr));
4171       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4172                                GSI_SAME_STMT);
4173     }
4174   if (fd->have_pointer_condtemp)
4175     expand_omp_build_assign (&gsi, condtemp, memv, false);
4176   if (fd->have_reductemp)
4177     {
4178       gimple *g = gsi_stmt (gsi);
4179       gsi_remove (&gsi, true);
4180       release_ssa_name (gimple_assign_lhs (g));
4181 
4182       entry_bb = region->entry;
4183       gsi = gsi_last_nondebug_bb (entry_bb);
4184 
4185       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4186     }
4187   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4188 
4189   /* Remove the GIMPLE_OMP_FOR statement.  */
4190   gsi_remove (&gsi, true);
4191 
4192   if (gsi_end_p (gsif))
4193     gsif = gsi_after_labels (gsi_bb (gsif));
4194   gsi_next (&gsif);
4195 
4196   /* Iteration setup for sequential loop goes in L0_BB.  */
4197   tree startvar = fd->loop.v;
4198   tree endvar = NULL_TREE;
4199 
4200   if (gimple_omp_for_combined_p (fd->for_stmt))
4201     {
4202       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4203                       && gimple_omp_for_kind (inner_stmt)
4204                          == GF_OMP_FOR_KIND_SIMD);
4205       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4206                                              OMP_CLAUSE__LOOPTEMP_);
4207       gcc_assert (innerc);
4208       startvar = OMP_CLAUSE_DECL (innerc);
4209       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4210                                         OMP_CLAUSE__LOOPTEMP_);
4211       gcc_assert (innerc);
4212       endvar = OMP_CLAUSE_DECL (innerc);
4213     }
4214 
4215   gsi = gsi_start_bb (l0_bb);
4216   t = istart0;
4217   if (fd->ordered && fd->collapse == 1)
4218     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4219                          fold_convert (fd->iter_type, fd->loop.step));
4220   else if (bias)
4221     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4222   if (fd->ordered && fd->collapse == 1)
4223     {
4224       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4225           t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4226                                fd->loop.n1, fold_convert (sizetype, t));
4227       else
4228           {
4229             t = fold_convert (TREE_TYPE (startvar), t);
4230             t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4231                                  fd->loop.n1, t);
4232           }
4233     }
4234   else
4235     {
4236       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4237           t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4238       t = fold_convert (TREE_TYPE (startvar), t);
4239     }
4240   t = force_gimple_operand_gsi (&gsi, t,
4241                                         DECL_P (startvar)
4242                                         && TREE_ADDRESSABLE (startvar),
4243                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
4244   assign_stmt = gimple_build_assign (startvar, t);
4245   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4246   if (cond_var)
4247     {
4248       tree itype = TREE_TYPE (cond_var);
4249       /* For lastprivate(conditional:) itervar, we need some iteration
4250            counter that starts at unsigned non-zero and increases.
4251            Prefer as few IVs as possible, so if we can use startvar
4252            itself, use that, or startvar + constant (those would be
4253            incremented with step), and as last resort use the s0 + 1
4254            incremented by 1.  */
4255       if ((fd->ordered && fd->collapse == 1)
4256             || bias
4257             || POINTER_TYPE_P (type)
4258             || TREE_CODE (fd->loop.n1) != INTEGER_CST
4259             || fd->loop.cond_code != LT_EXPR)
4260           t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4261                                build_int_cst (itype, 1));
4262       else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4263           t = fold_convert (itype, t);
4264       else
4265           {
4266             tree c = fold_convert (itype, fd->loop.n1);
4267             c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4268             t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4269           }
4270       t = force_gimple_operand_gsi (&gsi, t, false,
4271                                             NULL_TREE, false, GSI_CONTINUE_LINKING);
4272       assign_stmt = gimple_build_assign (cond_var, t);
4273       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4274     }
4275 
4276   t = iend0;
4277   if (fd->ordered && fd->collapse == 1)
4278     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4279                          fold_convert (fd->iter_type, fd->loop.step));
4280   else if (bias)
4281     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4282   if (fd->ordered && fd->collapse == 1)
4283     {
4284       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4285           t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4286                                fd->loop.n1, fold_convert (sizetype, t));
4287       else
4288           {
4289             t = fold_convert (TREE_TYPE (startvar), t);
4290             t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4291                                  fd->loop.n1, t);
4292           }
4293     }
4294   else
4295     {
4296       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4297           t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4298       t = fold_convert (TREE_TYPE (startvar), t);
4299     }
4300   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4301                                            false, GSI_CONTINUE_LINKING);
4302   if (endvar)
4303     {
4304       assign_stmt = gimple_build_assign (endvar, iend);
4305       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4306       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4307           assign_stmt = gimple_build_assign (fd->loop.v, iend);
4308       else
4309           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4310       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4311     }
4312   /* Handle linear clause adjustments.  */
4313   tree itercnt = NULL_TREE;
4314   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4315     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4316            c; c = OMP_CLAUSE_CHAIN (c))
4317       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4318             && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4319           {
4320             tree d = OMP_CLAUSE_DECL (c);
4321             tree t = d, a, dest;
4322             if (omp_privatize_by_reference (t))
4323               t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4324             tree type = TREE_TYPE (t);
4325             if (POINTER_TYPE_P (type))
4326               type = sizetype;
4327             dest = unshare_expr (t);
4328             tree v = create_tmp_var (TREE_TYPE (t), NULL);
4329             expand_omp_build_assign (&gsif, v, t);
4330             if (itercnt == NULL_TREE)
4331               {
4332                 itercnt = startvar;
4333                 tree n1 = fd->loop.n1;
4334                 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4335                     {
4336                       itercnt
4337                         = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4338                                             itercnt);
4339                       n1 = fold_convert (TREE_TYPE (itercnt), n1);
4340                     }
4341                 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4342                                              itercnt, n1);
4343                 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4344                                              itercnt, fd->loop.step);
4345                 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4346                                                               NULL_TREE, false,
4347                                                               GSI_CONTINUE_LINKING);
4348               }
4349             a = fold_build2 (MULT_EXPR, type,
4350                                  fold_convert (type, itercnt),
4351                                  fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4352             t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4353                                  : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4354             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4355                                                   false, GSI_CONTINUE_LINKING);
4356             expand_omp_build_assign (&gsi, dest, t, true);
4357           }
4358   if (fd->collapse > 1)
4359     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4360 
4361   if (fd->ordered)
4362     {
4363       /* Until now, counts array contained number of iterations or
4364            variable containing it for ith loop.  From now on, we need
4365            those counts only for collapsed loops, and only for the 2nd
4366            till the last collapsed one.  Move those one element earlier,
4367            we'll use counts[fd->collapse - 1] for the first source/sink
4368            iteration counter and so on and counts[fd->ordered]
4369            as the array holding the current counter values for
4370            depend(source).  */
4371       if (fd->collapse > 1)
4372           memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4373       if (broken_loop)
4374           {
4375             int i;
4376             for (i = fd->collapse; i < fd->ordered; i++)
4377               {
4378                 tree type = TREE_TYPE (fd->loops[i].v);
4379                 tree this_cond
4380                     = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4381                                      fold_convert (type, fd->loops[i].n1),
4382                                      fold_convert (type, fd->loops[i].n2));
4383                 if (!integer_onep (this_cond))
4384                     break;
4385               }
4386             if (i < fd->ordered)
4387               {
4388                 if (entry_bb->loop_father != l0_bb->loop_father)
4389                     {
4390                       remove_bb_from_loops (l0_bb);
4391                       add_bb_to_loop (l0_bb, entry_bb->loop_father);
4392                       gcc_assert (single_succ (l0_bb) == l1_bb);
4393                     }
4394                 cont_bb
4395                     = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4396                 add_bb_to_loop (cont_bb, l0_bb->loop_father);
4397                 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4398                 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4399                 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4400                 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4401                 make_edge (cont_bb, l1_bb, 0);
4402                 l2_bb = create_empty_bb (cont_bb);
4403                 broken_loop = false;
4404               }
4405           }
4406       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4407       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4408                                                         l0_bb, ordered_lastprivate);
4409       if (counts[fd->collapse - 1])
4410           {
4411             gcc_assert (fd->collapse == 1);
4412             gsi = gsi_last_bb (l0_bb);
4413             expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4414                                            istart0, true);
4415             if (cont_bb)
4416               {
4417                 gsi = gsi_last_bb (cont_bb);
4418                 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4419                                      counts[fd->collapse - 1],
4420                                      build_int_cst (fd->iter_type, 1));
4421                 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4422                 tree aref = build4 (ARRAY_REF, fd->iter_type,
4423                                           counts[fd->ordered], size_zero_node,
4424                                           NULL_TREE, NULL_TREE);
4425                 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4426               }
4427             t = counts[fd->collapse - 1];
4428           }
4429       else if (fd->collapse > 1)
4430           t = fd->loop.v;
4431       else
4432           {
4433             t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4434                                  fd->loops[0].v, fd->loops[0].n1);
4435             t = fold_convert (fd->iter_type, t);
4436           }
4437       gsi = gsi_last_bb (l0_bb);
4438       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4439                                 size_zero_node, NULL_TREE, NULL_TREE);
4440       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4441                                             false, GSI_CONTINUE_LINKING);
4442       expand_omp_build_assign (&gsi, aref, t, true);
4443     }
4444 
4445   if (!broken_loop)
4446     {
4447       /* Code to control the increment and predicate for the sequential
4448            loop goes in the CONT_BB.  */
4449       gsi = gsi_last_nondebug_bb (cont_bb);
4450       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4451       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4452       vmain = gimple_omp_continue_control_use (cont_stmt);
4453       vback = gimple_omp_continue_control_def (cont_stmt);
4454 
4455       if (cond_var)
4456           {
4457             tree itype = TREE_TYPE (cond_var);
4458             tree t2;
4459             if ((fd->ordered && fd->collapse == 1)
4460                  || bias
4461                  || POINTER_TYPE_P (type)
4462                  || TREE_CODE (fd->loop.n1) != INTEGER_CST
4463                  || fd->loop.cond_code != LT_EXPR)
4464               t2 = build_int_cst (itype, 1);
4465             else
4466               t2 = fold_convert (itype, fd->loop.step);
4467             t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4468             t2 = force_gimple_operand_gsi (&gsi, t2, false,
4469                                                    NULL_TREE, true, GSI_SAME_STMT);
4470             assign_stmt = gimple_build_assign (cond_var, t2);
4471             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4472           }
4473 
4474       if (!gimple_omp_for_combined_p (fd->for_stmt))
4475           {
4476             if (POINTER_TYPE_P (type))
4477               t = fold_build_pointer_plus (vmain, fd->loop.step);
4478             else
4479               t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4480             t = force_gimple_operand_gsi (&gsi, t,
4481                                                   DECL_P (vback)
4482                                                   && TREE_ADDRESSABLE (vback),
4483                                                   NULL_TREE, true, GSI_SAME_STMT);
4484             assign_stmt = gimple_build_assign (vback, t);
4485             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4486 
4487             if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4488               {
4489                 tree tem;
4490                 if (fd->collapse > 1)
4491                     tem = fd->loop.v;
4492                 else
4493                     {
4494                       tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4495                                              fd->loops[0].v, fd->loops[0].n1);
4496                       tem = fold_convert (fd->iter_type, tem);
4497                     }
4498                 tree aref = build4 (ARRAY_REF, fd->iter_type,
4499                                           counts[fd->ordered], size_zero_node,
4500                                           NULL_TREE, NULL_TREE);
4501                 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4502                                                         true, GSI_SAME_STMT);
4503                 expand_omp_build_assign (&gsi, aref, tem);
4504               }
4505 
4506             t = build2 (fd->loop.cond_code, boolean_type_node,
4507                           DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4508                           iend);
4509             gcond *cond_stmt = gimple_build_cond_empty (t);
4510             gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4511           }
4512 
4513       /* Remove GIMPLE_OMP_CONTINUE.  */
4514       gsi_remove (&gsi, true);
4515 
4516       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4517           collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4518 
4519       /* Emit code to get the next parallel iteration in L2_BB.  */
4520       gsi = gsi_start_bb (l2_bb);
4521 
4522       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4523                                  build_fold_addr_expr (istart0),
4524                                  build_fold_addr_expr (iend0));
4525       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4526                                             false, GSI_CONTINUE_LINKING);
4527       if (TREE_TYPE (t) != boolean_type_node)
4528           t = fold_build2 (NE_EXPR, boolean_type_node,
4529                                t, build_int_cst (TREE_TYPE (t), 0));
4530       gcond *cond_stmt = gimple_build_cond_empty (t);
4531       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4532     }
4533 
4534   /* Add the loop cleanup function.  */
4535   gsi = gsi_last_nondebug_bb (exit_bb);
4536   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4537     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4538   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4539     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4540   else
4541     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4542   gcall *call_stmt = gimple_build_call (t, 0);
4543   if (fd->ordered)
4544     {
4545       tree arr = counts[fd->ordered];
4546       tree clobber = build_clobber (TREE_TYPE (arr));
4547       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4548                               GSI_SAME_STMT);
4549     }
4550   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4551     {
4552       gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4553       if (fd->have_reductemp)
4554           {
4555             gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4556                                                      gimple_call_lhs (call_stmt));
4557             gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4558           }
4559     }
4560   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4561   gsi_remove (&gsi, true);
4562 
4563   /* Connect the new blocks.  */
4564   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4565   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4566 
4567   if (!broken_loop)
4568     {
4569       gimple_seq phis;
4570 
4571       e = find_edge (cont_bb, l3_bb);
4572       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4573 
4574       phis = phi_nodes (l3_bb);
4575       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4576           {
4577             gimple *phi = gsi_stmt (gsi);
4578             SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4579                        PHI_ARG_DEF_FROM_EDGE (phi, e));
4580           }
4581       remove_edge (e);
4582 
4583       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4584       e = find_edge (cont_bb, l1_bb);
4585       if (e == NULL)
4586           {
4587             e = BRANCH_EDGE (cont_bb);
4588             gcc_assert (single_succ (e->dest) == l1_bb);
4589           }
4590       if (gimple_omp_for_combined_p (fd->for_stmt))
4591           {
4592             remove_edge (e);
4593             e = NULL;
4594           }
4595       else if (fd->collapse > 1)
4596           {
4597             remove_edge (e);
4598             e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4599           }
4600       else
4601           e->flags = EDGE_TRUE_VALUE;
4602       if (e)
4603           {
4604             e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4605             find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4606           }
4607       else
4608           {
4609             e = find_edge (cont_bb, l2_bb);
4610             e->flags = EDGE_FALLTHRU;
4611           }
4612       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4613 
4614       if (gimple_in_ssa_p (cfun))
4615           {
4616             /* Add phis to the outer loop that connect to the phis in the inner,
4617                original loop, and move the loop entry value of the inner phi to
4618                the loop entry value of the outer phi.  */
4619             gphi_iterator psi;
4620             for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4621               {
4622                 location_t locus;
4623                 gphi *nphi;
4624                 gphi *exit_phi = psi.phi ();
4625 
4626                 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4627                     continue;
4628 
4629                 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4630                 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4631 
4632                 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4633                 edge latch_to_l1 = find_edge (latch, l1_bb);
4634                 gphi *inner_phi
4635                     = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4636 
4637                 tree t = gimple_phi_result (exit_phi);
4638                 tree new_res = copy_ssa_name (t, NULL);
4639                 nphi = create_phi_node (new_res, l0_bb);
4640 
4641                 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4642                 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4643                 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4644                 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4645                 add_phi_arg (nphi, t, entry_to_l0, locus);
4646 
4647                 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4648                 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4649 
4650                 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4651               }
4652           }
4653 
4654       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4655                                      recompute_dominator (CDI_DOMINATORS, l2_bb));
4656       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4657                                      recompute_dominator (CDI_DOMINATORS, l3_bb));
4658       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4659                                      recompute_dominator (CDI_DOMINATORS, l0_bb));
4660       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4661                                      recompute_dominator (CDI_DOMINATORS, l1_bb));
4662 
4663       /* We enter expand_omp_for_generic with a loop.  This original loop may
4664            have its own loop struct, or it may be part of an outer loop struct
4665            (which may be the fake loop).  */
4666       class loop *outer_loop = entry_bb->loop_father;
4667       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4668 
4669       add_bb_to_loop (l2_bb, outer_loop);
4670 
4671       /* We've added a new loop around the original loop.  Allocate the
4672            corresponding loop struct.  */
4673       class loop *new_loop = alloc_loop ();
4674       new_loop->header = l0_bb;
4675       new_loop->latch = l2_bb;
4676       add_loop (new_loop, outer_loop);
4677 
4678       /* Allocate a loop structure for the original loop unless we already
4679            had one.  */
4680       if (!orig_loop_has_loop_struct
4681             && !gimple_omp_for_combined_p (fd->for_stmt))
4682           {
4683             class loop *orig_loop = alloc_loop ();
4684             orig_loop->header = l1_bb;
4685             /* The loop may have multiple latches.  */
4686             add_loop (orig_loop, new_loop);
4687           }
4688     }
4689 }
4690 
4691 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
4692    compute the needed allocation size: of the team allocations if !ALLOC,
4693    of the thread allocations if ALLOC.  SZ is the initial size needed for
4694    other purposes, ALLOC_ALIGN is the guaranteed alignment of the allocation
4695    in bytes, and CNT is the number of elements of each array: for !ALLOC
4696    this is omp_get_num_threads (), for ALLOC it is the number of iterations
4697    handled by the current thread.  If PTR is non-NULL, it is the start of
4698    the allocation and this routine shall assign to OMP_CLAUSE_DECL (c) of
4699    those _scantemp_ clauses pointers to the corresponding arrays.  */
4700 
4701 static tree
expand_omp_scantemp_alloc(tree clauses,tree ptr,unsigned HOST_WIDE_INT sz,unsigned HOST_WIDE_INT alloc_align,tree cnt,gimple_stmt_iterator * gsi,bool alloc)4702 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4703                                  unsigned HOST_WIDE_INT alloc_align, tree cnt,
4704                                  gimple_stmt_iterator *gsi, bool alloc)
4705 {
4706   tree eltsz = NULL_TREE;
4707   unsigned HOST_WIDE_INT preval = 0;
4708   if (ptr && sz)
4709     ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4710                            ptr, size_int (sz));
4711   for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4712     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4713           && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4714           && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4715       {
4716           tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4717           unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4718           if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4719             {
4720               unsigned HOST_WIDE_INT szl
4721                 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4722               szl = least_bit_hwi (szl);
4723               if (szl)
4724                 al = MIN (al, szl);
4725             }
4726           if (ptr == NULL_TREE)
4727             {
4728               if (eltsz == NULL_TREE)
4729                 eltsz = TYPE_SIZE_UNIT (pointee_type);
4730               else
4731                 eltsz = size_binop (PLUS_EXPR, eltsz,
4732                                           TYPE_SIZE_UNIT (pointee_type));
4733             }
4734           if (preval == 0 && al <= alloc_align)
4735             {
4736               unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4737               sz += diff;
4738               if (diff && ptr)
4739                 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4740                                          ptr, size_int (diff));
4741             }
4742           else if (al > preval)
4743             {
4744               if (ptr)
4745                 {
4746                     ptr = fold_convert (pointer_sized_int_node, ptr);
4747                     ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4748                                            build_int_cst (pointer_sized_int_node,
4749                                                               al - 1));
4750                     ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4751                                            build_int_cst (pointer_sized_int_node,
4752                                                               -(HOST_WIDE_INT) al));
4753                     ptr = fold_convert (ptr_type_node, ptr);
4754                 }
4755               else
4756                 sz += al - 1;
4757             }
4758           if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4759             preval = al;
4760           else
4761             preval = 1;
4762           if (ptr)
4763             {
4764               expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4765               ptr = OMP_CLAUSE_DECL (c);
4766               ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4767                                      size_binop (MULT_EXPR, cnt,
4768                                                      TYPE_SIZE_UNIT (pointee_type)));
4769             }
4770       }
4771 
4772   if (ptr == NULL_TREE)
4773     {
4774       eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4775       if (sz)
4776           eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4777       return eltsz;
4778     }
4779   else
4780     return ptr;
4781 }
4782 
4783 /* Return the last _looptemp_ clause if one has been created for
4784    lastprivate on distribute parallel for{, simd} or taskloop.
4785    FD is the loop data and INNERC should be the second _looptemp_
4786    clause (the one holding the end of the range).
4787    This is followed by collapse - 1 _looptemp_ clauses for the
4788    counts[1] and up, and for triangular loops followed by 4
4789    further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4790    one factor and one adjn1).  After this there is optionally one
4791    _looptemp_ clause that this function returns.  */
4792 
4793 static tree
find_lastprivate_looptemp(struct omp_for_data * fd,tree innerc)4794 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4795 {
4796   gcc_assert (innerc);
4797   int count = fd->collapse - 1;
4798   if (fd->non_rect
4799       && fd->last_nonrect == fd->first_nonrect + 1
4800       && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4801     count += 4;
4802   for (int i = 0; i < count; i++)
4803     {
4804       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4805                                         OMP_CLAUSE__LOOPTEMP_);
4806       gcc_assert (innerc);
4807     }
4808   return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4809                                 OMP_CLAUSE__LOOPTEMP_);
4810 }
4811 
4812 /* A subroutine of expand_omp_for.  Generate code for a parallel
4813    loop with static schedule and no specified chunk size.  Given
4814    parameters:
4815 
4816           for (V = N1; V cond N2; V += STEP) BODY;
4817 
4818    where COND is "<" or ">", we generate pseudocode
4819 
4820           if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4821           if (cond is <)
4822             adj = STEP - 1;
4823           else
4824             adj = STEP + 1;
4825           if ((__typeof (V)) -1 > 0 && cond is >)
4826             n = -(adj + N2 - N1) / -STEP;
4827           else
4828             n = (adj + N2 - N1) / STEP;
4829           q = n / nthreads;
4830           tt = n % nthreads;
4831           if (threadid < tt) goto L3; else goto L4;
4832     L3:
4833           tt = 0;
4834           q = q + 1;
4835     L4:
4836           s0 = q * threadid + tt;
4837           e0 = s0 + q;
4838           V = s0 * STEP + N1;
4839           if (s0 >= e0) goto L2; else goto L0;
4840     L0:
4841           e = e0 * STEP + N1;
4842     L1:
4843           BODY;
4844           V += STEP;
4845           if (V cond e) goto L1;
4846     L2:
4847 */
4848 
4849 static void
expand_omp_for_static_nochunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)4850 expand_omp_for_static_nochunk (struct omp_region *region,
4851                                      struct omp_for_data *fd,
4852                                      gimple *inner_stmt)
4853 {
4854   tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4855   tree type, itype, vmain, vback;
4856   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4857   basic_block body_bb, cont_bb, collapse_bb = NULL;
4858   basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4859   basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4860   gimple_stmt_iterator gsi, gsip;
4861   edge ep;
4862   bool broken_loop = region->cont == NULL;
4863   tree *counts = NULL;
4864   tree n1, n2, step;
4865   tree reductions = NULL_TREE;
4866   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4867 
4868   itype = type = TREE_TYPE (fd->loop.v);
4869   if (POINTER_TYPE_P (type))
4870     itype = signed_type_for (type);
4871 
4872   entry_bb = region->entry;
4873   cont_bb = region->cont;
4874   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4875   fin_bb = BRANCH_EDGE (entry_bb)->dest;
4876   gcc_assert (broken_loop
4877                 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4878   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4879   body_bb = single_succ (seq_start_bb);
4880   if (!broken_loop)
4881     {
4882       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4883                       || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4884       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4885     }
4886   exit_bb = region->exit;
4887 
4888   /* Iteration space partitioning goes in ENTRY_BB.  */
4889   gsi = gsi_last_nondebug_bb (entry_bb);
4890   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4891   gsip = gsi;
4892   gsi_prev (&gsip);
4893 
4894   if (fd->collapse > 1)
4895     {
4896       int first_zero_iter = -1, dummy = -1;
4897       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4898 
4899       counts = XALLOCAVEC (tree, fd->collapse);
4900       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4901                                           fin_bb, first_zero_iter,
4902                                           dummy_bb, dummy, l2_dom_bb);
4903       t = NULL_TREE;
4904     }
4905   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4906     t = integer_one_node;
4907   else
4908     t = fold_binary (fd->loop.cond_code, boolean_type_node,
4909                          fold_convert (type, fd->loop.n1),
4910                          fold_convert (type, fd->loop.n2));
4911   if (fd->collapse == 1
4912       && TYPE_UNSIGNED (type)
4913       && (t == NULL_TREE || !integer_onep (t)))
4914     {
4915       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4916       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4917                                              true, GSI_SAME_STMT);
4918       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4919       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4920                                              true, GSI_SAME_STMT);
4921       gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4922                                                             n1, n2);
4923       ep = split_block (entry_bb, cond_stmt);
4924       ep->flags = EDGE_TRUE_VALUE;
4925       entry_bb = ep->dest;
4926       ep->probability = profile_probability::very_likely ();
4927       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4928       ep->probability = profile_probability::very_unlikely ();
4929       if (gimple_in_ssa_p (cfun))
4930           {
4931             int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4932             for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4933                  !gsi_end_p (gpi); gsi_next (&gpi))
4934               {
4935                 gphi *phi = gpi.phi ();
4936                 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4937                                  ep, UNKNOWN_LOCATION);
4938               }
4939           }
4940       gsi = gsi_last_bb (entry_bb);
4941     }
4942 
4943   if (fd->lastprivate_conditional)
4944     {
4945       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4946       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4947       if (fd->have_pointer_condtemp)
4948           condtemp = OMP_CLAUSE_DECL (c);
4949       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4950       cond_var = OMP_CLAUSE_DECL (c);
4951     }
4952   if (fd->have_reductemp
4953       /* For scan, we don't want to reinitialize condtemp before the
4954            second loop.  */
4955       || (fd->have_pointer_condtemp && !fd->have_scantemp)
4956       || fd->have_nonctrl_scantemp)
4957     {
4958       tree t1 = build_int_cst (long_integer_type_node, 0);
4959       tree t2 = build_int_cst (long_integer_type_node, 1);
4960       tree t3 = build_int_cstu (long_integer_type_node,
4961                                         (HOST_WIDE_INT_1U << 31) + 1);
4962       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4963       gimple_stmt_iterator gsi2 = gsi_none ();
4964       gimple *g = NULL;
4965       tree mem = null_pointer_node, memv = NULL_TREE;
4966       unsigned HOST_WIDE_INT condtemp_sz = 0;
4967       unsigned HOST_WIDE_INT alloc_align = 0;
4968       if (fd->have_reductemp)
4969           {
4970             gcc_assert (!fd->have_nonctrl_scantemp);
4971             tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4972             reductions = OMP_CLAUSE_DECL (c);
4973             gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4974             g = SSA_NAME_DEF_STMT (reductions);
4975             reductions = gimple_assign_rhs1 (g);
4976             OMP_CLAUSE_DECL (c) = reductions;
4977             gsi2 = gsi_for_stmt (g);
4978           }
4979       else
4980           {
4981             if (gsi_end_p (gsip))
4982               gsi2 = gsi_after_labels (region->entry);
4983             else
4984               gsi2 = gsip;
4985             reductions = null_pointer_node;
4986           }
4987       if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4988           {
4989             tree type;
4990             if (fd->have_pointer_condtemp)
4991               type = TREE_TYPE (condtemp);
4992             else
4993               type = ptr_type_node;
4994             memv = create_tmp_var (type);
4995             TREE_ADDRESSABLE (memv) = 1;
4996             unsigned HOST_WIDE_INT sz = 0;
4997             tree size = NULL_TREE;
4998             if (fd->have_pointer_condtemp)
4999               {
5000                 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5001                 sz *= fd->lastprivate_conditional;
5002                 condtemp_sz = sz;
5003               }
5004             if (fd->have_nonctrl_scantemp)
5005               {
5006                 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5007                 gimple *g = gimple_build_call (nthreads, 0);
5008                 nthreads = create_tmp_var (integer_type_node);
5009                 gimple_call_set_lhs (g, nthreads);
5010                 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5011                 nthreads = fold_convert (sizetype, nthreads);
5012                 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5013                 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5014                                                             alloc_align, nthreads, NULL,
5015                                                             false);
5016                 size = fold_convert (type, size);
5017               }
5018             else
5019               size = build_int_cst (type, sz);
5020             expand_omp_build_assign (&gsi2, memv, size, false);
5021             mem = build_fold_addr_expr (memv);
5022           }
5023       tree t
5024           = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5025                                  9, t1, t2, t2, t3, t1, null_pointer_node,
5026                                  null_pointer_node, reductions, mem);
5027       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5028                                         true, GSI_SAME_STMT);
5029       if (fd->have_pointer_condtemp)
5030           expand_omp_build_assign (&gsi2, condtemp, memv, false);
5031       if (fd->have_nonctrl_scantemp)
5032           {
5033             tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5034             expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5035                                              alloc_align, nthreads, &gsi2, false);
5036           }
5037       if (fd->have_reductemp)
5038           {
5039             gsi_remove (&gsi2, true);
5040             release_ssa_name (gimple_assign_lhs (g));
5041           }
5042     }
5043   switch (gimple_omp_for_kind (fd->for_stmt))
5044     {
5045     case GF_OMP_FOR_KIND_FOR:
5046       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5047       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5048       break;
5049     case GF_OMP_FOR_KIND_DISTRIBUTE:
5050       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5051       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5052       break;
5053     default:
5054       gcc_unreachable ();
5055     }
5056   nthreads = build_call_expr (nthreads, 0);
5057   nthreads = fold_convert (itype, nthreads);
5058   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5059                                                true, GSI_SAME_STMT);
5060   threadid = build_call_expr (threadid, 0);
5061   threadid = fold_convert (itype, threadid);
5062   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5063                                                true, GSI_SAME_STMT);
5064 
5065   n1 = fd->loop.n1;
5066   n2 = fd->loop.n2;
5067   step = fd->loop.step;
5068   if (gimple_omp_for_combined_into_p (fd->for_stmt))
5069     {
5070       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5071                                              OMP_CLAUSE__LOOPTEMP_);
5072       gcc_assert (innerc);
5073       n1 = OMP_CLAUSE_DECL (innerc);
5074       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5075                                         OMP_CLAUSE__LOOPTEMP_);
5076       gcc_assert (innerc);
5077       n2 = OMP_CLAUSE_DECL (innerc);
5078     }
5079   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5080                                          true, NULL_TREE, true, GSI_SAME_STMT);
5081   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5082                                          true, NULL_TREE, true, GSI_SAME_STMT);
5083   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5084                                            true, NULL_TREE, true, GSI_SAME_STMT);
5085 
5086   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5087   t = fold_build2 (PLUS_EXPR, itype, step, t);
5088   t = fold_build2 (PLUS_EXPR, itype, t, n2);
5089   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5090   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5091     t = fold_build2 (TRUNC_DIV_EXPR, itype,
5092                          fold_build1 (NEGATE_EXPR, itype, t),
5093                          fold_build1 (NEGATE_EXPR, itype, step));
5094   else
5095     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5096   t = fold_convert (itype, t);
5097   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5098 
5099   q = create_tmp_reg (itype, "q");
5100   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5101   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5102   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5103 
5104   tt = create_tmp_reg (itype, "tt");
5105   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5106   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5107   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5108 
5109   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5110   gcond *cond_stmt = gimple_build_cond_empty (t);
5111   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5112 
5113   second_bb = split_block (entry_bb, cond_stmt)->dest;
5114   gsi = gsi_last_nondebug_bb (second_bb);
5115   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5116 
5117   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5118                          GSI_SAME_STMT);
5119   gassign *assign_stmt
5120     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5121   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5122 
5123   third_bb = split_block (second_bb, assign_stmt)->dest;
5124   gsi = gsi_last_nondebug_bb (third_bb);
5125   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5126 
5127   if (fd->have_nonctrl_scantemp)
5128     {
5129       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5130       tree controlp = NULL_TREE, controlb = NULL_TREE;
5131       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5132           if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5133               && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5134             {
5135               if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5136                 controlb = OMP_CLAUSE_DECL (c);
5137               else
5138                 controlp = OMP_CLAUSE_DECL (c);
5139               if (controlb && controlp)
5140                 break;
5141             }
5142       gcc_assert (controlp && controlb);
5143       tree cnt = create_tmp_var (sizetype);
5144       gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5145       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5146       unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5147       tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5148                                                      alloc_align, cnt, NULL, true);
5149       tree size = create_tmp_var (sizetype);
5150       expand_omp_build_assign (&gsi, size, sz, false);
5151       tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5152                                     size, size_int (16384));
5153       expand_omp_build_assign (&gsi, controlb, cmp);
5154       g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5155                                    NULL_TREE, NULL_TREE);
5156       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5157       fourth_bb = split_block (third_bb, g)->dest;
5158       gsi = gsi_last_nondebug_bb (fourth_bb);
5159       /* FIXME: Once we have allocators, this should use allocator.  */
5160       g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5161       gimple_call_set_lhs (g, controlp);
5162       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5163       expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5164                                          &gsi, true);
5165       gsi_prev (&gsi);
5166       g = gsi_stmt (gsi);
5167       fifth_bb = split_block (fourth_bb, g)->dest;
5168       gsi = gsi_last_nondebug_bb (fifth_bb);
5169 
5170       g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5171       gimple_call_set_lhs (g, controlp);
5172       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5173       tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5174       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5175           if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5176               && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5177             {
5178               tree tmp = create_tmp_var (sizetype);
5179               tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5180               g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5181                                              TYPE_SIZE_UNIT (pointee_type));
5182               gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5183               g = gimple_build_call (alloca_decl, 2, tmp,
5184                                            size_int (TYPE_ALIGN (pointee_type)));
5185               gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5186               gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5187             }
5188 
5189       sixth_bb = split_block (fifth_bb, g)->dest;
5190       gsi = gsi_last_nondebug_bb (sixth_bb);
5191     }
5192 
5193   t = build2 (MULT_EXPR, itype, q, threadid);
5194   t = build2 (PLUS_EXPR, itype, t, tt);
5195   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5196 
5197   t = fold_build2 (PLUS_EXPR, itype, s0, q);
5198   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5199 
5200   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5201   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5202 
5203   /* Remove the GIMPLE_OMP_FOR statement.  */
5204   gsi_remove (&gsi, true);
5205 
5206   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5207   gsi = gsi_start_bb (seq_start_bb);
5208 
5209   tree startvar = fd->loop.v;
5210   tree endvar = NULL_TREE;
5211 
5212   if (gimple_omp_for_combined_p (fd->for_stmt))
5213     {
5214       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5215                          ? gimple_omp_parallel_clauses (inner_stmt)
5216                          : gimple_omp_for_clauses (inner_stmt);
5217       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5218       gcc_assert (innerc);
5219       startvar = OMP_CLAUSE_DECL (innerc);
5220       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5221                                         OMP_CLAUSE__LOOPTEMP_);
5222       gcc_assert (innerc);
5223       endvar = OMP_CLAUSE_DECL (innerc);
5224       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5225             && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5226           {
5227             innerc = find_lastprivate_looptemp (fd, innerc);
5228             if (innerc)
5229               {
5230                 /* If needed (distribute parallel for with lastprivate),
5231                      propagate down the total number of iterations.  */
5232                 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5233                                              fd->loop.n2);
5234                 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5235                                                       GSI_CONTINUE_LINKING);
5236                 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5237                 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5238               }
5239           }
5240     }
5241   t = fold_convert (itype, s0);
5242   t = fold_build2 (MULT_EXPR, itype, t, step);
5243   if (POINTER_TYPE_P (type))
5244     {
5245       t = fold_build_pointer_plus (n1, t);
5246       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5247             && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5248           t = fold_convert (signed_type_for (type), t);
5249     }
5250   else
5251     t = fold_build2 (PLUS_EXPR, type, t, n1);
5252   t = fold_convert (TREE_TYPE (startvar), t);
5253   t = force_gimple_operand_gsi (&gsi, t,
5254                                         DECL_P (startvar)
5255                                         && TREE_ADDRESSABLE (startvar),
5256                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
5257   assign_stmt = gimple_build_assign (startvar, t);
5258   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5259   if (cond_var)
5260     {
5261       tree itype = TREE_TYPE (cond_var);
5262       /* For lastprivate(conditional:) itervar, we need some iteration
5263            counter that starts at unsigned non-zero and increases.
5264            Prefer as few IVs as possible, so if we can use startvar
5265            itself, use that, or startvar + constant (those would be
5266            incremented with step), and as last resort use the s0 + 1
5267            incremented by 1.  */
5268       if (POINTER_TYPE_P (type)
5269             || TREE_CODE (n1) != INTEGER_CST
5270             || fd->loop.cond_code != LT_EXPR)
5271           t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5272                                build_int_cst (itype, 1));
5273       else if (tree_int_cst_sgn (n1) == 1)
5274           t = fold_convert (itype, t);
5275       else
5276           {
5277             tree c = fold_convert (itype, n1);
5278             c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5279             t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5280           }
5281       t = force_gimple_operand_gsi (&gsi, t, false,
5282                                             NULL_TREE, false, GSI_CONTINUE_LINKING);
5283       assign_stmt = gimple_build_assign (cond_var, t);
5284       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5285     }
5286 
5287   t = fold_convert (itype, e0);
5288   t = fold_build2 (MULT_EXPR, itype, t, step);
5289   if (POINTER_TYPE_P (type))
5290     {
5291       t = fold_build_pointer_plus (n1, t);
5292       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5293             && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5294           t = fold_convert (signed_type_for (type), t);
5295     }
5296   else
5297     t = fold_build2 (PLUS_EXPR, type, t, n1);
5298   t = fold_convert (TREE_TYPE (startvar), t);
5299   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5300                                         false, GSI_CONTINUE_LINKING);
5301   if (endvar)
5302     {
5303       assign_stmt = gimple_build_assign (endvar, e);
5304       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5305       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5306           assign_stmt = gimple_build_assign (fd->loop.v, e);
5307       else
5308           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5309       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5310     }
5311   /* Handle linear clause adjustments.  */
5312   tree itercnt = NULL_TREE;
5313   tree *nonrect_bounds = NULL;
5314   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5315     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5316            c; c = OMP_CLAUSE_CHAIN (c))
5317       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5318             && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5319           {
5320             tree d = OMP_CLAUSE_DECL (c);
5321             tree t = d, a, dest;
5322             if (omp_privatize_by_reference (t))
5323               t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5324             if (itercnt == NULL_TREE)
5325               {
5326                 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5327                     {
5328                       itercnt = fold_build2 (MINUS_EXPR, itype,
5329                                                    fold_convert (itype, n1),
5330                                                    fold_convert (itype, fd->loop.n1));
5331                       itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5332                       itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5333                       itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5334                                                                   NULL_TREE, false,
5335                                                                   GSI_CONTINUE_LINKING);
5336                     }
5337                 else
5338                     itercnt = s0;
5339               }
5340             tree type = TREE_TYPE (t);
5341             if (POINTER_TYPE_P (type))
5342               type = sizetype;
5343             a = fold_build2 (MULT_EXPR, type,
5344                                  fold_convert (type, itercnt),
5345                                  fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5346             dest = unshare_expr (t);
5347             t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5348                                  : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5349             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5350                                                   false, GSI_CONTINUE_LINKING);
5351             expand_omp_build_assign (&gsi, dest, t, true);
5352           }
5353   if (fd->collapse > 1)
5354     {
5355       if (fd->non_rect)
5356           {
5357             nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5358             memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5359           }
5360       expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5361                                         startvar);
5362     }
5363 
5364   if (!broken_loop)
5365     {
5366       /* The code controlling the sequential loop replaces the
5367            GIMPLE_OMP_CONTINUE.  */
5368       gsi = gsi_last_nondebug_bb (cont_bb);
5369       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5370       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5371       vmain = gimple_omp_continue_control_use (cont_stmt);
5372       vback = gimple_omp_continue_control_def (cont_stmt);
5373 
5374       if (cond_var)
5375           {
5376             tree itype = TREE_TYPE (cond_var);
5377             tree t2;
5378             if (POINTER_TYPE_P (type)
5379                 || TREE_CODE (n1) != INTEGER_CST
5380                 || fd->loop.cond_code != LT_EXPR)
5381               t2 = build_int_cst (itype, 1);
5382             else
5383               t2 = fold_convert (itype, step);
5384             t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5385             t2 = force_gimple_operand_gsi (&gsi, t2, false,
5386                                                    NULL_TREE, true, GSI_SAME_STMT);
5387             assign_stmt = gimple_build_assign (cond_var, t2);
5388             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5389           }
5390 
5391       if (!gimple_omp_for_combined_p (fd->for_stmt))
5392           {
5393             if (POINTER_TYPE_P (type))
5394               t = fold_build_pointer_plus (vmain, step);
5395             else
5396               t = fold_build2 (PLUS_EXPR, type, vmain, step);
5397             t = force_gimple_operand_gsi (&gsi, t,
5398                                                   DECL_P (vback)
5399                                                   && TREE_ADDRESSABLE (vback),
5400                                                   NULL_TREE, true, GSI_SAME_STMT);
5401             assign_stmt = gimple_build_assign (vback, t);
5402             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5403 
5404             t = build2 (fd->loop.cond_code, boolean_type_node,
5405                           DECL_P (vback) && TREE_ADDRESSABLE (vback)
5406                           ? t : vback, e);
5407             gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5408           }
5409 
5410       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5411       gsi_remove (&gsi, true);
5412 
5413       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5414           collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5415                                                                cont_bb, body_bb);
5416     }
5417 
5418   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
5419   gsi = gsi_last_nondebug_bb (exit_bb);
5420   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5421     {
5422       t = gimple_omp_return_lhs (gsi_stmt (gsi));
5423       if (fd->have_reductemp
5424             || ((fd->have_pointer_condtemp || fd->have_scantemp)
5425                 && !fd->have_nonctrl_scantemp))
5426           {
5427             tree fn;
5428             if (t)
5429               fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5430             else
5431               fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5432             gcall *g = gimple_build_call (fn, 0);
5433             if (t)
5434               {
5435                 gimple_call_set_lhs (g, t);
5436                 if (fd->have_reductemp)
5437                     gsi_insert_after (&gsi, gimple_build_assign (reductions,
5438                                                                            NOP_EXPR, t),
5439                                           GSI_SAME_STMT);
5440               }
5441             gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5442           }
5443       else
5444           gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5445     }
5446   else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5447              && !fd->have_nonctrl_scantemp)
5448     {
5449       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5450       gcall *g = gimple_build_call (fn, 0);
5451       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5452     }
5453   if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5454     {
5455       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5456       tree controlp = NULL_TREE, controlb = NULL_TREE;
5457       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5458           if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5459               && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5460             {
5461               if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5462                 controlb = OMP_CLAUSE_DECL (c);
5463               else
5464                 controlp = OMP_CLAUSE_DECL (c);
5465               if (controlb && controlp)
5466                 break;
5467             }
5468       gcc_assert (controlp && controlb);
5469       gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5470                                              NULL_TREE, NULL_TREE);
5471       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5472       exit1_bb = split_block (exit_bb, g)->dest;
5473       gsi = gsi_after_labels (exit1_bb);
5474       g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5475                                    controlp);
5476       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5477       exit2_bb = split_block (exit1_bb, g)->dest;
5478       gsi = gsi_after_labels (exit2_bb);
5479       g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5480                                    controlp);
5481       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5482       exit3_bb = split_block (exit2_bb, g)->dest;
5483       gsi = gsi_after_labels (exit3_bb);
5484     }
5485   gsi_remove (&gsi, true);
5486 
5487   /* Connect all the blocks.  */
5488   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5489   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5490   ep = find_edge (entry_bb, second_bb);
5491   ep->flags = EDGE_TRUE_VALUE;
5492   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5493   if (fourth_bb)
5494     {
5495       ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5496       ep->probability
5497           = profile_probability::guessed_always ().apply_scale (1, 2);
5498       ep = find_edge (third_bb, fourth_bb);
5499       ep->flags = EDGE_TRUE_VALUE;
5500       ep->probability
5501           = profile_probability::guessed_always ().apply_scale (1, 2);
5502       ep = find_edge (fourth_bb, fifth_bb);
5503       redirect_edge_and_branch (ep, sixth_bb);
5504     }
5505   else
5506     sixth_bb = third_bb;
5507   find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5508   find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5509   if (exit1_bb)
5510     {
5511       ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5512       ep->probability
5513           = profile_probability::guessed_always ().apply_scale (1, 2);
5514       ep = find_edge (exit_bb, exit1_bb);
5515       ep->flags = EDGE_TRUE_VALUE;
5516       ep->probability
5517           = profile_probability::guessed_always ().apply_scale (1, 2);
5518       ep = find_edge (exit1_bb, exit2_bb);
5519       redirect_edge_and_branch (ep, exit3_bb);
5520     }
5521 
5522   if (!broken_loop)
5523     {
5524       ep = find_edge (cont_bb, body_bb);
5525       if (ep == NULL)
5526           {
5527             ep = BRANCH_EDGE (cont_bb);
5528             gcc_assert (single_succ (ep->dest) == body_bb);
5529           }
5530       if (gimple_omp_for_combined_p (fd->for_stmt))
5531           {
5532             remove_edge (ep);
5533             ep = NULL;
5534           }
5535       else if (fd->collapse > 1)
5536           {
5537             remove_edge (ep);
5538             ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5539           }
5540       else
5541           ep->flags = EDGE_TRUE_VALUE;
5542       find_edge (cont_bb, fin_bb)->flags
5543           = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5544     }
5545 
5546   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5547   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5548   if (fourth_bb)
5549     {
5550       set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5551       set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5552     }
5553   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5554 
5555   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5556                                  recompute_dominator (CDI_DOMINATORS, body_bb));
5557   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5558                                  recompute_dominator (CDI_DOMINATORS, fin_bb));
5559   if (exit1_bb)
5560     {
5561       set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5562       set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5563     }
5564 
5565   class loop *loop = body_bb->loop_father;
5566   if (loop != entry_bb->loop_father)
5567     {
5568       gcc_assert (broken_loop || loop->header == body_bb);
5569       gcc_assert (broken_loop
5570                       || loop->latch == region->cont
5571                       || single_pred (loop->latch) == region->cont);
5572       return;
5573     }
5574 
5575   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5576     {
5577       loop = alloc_loop ();
5578       loop->header = body_bb;
5579       if (collapse_bb == NULL)
5580           loop->latch = cont_bb;
5581       add_loop (loop, body_bb->loop_father);
5582     }
5583 }
5584 
5585 /* Return phi in E->DEST with ARG on edge E.  */
5586 
5587 static gphi *
find_phi_with_arg_on_edge(tree arg,edge e)5588 find_phi_with_arg_on_edge (tree arg, edge e)
5589 {
5590   basic_block bb = e->dest;
5591 
5592   for (gphi_iterator gpi = gsi_start_phis (bb);
5593        !gsi_end_p (gpi);
5594        gsi_next (&gpi))
5595     {
5596       gphi *phi = gpi.phi ();
5597       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5598           return phi;
5599     }
5600 
5601   return NULL;
5602 }
5603 
5604 /* A subroutine of expand_omp_for.  Generate code for a parallel
5605    loop with static schedule and a specified chunk size.  Given
5606    parameters:
5607 
5608           for (V = N1; V cond N2; V += STEP) BODY;
5609 
5610    where COND is "<" or ">", we generate pseudocode
5611 
5612           if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
5613           if (cond is <)
5614             adj = STEP - 1;
5615           else
5616             adj = STEP + 1;
5617           if ((__typeof (V)) -1 > 0 && cond is >)
5618             n = -(adj + N2 - N1) / -STEP;
5619           else
5620             n = (adj + N2 - N1) / STEP;
5621           trip = 0;
5622           V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
5623                                                         here so that V is defined
5624                                                         if the loop is not entered
5625     L0:
5626           s0 = (trip * nthreads + threadid) * CHUNK;
5627           e0 = min (s0 + CHUNK, n);
5628           if (s0 < n) goto L1; else goto L4;
5629     L1:
5630           V = s0 * STEP + N1;
5631           e = e0 * STEP + N1;
5632     L2:
5633           BODY;
5634           V += STEP;
5635           if (V cond e) goto L2; else goto L3;
5636     L3:
5637           trip += 1;
5638           goto L0;
5639     L4:
5640 */
5641 
5642 static void
expand_omp_for_static_chunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)5643 expand_omp_for_static_chunk (struct omp_region *region,
5644                                    struct omp_for_data *fd, gimple *inner_stmt)
5645 {
5646   tree n, s0, e0, e, t;
5647   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5648   tree type, itype, vmain, vback, vextra;
5649   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5650   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5651   gimple_stmt_iterator gsi, gsip;
5652   edge se;
5653   bool broken_loop = region->cont == NULL;
5654   tree *counts = NULL;
5655   tree n1, n2, step;
5656   tree reductions = NULL_TREE;
5657   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5658 
5659   itype = type = TREE_TYPE (fd->loop.v);
5660   if (POINTER_TYPE_P (type))
5661     itype = signed_type_for (type);
5662 
5663   entry_bb = region->entry;
5664   se = split_block (entry_bb, last_stmt (entry_bb));
5665   entry_bb = se->src;
5666   iter_part_bb = se->dest;
5667   cont_bb = region->cont;
5668   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5669   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5670   gcc_assert (broken_loop
5671                 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5672   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5673   body_bb = single_succ (seq_start_bb);
5674   if (!broken_loop)
5675     {
5676       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5677                       || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5678       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5679       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5680     }
5681   exit_bb = region->exit;
5682 
5683   /* Trip and adjustment setup goes in ENTRY_BB.  */
5684   gsi = gsi_last_nondebug_bb (entry_bb);
5685   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5686   gsip = gsi;
5687   gsi_prev (&gsip);
5688 
5689   if (fd->collapse > 1)
5690     {
5691       int first_zero_iter = -1, dummy = -1;
5692       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5693 
5694       counts = XALLOCAVEC (tree, fd->collapse);
5695       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5696                                           fin_bb, first_zero_iter,
5697                                           dummy_bb, dummy, l2_dom_bb);
5698       t = NULL_TREE;
5699     }
5700   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5701     t = integer_one_node;
5702   else
5703     t = fold_binary (fd->loop.cond_code, boolean_type_node,
5704                          fold_convert (type, fd->loop.n1),
5705                          fold_convert (type, fd->loop.n2));
5706   if (fd->collapse == 1
5707       && TYPE_UNSIGNED (type)
5708       && (t == NULL_TREE || !integer_onep (t)))
5709     {
5710       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5711       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5712                                              true, GSI_SAME_STMT);
5713       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5714       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5715                                              true, GSI_SAME_STMT);
5716       gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5717                                                             n1, n2);
5718       se = split_block (entry_bb, cond_stmt);
5719       se->flags = EDGE_TRUE_VALUE;
5720       entry_bb = se->dest;
5721       se->probability = profile_probability::very_likely ();
5722       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5723       se->probability = profile_probability::very_unlikely ();
5724       if (gimple_in_ssa_p (cfun))
5725           {
5726             int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5727             for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5728                  !gsi_end_p (gpi); gsi_next (&gpi))
5729               {
5730                 gphi *phi = gpi.phi ();
5731                 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5732                                  se, UNKNOWN_LOCATION);
5733               }
5734           }
5735       gsi = gsi_last_bb (entry_bb);
5736     }
5737 
5738   if (fd->lastprivate_conditional)
5739     {
5740       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5741       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5742       if (fd->have_pointer_condtemp)
5743           condtemp = OMP_CLAUSE_DECL (c);
5744       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5745       cond_var = OMP_CLAUSE_DECL (c);
5746     }
5747   if (fd->have_reductemp || fd->have_pointer_condtemp)
5748     {
5749       tree t1 = build_int_cst (long_integer_type_node, 0);
5750       tree t2 = build_int_cst (long_integer_type_node, 1);
5751       tree t3 = build_int_cstu (long_integer_type_node,
5752                                         (HOST_WIDE_INT_1U << 31) + 1);
5753       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5754       gimple_stmt_iterator gsi2 = gsi_none ();
5755       gimple *g = NULL;
5756       tree mem = null_pointer_node, memv = NULL_TREE;
5757       if (fd->have_reductemp)
5758           {
5759             tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5760             reductions = OMP_CLAUSE_DECL (c);
5761             gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5762             g = SSA_NAME_DEF_STMT (reductions);
5763             reductions = gimple_assign_rhs1 (g);
5764             OMP_CLAUSE_DECL (c) = reductions;
5765             gsi2 = gsi_for_stmt (g);
5766           }
5767       else
5768           {
5769             if (gsi_end_p (gsip))
5770               gsi2 = gsi_after_labels (region->entry);
5771             else
5772               gsi2 = gsip;
5773             reductions = null_pointer_node;
5774           }
5775       if (fd->have_pointer_condtemp)
5776           {
5777             tree type = TREE_TYPE (condtemp);
5778             memv = create_tmp_var (type);
5779             TREE_ADDRESSABLE (memv) = 1;
5780             unsigned HOST_WIDE_INT sz
5781               = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5782             sz *= fd->lastprivate_conditional;
5783             expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5784                                            false);
5785             mem = build_fold_addr_expr (memv);
5786           }
5787       tree t
5788           = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5789                                  9, t1, t2, t2, t3, t1, null_pointer_node,
5790                                  null_pointer_node, reductions, mem);
5791       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5792                                         true, GSI_SAME_STMT);
5793       if (fd->have_pointer_condtemp)
5794           expand_omp_build_assign (&gsi2, condtemp, memv, false);
5795       if (fd->have_reductemp)
5796           {
5797             gsi_remove (&gsi2, true);
5798             release_ssa_name (gimple_assign_lhs (g));
5799           }
5800     }
5801   switch (gimple_omp_for_kind (fd->for_stmt))
5802     {
5803     case GF_OMP_FOR_KIND_FOR:
5804       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5805       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5806       break;
5807     case GF_OMP_FOR_KIND_DISTRIBUTE:
5808       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5809       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5810       break;
5811     default:
5812       gcc_unreachable ();
5813     }
5814   nthreads = build_call_expr (nthreads, 0);
5815   nthreads = fold_convert (itype, nthreads);
5816   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5817                                                true, GSI_SAME_STMT);
5818   threadid = build_call_expr (threadid, 0);
5819   threadid = fold_convert (itype, threadid);
5820   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5821                                                true, GSI_SAME_STMT);
5822 
5823   n1 = fd->loop.n1;
5824   n2 = fd->loop.n2;
5825   step = fd->loop.step;
5826   if (gimple_omp_for_combined_into_p (fd->for_stmt))
5827     {
5828       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5829                                              OMP_CLAUSE__LOOPTEMP_);
5830       gcc_assert (innerc);
5831       n1 = OMP_CLAUSE_DECL (innerc);
5832       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5833                                         OMP_CLAUSE__LOOPTEMP_);
5834       gcc_assert (innerc);
5835       n2 = OMP_CLAUSE_DECL (innerc);
5836     }
5837   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5838                                          true, NULL_TREE, true, GSI_SAME_STMT);
5839   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5840                                          true, NULL_TREE, true, GSI_SAME_STMT);
5841   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5842                                            true, NULL_TREE, true, GSI_SAME_STMT);
5843   tree chunk_size = fold_convert (itype, fd->chunk_size);
5844   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5845   chunk_size
5846     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5847                                         GSI_SAME_STMT);
5848 
5849   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5850   t = fold_build2 (PLUS_EXPR, itype, step, t);
5851   t = fold_build2 (PLUS_EXPR, itype, t, n2);
5852   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5853   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5854     t = fold_build2 (TRUNC_DIV_EXPR, itype,
5855                          fold_build1 (NEGATE_EXPR, itype, t),
5856                          fold_build1 (NEGATE_EXPR, itype, step));
5857   else
5858     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5859   t = fold_convert (itype, t);
5860   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5861                                         true, GSI_SAME_STMT);
5862 
5863   trip_var = create_tmp_reg (itype, ".trip");
5864   if (gimple_in_ssa_p (cfun))
5865     {
5866       trip_init = make_ssa_name (trip_var);
5867       trip_main = make_ssa_name (trip_var);
5868       trip_back = make_ssa_name (trip_var);
5869     }
5870   else
5871     {
5872       trip_init = trip_var;
5873       trip_main = trip_var;
5874       trip_back = trip_var;
5875     }
5876 
5877   gassign *assign_stmt
5878     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5879   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5880 
5881   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5882   t = fold_build2 (MULT_EXPR, itype, t, step);
5883   if (POINTER_TYPE_P (type))
5884     t = fold_build_pointer_plus (n1, t);
5885   else
5886     t = fold_build2 (PLUS_EXPR, type, t, n1);
5887   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5888                                              true, GSI_SAME_STMT);
5889 
5890   /* Remove the GIMPLE_OMP_FOR.  */
5891   gsi_remove (&gsi, true);
5892 
5893   gimple_stmt_iterator gsif = gsi;
5894 
5895   /* Iteration space partitioning goes in ITER_PART_BB.  */
5896   gsi = gsi_last_bb (iter_part_bb);
5897 
5898   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5899   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5900   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5901   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5902                                          false, GSI_CONTINUE_LINKING);
5903 
5904   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5905   t = fold_build2 (MIN_EXPR, itype, t, n);
5906   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5907                                          false, GSI_CONTINUE_LINKING);
5908 
5909   t = build2 (LT_EXPR, boolean_type_node, s0, n);
5910   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5911 
5912   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5913   gsi = gsi_start_bb (seq_start_bb);
5914 
5915   tree startvar = fd->loop.v;
5916   tree endvar = NULL_TREE;
5917 
5918   if (gimple_omp_for_combined_p (fd->for_stmt))
5919     {
5920       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5921                          ? gimple_omp_parallel_clauses (inner_stmt)
5922                          : gimple_omp_for_clauses (inner_stmt);
5923       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5924       gcc_assert (innerc);
5925       startvar = OMP_CLAUSE_DECL (innerc);
5926       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5927                                         OMP_CLAUSE__LOOPTEMP_);
5928       gcc_assert (innerc);
5929       endvar = OMP_CLAUSE_DECL (innerc);
5930       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5931             && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5932           {
5933             innerc = find_lastprivate_looptemp (fd, innerc);
5934             if (innerc)
5935               {
5936                 /* If needed (distribute parallel for with lastprivate),
5937                      propagate down the total number of iterations.  */
5938                 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5939                                              fd->loop.n2);
5940                 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5941                                                       GSI_CONTINUE_LINKING);
5942                 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5943                 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5944               }
5945           }
5946     }
5947 
5948   t = fold_convert (itype, s0);
5949   t = fold_build2 (MULT_EXPR, itype, t, step);
5950   if (POINTER_TYPE_P (type))
5951     {
5952       t = fold_build_pointer_plus (n1, t);
5953       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5954             && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5955           t = fold_convert (signed_type_for (type), t);
5956     }
5957   else
5958     t = fold_build2 (PLUS_EXPR, type, t, n1);
5959   t = fold_convert (TREE_TYPE (startvar), t);
5960   t = force_gimple_operand_gsi (&gsi, t,
5961                                         DECL_P (startvar)
5962                                         && TREE_ADDRESSABLE (startvar),
5963                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
5964   assign_stmt = gimple_build_assign (startvar, t);
5965   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5966   if (cond_var)
5967     {
5968       tree itype = TREE_TYPE (cond_var);
5969       /* For lastprivate(conditional:) itervar, we need some iteration
5970            counter that starts at unsigned non-zero and increases.
5971            Prefer as few IVs as possible, so if we can use startvar
5972            itself, use that, or startvar + constant (those would be
5973            incremented with step), and as last resort use the s0 + 1
5974            incremented by 1.  */
5975       if (POINTER_TYPE_P (type)
5976             || TREE_CODE (n1) != INTEGER_CST
5977             || fd->loop.cond_code != LT_EXPR)
5978           t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5979                                build_int_cst (itype, 1));
5980       else if (tree_int_cst_sgn (n1) == 1)
5981           t = fold_convert (itype, t);
5982       else
5983           {
5984             tree c = fold_convert (itype, n1);
5985             c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5986             t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5987           }
5988       t = force_gimple_operand_gsi (&gsi, t, false,
5989                                             NULL_TREE, false, GSI_CONTINUE_LINKING);
5990       assign_stmt = gimple_build_assign (cond_var, t);
5991       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5992     }
5993 
5994   t = fold_convert (itype, e0);
5995   t = fold_build2 (MULT_EXPR, itype, t, step);
5996   if (POINTER_TYPE_P (type))
5997     {
5998       t = fold_build_pointer_plus (n1, t);
5999       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6000             && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6001           t = fold_convert (signed_type_for (type), t);
6002     }
6003   else
6004     t = fold_build2 (PLUS_EXPR, type, t, n1);
6005   t = fold_convert (TREE_TYPE (startvar), t);
6006   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6007                                         false, GSI_CONTINUE_LINKING);
6008   if (endvar)
6009     {
6010       assign_stmt = gimple_build_assign (endvar, e);
6011       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6012       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6013           assign_stmt = gimple_build_assign (fd->loop.v, e);
6014       else
6015           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6016       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6017     }
6018   /* Handle linear clause adjustments.  */
6019   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6020   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6021     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6022            c; c = OMP_CLAUSE_CHAIN (c))
6023       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6024             && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6025           {
6026             tree d = OMP_CLAUSE_DECL (c);
6027             tree t = d, a, dest;
6028             if (omp_privatize_by_reference (t))
6029               t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6030             tree type = TREE_TYPE (t);
6031             if (POINTER_TYPE_P (type))
6032               type = sizetype;
6033             dest = unshare_expr (t);
6034             tree v = create_tmp_var (TREE_TYPE (t), NULL);
6035             expand_omp_build_assign (&gsif, v, t);
6036             if (itercnt == NULL_TREE)
6037               {
6038                 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6039                     {
6040                       itercntbias
6041                         = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6042                                            fold_convert (itype, fd->loop.n1));
6043                       itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6044                                                        itercntbias, step);
6045                       itercntbias
6046                         = force_gimple_operand_gsi (&gsif, itercntbias, true,
6047                                                             NULL_TREE, true,
6048                                                             GSI_SAME_STMT);
6049                       itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6050                       itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6051                                                                   NULL_TREE, false,
6052                                                                   GSI_CONTINUE_LINKING);
6053                     }
6054                 else
6055                     itercnt = s0;
6056               }
6057             a = fold_build2 (MULT_EXPR, type,
6058                                  fold_convert (type, itercnt),
6059                                  fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6060             t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6061                                  : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6062             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6063                                                   false, GSI_CONTINUE_LINKING);
6064             expand_omp_build_assign (&gsi, dest, t, true);
6065           }
6066   if (fd->collapse > 1)
6067     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6068 
6069   if (!broken_loop)
6070     {
6071       /* The code controlling the sequential loop goes in CONT_BB,
6072            replacing the GIMPLE_OMP_CONTINUE.  */
6073       gsi = gsi_last_nondebug_bb (cont_bb);
6074       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6075       vmain = gimple_omp_continue_control_use (cont_stmt);
6076       vback = gimple_omp_continue_control_def (cont_stmt);
6077 
6078       if (cond_var)
6079           {
6080             tree itype = TREE_TYPE (cond_var);
6081             tree t2;
6082             if (POINTER_TYPE_P (type)
6083                 || TREE_CODE (n1) != INTEGER_CST
6084                 || fd->loop.cond_code != LT_EXPR)
6085               t2 = build_int_cst (itype, 1);
6086             else
6087               t2 = fold_convert (itype, step);
6088             t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6089             t2 = force_gimple_operand_gsi (&gsi, t2, false,
6090                                                    NULL_TREE, true, GSI_SAME_STMT);
6091             assign_stmt = gimple_build_assign (cond_var, t2);
6092             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6093           }
6094 
6095       if (!gimple_omp_for_combined_p (fd->for_stmt))
6096           {
6097             if (POINTER_TYPE_P (type))
6098               t = fold_build_pointer_plus (vmain, step);
6099             else
6100               t = fold_build2 (PLUS_EXPR, type, vmain, step);
6101             if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6102               t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6103                                                     true, GSI_SAME_STMT);
6104             assign_stmt = gimple_build_assign (vback, t);
6105             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6106 
6107             if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6108               t = build2 (EQ_EXPR, boolean_type_node,
6109                               build_int_cst (itype, 0),
6110                               build_int_cst (itype, 1));
6111             else
6112               t = build2 (fd->loop.cond_code, boolean_type_node,
6113                               DECL_P (vback) && TREE_ADDRESSABLE (vback)
6114                               ? t : vback, e);
6115             gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6116           }
6117 
6118       /* Remove GIMPLE_OMP_CONTINUE.  */
6119       gsi_remove (&gsi, true);
6120 
6121       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6122           collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6123 
6124       /* Trip update code goes into TRIP_UPDATE_BB.  */
6125       gsi = gsi_start_bb (trip_update_bb);
6126 
6127       t = build_int_cst (itype, 1);
6128       t = build2 (PLUS_EXPR, itype, trip_main, t);
6129       assign_stmt = gimple_build_assign (trip_back, t);
6130       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6131     }
6132 
6133   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
6134   gsi = gsi_last_nondebug_bb (exit_bb);
6135   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6136     {
6137       t = gimple_omp_return_lhs (gsi_stmt (gsi));
6138       if (fd->have_reductemp || fd->have_pointer_condtemp)
6139           {
6140             tree fn;
6141             if (t)
6142               fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6143             else
6144               fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6145             gcall *g = gimple_build_call (fn, 0);
6146             if (t)
6147               {
6148                 gimple_call_set_lhs (g, t);
6149                 if (fd->have_reductemp)
6150                     gsi_insert_after (&gsi, gimple_build_assign (reductions,
6151                                                                            NOP_EXPR, t),
6152                                           GSI_SAME_STMT);
6153               }
6154             gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6155           }
6156       else
6157           gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6158     }
6159   else if (fd->have_pointer_condtemp)
6160     {
6161       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6162       gcall *g = gimple_build_call (fn, 0);
6163       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6164     }
6165   gsi_remove (&gsi, true);
6166 
6167   /* Connect the new blocks.  */
6168   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6169   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6170 
6171   if (!broken_loop)
6172     {
6173       se = find_edge (cont_bb, body_bb);
6174       if (se == NULL)
6175           {
6176             se = BRANCH_EDGE (cont_bb);
6177             gcc_assert (single_succ (se->dest) == body_bb);
6178           }
6179       if (gimple_omp_for_combined_p (fd->for_stmt))
6180           {
6181             remove_edge (se);
6182             se = NULL;
6183           }
6184       else if (fd->collapse > 1)
6185           {
6186             remove_edge (se);
6187             se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6188           }
6189       else
6190           se->flags = EDGE_TRUE_VALUE;
6191       find_edge (cont_bb, trip_update_bb)->flags
6192           = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6193 
6194       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6195                                         iter_part_bb);
6196     }
6197 
6198   if (gimple_in_ssa_p (cfun))
6199     {
6200       gphi_iterator psi;
6201       gphi *phi;
6202       edge re, ene;
6203       edge_var_map *vm;
6204       size_t i;
6205 
6206       gcc_assert (fd->collapse == 1 && !broken_loop);
6207 
6208       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6209            remove arguments of the phi nodes in fin_bb.  We need to create
6210            appropriate phi nodes in iter_part_bb instead.  */
6211       se = find_edge (iter_part_bb, fin_bb);
6212       re = single_succ_edge (trip_update_bb);
6213       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6214       ene = single_succ_edge (entry_bb);
6215 
6216       psi = gsi_start_phis (fin_bb);
6217       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6218              gsi_next (&psi), ++i)
6219           {
6220             gphi *nphi;
6221             location_t locus;
6222 
6223             phi = psi.phi ();
6224             if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6225                                      redirect_edge_var_map_def (vm), 0))
6226               continue;
6227 
6228             t = gimple_phi_result (phi);
6229             gcc_assert (t == redirect_edge_var_map_result (vm));
6230 
6231             if (!single_pred_p (fin_bb))
6232               t = copy_ssa_name (t, phi);
6233 
6234             nphi = create_phi_node (t, iter_part_bb);
6235 
6236             t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6237             locus = gimple_phi_arg_location_from_edge (phi, se);
6238 
6239             /* A special case -- fd->loop.v is not yet computed in
6240                iter_part_bb, we need to use vextra instead.  */
6241             if (t == fd->loop.v)
6242               t = vextra;
6243             add_phi_arg (nphi, t, ene, locus);
6244             locus = redirect_edge_var_map_location (vm);
6245             tree back_arg = redirect_edge_var_map_def (vm);
6246             add_phi_arg (nphi, back_arg, re, locus);
6247             edge ce = find_edge (cont_bb, body_bb);
6248             if (ce == NULL)
6249               {
6250                 ce = BRANCH_EDGE (cont_bb);
6251                 gcc_assert (single_succ (ce->dest) == body_bb);
6252                 ce = single_succ_edge (ce->dest);
6253               }
6254             gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6255             gcc_assert (inner_loop_phi != NULL);
6256             add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6257                            find_edge (seq_start_bb, body_bb), locus);
6258 
6259             if (!single_pred_p (fin_bb))
6260               add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6261           }
6262       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6263       redirect_edge_var_map_clear (re);
6264       if (single_pred_p (fin_bb))
6265           while (1)
6266             {
6267               psi = gsi_start_phis (fin_bb);
6268               if (gsi_end_p (psi))
6269                 break;
6270               remove_phi_node (&psi, false);
6271             }
6272 
6273       /* Make phi node for trip.  */
6274       phi = create_phi_node (trip_main, iter_part_bb);
6275       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6276                        UNKNOWN_LOCATION);
6277       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6278                        UNKNOWN_LOCATION);
6279     }
6280 
6281   if (!broken_loop)
6282     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6283   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6284                                  recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6285   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6286                                  recompute_dominator (CDI_DOMINATORS, fin_bb));
6287   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6288                                  recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6289   set_immediate_dominator (CDI_DOMINATORS, body_bb,
6290                                  recompute_dominator (CDI_DOMINATORS, body_bb));
6291 
6292   if (!broken_loop)
6293     {
6294       class loop *loop = body_bb->loop_father;
6295       class loop *trip_loop = alloc_loop ();
6296       trip_loop->header = iter_part_bb;
6297       trip_loop->latch = trip_update_bb;
6298       add_loop (trip_loop, iter_part_bb->loop_father);
6299 
6300       if (loop != entry_bb->loop_father)
6301           {
6302             gcc_assert (loop->header == body_bb);
6303             gcc_assert (loop->latch == region->cont
6304                           || single_pred (loop->latch) == region->cont);
6305             trip_loop->inner = loop;
6306             return;
6307           }
6308 
6309       if (!gimple_omp_for_combined_p (fd->for_stmt))
6310           {
6311             loop = alloc_loop ();
6312             loop->header = body_bb;
6313             if (collapse_bb == NULL)
6314               loop->latch = cont_bb;
6315             add_loop (loop, trip_loop);
6316           }
6317     }
6318 }
6319 
6320 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
6321    loop.  Given parameters:
6322 
6323           for (V = N1; V cond N2; V += STEP) BODY;
6324 
6325    where COND is "<" or ">", we generate pseudocode
6326 
6327           V = N1;
6328           goto L1;
6329     L0:
6330           BODY;
6331           V += STEP;
6332     L1:
6333           if (V cond N2) goto L0; else goto L2;
6334     L2:
6335 
6336     For collapsed loops, emit the outer loops as scalar
6337     and only try to vectorize the innermost loop.  */
6338 
6339 static void
expand_omp_simd(struct omp_region * region,struct omp_for_data * fd)6340 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6341 {
6342   tree type, t;
6343   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6344   gimple_stmt_iterator gsi;
6345   gimple *stmt;
6346   gcond *cond_stmt;
6347   bool broken_loop = region->cont == NULL;
6348   edge e, ne;
6349   tree *counts = NULL;
6350   int i;
6351   int safelen_int = INT_MAX;
6352   bool dont_vectorize = false;
6353   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354                                           OMP_CLAUSE_SAFELEN);
6355   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6356                                           OMP_CLAUSE__SIMDUID_);
6357   tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6358                                     OMP_CLAUSE_IF);
6359   tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6360                                           OMP_CLAUSE_SIMDLEN);
6361   tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6362                                            OMP_CLAUSE__CONDTEMP_);
6363   tree n1, n2;
6364   tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6365 
6366   if (safelen)
6367     {
6368       poly_uint64 val;
6369       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6370       if (!poly_int_tree_p (safelen, &val))
6371           safelen_int = 0;
6372       else
6373           safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6374       if (safelen_int == 1)
6375           safelen_int = 0;
6376     }
6377   if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6378       || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6379     {
6380       safelen_int = 0;
6381       dont_vectorize = true;
6382     }
6383   type = TREE_TYPE (fd->loop.v);
6384   entry_bb = region->entry;
6385   cont_bb = region->cont;
6386   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6387   gcc_assert (broken_loop
6388                 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6389   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6390   if (!broken_loop)
6391     {
6392       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6393       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6394       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6395       l2_bb = BRANCH_EDGE (entry_bb)->dest;
6396     }
6397   else
6398     {
6399       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6400       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6401       l2_bb = single_succ (l1_bb);
6402     }
6403   exit_bb = region->exit;
6404   l2_dom_bb = NULL;
6405 
6406   gsi = gsi_last_nondebug_bb (entry_bb);
6407 
6408   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6409   /* Not needed in SSA form right now.  */
6410   gcc_assert (!gimple_in_ssa_p (cfun));
6411   if (fd->collapse > 1
6412       && (gimple_omp_for_combined_into_p (fd->for_stmt)
6413             || broken_loop))
6414     {
6415       int first_zero_iter = -1, dummy = -1;
6416       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6417 
6418       counts = XALLOCAVEC (tree, fd->collapse);
6419       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6420                                           zero_iter_bb, first_zero_iter,
6421                                           dummy_bb, dummy, l2_dom_bb);
6422     }
6423   if (l2_dom_bb == NULL)
6424     l2_dom_bb = l1_bb;
6425 
6426   n1 = fd->loop.n1;
6427   n2 = fd->loop.n2;
6428   if (gimple_omp_for_combined_into_p (fd->for_stmt))
6429     {
6430       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6431                                              OMP_CLAUSE__LOOPTEMP_);
6432       gcc_assert (innerc);
6433       n1 = OMP_CLAUSE_DECL (innerc);
6434       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6435                                         OMP_CLAUSE__LOOPTEMP_);
6436       gcc_assert (innerc);
6437       n2 = OMP_CLAUSE_DECL (innerc);
6438     }
6439   tree step = fd->loop.step;
6440   tree orig_step = step; /* May be different from step if is_simt.  */
6441 
6442   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6443                                           OMP_CLAUSE__SIMT_);
6444   if (is_simt)
6445     {
6446       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6447       is_simt = safelen_int > 1;
6448     }
6449   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6450   if (is_simt)
6451     {
6452       simt_lane = create_tmp_var (unsigned_type_node);
6453       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6454       gimple_call_set_lhs (g, simt_lane);
6455       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6456       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6457                                          fold_convert (TREE_TYPE (step), simt_lane));
6458       n1 = fold_convert (type, n1);
6459       if (POINTER_TYPE_P (type))
6460           n1 = fold_build_pointer_plus (n1, offset);
6461       else
6462           n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6463 
6464       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
6465       if (fd->collapse > 1)
6466           simt_maxlane = build_one_cst (unsigned_type_node);
6467       else if (safelen_int < omp_max_simt_vf ())
6468           simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6469       tree vf
6470           = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6471                                                   unsigned_type_node, 0);
6472       if (simt_maxlane)
6473           vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6474       vf = fold_convert (TREE_TYPE (step), vf);
6475       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6476     }
6477 
6478   tree n2var = NULL_TREE;
6479   tree n2v = NULL_TREE;
6480   tree *nonrect_bounds = NULL;
6481   tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6482   if (fd->collapse > 1)
6483     {
6484       if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6485           {
6486             if (fd->non_rect)
6487               {
6488                 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6489                 memset (nonrect_bounds, 0,
6490                           sizeof (tree) * (fd->last_nonrect + 1));
6491               }
6492             expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6493             gcc_assert (entry_bb == gsi_bb (gsi));
6494             gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6495             gsi_prev (&gsi);
6496             entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6497             expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6498                                             NULL, n1);
6499             gsi = gsi_for_stmt (fd->for_stmt);
6500           }
6501       if (broken_loop)
6502           ;
6503       else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6504           {
6505             /* Compute in n2var the limit for the first innermost loop,
6506                i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6507                where cnt is how many iterations would the loop have if
6508                all further iterations were assigned to the current task.  */
6509             n2var = create_tmp_var (type);
6510             i = fd->collapse - 1;
6511             tree itype = TREE_TYPE (fd->loops[i].v);
6512             if (POINTER_TYPE_P (itype))
6513               itype = signed_type_for (itype);
6514             t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6515                                              ? -1 : 1));
6516             t = fold_build2 (PLUS_EXPR, itype,
6517                                  fold_convert (itype, fd->loops[i].step), t);
6518             t = fold_build2 (PLUS_EXPR, itype, t,
6519                                  fold_convert (itype, fd->loops[i].n2));
6520             if (fd->loops[i].m2)
6521               {
6522                 tree t2 = fold_convert (itype,
6523                                               fd->loops[i - fd->loops[i].outer].v);
6524                 tree t3 = fold_convert (itype, fd->loops[i].m2);
6525                 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6526                 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6527               }
6528             t = fold_build2 (MINUS_EXPR, itype, t,
6529                                  fold_convert (itype, fd->loops[i].v));
6530             if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6531               t = fold_build2 (TRUNC_DIV_EXPR, itype,
6532                                    fold_build1 (NEGATE_EXPR, itype, t),
6533                                    fold_build1 (NEGATE_EXPR, itype,
6534                                                     fold_convert (itype,
6535                                                                       fd->loops[i].step)));
6536             else
6537               t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6538                                    fold_convert (itype, fd->loops[i].step));
6539             t = fold_convert (type, t);
6540             tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6541             min_arg1 = create_tmp_var (type);
6542             expand_omp_build_assign (&gsi, min_arg1, t2);
6543             min_arg2 = create_tmp_var (type);
6544             expand_omp_build_assign (&gsi, min_arg2, t);
6545           }
6546       else
6547           {
6548             if (TREE_CODE (n2) == INTEGER_CST)
6549               {
6550                 /* Indicate for lastprivate handling that at least one iteration
6551                      has been performed, without wasting runtime.  */
6552                 if (integer_nonzerop (n2))
6553                     expand_omp_build_assign (&gsi, fd->loop.v,
6554                                                    fold_convert (type, n2));
6555                 else
6556                     /* Indicate that no iteration has been performed.  */
6557                     expand_omp_build_assign (&gsi, fd->loop.v,
6558                                                    build_one_cst (type));
6559               }
6560             else
6561               {
6562                 expand_omp_build_assign (&gsi, fd->loop.v,
6563                                                build_zero_cst (type));
6564                 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6565               }
6566             for (i = 0; i < fd->collapse; i++)
6567               {
6568                 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6569                 if (fd->loops[i].m1)
6570                     {
6571                       tree t2
6572                         = fold_convert (TREE_TYPE (t),
6573                                             fd->loops[i - fd->loops[i].outer].v);
6574                       tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6575                       t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6576                       t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6577                     }
6578                 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6579                 /* For normal non-combined collapsed loops just initialize
6580                      the outermost iterator in the entry_bb.  */
6581                 if (!broken_loop)
6582                     break;
6583               }
6584           }
6585     }
6586   else
6587     expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6588   tree altv = NULL_TREE, altn2 = NULL_TREE;
6589   if (fd->collapse == 1
6590       && !broken_loop
6591       && TREE_CODE (orig_step) != INTEGER_CST)
6592     {
6593       /* The vectorizer currently punts on loops with non-constant steps
6594            for the main IV (can't compute number of iterations and gives up
6595            because of that).  As for OpenMP loops it is always possible to
6596            compute the number of iterations upfront, use an alternate IV
6597            as the loop iterator:
6598            altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6599            for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
6600       altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6601       expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6602       tree itype = TREE_TYPE (fd->loop.v);
6603       if (POINTER_TYPE_P (itype))
6604           itype = signed_type_for (itype);
6605       t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6606       t = fold_build2 (PLUS_EXPR, itype,
6607                            fold_convert (itype, step), t);
6608       t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6609       t = fold_build2 (MINUS_EXPR, itype, t,
6610                            fold_convert (itype, fd->loop.v));
6611       if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6612           t = fold_build2 (TRUNC_DIV_EXPR, itype,
6613                                fold_build1 (NEGATE_EXPR, itype, t),
6614                                fold_build1 (NEGATE_EXPR, itype,
6615                                               fold_convert (itype, step)));
6616       else
6617           t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6618                                fold_convert (itype, step));
6619       t = fold_convert (TREE_TYPE (altv), t);
6620       altn2 = create_tmp_var (TREE_TYPE (altv));
6621       expand_omp_build_assign (&gsi, altn2, t);
6622       tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6623       t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6624                                              true, GSI_SAME_STMT);
6625       t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6626       gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6627                                                   build_zero_cst (TREE_TYPE (altv)));
6628       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6629     }
6630   else if (fd->collapse > 1
6631              && !broken_loop
6632              && !gimple_omp_for_combined_into_p (fd->for_stmt)
6633              && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6634     {
6635       altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6636       altn2 = create_tmp_var (TREE_TYPE (altv));
6637     }
6638   if (cond_var)
6639     {
6640       if (POINTER_TYPE_P (type)
6641             || TREE_CODE (n1) != INTEGER_CST
6642             || fd->loop.cond_code != LT_EXPR
6643             || tree_int_cst_sgn (n1) != 1)
6644           expand_omp_build_assign (&gsi, cond_var,
6645                                          build_one_cst (TREE_TYPE (cond_var)));
6646       else
6647           expand_omp_build_assign (&gsi, cond_var,
6648                                          fold_convert (TREE_TYPE (cond_var), n1));
6649     }
6650 
6651   /* Remove the GIMPLE_OMP_FOR statement.  */
6652   gsi_remove (&gsi, true);
6653 
6654   if (!broken_loop)
6655     {
6656       /* Code to control the increment goes in the CONT_BB.  */
6657       gsi = gsi_last_nondebug_bb (cont_bb);
6658       stmt = gsi_stmt (gsi);
6659       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6660 
6661       if (fd->collapse == 1
6662             || gimple_omp_for_combined_into_p (fd->for_stmt))
6663           {
6664             if (POINTER_TYPE_P (type))
6665               t = fold_build_pointer_plus (fd->loop.v, step);
6666             else
6667               t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6668             expand_omp_build_assign (&gsi, fd->loop.v, t);
6669           }
6670       else if (TREE_CODE (n2) != INTEGER_CST)
6671           expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6672       if (altv)
6673           {
6674             t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6675                                  build_one_cst (TREE_TYPE (altv)));
6676             expand_omp_build_assign (&gsi, altv, t);
6677           }
6678 
6679       if (fd->collapse > 1)
6680           {
6681             i = fd->collapse - 1;
6682             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6683               {
6684                 t = fold_convert (sizetype, fd->loops[i].step);
6685                 t = fold_build_pointer_plus (fd->loops[i].v, t);
6686               }
6687             else
6688               {
6689                 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6690                                         fd->loops[i].step);
6691                 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6692                                      fd->loops[i].v, t);
6693               }
6694             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6695           }
6696       if (cond_var)
6697           {
6698             if (POINTER_TYPE_P (type)
6699                 || TREE_CODE (n1) != INTEGER_CST
6700                 || fd->loop.cond_code != LT_EXPR
6701                 || tree_int_cst_sgn (n1) != 1)
6702               t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6703                                    build_one_cst (TREE_TYPE (cond_var)));
6704             else
6705               t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6706                                    fold_convert (TREE_TYPE (cond_var), step));
6707             expand_omp_build_assign (&gsi, cond_var, t);
6708           }
6709 
6710       /* Remove GIMPLE_OMP_CONTINUE.  */
6711       gsi_remove (&gsi, true);
6712     }
6713 
6714   /* Emit the condition in L1_BB.  */
6715   gsi = gsi_start_bb (l1_bb);
6716 
6717   if (altv)
6718     t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6719   else if (fd->collapse > 1
6720              && !gimple_omp_for_combined_into_p (fd->for_stmt)
6721              && !broken_loop)
6722     {
6723       i = fd->collapse - 1;
6724       tree itype = TREE_TYPE (fd->loops[i].v);
6725       if (fd->loops[i].m2)
6726           t = n2v = create_tmp_var (itype);
6727       else
6728           t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6729       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6730                                             false, GSI_CONTINUE_LINKING);
6731       tree v = fd->loops[i].v;
6732       if (DECL_P (v) && TREE_ADDRESSABLE (v))
6733           v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6734                                               false, GSI_CONTINUE_LINKING);
6735       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6736     }
6737   else
6738     {
6739       if (fd->collapse > 1 && !broken_loop)
6740           t = n2var;
6741       else
6742           t = fold_convert (type, unshare_expr (n2));
6743       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6744                                             false, GSI_CONTINUE_LINKING);
6745       tree v = fd->loop.v;
6746       if (DECL_P (v) && TREE_ADDRESSABLE (v))
6747           v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6748                                               false, GSI_CONTINUE_LINKING);
6749       t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6750     }
6751   cond_stmt = gimple_build_cond_empty (t);
6752   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6753   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6754                      NULL, NULL)
6755       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6756                         NULL, NULL))
6757     {
6758       gsi = gsi_for_stmt (cond_stmt);
6759       gimple_regimplify_operands (cond_stmt, &gsi);
6760     }
6761 
6762   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
6763   if (is_simt)
6764     {
6765       gsi = gsi_start_bb (l2_bb);
6766       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6767       if (POINTER_TYPE_P (type))
6768           t = fold_build_pointer_plus (fd->loop.v, step);
6769       else
6770           t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6771       expand_omp_build_assign (&gsi, fd->loop.v, t);
6772     }
6773 
6774   /* Remove GIMPLE_OMP_RETURN.  */
6775   gsi = gsi_last_nondebug_bb (exit_bb);
6776   gsi_remove (&gsi, true);
6777 
6778   /* Connect the new blocks.  */
6779   remove_edge (FALLTHRU_EDGE (entry_bb));
6780 
6781   if (!broken_loop)
6782     {
6783       remove_edge (BRANCH_EDGE (entry_bb));
6784       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6785 
6786       e = BRANCH_EDGE (l1_bb);
6787       ne = FALLTHRU_EDGE (l1_bb);
6788       e->flags = EDGE_TRUE_VALUE;
6789     }
6790   else
6791     {
6792       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6793 
6794       ne = single_succ_edge (l1_bb);
6795       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6796 
6797     }
6798   ne->flags = EDGE_FALSE_VALUE;
6799   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6800   ne->probability = e->probability.invert ();
6801 
6802   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6803   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6804 
6805   if (simt_maxlane)
6806     {
6807       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6808                                              NULL_TREE, NULL_TREE);
6809       gsi = gsi_last_bb (entry_bb);
6810       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6811       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6812       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6813       FALLTHRU_EDGE (entry_bb)->probability
6814            = profile_probability::guessed_always ().apply_scale (7, 8);
6815       BRANCH_EDGE (entry_bb)->probability
6816            = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6817       l2_dom_bb = entry_bb;
6818     }
6819   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6820 
6821   if (!broken_loop && fd->collapse > 1)
6822     {
6823       basic_block last_bb = l1_bb;
6824       basic_block init_bb = NULL;
6825       for (i = fd->collapse - 2; i >= 0; i--)
6826           {
6827             tree nextn2v = NULL_TREE;
6828             if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6829               e = EDGE_SUCC (last_bb, 0);
6830             else
6831               e = EDGE_SUCC (last_bb, 1);
6832             basic_block bb = split_edge (e);
6833             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6834               {
6835                 t = fold_convert (sizetype, fd->loops[i].step);
6836                 t = fold_build_pointer_plus (fd->loops[i].v, t);
6837               }
6838             else
6839               {
6840                 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6841                                         fd->loops[i].step);
6842                 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6843                                      fd->loops[i].v, t);
6844               }
6845             gsi = gsi_after_labels (bb);
6846             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6847 
6848             bb = split_block (bb, last_stmt (bb))->dest;
6849             gsi = gsi_start_bb (bb);
6850             tree itype = TREE_TYPE (fd->loops[i].v);
6851             if (fd->loops[i].m2)
6852               t = nextn2v = create_tmp_var (itype);
6853             else
6854               t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6855             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6856                                                   false, GSI_CONTINUE_LINKING);
6857             tree v = fd->loops[i].v;
6858             if (DECL_P (v) && TREE_ADDRESSABLE (v))
6859               v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6860                                                     false, GSI_CONTINUE_LINKING);
6861             t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6862             cond_stmt = gimple_build_cond_empty (t);
6863             gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6864             if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6865                                expand_omp_regimplify_p, NULL, NULL)
6866                 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6867                                   expand_omp_regimplify_p, NULL, NULL))
6868               {
6869                 gsi = gsi_for_stmt (cond_stmt);
6870                 gimple_regimplify_operands (cond_stmt, &gsi);
6871               }
6872             ne = single_succ_edge (bb);
6873             ne->flags = EDGE_FALSE_VALUE;
6874 
6875             init_bb = create_empty_bb (bb);
6876             set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6877             add_bb_to_loop (init_bb, bb->loop_father);
6878             e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6879             e->probability
6880               = profile_probability::guessed_always ().apply_scale (7, 8);
6881             ne->probability = e->probability.invert ();
6882 
6883             gsi = gsi_after_labels (init_bb);
6884             if (fd->loops[i + 1].m1)
6885               {
6886                 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6887                                               fd->loops[i + 1
6888                                                             - fd->loops[i + 1].outer].v);
6889                 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6890                     t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6891                 else
6892                     {
6893                       t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6894                                             fd->loops[i + 1].n1);
6895                       tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6896                       t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6897                       t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6898                     }
6899               }
6900             else
6901               t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6902                                     fd->loops[i + 1].n1);
6903             expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6904             if (fd->loops[i + 1].m2)
6905               {
6906                 if (i + 2 == fd->collapse && (n2var || altv))
6907                     {
6908                       gcc_assert (n2v == NULL_TREE);
6909                       n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6910                     }
6911                 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6912                                               fd->loops[i + 1
6913                                                             - fd->loops[i + 1].outer].v);
6914                 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6915                     t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
6916                 else
6917                     {
6918                       t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6919                                             fd->loops[i + 1].n2);
6920                       tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6921                       t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6922                       t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6923                     }
6924                 expand_omp_build_assign (&gsi, n2v, t);
6925               }
6926             if (i + 2 == fd->collapse && n2var)
6927               {
6928                 /* For composite simd, n2 is the first iteration the current
6929                      task shouldn't already handle, so we effectively want to use
6930                      for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6931                      as the vectorized loop.  Except the vectorizer will not
6932                      vectorize that, so instead compute N2VAR as
6933                      N2VAR = V + MIN (N2 - V, COUNTS3) and use
6934                      for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6935                      as the loop to vectorize.  */
6936                 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6937                 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6938                     {
6939                       tree itype = TREE_TYPE (fd->loops[i].v);
6940                       if (POINTER_TYPE_P (itype))
6941                         itype = signed_type_for (itype);
6942                       t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6943                                                        == LT_EXPR ? -1 : 1));
6944                       t = fold_build2 (PLUS_EXPR, itype,
6945                                            fold_convert (itype,
6946                                                              fd->loops[i + 1].step), t);
6947                       if (fd->loops[i + 1].m2 == NULL_TREE)
6948                         t = fold_build2 (PLUS_EXPR, itype, t,
6949                                              fold_convert (itype,
6950                                                                fd->loops[i + 1].n2));
6951                       else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
6952                         {
6953                           t = fold_build_pointer_plus (n2v, t);
6954                           t = fold_convert (itype, t);
6955                         }
6956                       else
6957                         t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6958                       t = fold_build2 (MINUS_EXPR, itype, t,
6959                                            fold_convert (itype, fd->loops[i + 1].v));
6960                       tree step = fold_convert (itype, fd->loops[i + 1].step);
6961                       if (TYPE_UNSIGNED (itype)
6962                           && fd->loops[i + 1].cond_code == GT_EXPR)
6963                         t = fold_build2 (TRUNC_DIV_EXPR, itype,
6964                                              fold_build1 (NEGATE_EXPR, itype, t),
6965                                              fold_build1 (NEGATE_EXPR, itype, step));
6966                       else
6967                         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6968                       t = fold_convert (type, t);
6969                     }
6970                 else
6971                     t = counts[i + 1];
6972                 expand_omp_build_assign (&gsi, min_arg1, t2);
6973                 expand_omp_build_assign (&gsi, min_arg2, t);
6974                 e = split_block (init_bb, last_stmt (init_bb));
6975                 gsi = gsi_after_labels (e->dest);
6976                 init_bb = e->dest;
6977                 remove_edge (FALLTHRU_EDGE (entry_bb));
6978                 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6979                 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6980                 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6981                 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6982                 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6983                 expand_omp_build_assign (&gsi, n2var, t);
6984               }
6985             if (i + 2 == fd->collapse && altv)
6986               {
6987                 /* The vectorizer currently punts on loops with non-constant
6988                      steps for the main IV (can't compute number of iterations
6989                      and gives up because of that).  As for OpenMP loops it is
6990                      always possible to compute the number of iterations upfront,
6991                      use an alternate IV as the loop iterator.  */
6992                 expand_omp_build_assign (&gsi, altv,
6993                                                build_zero_cst (TREE_TYPE (altv)));
6994                 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6995                 if (POINTER_TYPE_P (itype))
6996                     itype = signed_type_for (itype);
6997                 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6998                                                    ? -1 : 1));
6999                 t = fold_build2 (PLUS_EXPR, itype,
7000                                      fold_convert (itype, fd->loops[i + 1].step), t);
7001                 t = fold_build2 (PLUS_EXPR, itype, t,
7002                                      fold_convert (itype,
7003                                                        fd->loops[i + 1].m2
7004                                                        ? n2v : fd->loops[i + 1].n2));
7005                 t = fold_build2 (MINUS_EXPR, itype, t,
7006                                      fold_convert (itype, fd->loops[i + 1].v));
7007                 tree step = fold_convert (itype, fd->loops[i + 1].step);
7008                 if (TYPE_UNSIGNED (itype)
7009                       && fd->loops[i + 1].cond_code == GT_EXPR)
7010                     t = fold_build2 (TRUNC_DIV_EXPR, itype,
7011                                          fold_build1 (NEGATE_EXPR, itype, t),
7012                                          fold_build1 (NEGATE_EXPR, itype, step));
7013                 else
7014                     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7015                 t = fold_convert (TREE_TYPE (altv), t);
7016                 expand_omp_build_assign (&gsi, altn2, t);
7017                 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7018                                               fd->loops[i + 1].m2
7019                                               ? n2v : fd->loops[i + 1].n2);
7020                 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7021                                                        true, GSI_SAME_STMT);
7022                 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7023                                         fd->loops[i + 1].v, t2);
7024                 gassign *g
7025                     = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7026                                                build_zero_cst (TREE_TYPE (altv)));
7027                 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7028               }
7029             n2v = nextn2v;
7030 
7031             make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7032             if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7033               {
7034                 e = find_edge (entry_bb, last_bb);
7035                 redirect_edge_succ (e, bb);
7036                 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7037                 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7038               }
7039 
7040             last_bb = bb;
7041           }
7042     }
7043   if (!broken_loop)
7044     {
7045       class loop *loop = alloc_loop ();
7046       loop->header = l1_bb;
7047       loop->latch = cont_bb;
7048       add_loop (loop, l1_bb->loop_father);
7049       loop->safelen = safelen_int;
7050       if (simduid)
7051           {
7052             loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7053             cfun->has_simduid_loops = true;
7054           }
7055       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7056            the loop.  */
7057       if ((flag_tree_loop_vectorize
7058              || !OPTION_SET_P (flag_tree_loop_vectorize))
7059             && flag_tree_loop_optimize
7060             && loop->safelen > 1)
7061           {
7062             loop->force_vectorize = true;
7063             if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7064               {
7065                 unsigned HOST_WIDE_INT v
7066                     = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7067                 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7068                     loop->simdlen = v;
7069               }
7070             cfun->has_force_vectorize_loops = true;
7071           }
7072       else if (dont_vectorize)
7073           loop->dont_vectorize = true;
7074     }
7075   else if (simduid)
7076     cfun->has_simduid_loops = true;
7077 }
7078 
7079 /* Taskloop construct is represented after gimplification with
7080    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7081    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
7082    which should just compute all the needed loop temporaries
7083    for GIMPLE_OMP_TASK.

        REGION is the region of the outer GIMPLE_OMP_FOR; it must have a
        continue block.  FD describes the loop.  INNER_STMT must be a
        GIMPLE_OMP_TASK for which gimple_omp_task_taskloop_p holds.

        The start and end values (FD->loop.n1 and FD->loop.n2 converted to
        FD->iter_type, plus a bias when one is needed) are stored into the
        decls of the first two _LOOPTEMP_ clauses of INNER_STMT.  Afterwards
        the GIMPLE_OMP_FOR statement, the last non-debug statements of the
        continue and exit blocks, and the branch edges leaving the entry and
        continue blocks are removed.  */
7084 
7085 static void
expand_omp_taskloop_for_outer(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)7086 expand_omp_taskloop_for_outer (struct omp_region *region,
7087                                      struct omp_for_data *fd,
7088                                      gimple *inner_stmt)
7089 {
7090   tree type, bias = NULL_TREE;
7091   basic_block entry_bb, cont_bb, exit_bb;
7092   gimple_stmt_iterator gsi;
7093   gassign *assign_stmt;
7094   tree *counts = NULL;
7095   int i;
7096 
7097   gcc_assert (inner_stmt);
7098   gcc_assert (region->cont);
7099   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7100                 && gimple_omp_task_taskloop_p (inner_stmt));
7101   type = TREE_TYPE (fd->loop.v);
7102 
7103   /* See if we need to bias by LLONG_MIN.  This is considered only when
          the iteration type is unsigned long long while the loop variable
          has a signed integer type.  */
7104   if (fd->iter_type == long_long_unsigned_type_node
7105       && TREE_CODE (type) == INTEGER_TYPE
7106       && !TYPE_UNSIGNED (type))
7107     {
7108       tree n1, n2;
7109 
7110       if (fd->loop.cond_code == LT_EXPR)
7111           {
7112             n1 = fd->loop.n1;
7113             n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7114           }
7115       else
7116           {
7117             n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7118             n2 = fd->loop.n1;
7119           }
           /* The bias can be skipped only if N1 and N2 are both integer
              constants that are either both negative or both non-negative.
              The bias value is TYPE's minimum converted to the iteration
              type.  */
7120       if (TREE_CODE (n1) != INTEGER_CST
7121             || TREE_CODE (n2) != INTEGER_CST
7122             || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7123           bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7124     }
7125 
       /* The entry block must have exactly two successors, and its branch
          edge must lead to the block the continue block falls through to.  */
7126   entry_bb = region->entry;
7127   cont_bb = region->cont;
7128   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7129   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7130   exit_bb = region->exit;
7131 
       /* The GIMPLE_OMP_FOR must be the last non-debug statement of the
          entry block; remember it so it can be removed at the end.  */
7132   gsi = gsi_last_nondebug_bb (entry_bb);
7133   gimple *for_stmt = gsi_stmt (gsi);
7134   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7135   if (fd->collapse > 1)
7136     {
7137       int first_zero_iter = -1, dummy = -1;
7138       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7139 
           /* COUNTS gets one entry per collapsed loop; it is filled in by
              expand_omp_for_init_counts.  */
7140       counts = XALLOCAVEC (tree, fd->collapse);
7141       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7142                                           zero_iter_bb, first_zero_iter,
7143                                           dummy_bb, dummy, l2_dom_bb);
7144 
           /* ZERO_ITER_BB was NULL before the call, so it is non-NULL here
              only if the call set it.  */
7145       if (zero_iter_bb)
7146           {
7147             /* Some counts[i] vars might be uninitialized if
7148                some loop has zero iterations.  But the body shouldn't
7149                be executed in that case, so just avoid uninit warnings.  */
7150             for (i = first_zero_iter; i < fd->collapse; i++)
7151               if (SSA_VAR_P (counts[i]))
7152                 suppress_warning (counts[i], OPT_Wuninitialized);
                 /* Split ENTRY_BB at the statement before the one GSI refers
                    to.  The second half becomes the new ENTRY_BB, gets a
                    fallthru edge from ZERO_ITER_BB as well, and is given the
                    immediate dominator of ZERO_ITER_BB.  */
7153             gsi_prev (&gsi);
7154             edge e = split_block (entry_bb, gsi_stmt (gsi));
7155             entry_bb = e->dest;
7156             make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7157             gsi = gsi_last_bb (entry_bb);
7158             set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7159                                            get_immediate_dominator (CDI_DOMINATORS,
7160                                                                           zero_iter_bb));
7161           }
7162     }
7163 
       /* T0 and T1 become the start and end values expressed in
          FD->iter_type.  */
7164   tree t0, t1;
7165   t1 = fd->loop.n2;
7166   t0 = fd->loop.n1;
7167   if (POINTER_TYPE_P (TREE_TYPE (t0))
7168       && TYPE_PRECISION (TREE_TYPE (t0))
7169            != TYPE_PRECISION (fd->iter_type))
7170     {
7171       /* Avoid casting pointers to integer of a different size.  */
7172       tree itype = signed_type_for (type);
7173       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7174       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7175     }
7176   else
7177     {
7178       t1 = fold_convert (fd->iter_type, t1);
7179       t0 = fold_convert (fd->iter_type, t0);
7180     }
       /* Apply the bias computed above, if any, to both ends.  */
7181   if (bias)
7182     {
7183       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7184       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7185     }
7186 
       /* The decls of the first two _LOOPTEMP_ clauses of the task receive
          the start and end values; both clauses must be present.  */
7187   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7188                                          OMP_CLAUSE__LOOPTEMP_);
7189   gcc_assert (innerc);
7190   tree startvar = OMP_CLAUSE_DECL (innerc);
7191   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7192   gcc_assert (innerc);
7193   tree endvar = OMP_CLAUSE_DECL (innerc);
7194   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7195     {
7196       innerc = find_lastprivate_looptemp (fd, innerc);
7197       if (innerc)
7198           {
7199             /* If needed (inner taskloop has lastprivate clause), propagate
7200                down the total number of iterations.  */
7201             tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7202                                                        NULL_TREE, false,
7203                                                        GSI_CONTINUE_LINKING);
7204             assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7205             gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7206           }
7207     }
7208 
       /* Emit STARTVAR = T0 followed by ENDVAR = T1.  */
7209   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7210                                          GSI_CONTINUE_LINKING);
7211   assign_stmt = gimple_build_assign (startvar, t0);
7212   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7213 
7214   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7215                                          GSI_CONTINUE_LINKING);
7216   assign_stmt = gimple_build_assign (endvar, t1);
7217   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
       /* For collapsed loops expand_omp_for_init_vars is additionally given
          COUNTS, INNER_STMT and STARTVAR to emit its own initializations.  */
7218   if (fd->collapse > 1)
7219     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7220 
7221   /* Remove the GIMPLE_OMP_FOR statement.  */
7222   gsi = gsi_for_stmt (for_stmt);
7223   gsi_remove (&gsi, true);
7224 
       /* Remove the last non-debug statement of the continue block
          (presumably the GIMPLE_OMP_CONTINUE) ...  */
7225   gsi = gsi_last_nondebug_bb (cont_bb);
7226   gsi_remove (&gsi, true);
7227 
       /* ... and of the exit block (presumably the GIMPLE_OMP_RETURN).  */
7228   gsi = gsi_last_nondebug_bb (exit_bb);
7229   gsi_remove (&gsi, true);
7230 
       /* Only the fallthru edges out of ENTRY_BB and CONT_BB are kept, so
          they are always taken.  Then update the dominator information for
          the exit block and for the original region entry.  */
7231   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7232   remove_edge (BRANCH_EDGE (entry_bb));
7233   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7234   remove_edge (BRANCH_EDGE (cont_bb));
7235   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7236   set_immediate_dominator (CDI_DOMINATORS, region->entry,
7237                                  recompute_dominator (CDI_DOMINATORS, region->entry));
7238 }
7239 
7240 /* Taskloop construct is represented after gimplification with
7241    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7242    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
7243    GOMP_taskloop{,_ull} function arranges for each task to be given just
7244    a single range of iterations.

        REGION is the region of the inner GIMPLE_OMP_FOR; when it has no
        continue block the loop is treated as broken, and neither a back
        edge nor a loop structure is set up.  FD describes the loop.
        INNER_STMT is consulted for its _LOOPTEMP_ clauses only when
        FD->for_stmt is a combined construct, and is passed on to
        expand_omp_for_init_vars for collapsed loops.

        The range to iterate over is read from the decls of the first two
        _LOOPTEMP_ clauses of FD->for_stmt.  */
7245 
7246 static void
expand_omp_taskloop_for_inner(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)7247 expand_omp_taskloop_for_inner (struct omp_region *region,
7248                                      struct omp_for_data *fd,
7249                                      gimple *inner_stmt)
7250 {
7251   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7252   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7253   basic_block fin_bb;
7254   gimple_stmt_iterator gsi;
7255   edge ep;
7256   bool broken_loop = region->cont == NULL;
7257   tree *counts = NULL;
7258   tree n1, n2, step;
7259 
       /* ITYPE is TYPE itself, except that for a pointer-typed loop variable
          it is the result of signed_type_for.  */
7260   itype = type = TREE_TYPE (fd->loop.v);
7261   if (POINTER_TYPE_P (type))
7262     itype = signed_type_for (type);
7263 
7264   /* See if we need to bias by LLONG_MIN.  This is considered only when
          the iteration type is unsigned long long while the loop variable
          has a signed integer type.  */
7265   if (fd->iter_type == long_long_unsigned_type_node
7266       && TREE_CODE (type) == INTEGER_TYPE
7267       && !TYPE_UNSIGNED (type))
7268     {
           /* These shadow the function-level N1 and N2.  */
7269       tree n1, n2;
7270 
7271       if (fd->loop.cond_code == LT_EXPR)
7272           {
7273             n1 = fd->loop.n1;
7274             n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7275           }
7276       else
7277           {
7278             n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7279             n2 = fd->loop.n1;
7280           }
           /* The bias can be skipped only if N1 and N2 are both integer
              constants that are either both negative or both non-negative.  */
7281       if (TREE_CODE (n1) != INTEGER_CST
7282             || TREE_CODE (n2) != INTEGER_CST
7283             || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7284           bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7285     }
7286 
       /* The entry block has exactly two successors: the branch edge leads
          to FIN_BB and the fallthru edge to BODY_BB.  Unless the loop is
          broken, the continue block also has two successors, falling
          through to FIN_BB and branching back to BODY_BB.  */
7287   entry_bb = region->entry;
7288   cont_bb = region->cont;
7289   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7290   fin_bb = BRANCH_EDGE (entry_bb)->dest;
7291   gcc_assert (broken_loop
7292                 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7293   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7294   if (!broken_loop)
7295     {
7296       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7297       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7298     }
7299   exit_bb = region->exit;
7300 
7301   /* Iteration space partitioning goes in ENTRY_BB.  */
7302   gsi = gsi_last_nondebug_bb (entry_bb);
7303   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7304 
7305   if (fd->collapse > 1)
7306     {
7307       int first_zero_iter = -1, dummy = -1;
7308       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7309 
           /* COUNTS gets one entry per collapsed loop.  FIN_BB is passed in
              the argument position where the outer expansion passes its
              zero_iter_bb, presumably so zero-iteration cases go straight
              to FIN_BB -- TODO confirm against
              expand_omp_for_init_counts.  */
7310       counts = XALLOCAVEC (tree, fd->collapse);
7311       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7312                                           fin_bb, first_zero_iter,
7313                                           dummy_bb, dummy, l2_dom_bb);
           /* NOTE(review): T is reassigned below before any read visible in
              this function, so this assignment and the one in the else arm
              look vestigial.  */
7314       t = NULL_TREE;
7315     }
7316   else
7317     t = integer_one_node;
7318 
       /* N1 and N2 come from the decls of the first two _LOOPTEMP_ clauses
          of the GIMPLE_OMP_FOR; both clauses must be present.  */
7319   step = fd->loop.step;
7320   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7321                                          OMP_CLAUSE__LOOPTEMP_);
7322   gcc_assert (innerc);
7323   n1 = OMP_CLAUSE_DECL (innerc);
7324   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7325   gcc_assert (innerc);
7326   n2 = OMP_CLAUSE_DECL (innerc);
       /* NOTE(review): the bias is added here rather than subtracted;
          presumably adding it a second time cancels, through wraparound in
          the unsigned iteration type, the bias applied when the values
          were produced -- TODO confirm.  */
7327   if (bias)
7328     {
7329       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7330       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7331     }
       /* Gimplify the operands: N1 converted to TYPE, N2 and STEP converted
          to ITYPE.  */
7332   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7333                                          true, NULL_TREE, true, GSI_SAME_STMT);
7334   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7335                                          true, NULL_TREE, true, GSI_SAME_STMT);
7336   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7337                                            true, NULL_TREE, true, GSI_SAME_STMT);
7338 
       /* By default the loop variable itself receives the start value and
          there is no separate end variable.  */
7339   tree startvar = fd->loop.v;
7340   tree endvar = NULL_TREE;
7341 
       /* For a combined construct the start and end values go to the decls
          of the first two _LOOPTEMP_ clauses of INNER_STMT instead.  */
7342   if (gimple_omp_for_combined_p (fd->for_stmt))
7343     {
7344       tree clauses = gimple_omp_for_clauses (inner_stmt);
7345       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7346       gcc_assert (innerc);
7347       startvar = OMP_CLAUSE_DECL (innerc);
7348       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7349                                         OMP_CLAUSE__LOOPTEMP_);
7350       gcc_assert (innerc);
7351       endvar = OMP_CLAUSE_DECL (innerc);
7352     }
       /* Emit STARTVAR = N1.  */
7353   t = fold_convert (TREE_TYPE (startvar), n1);
7354   t = force_gimple_operand_gsi (&gsi, t,
7355                                         DECL_P (startvar)
7356                                         && TREE_ADDRESSABLE (startvar),
7357                                         NULL_TREE, false, GSI_CONTINUE_LINKING);
7358   gimple *assign_stmt = gimple_build_assign (startvar, t);
7359   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7360 
       /* E is the end value in STARTVAR's type; it is also the bound used
          by the loop condition built further down.  */
7361   t = fold_convert (TREE_TYPE (startvar), n2);
7362   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7363                                         false, GSI_CONTINUE_LINKING);
7364   if (endvar)
7365     {
           /* Store E into ENDVAR and also into the loop variable, using a
              NOP_EXPR conversion when E's type is not trivially convertible
              to the loop variable's type.  */
7366       assign_stmt = gimple_build_assign (endvar, e);
7367       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7368       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7369           assign_stmt = gimple_build_assign (fd->loop.v, e);
7370       else
7371           assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7372       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7373     }
7374 
7375   tree *nonrect_bounds = NULL;
7376   if (fd->collapse > 1)
7377     {
           /* Non-rectangular loops get a zero-initialized scratch array
              with FD->last_nonrect + 1 entries.  */
7378       if (fd->non_rect)
7379           {
7380             nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7381             memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7382           }
7383       gcc_assert (gsi_bb (gsi) == entry_bb);
7384       expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7385                                         startvar);
           /* Continue in whatever block GSI refers to after the call.  */
7386       entry_bb = gsi_bb (gsi);
7387     }
7388 
7389   if (!broken_loop)
7390     {
7391       /* The code controlling the sequential loop replaces the
7392            GIMPLE_OMP_CONTINUE.  */
7393       gsi = gsi_last_nondebug_bb (cont_bb);
7394       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7395       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7396       vmain = gimple_omp_continue_control_use (cont_stmt);
7397       vback = gimple_omp_continue_control_def (cont_stmt);
7398 
           /* Unless the construct is combined, emit VBACK = VMAIN + STEP
              and then the loop condition in front of the
              GIMPLE_OMP_CONTINUE.  */
7399       if (!gimple_omp_for_combined_p (fd->for_stmt))
7400           {
7401             if (POINTER_TYPE_P (type))
7402               t = fold_build_pointer_plus (vmain, step);
7403             else
7404               t = fold_build2 (PLUS_EXPR, type, vmain, step);
7405             t = force_gimple_operand_gsi (&gsi, t,
7406                                                   DECL_P (vback)
7407                                                   && TREE_ADDRESSABLE (vback),
7408                                                   NULL_TREE, true, GSI_SAME_STMT);
7409             assign_stmt = gimple_build_assign (vback, t);
7410             gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7411 
                 /* The condition compares against E.  When VBACK is an
                    addressable decl the just-computed operand T is compared
                    instead of VBACK itself.  */
7412             t = build2 (fd->loop.cond_code, boolean_type_node,
7413                           DECL_P (vback) && TREE_ADDRESSABLE (vback)
7414                           ? t : vback, e);
7415             gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7416           }
7417 
7418       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7419       gsi_remove (&gsi, true);
7420 
           /* For non-combined collapsed loops, extract_omp_for_update_vars
              returns the block the back edge is attached to below.  */
7421       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7422           collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7423                                                                cont_bb, body_bb);
7424     }
7425 
7426   /* Remove the GIMPLE_OMP_FOR statement.  */
7427   gsi = gsi_for_stmt (fd->for_stmt);
7428   gsi_remove (&gsi, true);
7429 
7430   /* Remove the GIMPLE_OMP_RETURN statement.  */
7431   gsi = gsi_last_nondebug_bb (exit_bb);
7432   gsi_remove (&gsi, true);
7433 
       /* Drop the branch edge out of ENTRY_BB, leaving an always-taken
          fallthru edge.  For a broken loop the blocks dominated by the
          branch edge are removed with it, and the enclosing region's
          continue block is cleared.  */
7434   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7435   if (!broken_loop)
7436     remove_edge (BRANCH_EDGE (entry_bb));
7437   else
7438     {
7439       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7440       region->outer->cont = NULL;
7441     }
7442 
7443   /* Connect all the blocks.  */
7444   if (!broken_loop)
7445     {
           /* EP is the edge from the continue block back to the body.  A
              combined construct loses it entirely; a collapsed loop has it
              replaced by a true edge to COLLAPSE_BB; otherwise it simply
              becomes the true edge.  */
7446       ep = find_edge (cont_bb, body_bb);
7447       if (gimple_omp_for_combined_p (fd->for_stmt))
7448           {
7449             remove_edge (ep);
7450             ep = NULL;
7451           }
7452       else if (fd->collapse > 1)
7453           {
7454             remove_edge (ep);
7455             ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7456           }
7457       else
7458           ep->flags = EDGE_TRUE_VALUE;
           /* The edge to FIN_BB is the false edge of the condition, or a
              plain fallthru when no back edge remains.  */
7459       find_edge (cont_bb, fin_bb)->flags
7460           = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7461     }
7462 
       /* Recompute the immediate dominators affected by the edge changes.  */
7463   set_immediate_dominator (CDI_DOMINATORS, body_bb,
7464                                  recompute_dominator (CDI_DOMINATORS, body_bb));
7465   if (!broken_loop)
7466     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7467                                    recompute_dominator (CDI_DOMINATORS, fin_bb));
7468 
       /* Register a loop headed by BODY_BB for the non-combined, non-broken
          case.  The latch is set to CONT_BB only when there is no
          COLLAPSE_BB; otherwise it is not set here.  */
7469   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7470     {
7471       class loop *loop = alloc_loop ();
7472       loop->header = body_bb;
7473       if (collapse_bb == NULL)
7474           loop->latch = cont_bb;
7475       add_loop (loop, body_bb->loop_father);
7476     }
7477 }
7478 
7479 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
7480    partitioned loop.  The lowering here is abstracted, in that the
7481    loop parameters are passed through internal functions, which are
7482    further lowered by oacc_device_lower, once we get to the target
7483    compiler.  The loop is of the form:
7484 
7485    for (V = B; V LTGT E; V += S) {BODY}
7486 
7487    where LTGT is < or >.  We may have a specified chunking size, CHUNKING
7488    (constant 0 for no chunking) and we will have a GWV partitioning
7489    mask, specifying dimensions over which the loop is to be
7490    partitioned (see note below).  We generate code that looks like
7491    (this ignores tiling):
7492 
7493    <entry_bb> [incoming FALL->body, BRANCH->exit]
7494      typedef signedintify (typeof (V)) T;  // underlying signed integral type
7495      T range = E - B;
7496      T chunk_no = 0;
7497      T dir = LTGT == '<' ? +1 : -1;
7498      T chunk_max = GOACC_LOOP_CHUNKS (dir, range, S, CHUNK_SIZE, GWV);
7499      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7500 
7501    <head_bb> [created by splitting end of entry_bb]
7502      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7503      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7504      if (!(offset LTGT bound)) goto bottom_bb;
7505 
7506    <body_bb> [incoming]
7507      V = B + offset;
7508      {BODY}
7509 
7510    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7511      offset += step;
7512      if (offset LTGT bound) goto body_bb; [*]
7513 
7514    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7515      chunk_no++;
7516      if (chunk_no < chunk_max) goto head_bb;
7517 
7518    <exit_bb> [incoming]
7519      V = B + ((range - dir + S) / S) * S [*]
7520 
7521    [*] Needed if V live at end of loop.

        REGION provides the entry, exit and (possibly NULL) continue blocks
        of the loop.  FD describes the loop itself: iteration variable,
        bounds, step, condition code, collapse depth and tiling flag.  */
7522 
7523 static void
expand_oacc_for(struct omp_region * region,struct omp_for_data * fd)7524 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7525 {
       /* Only functions carrying the "oacc kernels parallelized" attribute
          are expected to reach this point in SSA form, and that attribute
          is expected to come together with "oacc kernels".  Both
          expectations are asserted below.  */
7526   bool is_oacc_kernels_parallelized
7527     = (lookup_attribute ("oacc kernels parallelized",
7528                                DECL_ATTRIBUTES (current_function_decl)) != NULL);
7529   {
7530     bool is_oacc_kernels
7531       = (lookup_attribute ("oacc kernels",
7532                                  DECL_ATTRIBUTES (current_function_decl)) != NULL);
7533     if (is_oacc_kernels_parallelized)
7534       gcc_checking_assert (is_oacc_kernels);
7535   }
7536   gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7537   /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7538      for SSA specifics, and some are for 'parloops' OpenACC
7539      'kernels'-parallelized specifics.  */
7540 
7541   tree v = fd->loop.v;
7542   enum tree_code cond_code = fd->loop.cond_code;
7543   enum tree_code plus_code = PLUS_EXPR;
7544 
       /* CHUNK_SIZE starts out as -1 and GWV as 0.  CHUNK_SIZE is replaced
          by 0 below when not chunking, and GWV by the gang mask when in
          SSA form.  */
7545   tree chunk_size = integer_minus_one_node;
7546   tree gwv = integer_zero_node;
7547   tree iter_type = TREE_TYPE (v);
7548   tree diff_type = iter_type;
7549   tree plus_type = iter_type;
7550   struct oacc_collapse *counts = NULL;
7551 
7552   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7553                            == GF_OMP_FOR_KIND_OACC_LOOP);
7554   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7555   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7556 
       /* A pointer iteration variable is advanced with POINTER_PLUS_EXPR,
          and the offsets added to it are converted to sizetype.  */
7557   if (POINTER_TYPE_P (iter_type))
7558     {
7559       plus_code = POINTER_PLUS_EXPR;
7560       plus_type = sizetype;
7561     }
       /* DIFF_TYPE is the type all range/step/offset arithmetic is done in.
          Start from the iteration type, widen it to the widest step type of
          the collapsed loops, then force it to be signed and at least as
          wide as int.  */
7562   for (int ix = fd->collapse; ix--;)
7563     {
7564       tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7565       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7566           diff_type = diff_type2;
7567     }
7568   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7569     diff_type = signed_type_for (diff_type);
7570   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7571     diff_type = integer_type_node;
7572 
7573   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7574   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7575   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
7576   basic_block bottom_bb = NULL;
7577 
7578   /* entry_bb has two successors; the branch edge is to the exit
7579      block, fallthrough edge to body.  */
7580   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7581                 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7582 
7583   /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
7584      body_bb, or to a block whose only successor is the body_bb.  Its
7585      fallthrough successor is the final block (same as the branch
7586      successor of the entry_bb).  */
7587   if (cont_bb)
7588     {
7589       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7590       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7591 
7592       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7593       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7594     }
7595   else
7596     gcc_assert (!gimple_in_ssa_p (cfun));
7597 
7598   /* The exit block only has entry_bb and cont_bb as predecessors.  */
7599   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7600 
7601   tree chunk_no;
7602   tree chunk_max = NULL_TREE;
7603   tree bound, offset;
7604   tree step = create_tmp_var (diff_type, ".step");
       /* UP is true for a '<' loop; DIR is the matching +1/-1 constant.  */
7605   bool up = cond_code == LT_EXPR;
7606   tree dir = build_int_cst (diff_type, up ? +1 : -1);
       /* The chunk loop is only generated when not in SSA form.  */
7607   bool chunking = !gimple_in_ssa_p (cfun);
7608   bool negating;
7609 
7610   /* Tiling vars.  */
7611   tree tile_size = NULL_TREE;
7612   tree element_s = NULL_TREE;
7613   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7614   basic_block elem_body_bb = NULL;
7615   basic_block elem_cont_bb = NULL;
7616 
7617   /* SSA instances.  */
7618   tree offset_incr = NULL_TREE;
7619   tree offset_init = NULL_TREE;
7620 
7621   gimple_stmt_iterator gsi;
7622   gassign *ass;
7623   gcall *call;
7624   gimple *stmt;
7625   tree expr;
7626   location_t loc;
7627   edge split, be, fte;
7628 
7629   /* Split the end of entry_bb to create head_bb.  */
7630   split = split_block (entry_bb, last_stmt (entry_bb));
7631   basic_block head_bb = split->dest;
7632   entry_bb = split->src;
7633 
7634   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
7635   gsi = gsi_last_nondebug_bb (entry_bb);
7636   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7637   loc = gimple_location (for_stmt);
7638 
       /* In SSA form the initial offset is the index operand already present
          on the GIMPLE_OMP_FOR, and the loop is required to start at zero.  */
7639   if (gimple_in_ssa_p (cfun))
7640     {
7641       offset_init = gimple_omp_for_index (for_stmt, 0);
7642       gcc_assert (integer_zerop (fd->loop.n1));
7643       /* The SSA parallelizer does gang parallelism.  */
7644       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7645     }
7646 
       /* Collapsed or tiled loops are only supported outside SSA form and
          for upward-counting loops.  TOTAL is the value returned by
          expand_oacc_collapse_init (presumably the combined iteration
          count -- confirm against that function); it is stored into the
          combined loop bound when that bound is a variable.  */
7647   if (fd->collapse > 1 || fd->tiling)
7648     {
7649       gcc_assert (!gimple_in_ssa_p (cfun) && up);
7650       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7651       tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7652                                                         TREE_TYPE (fd->loop.n2), loc);
7653 
7654       if (SSA_VAR_P (fd->loop.n2))
7655           {
7656             total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7657                                                       true, GSI_SAME_STMT);
7658             ass = gimple_build_assign (fd->loop.n2, total);
7659             gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7660           }
7661     }
7662 
       /* B, E and S are the begin, end and step of the (combined) loop.  */
7663   tree b = fd->loop.n1;
7664   tree e = fd->loop.n2;
7665   tree s = fd->loop.step;
7666 
7667   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7668   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7669 
7670   /* Convert the step, avoiding possible unsigned->signed overflow.
            For a downward loop with an unsigned step, negate in the unsigned
            type first, convert to DIFF_TYPE, then negate back.  */
7671   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7672   if (negating)
7673     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7674   s = fold_convert (diff_type, s);
7675   if (negating)
7676     s = fold_build1 (NEGATE_EXPR, diff_type, s);
7677   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7678 
7679   if (!chunking)
7680     chunk_size = integer_zero_node;
7681   expr = fold_convert (diff_type, chunk_size);
7682   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7683                                                    NULL_TREE, true, GSI_SAME_STMT);
7684 
7685   if (fd->tiling)
7686     {
7687       /* Determine the tile size and element step,
7688            modify the outer loop step size.  TILE_SIZE is the product of
                the per-loop tile counts; ELEMENT_S keeps the original step
                and S becomes the original step times TILE_SIZE.  */
7689       tile_size = create_tmp_var (diff_type, ".tile_size");
7690       expr = build_int_cst (diff_type, 1);
7691       for (int ix = 0; ix < fd->collapse; ix++)
7692           expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7693       expr = force_gimple_operand_gsi (&gsi, expr, true,
7694                                                NULL_TREE, true, GSI_SAME_STMT);
7695       ass = gimple_build_assign (tile_size, expr);
7696       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7697 
7698       element_s = create_tmp_var (diff_type, ".element_s");
7699       ass = gimple_build_assign (element_s, s);
7700       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7701 
7702       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7703       s = force_gimple_operand_gsi (&gsi, expr, true,
7704                                             NULL_TREE, true, GSI_SAME_STMT);
7705     }
7706 
7707   /* Determine the range, avoiding possible unsigned->signed overflow.
            For a downward loop over an unsigned type compute B - E and
            negate the converted result, instead of computing E - B.  */
7708   negating = !up && TYPE_UNSIGNED (iter_type);
7709   expr = fold_build2 (MINUS_EXPR, plus_type,
7710                           fold_convert (plus_type, negating ? b : e),
7711                           fold_convert (plus_type, negating ? e : b));
7712   expr = fold_convert (diff_type, expr);
7713   if (negating)
7714     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7715   tree range = force_gimple_operand_gsi (&gsi, expr, true,
7716                                                    NULL_TREE, true, GSI_SAME_STMT);
7717 
       /* CHUNK_NO starts as the constant 0.  When chunking it is turned into
          a variable initialized to 0 and CHUNK_MAX receives the result of
          the IFN_GOACC_LOOP_CHUNKS call; otherwise the constant 0 is also
          used as CHUNK_SIZE.  */
7718   chunk_no = build_int_cst (diff_type, 0);
7719   if (chunking)
7720     {
7721       gcc_assert (!gimple_in_ssa_p (cfun));
7722 
7723       expr = chunk_no;
7724       chunk_max = create_tmp_var (diff_type, ".chunk_max");
7725       chunk_no = create_tmp_var (diff_type, ".chunk_no");
7726 
7727       ass = gimple_build_assign (chunk_no, expr);
7728       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7729 
7730       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7731                                                    build_int_cst (integer_type_node,
7732                                                                       IFN_GOACC_LOOP_CHUNKS),
7733                                                    dir, range, s, chunk_size, gwv);
7734       gimple_call_set_lhs (call, chunk_max);
7735       gimple_set_location (call, loc);
7736       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7737     }
7738   else
7739     chunk_size = chunk_no;
7740 
       /* STEP is the per-thread step returned by IFN_GOACC_LOOP_STEP.  */
7741   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7742                                              build_int_cst (integer_type_node,
7743                                                                 IFN_GOACC_LOOP_STEP),
7744                                              dir, range, s, chunk_size, gwv);
7745   gimple_call_set_lhs (call, step);
7746   gimple_set_location (call, loc);
7747   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7748 
7749   /* Remove the GIMPLE_OMP_FOR.  */
7750   gsi_remove (&gsi, true);
7751 
7752   /* Fixup edges from head_bb.  The branch edge becomes the false edge
            and the fallthrough edge the true edge of the condition that is
            appended to head_bb below.  */
7753   be = BRANCH_EDGE (head_bb);
7754   fte = FALLTHRU_EDGE (head_bb);
7755   be->flags |= EDGE_FALSE_VALUE;
7756   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7757 
7758   basic_block body_bb = fte->dest;
7759 
       /* In SSA form the offset use and definition are the control operands
          of the GIMPLE_OMP_CONTINUE.  Otherwise one temporary serves as the
          initial value, the use and the incremented value.  */
7760   if (gimple_in_ssa_p (cfun))
7761     {
7762       gsi = gsi_last_nondebug_bb (cont_bb);
7763       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7764 
7765       offset = gimple_omp_continue_control_use (cont_stmt);
7766       offset_incr = gimple_omp_continue_control_def (cont_stmt);
7767     }
7768   else
7769     {
7770       offset = create_tmp_var (diff_type, ".offset");
7771       offset_init = offset_incr = offset;
7772     }
7773   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7774 
7775   /* Loop offset & bound go into head_bb.  */
7776   gsi = gsi_start_bb (head_bb);
7777 
7778   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7779                                              build_int_cst (integer_type_node,
7780                                                                 IFN_GOACC_LOOP_OFFSET),
7781                                              dir, range, s,
7782                                              chunk_size, gwv, chunk_no);
7783   gimple_call_set_lhs (call, offset_init);
7784   gimple_set_location (call, loc);
7785   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7786 
7787   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7788                                              build_int_cst (integer_type_node,
7789                                                                 IFN_GOACC_LOOP_BOUND),
7790                                              dir, range, s,
7791                                              chunk_size, gwv, offset_init);
7792   gimple_call_set_lhs (call, bound);
7793   gimple_set_location (call, loc);
7794   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7795 
       /* head_bb ends with the test 'offset_init LTGT bound'.  */
7796   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7797   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7798                         GSI_CONTINUE_LINKING);
7799 
7800   /* V assignment goes into body_bb.  */
7801   if (!gimple_in_ssa_p (cfun))
7802     {
7803       gsi = gsi_start_bb (body_bb);
7804 
           /* V = B + offset.  */
7805       expr = build2 (plus_code, iter_type, b,
7806                          fold_convert (plus_type, offset));
7807       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7808                                                true, GSI_SAME_STMT);
7809       ass = gimple_build_assign (v, expr);
7810       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7811 
7812       if (fd->collapse > 1 || fd->tiling)
7813           expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7814 
7815       if (fd->tiling)
7816           {
7817             /* Determine the range of the element loop -- usually simply
7818                the tile_size, but could be smaller if the final
7819                iteration of the outer loop is a partial tile.  */
7820             tree e_range = create_tmp_var (diff_type, ".e_range");
7821 
                 /* e_range = MIN (bound - offset, tile_size * element_s).  */
7822             expr = build2 (MIN_EXPR, diff_type,
7823                                build2 (MINUS_EXPR, diff_type, bound, offset),
7824                                build2 (MULT_EXPR, diff_type, tile_size,
7825                                          element_s));
7826             expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7827                                                      true, GSI_SAME_STMT);
7828             ass = gimple_build_assign (e_range, expr);
7829             gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7830 
7831             /* Determine bound, offset & step of inner loop. */
7832             e_bound = create_tmp_var (diff_type, ".e_bound");
7833             e_offset = create_tmp_var (diff_type, ".e_offset");
7834             e_step = create_tmp_var (diff_type, ".e_step");
7835 
7836             /* Mark these as element loops.  The element-loop calls pass
                    -1 as the partitioning mask and 0 both as chunk size and
                    as chunk number.  */
7837             tree t, e_gwv = integer_minus_one_node;
7838             tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
7839 
7840             t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7841             call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7842                                                        element_s, chunk, e_gwv, chunk);
7843             gimple_call_set_lhs (call, e_offset);
7844             gimple_set_location (call, loc);
7845             gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7846 
7847             t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7848             call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7849                                                        element_s, chunk, e_gwv, e_offset);
7850             gimple_call_set_lhs (call, e_bound);
7851             gimple_set_location (call, loc);
7852             gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7853 
7854             t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7855             call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7856                                                        element_s, chunk, e_gwv);
7857             gimple_call_set_lhs (call, e_step);
7858             gimple_set_location (call, loc);
7859             gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7860 
7861             /* Add test and split block.  After the split, BODY_BB is the
                    part ending in the new test and ELEM_BODY_BB the part
                    following it.  If the continue block was the body block
                    itself, it is now the split-off part.  */
7862             expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7863             stmt = gimple_build_cond_empty (expr);
7864             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7865             split = split_block (body_bb, stmt);
7866             elem_body_bb = split->dest;
7867             if (cont_bb == body_bb)
7868               cont_bb = elem_body_bb;
7869             body_bb = split->src;
7870 
7871             split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7872 
7873             /* Add a dummy exit for the tiled block when cont_bb is missing.  */
7874             if (cont_bb == NULL)
7875               {
7876                 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7877                 e->probability = profile_probability::even ();
7878                 split->probability = profile_probability::even ();
7879               }
7880 
7881             /* Initialize the user's loop vars.  */
7882             gsi = gsi_start_bb (elem_body_bb);
7883             expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7884                                              diff_type);
7885           }
7886     }
7887 
7888   /* Loop increment goes into cont_bb.  If this is not a loop, we
7889      will have spawned threads as if it was, and each one will
7890      execute one iteration.  The specification is not explicit about
7891      whether such constructs are ill-formed or not, and they can
7892      occur, especially when noreturn routines are involved.  */
7893   if (cont_bb)
7894     {
7895       gsi = gsi_last_nondebug_bb (cont_bb);
7896       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7897       loc = gimple_location (cont_stmt);
7898 
7899       if (fd->tiling)
7900           {
7901             /* Insert element loop increment and test.  */
7902             expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7903             expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7904                                                      true, GSI_SAME_STMT);
7905             ass = gimple_build_assign (e_offset, expr);
7906             gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7907             expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7908 
7909             stmt = gimple_build_cond_empty (expr);
7910             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
                 /* ELEM_CONT_BB ends in the element-loop test; the remainder,
                    which still holds the GIMPLE_OMP_CONTINUE, stays CONT_BB.  */
7911             split = split_block (cont_bb, stmt);
7912             elem_cont_bb = split->src;
7913             cont_bb = split->dest;
7914 
7915             split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7916             split->probability = profile_probability::unlikely ().guessed ();
7917             edge latch_edge
7918               = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7919             latch_edge->probability = profile_probability::likely ().guessed ();
7920 
                 /* When the element loop's initial test in BODY_BB fails,
                    skip straight to CONT_BB.  */
7921             edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7922             skip_edge->probability = profile_probability::unlikely ().guessed ();
7923             edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7924             loop_entry_edge->probability
7925               = profile_probability::likely ().guessed ();
7926 
7927             gsi = gsi_for_stmt (cont_stmt);
7928           }
7929 
7930       /* Increment offset.  In SSA form the addition is done in the
                iteration type, otherwise in DIFF_TYPE.  */
7931       if (gimple_in_ssa_p (cfun))
7932           expr = build2 (plus_code, iter_type, offset,
7933                            fold_convert (plus_type, step));
7934       else
7935           expr = build2 (PLUS_EXPR, diff_type, offset, step);
7936       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7937                                                true, GSI_SAME_STMT);
7938       ass = gimple_build_assign (offset_incr, expr);
7939       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7940       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7941       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7942 
7943       /* Remove the GIMPLE_OMP_CONTINUE.  */
7944       gsi_remove (&gsi, true);
7945 
7946       /* Fixup edges from cont_bb.  */
7947       be = BRANCH_EDGE (cont_bb);
7948       fte = FALLTHRU_EDGE (cont_bb);
7949       be->flags |= EDGE_TRUE_VALUE;
7950       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7951 
7952       if (chunking)
7953           {
7954             /* Split the beginning of exit_bb to make bottom_bb.  We
7955                need to insert a nop at the start, because splitting is
7956                after a stmt, not before.  */
7957             gsi = gsi_start_bb (exit_bb);
7958             stmt = gimple_build_nop ();
7959             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7960             split = split_block (exit_bb, stmt);
7961             bottom_bb = split->src;
7962             exit_bb = split->dest;
7963             gsi = gsi_last_bb (bottom_bb);
7964 
7965             /* Chunk increment and test goes into bottom_bb.  */
7966             expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7967                                build_int_cst (diff_type, 1));
7968             ass = gimple_build_assign (chunk_no, expr);
7969             gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7970 
7971             /* Chunk test at end of bottom_bb.  */
7972             expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7973             gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7974                                   GSI_CONTINUE_LINKING);
7975 
7976             /* Fixup edges from bottom_bb.  The edge to exit_bb becomes
                    the false edge; a new true edge loops back to head_bb.  */
7977             split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7978             split->probability = profile_probability::unlikely ().guessed ();
7979             edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7980             latch_edge->probability = profile_probability::likely ().guessed ();
7981           }
7982     }
7983 
7984   gsi = gsi_last_nondebug_bb (exit_bb);
7985   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7986   loc = gimple_location (gsi_stmt (gsi));
7987 
7988   if (!gimple_in_ssa_p (cfun))
7989     {
7990       /* Insert the final value of V, in case it is live.  This is the
7991            value for the only thread that survives past the join.
                The value is B + ((range - dir + s) / s) * s.  */
7992       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7993       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7994       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7995       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7996       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7997       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7998                                                true, GSI_SAME_STMT);
7999       ass = gimple_build_assign (v, expr);
8000       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8001     }
8002 
8003   /* Remove the OMP_RETURN.  */
8004   gsi_remove (&gsi, true);
8005 
8006   if (cont_bb)
8007     {
8008       /* We now have one, two or three nested loops.  Update the loop
8009            structures.  */
8010       class loop *parent = entry_bb->loop_father;
8011       class loop *body = body_bb->loop_father;
8012 
           /* When chunking, register the chunk loop (head_bb .. bottom_bb)
              and nest the body loop inside it.  When not chunking and the
              body already sits in a loop different from the entry block's
              loop, only check that loop and register nothing new.  */
8013       if (chunking)
8014           {
8015             class loop *chunk_loop = alloc_loop ();
8016             chunk_loop->header = head_bb;
8017             chunk_loop->latch = bottom_bb;
8018             add_loop (chunk_loop, parent);
8019             parent = chunk_loop;
8020           }
8021       else if (parent != body)
8022           {
8023             gcc_assert (body->header == body_bb);
8024             gcc_assert (body->latch == cont_bb
8025                           || single_pred (body->latch) == cont_bb);
8026             parent = NULL;
8027           }
8028 
8029       if (parent)
8030           {
8031             class loop *body_loop = alloc_loop ();
8032             body_loop->header = body_bb;
8033             body_loop->latch = cont_bb;
8034             add_loop (body_loop, parent);
8035 
8036             if (fd->tiling)
8037               {
8038                 /* Insert tiling's element loop.  */
8039                 class loop *inner_loop = alloc_loop ();
8040                 inner_loop->header = elem_body_bb;
8041                 inner_loop->latch = elem_cont_bb;
8042                 add_loop (inner_loop, body_loop);
8043               }
8044           }
8045     }
8046 }
8047 
8048 /* Expand the OMP loop defined by REGION.  */
8049 
8050 static void
expand_omp_for(struct omp_region * region,gimple * inner_stmt)8051 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8052 {
8053   struct omp_for_data fd;
8054   struct omp_for_data_loop *loops;
8055 
8056   loops = XALLOCAVEC (struct omp_for_data_loop,
8057                           gimple_omp_for_collapse (last_stmt (region->entry)));
8058   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8059                               &fd, loops);
8060   region->sched_kind = fd.sched_kind;
8061   region->sched_modifiers = fd.sched_modifiers;
8062   region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8063   if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8064     {
8065       for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8066           if ((loops[i].m1 || loops[i].m2)
8067               && (loops[i].m1 == NULL_TREE
8068                     || TREE_CODE (loops[i].m1) == INTEGER_CST)
8069               && (loops[i].m2 == NULL_TREE
8070                     || TREE_CODE (loops[i].m2) == INTEGER_CST)
8071               && TREE_CODE (loops[i].step) == INTEGER_CST
8072               && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8073             {
8074               tree t;
8075               tree itype = TREE_TYPE (loops[i].v);
8076               if (loops[i].m1 && loops[i].m2)
8077                 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8078               else if (loops[i].m1)
8079                 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8080               else
8081                 t = loops[i].m2;
8082               t = fold_build2 (MULT_EXPR, itype, t,
8083                                    fold_convert (itype,
8084                                                      loops[i - loops[i].outer].step));
8085               if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8086                 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8087                                      fold_build1 (NEGATE_EXPR, itype, t),
8088                                      fold_build1 (NEGATE_EXPR, itype,
8089                                                       fold_convert (itype,
8090                                                                         loops[i].step)));
8091               else
8092                 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8093                                      fold_convert (itype, loops[i].step));
8094               if (integer_nonzerop (t))
8095                 error_at (gimple_location (fd.for_stmt),
8096                               "invalid OpenMP non-rectangular loop step; "
8097                               "%<(%E - %E) * %E%> is not a multiple of loop %d "
8098                               "step %qE",
8099                               loops[i].m2 ? loops[i].m2 : integer_zero_node,
8100                               loops[i].m1 ? loops[i].m1 : integer_zero_node,
8101                               loops[i - loops[i].outer].step, i + 1,
8102                               loops[i].step);
8103             }
8104     }
8105 
8106   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8107   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8108   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8109   if (region->cont)
8110     {
8111       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8112       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8113       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8114     }
8115   else
8116     /* If there isn't a continue then this is a degenerate case where
8117        the introduction of abnormal edges during lowering will prevent
8118        original loops from being detected.  Fix that up.  */
8119     loops_state_set (LOOPS_NEED_FIXUP);
8120 
8121   if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8122     expand_omp_simd (region, &fd);
8123   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8124     {
8125       gcc_assert (!inner_stmt && !fd.non_rect);
8126       expand_oacc_for (region, &fd);
8127     }
8128   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8129     {
8130       if (gimple_omp_for_combined_into_p (fd.for_stmt))
8131           expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8132       else
8133           expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8134     }
8135   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8136              && !fd.have_ordered)
8137     {
8138       if (fd.chunk_size == NULL)
8139           expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8140       else
8141           expand_omp_for_static_chunk (region, &fd, inner_stmt);
8142     }
8143   else
8144     {
8145       int fn_index, start_ix, next_ix;
8146       unsigned HOST_WIDE_INT sched = 0;
8147       tree sched_arg = NULL_TREE;
8148 
8149       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8150                       == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8151       if (fd.chunk_size == NULL
8152             && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8153           fd.chunk_size = integer_zero_node;
8154       switch (fd.sched_kind)
8155           {
8156           case OMP_CLAUSE_SCHEDULE_RUNTIME:
8157             if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8158                 && fd.lastprivate_conditional == 0)
8159               {
8160                 gcc_assert (!fd.have_ordered);
8161                 fn_index = 6;
8162                 sched = 4;
8163               }
8164             else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8165                        && !fd.have_ordered
8166                        && fd.lastprivate_conditional == 0)
8167               fn_index = 7;
8168             else
8169               {
8170                 fn_index = 3;
8171                 sched = (HOST_WIDE_INT_1U << 31);
8172               }
8173             break;
8174           case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8175           case OMP_CLAUSE_SCHEDULE_GUIDED:
8176             if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8177                 && !fd.have_ordered
8178                 && fd.lastprivate_conditional == 0)
8179               {
8180                 fn_index = 3 + fd.sched_kind;
8181                 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8182                 break;
8183               }
8184             fn_index = fd.sched_kind;
8185             sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8186             sched += (HOST_WIDE_INT_1U << 31);
8187             break;
8188           case OMP_CLAUSE_SCHEDULE_STATIC:
8189             gcc_assert (fd.have_ordered);
8190             fn_index = 0;
8191             sched = (HOST_WIDE_INT_1U << 31) + 1;
8192             break;
8193           default:
8194             gcc_unreachable ();
8195           }
8196       if (!fd.ordered)
8197           fn_index += fd.have_ordered * 8;
8198       if (fd.ordered)
8199           start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8200       else
8201           start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8202       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8203       if (fd.have_reductemp || fd.have_pointer_condtemp)
8204           {
8205             if (fd.ordered)
8206               start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8207             else if (fd.have_ordered)
8208               start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8209             else
8210               start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8211             sched_arg = build_int_cstu (long_integer_type_node, sched);
8212             if (!fd.chunk_size)
8213               fd.chunk_size = integer_zero_node;
8214           }
8215       if (fd.iter_type == long_long_unsigned_type_node)
8216           {
8217             start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8218                               - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8219             next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8220                           - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8221           }
8222       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8223                                     (enum built_in_function) next_ix, sched_arg,
8224                                     inner_stmt);
8225     }
8226 
8227   if (gimple_in_ssa_p (cfun))
8228     update_ssa (TODO_update_ssa_only_virtuals);
8229 }
8230 
8231 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
8232 
8233           v = GOMP_sections_start (n);
8234     L0:
8235           switch (v)
8236             {
8237             case 0:
8238               goto L2;
8239             case 1:
8240               section 1;
8241               goto L1;
8242             case 2:
8243               ...
8244             case n:
8245               ...
8246             default:
8247               abort ();
8248             }
8249     L1:
8250           v = GOMP_sections_next ();
8251           goto L0;
8252     L2:
8253           reduction;
8254 
8255     If this is a combined parallel sections, replace the call to
8256     GOMP_sections_start with call to GOMP_sections_next.  */
8257 
8258 static void
expand_omp_sections(struct omp_region * region)8259 expand_omp_sections (struct omp_region *region)
8260 {
8261   tree t, u, vin = NULL, vmain, vnext, l2;
8262   unsigned len;
8263   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8264   gimple_stmt_iterator si, switch_si;
8265   gomp_sections *sections_stmt;
8266   gimple *stmt;
8267   gomp_continue *cont;
8268   edge_iterator ei;
8269   edge e;
8270   struct omp_region *inner;
8271   unsigned i, casei;
8272   bool exit_reachable = region->cont != NULL;
8273 
8274   gcc_assert (region->exit != NULL);
8275   entry_bb = region->entry;
8276   l0_bb = single_succ (entry_bb);
8277   l1_bb = region->cont;
8278   l2_bb = region->exit;
8279   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8280     l2 = gimple_block_label (l2_bb);
8281   else
8282     {
8283       /* This can happen if there are reductions.  */
8284       len = EDGE_COUNT (l0_bb->succs);
8285       gcc_assert (len > 0);
8286       e = EDGE_SUCC (l0_bb, len - 1);
8287       si = gsi_last_nondebug_bb (e->dest);
8288       l2 = NULL_TREE;
8289       if (gsi_end_p (si)
8290             || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8291           l2 = gimple_block_label (e->dest);
8292       else
8293           FOR_EACH_EDGE (e, ei, l0_bb->succs)
8294             {
8295               si = gsi_last_nondebug_bb (e->dest);
8296               if (gsi_end_p (si)
8297                     || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8298                 {
8299                     l2 = gimple_block_label (e->dest);
8300                     break;
8301                 }
8302             }
8303     }
8304   if (exit_reachable)
8305     default_bb = create_empty_bb (l1_bb->prev_bb);
8306   else
8307     default_bb = create_empty_bb (l0_bb);
8308 
8309   /* We will build a switch() with enough cases for all the
8310      GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8311      and a default case to abort if something goes wrong.  */
8312   len = EDGE_COUNT (l0_bb->succs);
8313 
8314   /* Use vec::quick_push on label_vec throughout, since we know the size
8315      in advance.  */
8316   auto_vec<tree> label_vec (len);
8317 
8318   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8319      GIMPLE_OMP_SECTIONS statement.  */
8320   si = gsi_last_nondebug_bb (entry_bb);
8321   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8322   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8323   vin = gimple_omp_sections_control (sections_stmt);
8324   tree clauses = gimple_omp_sections_clauses (sections_stmt);
8325   tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8326   tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8327   tree cond_var = NULL_TREE;
8328   if (reductmp || condtmp)
8329     {
8330       tree reductions = null_pointer_node, mem = null_pointer_node;
8331       tree memv = NULL_TREE, condtemp = NULL_TREE;
8332       gimple_stmt_iterator gsi = gsi_none ();
8333       gimple *g = NULL;
8334       if (reductmp)
8335           {
8336             reductions = OMP_CLAUSE_DECL (reductmp);
8337             gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8338             g = SSA_NAME_DEF_STMT (reductions);
8339             reductions = gimple_assign_rhs1 (g);
8340             OMP_CLAUSE_DECL (reductmp) = reductions;
8341             gsi = gsi_for_stmt (g);
8342           }
8343       else
8344           gsi = si;
8345       if (condtmp)
8346           {
8347             condtemp = OMP_CLAUSE_DECL (condtmp);
8348             tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8349                                             OMP_CLAUSE__CONDTEMP_);
8350             cond_var = OMP_CLAUSE_DECL (c);
8351             tree type = TREE_TYPE (condtemp);
8352             memv = create_tmp_var (type);
8353             TREE_ADDRESSABLE (memv) = 1;
8354             unsigned cnt = 0;
8355             for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8356               if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8357                     && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8358                 ++cnt;
8359             unsigned HOST_WIDE_INT sz
8360               = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8361             expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8362                                            false);
8363             mem = build_fold_addr_expr (memv);
8364           }
8365       t = build_int_cst (unsigned_type_node, len - 1);
8366       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8367       stmt = gimple_build_call (u, 3, t, reductions, mem);
8368       gimple_call_set_lhs (stmt, vin);
8369       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8370       if (condtmp)
8371           {
8372             expand_omp_build_assign (&gsi, condtemp, memv, false);
8373             tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8374                                  vin, build_one_cst (TREE_TYPE (cond_var)));
8375             expand_omp_build_assign (&gsi, cond_var, t, false);
8376           }
8377       if (reductmp)
8378           {
8379             gsi_remove (&gsi, true);
8380             release_ssa_name (gimple_assign_lhs (g));
8381           }
8382     }
8383   else if (!is_combined_parallel (region))
8384     {
8385       /* If we are not inside a combined parallel+sections region,
8386            call GOMP_sections_start.  */
8387       t = build_int_cst (unsigned_type_node, len - 1);
8388       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8389       stmt = gimple_build_call (u, 1, t);
8390     }
8391   else
8392     {
8393       /* Otherwise, call GOMP_sections_next.  */
8394       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8395       stmt = gimple_build_call (u, 0);
8396     }
8397   if (!reductmp && !condtmp)
8398     {
8399       gimple_call_set_lhs (stmt, vin);
8400       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8401     }
8402   gsi_remove (&si, true);
8403 
8404   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8405      L0_BB.  */
8406   switch_si = gsi_last_nondebug_bb (l0_bb);
8407   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8408   if (exit_reachable)
8409     {
8410       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8411       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8412       vmain = gimple_omp_continue_control_use (cont);
8413       vnext = gimple_omp_continue_control_def (cont);
8414     }
8415   else
8416     {
8417       vmain = vin;
8418       vnext = NULL_TREE;
8419     }
8420 
8421   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8422   label_vec.quick_push (t);
8423   i = 1;
8424 
8425   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
8426   for (inner = region->inner, casei = 1;
8427        inner;
8428        inner = inner->next, i++, casei++)
8429     {
8430       basic_block s_entry_bb, s_exit_bb;
8431 
8432       /* Skip optional reduction region.  */
8433       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8434           {
8435             --i;
8436             --casei;
8437             continue;
8438           }
8439 
8440       s_entry_bb = inner->entry;
8441       s_exit_bb = inner->exit;
8442 
8443       t = gimple_block_label (s_entry_bb);
8444       u = build_int_cst (unsigned_type_node, casei);
8445       u = build_case_label (u, NULL, t);
8446       label_vec.quick_push (u);
8447 
8448       si = gsi_last_nondebug_bb (s_entry_bb);
8449       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8450       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8451       gsi_remove (&si, true);
8452       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8453 
8454       if (s_exit_bb == NULL)
8455           continue;
8456 
8457       si = gsi_last_nondebug_bb (s_exit_bb);
8458       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8459       gsi_remove (&si, true);
8460 
8461       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8462     }
8463 
8464   /* Error handling code goes in DEFAULT_BB.  */
8465   t = gimple_block_label (default_bb);
8466   u = build_case_label (NULL, NULL, t);
8467   make_edge (l0_bb, default_bb, 0);
8468   add_bb_to_loop (default_bb, current_loops->tree_root);
8469 
8470   stmt = gimple_build_switch (vmain, u, label_vec);
8471   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8472   gsi_remove (&switch_si, true);
8473 
8474   si = gsi_start_bb (default_bb);
8475   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8476   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8477 
8478   if (exit_reachable)
8479     {
8480       tree bfn_decl;
8481 
8482       /* Code to get the next section goes in L1_BB.  */
8483       si = gsi_last_nondebug_bb (l1_bb);
8484       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8485 
8486       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8487       stmt = gimple_build_call (bfn_decl, 0);
8488       gimple_call_set_lhs (stmt, vnext);
8489       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8490       if (cond_var)
8491           {
8492             tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8493                                  vnext, build_one_cst (TREE_TYPE (cond_var)));
8494             expand_omp_build_assign (&si, cond_var, t, false);
8495           }
8496       gsi_remove (&si, true);
8497 
8498       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8499     }
8500 
8501   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
8502   si = gsi_last_nondebug_bb (l2_bb);
8503   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8504     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8505   else if (gimple_omp_return_lhs (gsi_stmt (si)))
8506     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8507   else
8508     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8509   stmt = gimple_build_call (t, 0);
8510   if (gimple_omp_return_lhs (gsi_stmt (si)))
8511     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8512   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8513   gsi_remove (&si, true);
8514 
8515   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8516 }
8517 
8518 /* Expand code for an OpenMP single or scope directive.  We've already expanded
8519    much of the code, here we simply place the GOMP_barrier call.  */
8520 
8521 static void
expand_omp_single(struct omp_region * region)8522 expand_omp_single (struct omp_region *region)
8523 {
8524   basic_block entry_bb, exit_bb;
8525   gimple_stmt_iterator si;
8526 
8527   entry_bb = region->entry;
8528   exit_bb = region->exit;
8529 
8530   si = gsi_last_nondebug_bb (entry_bb);
8531   enum gimple_code code = gimple_code (gsi_stmt (si));
8532   gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8533   gsi_remove (&si, true);
8534   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8535 
8536   if (exit_bb == NULL)
8537     {
8538       gcc_assert (code == GIMPLE_OMP_SCOPE);
8539       return;
8540     }
8541 
8542   si = gsi_last_nondebug_bb (exit_bb);
8543   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8544     {
8545       tree t = gimple_omp_return_lhs (gsi_stmt (si));
8546       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8547     }
8548   gsi_remove (&si, true);
8549   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8550 }
8551 
8552 /* Generic expansion for OpenMP synchronization directives: master,
8553    ordered and critical.  All we need to do here is remove the entry
8554    and exit markers for REGION.  */
8555 
8556 static void
expand_omp_synch(struct omp_region * region)8557 expand_omp_synch (struct omp_region *region)
8558 {
8559   basic_block entry_bb, exit_bb;
8560   gimple_stmt_iterator si;
8561 
8562   entry_bb = region->entry;
8563   exit_bb = region->exit;
8564 
8565   si = gsi_last_nondebug_bb (entry_bb);
8566   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8567                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8568                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8569                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8570                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8571                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8572                 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8573   if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8574       && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8575     {
8576       expand_omp_taskreg (region);
8577       return;
8578     }
8579   gsi_remove (&si, true);
8580   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8581 
8582   if (exit_bb)
8583     {
8584       si = gsi_last_nondebug_bb (exit_bb);
8585       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8586       gsi_remove (&si, true);
8587       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8588     }
8589 }
8590 
8591 /* Translate enum omp_memory_order to enum memmodel for the embedded
8592    fail clause in there.  */
8593 
8594 static enum memmodel
omp_memory_order_to_fail_memmodel(enum omp_memory_order mo)8595 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8596 {
8597   switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8598     {
8599     case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8600       switch (mo & OMP_MEMORY_ORDER_MASK)
8601           {
8602           case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8603           case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8604           case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8605           case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8606           case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8607           default: break;
8608           }
8609       gcc_unreachable ();
8610     case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8611     case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8612     case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8613     default: gcc_unreachable ();
8614     }
8615 }
8616 
8617 /* Translate enum omp_memory_order to enum memmodel.  The two enums
8618    are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8619    is 0 and omp_memory_order has the fail mode encoded in it too.  */
8620 
8621 static enum memmodel
omp_memory_order_to_memmodel(enum omp_memory_order mo)8622 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8623 {
8624   enum memmodel ret, fail_ret;
8625   switch (mo & OMP_MEMORY_ORDER_MASK)
8626     {
8627     case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8628     case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8629     case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8630     case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8631     case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8632     default: gcc_unreachable ();
8633     }
8634   /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8635      we can just return ret here unconditionally.  Otherwise, work around
8636      it here and make sure fail memmodel is not stronger.  */
8637   if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8638     return ret;
8639   fail_ret = omp_memory_order_to_fail_memmodel (mo);
8640   if (fail_ret > ret)
8641     return fail_ret;
8642   return ret;
8643 }
8644 
8645 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8646    operation as a normal volatile load.  */
8647 
8648 static bool
expand_omp_atomic_load(basic_block load_bb,tree addr,tree loaded_val,int index)8649 expand_omp_atomic_load (basic_block load_bb, tree addr,
8650                               tree loaded_val, int index)
8651 {
8652   enum built_in_function tmpbase;
8653   gimple_stmt_iterator gsi;
8654   basic_block store_bb;
8655   location_t loc;
8656   gimple *stmt;
8657   tree decl, call, type, itype;
8658 
8659   gsi = gsi_last_nondebug_bb (load_bb);
8660   stmt = gsi_stmt (gsi);
8661   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8662   loc = gimple_location (stmt);
8663 
8664   /* ??? If the target does not implement atomic_load_optab[mode], and mode
8665      is smaller than word size, then expand_atomic_load assumes that the load
8666      is atomic.  We could avoid the builtin entirely in this case.  */
8667 
8668   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8669   decl = builtin_decl_explicit (tmpbase);
8670   if (decl == NULL_TREE)
8671     return false;
8672 
8673   type = TREE_TYPE (loaded_val);
8674   itype = TREE_TYPE (TREE_TYPE (decl));
8675 
8676   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8677   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8678   call = build_call_expr_loc (loc, decl, 2, addr, mo);
8679   if (!useless_type_conversion_p (type, itype))
8680     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8681   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8682 
8683   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8684   gsi_remove (&gsi, true);
8685 
8686   store_bb = single_succ (load_bb);
8687   gsi = gsi_last_nondebug_bb (store_bb);
8688   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8689   gsi_remove (&gsi, true);
8690 
8691   if (gimple_in_ssa_p (cfun))
8692     update_ssa (TODO_update_ssa_no_phi);
8693 
8694   return true;
8695 }
8696 
8697 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8698    operation as a normal volatile store.  */
8699 
8700 static bool
expand_omp_atomic_store(basic_block load_bb,tree addr,tree loaded_val,tree stored_val,int index)8701 expand_omp_atomic_store (basic_block load_bb, tree addr,
8702                                tree loaded_val, tree stored_val, int index)
8703 {
8704   enum built_in_function tmpbase;
8705   gimple_stmt_iterator gsi;
8706   basic_block store_bb = single_succ (load_bb);
8707   location_t loc;
8708   gimple *stmt;
8709   tree decl, call, type, itype;
8710   machine_mode imode;
8711   bool exchange;
8712 
8713   gsi = gsi_last_nondebug_bb (load_bb);
8714   stmt = gsi_stmt (gsi);
8715   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8716 
8717   /* If the load value is needed, then this isn't a store but an exchange.  */
8718   exchange = gimple_omp_atomic_need_value_p (stmt);
8719 
8720   gsi = gsi_last_nondebug_bb (store_bb);
8721   stmt = gsi_stmt (gsi);
8722   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8723   loc = gimple_location (stmt);
8724 
8725   /* ??? If the target does not implement atomic_store_optab[mode], and mode
8726      is smaller than word size, then expand_atomic_store assumes that the store
8727      is atomic.  We could avoid the builtin entirely in this case.  */
8728 
8729   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8730   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8731   decl = builtin_decl_explicit (tmpbase);
8732   if (decl == NULL_TREE)
8733     return false;
8734 
8735   type = TREE_TYPE (stored_val);
8736 
8737   /* Dig out the type of the function's second argument.  */
8738   itype = TREE_TYPE (decl);
8739   itype = TYPE_ARG_TYPES (itype);
8740   itype = TREE_CHAIN (itype);
8741   itype = TREE_VALUE (itype);
8742   imode = TYPE_MODE (itype);
8743 
8744   if (exchange && !can_atomic_exchange_p (imode, true))
8745     return false;
8746 
8747   if (!useless_type_conversion_p (itype, type))
8748     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8749   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8750   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8751   call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8752   if (exchange)
8753     {
8754       if (!useless_type_conversion_p (type, itype))
8755           call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8756       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8757     }
8758 
8759   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8760   gsi_remove (&gsi, true);
8761 
8762   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
8763   gsi = gsi_last_nondebug_bb (load_bb);
8764   gsi_remove (&gsi, true);
8765 
8766   if (gimple_in_ssa_p (cfun))
8767     update_ssa (TODO_update_ssa_no_phi);
8768 
8769   return true;
8770 }
8771 
8772 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8773    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
8774    size of the data type, and thus usable to find the index of the builtin
8775    decl.  Returns false if the expression is not of the proper form.  */
8776 
8777 static bool
expand_omp_atomic_fetch_op(basic_block load_bb,tree addr,tree loaded_val,tree stored_val,int index)8778 expand_omp_atomic_fetch_op (basic_block load_bb,
8779                                   tree addr, tree loaded_val,
8780                                   tree stored_val, int index)
8781 {
8782   enum built_in_function oldbase, newbase, tmpbase;
8783   tree decl, itype, call;
8784   tree lhs, rhs;
8785   basic_block store_bb = single_succ (load_bb);
8786   gimple_stmt_iterator gsi;
8787   gimple *stmt;
8788   location_t loc;
8789   enum tree_code code;
8790   bool need_old, need_new;
8791   machine_mode imode;
8792 
8793   /* We expect to find the following sequences:
8794 
8795    load_bb:
8796        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8797 
8798    store_bb:
8799        val = tmp OP something; (or: something OP tmp)
8800        GIMPLE_OMP_STORE (val)
8801 
8802   ???FIXME: Allow a more flexible sequence.
8803   Perhaps use data flow to pick the statements.
8804 
8805   */
8806 
8807   gsi = gsi_after_labels (store_bb);
8808   stmt = gsi_stmt (gsi);
8809   if (is_gimple_debug (stmt))
8810     {
8811       gsi_next_nondebug (&gsi);
8812       if (gsi_end_p (gsi))
8813           return false;
8814       stmt = gsi_stmt (gsi);
8815     }
8816   loc = gimple_location (stmt);
8817   if (!is_gimple_assign (stmt))
8818     return false;
8819   gsi_next_nondebug (&gsi);
8820   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8821     return false;
8822   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8823   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8824   enum omp_memory_order omo
8825     = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8826   enum memmodel mo = omp_memory_order_to_memmodel (omo);
8827   gcc_checking_assert (!need_old || !need_new);
8828 
8829   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8830     return false;
8831 
8832   /* Check for one of the supported fetch-op operations.  */
8833   code = gimple_assign_rhs_code (stmt);
8834   switch (code)
8835     {
8836     case PLUS_EXPR:
8837     case POINTER_PLUS_EXPR:
8838       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8839       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8840       break;
8841     case MINUS_EXPR:
8842       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8843       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8844       break;
8845     case BIT_AND_EXPR:
8846       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8847       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8848       break;
8849     case BIT_IOR_EXPR:
8850       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8851       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8852       break;
8853     case BIT_XOR_EXPR:
8854       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8855       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8856       break;
8857     default:
8858       return false;
8859     }
8860 
8861   /* Make sure the expression is of the proper form.  */
8862   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8863     rhs = gimple_assign_rhs2 (stmt);
8864   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8865              && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8866     rhs = gimple_assign_rhs1 (stmt);
8867   else
8868     return false;
8869 
8870   tmpbase = ((enum built_in_function)
8871                ((need_new ? newbase : oldbase) + index + 1));
8872   decl = builtin_decl_explicit (tmpbase);
8873   if (decl == NULL_TREE)
8874     return false;
8875   itype = TREE_TYPE (TREE_TYPE (decl));
8876   imode = TYPE_MODE (itype);
8877 
8878   /* We could test all of the various optabs involved, but the fact of the
8879      matter is that (with the exception of i486 vs i586 and xadd) all targets
8880      that support any atomic operaton optab also implements compare-and-swap.
8881      Let optabs.cc take care of expanding any compare-and-swap loop.  */
8882   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8883     return false;
8884 
8885   gsi = gsi_last_nondebug_bb (load_bb);
8886   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8887 
8888   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8889      It only requires that the operation happen atomically.  Thus we can
8890      use the RELAXED memory model.  */
8891   call = build_call_expr_loc (loc, decl, 3, addr,
8892                                     fold_convert_loc (loc, itype, rhs),
8893                                     build_int_cst (NULL, mo));
8894 
8895   if (need_old || need_new)
8896     {
8897       lhs = need_old ? loaded_val : stored_val;
8898       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8899       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8900     }
8901   else
8902     call = fold_convert_loc (loc, void_type_node, call);
8903   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8904   gsi_remove (&gsi, true);
8905 
8906   gsi = gsi_last_nondebug_bb (store_bb);
8907   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8908   gsi_remove (&gsi, true);
8909   gsi = gsi_last_nondebug_bb (store_bb);
8910   stmt = gsi_stmt (gsi);
8911   gsi_remove (&gsi, true);
8912 
8913   if (gimple_in_ssa_p (cfun))
8914     {
8915       release_defs (stmt);
8916       update_ssa (TODO_update_ssa_no_phi);
8917     }
8918 
8919   return true;
8920 }
8921 
8922 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8923    compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8924    Returns false if the expression is not of the proper form.  */
8925 
8926 static bool
expand_omp_atomic_cas(basic_block load_bb,tree addr,tree loaded_val,tree stored_val,int index)8927 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8928                            tree loaded_val, tree stored_val, int index)
8929 {
8930   /* We expect to find the following sequences:
8931 
8932    load_bb:
8933        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8934 
8935    store_bb:
8936        val = tmp == e ? d : tmp;
8937        GIMPLE_OMP_ATOMIC_STORE (val)
8938 
8939      or in store_bb instead:
8940        tmp2 = tmp == e;
8941        val = tmp2 ? d : tmp;
8942        GIMPLE_OMP_ATOMIC_STORE (val)
8943 
8944      or:
8945        tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8946        val = e == tmp3 ? d : tmp;
8947        GIMPLE_OMP_ATOMIC_STORE (val)
8948 
8949      etc.  */
8950 
8951 
8952   basic_block store_bb = single_succ (load_bb);
8953   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8954   gimple *store_stmt = gsi_stmt (gsi);
8955   if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8956     return false;
8957   gsi_prev_nondebug (&gsi);
8958   if (gsi_end_p (gsi))
8959     return false;
8960   gimple *condexpr_stmt = gsi_stmt (gsi);
8961   if (!is_gimple_assign (condexpr_stmt)
8962       || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8963     return false;
8964   if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8965     return false;
8966   gimple *cond_stmt = NULL;
8967   gimple *vce_stmt = NULL;
8968   gsi_prev_nondebug (&gsi);
8969   if (!gsi_end_p (gsi))
8970     {
8971       cond_stmt = gsi_stmt (gsi);
8972       if (!is_gimple_assign (cond_stmt))
8973           return false;
8974       if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8975           {
8976             gsi_prev_nondebug (&gsi);
8977             if (!gsi_end_p (gsi))
8978               {
8979                 vce_stmt = gsi_stmt (gsi);
8980                 if (!is_gimple_assign (vce_stmt)
8981                       || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8982                     return false;
8983               }
8984           }
8985       else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8986           std::swap (vce_stmt, cond_stmt);
8987       else
8988           return false;
8989       if (vce_stmt)
8990           {
8991             tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8992             if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8993                 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8994               return false;
8995             if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8996                 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8997                 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8998                                               TYPE_SIZE (TREE_TYPE (loaded_val))))
8999               return false;
9000             gsi_prev_nondebug (&gsi);
9001             if (!gsi_end_p (gsi))
9002               return false;
9003           }
9004     }
9005   tree cond = gimple_assign_rhs1 (condexpr_stmt);
9006   tree cond_op1, cond_op2;
9007   if (cond_stmt)
9008     {
9009       if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9010           return false;
9011       cond_op1 = gimple_assign_rhs1 (cond_stmt);
9012       cond_op2 = gimple_assign_rhs2 (cond_stmt);
9013     }
9014   else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9015     return false;
9016   else
9017     {
9018       cond_op1 = TREE_OPERAND (cond, 0);
9019       cond_op2 = TREE_OPERAND (cond, 1);
9020     }
9021   tree d;
9022   if (TREE_CODE (cond) == NE_EXPR)
9023     {
9024       if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9025           return false;
9026       d = gimple_assign_rhs3 (condexpr_stmt);
9027     }
9028   else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9029     return false;
9030   else
9031     d = gimple_assign_rhs2 (condexpr_stmt);
9032   tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9033   if (operand_equal_p (e, cond_op1))
9034     e = cond_op2;
9035   else if (operand_equal_p (e, cond_op2))
9036     e = cond_op1;
9037   else
9038     return false;
9039 
9040   location_t loc = gimple_location (store_stmt);
9041   gimple *load_stmt = last_stmt (load_bb);
9042   bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9043   bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9044   bool weak = gimple_omp_atomic_weak_p (load_stmt);
9045   enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9046   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9047   tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9048   gcc_checking_assert (!need_old || !need_new);
9049 
9050   enum built_in_function fncode
9051     = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9052                                         + index + 1);
9053   tree cmpxchg = builtin_decl_explicit (fncode);
9054   if (cmpxchg == NULL_TREE)
9055     return false;
9056   tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9057 
9058   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9059       || !can_atomic_load_p (TYPE_MODE (itype)))
9060     return false;
9061 
9062   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9063   if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9064     return false;
9065 
9066   gsi = gsi_for_stmt (store_stmt);
9067   if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9068     {
9069       tree ne = create_tmp_reg (itype);
9070       gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9071       gimple_set_location (g, loc);
9072       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9073       e = ne;
9074     }
9075   if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9076     {
9077       tree nd = create_tmp_reg (itype);
9078       enum tree_code code;
9079       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9080           {
9081             code = VIEW_CONVERT_EXPR;
9082             d = build1 (VIEW_CONVERT_EXPR, itype, d);
9083           }
9084       else
9085           code = NOP_EXPR;
9086       gimple *g = gimple_build_assign (nd, code, d);
9087       gimple_set_location (g, loc);
9088       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9089       d = nd;
9090     }
9091 
9092   tree ctype = build_complex_type (itype);
9093   int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9094   gimple *g
9095     = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9096                                           build_int_cst (integer_type_node, flag),
9097                                           mo, fmo);
9098   tree cres = create_tmp_reg (ctype);
9099   gimple_call_set_lhs (g, cres);
9100   gimple_set_location (g, loc);
9101   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9102 
9103   if (cond_stmt || need_old || need_new)
9104     {
9105       tree im = create_tmp_reg (itype);
9106       g = gimple_build_assign (im, IMAGPART_EXPR,
9107                                      build1 (IMAGPART_EXPR, itype, cres));
9108       gimple_set_location (g, loc);
9109       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9110 
9111       tree re = NULL_TREE;
9112       if (need_old || need_new)
9113           {
9114             re = create_tmp_reg (itype);
9115             g = gimple_build_assign (re, REALPART_EXPR,
9116                                            build1 (REALPART_EXPR, itype, cres));
9117             gimple_set_location (g, loc);
9118             gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9119           }
9120 
9121       if (cond_stmt)
9122           {
9123             g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9124                                            NOP_EXPR, im);
9125             gimple_set_location (g, loc);
9126             gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9127           }
9128       else if (need_new)
9129           {
9130             g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9131                                            build2 (NE_EXPR, boolean_type_node,
9132                                                      im, build_zero_cst (itype)),
9133                                            d, re);
9134             gimple_set_location (g, loc);
9135             gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9136             re = gimple_assign_lhs (g);
9137           }
9138 
9139       if (need_old || need_new)
9140           {
9141             tree v = need_old ? loaded_val : stored_val;
9142             enum tree_code code;
9143             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9144               {
9145                 code = VIEW_CONVERT_EXPR;
9146                 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9147               }
9148             else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9149               code = NOP_EXPR;
9150             else
9151               code = TREE_CODE (re);
9152             g = gimple_build_assign (v, code, re);
9153             gimple_set_location (g, loc);
9154             gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9155           }
9156     }
9157 
9158   gsi_remove (&gsi, true);
9159   gsi = gsi_for_stmt (load_stmt);
9160   gsi_remove (&gsi, true);
9161   gsi = gsi_for_stmt (condexpr_stmt);
9162   gsi_remove (&gsi, true);
9163   if (cond_stmt)
9164     {
9165       gsi = gsi_for_stmt (cond_stmt);
9166       gsi_remove (&gsi, true);
9167     }
9168   if (vce_stmt)
9169     {
9170       gsi = gsi_for_stmt (vce_stmt);
9171       gsi_remove (&gsi, true);
9172     }
9173 
9174   return true;
9175 }
9176 
9177 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
9178 
9179       oldval = *addr;
9180       repeat:
9181           newval = rhs;        // with oldval replacing *addr in rhs
9182           oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9183           if (oldval != newval)
9184             goto repeat;
9185 
9186    INDEX is log2 of the size of the data type, and thus usable to find the
9187    index of the builtin decl.  */
9188 
9189 static bool
expand_omp_atomic_pipeline(basic_block load_bb,basic_block store_bb,tree addr,tree loaded_val,tree stored_val,int index)9190 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9191                                   tree addr, tree loaded_val, tree stored_val,
9192                                   int index)
9193 {
9194   tree loadedi, storedi, initial, new_storedi, old_vali;
9195   tree type, itype, cmpxchg, iaddr, atype;
9196   gimple_stmt_iterator si;
9197   basic_block loop_header = single_succ (load_bb);
9198   gimple *phi, *stmt;
9199   edge e;
9200   enum built_in_function fncode;
9201 
9202   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9203                                             + index + 1);
9204   cmpxchg = builtin_decl_explicit (fncode);
9205   if (cmpxchg == NULL_TREE)
9206     return false;
9207   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9208   atype = type;
9209   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9210 
9211   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9212       || !can_atomic_load_p (TYPE_MODE (itype)))
9213     return false;
9214 
9215   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
9216   si = gsi_last_nondebug_bb (load_bb);
9217   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9218   location_t loc = gimple_location (gsi_stmt (si));
9219   enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9220   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9221   tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9222 
9223   /* For floating-point values, we'll need to view-convert them to integers
9224      so that we can perform the atomic compare and swap.  Simplify the
9225      following code by always setting up the "i"ntegral variables.  */
9226   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9227     {
9228       tree iaddr_val;
9229 
9230       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9231                                                                          true));
9232       atype = itype;
9233       iaddr_val
9234           = force_gimple_operand_gsi (&si,
9235                                             fold_convert (TREE_TYPE (iaddr), addr),
9236                                             false, NULL_TREE, true, GSI_SAME_STMT);
9237       stmt = gimple_build_assign (iaddr, iaddr_val);
9238       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9239       loadedi = create_tmp_var (itype);
9240       if (gimple_in_ssa_p (cfun))
9241           loadedi = make_ssa_name (loadedi);
9242     }
9243   else
9244     {
9245       iaddr = addr;
9246       loadedi = loaded_val;
9247     }
9248 
9249   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9250   tree loaddecl = builtin_decl_explicit (fncode);
9251   if (loaddecl)
9252     initial
9253       = fold_convert (atype,
9254                           build_call_expr (loaddecl, 2, iaddr,
9255                                                build_int_cst (NULL_TREE,
9256                                                                   MEMMODEL_RELAXED)));
9257   else
9258     {
9259       tree off
9260           = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9261                                                                   true), 0);
9262       initial = build2 (MEM_REF, atype, iaddr, off);
9263     }
9264 
9265   initial
9266     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9267                                         GSI_SAME_STMT);
9268 
9269   /* Move the value to the LOADEDI temporary.  */
9270   if (gimple_in_ssa_p (cfun))
9271     {
9272       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9273       phi = create_phi_node (loadedi, loop_header);
9274       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9275                  initial);
9276     }
9277   else
9278     gsi_insert_before (&si,
9279                            gimple_build_assign (loadedi, initial),
9280                            GSI_SAME_STMT);
9281   if (loadedi != loaded_val)
9282     {
9283       gimple_stmt_iterator gsi2;
9284       tree x;
9285 
9286       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9287       gsi2 = gsi_start_bb (loop_header);
9288       if (gimple_in_ssa_p (cfun))
9289           {
9290             gassign *stmt;
9291             x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9292                                                   true, GSI_SAME_STMT);
9293             stmt = gimple_build_assign (loaded_val, x);
9294             gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9295           }
9296       else
9297           {
9298             x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9299             force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9300                                             true, GSI_SAME_STMT);
9301           }
9302     }
9303   gsi_remove (&si, true);
9304 
9305   si = gsi_last_nondebug_bb (store_bb);
9306   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9307 
9308   if (iaddr == addr)
9309     storedi = stored_val;
9310   else
9311     storedi
9312       = force_gimple_operand_gsi (&si,
9313                                           build1 (VIEW_CONVERT_EXPR, itype,
9314                                                     stored_val), true, NULL_TREE, true,
9315                                           GSI_SAME_STMT);
9316 
9317   /* Build the compare&swap statement.  */
9318   tree ctype = build_complex_type (itype);
9319   int flag = int_size_in_bytes (itype);
9320   new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9321                                                         ctype, 6, iaddr, loadedi,
9322                                                         storedi,
9323                                                         build_int_cst (integer_type_node,
9324                                                                            flag),
9325                                                         mo, fmo);
9326   new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9327   new_storedi = force_gimple_operand_gsi (&si,
9328                                                     fold_convert (TREE_TYPE (loadedi),
9329                                                                       new_storedi),
9330                                                     true, NULL_TREE,
9331                                                     true, GSI_SAME_STMT);
9332 
9333   if (gimple_in_ssa_p (cfun))
9334     old_vali = loadedi;
9335   else
9336     {
9337       old_vali = create_tmp_var (TREE_TYPE (loadedi));
9338       stmt = gimple_build_assign (old_vali, loadedi);
9339       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9340 
9341       stmt = gimple_build_assign (loadedi, new_storedi);
9342       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9343     }
9344 
9345   /* Note that we always perform the comparison as an integer, even for
9346      floating point.  This allows the atomic operation to properly
9347      succeed even with NaNs and -0.0.  */
9348   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9349   stmt = gimple_build_cond_empty (ne);
9350   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9351 
9352   /* Update cfg.  */
9353   e = single_succ_edge (store_bb);
9354   e->flags &= ~EDGE_FALLTHRU;
9355   e->flags |= EDGE_FALSE_VALUE;
9356   /* Expect no looping.  */
9357   e->probability = profile_probability::guessed_always ();
9358 
9359   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9360   e->probability = profile_probability::guessed_never ();
9361 
9362   /* Copy the new value to loadedi (we already did that before the condition
9363      if we are not in SSA).  */
9364   if (gimple_in_ssa_p (cfun))
9365     {
9366       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9367       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9368     }
9369 
9370   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
9371   gsi_remove (&si, true);
9372 
9373   class loop *loop = alloc_loop ();
9374   loop->header = loop_header;
9375   loop->latch = store_bb;
9376   add_loop (loop, loop_header->loop_father);
9377 
9378   if (gimple_in_ssa_p (cfun))
9379     update_ssa (TODO_update_ssa_no_phi);
9380 
9381   return true;
9382 }
9383 
9384 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
9385 
9386                                           GOMP_atomic_start ();
9387                                           *addr = rhs;
9388                                           GOMP_atomic_end ();
9389 
9390    The result is not globally atomic, but works so long as all parallel
9391    references are within #pragma omp atomic directives.  According to
9392    responses received from omp@openmp.org, appears to be within spec.
9393    Which makes sense, since that's how several other compilers handle
9394    this situation as well.
9395    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9396    expanding.  STORED_VAL is the operand of the matching
9397    GIMPLE_OMP_ATOMIC_STORE.
9398 
9399    We replace
9400    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9401    loaded_val = *addr;
9402 
9403    and replace
9404    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
9405    *addr = stored_val;
9406 */
9407 
9408 static bool
expand_omp_atomic_mutex(basic_block load_bb,basic_block store_bb,tree addr,tree loaded_val,tree stored_val)9409 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9410                                tree addr, tree loaded_val, tree stored_val)
9411 {
9412   gimple_stmt_iterator si;
9413   gassign *stmt;
9414   tree t;
9415 
9416   si = gsi_last_nondebug_bb (load_bb);
9417   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9418 
9419   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9420   t = build_call_expr (t, 0);
9421   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9422 
9423   tree mem = build_simple_mem_ref (addr);
9424   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9425   TREE_OPERAND (mem, 1)
9426     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9427                                                              true),
9428                         TREE_OPERAND (mem, 1));
9429   stmt = gimple_build_assign (loaded_val, mem);
9430   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9431   gsi_remove (&si, true);
9432 
9433   si = gsi_last_nondebug_bb (store_bb);
9434   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9435 
9436   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9437   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9438 
9439   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9440   t = build_call_expr (t, 0);
9441   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9442   gsi_remove (&si, true);
9443 
9444   if (gimple_in_ssa_p (cfun))
9445     update_ssa (TODO_update_ssa_no_phi);
9446   return true;
9447 }
9448 
9449 /* Expand an GIMPLE_OMP_ATOMIC statement.  We try to expand
9450    using expand_omp_atomic_fetch_op.  If it failed, we try to
9451    call expand_omp_atomic_pipeline, and if it fails too, the
9452    ultimate fallback is wrapping the operation in a mutex
9453    (expand_omp_atomic_mutex).  REGION is the atomic region built
9454    by build_omp_regions_1().  */
9455 
9456 static void
expand_omp_atomic(struct omp_region * region)9457 expand_omp_atomic (struct omp_region *region)
9458 {
9459   basic_block load_bb = region->entry, store_bb = region->exit;
9460   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9461   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9462   tree loaded_val = gimple_omp_atomic_load_lhs (load);
9463   tree addr = gimple_omp_atomic_load_rhs (load);
9464   tree stored_val = gimple_omp_atomic_store_val (store);
9465   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9466   HOST_WIDE_INT index;
9467 
9468   /* Make sure the type is one of the supported sizes.  */
9469   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9470   index = exact_log2 (index);
9471   if (index >= 0 && index <= 4)
9472     {
9473       unsigned int align = TYPE_ALIGN_UNIT (type);
9474 
9475       /* __sync builtins require strict data alignment.  */
9476       if (exact_log2 (align) >= index)
9477           {
9478             /* Atomic load.  */
9479             scalar_mode smode;
9480             if (loaded_val == stored_val
9481                 && (is_int_mode (TYPE_MODE (type), &smode)
9482                       || is_float_mode (TYPE_MODE (type), &smode))
9483                 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9484                 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9485               return;
9486 
9487             /* Atomic store.  */
9488             if ((is_int_mode (TYPE_MODE (type), &smode)
9489                  || is_float_mode (TYPE_MODE (type), &smode))
9490                 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9491                 && store_bb == single_succ (load_bb)
9492                 && first_stmt (store_bb) == store
9493                 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9494                                                     stored_val, index))
9495               return;
9496 
9497             /* When possible, use specialized atomic update functions.  */
9498             if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9499                 && store_bb == single_succ (load_bb)
9500                 && expand_omp_atomic_fetch_op (load_bb, addr,
9501                                                        loaded_val, stored_val, index))
9502               return;
9503 
9504             /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
9505             if (store_bb == single_succ (load_bb)
9506                 && !gimple_in_ssa_p (cfun)
9507                 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9508                                                   index))
9509               return;
9510 
9511             /* If we don't have specialized __sync builtins, try and implement
9512                as a compare and swap loop.  */
9513             if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9514                                                     loaded_val, stored_val, index))
9515               return;
9516           }
9517     }
9518 
9519   /* The ultimate fallback is wrapping the operation in a mutex.  */
9520   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9521 }
9522 
9523 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9524    at REGION_EXIT.  */
9525 
9526 static void
mark_loops_in_oacc_kernels_region(basic_block region_entry,basic_block region_exit)9527 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9528                                            basic_block region_exit)
9529 {
9530   class loop *outer = region_entry->loop_father;
9531   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9532 
9533   /* Don't parallelize the kernels region if it contains more than one outer
9534      loop.  */
9535   unsigned int nr_outer_loops = 0;
9536   class loop *single_outer = NULL;
9537   for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9538     {
9539       gcc_assert (loop_outer (loop) == outer);
9540 
9541       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9542           continue;
9543 
9544       if (region_exit != NULL
9545             && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9546           continue;
9547 
9548       nr_outer_loops++;
9549       single_outer = loop;
9550     }
9551   if (nr_outer_loops != 1)
9552     return;
9553 
9554   for (class loop *loop = single_outer->inner;
9555        loop != NULL;
9556        loop = loop->inner)
9557     if (loop->next)
9558       return;
9559 
9560   /* Mark the loops in the region.  */
9561   for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9562     loop->in_oacc_kernels_region = true;
9563 }
9564 
9565 /* Build target argument identifier from the DEVICE identifier, value
9566    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
9567 
9568 static tree
get_target_argument_identifier_1(int device,bool subseqent_param,int id)9569 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9570 {
9571   tree t = build_int_cst (integer_type_node, device);
9572   if (subseqent_param)
9573     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9574                          build_int_cst (integer_type_node,
9575                                             GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9576   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9577                        build_int_cst (integer_type_node, id));
9578   return t;
9579 }
9580 
9581 /* Like above but return it in type that can be directly stored as an element
9582    of the argument array.  */
9583 
9584 static tree
get_target_argument_identifier(int device,bool subseqent_param,int id)9585 get_target_argument_identifier (int device, bool subseqent_param, int id)
9586 {
9587   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9588   return fold_convert (ptr_type_node, t);
9589 }
9590 
9591 /* Return a target argument consisting of DEVICE identifier, value identifier
9592    ID, and the actual VALUE.  */
9593 
9594 static tree
get_target_argument_value(gimple_stmt_iterator * gsi,int device,int id,tree value)9595 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9596                                  tree value)
9597 {
9598   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9599                               fold_convert (integer_type_node, value),
9600                               build_int_cst (unsigned_type_node,
9601                                                GOMP_TARGET_ARG_VALUE_SHIFT));
9602   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9603                        get_target_argument_identifier_1 (device, false, id));
9604   t = fold_convert (ptr_type_node, t);
9605   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9606 }
9607 
9608 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9609    push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9610    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9611    arguments.  */
9612 
9613 static void
push_target_argument_according_to_value(gimple_stmt_iterator * gsi,int device,int id,tree value,vec<tree> * args)9614 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9615                                                    int id, tree value, vec <tree> *args)
9616 {
9617   if (tree_fits_shwi_p (value)
9618       && tree_to_shwi (value) > -(1 << 15)
9619       && tree_to_shwi (value) < (1 << 15))
9620     args->quick_push (get_target_argument_value (gsi, device, id, value));
9621   else
9622     {
9623       args->quick_push (get_target_argument_identifier (device, true, id));
9624       value = fold_convert (ptr_type_node, value);
9625       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9626                                                   GSI_SAME_STMT);
9627       args->quick_push (value);
9628     }
9629 }
9630 
9631 /* Create an array of arguments that is then passed to GOMP_target.  */
9632 
9633 static tree
get_target_arguments(gimple_stmt_iterator * gsi,gomp_target * tgt_stmt)9634 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9635 {
9636   auto_vec <tree, 6> args;
9637   tree clauses = gimple_omp_target_clauses (tgt_stmt);
9638   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9639   if (c)
9640     t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9641   else
9642     t = integer_minus_one_node;
9643   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9644                                                      GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9645 
9646   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9647   if (c)
9648     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9649   else
9650     t = integer_minus_one_node;
9651   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9652                                                      GOMP_TARGET_ARG_THREAD_LIMIT, t,
9653                                                      &args);
9654 
9655   /* Produce more, perhaps device specific, arguments here.  */
9656 
9657   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9658                                                                         args.length () + 1),
9659                                           ".omp_target_args");
9660   for (unsigned i = 0; i < args.length (); i++)
9661     {
9662       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9663                                build_int_cst (integer_type_node, i),
9664                                NULL_TREE, NULL_TREE);
9665       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9666                                GSI_SAME_STMT);
9667     }
9668   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9669                          build_int_cst (integer_type_node, args.length ()),
9670                          NULL_TREE, NULL_TREE);
9671   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9672                          GSI_SAME_STMT);
9673   TREE_ADDRESSABLE (argarray) = 1;
9674   return build_fold_addr_expr (argarray);
9675 }
9676 
9677 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
9678 
9679 static void
expand_omp_target(struct omp_region * region)9680 expand_omp_target (struct omp_region *region)
9681 {
9682   basic_block entry_bb, exit_bb, new_bb;
9683   struct function *child_cfun;
9684   tree child_fn, block, t;
9685   gimple_stmt_iterator gsi;
9686   gomp_target *entry_stmt;
9687   gimple *stmt;
9688   edge e;
9689   bool offloaded;
9690   int target_kind;
9691 
9692   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9693   target_kind = gimple_omp_target_kind (entry_stmt);
9694   new_bb = region->entry;
9695 
9696   offloaded = is_gimple_omp_offloaded (entry_stmt);
9697   switch (target_kind)
9698     {
9699     case GF_OMP_TARGET_KIND_REGION:
9700     case GF_OMP_TARGET_KIND_UPDATE:
9701     case GF_OMP_TARGET_KIND_ENTER_DATA:
9702     case GF_OMP_TARGET_KIND_EXIT_DATA:
9703     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9704     case GF_OMP_TARGET_KIND_OACC_KERNELS:
9705     case GF_OMP_TARGET_KIND_OACC_SERIAL:
9706     case GF_OMP_TARGET_KIND_OACC_UPDATE:
9707     case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9708     case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9709     case GF_OMP_TARGET_KIND_OACC_DECLARE:
9710     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9711     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9712     case GF_OMP_TARGET_KIND_DATA:
9713     case GF_OMP_TARGET_KIND_OACC_DATA:
9714     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9715     case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9716       break;
9717     default:
9718       gcc_unreachable ();
9719     }
9720 
9721   child_fn = NULL_TREE;
9722   child_cfun = NULL;
9723   if (offloaded)
9724     {
9725       child_fn = gimple_omp_target_child_fn (entry_stmt);
9726       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9727     }
9728 
9729   /* Supported by expand_omp_taskreg, but not here.  */
9730   if (child_cfun != NULL)
9731     gcc_checking_assert (!child_cfun->cfg);
9732   gcc_checking_assert (!gimple_in_ssa_p (cfun));
9733 
9734   entry_bb = region->entry;
9735   exit_bb = region->exit;
9736 
9737   if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9738     mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9739 
9740   /* Going on, all OpenACC compute constructs are mapped to
9741      'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9742      To distinguish between them, we attach attributes.  */
9743   switch (target_kind)
9744     {
9745     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9746       DECL_ATTRIBUTES (child_fn)
9747           = tree_cons (get_identifier ("oacc parallel"),
9748                          NULL_TREE, DECL_ATTRIBUTES (child_fn));
9749       break;
9750     case GF_OMP_TARGET_KIND_OACC_KERNELS:
9751       DECL_ATTRIBUTES (child_fn)
9752           = tree_cons (get_identifier ("oacc kernels"),
9753                          NULL_TREE, DECL_ATTRIBUTES (child_fn));
9754       break;
9755     case GF_OMP_TARGET_KIND_OACC_SERIAL:
9756       DECL_ATTRIBUTES (child_fn)
9757           = tree_cons (get_identifier ("oacc serial"),
9758                          NULL_TREE, DECL_ATTRIBUTES (child_fn));
9759       break;
9760     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9761       DECL_ATTRIBUTES (child_fn)
9762           = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9763                          NULL_TREE, DECL_ATTRIBUTES (child_fn));
9764       break;
9765     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9766       DECL_ATTRIBUTES (child_fn)
9767           = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9768                          NULL_TREE, DECL_ATTRIBUTES (child_fn));
9769       break;
9770     default:
9771       /* Make sure we don't miss any.  */
9772       gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9773                                    && is_gimple_omp_offloaded (entry_stmt)));
9774       break;
9775     }
9776 
9777   if (offloaded)
9778     {
9779       unsigned srcidx, dstidx, num;
9780 
9781       /* If the offloading region needs data sent from the parent
9782            function, then the very first statement (except possible
9783            tree profile counter updates) of the offloading body
9784            is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
9785            &.OMP_DATA_O is passed as an argument to the child function,
9786            we need to replace it with the argument as seen by the child
9787            function.
9788 
9789            In most cases, this will end up being the identity assignment
9790            .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
9791            a function call that has been inlined, the original PARM_DECL
9792            .OMP_DATA_I may have been converted into a different local
9793            variable.  In which case, we need to keep the assignment.  */
9794       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9795       if (data_arg)
9796           {
9797             basic_block entry_succ_bb = single_succ (entry_bb);
9798             gimple_stmt_iterator gsi;
9799             tree arg;
9800             gimple *tgtcopy_stmt = NULL;
9801             tree sender = TREE_VEC_ELT (data_arg, 0);
9802 
9803             for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9804               {
9805                 gcc_assert (!gsi_end_p (gsi));
9806                 stmt = gsi_stmt (gsi);
9807                 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9808                     continue;
9809 
9810                 if (gimple_num_ops (stmt) == 2)
9811                     {
9812                       tree arg = gimple_assign_rhs1 (stmt);
9813 
9814                       /* We're ignoring the subcode because we're
9815                          effectively doing a STRIP_NOPS.  */
9816 
9817                       if (TREE_CODE (arg) == ADDR_EXPR
9818                           && TREE_OPERAND (arg, 0) == sender)
9819                         {
9820                           tgtcopy_stmt = stmt;
9821                           break;
9822                         }
9823                     }
9824               }
9825 
9826             gcc_assert (tgtcopy_stmt != NULL);
9827             arg = DECL_ARGUMENTS (child_fn);
9828 
9829             gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9830             gsi_remove (&gsi, true);
9831           }
9832 
9833       /* Declare local variables needed in CHILD_CFUN.  */
9834       block = DECL_INITIAL (child_fn);
9835       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9836       /* The gimplifier could record temporaries in the offloading block
9837            rather than in containing function's local_decls chain,
9838            which would mean cgraph missed finalizing them.  Do it now.  */
9839       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9840           if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9841             varpool_node::finalize_decl (t);
9842       DECL_SAVED_TREE (child_fn) = NULL;
9843       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
9844       gimple_set_body (child_fn, NULL);
9845       TREE_USED (block) = 1;
9846 
9847       /* Reset DECL_CONTEXT on function arguments.  */
9848       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9849           DECL_CONTEXT (t) = child_fn;
9850 
9851       /* Split ENTRY_BB at GIMPLE_*,
9852            so that it can be moved to the child function.  */
9853       gsi = gsi_last_nondebug_bb (entry_bb);
9854       stmt = gsi_stmt (gsi);
9855       gcc_assert (stmt
9856                       && gimple_code (stmt) == gimple_code (entry_stmt));
9857       e = split_block (entry_bb, stmt);
9858       gsi_remove (&gsi, true);
9859       entry_bb = e->dest;
9860       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9861 
9862       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
9863       if (exit_bb)
9864           {
9865             gsi = gsi_last_nondebug_bb (exit_bb);
9866             gcc_assert (!gsi_end_p (gsi)
9867                           && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9868             stmt = gimple_build_return (NULL);
9869             gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9870             gsi_remove (&gsi, true);
9871           }
9872 
9873       /* Move the offloading region into CHILD_CFUN.  */
9874 
9875       block = gimple_block (entry_stmt);
9876 
9877       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9878       if (exit_bb)
9879           single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9880       /* When the OMP expansion process cannot guarantee an up-to-date
9881            loop tree arrange for the child function to fixup loops.  */
9882       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9883           child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9884 
9885       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
9886       num = vec_safe_length (child_cfun->local_decls);
9887       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9888           {
9889             t = (*child_cfun->local_decls)[srcidx];
9890             if (DECL_CONTEXT (t) == cfun->decl)
9891               continue;
9892             if (srcidx != dstidx)
9893               (*child_cfun->local_decls)[dstidx] = t;
9894             dstidx++;
9895           }
9896       if (dstidx != num)
9897           vec_safe_truncate (child_cfun->local_decls, dstidx);
9898 
9899       /* Inform the callgraph about the new function.  */
9900       child_cfun->curr_properties = cfun->curr_properties;
9901       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9902       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9903       cgraph_node *node = cgraph_node::get_create (child_fn);
9904       node->parallelized_function = 1;
9905       cgraph_node::add_new_function (child_fn, true);
9906 
9907       /* Add the new function to the offload table.  */
9908       if (ENABLE_OFFLOADING)
9909           {
9910             if (in_lto_p)
9911               DECL_PRESERVE_P (child_fn) = 1;
9912             vec_safe_push (offload_funcs, child_fn);
9913           }
9914 
9915       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9916                           && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9917 
9918       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
9919            fixed in a following pass.  */
9920       push_cfun (child_cfun);
9921       if (need_asm)
9922           assign_assembler_name_if_needed (child_fn);
9923       cgraph_edge::rebuild_edges ();
9924 
9925       /* Some EH regions might become dead, see PR34608.  If
9926            pass_cleanup_cfg isn't the first pass to happen with the
9927            new child, these dead EH edges might cause problems.
9928            Clean them up now.  */
9929       if (flag_exceptions)
9930           {
9931             basic_block bb;
9932             bool changed = false;
9933 
9934             FOR_EACH_BB_FN (bb, cfun)
9935               changed |= gimple_purge_dead_eh_edges (bb);
9936             if (changed)
9937               cleanup_tree_cfg ();
9938           }
9939       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9940           verify_loop_structure ();
9941       pop_cfun ();
9942 
9943       if (dump_file && !gimple_in_ssa_p (cfun))
9944           {
9945             omp_any_child_fn_dumped = true;
9946             dump_function_header (dump_file, child_fn, dump_flags);
9947             dump_function_to_file (child_fn, dump_file, dump_flags);
9948           }
9949 
9950       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9951     }
9952 
9953   /* Emit a library call to launch the offloading region, or do data
9954      transfers.  */
9955   tree t1, t2, t3, t4, depend, c, clauses;
9956   enum built_in_function start_ix;
9957   unsigned int flags_i = 0;
9958 
9959   switch (gimple_omp_target_kind (entry_stmt))
9960     {
9961     case GF_OMP_TARGET_KIND_REGION:
9962       start_ix = BUILT_IN_GOMP_TARGET;
9963       break;
9964     case GF_OMP_TARGET_KIND_DATA:
9965       start_ix = BUILT_IN_GOMP_TARGET_DATA;
9966       break;
9967     case GF_OMP_TARGET_KIND_UPDATE:
9968       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9969       break;
9970     case GF_OMP_TARGET_KIND_ENTER_DATA:
9971       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9972       break;
9973     case GF_OMP_TARGET_KIND_EXIT_DATA:
9974       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9975       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9976       break;
9977     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9978     case GF_OMP_TARGET_KIND_OACC_KERNELS:
9979     case GF_OMP_TARGET_KIND_OACC_SERIAL:
9980     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9981     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9982       start_ix = BUILT_IN_GOACC_PARALLEL;
9983       break;
9984     case GF_OMP_TARGET_KIND_OACC_DATA:
9985     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9986     case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9987       start_ix = BUILT_IN_GOACC_DATA_START;
9988       break;
9989     case GF_OMP_TARGET_KIND_OACC_UPDATE:
9990       start_ix = BUILT_IN_GOACC_UPDATE;
9991       break;
9992     case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9993       start_ix = BUILT_IN_GOACC_ENTER_DATA;
9994       break;
9995     case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9996       start_ix = BUILT_IN_GOACC_EXIT_DATA;
9997       break;
9998     case GF_OMP_TARGET_KIND_OACC_DECLARE:
9999       start_ix = BUILT_IN_GOACC_DECLARE;
10000       break;
10001     default:
10002       gcc_unreachable ();
10003     }
10004 
10005   clauses = gimple_omp_target_clauses (entry_stmt);
10006 
10007   tree device = NULL_TREE;
10008   location_t device_loc = UNKNOWN_LOCATION;
10009   tree goacc_flags = NULL_TREE;
10010   if (is_gimple_omp_oacc (entry_stmt))
10011     {
10012       /* By default, no GOACC_FLAGs are set.  */
10013       goacc_flags = integer_zero_node;
10014     }
10015   else
10016     {
10017       c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10018       if (c)
10019           {
10020             device = OMP_CLAUSE_DEVICE_ID (c);
10021             device_loc = OMP_CLAUSE_LOCATION (c);
10022             if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10023               sorry_at (device_loc, "%<ancestor%> not yet supported");
10024           }
10025       else
10026           {
10027             /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
10028                library choose).  */
10029             device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10030             device_loc = gimple_location (entry_stmt);
10031           }
10032 
10033       c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10034       /* FIXME: in_reduction(...) nowait is unimplemented yet, pretend
10035            nowait doesn't appear.  */
10036       if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10037           c = NULL;
10038       if (c)
10039           flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10040     }
10041 
10042   /* By default, there is no conditional.  */
10043   tree cond = NULL_TREE;
10044   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10045   if (c)
10046     cond = OMP_CLAUSE_IF_EXPR (c);
10047   /* If we found the clause 'if (cond)', build:
10048      OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
10049      OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10050   if (cond)
10051     {
10052       tree *tp;
10053       if (is_gimple_omp_oacc (entry_stmt))
10054           tp = &goacc_flags;
10055       else
10056           {
10057             /* Ensure 'device' is of the correct type.  */
10058             device = fold_convert_loc (device_loc, integer_type_node, device);
10059 
10060             tp = &device;
10061           }
10062 
10063       cond = gimple_boolify (cond);
10064 
10065       basic_block cond_bb, then_bb, else_bb;
10066       edge e;
10067       tree tmp_var;
10068 
10069       tmp_var = create_tmp_var (TREE_TYPE (*tp));
10070       if (offloaded)
10071           e = split_block_after_labels (new_bb);
10072       else
10073           {
10074             gsi = gsi_last_nondebug_bb (new_bb);
10075             gsi_prev (&gsi);
10076             e = split_block (new_bb, gsi_stmt (gsi));
10077           }
10078       cond_bb = e->src;
10079       new_bb = e->dest;
10080       remove_edge (e);
10081 
10082       then_bb = create_empty_bb (cond_bb);
10083       else_bb = create_empty_bb (then_bb);
10084       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10085       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10086 
10087       stmt = gimple_build_cond_empty (cond);
10088       gsi = gsi_last_bb (cond_bb);
10089       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10090 
10091       gsi = gsi_start_bb (then_bb);
10092       stmt = gimple_build_assign (tmp_var, *tp);
10093       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10094 
10095       gsi = gsi_start_bb (else_bb);
10096       if (is_gimple_omp_oacc (entry_stmt))
10097           stmt = gimple_build_assign (tmp_var,
10098                                             BIT_IOR_EXPR,
10099                                             *tp,
10100                                             build_int_cst (integer_type_node,
10101                                                                GOACC_FLAG_HOST_FALLBACK));
10102       else
10103           stmt = gimple_build_assign (tmp_var,
10104                                             build_int_cst (integer_type_node,
10105                                                                GOMP_DEVICE_HOST_FALLBACK));
10106       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10107 
10108       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10109       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10110       add_bb_to_loop (then_bb, cond_bb->loop_father);
10111       add_bb_to_loop (else_bb, cond_bb->loop_father);
10112       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10113       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10114 
10115       *tp = tmp_var;
10116 
10117       gsi = gsi_last_nondebug_bb (new_bb);
10118     }
10119   else
10120     {
10121       gsi = gsi_last_nondebug_bb (new_bb);
10122 
10123       if (device != NULL_TREE)
10124           device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10125                                                      true, GSI_SAME_STMT);
10126     }
10127 
10128   t = gimple_omp_target_data_arg (entry_stmt);
10129   if (t == NULL)
10130     {
10131       t1 = size_zero_node;
10132       t2 = build_zero_cst (ptr_type_node);
10133       t3 = t2;
10134       t4 = t2;
10135     }
10136   else
10137     {
10138       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10139       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10140       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10141       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10142       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10143     }
10144 
10145   gimple *g;
10146   bool tagging = false;
10147   /* The maximum number used by any start_ix, without varargs.  */
10148   auto_vec<tree, 11> args;
10149   if (is_gimple_omp_oacc (entry_stmt))
10150     {
10151       tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10152                                                   TREE_TYPE (goacc_flags), goacc_flags);
10153       goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10154                                                             NULL_TREE, true,
10155                                                             GSI_SAME_STMT);
10156       args.quick_push (goacc_flags_m);
10157     }
10158   else
10159     args.quick_push (device);
10160   if (offloaded)
10161     args.quick_push (build_fold_addr_expr (child_fn));
10162   args.quick_push (t1);
10163   args.quick_push (t2);
10164   args.quick_push (t3);
10165   args.quick_push (t4);
10166   switch (start_ix)
10167     {
10168     case BUILT_IN_GOACC_DATA_START:
10169     case BUILT_IN_GOACC_DECLARE:
10170     case BUILT_IN_GOMP_TARGET_DATA:
10171       break;
10172     case BUILT_IN_GOMP_TARGET:
10173     case BUILT_IN_GOMP_TARGET_UPDATE:
10174     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10175       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10176       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10177       if (c)
10178           depend = OMP_CLAUSE_DECL (c);
10179       else
10180           depend = build_int_cst (ptr_type_node, 0);
10181       args.quick_push (depend);
10182       if (start_ix == BUILT_IN_GOMP_TARGET)
10183           args.quick_push (get_target_arguments (&gsi, entry_stmt));
10184       break;
10185     case BUILT_IN_GOACC_PARALLEL:
10186       if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10187           {
10188             tree dims = NULL_TREE;
10189             unsigned int ix;
10190 
10191             /* For serial constructs we set all dimensions to 1.  */
10192             for (ix = GOMP_DIM_MAX; ix--;)
10193               dims = tree_cons (NULL_TREE, integer_one_node, dims);
10194             oacc_replace_fn_attrib (child_fn, dims);
10195           }
10196       else
10197           oacc_set_fn_attrib (child_fn, clauses, &args);
10198       tagging = true;
10199       /* FALLTHRU */
10200     case BUILT_IN_GOACC_ENTER_DATA:
10201     case BUILT_IN_GOACC_EXIT_DATA:
10202     case BUILT_IN_GOACC_UPDATE:
10203       {
10204           tree t_async = NULL_TREE;
10205 
10206           /* If present, use the value specified by the respective
10207              clause, making sure that is of the correct type.  */
10208           c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10209           if (c)
10210             t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10211                                               integer_type_node,
10212                                               OMP_CLAUSE_ASYNC_EXPR (c));
10213           else if (!tagging)
10214             /* Default values for t_async.  */
10215             t_async = fold_convert_loc (gimple_location (entry_stmt),
10216                                               integer_type_node,
10217                                               build_int_cst (integer_type_node,
10218                                                                  GOMP_ASYNC_SYNC));
10219           if (tagging && t_async)
10220             {
10221               unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10222 
10223               if (TREE_CODE (t_async) == INTEGER_CST)
10224                 {
10225                     /* See if we can pack the async arg in to the tag's
10226                        operand.  */
10227                     i_async = TREE_INT_CST_LOW (t_async);
10228                     if (i_async < GOMP_LAUNCH_OP_MAX)
10229                       t_async = NULL_TREE;
10230                     else
10231                       i_async = GOMP_LAUNCH_OP_MAX;
10232                 }
10233               args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10234                                                         i_async));
10235             }
10236           if (t_async)
10237             args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10238                                                                 NULL_TREE, true,
10239                                                                 GSI_SAME_STMT));
10240 
10241           /* Save the argument index, and ... */
10242           unsigned t_wait_idx = args.length ();
10243           unsigned num_waits = 0;
10244           c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10245           if (!tagging || c)
10246             /* ... push a placeholder.  */
10247             args.safe_push (integer_zero_node);
10248 
10249           for (; c; c = OMP_CLAUSE_CHAIN (c))
10250             if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10251               {
10252                 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10253                                                      integer_type_node,
10254                                                      OMP_CLAUSE_WAIT_EXPR (c));
10255                 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10256                                                         GSI_SAME_STMT);
10257                 args.safe_push (arg);
10258                 num_waits++;
10259               }
10260 
10261           if (!tagging || num_waits)
10262             {
10263               tree len;
10264 
10265               /* Now that we know the number, update the placeholder.  */
10266               if (tagging)
10267                 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10268               else
10269                 len = build_int_cst (integer_type_node, num_waits);
10270               len = fold_convert_loc (gimple_location (entry_stmt),
10271                                             unsigned_type_node, len);
10272               args[t_wait_idx] = len;
10273             }
10274       }
10275       break;
10276     default:
10277       gcc_unreachable ();
10278     }
10279   if (tagging)
10280     /*  Push terminal marker - zero.  */
10281     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10282 
10283   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10284   gimple_set_location (g, gimple_location (entry_stmt));
10285   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10286   if (!offloaded)
10287     {
10288       g = gsi_stmt (gsi);
10289       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10290       gsi_remove (&gsi, true);
10291     }
10292 }
10293 
10294 /* Expand the parallel region tree rooted at REGION.  Expansion
10295    proceeds in depth-first order.  Innermost regions are expanded
10296    first.  This way, parallel regions that require a new function to
10297    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10298    internal dependencies in their body.  */
10299 
10300 static void
expand_omp(struct omp_region * region)10301 expand_omp (struct omp_region *region)
10302 {
10303   omp_any_child_fn_dumped = false;
10304   while (region)
10305     {
10306       location_t saved_location;
10307       gimple *inner_stmt = NULL;
10308 
10309       /* First, determine whether this is a combined parallel+workshare
10310            region.  */
10311       if (region->type == GIMPLE_OMP_PARALLEL)
10312           determine_parallel_type (region);
10313 
10314       if (region->type == GIMPLE_OMP_FOR
10315             && gimple_omp_for_combined_p (last_stmt (region->entry)))
10316           inner_stmt = last_stmt (region->inner->entry);
10317 
10318       if (region->inner)
10319           expand_omp (region->inner);
10320 
10321       saved_location = input_location;
10322       if (gimple_has_location (last_stmt (region->entry)))
10323           input_location = gimple_location (last_stmt (region->entry));
10324 
10325       switch (region->type)
10326           {
10327           case GIMPLE_OMP_PARALLEL:
10328           case GIMPLE_OMP_TASK:
10329             expand_omp_taskreg (region);
10330             break;
10331 
10332           case GIMPLE_OMP_FOR:
10333             expand_omp_for (region, inner_stmt);
10334             break;
10335 
10336           case GIMPLE_OMP_SECTIONS:
10337             expand_omp_sections (region);
10338             break;
10339 
10340           case GIMPLE_OMP_SECTION:
10341             /* Individual omp sections are handled together with their
10342                parent GIMPLE_OMP_SECTIONS region.  */
10343             break;
10344 
10345           case GIMPLE_OMP_SINGLE:
10346           case GIMPLE_OMP_SCOPE:
10347             expand_omp_single (region);
10348             break;
10349 
10350           case GIMPLE_OMP_ORDERED:
10351             {
10352               gomp_ordered *ord_stmt
10353                 = as_a <gomp_ordered *> (last_stmt (region->entry));
10354               if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10355                                          OMP_CLAUSE_DEPEND))
10356                 {
10357                     /* We'll expand these when expanding corresponding
10358                        worksharing region with ordered(n) clause.  */
10359                     gcc_assert (region->outer
10360                                   && region->outer->type == GIMPLE_OMP_FOR);
10361                     region->ord_stmt = ord_stmt;
10362                     break;
10363                 }
10364             }
10365             /* FALLTHRU */
10366           case GIMPLE_OMP_MASTER:
10367           case GIMPLE_OMP_MASKED:
10368           case GIMPLE_OMP_TASKGROUP:
10369           case GIMPLE_OMP_CRITICAL:
10370           case GIMPLE_OMP_TEAMS:
10371             expand_omp_synch (region);
10372             break;
10373 
10374           case GIMPLE_OMP_ATOMIC_LOAD:
10375             expand_omp_atomic (region);
10376             break;
10377 
10378           case GIMPLE_OMP_TARGET:
10379             expand_omp_target (region);
10380             break;
10381 
10382           default:
10383             gcc_unreachable ();
10384           }
10385 
10386       input_location = saved_location;
10387       region = region->next;
10388     }
10389   if (omp_any_child_fn_dumped)
10390     {
10391       if (dump_file)
10392           dump_function_header (dump_file, current_function_decl, dump_flags);
10393       omp_any_child_fn_dumped = false;
10394     }
10395 }
10396 
10397 /* Helper for build_omp_regions.  Scan the dominator tree starting at
10398    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
10399    true, the function ends once a single tree is built (otherwise, whole
10400    forest of OMP constructs may be built).  */
10401 
10402 static void
build_omp_regions_1(basic_block bb,struct omp_region * parent,bool single_tree)10403 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10404                          bool single_tree)
10405 {
10406   gimple_stmt_iterator gsi;
10407   gimple *stmt;
10408   basic_block son;
10409 
10410   gsi = gsi_last_nondebug_bb (bb);
10411   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10412     {
10413       struct omp_region *region;
10414       enum gimple_code code;
10415 
10416       stmt = gsi_stmt (gsi);
10417       code = gimple_code (stmt);
10418       if (code == GIMPLE_OMP_RETURN)
10419           {
10420             /* STMT is the return point out of region PARENT.  Mark it
10421                as the exit point and make PARENT the immediately
10422                enclosing region.  */
10423             gcc_assert (parent);
10424             region = parent;
10425             region->exit = bb;
10426             parent = parent->outer;
10427           }
10428       else if (code == GIMPLE_OMP_ATOMIC_STORE)
10429           {
10430             /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10431                GIMPLE_OMP_RETURN, but matches with
10432                GIMPLE_OMP_ATOMIC_LOAD.  */
10433             gcc_assert (parent);
10434             gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10435             region = parent;
10436             region->exit = bb;
10437             parent = parent->outer;
10438           }
10439       else if (code == GIMPLE_OMP_CONTINUE)
10440           {
10441             gcc_assert (parent);
10442             parent->cont = bb;
10443           }
10444       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10445           {
10446             /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10447                GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
10448           }
10449       else
10450           {
10451             region = new_omp_region (bb, code, parent);
10452             /* Otherwise...  */
10453             if (code == GIMPLE_OMP_TARGET)
10454               {
10455                 switch (gimple_omp_target_kind (stmt))
10456                     {
10457                     case GF_OMP_TARGET_KIND_REGION:
10458                     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10459                     case GF_OMP_TARGET_KIND_OACC_KERNELS:
10460                     case GF_OMP_TARGET_KIND_OACC_SERIAL:
10461                     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10462                     case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10463                       break;
10464                     case GF_OMP_TARGET_KIND_UPDATE:
10465                     case GF_OMP_TARGET_KIND_ENTER_DATA:
10466                     case GF_OMP_TARGET_KIND_EXIT_DATA:
10467                     case GF_OMP_TARGET_KIND_DATA:
10468                     case GF_OMP_TARGET_KIND_OACC_DATA:
10469                     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10470                     case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10471                     case GF_OMP_TARGET_KIND_OACC_UPDATE:
10472                     case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10473                     case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10474                     case GF_OMP_TARGET_KIND_OACC_DECLARE:
10475                       /* ..., other than for those stand-alone directives...
10476                          To be precise, target data isn't stand-alone, but
10477                          gimplifier put the end API call into try finally block
10478                          for it, so omp expansion can treat it as such.  */
10479                       region = NULL;
10480                       break;
10481                     default:
10482                       gcc_unreachable ();
10483                     }
10484               }
10485             else if (code == GIMPLE_OMP_ORDERED
10486                        && omp_find_clause (gimple_omp_ordered_clauses
10487                                                    (as_a <gomp_ordered *> (stmt)),
10488                                                OMP_CLAUSE_DEPEND))
10489               /* #pragma omp ordered depend is also just a stand-alone
10490                  directive.  */
10491               region = NULL;
10492             else if (code == GIMPLE_OMP_TASK
10493                        && gimple_omp_task_taskwait_p (stmt))
10494               /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
10495               region = NULL;
10496             else if (code == GIMPLE_OMP_TASKGROUP)
              /* #pragma omp taskgroup isn't a stand-alone directive, but
                 the gimplifier puts the end API call into a try/finally
                 block for it, so omp expansion can treat it as such.  */
10500               region = NULL;
10501             /* ..., this directive becomes the parent for a new region.  */
10502             if (region)
10503               parent = region;
10504           }
10505     }
10506 
10507   if (single_tree && !parent)
10508     return;
10509 
10510   for (son = first_dom_son (CDI_DOMINATORS, bb);
10511        son;
10512        son = next_dom_son (CDI_DOMINATORS, son))
10513     build_omp_regions_1 (son, parent, single_tree);
10514 }
10515 
10516 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10517    root_omp_region.  */
10518 
10519 static void
build_omp_regions_root(basic_block root)10520 build_omp_regions_root (basic_block root)
10521 {
10522   gcc_assert (root_omp_region == NULL);
10523   build_omp_regions_1 (root, NULL, true);
10524   gcc_assert (root_omp_region != NULL);
10525 }
10526 
10527 /* Expands omp construct (and its subconstructs) starting in HEAD.  */
10528 
10529 void
omp_expand_local(basic_block head)10530 omp_expand_local (basic_block head)
10531 {
10532   build_omp_regions_root (head);
10533   if (dump_file && (dump_flags & TDF_DETAILS))
10534     {
10535       fprintf (dump_file, "\nOMP region tree\n\n");
10536       dump_omp_region (dump_file, root_omp_region, 0);
10537       fprintf (dump_file, "\n");
10538     }
10539 
10540   remove_exit_barriers (root_omp_region);
10541   expand_omp (root_omp_region);
10542 
10543   omp_free_regions ();
10544 }
10545 
10546 /* Scan the CFG and build a tree of OMP regions.  Return the root of
10547    the OMP region tree.  */
10548 
10549 static void
build_omp_regions(void)10550 build_omp_regions (void)
10551 {
10552   gcc_assert (root_omp_region == NULL);
10553   calculate_dominance_info (CDI_DOMINATORS);
10554   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10555 }
10556 
10557 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
10558 
10559 static unsigned int
execute_expand_omp(void)10560 execute_expand_omp (void)
10561 {
10562   build_omp_regions ();
10563 
10564   if (!root_omp_region)
10565     return 0;
10566 
10567   if (dump_file)
10568     {
10569       fprintf (dump_file, "\nOMP region tree\n\n");
10570       dump_omp_region (dump_file, root_omp_region, 0);
10571       fprintf (dump_file, "\n");
10572     }
10573 
10574   remove_exit_barriers (root_omp_region);
10575 
10576   expand_omp (root_omp_region);
10577 
10578   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10579     verify_loop_structure ();
10580   cleanup_tree_cfg ();
10581 
10582   omp_free_regions ();
10583 
10584   return 0;
10585 }
10586 
10587 /* OMP expansion -- the default pass, run before creation of SSA form.  */
10588 
10589 namespace {
10590 
10591 const pass_data pass_data_expand_omp =
10592 {
10593   GIMPLE_PASS, /* type */
10594   "ompexp", /* name */
10595   OPTGROUP_OMP, /* optinfo_flags */
10596   TV_NONE, /* tv_id */
10597   PROP_gimple_any, /* properties_required */
10598   PROP_gimple_eomp, /* properties_provided */
10599   0, /* properties_destroyed */
10600   0, /* todo_flags_start */
10601   0, /* todo_flags_finish */
10602 };
10603 
10604 class pass_expand_omp : public gimple_opt_pass
10605 {
10606 public:
pass_expand_omp(gcc::context * ctxt)10607   pass_expand_omp (gcc::context *ctxt)
10608     : gimple_opt_pass (pass_data_expand_omp, ctxt)
10609   {}
10610 
10611   /* opt_pass methods: */
execute(function *)10612   virtual unsigned int execute (function *)
10613     {
10614       bool gate = ((flag_openacc != 0 || flag_openmp != 0
10615                         || flag_openmp_simd != 0)
10616                        && !seen_error ());
10617 
10618       /* This pass always runs, to provide PROP_gimple_eomp.
10619            But often, there is nothing to do.  */
10620       if (!gate)
10621           return 0;
10622 
10623       return execute_expand_omp ();
10624     }
10625 
10626 }; // class pass_expand_omp
10627 
10628 } // anon namespace
10629 
10630 gimple_opt_pass *
make_pass_expand_omp(gcc::context * ctxt)10631 make_pass_expand_omp (gcc::context *ctxt)
10632 {
10633   return new pass_expand_omp (ctxt);
10634 }
10635 
10636 namespace {
10637 
10638 const pass_data pass_data_expand_omp_ssa =
10639 {
10640   GIMPLE_PASS, /* type */
10641   "ompexpssa", /* name */
10642   OPTGROUP_OMP, /* optinfo_flags */
10643   TV_NONE, /* tv_id */
10644   PROP_cfg | PROP_ssa, /* properties_required */
10645   PROP_gimple_eomp, /* properties_provided */
10646   0, /* properties_destroyed */
10647   0, /* todo_flags_start */
10648   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10649 };
10650 
10651 class pass_expand_omp_ssa : public gimple_opt_pass
10652 {
10653 public:
pass_expand_omp_ssa(gcc::context * ctxt)10654   pass_expand_omp_ssa (gcc::context *ctxt)
10655     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10656   {}
10657 
10658   /* opt_pass methods: */
gate(function * fun)10659   virtual bool gate (function *fun)
10660     {
10661       return !(fun->curr_properties & PROP_gimple_eomp);
10662     }
execute(function *)10663   virtual unsigned int execute (function *) { return execute_expand_omp (); }
clone()10664   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10665 
10666 }; // class pass_expand_omp_ssa
10667 
10668 } // anon namespace
10669 
10670 gimple_opt_pass *
make_pass_expand_omp_ssa(gcc::context * ctxt)10671 make_pass_expand_omp_ssa (gcc::context *ctxt)
10672 {
10673   return new pass_expand_omp_ssa (ctxt);
10674 }
10675 
10676 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10677    GIMPLE_* codes.  */
10678 
10679 bool
omp_make_gimple_edges(basic_block bb,struct omp_region ** region,int * region_idx)10680 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10681                            int *region_idx)
10682 {
10683   gimple *last = last_stmt (bb);
10684   enum gimple_code code = gimple_code (last);
10685   struct omp_region *cur_region = *region;
10686   bool fallthru = false;
10687 
10688   switch (code)
10689     {
10690     case GIMPLE_OMP_PARALLEL:
10691     case GIMPLE_OMP_FOR:
10692     case GIMPLE_OMP_SINGLE:
10693     case GIMPLE_OMP_TEAMS:
10694     case GIMPLE_OMP_MASTER:
10695     case GIMPLE_OMP_MASKED:
10696     case GIMPLE_OMP_SCOPE:
10697     case GIMPLE_OMP_CRITICAL:
10698     case GIMPLE_OMP_SECTION:
10699       cur_region = new_omp_region (bb, code, cur_region);
10700       fallthru = true;
10701       break;
10702 
10703     case GIMPLE_OMP_TASKGROUP:
10704       cur_region = new_omp_region (bb, code, cur_region);
10705       fallthru = true;
10706       cur_region = cur_region->outer;
10707       break;
10708 
10709     case GIMPLE_OMP_TASK:
10710       cur_region = new_omp_region (bb, code, cur_region);
10711       fallthru = true;
10712       if (gimple_omp_task_taskwait_p (last))
10713           cur_region = cur_region->outer;
10714       break;
10715 
10716     case GIMPLE_OMP_ORDERED:
10717       cur_region = new_omp_region (bb, code, cur_region);
10718       fallthru = true;
10719       if (omp_find_clause (gimple_omp_ordered_clauses
10720                                    (as_a <gomp_ordered *> (last)),
10721                                  OMP_CLAUSE_DEPEND))
10722           cur_region = cur_region->outer;
10723       break;
10724 
10725     case GIMPLE_OMP_TARGET:
10726       cur_region = new_omp_region (bb, code, cur_region);
10727       fallthru = true;
10728       switch (gimple_omp_target_kind (last))
10729           {
10730           case GF_OMP_TARGET_KIND_REGION:
10731           case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10732           case GF_OMP_TARGET_KIND_OACC_KERNELS:
10733           case GF_OMP_TARGET_KIND_OACC_SERIAL:
10734           case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10735           case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10736             break;
10737           case GF_OMP_TARGET_KIND_UPDATE:
10738           case GF_OMP_TARGET_KIND_ENTER_DATA:
10739           case GF_OMP_TARGET_KIND_EXIT_DATA:
10740           case GF_OMP_TARGET_KIND_DATA:
10741           case GF_OMP_TARGET_KIND_OACC_DATA:
10742           case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10743           case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10744           case GF_OMP_TARGET_KIND_OACC_UPDATE:
10745           case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10746           case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10747           case GF_OMP_TARGET_KIND_OACC_DECLARE:
10748             cur_region = cur_region->outer;
10749             break;
10750           default:
10751             gcc_unreachable ();
10752           }
10753       break;
10754 
10755     case GIMPLE_OMP_SECTIONS:
10756       cur_region = new_omp_region (bb, code, cur_region);
10757       fallthru = true;
10758       break;
10759 
10760     case GIMPLE_OMP_SECTIONS_SWITCH:
10761       fallthru = false;
10762       break;
10763 
10764     case GIMPLE_OMP_ATOMIC_LOAD:
10765     case GIMPLE_OMP_ATOMIC_STORE:
10766        fallthru = true;
10767        break;
10768 
10769     case GIMPLE_OMP_RETURN:
10770       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10771            somewhere other than the next block.  This will be
10772            created later.  */
10773       cur_region->exit = bb;
10774       if (cur_region->type == GIMPLE_OMP_TASK)
10775           /* Add an edge corresponding to not scheduling the task
10776              immediately.  */
10777           make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10778       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10779       cur_region = cur_region->outer;
10780       break;
10781 
10782     case GIMPLE_OMP_CONTINUE:
10783       cur_region->cont = bb;
10784       switch (cur_region->type)
10785           {
10786           case GIMPLE_OMP_FOR:
10787             /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10788                succs edges as abnormal to prevent splitting
10789                them.  */
10790             single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10791             /* Make the loopback edge.  */
10792             make_edge (bb, single_succ (cur_region->entry),
10793                          EDGE_ABNORMAL);
10794 
10795             /* Create an edge from GIMPLE_OMP_FOR to exit, which
10796                corresponds to the case that the body of the loop
10797                is not executed at all.  */
10798             make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10799             make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10800             fallthru = false;
10801             break;
10802 
10803           case GIMPLE_OMP_SECTIONS:
10804             /* Wire up the edges into and out of the nested sections.  */
10805             {
10806               basic_block switch_bb = single_succ (cur_region->entry);
10807 
10808               struct omp_region *i;
10809               for (i = cur_region->inner; i ; i = i->next)
10810                 {
10811                     gcc_assert (i->type == GIMPLE_OMP_SECTION);
10812                     make_edge (switch_bb, i->entry, 0);
10813                     make_edge (i->exit, bb, EDGE_FALLTHRU);
10814                 }
10815 
10816               /* Make the loopback edge to the block with
10817                  GIMPLE_OMP_SECTIONS_SWITCH.  */
10818               make_edge (bb, switch_bb, 0);
10819 
10820               /* Make the edge from the switch to exit.  */
10821               make_edge (switch_bb, bb->next_bb, 0);
10822               fallthru = false;
10823             }
10824             break;
10825 
10826           case GIMPLE_OMP_TASK:
10827             fallthru = true;
10828             break;
10829 
10830           default:
10831             gcc_unreachable ();
10832           }
10833       break;
10834 
10835     default:
10836       gcc_unreachable ();
10837     }
10838 
10839   if (*region != cur_region)
10840     {
10841       *region = cur_region;
10842       if (cur_region)
10843           *region_idx = cur_region->entry->index;
10844       else
10845           *region_idx = 0;
10846     }
10847 
10848   return fallthru;
10849 }
10850