1 /* Loop invariant motion.
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "tree-ssa.h"
50 #include "dbgcnt.h"
51 
52 /* TODO:  Support for predicated code motion.  I.e.
53 
54    while (1)
55      {
56        if (cond)
57            {
58              a = inv;
59              something;
60            }
61      }
62 
63    Where COND and INV are invariants, but evaluating INV may trap or be
64    invalid from some other reason if !COND.  This may be transformed to
65 
66    if (cond)
67      a = inv;
68    while (1)
69      {
70        if (cond)
71            something;
72      }  */
73 
74 /* The auxiliary data kept for each statement.  */
75 
76 struct lim_aux_data
77 {
78   class loop *max_loop;       /* The outermost loop in that the statement
79                                            is invariant.  */
80 
81   class loop *tgt_loop;       /* The loop out of that we want to move the
82                                            invariant.  */
83 
84   class loop *always_executed_in;
85                                         /* The outermost loop for that we are sure
86                                            the statement is executed if the loop
87                                            is entered.  */
88 
89   unsigned cost;              /* Cost of the computation performed by the
90                                            statement.  */
91 
92   unsigned ref;                         /* The simple_mem_ref in this stmt or 0.  */
93 
94   vec<gimple *> depends;      /* Vector of statements that must be also
95                                            hoisted out of the loop when this statement
96                                            is hoisted; i.e. those that define the
97                                            operands of the statement and are inside of
98                                            the MAX_LOOP loop.  */
99 };
100 
101 /* Maps statements to their lim_aux_data.  */
102 
103 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
104 
105 /* Description of a memory reference location.  */
106 
107 struct mem_ref_loc
108 {
109   tree *ref;                            /* The reference itself.  */
110   gimple *stmt;                         /* The statement in that it occurs.  */
111 };
112 
113 
114 /* Description of a memory reference.  */
115 
116 class im_mem_ref
117 {
118 public:
119   unsigned id : 30;           /* ID assigned to the memory reference
120                                            (its index in memory_accesses.refs_list)  */
121   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
122   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
123   hashval_t hash;             /* Its hash value.  */
124 
125   /* The memory access itself and associated caching of alias-oracle
126      query meta-data.  We are using mem.ref == error_mark_node for the
127      case the reference is represented by its single access stmt
128      in accesses_in_loop[0].  */
129   ao_ref mem;
130 
131   bitmap stored;              /* The set of loops in that this memory location
132                                            is stored to.  */
133   bitmap loaded;              /* The set of loops in that this memory location
134                                            is loaded from.  */
135   vec<mem_ref_loc>            accesses_in_loop;
136                                         /* The locations of the accesses.  */
137 
138   /* The following set is computed on demand.  */
139   bitmap_head dep_loop;                 /* The set of loops in that the memory
140                                            reference is {in,}dependent in
141                                            different modes.  */
142 };
143 
144 /* We use six bits per loop in the ref->dep_loop bitmap to record
145    the dep_kind x dep_state combinations.  */
146 
147 enum dep_kind { lim_raw, sm_war, sm_waw };
148 enum dep_state { dep_unknown, dep_independent, dep_dependent };
149 
150 /* coldest outermost loop for given loop.  */
151 vec<class loop *> coldest_outermost_loop;
152 /* hotter outer loop nearest to given loop.  */
153 vec<class loop *> hotter_than_inner_loop;
154 
155 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */
156 
157 static void
record_loop_dependence(class loop * loop,im_mem_ref * ref,dep_kind kind,dep_state state)158 record_loop_dependence (class loop *loop, im_mem_ref *ref,
159                               dep_kind kind, dep_state state)
160 {
161   gcc_assert (state != dep_unknown);
162   unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
163   bitmap_set_bit (&ref->dep_loop, bit);
164 }
165 
166 /* Query the loop dependence cache of REF for LOOP, KIND.  */
167 
168 static dep_state
query_loop_dependence(class loop * loop,im_mem_ref * ref,dep_kind kind)169 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
170 {
171   unsigned first_bit = 6 * loop->num + kind * 2;
172   if (bitmap_bit_p (&ref->dep_loop, first_bit))
173     return dep_independent;
174   else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
175     return dep_dependent;
176   return dep_unknown;
177 }
178 
179 /* Mem_ref hashtable helpers.  */
180 
181 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
182 {
183   typedef ao_ref *compare_type;
184   static inline hashval_t hash (const im_mem_ref *);
185   static inline bool equal (const im_mem_ref *, const ao_ref *);
186 };
187 
188 /* A hash function for class im_mem_ref object OBJ.  */
189 
190 inline hashval_t
hash(const im_mem_ref * mem)191 mem_ref_hasher::hash (const im_mem_ref *mem)
192 {
193   return mem->hash;
194 }
195 
196 /* An equality function for class im_mem_ref object MEM1 with
197    memory reference OBJ2.  */
198 
199 inline bool
equal(const im_mem_ref * mem1,const ao_ref * obj2)200 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
201 {
202   if (obj2->max_size_known_p ())
203     return (mem1->ref_decomposed
204               && ((TREE_CODE (mem1->mem.base) == MEM_REF
205                      && TREE_CODE (obj2->base) == MEM_REF
206                      && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
207                                              TREE_OPERAND (obj2->base, 0), 0)
208                      && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
209                                     mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
210                     || (operand_equal_p (mem1->mem.base, obj2->base, 0)
211                         && known_eq (mem1->mem.offset, obj2->offset)))
212               && known_eq (mem1->mem.size, obj2->size)
213               && known_eq (mem1->mem.max_size, obj2->max_size)
214               && mem1->mem.volatile_p == obj2->volatile_p
215               && (mem1->mem.ref_alias_set == obj2->ref_alias_set
216                     /* We are not canonicalizing alias-sets but for the
217                        special-case we didn't canonicalize yet and the
218                        incoming ref is a alias-set zero MEM we pick
219                        the correct one already.  */
220                     || (!mem1->ref_canonical
221                         && (TREE_CODE (obj2->ref) == MEM_REF
222                               || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
223                         && obj2->ref_alias_set == 0)
224                     /* Likewise if there's a canonical ref with alias-set zero.  */
225                     || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
226               && types_compatible_p (TREE_TYPE (mem1->mem.ref),
227                                            TREE_TYPE (obj2->ref)));
228   else
229     return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
230 }
231 
232 
233 /* Description of memory accesses in loops.  */
234 
235 static struct
236 {
237   /* The hash table of memory references accessed in loops.  */
238   hash_table<mem_ref_hasher> *refs;
239 
240   /* The list of memory references.  */
241   vec<im_mem_ref *> refs_list;
242 
243   /* The set of memory references accessed in each loop.  */
244   vec<bitmap_head> refs_loaded_in_loop;
245 
246   /* The set of memory references stored in each loop.  */
247   vec<bitmap_head> refs_stored_in_loop;
248 
249   /* The set of memory references stored in each loop, including subloops .  */
250   vec<bitmap_head> all_refs_stored_in_loop;
251 
252   /* Cache for expanding memory addresses.  */
253   hash_map<tree, name_expansion *> *ttae_cache;
254 } memory_accesses;
255 
256 /* Obstack for the bitmaps in the above data structures.  */
257 static bitmap_obstack lim_bitmap_obstack;
258 static obstack mem_ref_obstack;
259 
260 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
261 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
262 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
263 
264 /* Minimum cost of an expensive expression.  */
265 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
266 
267 /* The outermost loop for which execution of the header guarantees that the
268    block will be executed.  */
269 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
270 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
271 
272 /* ID of the shared unanalyzable mem.  */
273 #define UNANALYZABLE_MEM_ID 0
274 
275 /* Whether the reference was analyzable.  */
276 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
277 
278 static struct lim_aux_data *
init_lim_data(gimple * stmt)279 init_lim_data (gimple *stmt)
280 {
281   lim_aux_data *p = XCNEW (struct lim_aux_data);
282   lim_aux_data_map->put (stmt, p);
283 
284   return p;
285 }
286 
287 static struct lim_aux_data *
get_lim_data(gimple * stmt)288 get_lim_data (gimple *stmt)
289 {
290   lim_aux_data **p = lim_aux_data_map->get (stmt);
291   if (!p)
292     return NULL;
293 
294   return *p;
295 }
296 
297 /* Releases the memory occupied by DATA.  */
298 
299 static void
free_lim_aux_data(struct lim_aux_data * data)300 free_lim_aux_data (struct lim_aux_data *data)
301 {
302   data->depends.release ();
303   free (data);
304 }
305 
306 static void
clear_lim_data(gimple * stmt)307 clear_lim_data (gimple *stmt)
308 {
309   lim_aux_data **p = lim_aux_data_map->get (stmt);
310   if (!p)
311     return;
312 
313   free_lim_aux_data (*p);
314   *p = NULL;
315 }
316 
317 
318 /* The possibilities of statement movement.  */
319 enum move_pos
320   {
321     MOVE_IMPOSSIBLE,                    /* No movement -- side effect expression.  */
322     MOVE_PRESERVE_EXECUTION,  /* Must not cause the non-executed statement
323                                            become executed -- memory accesses, ... */
324     MOVE_POSSIBLE             /* Unlimited movement.  */
325   };
326 
327 
328 /* If it is possible to hoist the statement STMT unconditionally,
329    returns MOVE_POSSIBLE.
330    If it is possible to hoist the statement STMT, but we must avoid making
331    it executed if it would not be executed in the original program (e.g.
332    because it may trap), return MOVE_PRESERVE_EXECUTION.
333    Otherwise return MOVE_IMPOSSIBLE.  */
334 
335 static enum move_pos
movement_possibility_1(gimple * stmt)336 movement_possibility_1 (gimple *stmt)
337 {
338   tree lhs;
339   enum move_pos ret = MOVE_POSSIBLE;
340 
341   if (flag_unswitch_loops
342       && gimple_code (stmt) == GIMPLE_COND)
343     {
344       /* If we perform unswitching, force the operands of the invariant
345            condition to be moved out of the loop.  */
346       return MOVE_POSSIBLE;
347     }
348 
349   if (gimple_code (stmt) == GIMPLE_PHI
350       && gimple_phi_num_args (stmt) <= 2
351       && !virtual_operand_p (gimple_phi_result (stmt))
352       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
353     return MOVE_POSSIBLE;
354 
355   if (gimple_get_lhs (stmt) == NULL_TREE)
356     return MOVE_IMPOSSIBLE;
357 
358   if (gimple_vdef (stmt))
359     return MOVE_IMPOSSIBLE;
360 
361   if (stmt_ends_bb_p (stmt)
362       || gimple_has_volatile_ops (stmt)
363       || gimple_has_side_effects (stmt)
364       || stmt_could_throw_p (cfun, stmt))
365     return MOVE_IMPOSSIBLE;
366 
367   if (is_gimple_call (stmt))
368     {
369       /* While pure or const call is guaranteed to have no side effects, we
370            cannot move it arbitrarily.  Consider code like
371 
372            char *s = something ();
373 
374            while (1)
375              {
376                if (s)
377                  t = strlen (s);
378                else
379                  t = 0;
380              }
381 
382            Here the strlen call cannot be moved out of the loop, even though
383            s is invariant.  In addition to possibly creating a call with
384            invalid arguments, moving out a function call that is not executed
385            may cause performance regressions in case the call is costly and
386            not executed at all.  */
387       ret = MOVE_PRESERVE_EXECUTION;
388       lhs = gimple_call_lhs (stmt);
389     }
390   else if (is_gimple_assign (stmt))
391     lhs = gimple_assign_lhs (stmt);
392   else
393     return MOVE_IMPOSSIBLE;
394 
395   if (TREE_CODE (lhs) == SSA_NAME
396       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
397     return MOVE_IMPOSSIBLE;
398 
399   if (TREE_CODE (lhs) != SSA_NAME
400       || gimple_could_trap_p (stmt))
401     return MOVE_PRESERVE_EXECUTION;
402 
403   /* Non local loads in a transaction cannot be hoisted out.  Well,
404      unless the load happens on every path out of the loop, but we
405      don't take this into account yet.  */
406   if (flag_tm
407       && gimple_in_transaction (stmt)
408       && gimple_assign_single_p (stmt))
409     {
410       tree rhs = gimple_assign_rhs1 (stmt);
411       if (DECL_P (rhs) && is_global_var (rhs))
412           {
413             if (dump_file)
414               {
415                 fprintf (dump_file, "Cannot hoist conditional load of ");
416                 print_generic_expr (dump_file, rhs, TDF_SLIM);
417                 fprintf (dump_file, " because it is in a transaction.\n");
418               }
419             return MOVE_IMPOSSIBLE;
420           }
421     }
422 
423   return ret;
424 }
425 
426 static enum move_pos
movement_possibility(gimple * stmt)427 movement_possibility (gimple *stmt)
428 {
429   enum move_pos pos = movement_possibility_1 (stmt);
430   if (pos == MOVE_POSSIBLE)
431     {
432       use_operand_p use_p;
433       ssa_op_iter ssa_iter;
434       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
435           if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
436               && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
437             return MOVE_PRESERVE_EXECUTION;
438     }
439   return pos;
440 }
441 
442 
443 /* Compare the profile count inequality of bb and loop's preheader, it is
444    three-state as stated in profile-count.h, FALSE is returned if inequality
445    cannot be decided.  */
446 bool
bb_colder_than_loop_preheader(basic_block bb,class loop * loop)447 bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
448 {
449   gcc_assert (bb && loop);
450   return bb->count < loop_preheader_edge (loop)->src->count;
451 }
452 
453 /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
454    count.
455   It does three steps check:
456   1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
457   return NULL which means it should not be moved out at all;
458   2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
459   OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
460   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
461   hotter loop between OUTERMOST_LOOP and loop in pre-computed
462   HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
463   OUTERMOST_LOOP.
464   At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
465   the hoist target.  */
466 
467 static class loop *
get_coldest_out_loop(class loop * outermost_loop,class loop * loop,basic_block curr_bb)468 get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
469                           basic_block curr_bb)
470 {
471   gcc_assert (outermost_loop == loop
472                 || flow_loop_nested_p (outermost_loop, loop));
473 
474   /* If bb_colder_than_loop_preheader returns false due to three-state
475     comparision, OUTERMOST_LOOP is returned finally to preserve the behavior.
476     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
477   if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
478     return NULL;
479 
480   class loop *coldest_loop = coldest_outermost_loop[loop->num];
481   if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
482     {
483       class loop *hotter_loop = hotter_than_inner_loop[loop->num];
484       if (!hotter_loop
485             || loop_depth (hotter_loop) < loop_depth (outermost_loop))
486           return outermost_loop;
487 
488       /*  hotter_loop is between OUTERMOST_LOOP and LOOP like:
489           [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
490           hotter_loop, second_coldest_loop, ..., loop]
491           return second_coldest_loop to be the hoist target.  */
492       class loop *aloop;
493       for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
494           if (aloop == loop || flow_loop_nested_p (aloop, loop))
495             return aloop;
496     }
497   return coldest_loop;
498 }
499 
500 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
501    loop to that we could move the expression using DEF if it did not have
502    other operands, i.e. the outermost loop enclosing LOOP in that the value
503    of DEF is invariant.  */
504 
505 static class loop *
outermost_invariant_loop(tree def,class loop * loop)506 outermost_invariant_loop (tree def, class loop *loop)
507 {
508   gimple *def_stmt;
509   basic_block def_bb;
510   class loop *max_loop;
511   struct lim_aux_data *lim_data;
512 
513   if (!def)
514     return superloop_at_depth (loop, 1);
515 
516   if (TREE_CODE (def) != SSA_NAME)
517     {
518       gcc_assert (is_gimple_min_invariant (def));
519       return superloop_at_depth (loop, 1);
520     }
521 
522   def_stmt = SSA_NAME_DEF_STMT (def);
523   def_bb = gimple_bb (def_stmt);
524   if (!def_bb)
525     return superloop_at_depth (loop, 1);
526 
527   max_loop = find_common_loop (loop, def_bb->loop_father);
528 
529   lim_data = get_lim_data (def_stmt);
530   if (lim_data != NULL && lim_data->max_loop != NULL)
531     max_loop = find_common_loop (max_loop,
532                                          loop_outer (lim_data->max_loop));
533   if (max_loop == loop)
534     return NULL;
535   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
536 
537   return max_loop;
538 }
539 
540 /* DATA is a structure containing information associated with a statement
541    inside LOOP.  DEF is one of the operands of this statement.
542 
543    Find the outermost loop enclosing LOOP in that value of DEF is invariant
544    and record this in DATA->max_loop field.  If DEF itself is defined inside
545    this loop as well (i.e. we need to hoist it out of the loop if we want
546    to hoist the statement represented by DATA), record the statement in that
547    DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
548    add the cost of the computation of DEF to the DATA->cost.
549 
550    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
551 
552 static bool
add_dependency(tree def,struct lim_aux_data * data,class loop * loop,bool add_cost)553 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
554                     bool add_cost)
555 {
556   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
557   basic_block def_bb = gimple_bb (def_stmt);
558   class loop *max_loop;
559   struct lim_aux_data *def_data;
560 
561   if (!def_bb)
562     return true;
563 
564   max_loop = outermost_invariant_loop (def, loop);
565   if (!max_loop)
566     return false;
567 
568   if (flow_loop_nested_p (data->max_loop, max_loop))
569     data->max_loop = max_loop;
570 
571   def_data = get_lim_data (def_stmt);
572   if (!def_data)
573     return true;
574 
575   if (add_cost
576       /* Only add the cost if the statement defining DEF is inside LOOP,
577            i.e. if it is likely that by moving the invariants dependent
578            on it, we will be able to avoid creating a new register for
579            it (since it will be only used in these dependent invariants).  */
580       && def_bb->loop_father == loop)
581     data->cost += def_data->cost;
582 
583   data->depends.safe_push (def_stmt);
584 
585   return true;
586 }
587 
588 /* Returns an estimate for a cost of statement STMT.  The values here
589    are just ad-hoc constants, similar to costs for inlining.  */
590 
591 static unsigned
stmt_cost(gimple * stmt)592 stmt_cost (gimple *stmt)
593 {
594   /* Always try to create possibilities for unswitching.  */
595   if (gimple_code (stmt) == GIMPLE_COND
596       || gimple_code (stmt) == GIMPLE_PHI)
597     return LIM_EXPENSIVE;
598 
599   /* We should be hoisting calls if possible.  */
600   if (is_gimple_call (stmt))
601     {
602       tree fndecl;
603 
604       /* Unless the call is a builtin_constant_p; this always folds to a
605            constant, so moving it is useless.  */
606       fndecl = gimple_call_fndecl (stmt);
607       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
608           return 0;
609 
610       return LIM_EXPENSIVE;
611     }
612 
613   /* Hoisting memory references out should almost surely be a win.  */
614   if (gimple_references_memory_p (stmt))
615     return LIM_EXPENSIVE;
616 
617   if (gimple_code (stmt) != GIMPLE_ASSIGN)
618     return 1;
619 
620   switch (gimple_assign_rhs_code (stmt))
621     {
622     case MULT_EXPR:
623     case WIDEN_MULT_EXPR:
624     case WIDEN_MULT_PLUS_EXPR:
625     case WIDEN_MULT_MINUS_EXPR:
626     case DOT_PROD_EXPR:
627     case TRUNC_DIV_EXPR:
628     case CEIL_DIV_EXPR:
629     case FLOOR_DIV_EXPR:
630     case ROUND_DIV_EXPR:
631     case EXACT_DIV_EXPR:
632     case CEIL_MOD_EXPR:
633     case FLOOR_MOD_EXPR:
634     case ROUND_MOD_EXPR:
635     case TRUNC_MOD_EXPR:
636     case RDIV_EXPR:
637       /* Division and multiplication are usually expensive.  */
638       return LIM_EXPENSIVE;
639 
640     case LSHIFT_EXPR:
641     case RSHIFT_EXPR:
642     case WIDEN_LSHIFT_EXPR:
643     case LROTATE_EXPR:
644     case RROTATE_EXPR:
645       /* Shifts and rotates are usually expensive.  */
646       return LIM_EXPENSIVE;
647 
648     case CONSTRUCTOR:
649       /* Make vector construction cost proportional to the number
650          of elements.  */
651       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
652 
653     case SSA_NAME:
654     case PAREN_EXPR:
655       /* Whether or not something is wrapped inside a PAREN_EXPR
656          should not change move cost.  Nor should an intermediate
657            unpropagated SSA name copy.  */
658       return 0;
659 
660     default:
661       return 1;
662     }
663 }
664 
665 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
666    REF is independent.  If REF is not independent in LOOP, NULL is returned
667    instead.  */
668 
669 static class loop *
outermost_indep_loop(class loop * outer,class loop * loop,im_mem_ref * ref)670 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
671 {
672   class loop *aloop;
673 
674   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
675     return NULL;
676 
677   for (aloop = outer;
678        aloop != loop;
679        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
680     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
681           && ref_indep_loop_p (aloop, ref, lim_raw))
682       return aloop;
683 
684   if (ref_indep_loop_p (loop, ref, lim_raw))
685     return loop;
686   else
687     return NULL;
688 }
689 
690 /* If there is a simple load or store to a memory reference in STMT, returns
691    the location of the memory reference, and sets IS_STORE according to whether
692    it is a store or load.  Otherwise, returns NULL.  */
693 
694 static tree *
simple_mem_ref_in_stmt(gimple * stmt,bool * is_store)695 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
696 {
697   tree *lhs, *rhs;
698 
699   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
700   if (!gimple_assign_single_p (stmt))
701     return NULL;
702 
703   lhs = gimple_assign_lhs_ptr (stmt);
704   rhs = gimple_assign_rhs1_ptr (stmt);
705 
706   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
707     {
708       *is_store = false;
709       return rhs;
710     }
711   else if (gimple_vdef (stmt)
712              && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
713     {
714       *is_store = true;
715       return lhs;
716     }
717   else
718     return NULL;
719 }
720 
721 /* From a controlling predicate in DOM determine the arguments from
722    the PHI node PHI that are chosen if the predicate evaluates to
723    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
724    they are non-NULL.  Returns true if the arguments can be determined,
725    else return false.  */
726 
727 static bool
extract_true_false_args_from_phi(basic_block dom,gphi * phi,tree * true_arg_p,tree * false_arg_p)728 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
729                                           tree *true_arg_p, tree *false_arg_p)
730 {
731   edge te, fe;
732   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
733                                                        &te, &fe))
734     return false;
735 
736   if (true_arg_p)
737     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
738   if (false_arg_p)
739     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
740 
741   return true;
742 }
743 
744 /* Determine the outermost loop to that it is possible to hoist a statement
745    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
746    the outermost loop in that the value computed by STMT is invariant.
747    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
748    we preserve the fact whether STMT is executed.  It also fills other related
749    information to LIM_DATA (STMT).
750 
751    The function returns false if STMT cannot be hoisted outside of the loop it
752    is defined in, and true otherwise.  */
753 
754 static bool
determine_max_movement(gimple * stmt,bool must_preserve_exec)755 determine_max_movement (gimple *stmt, bool must_preserve_exec)
756 {
757   basic_block bb = gimple_bb (stmt);
758   class loop *loop = bb->loop_father;
759   class loop *level;
760   struct lim_aux_data *lim_data = get_lim_data (stmt);
761   tree val;
762   ssa_op_iter iter;
763 
764   if (must_preserve_exec)
765     level = ALWAYS_EXECUTED_IN (bb);
766   else
767     level = superloop_at_depth (loop, 1);
768   lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
769   if (!lim_data->max_loop)
770     return false;
771 
772   if (gphi *phi = dyn_cast <gphi *> (stmt))
773     {
774       use_operand_p use_p;
775       unsigned min_cost = UINT_MAX;
776       unsigned total_cost = 0;
777       struct lim_aux_data *def_data;
778 
779       /* We will end up promoting dependencies to be unconditionally
780            evaluated.  For this reason the PHI cost (and thus the
781            cost we remove from the loop by doing the invariant motion)
782            is that of the cheapest PHI argument dependency chain.  */
783       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
784           {
785             val = USE_FROM_PTR (use_p);
786 
787             if (TREE_CODE (val) != SSA_NAME)
788               {
789                 /* Assign const 1 to constants.  */
790                 min_cost = MIN (min_cost, 1);
791                 total_cost += 1;
792                 continue;
793               }
794             if (!add_dependency (val, lim_data, loop, false))
795               return false;
796 
797             gimple *def_stmt = SSA_NAME_DEF_STMT (val);
798             if (gimple_bb (def_stmt)
799                 && gimple_bb (def_stmt)->loop_father == loop)
800               {
801                 def_data = get_lim_data (def_stmt);
802                 if (def_data)
803                     {
804                       min_cost = MIN (min_cost, def_data->cost);
805                       total_cost += def_data->cost;
806                     }
807               }
808           }
809 
810       min_cost = MIN (min_cost, total_cost);
811       lim_data->cost += min_cost;
812 
813       if (gimple_phi_num_args (phi) > 1)
814           {
815             basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
816             gimple *cond;
817             if (gsi_end_p (gsi_last_bb (dom)))
818               return false;
819             cond = gsi_stmt (gsi_last_bb (dom));
820             if (gimple_code (cond) != GIMPLE_COND)
821               return false;
822             /* Verify that this is an extended form of a diamond and
823                the PHI arguments are completely controlled by the
824                predicate in DOM.  */
825             if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
826               return false;
827 
828             /* Fold in dependencies and cost of the condition.  */
829             FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
830               {
831                 if (!add_dependency (val, lim_data, loop, false))
832                     return false;
833                 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
834                 if (def_data)
835                     lim_data->cost += def_data->cost;
836               }
837 
838             /* We want to avoid unconditionally executing very expensive
839                operations.  As costs for our dependencies cannot be
840                negative just claim we are not invariand for this case.
841                We also are not sure whether the control-flow inside the
842                loop will vanish.  */
843             if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
844                 && !(min_cost != 0
845                        && total_cost / min_cost <= 2))
846               return false;
847 
848             /* Assume that the control-flow in the loop will vanish.
849                ???  We should verify this and not artificially increase
850                the cost if that is not the case.  */
851             lim_data->cost += stmt_cost (stmt);
852           }
853 
854       return true;
855     }
856   else
857     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
858       if (!add_dependency (val, lim_data, loop, true))
859           return false;
860 
861   if (gimple_vuse (stmt))
862     {
863       im_mem_ref *ref
864           = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
865       if (ref
866             && MEM_ANALYZABLE (ref))
867           {
868             lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
869                                                                  loop, ref);
870             if (!lim_data->max_loop)
871               return false;
872           }
873       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
874           return false;
875     }
876 
877   lim_data->cost += stmt_cost (stmt);
878 
879   return true;
880 }
881 
882 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
883    and that one of the operands of this statement is computed by STMT.
884    Ensure that STMT (together with all the statements that define its
885    operands) is hoisted at least out of the loop LEVEL.  */
886 
887 static void
set_level(gimple * stmt,class loop * orig_loop,class loop * level)888 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
889 {
890   class loop *stmt_loop = gimple_bb (stmt)->loop_father;
891   struct lim_aux_data *lim_data;
892   gimple *dep_stmt;
893   unsigned i;
894 
895   stmt_loop = find_common_loop (orig_loop, stmt_loop);
896   lim_data = get_lim_data (stmt);
897   if (lim_data != NULL && lim_data->tgt_loop != NULL)
898     stmt_loop = find_common_loop (stmt_loop,
899                                           loop_outer (lim_data->tgt_loop));
900   if (flow_loop_nested_p (stmt_loop, level))
901     return;
902 
903   gcc_assert (level == lim_data->max_loop
904                 || flow_loop_nested_p (lim_data->max_loop, level));
905 
906   lim_data->tgt_loop = level;
907   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
908     set_level (dep_stmt, orig_loop, level);
909 }
910 
911 /* Determines an outermost loop from that we want to hoist the statement STMT.
912    For now we chose the outermost possible loop.  TODO -- use profiling
913    information to set it more sanely.  */
914 
915 static void
set_profitable_level(gimple * stmt)916 set_profitable_level (gimple *stmt)
917 {
918   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
919 }
920 
921 /* Returns true if STMT is a call that has side effects.  */
922 
923 static bool
nonpure_call_p(gimple * stmt)924 nonpure_call_p (gimple *stmt)
925 {
926   if (gimple_code (stmt) != GIMPLE_CALL)
927     return false;
928 
929   return gimple_has_side_effects (stmt);
930 }
931 
932 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
933 
934 static gimple *
rewrite_reciprocal(gimple_stmt_iterator * bsi)935 rewrite_reciprocal (gimple_stmt_iterator *bsi)
936 {
937   gassign *stmt, *stmt1, *stmt2;
938   tree name, lhs, type;
939   tree real_one;
940   gimple_stmt_iterator gsi;
941 
942   stmt = as_a <gassign *> (gsi_stmt (*bsi));
943   lhs = gimple_assign_lhs (stmt);
944   type = TREE_TYPE (lhs);
945 
946   real_one = build_one_cst (type);
947 
948   name = make_temp_ssa_name (type, NULL, "reciptmp");
949   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
950                                      gimple_assign_rhs2 (stmt));
951   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
952                                      gimple_assign_rhs1 (stmt));
953 
954   /* Replace division stmt with reciprocal and multiply stmts.
955      The multiply stmt is not invariant, so update iterator
956      and avoid rescanning.  */
957   gsi = *bsi;
958   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
959   gsi_replace (&gsi, stmt2, true);
960 
961   /* Continue processing with invariant reciprocal statement.  */
962   return stmt1;
963 }
964 
965 /* Check if the pattern at *BSI is a bittest of the form
966    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
967 
968 static gimple *
rewrite_bittest(gimple_stmt_iterator * bsi)969 rewrite_bittest (gimple_stmt_iterator *bsi)
970 {
971   gassign *stmt;
972   gimple *stmt1;
973   gassign *stmt2;
974   gimple *use_stmt;
975   gcond *cond_stmt;
976   tree lhs, name, t, a, b;
977   use_operand_p use;
978 
979   stmt = as_a <gassign *> (gsi_stmt (*bsi));
980   lhs = gimple_assign_lhs (stmt);
981 
982   /* Verify that the single use of lhs is a comparison against zero.  */
983   if (TREE_CODE (lhs) != SSA_NAME
984       || !single_imm_use (lhs, &use, &use_stmt))
985     return stmt;
986   cond_stmt = dyn_cast <gcond *> (use_stmt);
987   if (!cond_stmt)
988     return stmt;
989   if (gimple_cond_lhs (cond_stmt) != lhs
990       || (gimple_cond_code (cond_stmt) != NE_EXPR
991             && gimple_cond_code (cond_stmt) != EQ_EXPR)
992       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
993     return stmt;
994 
995   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
996   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
997   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
998     return stmt;
999 
1000   /* There is a conversion in between possibly inserted by fold.  */
1001   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1002     {
1003       t = gimple_assign_rhs1 (stmt1);
1004       if (TREE_CODE (t) != SSA_NAME
1005             || !has_single_use (t))
1006           return stmt;
1007       stmt1 = SSA_NAME_DEF_STMT (t);
1008       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1009           return stmt;
1010     }
1011 
1012   /* Verify that B is loop invariant but A is not.  Verify that with
1013      all the stmt walking we are still in the same loop.  */
1014   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
1015       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
1016     return stmt;
1017 
1018   a = gimple_assign_rhs1 (stmt1);
1019   b = gimple_assign_rhs2 (stmt1);
1020 
1021   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1022       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1023     {
1024       gimple_stmt_iterator rsi;
1025 
1026       /* 1 << B */
1027       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1028                            build_int_cst (TREE_TYPE (a), 1), b);
1029       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1030       stmt1 = gimple_build_assign (name, t);
1031 
1032       /* A & (1 << B) */
1033       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1034       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1035       stmt2 = gimple_build_assign (name, t);
1036 
1037       /* Replace the SSA_NAME we compare against zero.  Adjust
1038            the type of zero accordingly.  */
1039       SET_USE (use, name);
1040       gimple_cond_set_rhs (cond_stmt,
1041                                  build_int_cst_type (TREE_TYPE (name),
1042                                                          0));
1043 
1044       /* Don't use gsi_replace here, none of the new assignments sets
1045            the variable originally set in stmt.  Move bsi to stmt1, and
1046            then remove the original stmt, so that we get a chance to
1047            retain debug info for it.  */
1048       rsi = *bsi;
1049       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1050       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1051       gimple *to_release = gsi_stmt (rsi);
1052       gsi_remove (&rsi, true);
1053       release_defs (to_release);
1054 
1055       return stmt1;
1056     }
1057 
1058   return stmt;
1059 }
1060 
1061 /* Determine the outermost loops in that statements in basic block BB are
1062    invariant, and record them to the LIM_DATA associated with the
1063    statements.  */
1064 
1065 static void
compute_invariantness(basic_block bb)1066 compute_invariantness (basic_block bb)
1067 {
1068   enum move_pos pos;
1069   gimple_stmt_iterator bsi;
1070   gimple *stmt;
1071   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1072   class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1073   struct lim_aux_data *lim_data;
1074 
1075   if (!loop_outer (bb->loop_father))
1076     return;
1077 
1078   if (dump_file && (dump_flags & TDF_DETAILS))
1079     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1080                bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1081 
1082   /* Look at PHI nodes, but only if there is at most two.
1083      ???  We could relax this further by post-processing the inserted
1084      code and transforming adjacent cond-exprs with the same predicate
1085      to control flow again.  */
1086   bsi = gsi_start_phis (bb);
1087   if (!gsi_end_p (bsi)
1088       && ((gsi_next (&bsi), gsi_end_p (bsi))
1089             || (gsi_next (&bsi), gsi_end_p (bsi))))
1090     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1091       {
1092           stmt = gsi_stmt (bsi);
1093 
1094           pos = movement_possibility (stmt);
1095           if (pos == MOVE_IMPOSSIBLE)
1096             continue;
1097 
1098           lim_data = get_lim_data (stmt);
1099           if (! lim_data)
1100             lim_data = init_lim_data (stmt);
1101           lim_data->always_executed_in = outermost;
1102 
1103           if (!determine_max_movement (stmt, false))
1104             {
1105               lim_data->max_loop = NULL;
1106               continue;
1107             }
1108 
1109           if (dump_file && (dump_flags & TDF_DETAILS))
1110             {
1111               print_gimple_stmt (dump_file, stmt, 2);
1112               fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1113                          loop_depth (lim_data->max_loop),
1114                          lim_data->cost);
1115             }
1116 
1117           if (lim_data->cost >= LIM_EXPENSIVE)
1118             set_profitable_level (stmt);
1119       }
1120 
1121   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1122     {
1123       stmt = gsi_stmt (bsi);
1124 
1125       pos = movement_possibility (stmt);
1126       if (pos == MOVE_IMPOSSIBLE)
1127           {
1128             if (nonpure_call_p (stmt))
1129               {
1130                 maybe_never = true;
1131                 outermost = NULL;
1132               }
1133             /* Make sure to note always_executed_in for stores to make
1134                store-motion work.  */
1135             else if (stmt_makes_single_store (stmt))
1136               {
1137                 struct lim_aux_data *lim_data = get_lim_data (stmt);
1138                 if (! lim_data)
1139                     lim_data = init_lim_data (stmt);
1140                 lim_data->always_executed_in = outermost;
1141               }
1142             continue;
1143           }
1144 
1145       if (is_gimple_assign (stmt)
1146             && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1147                 == GIMPLE_BINARY_RHS))
1148           {
1149             tree op0 = gimple_assign_rhs1 (stmt);
1150             tree op1 = gimple_assign_rhs2 (stmt);
1151             class loop *ol1 = outermost_invariant_loop (op1,
1152                                                   loop_containing_stmt (stmt));
1153 
1154             /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1155                to be hoisted out of loop, saving expensive divide.  */
1156             if (pos == MOVE_POSSIBLE
1157                 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1158                 && flag_unsafe_math_optimizations
1159                 && !flag_trapping_math
1160                 && ol1 != NULL
1161                 && outermost_invariant_loop (op0, ol1) == NULL)
1162               stmt = rewrite_reciprocal (&bsi);
1163 
1164             /* If the shift count is invariant, convert (A >> B) & 1 to
1165                A & (1 << B) allowing the bit mask to be hoisted out of the loop
1166                saving an expensive shift.  */
1167             if (pos == MOVE_POSSIBLE
1168                 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1169                 && integer_onep (op1)
1170                 && TREE_CODE (op0) == SSA_NAME
1171                 && has_single_use (op0))
1172               stmt = rewrite_bittest (&bsi);
1173           }
1174 
1175       lim_data = get_lim_data (stmt);
1176       if (! lim_data)
1177           lim_data = init_lim_data (stmt);
1178       lim_data->always_executed_in = outermost;
1179 
1180       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1181           continue;
1182 
1183       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1184           {
1185             lim_data->max_loop = NULL;
1186             continue;
1187           }
1188 
1189       if (dump_file && (dump_flags & TDF_DETAILS))
1190           {
1191             print_gimple_stmt (dump_file, stmt, 2);
1192             fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1193                        loop_depth (lim_data->max_loop),
1194                        lim_data->cost);
1195           }
1196 
1197       if (lim_data->cost >= LIM_EXPENSIVE)
1198           set_profitable_level (stmt);
1199     }
1200 }
1201 
1202 /* Hoist the statements in basic block BB out of the loops prescribed by
1203    data stored in LIM_DATA structures associated with each statement.  Callback
1204    for walk_dominator_tree.  */
1205 
1206 unsigned int
move_computations_worker(basic_block bb)1207 move_computations_worker (basic_block bb)
1208 {
1209   class loop *level;
1210   unsigned cost = 0;
1211   struct lim_aux_data *lim_data;
1212   unsigned int todo = 0;
1213 
1214   if (!loop_outer (bb->loop_father))
1215     return todo;
1216 
1217   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1218     {
1219       gassign *new_stmt;
1220       gphi *stmt = bsi.phi ();
1221 
1222       lim_data = get_lim_data (stmt);
1223       if (lim_data == NULL)
1224           {
1225             gsi_next (&bsi);
1226             continue;
1227           }
1228 
1229       cost = lim_data->cost;
1230       level = lim_data->tgt_loop;
1231       clear_lim_data (stmt);
1232 
1233       if (!level)
1234           {
1235             gsi_next (&bsi);
1236             continue;
1237           }
1238 
1239       if (dump_file && (dump_flags & TDF_DETAILS))
1240           {
1241             fprintf (dump_file, "Moving PHI node\n");
1242             print_gimple_stmt (dump_file, stmt, 0);
1243             fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1244                        cost, level->num);
1245           }
1246 
1247       if (gimple_phi_num_args (stmt) == 1)
1248           {
1249             tree arg = PHI_ARG_DEF (stmt, 0);
1250             new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1251                                                     TREE_CODE (arg), arg);
1252           }
1253       else
1254           {
1255             basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1256             gimple *cond = gsi_stmt (gsi_last_bb (dom));
1257             tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1258             /* Get the PHI arguments corresponding to the true and false
1259                edges of COND.  */
1260             extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1261             gcc_assert (arg0 && arg1);
1262             t = build2 (gimple_cond_code (cond), boolean_type_node,
1263                           gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1264             new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1265                                                     COND_EXPR, t, arg0, arg1);
1266             todo |= TODO_cleanup_cfg;
1267           }
1268       if (!ALWAYS_EXECUTED_IN (bb)
1269             || (ALWAYS_EXECUTED_IN (bb) != level
1270                 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1271           reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1272       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1273       remove_phi_node (&bsi, false);
1274     }
1275 
1276   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1277     {
1278       edge e;
1279 
1280       gimple *stmt = gsi_stmt (bsi);
1281 
1282       lim_data = get_lim_data (stmt);
1283       if (lim_data == NULL)
1284           {
1285             gsi_next (&bsi);
1286             continue;
1287           }
1288 
1289       cost = lim_data->cost;
1290       level = lim_data->tgt_loop;
1291       clear_lim_data (stmt);
1292 
1293       if (!level)
1294           {
1295             gsi_next (&bsi);
1296             continue;
1297           }
1298 
1299       /* We do not really want to move conditionals out of the loop; we just
1300            placed it here to force its operands to be moved if necessary.  */
1301       if (gimple_code (stmt) == GIMPLE_COND)
1302           {
1303             gsi_next (&bsi);
1304             continue;
1305           }
1306 
1307       if (dump_file && (dump_flags & TDF_DETAILS))
1308           {
1309             fprintf (dump_file, "Moving statement\n");
1310             print_gimple_stmt (dump_file, stmt, 0);
1311             fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1312                        cost, level->num);
1313           }
1314 
1315       e = loop_preheader_edge (level);
1316       gcc_assert (!gimple_vdef (stmt));
1317       if (gimple_vuse (stmt))
1318           {
1319             /* The new VUSE is the one from the virtual PHI in the loop
1320                header or the one already present.  */
1321             gphi_iterator gsi2;
1322             for (gsi2 = gsi_start_phis (e->dest);
1323                  !gsi_end_p (gsi2); gsi_next (&gsi2))
1324               {
1325                 gphi *phi = gsi2.phi ();
1326                 if (virtual_operand_p (gimple_phi_result (phi)))
1327                     {
1328                       SET_USE (gimple_vuse_op (stmt),
1329                                  PHI_ARG_DEF_FROM_EDGE (phi, e));
1330                       break;
1331                     }
1332               }
1333           }
1334       gsi_remove (&bsi, false);
1335       if (gimple_has_lhs (stmt)
1336             && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1337             && (!ALWAYS_EXECUTED_IN (bb)
1338                 || !(ALWAYS_EXECUTED_IN (bb) == level
1339                        || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1340           reset_flow_sensitive_info (gimple_get_lhs (stmt));
1341       /* In case this is a stmt that is not unconditionally executed
1342          when the target loop header is executed and the stmt may
1343            invoke undefined integer or pointer overflow rewrite it to
1344            unsigned arithmetic.  */
1345       if (is_gimple_assign (stmt)
1346             && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1347             && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1348             && arith_code_with_undefined_signed_overflow
1349                  (gimple_assign_rhs_code (stmt))
1350             && (!ALWAYS_EXECUTED_IN (bb)
1351                 || !(ALWAYS_EXECUTED_IN (bb) == level
1352                        || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1353           gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1354       else
1355           gsi_insert_on_edge (e, stmt);
1356     }
1357 
1358   return todo;
1359 }
1360 
1361 /* Checks whether the statement defining variable *INDEX can be hoisted
1362    out of the loop passed in DATA.  Callback for for_each_index.  */
1363 
1364 static bool
may_move_till(tree ref,tree * index,void * data)1365 may_move_till (tree ref, tree *index, void *data)
1366 {
1367   class loop *loop = (class loop *) data, *max_loop;
1368 
1369   /* If REF is an array reference, check also that the step and the lower
1370      bound is invariant in LOOP.  */
1371   if (TREE_CODE (ref) == ARRAY_REF)
1372     {
1373       tree step = TREE_OPERAND (ref, 3);
1374       tree lbound = TREE_OPERAND (ref, 2);
1375 
1376       max_loop = outermost_invariant_loop (step, loop);
1377       if (!max_loop)
1378           return false;
1379 
1380       max_loop = outermost_invariant_loop (lbound, loop);
1381       if (!max_loop)
1382           return false;
1383     }
1384 
1385   max_loop = outermost_invariant_loop (*index, loop);
1386   if (!max_loop)
1387     return false;
1388 
1389   return true;
1390 }
1391 
1392 /* If OP is SSA NAME, force the statement that defines it to be
1393    moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */
1394 
1395 static void
force_move_till_op(tree op,class loop * orig_loop,class loop * loop)1396 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1397 {
1398   gimple *stmt;
1399 
1400   if (!op
1401       || is_gimple_min_invariant (op))
1402     return;
1403 
1404   gcc_assert (TREE_CODE (op) == SSA_NAME);
1405 
1406   stmt = SSA_NAME_DEF_STMT (op);
1407   if (gimple_nop_p (stmt))
1408     return;
1409 
1410   set_level (stmt, orig_loop, loop);
1411 }
1412 
1413 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1414    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
1415    for_each_index.  */
1416 
1417 struct fmt_data
1418 {
1419   class loop *loop;
1420   class loop *orig_loop;
1421 };
1422 
1423 static bool
force_move_till(tree ref,tree * index,void * data)1424 force_move_till (tree ref, tree *index, void *data)
1425 {
1426   struct fmt_data *fmt_data = (struct fmt_data *) data;
1427 
1428   if (TREE_CODE (ref) == ARRAY_REF)
1429     {
1430       tree step = TREE_OPERAND (ref, 3);
1431       tree lbound = TREE_OPERAND (ref, 2);
1432 
1433       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1434       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1435     }
1436 
1437   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1438 
1439   return true;
1440 }
1441 
1442 /* A function to free the mem_ref object OBJ.  */
1443 
1444 static void
memref_free(class im_mem_ref * mem)1445 memref_free (class im_mem_ref *mem)
1446 {
1447   mem->accesses_in_loop.release ();
1448 }
1449 
1450 /* Allocates and returns a memory reference description for MEM whose hash
1451    value is HASH and id is ID.  */
1452 
1453 static im_mem_ref *
mem_ref_alloc(ao_ref * mem,unsigned hash,unsigned id)1454 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1455 {
1456   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1457   if (mem)
1458     ref->mem = *mem;
1459   else
1460     ao_ref_init (&ref->mem, error_mark_node);
1461   ref->id = id;
1462   ref->ref_canonical = false;
1463   ref->ref_decomposed = false;
1464   ref->hash = hash;
1465   ref->stored = NULL;
1466   ref->loaded = NULL;
1467   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1468   ref->accesses_in_loop.create (1);
1469 
1470   return ref;
1471 }
1472 
1473 /* Records memory reference location *LOC in LOOP to the memory reference
1474    description REF.  The reference occurs in statement STMT.  */
1475 
1476 static void
record_mem_ref_loc(im_mem_ref * ref,gimple * stmt,tree * loc)1477 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1478 {
1479   mem_ref_loc aref;
1480   aref.stmt = stmt;
1481   aref.ref = loc;
1482   ref->accesses_in_loop.safe_push (aref);
1483 }
1484 
1485 /* Set the LOOP bit in REF stored bitmap and allocate that if
1486    necessary.  Return whether a bit was changed.  */
1487 
1488 static bool
set_ref_stored_in_loop(im_mem_ref * ref,class loop * loop)1489 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1490 {
1491   if (!ref->stored)
1492     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1493   return bitmap_set_bit (ref->stored, loop->num);
1494 }
1495 
1496 /* Marks reference REF as stored in LOOP.  */
1497 
1498 static void
mark_ref_stored(im_mem_ref * ref,class loop * loop)1499 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1500 {
1501   while (loop != current_loops->tree_root
1502            && set_ref_stored_in_loop (ref, loop))
1503     loop = loop_outer (loop);
1504 }
1505 
1506 /* Set the LOOP bit in REF loaded bitmap and allocate that if
1507    necessary.  Return whether a bit was changed.  */
1508 
1509 static bool
set_ref_loaded_in_loop(im_mem_ref * ref,class loop * loop)1510 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1511 {
1512   if (!ref->loaded)
1513     ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1514   return bitmap_set_bit (ref->loaded, loop->num);
1515 }
1516 
1517 /* Marks reference REF as loaded in LOOP.  */
1518 
1519 static void
mark_ref_loaded(im_mem_ref * ref,class loop * loop)1520 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1521 {
1522   while (loop != current_loops->tree_root
1523            && set_ref_loaded_in_loop (ref, loop))
1524     loop = loop_outer (loop);
1525 }
1526 
1527 /* Gathers memory references in statement STMT in LOOP, storing the
1528    information about them in the memory_accesses structure.  Marks
1529    the vops accessed through unrecognized statements there as
1530    well.  */
1531 
1532 static void
gather_mem_refs_stmt(class loop * loop,gimple * stmt)1533 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1534 {
1535   tree *mem = NULL;
1536   hashval_t hash;
1537   im_mem_ref **slot;
1538   im_mem_ref *ref;
1539   bool is_stored;
1540   unsigned id;
1541 
1542   if (!gimple_vuse (stmt))
1543     return;
1544 
1545   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1546   if (!mem && is_gimple_assign (stmt))
1547     {
1548       /* For aggregate copies record distinct references but use them
1549            only for disambiguation purposes.  */
1550       id = memory_accesses.refs_list.length ();
1551       ref = mem_ref_alloc (NULL, 0, id);
1552       memory_accesses.refs_list.safe_push (ref);
1553       if (dump_file && (dump_flags & TDF_DETAILS))
1554           {
1555             fprintf (dump_file, "Unhandled memory reference %u: ", id);
1556             print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1557           }
1558       record_mem_ref_loc (ref, stmt, mem);
1559       is_stored = gimple_vdef (stmt);
1560     }
1561   else if (!mem)
1562     {
1563       /* We use the shared mem_ref for all unanalyzable refs.  */
1564       id = UNANALYZABLE_MEM_ID;
1565       ref = memory_accesses.refs_list[id];
1566       if (dump_file && (dump_flags & TDF_DETAILS))
1567           {
1568             fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1569             print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1570           }
1571       is_stored = gimple_vdef (stmt);
1572     }
1573   else
1574     {
1575       /* We are looking for equal refs that might differ in structure
1576          such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
1577            make sure we can canonicalize the ref in the hashtable if
1578            non-operand_equal_p refs are found.  For the lookup we mark
1579            the case we want strict equality with aor.max_size == -1.  */
1580       ao_ref aor;
1581       ao_ref_init (&aor, *mem);
1582       ao_ref_base (&aor);
1583       ao_ref_alias_set (&aor);
1584       HOST_WIDE_INT offset, size, max_size;
1585       poly_int64 saved_maxsize = aor.max_size, mem_off;
1586       tree mem_base;
1587       bool ref_decomposed;
1588       if (aor.max_size_known_p ()
1589             && aor.offset.is_constant (&offset)
1590             && aor.size.is_constant (&size)
1591             && aor.max_size.is_constant (&max_size)
1592             && size == max_size
1593             && (size % BITS_PER_UNIT) == 0
1594             /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1595                size.  Make sure this is consistent with the extraction.  */
1596             && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1597             && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1598                            aor.size)
1599             && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1600           {
1601             ref_decomposed = true;
1602             tree base = ao_ref_base (&aor);
1603             poly_int64 moffset;
1604             HOST_WIDE_INT mcoffset;
1605             if (TREE_CODE (base) == MEM_REF
1606                 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1607                 && moffset.is_constant (&mcoffset))
1608               {
1609                 hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1610                 hash = iterative_hash_host_wide_int (mcoffset, hash);
1611               }
1612             else
1613               {
1614                 hash = iterative_hash_expr (base, 0);
1615                 hash = iterative_hash_host_wide_int (offset, hash);
1616               }
1617             hash = iterative_hash_host_wide_int (size, hash);
1618           }
1619       else
1620           {
1621             ref_decomposed = false;
1622             hash = iterative_hash_expr (aor.ref, 0);
1623             aor.max_size = -1;
1624           }
1625       slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1626       aor.max_size = saved_maxsize;
1627       if (*slot)
1628           {
1629             if (!(*slot)->ref_canonical
1630                 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1631               {
1632                 /* If we didn't yet canonicalize the hashtable ref (which
1633                    we'll end up using for code insertion) and hit a second
1634                      equal ref that is not structurally equivalent create
1635                      a canonical ref which is a bare MEM_REF.  */
1636                 if (TREE_CODE (*mem) == MEM_REF
1637                       || TREE_CODE (*mem) == TARGET_MEM_REF)
1638                     {
1639                       (*slot)->mem.ref = *mem;
1640                       (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1641                     }
1642                 else
1643                     {
1644                       tree ref_alias_type = reference_alias_ptr_type (*mem);
1645                       unsigned int ref_align = get_object_alignment (*mem);
1646                       tree ref_type = TREE_TYPE (*mem);
1647                       tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1648                                              unshare_expr (mem_base));
1649                       if (TYPE_ALIGN (ref_type) != ref_align)
1650                         ref_type = build_aligned_type (ref_type, ref_align);
1651                       tree new_ref
1652                         = fold_build2 (MEM_REF, ref_type, tmp,
1653                                            build_int_cst (ref_alias_type, mem_off));
1654                       if ((*slot)->mem.volatile_p)
1655                         TREE_THIS_VOLATILE (new_ref) = 1;
1656                       (*slot)->mem.ref = new_ref;
1657                       /* Make sure the recorded base and offset are consistent
1658                          with the newly built ref.  */
1659                       if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
1660                         ;
1661                       else
1662                         {
1663                           (*slot)->mem.base = new_ref;
1664                           (*slot)->mem.offset = 0;
1665                         }
1666                       gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1667                                                && is_gimple_mem_ref_addr
1668                                                     (TREE_OPERAND ((*slot)->mem.ref,
1669                                                                          0)));
1670                       (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1671                     }
1672                 (*slot)->ref_canonical = true;
1673               }
1674             ref = *slot;
1675             id = ref->id;
1676           }
1677       else
1678           {
1679             id = memory_accesses.refs_list.length ();
1680             ref = mem_ref_alloc (&aor, hash, id);
1681             ref->ref_decomposed = ref_decomposed;
1682             memory_accesses.refs_list.safe_push (ref);
1683             *slot = ref;
1684 
1685             if (dump_file && (dump_flags & TDF_DETAILS))
1686               {
1687                 fprintf (dump_file, "Memory reference %u: ", id);
1688                 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1689                 fprintf (dump_file, "\n");
1690               }
1691           }
1692 
1693       record_mem_ref_loc (ref, stmt, mem);
1694     }
1695   if (is_stored)
1696     {
1697       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1698       mark_ref_stored (ref, loop);
1699     }
1700   /* A not simple memory op is also a read when it is a write.  */
1701   if (!is_stored || id == UNANALYZABLE_MEM_ID
1702       || ref->mem.ref == error_mark_node)
1703     {
1704       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1705       mark_ref_loaded (ref, loop);
1706     }
1707   init_lim_data (stmt)->ref = ref->id;
1708   return;
1709 }
1710 
1711 static unsigned *bb_loop_postorder;
1712 
1713 /* qsort sort function to sort blocks after their loop fathers postorder.  */
1714 
1715 static int
sort_bbs_in_loop_postorder_cmp(const void * bb1_,const void * bb2_,void * bb_loop_postorder_)1716 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1717                                         void *bb_loop_postorder_)
1718 {
1719   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1720   basic_block bb1 = *(const basic_block *)bb1_;
1721   basic_block bb2 = *(const basic_block *)bb2_;
1722   class loop *loop1 = bb1->loop_father;
1723   class loop *loop2 = bb2->loop_father;
1724   if (loop1->num == loop2->num)
1725     return bb1->index - bb2->index;
1726   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1727 }
1728 
1729 /* qsort sort function to sort ref locs after their loop fathers postorder.  */
1730 
1731 static int
sort_locs_in_loop_postorder_cmp(const void * loc1_,const void * loc2_,void * bb_loop_postorder_)1732 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1733                                          void *bb_loop_postorder_)
1734 {
1735   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1736   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1737   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1738   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1739   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1740   if (loop1->num == loop2->num)
1741     return 0;
1742   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1743 }
1744 
1745 /* Gathers memory references in loops.  */
1746 
1747 static void
analyze_memory_references(bool store_motion)1748 analyze_memory_references (bool store_motion)
1749 {
1750   gimple_stmt_iterator bsi;
1751   basic_block bb, *bbs;
1752   class loop *outer;
1753   unsigned i, n;
1754 
1755   /* Collect all basic-blocks in loops and sort them after their
1756      loops postorder.  */
1757   i = 0;
1758   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1759   FOR_EACH_BB_FN (bb, cfun)
1760     if (bb->loop_father != current_loops->tree_root)
1761       bbs[i++] = bb;
1762   n = i;
1763   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1764                 bb_loop_postorder);
1765 
1766   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1767      That results in better locality for all the bitmaps.  It also
1768      automatically sorts the location list of gathered memory references
1769      after their loop postorder number allowing to binary-search it.  */
1770   for (i = 0; i < n; ++i)
1771     {
1772       basic_block bb = bbs[i];
1773       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1774         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1775     }
1776 
1777   /* Verify the list of gathered memory references is sorted after their
1778      loop postorder number.  */
1779   if (flag_checking)
1780     {
1781       im_mem_ref *ref;
1782       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1783           for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1784             gcc_assert (sort_locs_in_loop_postorder_cmp
1785                               (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1786                                bb_loop_postorder) <= 0);
1787     }
1788 
1789   free (bbs);
1790 
1791   if (!store_motion)
1792     return;
1793 
1794   /* Propagate the information about accessed memory references up
1795      the loop hierarchy.  */
1796   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1797     {
1798       /* Finalize the overall touched references (including subloops).  */
1799       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1800                            &memory_accesses.refs_stored_in_loop[loop->num]);
1801 
1802       /* Propagate the information about accessed memory references up
1803            the loop hierarchy.  */
1804       outer = loop_outer (loop);
1805       if (outer == current_loops->tree_root)
1806           continue;
1807 
1808       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1809                            &memory_accesses.all_refs_stored_in_loop[loop->num]);
1810     }
1811 }
1812 
1813 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1814    tree_to_aff_combination_expand.  */
1815 
1816 static bool
mem_refs_may_alias_p(im_mem_ref * mem1,im_mem_ref * mem2,hash_map<tree,name_expansion * > ** ttae_cache,bool tbaa_p)1817 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1818                           hash_map<tree, name_expansion *> **ttae_cache,
1819                           bool tbaa_p)
1820 {
1821   gcc_checking_assert (mem1->mem.ref != error_mark_node
1822                            && mem2->mem.ref != error_mark_node);
1823 
1824   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1825      object and their offset differ in such a way that the locations cannot
1826      overlap, then they cannot alias.  */
1827   poly_widest_int size1, size2;
1828   aff_tree off1, off2;
1829 
1830   /* Perform basic offset and type-based disambiguation.  */
1831   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1832     return false;
1833 
1834   /* The expansion of addresses may be a bit expensive, thus we only do
1835      the check at -O2 and higher optimization levels.  */
1836   if (optimize < 2)
1837     return true;
1838 
1839   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1840   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1841   aff_combination_expand (&off1, ttae_cache);
1842   aff_combination_expand (&off2, ttae_cache);
1843   aff_combination_scale (&off1, -1);
1844   aff_combination_add (&off2, &off1);
1845 
1846   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1847     return false;
1848 
1849   return true;
1850 }
1851 
1852 /* Compare function for bsearch searching for reference locations
1853    in a loop.  */
1854 
1855 static int
find_ref_loc_in_loop_cmp(const void * loop_,const void * loc_,void * bb_loop_postorder_)1856 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1857                                 void *bb_loop_postorder_)
1858 {
1859   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1860   class loop *loop = (class loop *)const_cast<void *>(loop_);
1861   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1862   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1863   if (loop->num  == loc_loop->num
1864       || flow_loop_nested_p (loop, loc_loop))
1865     return 0;
1866   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1867             ? -1 : 1);
1868 }
1869 
1870 /* Iterates over all locations of REF in LOOP and its subloops calling
1871    fn.operator() with the location as argument.  When that operator
1872    returns true the iteration is stopped and true is returned.
1873    Otherwise false is returned.  */
1874 
1875 template <typename FN>
1876 static bool
for_all_locs_in_loop(class loop * loop,im_mem_ref * ref,FN fn)1877 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1878 {
1879   unsigned i;
1880   mem_ref_loc *loc;
1881 
1882   /* Search for the cluster of locs in the accesses_in_loop vector
1883      which is sorted after postorder index of the loop father.  */
1884   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1885                                                bb_loop_postorder);
1886   if (!loc)
1887     return false;
1888 
1889   /* We have found one location inside loop or its sub-loops.  Iterate
1890      both forward and backward to cover the whole cluster.  */
1891   i = loc - ref->accesses_in_loop.address ();
1892   while (i > 0)
1893     {
1894       --i;
1895       mem_ref_loc *l = &ref->accesses_in_loop[i];
1896       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1897           break;
1898       if (fn (l))
1899           return true;
1900     }
1901   for (i = loc - ref->accesses_in_loop.address ();
1902        i < ref->accesses_in_loop.length (); ++i)
1903     {
1904       mem_ref_loc *l = &ref->accesses_in_loop[i];
1905       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1906           break;
1907       if (fn (l))
1908           return true;
1909     }
1910 
1911   return false;
1912 }
1913 
1914 /* Rewrites location LOC by TMP_VAR.  */
1915 
1916 class rewrite_mem_ref_loc
1917 {
1918 public:
rewrite_mem_ref_loc(tree tmp_var_)1919   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1920   bool operator () (mem_ref_loc *loc);
1921   tree tmp_var;
1922 };
1923 
1924 bool
operator ()(mem_ref_loc * loc)1925 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1926 {
1927   *loc->ref = tmp_var;
1928   update_stmt (loc->stmt);
1929   return false;
1930 }
1931 
1932 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1933 
1934 static void
rewrite_mem_refs(class loop * loop,im_mem_ref * ref,tree tmp_var)1935 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1936 {
1937   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1938 }
1939 
1940 /* Stores the first reference location in LOCP.  */
1941 
1942 class first_mem_ref_loc_1
1943 {
1944 public:
first_mem_ref_loc_1(mem_ref_loc ** locp_)1945   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1946   bool operator () (mem_ref_loc *loc);
1947   mem_ref_loc **locp;
1948 };
1949 
1950 bool
operator ()(mem_ref_loc * loc)1951 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1952 {
1953   *locp = loc;
1954   return true;
1955 }
1956 
1957 /* Returns the first reference location to REF in LOOP.  */
1958 
1959 static mem_ref_loc *
first_mem_ref_loc(class loop * loop,im_mem_ref * ref)1960 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1961 {
1962   mem_ref_loc *locp = NULL;
1963   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1964   return locp;
1965 }
1966 
1967 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1968    MEM along edge EX.
1969 
1970    The store is only done if MEM has changed.  We do this so no
1971    changes to MEM occur on code paths that did not originally store
1972    into it.
1973 
1974    The common case for execute_sm will transform:
1975 
1976      for (...) {
1977        if (foo)
1978          stuff;
1979        else
1980          MEM = TMP_VAR;
1981      }
1982 
1983    into:
1984 
1985      lsm = MEM;
1986      for (...) {
1987        if (foo)
1988          stuff;
1989        else
1990          lsm = TMP_VAR;
1991      }
1992      MEM = lsm;
1993 
1994   This function will generate:
1995 
1996      lsm = MEM;
1997 
1998      lsm_flag = false;
1999      ...
2000      for (...) {
2001        if (foo)
2002          stuff;
2003        else {
2004          lsm = TMP_VAR;
2005          lsm_flag = true;
2006        }
2007      }
2008      if (lsm_flag)  <--
2009        MEM = lsm;   <-- (X)
2010 
2011   In case MEM and TMP_VAR are NULL the function will return the then
2012   block so the caller can insert (X) and other related stmts.
2013 */
2014 
2015 static basic_block
execute_sm_if_changed(edge ex,tree mem,tree tmp_var,tree flag,edge preheader,hash_set<basic_block> * flag_bbs,edge & append_cond_position,edge & last_cond_fallthru)2016 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2017                            edge preheader, hash_set <basic_block> *flag_bbs,
2018                            edge &append_cond_position, edge &last_cond_fallthru)
2019 {
2020   basic_block new_bb, then_bb, old_dest;
2021   bool loop_has_only_one_exit;
2022   edge then_old_edge;
2023   gimple_stmt_iterator gsi;
2024   gimple *stmt;
2025   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2026 
2027   profile_count count_sum = profile_count::zero ();
2028   int nbbs = 0, ncount = 0;
2029   profile_probability flag_probability = profile_probability::uninitialized ();
2030 
2031   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2032      at loop exit.
2033 
2034      This code may look fancy, but it cannot update profile very realistically
2035      because we do not know the probability that flag will be true at given
2036      loop exit.
2037 
2038      We look for two interesting extremes
2039        - when exit is dominated by block setting the flag, we know it will
2040          always be true.  This is a common case.
2041        - when all blocks setting the flag have very low frequency we know
2042          it will likely be false.
2043      In all other cases we default to 2/3 for flag being true.  */
2044 
2045   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2046        it != flag_bbs->end (); ++it)
2047     {
2048        if ((*it)->count.initialized_p ())
2049          count_sum += (*it)->count, ncount ++;
2050        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2051            flag_probability = profile_probability::always ();
2052        nbbs++;
2053     }
2054 
2055   profile_probability cap = profile_probability::always ().apply_scale (2, 3);
2056 
2057   if (flag_probability.initialized_p ())
2058     ;
2059   else if (ncount == nbbs
2060              && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2061     {
2062       flag_probability = count_sum.probability_in (preheader->count ());
2063       if (flag_probability > cap)
2064           flag_probability = cap;
2065     }
2066 
2067   if (!flag_probability.initialized_p ())
2068     flag_probability = cap;
2069 
2070   /* ?? Insert store after previous store if applicable.  See note
2071      below.  */
2072   if (append_cond_position)
2073     ex = append_cond_position;
2074 
2075   loop_has_only_one_exit = single_pred_p (ex->dest);
2076 
2077   if (loop_has_only_one_exit)
2078     ex = split_block_after_labels (ex->dest);
2079   else
2080     {
2081       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2082              !gsi_end_p (gpi); gsi_next (&gpi))
2083           {
2084             gphi *phi = gpi.phi ();
2085             if (virtual_operand_p (gimple_phi_result (phi)))
2086               continue;
2087 
2088             /* When the destination has a non-virtual PHI node with multiple
2089                predecessors make sure we preserve the PHI structure by
2090                forcing a forwarder block so that hoisting of that PHI will
2091                still work.  */
2092             split_edge (ex);
2093             break;
2094           }
2095     }
2096 
2097   old_dest = ex->dest;
2098   new_bb = split_edge (ex);
2099   then_bb = create_empty_bb (new_bb);
2100   then_bb->count = new_bb->count.apply_probability (flag_probability);
2101   if (irr)
2102     then_bb->flags = BB_IRREDUCIBLE_LOOP;
2103   add_bb_to_loop (then_bb, new_bb->loop_father);
2104 
2105   gsi = gsi_start_bb (new_bb);
2106   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2107                                   NULL_TREE, NULL_TREE);
2108   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2109 
2110   /* Insert actual store.  */
2111   if (mem)
2112     {
2113       gsi = gsi_start_bb (then_bb);
2114       stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2115       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2116     }
2117 
2118   edge e1 = single_succ_edge (new_bb);
2119   edge e2 = make_edge (new_bb, then_bb,
2120                          EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2121   e2->probability = flag_probability;
2122 
2123   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2124   e1->flags &= ~EDGE_FALLTHRU;
2125 
2126   e1->probability = flag_probability.invert ();
2127 
2128   then_old_edge = make_single_succ_edge (then_bb, old_dest,
2129                                    EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2130 
2131   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2132 
2133   if (append_cond_position)
2134     {
2135       basic_block prevbb = last_cond_fallthru->src;
2136       redirect_edge_succ (last_cond_fallthru, new_bb);
2137       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2138       set_immediate_dominator (CDI_DOMINATORS, old_dest,
2139                                      recompute_dominator (CDI_DOMINATORS, old_dest));
2140     }
2141 
2142   /* ?? Because stores may alias, they must happen in the exact
2143      sequence they originally happened.  Save the position right after
2144      the (_lsm) store we just created so we can continue appending after
2145      it and maintain the original order.  */
2146   append_cond_position = then_old_edge;
2147   last_cond_fallthru = find_edge (new_bb, old_dest);
2148 
2149   if (!loop_has_only_one_exit)
2150     for (gphi_iterator gpi = gsi_start_phis (old_dest);
2151            !gsi_end_p (gpi); gsi_next (&gpi))
2152       {
2153           gphi *phi = gpi.phi ();
2154           unsigned i;
2155 
2156           for (i = 0; i < gimple_phi_num_args (phi); i++)
2157             if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2158               {
2159                 tree arg = gimple_phi_arg_def (phi, i);
2160                 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2161                 update_stmt (phi);
2162               }
2163       }
2164 
2165   return then_bb;
2166 }
2167 
2168 /* When REF is set on the location, set flag indicating the store.  */
2169 
2170 class sm_set_flag_if_changed
2171 {
2172 public:
sm_set_flag_if_changed(tree flag_,hash_set<basic_block> * bbs_)2173   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2174            : flag (flag_), bbs (bbs_) {}
2175   bool operator () (mem_ref_loc *loc);
2176   tree flag;
2177   hash_set <basic_block> *bbs;
2178 };
2179 
2180 bool
operator ()(mem_ref_loc * loc)2181 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2182 {
2183   /* Only set the flag for writes.  */
2184   if (is_gimple_assign (loc->stmt)
2185       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2186     {
2187       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2188       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2189       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2190       bbs->add (gimple_bb (stmt));
2191     }
2192   return false;
2193 }
2194 
2195 /* Helper function for execute_sm.  On every location where REF is
2196    set, set an appropriate flag indicating the store.  */
2197 
2198 static tree
execute_sm_if_changed_flag_set(class loop * loop,im_mem_ref * ref,hash_set<basic_block> * bbs)2199 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2200                                         hash_set <basic_block> *bbs)
2201 {
2202   tree flag;
2203   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2204   flag = create_tmp_reg (boolean_type_node, str);
2205   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2206   return flag;
2207 }
2208 
2209 struct sm_aux
2210 {
2211   tree tmp_var;
2212   tree store_flag;
2213   hash_set <basic_block> flag_bbs;
2214 };
2215 
2216 /* Executes store motion of memory reference REF from LOOP.
2217    Exits from the LOOP are stored in EXITS.  The initialization of the
2218    temporary variable is put to the preheader of the loop, and assignments
2219    to the reference from the temporary variable are emitted to exits.  */
2220 
2221 static void
execute_sm(class loop * loop,im_mem_ref * ref,hash_map<im_mem_ref *,sm_aux * > & aux_map,bool maybe_mt,bool use_other_flag_var)2222 execute_sm (class loop *loop, im_mem_ref *ref,
2223               hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2224               bool use_other_flag_var)
2225 {
2226   gassign *load;
2227   struct fmt_data fmt_data;
2228   struct lim_aux_data *lim_data;
2229   bool multi_threaded_model_p = false;
2230   gimple_stmt_iterator gsi;
2231   sm_aux *aux = new sm_aux;
2232 
2233   if (dump_file && (dump_flags & TDF_DETAILS))
2234     {
2235       fprintf (dump_file, "Executing store motion of ");
2236       print_generic_expr (dump_file, ref->mem.ref);
2237       fprintf (dump_file, " from loop %d\n", loop->num);
2238     }
2239 
2240   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2241                                          get_lsm_tmp_name (ref->mem.ref, ~0));
2242 
2243   fmt_data.loop = loop;
2244   fmt_data.orig_loop = loop;
2245   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2246 
2247   bool always_stored = ref_always_accessed_p (loop, ref, true);
2248   if (maybe_mt
2249       && (bb_in_transaction (loop_preheader_edge (loop)->src)
2250             || (! flag_store_data_races && ! always_stored)))
2251     multi_threaded_model_p = true;
2252 
2253   if (multi_threaded_model_p && !use_other_flag_var)
2254     aux->store_flag
2255       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2256   else
2257     aux->store_flag = NULL_TREE;
2258 
2259   /* Remember variable setup.  */
2260   aux_map.put (ref, aux);
2261 
2262   rewrite_mem_refs (loop, ref, aux->tmp_var);
2263 
2264   /* Emit the load code on a random exit edge or into the latch if
2265      the loop does not exit, so that we are sure it will be processed
2266      by move_computations after all dependencies.  */
2267   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2268 
2269   /* Avoid doing a load if there was no load of the ref in the loop.
2270      Esp. when the ref is not always stored we cannot optimize it
2271      away later.  But when it is not always stored we must use a conditional
2272      store then.  */
2273   if ((!always_stored && !multi_threaded_model_p)
2274       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2275     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2276   else
2277     {
2278       /* If not emitting a load mark the uninitialized state on the
2279            loop entry as not to be warned for.  */
2280       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2281       suppress_warning (uninit, OPT_Wuninitialized);
2282       load = gimple_build_assign (aux->tmp_var, uninit);
2283     }
2284   lim_data = init_lim_data (load);
2285   lim_data->max_loop = loop;
2286   lim_data->tgt_loop = loop;
2287   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2288 
2289   if (aux->store_flag)
2290     {
2291       load = gimple_build_assign (aux->store_flag, boolean_false_node);
2292       lim_data = init_lim_data (load);
2293       lim_data->max_loop = loop;
2294       lim_data->tgt_loop = loop;
2295       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2296     }
2297 }
2298 
2299 /* sm_ord is used for ordinary stores we can retain order with respect
2300        to other stores
2301    sm_unord is used for conditional executed stores which need to be
2302        able to execute in arbitrary order with respect to other stores
2303    sm_other is used for stores we do not try to apply store motion to.  */
2304 enum sm_kind { sm_ord, sm_unord, sm_other };
2305 struct seq_entry
2306 {
seq_entryseq_entry2307   seq_entry () {}
seq_entryseq_entry2308   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2309     : first (f), second (k), from (fr) {}
2310   unsigned first;
2311   sm_kind second;
2312   tree from;
2313 };
2314 
2315 static void
execute_sm_exit(class loop * loop,edge ex,vec<seq_entry> & seq,hash_map<im_mem_ref *,sm_aux * > & aux_map,sm_kind kind,edge & append_cond_position,edge & last_cond_fallthru)2316 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2317                      hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2318                      edge &append_cond_position, edge &last_cond_fallthru)
2319 {
2320   /* Sink the stores to exit from the loop.  */
2321   for (unsigned i = seq.length (); i > 0; --i)
2322     {
2323       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2324       if (seq[i-1].second == sm_other)
2325           {
2326             gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2327             if (dump_file && (dump_flags & TDF_DETAILS))
2328               {
2329                 fprintf (dump_file, "Re-issueing dependent store of ");
2330                 print_generic_expr (dump_file, ref->mem.ref);
2331                 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2332                            loop->num, ex->src->index, ex->dest->index);
2333               }
2334             gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2335                                                             seq[i-1].from);
2336             gsi_insert_on_edge (ex, store);
2337           }
2338       else
2339           {
2340             sm_aux *aux = *aux_map.get (ref);
2341             if (!aux->store_flag || kind == sm_ord)
2342               {
2343                 gassign *store;
2344                 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2345                                                      aux->tmp_var);
2346                 gsi_insert_on_edge (ex, store);
2347               }
2348             else
2349               execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2350                                            aux->store_flag,
2351                                            loop_preheader_edge (loop), &aux->flag_bbs,
2352                                            append_cond_position, last_cond_fallthru);
2353           }
2354     }
2355 }
2356 
2357 /* Push the SM candidate at index PTR in the sequence SEQ down until
2358    we hit the next SM candidate.  Return true if that went OK and
2359    false if we could not disambiguate agains another unrelated ref.
2360    Update *AT to the index where the candidate now resides.  */
2361 
2362 static bool
sm_seq_push_down(vec<seq_entry> & seq,unsigned ptr,unsigned * at)2363 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2364 {
2365   *at = ptr;
2366   for (; ptr > 0; --ptr)
2367     {
2368       seq_entry &new_cand = seq[ptr];
2369       seq_entry &against = seq[ptr-1];
2370       if (against.second == sm_ord
2371             || (against.second == sm_other && against.from != NULL_TREE))
2372           /* Found the tail of the sequence.  */
2373           break;
2374       /* We may not ignore self-dependences here.  */
2375       if (new_cand.first == against.first
2376             || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2377                                           memory_accesses.refs_list[against.first],
2378                                           false))
2379           /* ???  Prune new_cand from the list of refs to apply SM to.  */
2380           return false;
2381       std::swap (new_cand, against);
2382       *at = ptr - 1;
2383     }
2384   return true;
2385 }
2386 
2387 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2388    walking backwards from VDEF (or the end of BB if VDEF is NULL).  */
2389 
2390 static int
sm_seq_valid_bb(class loop * loop,basic_block bb,tree vdef,vec<seq_entry> & seq,bitmap refs_not_in_seq,bitmap refs_not_supported,bool forked,bitmap fully_visited)2391 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2392                      vec<seq_entry> &seq, bitmap refs_not_in_seq,
2393                      bitmap refs_not_supported, bool forked,
2394                      bitmap fully_visited)
2395 {
2396   if (!vdef)
2397     for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2398            gsi_prev (&gsi))
2399       {
2400           vdef = gimple_vdef (gsi_stmt (gsi));
2401           if (vdef)
2402             break;
2403       }
2404   if (!vdef)
2405     {
2406       gphi *vphi = get_virtual_phi (bb);
2407       if (vphi)
2408           vdef = gimple_phi_result (vphi);
2409     }
2410   if (!vdef)
2411     {
2412       if (single_pred_p (bb))
2413           /* This handles the perfect nest case.  */
2414           return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2415                                         seq, refs_not_in_seq, refs_not_supported,
2416                                         forked, fully_visited);
2417       return 0;
2418     }
2419   do
2420     {
2421       gimple *def = SSA_NAME_DEF_STMT (vdef);
2422       if (gimple_bb (def) != bb)
2423           {
2424             /* If we forked by processing a PHI do not allow our walk to
2425                merge again until we handle that robustly.  */
2426             if (forked)
2427               {
2428                 /* Mark refs_not_in_seq as unsupported.  */
2429                 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2430                 return 1;
2431               }
2432             /* Otherwise it doesn't really matter if we end up in different
2433                BBs.  */
2434             bb = gimple_bb (def);
2435           }
2436       if (gphi *phi = dyn_cast <gphi *> (def))
2437           {
2438             /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
2439                this is still linear.
2440                Eventually we want to cache intermediate results per BB
2441                (but we can't easily cache for different exits?).  */
2442             /* Stop at PHIs with possible backedges.  */
2443             if (bb == bb->loop_father->header
2444                 || bb->flags & BB_IRREDUCIBLE_LOOP)
2445               {
2446                 /* Mark refs_not_in_seq as unsupported.  */
2447                 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2448                 return 1;
2449               }
2450             if (gimple_phi_num_args (phi) == 1)
2451               return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2452                                             gimple_phi_arg_def (phi, 0), seq,
2453                                             refs_not_in_seq, refs_not_supported,
2454                                             false, fully_visited);
2455             if (bitmap_bit_p (fully_visited,
2456                                   SSA_NAME_VERSION (gimple_phi_result (phi))))
2457               return 1;
2458             auto_vec<seq_entry> first_edge_seq;
2459             auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2460             int eret;
2461             bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2462             eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2463                                           gimple_phi_arg_def (phi, 0),
2464                                           first_edge_seq,
2465                                           tem_refs_not_in_seq, refs_not_supported,
2466                                           true, fully_visited);
2467             if (eret != 1)
2468               return -1;
2469             /* Simplify our lives by pruning the sequence of !sm_ord.  */
2470             while (!first_edge_seq.is_empty ()
2471                      && first_edge_seq.last ().second != sm_ord)
2472               first_edge_seq.pop ();
2473             for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2474               {
2475                 tree vuse = gimple_phi_arg_def (phi, i);
2476                 edge e = gimple_phi_arg_edge (phi, i);
2477                 auto_vec<seq_entry> edge_seq;
2478                 bitmap_and_compl (tem_refs_not_in_seq,
2479                                         refs_not_in_seq, refs_not_supported);
2480                 /* If we've marked all refs we search for as unsupported
2481                      we can stop processing and use the sequence as before
2482                      the PHI.  */
2483                 if (bitmap_empty_p (tem_refs_not_in_seq))
2484                     return 1;
2485                 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2486                                               tem_refs_not_in_seq, refs_not_supported,
2487                                               true, fully_visited);
2488                 if (eret != 1)
2489                     return -1;
2490                 /* Simplify our lives by pruning the sequence of !sm_ord.  */
2491                 while (!edge_seq.is_empty ()
2492                          && edge_seq.last ().second != sm_ord)
2493                     edge_seq.pop ();
2494                 unsigned min_len = MIN(first_edge_seq.length (),
2495                                              edge_seq.length ());
2496                 /* Incrementally merge seqs into first_edge_seq.  */
2497                 int first_uneq = -1;
2498                 auto_vec<seq_entry, 2> extra_refs;
2499                 for (unsigned int i = 0; i < min_len; ++i)
2500                     {
2501                       /* ???  We can more intelligently merge when we face different
2502                          order by additional sinking operations in one sequence.
2503                          For now we simply mark them as to be processed by the
2504                          not order-preserving SM code.  */
2505                       if (first_edge_seq[i].first != edge_seq[i].first)
2506                         {
2507                           if (first_edge_seq[i].second == sm_ord)
2508                               bitmap_set_bit (refs_not_supported,
2509                                                   first_edge_seq[i].first);
2510                           if (edge_seq[i].second == sm_ord)
2511                               bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2512                           first_edge_seq[i].second = sm_other;
2513                           first_edge_seq[i].from = NULL_TREE;
2514                           /* Record the dropped refs for later processing.  */
2515                           if (first_uneq == -1)
2516                               first_uneq = i;
2517                           extra_refs.safe_push (seq_entry (edge_seq[i].first,
2518                                                                    sm_other, NULL_TREE));
2519                         }
2520                       /* sm_other prevails.  */
2521                       else if (first_edge_seq[i].second != edge_seq[i].second)
2522                         {
2523                           /* Make sure the ref is marked as not supported.  */
2524                           bitmap_set_bit (refs_not_supported,
2525                                               first_edge_seq[i].first);
2526                           first_edge_seq[i].second = sm_other;
2527                           first_edge_seq[i].from = NULL_TREE;
2528                         }
2529                       else if (first_edge_seq[i].second == sm_other
2530                                  && first_edge_seq[i].from != NULL_TREE
2531                                  && (edge_seq[i].from == NULL_TREE
2532                                      || !operand_equal_p (first_edge_seq[i].from,
2533                                                                 edge_seq[i].from, 0)))
2534                         first_edge_seq[i].from = NULL_TREE;
2535                     }
2536                 /* Any excess elements become sm_other since they are now
2537                      coonditionally executed.  */
2538                 if (first_edge_seq.length () > edge_seq.length ())
2539                     {
2540                       for (unsigned i = edge_seq.length ();
2541                            i < first_edge_seq.length (); ++i)
2542                         {
2543                           if (first_edge_seq[i].second == sm_ord)
2544                               bitmap_set_bit (refs_not_supported,
2545                                                   first_edge_seq[i].first);
2546                           first_edge_seq[i].second = sm_other;
2547                         }
2548                     }
2549                 else if (edge_seq.length () > first_edge_seq.length ())
2550                     {
2551                       if (first_uneq == -1)
2552                         first_uneq = first_edge_seq.length ();
2553                       for (unsigned i = first_edge_seq.length ();
2554                            i < edge_seq.length (); ++i)
2555                         {
2556                           if (edge_seq[i].second == sm_ord)
2557                               bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2558                           extra_refs.safe_push (seq_entry (edge_seq[i].first,
2559                                                                    sm_other, NULL_TREE));
2560                         }
2561                     }
2562                 /* Put unmerged refs at first_uneq to force dependence checking
2563                      on them.  */
2564                 if (first_uneq != -1)
2565                     {
2566                       /* Missing ordered_splice_at.  */
2567                       if ((unsigned)first_uneq == first_edge_seq.length ())
2568                         first_edge_seq.safe_splice (extra_refs);
2569                       else
2570                         {
2571                           unsigned fes_length = first_edge_seq.length ();
2572                           first_edge_seq.safe_grow (fes_length
2573                                                             + extra_refs.length ());
2574                           memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2575                                      &first_edge_seq[first_uneq],
2576                                      (fes_length - first_uneq) * sizeof (seq_entry));
2577                           memcpy (&first_edge_seq[first_uneq],
2578                                     extra_refs.address (),
2579                                     extra_refs.length () * sizeof (seq_entry));
2580                         }
2581                     }
2582               }
2583             /* Use the sequence from the first edge and push SMs down.  */
2584             for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2585               {
2586                 unsigned id = first_edge_seq[i].first;
2587                 seq.safe_push (first_edge_seq[i]);
2588                 unsigned new_idx;
2589                 if ((first_edge_seq[i].second == sm_ord
2590                        || (first_edge_seq[i].second == sm_other
2591                            && first_edge_seq[i].from != NULL_TREE))
2592                       && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2593                     {
2594                       if (first_edge_seq[i].second == sm_ord)
2595                         bitmap_set_bit (refs_not_supported, id);
2596                       /* Mark it sm_other.  */
2597                       seq[new_idx].second = sm_other;
2598                       seq[new_idx].from = NULL_TREE;
2599                     }
2600               }
2601             bitmap_set_bit (fully_visited,
2602                                 SSA_NAME_VERSION (gimple_phi_result (phi)));
2603             return 1;
2604           }
2605       lim_aux_data *data = get_lim_data (def);
2606       gcc_assert (data);
2607       if (data->ref == UNANALYZABLE_MEM_ID)
2608           return -1;
2609       /* Stop at memory references which we can't move.  */
2610       else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node
2611                  || TREE_THIS_VOLATILE
2612                         (memory_accesses.refs_list[data->ref]->mem.ref))
2613           {
2614             /* Mark refs_not_in_seq as unsupported.  */
2615             bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2616             return 1;
2617           }
2618       /* One of the stores we want to apply SM to and we've not yet seen.  */
2619       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2620           {
2621             seq.safe_push (seq_entry (data->ref, sm_ord));
2622 
2623             /* 1) push it down the queue until a SMed
2624                and not ignored ref is reached, skipping all not SMed refs
2625                and ignored refs via non-TBAA disambiguation.  */
2626             unsigned new_idx;
2627             if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2628                 /* If that fails but we did not fork yet continue, we'll see
2629                      to re-materialize all of the stores in the sequence then.
2630                      Further stores will only be pushed up to this one.  */
2631                 && forked)
2632               {
2633                 bitmap_set_bit (refs_not_supported, data->ref);
2634                 /* Mark it sm_other.  */
2635                 seq[new_idx].second = sm_other;
2636               }
2637 
2638             /* 2) check whether we've seen all refs we want to SM and if so
2639                declare success for the active exit  */
2640             if (bitmap_empty_p (refs_not_in_seq))
2641               return 1;
2642           }
2643       else
2644           /* Another store not part of the final sequence.  Simply push it.  */
2645           seq.safe_push (seq_entry (data->ref, sm_other,
2646                                           gimple_assign_rhs1 (def)));
2647 
2648       vdef = gimple_vuse (def);
2649     }
2650   while (1);
2651 }
2652 
2653 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2654    edges of the LOOP.  */
2655 
2656 static void
hoist_memory_references(class loop * loop,bitmap mem_refs,const vec<edge> & exits)2657 hoist_memory_references (class loop *loop, bitmap mem_refs,
2658                                const vec<edge> &exits)
2659 {
2660   im_mem_ref *ref;
2661   unsigned  i;
2662   bitmap_iterator bi;
2663 
2664   /* There's a special case we can use ordered re-materialization for
2665      conditionally excuted stores which is when all stores in the loop
2666      happen in the same basic-block.  In that case we know we'll reach
2667      all stores and thus can simply process that BB and emit a single
2668      conditional block of ordered materializations.  See PR102436.  */
2669   basic_block single_store_bb = NULL;
2670   EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2671                                   0, i, bi)
2672     {
2673       bool fail = false;
2674       ref = memory_accesses.refs_list[i];
2675       for (auto loc : ref->accesses_in_loop)
2676           if (!gimple_vdef (loc.stmt))
2677             ;
2678           else if (!single_store_bb)
2679             {
2680               single_store_bb = gimple_bb (loc.stmt);
2681               bool conditional = false;
2682               for (edge e : exits)
2683                 if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2684                     {
2685                       /* Conditional as seen from e.  */
2686                       conditional = true;
2687                       break;
2688                     }
2689               if (!conditional)
2690                 {
2691                     fail = true;
2692                     break;
2693                 }
2694             }
2695           else if (single_store_bb != gimple_bb (loc.stmt))
2696             {
2697               fail = true;
2698               break;
2699             }
2700       if (fail)
2701           {
2702             single_store_bb = NULL;
2703             break;
2704           }
2705     }
2706   if (single_store_bb)
2707     {
2708       /* Analyze the single block with stores.  */
2709       auto_bitmap fully_visited;
2710       auto_bitmap refs_not_supported;
2711       auto_bitmap refs_not_in_seq;
2712       auto_vec<seq_entry> seq;
2713       bitmap_copy (refs_not_in_seq, mem_refs);
2714       int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2715                                          seq, refs_not_in_seq, refs_not_supported,
2716                                          false, fully_visited);
2717       if (res != 1)
2718           {
2719             /* Unhandled refs can still fail this.  */
2720             bitmap_clear (mem_refs);
2721             return;
2722           }
2723 
2724       /* We cannot handle sm_other since we neither remember the
2725            stored location nor the value at the point we execute them.  */
2726       for (unsigned i = 0; i < seq.length (); ++i)
2727           {
2728             unsigned new_i;
2729             if (seq[i].second == sm_other
2730                 && seq[i].from != NULL_TREE)
2731               seq[i].from = NULL_TREE;
2732             else if ((seq[i].second == sm_ord
2733                         || (seq[i].second == sm_other
2734                               && seq[i].from != NULL_TREE))
2735                        && !sm_seq_push_down (seq, i, &new_i))
2736               {
2737                 bitmap_set_bit (refs_not_supported, seq[new_i].first);
2738                 seq[new_i].second = sm_other;
2739                 seq[new_i].from = NULL_TREE;
2740               }
2741           }
2742       bitmap_and_compl_into (mem_refs, refs_not_supported);
2743       if (bitmap_empty_p (mem_refs))
2744           return;
2745 
2746       /* Prune seq.  */
2747       while (seq.last ().second == sm_other
2748                && seq.last ().from == NULL_TREE)
2749           seq.pop ();
2750 
2751       hash_map<im_mem_ref *, sm_aux *> aux_map;
2752 
2753       /* Execute SM but delay the store materialization for ordered
2754            sequences on exit.  */
2755       bool first_p = true;
2756       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2757           {
2758             ref = memory_accesses.refs_list[i];
2759             execute_sm (loop, ref, aux_map, true, !first_p);
2760             first_p = false;
2761           }
2762 
2763       /* Get at the single flag variable we eventually produced.  */
2764       im_mem_ref *ref
2765           = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
2766       sm_aux *aux = *aux_map.get (ref);
2767 
2768       /* Materialize ordered store sequences on exits.  */
2769       edge e;
2770       FOR_EACH_VEC_ELT (exits, i, e)
2771           {
2772             edge append_cond_position = NULL;
2773             edge last_cond_fallthru = NULL;
2774             edge insert_e = e;
2775             /* Construct the single flag variable control flow and insert
2776                the ordered seq of stores in the then block.  With
2777                -fstore-data-races we can do the stores unconditionally.  */
2778             if (aux->store_flag)
2779               insert_e
2780                 = single_pred_edge
2781                       (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2782                                                     aux->store_flag,
2783                                                     loop_preheader_edge (loop),
2784                                                     &aux->flag_bbs, append_cond_position,
2785                                                     last_cond_fallthru));
2786             execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2787                                  append_cond_position, last_cond_fallthru);
2788             gsi_commit_one_edge_insert (insert_e, NULL);
2789           }
2790 
2791       for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2792              iter != aux_map.end (); ++iter)
2793           delete (*iter).second;
2794 
2795       return;
2796     }
2797 
2798   /* To address PR57359 before actually applying store-motion check
2799      the candidates found for validity with regards to reordering
2800      relative to other stores which we until here disambiguated using
2801      TBAA which isn't valid.
2802      What matters is the order of the last stores to the mem_refs
2803      with respect to the other stores of the loop at the point of the
2804      loop exits.  */
2805 
2806   /* For each exit compute the store order, pruning from mem_refs
2807      on the fly.  */
2808   /* The complexity of this is at least
2809      O(number of exits * number of SM refs) but more approaching
2810      O(number of exits * number of SM refs * number of stores).  */
2811   /* ???  Somehow do this in a single sweep over the loop body.  */
2812   auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2813   auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2814   edge e;
2815   FOR_EACH_VEC_ELT (exits, i, e)
2816     {
2817       vec<seq_entry> seq;
2818       seq.create (4);
2819       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2820       bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2821       if (bitmap_empty_p (refs_not_in_seq))
2822           {
2823             seq.release ();
2824             break;
2825           }
2826       auto_bitmap fully_visited;
2827       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2828                                          seq, refs_not_in_seq,
2829                                          refs_not_supported, false,
2830                                          fully_visited);
2831       if (res != 1)
2832           {
2833             bitmap_copy (refs_not_supported, mem_refs);
2834             seq.release ();
2835             break;
2836           }
2837       sms.safe_push (std::make_pair (e, seq));
2838     }
2839 
2840   /* Prune pruned mem_refs from earlier processed exits.  */
2841   bool changed = !bitmap_empty_p (refs_not_supported);
2842   while (changed)
2843     {
2844       changed = false;
2845       std::pair<edge, vec<seq_entry> > *seq;
2846       FOR_EACH_VEC_ELT (sms, i, seq)
2847           {
2848             bool need_to_push = false;
2849             for (unsigned i = 0; i < seq->second.length (); ++i)
2850               {
2851                 sm_kind kind = seq->second[i].second;
2852                 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2853                     break;
2854                 unsigned id = seq->second[i].first;
2855                 unsigned new_idx;
2856                 if (kind == sm_ord
2857                       && bitmap_bit_p (refs_not_supported, id))
2858                     {
2859                       seq->second[i].second = sm_other;
2860                       gcc_assert (seq->second[i].from == NULL_TREE);
2861                       need_to_push = true;
2862                     }
2863                 else if (need_to_push
2864                            && !sm_seq_push_down (seq->second, i, &new_idx))
2865                     {
2866                       /* We need to push down both sm_ord and sm_other
2867                          but for the latter we need to disqualify all
2868                          following refs.  */
2869                       if (kind == sm_ord)
2870                         {
2871                           if (bitmap_set_bit (refs_not_supported, id))
2872                               changed = true;
2873                           seq->second[new_idx].second = sm_other;
2874                         }
2875                       else
2876                         {
2877                           for (unsigned j = seq->second.length () - 1;
2878                                  j > new_idx; --j)
2879                               if (seq->second[j].second == sm_ord
2880                                   && bitmap_set_bit (refs_not_supported,
2881                                                          seq->second[j].first))
2882                                 changed = true;
2883                           seq->second.truncate (new_idx);
2884                           break;
2885                         }
2886                     }
2887               }
2888           }
2889     }
2890   std::pair<edge, vec<seq_entry> > *seq;
2891   FOR_EACH_VEC_ELT (sms, i, seq)
2892     {
2893       /* Prune sm_other from the end.  */
2894       while (!seq->second.is_empty ()
2895                && seq->second.last ().second == sm_other)
2896           seq->second.pop ();
2897       /* Prune duplicates from the start.  */
2898       auto_bitmap seen (&lim_bitmap_obstack);
2899       unsigned j, k;
2900       for (j = k = 0; j < seq->second.length (); ++j)
2901           if (bitmap_set_bit (seen, seq->second[j].first))
2902             {
2903               if (k != j)
2904                 seq->second[k] = seq->second[j];
2905               ++k;
2906             }
2907       seq->second.truncate (k);
2908       /* And verify.  */
2909       seq_entry *e;
2910       FOR_EACH_VEC_ELT (seq->second, j, e)
2911           gcc_assert (e->second == sm_ord
2912                         || (e->second == sm_other && e->from != NULL_TREE));
2913     }
2914 
2915   /* Verify dependence for refs we cannot handle with the order preserving
2916      code (refs_not_supported) or prune them from mem_refs.  */
2917   auto_vec<seq_entry> unord_refs;
2918   EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2919     {
2920       ref = memory_accesses.refs_list[i];
2921       if (!ref_indep_loop_p (loop, ref, sm_waw))
2922           bitmap_clear_bit (mem_refs, i);
2923       /* We've now verified store order for ref with respect to all other
2924            stores in the loop does not matter.  */
2925       else
2926           unord_refs.safe_push (seq_entry (i, sm_unord));
2927     }
2928 
2929   hash_map<im_mem_ref *, sm_aux *> aux_map;
2930 
2931   /* Execute SM but delay the store materialization for ordered
2932      sequences on exit.  */
2933   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2934     {
2935       ref = memory_accesses.refs_list[i];
2936       execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
2937                       false);
2938     }
2939 
2940   /* Materialize ordered store sequences on exits.  */
2941   FOR_EACH_VEC_ELT (exits, i, e)
2942     {
2943       edge append_cond_position = NULL;
2944       edge last_cond_fallthru = NULL;
2945       if (i < sms.length ())
2946           {
2947             gcc_assert (sms[i].first == e);
2948             execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2949                                  append_cond_position, last_cond_fallthru);
2950             sms[i].second.release ();
2951           }
2952       if (!unord_refs.is_empty ())
2953           execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2954                                append_cond_position, last_cond_fallthru);
2955       /* Commit edge inserts here to preserve the order of stores
2956            when an exit exits multiple loops.  */
2957       gsi_commit_one_edge_insert (e, NULL);
2958     }
2959 
2960   for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2961        iter != aux_map.end (); ++iter)
2962     delete (*iter).second;
2963 }
2964 
2965 class ref_always_accessed
2966 {
2967 public:
ref_always_accessed(class loop * loop_,bool stored_p_)2968   ref_always_accessed (class loop *loop_, bool stored_p_)
2969       : loop (loop_), stored_p (stored_p_) {}
2970   bool operator () (mem_ref_loc *loc);
2971   class loop *loop;
2972   bool stored_p;
2973 };
2974 
2975 bool
operator ()(mem_ref_loc * loc)2976 ref_always_accessed::operator () (mem_ref_loc *loc)
2977 {
2978   class loop *must_exec;
2979 
2980   struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2981   if (!lim_data)
2982     return false;
2983 
2984   /* If we require an always executed store make sure the statement
2985      is a store.  */
2986   if (stored_p)
2987     {
2988       tree lhs = gimple_get_lhs (loc->stmt);
2989       if (!lhs
2990             || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2991           return false;
2992     }
2993 
2994   must_exec = lim_data->always_executed_in;
2995   if (!must_exec)
2996     return false;
2997 
2998   if (must_exec == loop
2999       || flow_loop_nested_p (must_exec, loop))
3000     return true;
3001 
3002   return false;
3003 }
3004 
3005 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
3006    make sure REF is always stored to in LOOP.  */
3007 
3008 static bool
ref_always_accessed_p(class loop * loop,im_mem_ref * ref,bool stored_p)3009 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
3010 {
3011   return for_all_locs_in_loop (loop, ref,
3012                                      ref_always_accessed (loop, stored_p));
3013 }
3014 
3015 /* Returns true if REF1 and REF2 are independent.  */
3016 
3017 static bool
refs_independent_p(im_mem_ref * ref1,im_mem_ref * ref2,bool tbaa_p)3018 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3019 {
3020   if (ref1 == ref2)
3021     return true;
3022 
3023   if (dump_file && (dump_flags & TDF_DETAILS))
3024     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
3025                ref1->id, ref2->id);
3026 
3027   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
3028     {
3029       if (dump_file && (dump_flags & TDF_DETAILS))
3030           fprintf (dump_file, "dependent.\n");
3031       return false;
3032     }
3033   else
3034     {
3035       if (dump_file && (dump_flags & TDF_DETAILS))
3036           fprintf (dump_file, "independent.\n");
3037       return true;
3038     }
3039 }
3040 
3041 /* Returns true if REF is independent on all other accessess in LOOP.
3042    KIND specifies the kind of dependence to consider.
3043      lim_raw assumes REF is not stored in LOOP and disambiguates RAW
3044                dependences so if true REF can be hoisted out of LOOP
3045      sm_war disambiguates a store REF against all other loads to see
3046               whether the store can be sunk across loads out of LOOP
3047      sm_waw disambiguates a store REF against all other stores to see
3048               whether the store can be sunk across stores out of LOOP.  */
3049 
3050 static bool
ref_indep_loop_p(class loop * loop,im_mem_ref * ref,dep_kind kind)3051 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
3052 {
3053   bool indep_p = true;
3054   bitmap refs_to_check;
3055 
3056   if (kind == sm_war)
3057     refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
3058   else
3059     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
3060 
3061   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
3062       || ref->mem.ref == error_mark_node)
3063     indep_p = false;
3064   else
3065     {
3066       /* tri-state, { unknown, independent, dependent }  */
3067       dep_state state = query_loop_dependence (loop, ref, kind);
3068       if (state != dep_unknown)
3069           return state == dep_independent ? true : false;
3070 
3071       class loop *inner = loop->inner;
3072       while (inner)
3073           {
3074             if (!ref_indep_loop_p (inner, ref, kind))
3075               {
3076                 indep_p = false;
3077                 break;
3078               }
3079             inner = inner->next;
3080           }
3081 
3082       if (indep_p)
3083           {
3084             unsigned i;
3085             bitmap_iterator bi;
3086             EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
3087               {
3088                 im_mem_ref *aref = memory_accesses.refs_list[i];
3089                 if (aref->mem.ref == error_mark_node)
3090                     {
3091                       gimple *stmt = aref->accesses_in_loop[0].stmt;
3092                       if ((kind == sm_war
3093                            && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
3094                                                                 kind != sm_waw))
3095                           || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
3096                                                                kind != sm_waw))
3097                         {
3098                           indep_p = false;
3099                           break;
3100                         }
3101                     }
3102                 else if (!refs_independent_p (ref, aref, kind != sm_waw))
3103                     {
3104                       indep_p = false;
3105                       break;
3106                     }
3107               }
3108           }
3109     }
3110 
3111   if (dump_file && (dump_flags & TDF_DETAILS))
3112     fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3113                kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3114                ref->id, loop->num, indep_p ? "independent" : "dependent");
3115 
3116   /* Record the computed result in the cache.  */
3117   record_loop_dependence (loop, ref, kind,
3118                                 indep_p ? dep_independent : dep_dependent);
3119 
3120   return indep_p;
3121 }
3122 
3123 class ref_in_loop_hot_body
3124 {
3125 public:
ref_in_loop_hot_body(class loop * loop_)3126   ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
3127   bool operator () (mem_ref_loc *loc);
3128   class loop *l;
3129 };
3130 
3131 /* Check the coldest loop between loop L and innermost loop.  If there is one
3132    cold loop between L and INNER_LOOP, store motion can be performed, otherwise
3133    no cold loop means no store motion.  get_coldest_out_loop also handles cases
3134    when l is inner_loop.  */
3135 bool
operator ()(mem_ref_loc * loc)3136 ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3137 {
3138   basic_block curr_bb = gimple_bb (loc->stmt);
3139   class loop *inner_loop = curr_bb->loop_father;
3140   return get_coldest_out_loop (l, inner_loop, curr_bb);
3141 }
3142 
3143 
3144 /* Returns true if we can perform store motion of REF from LOOP.  */
3145 
3146 static bool
can_sm_ref_p(class loop * loop,im_mem_ref * ref)3147 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3148 {
3149   tree base;
3150 
3151   /* Can't hoist unanalyzable refs.  */
3152   if (!MEM_ANALYZABLE (ref))
3153     return false;
3154 
3155   /* Can't hoist/sink aggregate copies.  */
3156   if (ref->mem.ref == error_mark_node)
3157     return false;
3158 
3159   /* It should be movable.  */
3160   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3161       || TREE_THIS_VOLATILE (ref->mem.ref)
3162       || !for_each_index (&ref->mem.ref, may_move_till, loop))
3163     return false;
3164 
3165   /* If it can throw fail, we do not properly update EH info.  */
3166   if (tree_could_throw_p (ref->mem.ref))
3167     return false;
3168 
3169   /* If it can trap, it must be always executed in LOOP.
3170      Readonly memory locations may trap when storing to them, but
3171      tree_could_trap_p is a predicate for rvalues, so check that
3172      explicitly.  */
3173   base = get_base_address (ref->mem.ref);
3174   if ((tree_could_trap_p (ref->mem.ref)
3175        || (DECL_P (base) && TREE_READONLY (base)))
3176       /* ???  We can at least use false here, allowing loads?  We
3177            are forcing conditional stores if the ref is not always
3178            stored to later anyway.  So this would only guard
3179            the load we need to emit.  Thus when the ref is not
3180            loaded we can elide this completely?  */
3181       && !ref_always_accessed_p (loop, ref, true))
3182     return false;
3183 
3184   /* Verify all loads of ref can be hoisted.  */
3185   if (ref->loaded
3186       && bitmap_bit_p (ref->loaded, loop->num)
3187       && !ref_indep_loop_p (loop, ref, lim_raw))
3188     return false;
3189 
3190   /* Verify the candidate can be disambiguated against all loads,
3191      that is, we can elide all in-loop stores.  Disambiguation
3192      against stores is done later when we cannot guarantee preserving
3193      the order of stores.  */
3194   if (!ref_indep_loop_p (loop, ref, sm_war))
3195     return false;
3196 
3197   /* Verify whether the candidate is hot for LOOP.  Only do store motion if the
3198     candidate's profile count is hot.  Statement in cold BB shouldn't be moved
3199     out of it's loop_father.  */
3200   if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3201     return false;
3202 
3203   return true;
3204 }
3205 
3206 /* Marks the references in LOOP for that store motion should be performed
3207    in REFS_TO_SM.  SM_EXECUTED is the set of references for that store
3208    motion was performed in one of the outer loops.  */
3209 
3210 static void
find_refs_for_sm(class loop * loop,bitmap sm_executed,bitmap refs_to_sm)3211 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3212 {
3213   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3214   unsigned i;
3215   bitmap_iterator bi;
3216   im_mem_ref *ref;
3217 
3218   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3219     {
3220       ref = memory_accesses.refs_list[i];
3221       if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3222           bitmap_set_bit (refs_to_sm, i);
3223     }
3224 }
3225 
3226 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3227    for a store motion optimization (i.e. whether we can insert statement
3228    on its exits).  */
3229 
3230 static bool
loop_suitable_for_sm(class loop * loop ATTRIBUTE_UNUSED,const vec<edge> & exits)3231 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3232                           const vec<edge> &exits)
3233 {
3234   unsigned i;
3235   edge ex;
3236 
3237   FOR_EACH_VEC_ELT (exits, i, ex)
3238     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3239       return false;
3240 
3241   return true;
3242 }
3243 
3244 /* Try to perform store motion for all memory references modified inside
3245    LOOP.  SM_EXECUTED is the bitmap of the memory references for that
3246    store motion was executed in one of the outer loops.  */
3247 
3248 static void
store_motion_loop(class loop * loop,bitmap sm_executed)3249 store_motion_loop (class loop *loop, bitmap sm_executed)
3250 {
3251   auto_vec<edge> exits = get_loop_exit_edges (loop);
3252   class loop *subloop;
3253   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3254 
3255   if (loop_suitable_for_sm (loop, exits))
3256     {
3257       find_refs_for_sm (loop, sm_executed, sm_in_loop);
3258       if (!bitmap_empty_p (sm_in_loop))
3259           hoist_memory_references (loop, sm_in_loop, exits);
3260     }
3261 
3262   bitmap_ior_into (sm_executed, sm_in_loop);
3263   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3264     store_motion_loop (subloop, sm_executed);
3265   bitmap_and_compl_into (sm_executed, sm_in_loop);
3266   BITMAP_FREE (sm_in_loop);
3267 }
3268 
3269 /* Try to perform store motion for all memory references modified inside
3270    loops.  */
3271 
3272 static void
do_store_motion(void)3273 do_store_motion (void)
3274 {
3275   class loop *loop;
3276   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3277 
3278   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3279     store_motion_loop (loop, sm_executed);
3280 
3281   BITMAP_FREE (sm_executed);
3282 }
3283 
3284 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3285    for each such basic block bb records the outermost loop for that execution
3286    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
3287    blocks that contain a nonpure call.  */
3288 
3289 static void
fill_always_executed_in_1(class loop * loop,sbitmap contains_call)3290 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3291 {
3292   basic_block bb = NULL, last = NULL;
3293   edge e;
3294   class loop *inn_loop = loop;
3295 
3296   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
3297     {
3298       auto_vec<basic_block, 64> worklist;
3299       worklist.reserve_exact (loop->num_nodes);
3300       worklist.quick_push (loop->header);
3301       do
3302           {
3303             edge_iterator ei;
3304             bb = worklist.pop ();
3305 
3306             if (!flow_bb_inside_loop_p (inn_loop, bb))
3307               {
3308                 /* When we are leaving a possibly infinite inner loop
3309                      we have to stop processing.  */
3310                 if (!finite_loop_p (inn_loop))
3311                     break;
3312                 /* If the loop was finite we can continue with processing
3313                      the loop we exited to.  */
3314                 inn_loop = bb->loop_father;
3315               }
3316 
3317             if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3318               last = bb;
3319 
3320             if (bitmap_bit_p (contains_call, bb->index))
3321               break;
3322 
3323             /* If LOOP exits from this BB stop processing.  */
3324             FOR_EACH_EDGE (e, ei, bb->succs)
3325               if (!flow_bb_inside_loop_p (loop, e->dest))
3326                 break;
3327             if (e)
3328               break;
3329 
3330             /* A loop might be infinite (TODO use simple loop analysis
3331                to disprove this if possible).  */
3332             if (bb->flags & BB_IRREDUCIBLE_LOOP)
3333               break;
3334 
3335             if (bb->loop_father->header == bb)
3336               /* Record that we enter into a subloop since it might not
3337                  be finite.  */
3338               /* ???  Entering into a not always executed subloop makes
3339                  fill_always_executed_in quadratic in loop depth since
3340                  we walk those loops N times.  This is not a problem
3341                  in practice though, see PR102253 for a worst-case testcase.  */
3342               inn_loop = bb->loop_father;
3343 
3344             /* Walk the body of LOOP sorted by dominance relation.  Additionally,
3345                if a basic block S dominates the latch, then only blocks dominated
3346                by S are after it.
3347                This is get_loop_body_in_dom_order using a worklist algorithm and
3348                stopping once we are no longer interested in visiting further
3349                blocks.  */
3350             unsigned old_len = worklist.length ();
3351             unsigned postpone = 0;
3352             for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3353                  son;
3354                  son = next_dom_son (CDI_DOMINATORS, son))
3355               {
3356                 if (!flow_bb_inside_loop_p (loop, son))
3357                     continue;
3358                 if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3359                     postpone = worklist.length ();
3360                 worklist.quick_push (son);
3361               }
3362             if (postpone)
3363               /* Postponing the block that dominates the latch means
3364                  processing it last and thus putting it earliest in the
3365                  worklist.  */
3366               std::swap (worklist[old_len], worklist[postpone]);
3367           }
3368       while (!worklist.is_empty ());
3369 
3370       while (1)
3371           {
3372             if (dump_enabled_p ())
3373               dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3374                                last->index, loop->num);
3375             SET_ALWAYS_EXECUTED_IN (last, loop);
3376             if (last == loop->header)
3377               break;
3378             last = get_immediate_dominator (CDI_DOMINATORS, last);
3379           }
3380     }
3381 
3382   for (loop = loop->inner; loop; loop = loop->next)
3383     fill_always_executed_in_1 (loop, contains_call);
3384 }
3385 
3386 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3387    for each such basic block bb records the outermost loop for that execution
3388    of its header implies execution of bb.  */
3389 
3390 static void
fill_always_executed_in(void)3391 fill_always_executed_in (void)
3392 {
3393   basic_block bb;
3394   class loop *loop;
3395 
3396   auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3397   bitmap_clear (contains_call);
3398   FOR_EACH_BB_FN (bb, cfun)
3399     {
3400       gimple_stmt_iterator gsi;
3401       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3402           {
3403             if (nonpure_call_p (gsi_stmt (gsi)))
3404               break;
3405           }
3406 
3407       if (!gsi_end_p (gsi))
3408           bitmap_set_bit (contains_call, bb->index);
3409     }
3410 
3411   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3412     fill_always_executed_in_1 (loop, contains_call);
3413 }
3414 
3415 /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
3416    to LOOP.  Then recursively iterate each inner loop.  */
3417 
3418 void
fill_coldest_and_hotter_out_loop(class loop * coldest_loop,class loop * hotter_loop,class loop * loop)3419 fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
3420                                           class loop *hotter_loop, class loop *loop)
3421 {
3422   if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3423                                              coldest_loop))
3424     coldest_loop = loop;
3425 
3426   coldest_outermost_loop[loop->num] = coldest_loop;
3427 
3428   hotter_than_inner_loop[loop->num] = NULL;
3429   class loop *outer_loop = loop_outer (loop);
3430   if (hotter_loop
3431       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3432                                                   hotter_loop))
3433     hotter_than_inner_loop[loop->num] = hotter_loop;
3434 
3435   if (outer_loop && outer_loop != current_loops->tree_root
3436       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3437                                                   outer_loop))
3438     hotter_than_inner_loop[loop->num] = outer_loop;
3439 
3440   if (dump_enabled_p ())
3441     {
3442       dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3443                        loop->num, coldest_loop->num);
3444       if (hotter_than_inner_loop[loop->num])
3445           dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3446                          hotter_than_inner_loop[loop->num]->num);
3447       else
3448           dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3449     }
3450 
3451   class loop *inner_loop;
3452   for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
3453     fill_coldest_and_hotter_out_loop (coldest_loop,
3454                                               hotter_than_inner_loop[loop->num],
3455                                               inner_loop);
3456 }
3457 
3458 /* Compute the global information needed by the loop invariant motion pass.  */
3459 
3460 static void
tree_ssa_lim_initialize(bool store_motion)3461 tree_ssa_lim_initialize (bool store_motion)
3462 {
3463   unsigned i;
3464 
3465   bitmap_obstack_initialize (&lim_bitmap_obstack);
3466   gcc_obstack_init (&mem_ref_obstack);
3467   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3468 
3469   if (flag_tm)
3470     compute_transaction_bits ();
3471 
3472   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3473   memory_accesses.refs_list.create (100);
3474   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
3475   memory_accesses.refs_list.quick_push
3476     (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3477 
3478   memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3479   memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3480   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3481   memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3482   if (store_motion)
3483     {
3484       memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3485       memory_accesses.all_refs_stored_in_loop.quick_grow
3486                                                                   (number_of_loops (cfun));
3487     }
3488 
3489   for (i = 0; i < number_of_loops (cfun); i++)
3490     {
3491       bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3492                                &lim_bitmap_obstack);
3493       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3494                                &lim_bitmap_obstack);
3495       if (store_motion)
3496           bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3497                                  &lim_bitmap_obstack);
3498     }
3499 
3500   memory_accesses.ttae_cache = NULL;
3501 
3502   /* Initialize bb_loop_postorder with a mapping from loop->num to
3503      its postorder index.  */
3504   i = 0;
3505   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3506   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3507     bb_loop_postorder[loop->num] = i++;
3508 }
3509 
3510 /* Cleans up after the invariant motion pass.  */
3511 
3512 static void
tree_ssa_lim_finalize(void)3513 tree_ssa_lim_finalize (void)
3514 {
3515   basic_block bb;
3516   unsigned i;
3517   im_mem_ref *ref;
3518 
3519   FOR_EACH_BB_FN (bb, cfun)
3520     SET_ALWAYS_EXECUTED_IN (bb, NULL);
3521 
3522   bitmap_obstack_release (&lim_bitmap_obstack);
3523   delete lim_aux_data_map;
3524 
3525   delete memory_accesses.refs;
3526   memory_accesses.refs = NULL;
3527 
3528   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3529     memref_free (ref);
3530   memory_accesses.refs_list.release ();
3531   obstack_free (&mem_ref_obstack, NULL);
3532 
3533   memory_accesses.refs_loaded_in_loop.release ();
3534   memory_accesses.refs_stored_in_loop.release ();
3535   memory_accesses.all_refs_stored_in_loop.release ();
3536 
3537   if (memory_accesses.ttae_cache)
3538     free_affine_expand_cache (&memory_accesses.ttae_cache);
3539 
3540   free (bb_loop_postorder);
3541 
3542   coldest_outermost_loop.release ();
3543   hotter_than_inner_loop.release ();
3544 }
3545 
3546 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
3547    i.e. those that are likely to be win regardless of the register pressure.
3548    Only perform store motion if STORE_MOTION is true.  */
3549 
3550 unsigned int
loop_invariant_motion_in_fun(function * fun,bool store_motion)3551 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3552 {
3553   unsigned int todo = 0;
3554 
3555   tree_ssa_lim_initialize (store_motion);
3556 
3557   mark_ssa_maybe_undefs ();
3558 
3559   /* Gathers information about memory accesses in the loops.  */
3560   analyze_memory_references (store_motion);
3561 
3562   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
3563   fill_always_executed_in ();
3564 
3565   /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
3566    */
3567   class loop *loop;
3568   coldest_outermost_loop.create (number_of_loops (cfun));
3569   coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
3570   hotter_than_inner_loop.create (number_of_loops (cfun));
3571   hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
3572   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3573     fill_coldest_and_hotter_out_loop (loop, NULL, loop);
3574 
3575   int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3576   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3577 
3578   /* For each statement determine the outermost loop in that it is
3579      invariant and cost for computing the invariant.  */
3580   for (int i = 0; i < n; ++i)
3581     compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3582 
3583   /* Execute store motion.  Force the necessary invariants to be moved
3584      out of the loops as well.  */
3585   if (store_motion)
3586     do_store_motion ();
3587 
3588   free (rpo);
3589   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3590   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3591 
3592   /* Move the expressions that are expensive enough.  */
3593   for (int i = 0; i < n; ++i)
3594     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3595 
3596   free (rpo);
3597 
3598   gsi_commit_edge_inserts ();
3599   if (need_ssa_update_p (fun))
3600     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3601 
3602   tree_ssa_lim_finalize ();
3603 
3604   return todo;
3605 }
3606 
3607 /* Loop invariant motion pass.  */
3608 
3609 namespace {
3610 
3611 const pass_data pass_data_lim =
3612 {
3613   GIMPLE_PASS, /* type */
3614   "lim", /* name */
3615   OPTGROUP_LOOP, /* optinfo_flags */
3616   TV_LIM, /* tv_id */
3617   PROP_cfg, /* properties_required */
3618   0, /* properties_provided */
3619   0, /* properties_destroyed */
3620   0, /* todo_flags_start */
3621   0, /* todo_flags_finish */
3622 };
3623 
3624 class pass_lim : public gimple_opt_pass
3625 {
3626 public:
pass_lim(gcc::context * ctxt)3627   pass_lim (gcc::context *ctxt)
3628     : gimple_opt_pass (pass_data_lim, ctxt)
3629   {}
3630 
3631   /* opt_pass methods: */
clone()3632   opt_pass * clone () { return new pass_lim (m_ctxt); }
gate(function *)3633   virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3634   virtual unsigned int execute (function *);
3635 
3636 }; // class pass_lim
3637 
3638 unsigned int
execute(function * fun)3639 pass_lim::execute (function *fun)
3640 {
3641   bool in_loop_pipeline = scev_initialized_p ();
3642   if (!in_loop_pipeline)
3643     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3644 
3645   if (number_of_loops (fun) <= 1)
3646     return 0;
3647   unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3648 
3649   if (!in_loop_pipeline)
3650     loop_optimizer_finalize ();
3651   else
3652     scev_reset ();
3653   return todo;
3654 }
3655 
3656 } // anon namespace
3657 
3658 gimple_opt_pass *
make_pass_lim(gcc::context * ctxt)3659 make_pass_lim (gcc::context *ctxt)
3660 {
3661   return new pass_lim (ctxt);
3662 }
3663 
3664 
3665