xref: /dragonfly/contrib/gcc-8.0/gcc/tree-ssa-reassoc.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
1 /* Reassociation for trees.
2    Copyright (C) 2005-2018 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55 
56 /*  This is a simple global reassociation pass.  It is, in part, based
57     on the LLVM pass of the same name (They do some things more/less
58     than we do, in different orders, etc).
59 
60     It consists of five steps:
61 
62     1. Breaking up subtract operations into addition + negate, where
63     it would promote the reassociation of adds.
64 
65     2. Left linearization of the expression trees, so that (A+B)+(C+D)
66     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67     During linearization, we place the operands of the binary
68     expressions into a vector of operand_entry_*
69 
70     3. Optimization of the operand lists, eliminating things like a +
71     -a, a & a, etc.
72 
73     3a. Combine repeated factors with the same occurrence counts
74     into a __builtin_powi call that will later be optimized into
75     an optimal number of multiplies.
76 
77     4. Rewrite the expression trees we linearized and optimized so
78     they are in proper rank order.
79 
80     5. Repropagate negates, as nothing else will clean it up ATM.
81 
82     A bit of theory on #4, since nobody seems to write anything down
83     about why it makes sense to do it the way they do it:
84 
85     We could do this much nicer theoretically, but don't (for reasons
86     explained after how to do it theoretically nice :P).
87 
88     In order to promote the most redundancy elimination, you want
89     binary expressions whose operands are the same rank (or
90     preferably, the same value) exposed to the redundancy eliminator,
91     for possible elimination.
92 
93     So the way to do this if we really cared, is to build the new op
94     tree from the leaves to the roots, merging as you go, and putting the
95     new op on the end of the worklist, until you are left with one
96     thing on the worklist.
97 
98     IE if you have to rewrite the following set of operands (listed with
99     rank in parentheses), with opcode PLUS_EXPR:
100 
101     a (1),  b (1),  c (1),  d (2), e (2)
102 
103 
104     We start with our merge worklist empty, and the ops list with all of
105     those on it.
106 
107     You want to first merge all leaves of the same rank, as much as
108     possible.
109 
110     So first build a binary op of
111 
112     mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 
114     Because there is no three operand form of PLUS_EXPR, c is not going to
115     be exposed to redundancy elimination as a rank 1 operand.
116 
117     So you might as well throw it on the merge worklist (you could also
118     consider it to now be a rank two operand, and merge it with d and e,
119     but in this case, you then have evicted e from a binary op. So at
120     least in this situation, you can't win.)
121 
122     Then build a binary op of d + e
123     mergetmp2 = d + e
124 
125     and put mergetmp2 on the merge worklist.
126 
127     so merge worklist = {mergetmp, c, mergetmp2}
128 
129     Continue building binary ops of these operations until you have only
130     one operation left on the worklist.
131 
132     So we have
133 
134     build binary op
135     mergetmp3 = mergetmp + c
136 
137     worklist = {mergetmp2, mergetmp3}
138 
139     mergetmp4 = mergetmp2 + mergetmp3
140 
141     worklist = {mergetmp4}
142 
143     because we have one operation left, we can now just set the original
144     statement equal to the result of that operation.
145 
146     This will at least expose a + b  and d + e to redundancy elimination
147     as binary operations.
148 
149     For extra points, you can reuse the old statements to build the
150     mergetmps, since you shouldn't run out.
151 
152     So why don't we do this?
153 
154     Because it's expensive, and rarely will help.  Most trees we are
155     reassociating have 3 or less ops.  If they have 2 ops, they already
156     will be written into a nice single binary op.  If you have 3 ops, a
157     single simple check suffices to tell you whether the first two are of the
158     same rank.  If so, you know to order it
159 
160     mergetmp = op1 + op2
161     newstmt = mergetmp + op3
162 
163     instead of
164     mergetmp = op2 + op3
165     newstmt = mergetmp + op1
166 
167     If all three are of the same rank, you can't expose them all in a
168     single binary operator anyway, so the above is *still* the best you
169     can do.
170 
171     Thus, this is what we do.  When we have three ops left, we check to see
172     what order to put them in, and call it a day.  As a nod to vector sum
173     reduction, we check if any of the ops are really a phi node that is a
174     destructive update for the associating op, and keep the destructive
175     update together for vector sum reduction recognition.  */
176 
177 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
178    point 3a in the pass header comment.  */
179 static bool reassoc_insert_powi_p;
180 
181 /* Statistics */
182 static struct
183 {
184   int linearized;
185   int constants_eliminated;
186   int ops_eliminated;
187   int rewritten;
188   int pows_encountered;
189   int pows_created;
190 } reassociate_stats;
191 
192 /* Operator, rank pair.  */
193 struct operand_entry
194 {
195   unsigned int rank;
196   unsigned int id;
197   tree op;
198   unsigned int count;
199   gimple *stmt_to_insert;
200 };
201 
202 static object_allocator<operand_entry> operand_entry_pool
203   ("operand entry pool");
204 
205 /* This is used to assign a unique ID to each struct operand_entry
206    so that qsort results are identical on different hosts.  */
207 static unsigned int next_operand_entry_id;
208 
209 /* Starting rank number for a given basic block, so that we can rank
210    operations using unmovable instructions in that BB based on the bb
211    depth.  */
212 static long *bb_rank;
213 
214 /* Operand->rank hashtable.  */
215 static hash_map<tree, long> *operand_rank;
216 
217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
218    all basic blocks the CFG should be adjusted - basic blocks
219    split right after that SSA_NAME's definition statement and before
220    the only use, which must be a bit ior.  */
221 static vec<tree> reassoc_branch_fixups;
222 
223 /* Forward decls.  */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228    possibly added by gsi_remove.  */
229 
230 bool
reassoc_remove_stmt(gimple_stmt_iterator * gsi)231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 {
233   gimple *stmt = gsi_stmt (*gsi);
234 
235   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236     return gsi_remove (gsi, true);
237 
238   gimple_stmt_iterator prev = *gsi;
239   gsi_prev (&prev);
240   unsigned uid = gimple_uid (stmt);
241   basic_block bb = gimple_bb (stmt);
242   bool ret = gsi_remove (gsi, true);
243   if (!gsi_end_p (prev))
244     gsi_next (&prev);
245   else
246     prev = gsi_start_bb (bb);
247   gimple *end_stmt = gsi_stmt (*gsi);
248   while ((stmt = gsi_stmt (prev)) != end_stmt)
249     {
250       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251       gimple_set_uid (stmt, uid);
252       gsi_next (&prev);
253     }
254   return ret;
255 }
256 
257 /* Bias amount for loop-carried phis.  We want this to be larger than
258    the depth of any reassociation tree we can see, but not larger than
259    the rank difference between two blocks.  */
260 #define PHI_LOOP_BIAS (1 << 15)
261 
262 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
263    an innermost loop, and the phi has only a single use which is inside
264    the loop, then the rank is the block rank of the loop latch plus an
265    extra bias for the loop-carried dependence.  This causes expressions
266    calculated into an accumulator variable to be independent for each
267    iteration of the loop.  If STMT is some other phi, the rank is the
268    block rank of its containing block.  */
269 static long
phi_rank(gimple * stmt)270 phi_rank (gimple *stmt)
271 {
272   basic_block bb = gimple_bb (stmt);
273   struct loop *father = bb->loop_father;
274   tree res;
275   unsigned i;
276   use_operand_p use;
277   gimple *use_stmt;
278 
279   /* We only care about real loops (those with a latch).  */
280   if (!father->latch)
281     return bb_rank[bb->index];
282 
283   /* Interesting phis must be in headers of innermost loops.  */
284   if (bb != father->header
285       || father->inner)
286     return bb_rank[bb->index];
287 
288   /* Ignore virtual SSA_NAMEs.  */
289   res = gimple_phi_result (stmt);
290   if (virtual_operand_p (res))
291     return bb_rank[bb->index];
292 
293   /* The phi definition must have a single use, and that use must be
294      within the loop.  Otherwise this isn't an accumulator pattern.  */
295   if (!single_imm_use (res, &use, &use_stmt)
296       || gimple_bb (use_stmt)->loop_father != father)
297     return bb_rank[bb->index];
298 
299   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
300   for (i = 0; i < gimple_phi_num_args (stmt); i++)
301     {
302       tree arg = gimple_phi_arg_def (stmt, i);
303       if (TREE_CODE (arg) == SSA_NAME
304             && !SSA_NAME_IS_DEFAULT_DEF (arg))
305           {
306             gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307             if (gimple_bb (def_stmt)->loop_father == father)
308               return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309           }
310     }
311 
312   /* Must be an uninteresting phi.  */
313   return bb_rank[bb->index];
314 }
315 
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317    loop-carried dependence of an innermost loop, return TRUE; else
318    return FALSE.  */
319 static bool
loop_carried_phi(tree exp)320 loop_carried_phi (tree exp)
321 {
322   gimple *phi_stmt;
323   long block_rank;
324 
325   if (TREE_CODE (exp) != SSA_NAME
326       || SSA_NAME_IS_DEFAULT_DEF (exp))
327     return false;
328 
329   phi_stmt = SSA_NAME_DEF_STMT (exp);
330 
331   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332     return false;
333 
334   /* Non-loop-carried phis have block rank.  Loop-carried phis have
335      an additional bias added in.  If this phi doesn't have block rank,
336      it's biased and should not be propagated.  */
337   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 
339   if (phi_rank (phi_stmt) != block_rank)
340     return true;
341 
342   return false;
343 }
344 
345 /* Return the maximum of RANK and the rank that should be propagated
346    from expression OP.  For most operands, this is just the rank of OP.
347    For loop-carried phis, the value is zero to avoid undoing the bias
348    in favor of the phi.  */
349 static long
propagate_rank(long rank,tree op)350 propagate_rank (long rank, tree op)
351 {
352   long op_rank;
353 
354   if (loop_carried_phi (op))
355     return rank;
356 
357   op_rank = get_rank (op);
358 
359   return MAX (rank, op_rank);
360 }
361 
362 /* Look up the operand rank structure for expression E.  */
363 
364 static inline long
find_operand_rank(tree e)365 find_operand_rank (tree e)
366 {
367   long *slot = operand_rank->get (e);
368   return slot ? *slot : -1;
369 }
370 
371 /* Insert {E,RANK} into the operand rank hashtable.  */
372 
373 static inline void
insert_operand_rank(tree e,long rank)374 insert_operand_rank (tree e, long rank)
375 {
376   gcc_assert (rank > 0);
377   gcc_assert (!operand_rank->put (e, rank));
378 }
379 
380 /* Given an expression E, return the rank of the expression.  */
381 
382 static long
get_rank(tree e)383 get_rank (tree e)
384 {
385   /* SSA_NAME's have the rank of the expression they are the result
386      of.
387      For globals and uninitialized values, the rank is 0.
388      For function arguments, use the pre-setup rank.
389      For PHI nodes, stores, asm statements, etc, we use the rank of
390      the BB.
391      For simple operations, the rank is the maximum rank of any of
392      its operands, or the bb_rank, whichever is less.
393      I make no claims that this is optimal, however, it gives good
394      results.  */
395 
396   /* We make an exception to the normal ranking system to break
397      dependences of accumulator variables in loops.  Suppose we
398      have a simple one-block loop containing:
399 
400        x_1 = phi(x_0, x_2)
401        b = a + x_1
402        c = b + d
403        x_2 = c + e
404 
405      As shown, each iteration of the calculation into x is fully
406      dependent upon the iteration before it.  We would prefer to
407      see this in the form:
408 
409        x_1 = phi(x_0, x_2)
410        b = a + d
411        c = b + e
412        x_2 = c + x_1
413 
414      If the loop is unrolled, the calculations of b and c from
415      different iterations can be interleaved.
416 
417      To obtain this result during reassociation, we bias the rank
418      of the phi definition x_1 upward, when it is recognized as an
419      accumulator pattern.  The artificial rank causes it to be
420      added last, providing the desired independence.  */
421 
422   if (TREE_CODE (e) == SSA_NAME)
423     {
424       ssa_op_iter iter;
425       gimple *stmt;
426       long rank;
427       tree op;
428 
429       if (SSA_NAME_IS_DEFAULT_DEF (e))
430           return find_operand_rank (e);
431 
432       stmt = SSA_NAME_DEF_STMT (e);
433       if (gimple_code (stmt) == GIMPLE_PHI)
434           return phi_rank (stmt);
435 
436       if (!is_gimple_assign (stmt))
437           return bb_rank[gimple_bb (stmt)->index];
438 
439       /* If we already have a rank for this expression, use that.  */
440       rank = find_operand_rank (e);
441       if (rank != -1)
442           return rank;
443 
444       /* Otherwise, find the maximum rank for the operands.  As an
445            exception, remove the bias from loop-carried phis when propagating
446            the rank so that dependent operations are not also biased.  */
447       /* Simply walk over all SSA uses - this takes advatage of the
448          fact that non-SSA operands are is_gimple_min_invariant and
449            thus have rank 0.  */
450       rank = 0;
451       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452           rank = propagate_rank (rank, op);
453 
454       if (dump_file && (dump_flags & TDF_DETAILS))
455           {
456             fprintf (dump_file, "Rank for ");
457             print_generic_expr (dump_file, e);
458             fprintf (dump_file, " is %ld\n", (rank + 1));
459           }
460 
461       /* Note the rank in the hashtable so we don't recompute it.  */
462       insert_operand_rank (e, (rank + 1));
463       return (rank + 1);
464     }
465 
466   /* Constants, globals, etc., are rank 0 */
467   return 0;
468 }
469 
470 
471 /* We want integer ones to end up last no matter what, since they are
472    the ones we can do the most with.  */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
477 
478 /* Classify an invariant tree into integer, float, or other, so that
479    we can sort them to be near other constants of the same type.  */
480 static inline int
constant_type(tree t)481 constant_type (tree t)
482 {
483   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484     return INTEGER_CONST_TYPE;
485   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486     {
487       /* Sort -1.0 and 1.0 constants last, while in some cases
488            const_binop can't optimize some inexact operations, multiplication
489            by -1.0 or 1.0 can be always merged with others.  */
490       if (real_onep (t) || real_minus_onep (t))
491           return FLOAT_ONE_CONST_TYPE;
492       return FLOAT_CONST_TYPE;
493     }
494   else
495     return OTHER_CONST_TYPE;
496 }
497 
498 /* qsort comparison function to sort operand entries PA and PB by rank
499    so that the sorted array is ordered by rank in decreasing order.  */
500 static int
sort_by_operand_rank(const void * pa,const void * pb)501 sort_by_operand_rank (const void *pa, const void *pb)
502 {
503   const operand_entry *oea = *(const operand_entry *const *)pa;
504   const operand_entry *oeb = *(const operand_entry *const *)pb;
505 
506   if (oeb->rank != oea->rank)
507     return oeb->rank > oea->rank ? 1 : -1;
508 
509   /* It's nicer for optimize_expression if constants that are likely
510      to fold when added/multiplied/whatever are put next to each
511      other.  Since all constants have rank 0, order them by type.  */
512   if (oea->rank == 0)
513     {
514       if (constant_type (oeb->op) != constant_type (oea->op))
515           return constant_type (oea->op) - constant_type (oeb->op);
516       else
517           /* To make sorting result stable, we use unique IDs to determine
518              order.  */
519           return oeb->id > oea->id ? 1 : -1;
520     }
521 
522   if (TREE_CODE (oea->op) != SSA_NAME)
523     {
524       if (TREE_CODE (oeb->op) != SSA_NAME)
525           return oeb->id > oea->id ? 1 : -1;
526       else
527           return 1;
528     }
529   else if (TREE_CODE (oeb->op) != SSA_NAME)
530     return -1;
531 
532   /* Lastly, make sure the versions that are the same go next to each
533      other.  */
534   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535     {
536       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537            versions of removed SSA_NAMEs, so if possible, prefer to sort
538            based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539            See PR60418.  */
540       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542       basic_block bba = gimple_bb (stmta);
543       basic_block bbb = gimple_bb (stmtb);
544       if (bbb != bba)
545           {
546             /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547                but the other might not.  */
548             if (!bba)
549               return 1;
550             if (!bbb)
551               return -1;
552             /* If neither is, compare bb_rank.  */
553             if (bb_rank[bbb->index] != bb_rank[bba->index])
554               return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
555           }
556 
557       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559       if (da != db)
560           return da ? 1 : -1;
561 
562       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
563     }
564 
565   return oeb->id > oea->id ? 1 : -1;
566 }
567 
568 /* Add an operand entry to *OPS for the tree operand OP.  */
569 
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573   operand_entry *oe = operand_entry_pool.allocate ();
574 
575   oe->op = op;
576   oe->rank = get_rank (op);
577   oe->id = next_operand_entry_id++;
578   oe->count = 1;
579   oe->stmt_to_insert = stmt_to_insert;
580   ops->safe_push (oe);
581 }
582 
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584    count REPEAT.  */
585 
586 static void
add_repeat_to_ops_vec(vec<operand_entry * > * ops,tree op,HOST_WIDE_INT repeat)587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588                            HOST_WIDE_INT repeat)
589 {
590   operand_entry *oe = operand_entry_pool.allocate ();
591 
592   oe->op = op;
593   oe->rank = get_rank (op);
594   oe->id = next_operand_entry_id++;
595   oe->count = repeat;
596   oe->stmt_to_insert = NULL;
597   ops->safe_push (oe);
598 
599   reassociate_stats.pows_encountered++;
600 }
601 
602 /* Return true if STMT is reassociable operation containing a binary
603    operation with tree code CODE, and is inside LOOP.  */
604 
605 static bool
is_reassociable_op(gimple * stmt,enum tree_code code,struct loop * loop)606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
607 {
608   basic_block bb = gimple_bb (stmt);
609 
610   if (gimple_bb (stmt) == NULL)
611     return false;
612 
613   if (!flow_bb_inside_loop_p (loop, bb))
614     return false;
615 
616   if (is_gimple_assign (stmt)
617       && gimple_assign_rhs_code (stmt) == code
618       && has_single_use (gimple_assign_lhs (stmt)))
619     {
620       tree rhs1 = gimple_assign_rhs1 (stmt);
621       tree rhs2 = gimple_assign_rhs1 (stmt);
622       if (TREE_CODE (rhs1) == SSA_NAME
623             && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624           return false;
625       if (rhs2
626             && TREE_CODE (rhs2) == SSA_NAME
627             && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628           return false;
629       return true;
630     }
631 
632   return false;
633 }
634 
635 
636 /* Return true if STMT is a nop-conversion.  */
637 
638 static bool
gimple_nop_conversion_p(gimple * stmt)639 gimple_nop_conversion_p (gimple *stmt)
640 {
641   if (gassign *ass = dyn_cast <gassign *> (stmt))
642     {
643       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644             && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645                                             TREE_TYPE (gimple_assign_rhs1 (ass))))
646           return true;
647     }
648   return false;
649 }
650 
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652    operand of the negate operation.  Otherwise, return NULL.  */
653 
654 static tree
get_unary_op(tree name,enum tree_code opcode)655 get_unary_op (tree name, enum tree_code opcode)
656 {
657   gimple *stmt = SSA_NAME_DEF_STMT (name);
658 
659   /* Look through nop conversions (sign changes).  */
660   if (gimple_nop_conversion_p (stmt)
661       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663 
664   if (!is_gimple_assign (stmt))
665     return NULL_TREE;
666 
667   if (gimple_assign_rhs_code (stmt) == opcode)
668     return gimple_assign_rhs1 (stmt);
669   return NULL_TREE;
670 }
671 
672 /* Return true if OP1 and OP2 have the same value if casted to either type.  */
673 
674 static bool
ops_equal_values_p(tree op1,tree op2)675 ops_equal_values_p (tree op1, tree op2)
676 {
677   if (op1 == op2)
678     return true;
679 
680   tree orig_op1 = op1;
681   if (TREE_CODE (op1) == SSA_NAME)
682     {
683       gimple *stmt = SSA_NAME_DEF_STMT (op1);
684       if (gimple_nop_conversion_p (stmt))
685           {
686             op1 = gimple_assign_rhs1 (stmt);
687             if (op1 == op2)
688               return true;
689           }
690     }
691 
692   if (TREE_CODE (op2) == SSA_NAME)
693     {
694       gimple *stmt = SSA_NAME_DEF_STMT (op2);
695       if (gimple_nop_conversion_p (stmt))
696           {
697             op2 = gimple_assign_rhs1 (stmt);
698             if (op1 == op2
699                 || orig_op1 == op2)
700               return true;
701           }
702     }
703 
704   return false;
705 }
706 
707 
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709    eliminate through equivalences, do so, remove them from OPS, and
710    return true.  Otherwise, return false.  */
711 
712 static bool
eliminate_duplicate_pair(enum tree_code opcode,vec<operand_entry * > * ops,bool * all_done,unsigned int i,operand_entry * curr,operand_entry * last)713 eliminate_duplicate_pair (enum tree_code opcode,
714                                 vec<operand_entry *> *ops,
715                                 bool *all_done,
716                                 unsigned int i,
717                                 operand_entry *curr,
718                                 operand_entry *last)
719 {
720 
721   /* If we have two of the same op, and the opcode is & |, min, or max,
722      we can eliminate one of them.
723      If we have two of the same op, and the opcode is ^, we can
724      eliminate both of them.  */
725 
726   if (last && last->op == curr->op)
727     {
728       switch (opcode)
729           {
730           case MAX_EXPR:
731           case MIN_EXPR:
732           case BIT_IOR_EXPR:
733           case BIT_AND_EXPR:
734             if (dump_file && (dump_flags & TDF_DETAILS))
735               {
736                 fprintf (dump_file, "Equivalence: ");
737                 print_generic_expr (dump_file, curr->op);
738                 fprintf (dump_file, " [&|minmax] ");
739                 print_generic_expr (dump_file, last->op);
740                 fprintf (dump_file, " -> ");
741                 print_generic_stmt (dump_file, last->op);
742               }
743 
744             ops->ordered_remove (i);
745             reassociate_stats.ops_eliminated ++;
746 
747             return true;
748 
749           case BIT_XOR_EXPR:
750             if (dump_file && (dump_flags & TDF_DETAILS))
751               {
752                 fprintf (dump_file, "Equivalence: ");
753                 print_generic_expr (dump_file, curr->op);
754                 fprintf (dump_file, " ^ ");
755                 print_generic_expr (dump_file, last->op);
756                 fprintf (dump_file, " -> nothing\n");
757               }
758 
759             reassociate_stats.ops_eliminated += 2;
760 
761             if (ops->length () == 2)
762               {
763                 ops->truncate (0);
764                 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765                 *all_done = true;
766               }
767             else
768               {
769                 ops->ordered_remove (i-1);
770                 ops->ordered_remove (i-1);
771               }
772 
773             return true;
774 
775           default:
776             break;
777           }
778     }
779   return false;
780 }
781 
782 static vec<tree> plus_negates;
783 
784 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
785    expression, look in OPS for a corresponding positive operation to cancel
786    it out.  If we find one, remove the other from OPS, replace
787    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
788    return false. */
789 
790 static bool
eliminate_plus_minus_pair(enum tree_code opcode,vec<operand_entry * > * ops,unsigned int currindex,operand_entry * curr)791 eliminate_plus_minus_pair (enum tree_code opcode,
792                                  vec<operand_entry *> *ops,
793                                  unsigned int currindex,
794                                  operand_entry *curr)
795 {
796   tree negateop;
797   tree notop;
798   unsigned int i;
799   operand_entry *oe;
800 
801   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802     return false;
803 
804   negateop = get_unary_op (curr->op, NEGATE_EXPR);
805   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806   if (negateop == NULL_TREE && notop == NULL_TREE)
807     return false;
808 
809   /* Any non-negated version will have a rank that is one less than
810      the current rank.  So once we hit those ranks, if we don't find
811      one, we can stop.  */
812 
813   for (i = currindex + 1;
814        ops->iterate (i, &oe)
815        && oe->rank >= curr->rank - 1 ;
816        i++)
817     {
818       if (negateop
819             && ops_equal_values_p (oe->op, negateop))
820           {
821             if (dump_file && (dump_flags & TDF_DETAILS))
822               {
823                 fprintf (dump_file, "Equivalence: ");
824                 print_generic_expr (dump_file, negateop);
825                 fprintf (dump_file, " + -");
826                 print_generic_expr (dump_file, oe->op);
827                 fprintf (dump_file, " -> 0\n");
828               }
829 
830             ops->ordered_remove (i);
831             add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832             ops->ordered_remove (currindex);
833             reassociate_stats.ops_eliminated ++;
834 
835             return true;
836           }
837       else if (notop
838                  && ops_equal_values_p (oe->op, notop))
839           {
840             tree op_type = TREE_TYPE (oe->op);
841 
842             if (dump_file && (dump_flags & TDF_DETAILS))
843               {
844                 fprintf (dump_file, "Equivalence: ");
845                 print_generic_expr (dump_file, notop);
846                 fprintf (dump_file, " + ~");
847                 print_generic_expr (dump_file, oe->op);
848                 fprintf (dump_file, " -> -1\n");
849               }
850 
851             ops->ordered_remove (i);
852             add_to_ops_vec (ops, build_all_ones_cst (op_type));
853             ops->ordered_remove (currindex);
854             reassociate_stats.ops_eliminated ++;
855 
856             return true;
857           }
858     }
859 
860   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861      save it for later inspection in repropagate_negates().  */
862   if (negateop != NULL_TREE
863       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864     plus_negates.safe_push (curr->op);
865 
866   return false;
867 }
868 
869 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
870    bitwise not expression, look in OPS for a corresponding operand to
871    cancel it out.  If we find one, remove the other from OPS, replace
872    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
873    false. */
874 
875 static bool
eliminate_not_pairs(enum tree_code opcode,vec<operand_entry * > * ops,unsigned int currindex,operand_entry * curr)876 eliminate_not_pairs (enum tree_code opcode,
877                          vec<operand_entry *> *ops,
878                          unsigned int currindex,
879                          operand_entry *curr)
880 {
881   tree notop;
882   unsigned int i;
883   operand_entry *oe;
884 
885   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886       || TREE_CODE (curr->op) != SSA_NAME)
887     return false;
888 
889   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890   if (notop == NULL_TREE)
891     return false;
892 
893   /* Any non-not version will have a rank that is one less than
894      the current rank.  So once we hit those ranks, if we don't find
895      one, we can stop.  */
896 
897   for (i = currindex + 1;
898        ops->iterate (i, &oe)
899        && oe->rank >= curr->rank - 1;
900        i++)
901     {
902       if (oe->op == notop)
903           {
904             if (dump_file && (dump_flags & TDF_DETAILS))
905               {
906                 fprintf (dump_file, "Equivalence: ");
907                 print_generic_expr (dump_file, notop);
908                 if (opcode == BIT_AND_EXPR)
909                     fprintf (dump_file, " & ~");
910                 else if (opcode == BIT_IOR_EXPR)
911                     fprintf (dump_file, " | ~");
912                 print_generic_expr (dump_file, oe->op);
913                 if (opcode == BIT_AND_EXPR)
914                     fprintf (dump_file, " -> 0\n");
915                 else if (opcode == BIT_IOR_EXPR)
916                     fprintf (dump_file, " -> -1\n");
917               }
918 
919             if (opcode == BIT_AND_EXPR)
920               oe->op = build_zero_cst (TREE_TYPE (oe->op));
921             else if (opcode == BIT_IOR_EXPR)
922               oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923 
924             reassociate_stats.ops_eliminated += ops->length () - 1;
925             ops->truncate (0);
926             ops->quick_push (oe);
927             return true;
928           }
929     }
930 
931   return false;
932 }
933 
934 /* Use constant value that may be present in OPS to try to eliminate
935    operands.  Note that this function is only really used when we've
936    eliminated ops for other reasons, or merged constants.  Across
937    single statements, fold already does all of this, plus more.  There
938    is little point in duplicating logic, so I've only included the
939    identities that I could ever construct testcases to trigger.  */
940 
941 static void
eliminate_using_constants(enum tree_code opcode,vec<operand_entry * > * ops)942 eliminate_using_constants (enum tree_code opcode,
943                                  vec<operand_entry *> *ops)
944 {
945   operand_entry *oelast = ops->last ();
946   tree type = TREE_TYPE (oelast->op);
947 
948   if (oelast->rank == 0
949       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950     {
951       switch (opcode)
952           {
953           case BIT_AND_EXPR:
954             if (integer_zerop (oelast->op))
955               {
956                 if (ops->length () != 1)
957                     {
958                       if (dump_file && (dump_flags & TDF_DETAILS))
959                         fprintf (dump_file, "Found & 0, removing all other ops\n");
960 
961                       reassociate_stats.ops_eliminated += ops->length () - 1;
962 
963                       ops->truncate (0);
964                       ops->quick_push (oelast);
965                       return;
966                     }
967               }
968             else if (integer_all_onesp (oelast->op))
969               {
970                 if (ops->length () != 1)
971                     {
972                       if (dump_file && (dump_flags & TDF_DETAILS))
973                         fprintf (dump_file, "Found & -1, removing\n");
974                       ops->pop ();
975                       reassociate_stats.ops_eliminated++;
976                     }
977               }
978             break;
979           case BIT_IOR_EXPR:
980             if (integer_all_onesp (oelast->op))
981               {
982                 if (ops->length () != 1)
983                     {
984                       if (dump_file && (dump_flags & TDF_DETAILS))
985                         fprintf (dump_file, "Found | -1, removing all other ops\n");
986 
987                       reassociate_stats.ops_eliminated += ops->length () - 1;
988 
989                       ops->truncate (0);
990                       ops->quick_push (oelast);
991                       return;
992                     }
993               }
994             else if (integer_zerop (oelast->op))
995               {
996                 if (ops->length () != 1)
997                     {
998                       if (dump_file && (dump_flags & TDF_DETAILS))
999                         fprintf (dump_file, "Found | 0, removing\n");
1000                       ops->pop ();
1001                       reassociate_stats.ops_eliminated++;
1002                     }
1003               }
1004             break;
1005           case MULT_EXPR:
1006             if (integer_zerop (oelast->op)
1007                 || (FLOAT_TYPE_P (type)
1008                       && !HONOR_NANS (type)
1009                       && !HONOR_SIGNED_ZEROS (type)
1010                       && real_zerop (oelast->op)))
1011               {
1012                 if (ops->length () != 1)
1013                     {
1014                       if (dump_file && (dump_flags & TDF_DETAILS))
1015                         fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 
1017                       reassociate_stats.ops_eliminated += ops->length () - 1;
1018                       ops->truncate (0);
1019                       ops->quick_push (oelast);
1020                       return;
1021                     }
1022               }
1023             else if (integer_onep (oelast->op)
1024                        || (FLOAT_TYPE_P (type)
1025                            && !HONOR_SNANS (type)
1026                            && real_onep (oelast->op)))
1027               {
1028                 if (ops->length () != 1)
1029                     {
1030                       if (dump_file && (dump_flags & TDF_DETAILS))
1031                         fprintf (dump_file, "Found * 1, removing\n");
1032                       ops->pop ();
1033                       reassociate_stats.ops_eliminated++;
1034                       return;
1035                     }
1036               }
1037             break;
1038           case BIT_XOR_EXPR:
1039           case PLUS_EXPR:
1040           case MINUS_EXPR:
1041             if (integer_zerop (oelast->op)
1042                 || (FLOAT_TYPE_P (type)
1043                       && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044                       && fold_real_zero_addition_p (type, oelast->op,
1045                                                             opcode == MINUS_EXPR)))
1046               {
1047                 if (ops->length () != 1)
1048                     {
1049                       if (dump_file && (dump_flags & TDF_DETAILS))
1050                         fprintf (dump_file, "Found [|^+] 0, removing\n");
1051                       ops->pop ();
1052                       reassociate_stats.ops_eliminated++;
1053                       return;
1054                     }
1055               }
1056             break;
1057           default:
1058             break;
1059           }
1060     }
1061 }
1062 
1063 
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065                                          bool, bool);
1066 
1067 /* Structure for tracking and counting operands.  */
1068 struct oecount {
1069   unsigned int cnt;
1070   unsigned int id;
1071   enum tree_code oecode;
1072   tree op;
1073 };
1074 
1075 
1076 /* The heap for the oecount hashtable and the sorted list of operands.  */
1077 static vec<oecount> cvec;
1078 
1079 
1080 /* Oecount hashtable helpers.  */
1081 
1082 struct oecount_hasher : int_hash <int, 0, 1>
1083 {
1084   static inline hashval_t hash (int);
1085   static inline bool equal (int, int);
1086 };
1087 
1088 /* Hash function for oecount.  */
1089 
1090 inline hashval_t
hash(int p)1091 oecount_hasher::hash (int p)
1092 {
1093   const oecount *c = &cvec[p - 42];
1094   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1095 }
1096 
1097 /* Comparison function for oecount.  */
1098 
1099 inline bool
equal(int p1,int p2)1100 oecount_hasher::equal (int p1, int p2)
1101 {
1102   const oecount *c1 = &cvec[p1 - 42];
1103   const oecount *c2 = &cvec[p2 - 42];
1104   return c1->oecode == c2->oecode && c1->op == c2->op;
1105 }
1106 
1107 /* Comparison function for qsort sorting oecount elements by count.  */
1108 
1109 static int
oecount_cmp(const void * p1,const void * p2)1110 oecount_cmp (const void *p1, const void *p2)
1111 {
1112   const oecount *c1 = (const oecount *)p1;
1113   const oecount *c2 = (const oecount *)p2;
1114   if (c1->cnt != c2->cnt)
1115     return c1->cnt > c2->cnt ? 1 : -1;
1116   else
1117     /* If counts are identical, use unique IDs to stabilize qsort.  */
1118     return c1->id > c2->id ? 1 : -1;
1119 }
1120 
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122    to some exponent.  */
1123 
1124 static bool
stmt_is_power_of_op(gimple * stmt,tree op)1125 stmt_is_power_of_op (gimple *stmt, tree op)
1126 {
1127   if (!is_gimple_call (stmt))
1128     return false;
1129 
1130   switch (gimple_call_combined_fn (stmt))
1131     {
1132     CASE_CFN_POW:
1133     CASE_CFN_POWI:
1134       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135 
1136     default:
1137       return false;
1138     }
1139 }
1140 
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142    in place and return the result.  Assumes that stmt_is_power_of_op
1143    was previously called for STMT and returned TRUE.  */
1144 
1145 static HOST_WIDE_INT
decrement_power(gimple * stmt)1146 decrement_power (gimple *stmt)
1147 {
1148   REAL_VALUE_TYPE c, cint;
1149   HOST_WIDE_INT power;
1150   tree arg1;
1151 
1152   switch (gimple_call_combined_fn (stmt))
1153     {
1154     CASE_CFN_POW:
1155       arg1 = gimple_call_arg (stmt, 1);
1156       c = TREE_REAL_CST (arg1);
1157       power = real_to_integer (&c) - 1;
1158       real_from_integer (&cint, VOIDmode, power, SIGNED);
1159       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160       return power;
1161 
1162     CASE_CFN_POWI:
1163       arg1 = gimple_call_arg (stmt, 1);
1164       power = TREE_INT_CST_LOW (arg1) - 1;
1165       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166       return power;
1167 
1168     default:
1169       gcc_unreachable ();
1170     }
1171 }
1172 
1173 /* Replace SSA defined by STMT and replace all its uses with new
1174    SSA.  Also return the new SSA.  */
1175 
1176 static tree
make_new_ssa_for_def(gimple * stmt,enum tree_code opcode,tree op)1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 {
1179   gimple *use_stmt;
1180   use_operand_p use;
1181   imm_use_iterator iter;
1182   tree new_lhs, new_debug_lhs = NULL_TREE;
1183   tree lhs = gimple_get_lhs (stmt);
1184 
1185   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186   gimple_set_lhs (stmt, new_lhs);
1187 
1188   /* Also need to update GIMPLE_DEBUGs.  */
1189   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190     {
1191       tree repl = new_lhs;
1192       if (is_gimple_debug (use_stmt))
1193           {
1194             if (new_debug_lhs == NULL_TREE)
1195               {
1196                 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197                 gdebug *def_temp
1198                     = gimple_build_debug_bind (new_debug_lhs,
1199                                                      build2 (opcode, TREE_TYPE (lhs),
1200                                                                new_lhs, op),
1201                                                      stmt);
1202                 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203                 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204                 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205                 gimple_set_uid (def_temp, gimple_uid (stmt));
1206                 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207                 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208               }
1209             repl = new_debug_lhs;
1210           }
1211       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212           SET_USE (use, repl);
1213       update_stmt (use_stmt);
1214     }
1215   return new_lhs;
1216 }
1217 
1218 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1219    uses with new SSAs.  Also do this for the stmt that defines DEF
1220    if *DEF is not OP.  */
1221 
1222 static void
make_new_ssa_for_all_defs(tree * def,enum tree_code opcode,tree op,vec<gimple * > & stmts_to_fix)1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224                                  vec<gimple *> &stmts_to_fix)
1225 {
1226   unsigned i;
1227   gimple *stmt;
1228 
1229   if (*def != op
1230       && TREE_CODE (*def) == SSA_NAME
1231       && (stmt = SSA_NAME_DEF_STMT (*def))
1232       && gimple_code (stmt) != GIMPLE_NOP)
1233     *def = make_new_ssa_for_def (stmt, opcode, op);
1234 
1235   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236     make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238 
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1241    replace *DEF with OP as well.  */
1242 
1243 static void
propagate_op_to_single_use(tree op,gimple * stmt,tree * def)1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 {
1246   tree lhs;
1247   gimple *use_stmt;
1248   use_operand_p use;
1249   gimple_stmt_iterator gsi;
1250 
1251   if (is_gimple_call (stmt))
1252     lhs = gimple_call_lhs (stmt);
1253   else
1254     lhs = gimple_assign_lhs (stmt);
1255 
1256   gcc_assert (has_single_use (lhs));
1257   single_imm_use (lhs, &use, &use_stmt);
1258   if (lhs == *def)
1259     *def = op;
1260   SET_USE (use, op);
1261   if (TREE_CODE (op) != SSA_NAME)
1262     update_stmt (use_stmt);
1263   gsi = gsi_for_stmt (stmt);
1264   unlink_stmt_vdef (stmt);
1265   reassoc_remove_stmt (&gsi);
1266   release_defs (stmt);
1267 }
1268 
1269 /* Walks the linear chain with result *DEF searching for an operation
1270    with operand OP and code OPCODE removing that from the chain.  *DEF
1271    is updated if there is only one operand but no operation left.  */
1272 
1273 static void
zero_one_operation(tree * def,enum tree_code opcode,tree op)1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 {
1276   tree orig_def = *def;
1277   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278   /* PR72835 - Record the stmt chain that has to be updated such that
1279      we dont use the same LHS when the values computed are different.  */
1280   auto_vec<gimple *, 64> stmts_to_fix;
1281 
1282   do
1283     {
1284       tree name;
1285 
1286       if (opcode == MULT_EXPR)
1287           {
1288             if (stmt_is_power_of_op (stmt, op))
1289               {
1290                 if (decrement_power (stmt) == 1)
1291                     {
1292                       if (stmts_to_fix.length () > 0)
1293                         stmts_to_fix.pop ();
1294                       propagate_op_to_single_use (op, stmt, def);
1295                     }
1296                 break;
1297               }
1298             else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299               {
1300                 if (gimple_assign_rhs1 (stmt) == op)
1301                     {
1302                       tree cst = build_minus_one_cst (TREE_TYPE (op));
1303                       if (stmts_to_fix.length () > 0)
1304                         stmts_to_fix.pop ();
1305                       propagate_op_to_single_use (cst, stmt, def);
1306                       break;
1307                     }
1308                 else if (integer_minus_onep (op)
1309                            || real_minus_onep (op))
1310                     {
1311                       gimple_assign_set_rhs_code
1312                         (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313                       break;
1314                     }
1315               }
1316           }
1317 
1318       name = gimple_assign_rhs1 (stmt);
1319 
1320       /* If this is the operation we look for and one of the operands
1321          is ours simply propagate the other operand into the stmts
1322            single use.  */
1323       if (gimple_assign_rhs_code (stmt) == opcode
1324             && (name == op
1325                 || gimple_assign_rhs2 (stmt) == op))
1326           {
1327             if (name == op)
1328               name = gimple_assign_rhs2 (stmt);
1329             if (stmts_to_fix.length () > 0)
1330               stmts_to_fix.pop ();
1331             propagate_op_to_single_use (name, stmt, def);
1332             break;
1333           }
1334 
1335       /* We might have a multiply of two __builtin_pow* calls, and
1336            the operand might be hiding in the rightmost one.  Likewise
1337            this can happen for a negate.  */
1338       if (opcode == MULT_EXPR
1339             && gimple_assign_rhs_code (stmt) == opcode
1340             && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341             && has_single_use (gimple_assign_rhs2 (stmt)))
1342           {
1343             gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344             if (stmt_is_power_of_op (stmt2, op))
1345               {
1346                 if (decrement_power (stmt2) == 1)
1347                     propagate_op_to_single_use (op, stmt2, def);
1348                 else
1349                     stmts_to_fix.safe_push (stmt2);
1350                 break;
1351               }
1352             else if (is_gimple_assign (stmt2)
1353                        && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354               {
1355                 if (gimple_assign_rhs1 (stmt2) == op)
1356                     {
1357                       tree cst = build_minus_one_cst (TREE_TYPE (op));
1358                       propagate_op_to_single_use (cst, stmt2, def);
1359                       break;
1360                     }
1361                 else if (integer_minus_onep (op)
1362                            || real_minus_onep (op))
1363                     {
1364                       stmts_to_fix.safe_push (stmt2);
1365                       gimple_assign_set_rhs_code
1366                         (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367                       break;
1368                     }
1369               }
1370           }
1371 
1372       /* Continue walking the chain.  */
1373       gcc_assert (name != op
1374                       && TREE_CODE (name) == SSA_NAME);
1375       stmt = SSA_NAME_DEF_STMT (name);
1376       stmts_to_fix.safe_push (stmt);
1377     }
1378   while (1);
1379 
1380   if (stmts_to_fix.length () > 0 || *def == orig_def)
1381     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1382 }
1383 
1384 /* Returns true if statement S1 dominates statement S2.  Like
1385    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1386 
1387 static bool
reassoc_stmt_dominates_stmt_p(gimple * s1,gimple * s2)1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 {
1390   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391 
1392   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393      SSA_NAME.  Assume it lives at the beginning of function and
1394      thus dominates everything.  */
1395   if (!bb1 || s1 == s2)
1396     return true;
1397 
1398   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1399   if (!bb2)
1400     return false;
1401 
1402   if (bb1 == bb2)
1403     {
1404       /* PHIs in the same basic block are assumed to be
1405            executed all in parallel, if only one stmt is a PHI,
1406            it dominates the other stmt in the same basic block.  */
1407       if (gimple_code (s1) == GIMPLE_PHI)
1408           return true;
1409 
1410       if (gimple_code (s2) == GIMPLE_PHI)
1411           return false;
1412 
1413       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414 
1415       if (gimple_uid (s1) < gimple_uid (s2))
1416           return true;
1417 
1418       if (gimple_uid (s1) > gimple_uid (s2))
1419           return false;
1420 
1421       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422       unsigned int uid = gimple_uid (s1);
1423       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424           {
1425             gimple *s = gsi_stmt (gsi);
1426             if (gimple_uid (s) != uid)
1427               break;
1428             if (s == s2)
1429               return true;
1430           }
1431 
1432       return false;
1433     }
1434 
1435   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1436 }
1437 
1438 /* Insert STMT after INSERT_POINT.  */
1439 
1440 static void
insert_stmt_after(gimple * stmt,gimple * insert_point)1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 {
1443   gimple_stmt_iterator gsi;
1444   basic_block bb;
1445 
1446   if (gimple_code (insert_point) == GIMPLE_PHI)
1447     bb = gimple_bb (insert_point);
1448   else if (!stmt_ends_bb_p (insert_point))
1449     {
1450       gsi = gsi_for_stmt (insert_point);
1451       gimple_set_uid (stmt, gimple_uid (insert_point));
1452       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453       return;
1454     }
1455   else
1456     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457        thus if it must end a basic block, it should be a call that can
1458        throw, or some assignment that can throw.  If it throws, the LHS
1459        of it will not be initialized though, so only valid places using
1460        the SSA_NAME should be dominated by the fallthru edge.  */
1461     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462   gsi = gsi_after_labels (bb);
1463   if (gsi_end_p (gsi))
1464     {
1465       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466       gimple_set_uid (stmt,
1467                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468     }
1469   else
1470     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1472 }
1473 
1474 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475    the result.  Places the statement after the definition of either
1476    OP1 or OP2.  Returns the new statement.  */
1477 
1478 static gimple *
build_and_add_sum(tree type,tree op1,tree op2,enum tree_code opcode)1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 {
1481   gimple *op1def = NULL, *op2def = NULL;
1482   gimple_stmt_iterator gsi;
1483   tree op;
1484   gassign *sum;
1485 
1486   /* Create the addition statement.  */
1487   op = make_ssa_name (type);
1488   sum = gimple_build_assign (op, opcode, op1, op2);
1489 
1490   /* Find an insertion place and insert.  */
1491   if (TREE_CODE (op1) == SSA_NAME)
1492     op1def = SSA_NAME_DEF_STMT (op1);
1493   if (TREE_CODE (op2) == SSA_NAME)
1494     op2def = SSA_NAME_DEF_STMT (op2);
1495   if ((!op1def || gimple_nop_p (op1def))
1496       && (!op2def || gimple_nop_p (op2def)))
1497     {
1498       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499       if (gsi_end_p (gsi))
1500           {
1501             gimple_stmt_iterator gsi2
1502               = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503             gimple_set_uid (sum,
1504                                 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505           }
1506       else
1507           gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509     }
1510   else
1511     {
1512       gimple *insert_point;
1513       if ((!op1def || gimple_nop_p (op1def))
1514              || (op2def && !gimple_nop_p (op2def)
1515                  && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516           insert_point = op2def;
1517       else
1518           insert_point = op1def;
1519       insert_stmt_after (sum, insert_point);
1520     }
1521   update_stmt (sum);
1522 
1523   return sum;
1524 }
1525 
1526 /* Perform un-distribution of divisions and multiplications.
1527    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528    to (A + B) / X for real X.
1529 
1530    The algorithm is organized as follows.
1531 
1532     - First we walk the addition chain *OPS looking for summands that
1533       are defined by a multiplication or a real division.  This results
1534       in the candidates bitmap with relevant indices into *OPS.
1535 
1536     - Second we build the chains of multiplications or divisions for
1537       these candidates, counting the number of occurrences of (operand, code)
1538       pairs in all of the candidates chains.
1539 
1540     - Third we sort the (operand, code) pairs by number of occurrence and
1541       process them starting with the pair with the most uses.
1542 
1543       * For each such pair we walk the candidates again to build a
1544         second candidate bitmap noting all multiplication/division chains
1545           that have at least one occurrence of (operand, code).
1546 
1547       * We build an alternate addition chain only covering these
1548         candidates with one (operand, code) operation removed from their
1549           multiplication/division chain.
1550 
1551       * The first candidate gets replaced by the alternate addition chain
1552         multiplied/divided by the operand.
1553 
1554       * All candidate chains get disabled for further processing and
1555         processing of (operand, code) pairs continues.
1556 
1557   The alternate addition chains built are re-processed by the main
1558   reassociation algorithm which allows optimizing a * x * y + b * y * x
1559   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1560 
1561 static bool
undistribute_ops_list(enum tree_code opcode,vec<operand_entry * > * ops,struct loop * loop)1562 undistribute_ops_list (enum tree_code opcode,
1563                            vec<operand_entry *> *ops, struct loop *loop)
1564 {
1565   unsigned int length = ops->length ();
1566   operand_entry *oe1;
1567   unsigned i, j;
1568   unsigned nr_candidates, nr_candidates2;
1569   sbitmap_iterator sbi0;
1570   vec<operand_entry *> *subops;
1571   bool changed = false;
1572   unsigned int next_oecount_id = 0;
1573 
1574   if (length <= 1
1575       || opcode != PLUS_EXPR)
1576     return false;
1577 
1578   /* Build a list of candidates to process.  */
1579   auto_sbitmap candidates (length);
1580   bitmap_clear (candidates);
1581   nr_candidates = 0;
1582   FOR_EACH_VEC_ELT (*ops, i, oe1)
1583     {
1584       enum tree_code dcode;
1585       gimple *oe1def;
1586 
1587       if (TREE_CODE (oe1->op) != SSA_NAME)
1588           continue;
1589       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590       if (!is_gimple_assign (oe1def))
1591           continue;
1592       dcode = gimple_assign_rhs_code (oe1def);
1593       if ((dcode != MULT_EXPR
1594              && dcode != RDIV_EXPR)
1595             || !is_reassociable_op (oe1def, dcode, loop))
1596           continue;
1597 
1598       bitmap_set_bit (candidates, i);
1599       nr_candidates++;
1600     }
1601 
1602   if (nr_candidates < 2)
1603     return false;
1604 
1605   if (dump_file && (dump_flags & TDF_DETAILS))
1606     {
1607       fprintf (dump_file, "searching for un-distribute opportunities ");
1608       print_generic_expr (dump_file,
1609           (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1610       fprintf (dump_file, " %d\n", nr_candidates);
1611     }
1612 
1613   /* Build linearized sub-operand lists and the counting table.  */
1614   cvec.create (0);
1615 
1616   hash_table<oecount_hasher> ctable (15);
1617 
1618   /* ??? Macro arguments cannot have multi-argument template types in
1619      them.  This typedef is needed to workaround that limitation.  */
1620   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623     {
1624       gimple *oedef;
1625       enum tree_code oecode;
1626       unsigned j;
1627 
1628       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629       oecode = gimple_assign_rhs_code (oedef);
1630       linearize_expr_tree (&subops[i], oedef,
1631                                  associative_tree_code (oecode), false);
1632 
1633       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634           {
1635             oecount c;
1636             int *slot;
1637             int idx;
1638             c.oecode = oecode;
1639             c.cnt = 1;
1640             c.id = next_oecount_id++;
1641             c.op = oe1->op;
1642             cvec.safe_push (c);
1643             idx = cvec.length () + 41;
1644             slot = ctable.find_slot (idx, INSERT);
1645             if (!*slot)
1646               {
1647                 *slot = idx;
1648               }
1649             else
1650               {
1651                 cvec.pop ();
1652                 cvec[*slot - 42].cnt++;
1653               }
1654           }
1655     }
1656 
1657   /* Sort the counting table.  */
1658   cvec.qsort (oecount_cmp);
1659 
1660   if (dump_file && (dump_flags & TDF_DETAILS))
1661     {
1662       oecount *c;
1663       fprintf (dump_file, "Candidates:\n");
1664       FOR_EACH_VEC_ELT (cvec, j, c)
1665           {
1666             fprintf (dump_file, "  %u %s: ", c->cnt,
1667                        c->oecode == MULT_EXPR
1668                        ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669             print_generic_expr (dump_file, c->op);
1670             fprintf (dump_file, "\n");
1671           }
1672     }
1673 
1674   /* Process the (operand, code) pairs in order of most occurrence.  */
1675   auto_sbitmap candidates2 (length);
1676   while (!cvec.is_empty ())
1677     {
1678       oecount *c = &cvec.last ();
1679       if (c->cnt < 2)
1680           break;
1681 
1682       /* Now collect the operands in the outer chain that contain
1683          the common operand in their inner chain.  */
1684       bitmap_clear (candidates2);
1685       nr_candidates2 = 0;
1686       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687           {
1688             gimple *oedef;
1689             enum tree_code oecode;
1690             unsigned j;
1691             tree op = (*ops)[i]->op;
1692 
1693             /* If we undistributed in this chain already this may be
1694                a constant.  */
1695             if (TREE_CODE (op) != SSA_NAME)
1696               continue;
1697 
1698             oedef = SSA_NAME_DEF_STMT (op);
1699             oecode = gimple_assign_rhs_code (oedef);
1700             if (oecode != c->oecode)
1701               continue;
1702 
1703             FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704               {
1705                 if (oe1->op == c->op)
1706                     {
1707                       bitmap_set_bit (candidates2, i);
1708                       ++nr_candidates2;
1709                       break;
1710                     }
1711               }
1712           }
1713 
1714       if (nr_candidates2 >= 2)
1715           {
1716             operand_entry *oe1, *oe2;
1717             gimple *prod;
1718             int first = bitmap_first_set_bit (candidates2);
1719 
1720             /* Build the new addition chain.  */
1721             oe1 = (*ops)[first];
1722             if (dump_file && (dump_flags & TDF_DETAILS))
1723               {
1724                 fprintf (dump_file, "Building (");
1725                 print_generic_expr (dump_file, oe1->op);
1726               }
1727             zero_one_operation (&oe1->op, c->oecode, c->op);
1728             EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729               {
1730                 gimple *sum;
1731                 oe2 = (*ops)[i];
1732                 if (dump_file && (dump_flags & TDF_DETAILS))
1733                     {
1734                       fprintf (dump_file, " + ");
1735                       print_generic_expr (dump_file, oe2->op);
1736                     }
1737                 zero_one_operation (&oe2->op, c->oecode, c->op);
1738                 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739                                                oe1->op, oe2->op, opcode);
1740                 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741                 oe2->rank = 0;
1742                 oe1->op = gimple_get_lhs (sum);
1743               }
1744 
1745             /* Apply the multiplication/division.  */
1746             prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747                                             oe1->op, c->op, c->oecode);
1748             if (dump_file && (dump_flags & TDF_DETAILS))
1749               {
1750                 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751                 print_generic_expr (dump_file, c->op);
1752                 fprintf (dump_file, "\n");
1753               }
1754 
1755             /* Record it in the addition chain and disable further
1756                undistribution with this op.  */
1757             oe1->op = gimple_assign_lhs (prod);
1758             oe1->rank = get_rank (oe1->op);
1759             subops[first].release ();
1760 
1761             changed = true;
1762           }
1763 
1764       cvec.pop ();
1765     }
1766 
1767   for (i = 0; i < ops->length (); ++i)
1768     subops[i].release ();
1769   free (subops);
1770   cvec.release ();
1771 
1772   return changed;
1773 }
1774 
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776    expression, examine the other OPS to see if any of them are comparisons
1777    of the same values, which we may be able to combine or eliminate.
1778    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1779 
1780 static bool
eliminate_redundant_comparison(enum tree_code opcode,vec<operand_entry * > * ops,unsigned int currindex,operand_entry * curr)1781 eliminate_redundant_comparison (enum tree_code opcode,
1782                                         vec<operand_entry *> *ops,
1783                                         unsigned int currindex,
1784                                         operand_entry *curr)
1785 {
1786   tree op1, op2;
1787   enum tree_code lcode, rcode;
1788   gimple *def1, *def2;
1789   int i;
1790   operand_entry *oe;
1791 
1792   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793     return false;
1794 
1795   /* Check that CURR is a comparison.  */
1796   if (TREE_CODE (curr->op) != SSA_NAME)
1797     return false;
1798   def1 = SSA_NAME_DEF_STMT (curr->op);
1799   if (!is_gimple_assign (def1))
1800     return false;
1801   lcode = gimple_assign_rhs_code (def1);
1802   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803     return false;
1804   op1 = gimple_assign_rhs1 (def1);
1805   op2 = gimple_assign_rhs2 (def1);
1806 
1807   /* Now look for a similar comparison in the remaining OPS.  */
1808   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1809     {
1810       tree t;
1811 
1812       if (TREE_CODE (oe->op) != SSA_NAME)
1813           continue;
1814       def2 = SSA_NAME_DEF_STMT (oe->op);
1815       if (!is_gimple_assign (def2))
1816           continue;
1817       rcode = gimple_assign_rhs_code (def2);
1818       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819           continue;
1820 
1821       /* If we got here, we have a match.  See if we can combine the
1822            two comparisons.  */
1823       if (opcode == BIT_IOR_EXPR)
1824           t = maybe_fold_or_comparisons (lcode, op1, op2,
1825                                                rcode, gimple_assign_rhs1 (def2),
1826                                                gimple_assign_rhs2 (def2));
1827       else
1828           t = maybe_fold_and_comparisons (lcode, op1, op2,
1829                                                   rcode, gimple_assign_rhs1 (def2),
1830                                                   gimple_assign_rhs2 (def2));
1831       if (!t)
1832           continue;
1833 
1834       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835            always give us a boolean_type_node value back.  If the original
1836            BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837            we need to convert.  */
1838       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839           t = fold_convert (TREE_TYPE (curr->op), t);
1840 
1841       if (TREE_CODE (t) != INTEGER_CST
1842             && !operand_equal_p (t, curr->op, 0))
1843           {
1844             enum tree_code subcode;
1845             tree newop1, newop2;
1846             if (!COMPARISON_CLASS_P (t))
1847               continue;
1848             extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849             STRIP_USELESS_TYPE_CONVERSION (newop1);
1850             STRIP_USELESS_TYPE_CONVERSION (newop2);
1851             if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852               continue;
1853           }
1854 
1855       if (dump_file && (dump_flags & TDF_DETAILS))
1856           {
1857             fprintf (dump_file, "Equivalence: ");
1858             print_generic_expr (dump_file, curr->op);
1859             fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860             print_generic_expr (dump_file, oe->op);
1861             fprintf (dump_file, " -> ");
1862             print_generic_expr (dump_file, t);
1863             fprintf (dump_file, "\n");
1864           }
1865 
1866       /* Now we can delete oe, as it has been subsumed by the new combined
1867          expression t.  */
1868       ops->ordered_remove (i);
1869       reassociate_stats.ops_eliminated ++;
1870 
1871       /* If t is the same as curr->op, we're done.  Otherwise we must
1872            replace curr->op with t.  Special case is if we got a constant
1873            back, in which case we add it to the end instead of in place of
1874            the current entry.  */
1875       if (TREE_CODE (t) == INTEGER_CST)
1876           {
1877             ops->ordered_remove (currindex);
1878             add_to_ops_vec (ops, t);
1879           }
1880       else if (!operand_equal_p (t, curr->op, 0))
1881           {
1882             gimple *sum;
1883             enum tree_code subcode;
1884             tree newop1;
1885             tree newop2;
1886             gcc_assert (COMPARISON_CLASS_P (t));
1887             extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888             STRIP_USELESS_TYPE_CONVERSION (newop1);
1889             STRIP_USELESS_TYPE_CONVERSION (newop2);
1890             gcc_checking_assert (is_gimple_val (newop1)
1891                                      && is_gimple_val (newop2));
1892             sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893             curr->op = gimple_get_lhs (sum);
1894           }
1895       return true;
1896     }
1897 
1898   return false;
1899 }
1900 
1901 
1902 /* Transform repeated addition of same values into multiply with
1903    constant.  */
1904 static bool
transform_add_to_multiply(vec<operand_entry * > * ops)1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1906 {
1907   operand_entry *oe;
1908   tree op = NULL_TREE;
1909   int j;
1910   int i, start = -1, end = 0, count = 0;
1911   auto_vec<std::pair <int, int> > indxs;
1912   bool changed = false;
1913 
1914   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916             || !flag_unsafe_math_optimizations))
1917     return false;
1918 
1919   /* Look for repeated operands.  */
1920   FOR_EACH_VEC_ELT (*ops, i, oe)
1921     {
1922       if (start == -1)
1923           {
1924             count = 1;
1925             op = oe->op;
1926             start = i;
1927           }
1928       else if (operand_equal_p (oe->op, op, 0))
1929           {
1930             count++;
1931             end = i;
1932           }
1933       else
1934           {
1935             if (count > 1)
1936               indxs.safe_push (std::make_pair (start, end));
1937             count = 1;
1938             op = oe->op;
1939             start = i;
1940           }
1941     }
1942 
1943   if (count > 1)
1944     indxs.safe_push (std::make_pair (start, end));
1945 
1946   for (j = indxs.length () - 1; j >= 0; --j)
1947     {
1948       /* Convert repeated operand addition to multiplication.  */
1949       start = indxs[j].first;
1950       end = indxs[j].second;
1951       op = (*ops)[start]->op;
1952       count = end - start + 1;
1953       for (i = end; i >= start; --i)
1954           ops->unordered_remove (i);
1955       tree tmp = make_ssa_name (TREE_TYPE (op));
1956       tree cst = build_int_cst (integer_type_node, count);
1957       gassign *mul_stmt
1958           = gimple_build_assign (tmp, MULT_EXPR,
1959                                      op, fold_convert (TREE_TYPE (op), cst));
1960       gimple_set_visited (mul_stmt, true);
1961       add_to_ops_vec (ops, tmp, mul_stmt);
1962       changed = true;
1963     }
1964 
1965   return changed;
1966 }
1967 
1968 
1969 /* Perform various identities and other optimizations on the list of
1970    operand entries, stored in OPS.  The tree code for the binary
1971    operation between all the operands is OPCODE.  */
1972 
1973 static void
optimize_ops_list(enum tree_code opcode,vec<operand_entry * > * ops)1974 optimize_ops_list (enum tree_code opcode,
1975                        vec<operand_entry *> *ops)
1976 {
1977   unsigned int length = ops->length ();
1978   unsigned int i;
1979   operand_entry *oe;
1980   operand_entry *oelast = NULL;
1981   bool iterate = false;
1982 
1983   if (length == 1)
1984     return;
1985 
1986   oelast = ops->last ();
1987 
1988   /* If the last two are constants, pop the constants off, merge them
1989      and try the next two.  */
1990   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1991     {
1992       operand_entry *oelm1 = (*ops)[length - 2];
1993 
1994       if (oelm1->rank == 0
1995             && is_gimple_min_invariant (oelm1->op)
1996             && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997                                                TREE_TYPE (oelast->op)))
1998           {
1999             tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000                                              oelm1->op, oelast->op);
2001 
2002             if (folded && is_gimple_min_invariant (folded))
2003               {
2004                 if (dump_file && (dump_flags & TDF_DETAILS))
2005                     fprintf (dump_file, "Merging constants\n");
2006 
2007                 ops->pop ();
2008                 ops->pop ();
2009 
2010                 add_to_ops_vec (ops, folded);
2011                 reassociate_stats.constants_eliminated++;
2012 
2013                 optimize_ops_list (opcode, ops);
2014                 return;
2015               }
2016           }
2017     }
2018 
2019   eliminate_using_constants (opcode, ops);
2020   oelast = NULL;
2021 
2022   for (i = 0; ops->iterate (i, &oe);)
2023     {
2024       bool done = false;
2025 
2026       if (eliminate_not_pairs (opcode, ops, i, oe))
2027           return;
2028       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029             || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030             || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2031           {
2032             if (done)
2033               return;
2034             iterate = true;
2035             oelast = NULL;
2036             continue;
2037           }
2038       oelast = oe;
2039       i++;
2040     }
2041 
2042   length = ops->length ();
2043   oelast = ops->last ();
2044 
2045   if (iterate)
2046     optimize_ops_list (opcode, ops);
2047 }
2048 
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050    it to try to change a logical combination of comparisons into a range
2051    test.
2052 
2053    For example, both
2054           X == 2 || X == 5 || X == 3 || X == 4
2055    and
2056           X >= 2 && X <= 5
2057    are converted to
2058           (unsigned) (X - 2) <= 3
2059 
2060    For more information see comments above fold_test_range in fold-const.c,
2061    this implementation is for GIMPLE.  */
2062 
2063 struct range_entry
2064 {
2065   tree exp;
2066   tree low;
2067   tree high;
2068   bool in_p;
2069   bool strict_overflow_p;
2070   unsigned int idx, next;
2071 };
2072 
2073 /* This is similar to make_range in fold-const.c, but on top of
2074    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2075    an SSA_NAME and STMT argument is ignored, otherwise STMT
2076    argument should be a GIMPLE_COND.  */
2077 
2078 static void
init_range_entry(struct range_entry * r,tree exp,gimple * stmt)2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2080 {
2081   int in_p;
2082   tree low, high;
2083   bool is_bool, strict_overflow_p;
2084 
2085   r->exp = NULL_TREE;
2086   r->in_p = false;
2087   r->strict_overflow_p = false;
2088   r->low = NULL_TREE;
2089   r->high = NULL_TREE;
2090   if (exp != NULL_TREE
2091       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092     return;
2093 
2094   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095      and see if we can refine the range.  Some of the cases below may not
2096      happen, but it doesn't seem worth worrying about this.  We "continue"
2097      the outer loop when we've changed something; otherwise we "break"
2098      the switch, which will "break" the while.  */
2099   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100   high = low;
2101   in_p = 0;
2102   strict_overflow_p = false;
2103   is_bool = false;
2104   if (exp == NULL_TREE)
2105     is_bool = true;
2106   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2107     {
2108       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109           is_bool = true;
2110       else
2111           return;
2112     }
2113   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114     is_bool = true;
2115 
2116   while (1)
2117     {
2118       enum tree_code code;
2119       tree arg0, arg1, exp_type;
2120       tree nexp;
2121       location_t loc;
2122 
2123       if (exp != NULL_TREE)
2124           {
2125             if (TREE_CODE (exp) != SSA_NAME
2126                 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127               break;
2128 
2129             stmt = SSA_NAME_DEF_STMT (exp);
2130             if (!is_gimple_assign (stmt))
2131               break;
2132 
2133             code = gimple_assign_rhs_code (stmt);
2134             arg0 = gimple_assign_rhs1 (stmt);
2135             arg1 = gimple_assign_rhs2 (stmt);
2136             exp_type = TREE_TYPE (exp);
2137           }
2138       else
2139           {
2140             code = gimple_cond_code (stmt);
2141             arg0 = gimple_cond_lhs (stmt);
2142             arg1 = gimple_cond_rhs (stmt);
2143             exp_type = boolean_type_node;
2144           }
2145 
2146       if (TREE_CODE (arg0) != SSA_NAME)
2147           break;
2148       loc = gimple_location (stmt);
2149       switch (code)
2150           {
2151           case BIT_NOT_EXPR:
2152             if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2153                 /* Ensure the range is either +[-,0], +[0,0],
2154                      -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2155                      -[1,1].  If it is e.g. +[-,-] or -[-,-]
2156                      or similar expression of unconditional true or
2157                      false, it should not be negated.  */
2158                 && ((high && integer_zerop (high))
2159                       || (low && integer_onep (low))))
2160               {
2161                 in_p = !in_p;
2162                 exp = arg0;
2163                 continue;
2164               }
2165             break;
2166           case SSA_NAME:
2167             exp = arg0;
2168             continue;
2169           CASE_CONVERT:
2170             if (is_bool)
2171               {
2172                 if ((TYPE_PRECISION (exp_type) == 1
2173                        || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2174                       && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2175                     return;
2176               }
2177             else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2178               {
2179                 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2180                     is_bool = true;
2181                 else
2182                     return;
2183               }
2184             else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2185               is_bool = true;
2186             goto do_default;
2187           case EQ_EXPR:
2188           case NE_EXPR:
2189           case LT_EXPR:
2190           case LE_EXPR:
2191           case GE_EXPR:
2192           case GT_EXPR:
2193             is_bool = true;
2194             /* FALLTHRU */
2195           default:
2196             if (!is_bool)
2197               return;
2198           do_default:
2199             nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2200                                           &low, &high, &in_p,
2201                                           &strict_overflow_p);
2202             if (nexp != NULL_TREE)
2203               {
2204                 exp = nexp;
2205                 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2206                 continue;
2207               }
2208             break;
2209           }
2210       break;
2211     }
2212   if (is_bool)
2213     {
2214       r->exp = exp;
2215       r->in_p = in_p;
2216       r->low = low;
2217       r->high = high;
2218       r->strict_overflow_p = strict_overflow_p;
2219     }
2220 }
2221 
2222 /* Comparison function for qsort.  Sort entries
2223    without SSA_NAME exp first, then with SSA_NAMEs sorted
2224    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2225    by increasing ->low and if ->low is the same, by increasing
2226    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2227    maximum.  */
2228 
2229 static int
range_entry_cmp(const void * a,const void * b)2230 range_entry_cmp (const void *a, const void *b)
2231 {
2232   const struct range_entry *p = (const struct range_entry *) a;
2233   const struct range_entry *q = (const struct range_entry *) b;
2234 
2235   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2236     {
2237       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2238           {
2239             /* Group range_entries for the same SSA_NAME together.  */
2240             if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2241               return -1;
2242             else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2243               return 1;
2244             /* If ->low is different, NULL low goes first, then by
2245                ascending low.  */
2246             if (p->low != NULL_TREE)
2247               {
2248                 if (q->low != NULL_TREE)
2249                     {
2250                       tree tem = fold_binary (LT_EXPR, boolean_type_node,
2251                                                     p->low, q->low);
2252                       if (tem && integer_onep (tem))
2253                         return -1;
2254                       tem = fold_binary (GT_EXPR, boolean_type_node,
2255                                              p->low, q->low);
2256                       if (tem && integer_onep (tem))
2257                         return 1;
2258                     }
2259                 else
2260                     return 1;
2261               }
2262             else if (q->low != NULL_TREE)
2263               return -1;
2264             /* If ->high is different, NULL high goes last, before that by
2265                ascending high.  */
2266             if (p->high != NULL_TREE)
2267               {
2268                 if (q->high != NULL_TREE)
2269                     {
2270                       tree tem = fold_binary (LT_EXPR, boolean_type_node,
2271                                                     p->high, q->high);
2272                       if (tem && integer_onep (tem))
2273                         return -1;
2274                       tem = fold_binary (GT_EXPR, boolean_type_node,
2275                                              p->high, q->high);
2276                       if (tem && integer_onep (tem))
2277                         return 1;
2278                     }
2279                 else
2280                     return -1;
2281               }
2282             else if (q->high != NULL_TREE)
2283               return 1;
2284             /* If both ranges are the same, sort below by ascending idx.  */
2285           }
2286       else
2287           return 1;
2288     }
2289   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2290     return -1;
2291 
2292   if (p->idx < q->idx)
2293     return -1;
2294   else
2295     {
2296       gcc_checking_assert (p->idx > q->idx);
2297       return 1;
2298     }
2299 }
2300 
2301 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2302    insert needed statements BEFORE or after GSI.  */
2303 
2304 static tree
force_into_ssa_name(gimple_stmt_iterator * gsi,tree expr,bool before)2305 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2306 {
2307   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2308   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2309   if (TREE_CODE (ret) != SSA_NAME)
2310     {
2311       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2312       if (before)
2313           gsi_insert_before (gsi, g, GSI_SAME_STMT);
2314       else
2315           gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2316       ret = gimple_assign_lhs (g);
2317     }
2318   return ret;
2319 }
2320 
2321 /* Helper routine of optimize_range_test.
2322    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2323    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2324    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2325    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2326    an array of COUNT pointers to other ranges.  Return
2327    true if the range merge has been successful.
2328    If OPCODE is ERROR_MARK, this is called from within
2329    maybe_optimize_range_tests and is performing inter-bb range optimization.
2330    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2331    oe->rank.  */
2332 
2333 static bool
update_range_test(struct range_entry * range,struct range_entry * otherrange,struct range_entry ** otherrangep,unsigned int count,enum tree_code opcode,vec<operand_entry * > * ops,tree exp,gimple_seq seq,bool in_p,tree low,tree high,bool strict_overflow_p)2334 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2335                        struct range_entry **otherrangep,
2336                        unsigned int count, enum tree_code opcode,
2337                        vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2338                        bool in_p, tree low, tree high, bool strict_overflow_p)
2339 {
2340   operand_entry *oe = (*ops)[range->idx];
2341   tree op = oe->op;
2342   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2343                         : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2344   location_t loc = gimple_location (stmt);
2345   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2346   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2347                                         in_p, low, high);
2348   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2349   gimple_stmt_iterator gsi;
2350   unsigned int i, uid;
2351 
2352   if (tem == NULL_TREE)
2353     return false;
2354 
2355   /* If op is default def SSA_NAME, there is no place to insert the
2356      new comparison.  Give up, unless we can use OP itself as the
2357      range test.  */
2358   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2359     {
2360       if (op == range->exp
2361             && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2362                 || TREE_CODE (optype) == BOOLEAN_TYPE)
2363             && (op == tem
2364                 || (TREE_CODE (tem) == EQ_EXPR
2365                       && TREE_OPERAND (tem, 0) == op
2366                       && integer_onep (TREE_OPERAND (tem, 1))))
2367             && opcode != BIT_IOR_EXPR
2368             && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2369           {
2370             stmt = NULL;
2371             tem = op;
2372           }
2373       else
2374           return false;
2375     }
2376 
2377   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2378     warning_at (loc, OPT_Wstrict_overflow,
2379                     "assuming signed overflow does not occur "
2380                     "when simplifying range test");
2381 
2382   if (dump_file && (dump_flags & TDF_DETAILS))
2383     {
2384       struct range_entry *r;
2385       fprintf (dump_file, "Optimizing range tests ");
2386       print_generic_expr (dump_file, range->exp);
2387       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2388       print_generic_expr (dump_file, range->low);
2389       fprintf (dump_file, ", ");
2390       print_generic_expr (dump_file, range->high);
2391       fprintf (dump_file, "]");
2392       for (i = 0; i < count; i++)
2393           {
2394             if (otherrange)
2395               r = otherrange + i;
2396             else
2397               r = otherrangep[i];
2398             if (r->exp
2399                 && r->exp != range->exp
2400                 && TREE_CODE (r->exp) == SSA_NAME)
2401               {
2402                 fprintf (dump_file, " and ");
2403                 print_generic_expr (dump_file, r->exp);
2404               }
2405             else
2406               fprintf (dump_file, " and");
2407             fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2408             print_generic_expr (dump_file, r->low);
2409             fprintf (dump_file, ", ");
2410             print_generic_expr (dump_file, r->high);
2411             fprintf (dump_file, "]");
2412           }
2413       fprintf (dump_file, "\n into ");
2414       print_generic_expr (dump_file, tem);
2415       fprintf (dump_file, "\n");
2416     }
2417 
2418   if (opcode == BIT_IOR_EXPR
2419       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2420     tem = invert_truthvalue_loc (loc, tem);
2421 
2422   tem = fold_convert_loc (loc, optype, tem);
2423   if (stmt)
2424     {
2425       gsi = gsi_for_stmt (stmt);
2426       uid = gimple_uid (stmt);
2427     }
2428   else
2429     {
2430       gsi = gsi_none ();
2431       uid = 0;
2432     }
2433   if (stmt == NULL)
2434     gcc_checking_assert (tem == op);
2435   /* In rare cases range->exp can be equal to lhs of stmt.
2436      In that case we have to insert after the stmt rather then before
2437      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2438   else if (op != range->exp)
2439     {
2440       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2441       tem = force_into_ssa_name (&gsi, tem, true);
2442       gsi_prev (&gsi);
2443     }
2444   else if (gimple_code (stmt) != GIMPLE_PHI)
2445     {
2446       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2447       tem = force_into_ssa_name (&gsi, tem, false);
2448     }
2449   else
2450     {
2451       gsi = gsi_after_labels (gimple_bb (stmt));
2452       if (!gsi_end_p (gsi))
2453           uid = gimple_uid (gsi_stmt (gsi));
2454       else
2455           {
2456             gsi = gsi_start_bb (gimple_bb (stmt));
2457             uid = 1;
2458             while (!gsi_end_p (gsi))
2459               {
2460                 uid = gimple_uid (gsi_stmt (gsi));
2461                 gsi_next (&gsi);
2462               }
2463           }
2464       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2465       tem = force_into_ssa_name (&gsi, tem, true);
2466       if (gsi_end_p (gsi))
2467           gsi = gsi_last_bb (gimple_bb (stmt));
2468       else
2469           gsi_prev (&gsi);
2470     }
2471   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2472     if (gimple_uid (gsi_stmt (gsi)))
2473       break;
2474     else
2475       gimple_set_uid (gsi_stmt (gsi), uid);
2476 
2477   oe->op = tem;
2478   range->exp = exp;
2479   range->low = low;
2480   range->high = high;
2481   range->in_p = in_p;
2482   range->strict_overflow_p = false;
2483 
2484   for (i = 0; i < count; i++)
2485     {
2486       if (otherrange)
2487           range = otherrange + i;
2488       else
2489           range = otherrangep[i];
2490       oe = (*ops)[range->idx];
2491       /* Now change all the other range test immediate uses, so that
2492            those tests will be optimized away.  */
2493       if (opcode == ERROR_MARK)
2494           {
2495             if (oe->op)
2496               oe->op = build_int_cst (TREE_TYPE (oe->op),
2497                                             oe->rank == BIT_IOR_EXPR ? 0 : 1);
2498             else
2499               oe->op = (oe->rank == BIT_IOR_EXPR
2500                           ? boolean_false_node : boolean_true_node);
2501           }
2502       else
2503           oe->op = error_mark_node;
2504       range->exp = NULL_TREE;
2505       range->low = NULL_TREE;
2506       range->high = NULL_TREE;
2507     }
2508   return true;
2509 }
2510 
2511 /* Optimize X == CST1 || X == CST2
2512    if popcount (CST1 ^ CST2) == 1 into
2513    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2514    Similarly for ranges.  E.g.
2515    X != 2 && X != 3 && X != 10 && X != 11
2516    will be transformed by the previous optimization into
2517    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2518    and this loop can transform that into
2519    !(((X & ~8) - 2U) <= 1U).  */
2520 
2521 static bool
optimize_range_tests_xor(enum tree_code opcode,tree type,tree lowi,tree lowj,tree highi,tree highj,vec<operand_entry * > * ops,struct range_entry * rangei,struct range_entry * rangej)2522 optimize_range_tests_xor (enum tree_code opcode, tree type,
2523                                 tree lowi, tree lowj, tree highi, tree highj,
2524                                 vec<operand_entry *> *ops,
2525                                 struct range_entry *rangei,
2526                                 struct range_entry *rangej)
2527 {
2528   tree lowxor, highxor, tem, exp;
2529   /* Check lowi ^ lowj == highi ^ highj and
2530      popcount (lowi ^ lowj) == 1.  */
2531   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2532   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2533     return false;
2534   if (!integer_pow2p (lowxor))
2535     return false;
2536   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2537   if (!tree_int_cst_equal (lowxor, highxor))
2538     return false;
2539 
2540   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2541   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2542   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2543   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2544   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2545                                NULL, rangei->in_p, lowj, highj,
2546                                rangei->strict_overflow_p
2547                                || rangej->strict_overflow_p))
2548     return true;
2549   return false;
2550 }
2551 
2552 /* Optimize X == CST1 || X == CST2
2553    if popcount (CST2 - CST1) == 1 into
2554    ((X - CST1) & ~(CST2 - CST1)) == 0.
2555    Similarly for ranges.  E.g.
2556    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2557    || X == 75 || X == 45
2558    will be transformed by the previous optimization into
2559    (X - 43U) <= 3U || (X - 75U) <= 3U
2560    and this loop can transform that into
2561    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2562 static bool
optimize_range_tests_diff(enum tree_code opcode,tree type,tree lowi,tree lowj,tree highi,tree highj,vec<operand_entry * > * ops,struct range_entry * rangei,struct range_entry * rangej)2563 optimize_range_tests_diff (enum tree_code opcode, tree type,
2564                                  tree lowi, tree lowj, tree highi, tree highj,
2565                                  vec<operand_entry *> *ops,
2566                                  struct range_entry *rangei,
2567                                  struct range_entry *rangej)
2568 {
2569   tree tem1, tem2, mask;
2570   /* Check highi - lowi == highj - lowj.  */
2571   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2572   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2573     return false;
2574   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2575   if (!tree_int_cst_equal (tem1, tem2))
2576     return false;
2577   /* Check popcount (lowj - lowi) == 1.  */
2578   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2579   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2580     return false;
2581   if (!integer_pow2p (tem1))
2582     return false;
2583 
2584   type = unsigned_type_for (type);
2585   tem1 = fold_convert (type, tem1);
2586   tem2 = fold_convert (type, tem2);
2587   lowi = fold_convert (type, lowi);
2588   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2589   tem1 = fold_build2 (MINUS_EXPR, type,
2590                           fold_convert (type, rangei->exp), lowi);
2591   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2592   lowj = build_int_cst (type, 0);
2593   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2594                                NULL, rangei->in_p, lowj, tem2,
2595                                rangei->strict_overflow_p
2596                                || rangej->strict_overflow_p))
2597     return true;
2598   return false;
2599 }
2600 
2601 /* It does some common checks for function optimize_range_tests_xor and
2602    optimize_range_tests_diff.
2603    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2604    Else it calls optimize_range_tests_diff.  */
2605 
2606 static bool
optimize_range_tests_1(enum tree_code opcode,int first,int length,bool optimize_xor,vec<operand_entry * > * ops,struct range_entry * ranges)2607 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2608                               bool optimize_xor, vec<operand_entry *> *ops,
2609                               struct range_entry *ranges)
2610 {
2611   int i, j;
2612   bool any_changes = false;
2613   for (i = first; i < length; i++)
2614     {
2615       tree lowi, highi, lowj, highj, type, tem;
2616 
2617       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2618           continue;
2619       type = TREE_TYPE (ranges[i].exp);
2620       if (!INTEGRAL_TYPE_P (type))
2621           continue;
2622       lowi = ranges[i].low;
2623       if (lowi == NULL_TREE)
2624           lowi = TYPE_MIN_VALUE (type);
2625       highi = ranges[i].high;
2626       if (highi == NULL_TREE)
2627           continue;
2628       for (j = i + 1; j < length && j < i + 64; j++)
2629           {
2630             bool changes;
2631             if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2632               continue;
2633             lowj = ranges[j].low;
2634             if (lowj == NULL_TREE)
2635               continue;
2636             highj = ranges[j].high;
2637             if (highj == NULL_TREE)
2638               highj = TYPE_MAX_VALUE (type);
2639             /* Check lowj > highi.  */
2640             tem = fold_binary (GT_EXPR, boolean_type_node,
2641                                    lowj, highi);
2642             if (tem == NULL_TREE || !integer_onep (tem))
2643               continue;
2644             if (optimize_xor)
2645               changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2646                                                             highi, highj, ops,
2647                                                             ranges + i, ranges + j);
2648             else
2649               changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2650                                                              highi, highj, ops,
2651                                                              ranges + i, ranges + j);
2652             if (changes)
2653               {
2654                 any_changes = true;
2655                 break;
2656               }
2657           }
2658     }
2659   return any_changes;
2660 }
2661 
2662 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2663    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2664    EXP on success, NULL otherwise.  */
2665 
2666 static tree
extract_bit_test_mask(tree exp,int prec,tree totallow,tree low,tree high,wide_int * mask,tree * totallowp)2667 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2668                            wide_int *mask, tree *totallowp)
2669 {
2670   tree tem = int_const_binop (MINUS_EXPR, high, low);
2671   if (tem == NULL_TREE
2672       || TREE_CODE (tem) != INTEGER_CST
2673       || TREE_OVERFLOW (tem)
2674       || tree_int_cst_sgn (tem) == -1
2675       || compare_tree_int (tem, prec) != -1)
2676     return NULL_TREE;
2677 
2678   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2679   *mask = wi::shifted_mask (0, max, false, prec);
2680   if (TREE_CODE (exp) == BIT_AND_EXPR
2681       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2682     {
2683       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2684       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2685       if (wi::popcount (msk) == 1
2686             && wi::ltu_p (msk, prec - max))
2687           {
2688             *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2689             max += msk.to_uhwi ();
2690             exp = TREE_OPERAND (exp, 0);
2691             if (integer_zerop (low)
2692                 && TREE_CODE (exp) == PLUS_EXPR
2693                 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2694               {
2695                 tree ret = TREE_OPERAND (exp, 0);
2696                 STRIP_NOPS (ret);
2697                 widest_int bias
2698                   = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2699                                              TYPE_PRECISION (TREE_TYPE (low))));
2700                 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2701                 if (totallowp)
2702                     {
2703                       *totallowp = tbias;
2704                       return ret;
2705                     }
2706                 else if (!tree_int_cst_lt (totallow, tbias))
2707                     return NULL_TREE;
2708                 bias = wi::to_widest (tbias);
2709                 bias -= wi::to_widest (totallow);
2710                 if (bias >= 0 && bias < prec - max)
2711                     {
2712                       *mask = wi::lshift (*mask, bias);
2713                       return ret;
2714                     }
2715               }
2716           }
2717     }
2718   if (totallowp)
2719     return exp;
2720   if (!tree_int_cst_lt (totallow, low))
2721     return exp;
2722   tem = int_const_binop (MINUS_EXPR, low, totallow);
2723   if (tem == NULL_TREE
2724       || TREE_CODE (tem) != INTEGER_CST
2725       || TREE_OVERFLOW (tem)
2726       || compare_tree_int (tem, prec - max) == 1)
2727     return NULL_TREE;
2728 
2729   *mask = wi::lshift (*mask, wi::to_widest (tem));
2730   return exp;
2731 }
2732 
2733 /* Attempt to optimize small range tests using bit test.
2734    E.g.
2735    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2736    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2737    has been by earlier optimizations optimized into:
2738    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2739    As all the 43 through 82 range is less than 64 numbers,
2740    for 64-bit word targets optimize that into:
2741    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
2742 
2743 static bool
optimize_range_tests_to_bit_test(enum tree_code opcode,int first,int length,vec<operand_entry * > * ops,struct range_entry * ranges)2744 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2745                                           vec<operand_entry *> *ops,
2746                                           struct range_entry *ranges)
2747 {
2748   int i, j;
2749   bool any_changes = false;
2750   int prec = GET_MODE_BITSIZE (word_mode);
2751   auto_vec<struct range_entry *, 64> candidates;
2752 
2753   for (i = first; i < length - 2; i++)
2754     {
2755       tree lowi, highi, lowj, highj, type;
2756 
2757       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2758           continue;
2759       type = TREE_TYPE (ranges[i].exp);
2760       if (!INTEGRAL_TYPE_P (type))
2761           continue;
2762       lowi = ranges[i].low;
2763       if (lowi == NULL_TREE)
2764           lowi = TYPE_MIN_VALUE (type);
2765       highi = ranges[i].high;
2766       if (highi == NULL_TREE)
2767           continue;
2768       wide_int mask;
2769       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2770                                                   highi, &mask, &lowi);
2771       if (exp == NULL_TREE)
2772           continue;
2773       bool strict_overflow_p = ranges[i].strict_overflow_p;
2774       candidates.truncate (0);
2775       int end = MIN (i + 64, length);
2776       for (j = i + 1; j < end; j++)
2777           {
2778             tree exp2;
2779             if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2780               continue;
2781             if (ranges[j].exp == exp)
2782               ;
2783             else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2784               {
2785                 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2786                 if (exp2 == exp)
2787                     ;
2788                 else if (TREE_CODE (exp2) == PLUS_EXPR)
2789                     {
2790                       exp2 = TREE_OPERAND (exp2, 0);
2791                       STRIP_NOPS (exp2);
2792                       if (exp2 != exp)
2793                         continue;
2794                     }
2795                 else
2796                     continue;
2797               }
2798             else
2799               continue;
2800             lowj = ranges[j].low;
2801             if (lowj == NULL_TREE)
2802               continue;
2803             highj = ranges[j].high;
2804             if (highj == NULL_TREE)
2805               highj = TYPE_MAX_VALUE (type);
2806             wide_int mask2;
2807             exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2808                                                   highj, &mask2, NULL);
2809             if (exp2 != exp)
2810               continue;
2811             mask |= mask2;
2812             strict_overflow_p |= ranges[j].strict_overflow_p;
2813             candidates.safe_push (&ranges[j]);
2814           }
2815 
2816       /* If we need otherwise 3 or more comparisons, use a bit test.  */
2817       if (candidates.length () >= 2)
2818           {
2819             tree high = wide_int_to_tree (TREE_TYPE (lowi),
2820                                                   wi::to_widest (lowi)
2821                                                   + prec - 1 - wi::clz (mask));
2822             operand_entry *oe = (*ops)[ranges[i].idx];
2823             tree op = oe->op;
2824             gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2825                                   : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2826             location_t loc = gimple_location (stmt);
2827             tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2828 
2829             /* See if it isn't cheaper to pretend the minimum value of the
2830                range is 0, if maximum value is small enough.
2831                We can avoid then subtraction of the minimum value, but the
2832                mask constant could be perhaps more expensive.  */
2833             if (compare_tree_int (lowi, 0) > 0
2834                 && compare_tree_int (high, prec) < 0)
2835               {
2836                 int cost_diff;
2837                 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2838                 rtx reg = gen_raw_REG (word_mode, 10000);
2839                 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2840                 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2841                                                                   GEN_INT (-m)), speed_p);
2842                 rtx r = immed_wide_int_const (mask, word_mode);
2843                 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2844                                                    word_mode, speed_p);
2845                 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2846                 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2847                                                    word_mode, speed_p);
2848                 if (cost_diff > 0)
2849                     {
2850                       mask = wi::lshift (mask, m);
2851                       lowi = build_zero_cst (TREE_TYPE (lowi));
2852                     }
2853               }
2854 
2855             tree tem = build_range_check (loc, optype, unshare_expr (exp),
2856                                                   false, lowi, high);
2857             if (tem == NULL_TREE || is_gimple_val (tem))
2858               continue;
2859             tree etype = unsigned_type_for (TREE_TYPE (exp));
2860             exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2861                                          fold_convert_loc (loc, etype, exp),
2862                                          fold_convert_loc (loc, etype, lowi));
2863             exp = fold_convert_loc (loc, integer_type_node, exp);
2864             tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2865             exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2866                                          build_int_cst (word_type, 1), exp);
2867             exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2868                                          wide_int_to_tree (word_type, mask));
2869             exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2870                                          build_zero_cst (word_type));
2871             if (is_gimple_val (exp))
2872               continue;
2873 
2874             /* The shift might have undefined behavior if TEM is true,
2875                but reassociate_bb isn't prepared to have basic blocks
2876                split when it is running.  So, temporarily emit a code
2877                with BIT_IOR_EXPR instead of &&, and fix it up in
2878                branch_fixup.  */
2879             gimple_seq seq;
2880             tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2881             gcc_assert (TREE_CODE (tem) == SSA_NAME);
2882             gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2883             gimple_seq seq2;
2884             exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2885             gimple_seq_add_seq_without_update (&seq, seq2);
2886             gcc_assert (TREE_CODE (exp) == SSA_NAME);
2887             gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2888             gimple *g = gimple_build_assign (make_ssa_name (optype),
2889                                                      BIT_IOR_EXPR, tem, exp);
2890             gimple_set_location (g, loc);
2891             gimple_seq_add_stmt_without_update (&seq, g);
2892             exp = gimple_assign_lhs (g);
2893             tree val = build_zero_cst (optype);
2894             if (update_range_test (&ranges[i], NULL, candidates.address (),
2895                                          candidates.length (), opcode, ops, exp,
2896                                          seq, false, val, val, strict_overflow_p))
2897               {
2898                 any_changes = true;
2899                 reassoc_branch_fixups.safe_push (tem);
2900               }
2901             else
2902               gimple_seq_discard (seq);
2903           }
2904     }
2905   return any_changes;
2906 }
2907 
2908 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2909    and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.  */
2910 
2911 static bool
optimize_range_tests_cmp_bitwise(enum tree_code opcode,int first,int length,vec<operand_entry * > * ops,struct range_entry * ranges)2912 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2913                                           vec<operand_entry *> *ops,
2914                                           struct range_entry *ranges)
2915 {
2916   int i;
2917   unsigned int b;
2918   bool any_changes = false;
2919   auto_vec<int, 128> buckets;
2920   auto_vec<int, 32> chains;
2921   auto_vec<struct range_entry *, 32> candidates;
2922 
2923   for (i = first; i < length; i++)
2924     {
2925       if (ranges[i].exp == NULL_TREE
2926             || TREE_CODE (ranges[i].exp) != SSA_NAME
2927             || !ranges[i].in_p
2928             || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2929             || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2930             || ranges[i].low == NULL_TREE
2931             || ranges[i].low != ranges[i].high)
2932           continue;
2933 
2934       bool zero_p = integer_zerop (ranges[i].low);
2935       if (!zero_p && !integer_all_onesp (ranges[i].low))
2936           continue;
2937 
2938       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2939       if (buckets.length () <= b)
2940           buckets.safe_grow_cleared (b + 1);
2941       if (chains.length () <= (unsigned) i)
2942           chains.safe_grow (i + 1);
2943       chains[i] = buckets[b];
2944       buckets[b] = i + 1;
2945     }
2946 
2947   FOR_EACH_VEC_ELT (buckets, b, i)
2948     if (i && chains[i - 1])
2949       {
2950           int j, k = i;
2951           for (j = chains[i - 1]; j; j = chains[j - 1])
2952             {
2953               gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2954               gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2955               if (reassoc_stmt_dominates_stmt_p (gk, gj))
2956                 k = j;
2957             }
2958           tree type1 = TREE_TYPE (ranges[k - 1].exp);
2959           tree type2 = NULL_TREE;
2960           bool strict_overflow_p = false;
2961           candidates.truncate (0);
2962           for (j = i; j; j = chains[j - 1])
2963             {
2964               tree type = TREE_TYPE (ranges[j - 1].exp);
2965               strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2966               if (j == k
2967                     || useless_type_conversion_p (type1, type))
2968                 ;
2969               else if (type2 == NULL_TREE
2970                          || useless_type_conversion_p (type2, type))
2971                 {
2972                     if (type2 == NULL_TREE)
2973                       type2 = type;
2974                     candidates.safe_push (&ranges[j - 1]);
2975                 }
2976             }
2977           unsigned l = candidates.length ();
2978           for (j = i; j; j = chains[j - 1])
2979             {
2980               tree type = TREE_TYPE (ranges[j - 1].exp);
2981               if (j == k)
2982                 continue;
2983               if (useless_type_conversion_p (type1, type))
2984                 ;
2985               else if (type2 == NULL_TREE
2986                          || useless_type_conversion_p (type2, type))
2987                 continue;
2988               candidates.safe_push (&ranges[j - 1]);
2989             }
2990           gimple_seq seq = NULL;
2991           tree op = NULL_TREE;
2992           unsigned int id;
2993           struct range_entry *r;
2994           candidates.safe_push (&ranges[k - 1]);
2995           FOR_EACH_VEC_ELT (candidates, id, r)
2996             {
2997               gimple *g;
2998               if (id == 0)
2999                 {
3000                     op = r->exp;
3001                     continue;
3002                 }
3003               if (id == l)
3004                 {
3005                     g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3006                     gimple_seq_add_stmt_without_update (&seq, g);
3007                     op = gimple_assign_lhs (g);
3008                 }
3009               tree type = TREE_TYPE (r->exp);
3010               tree exp = r->exp;
3011               if (id >= l && !useless_type_conversion_p (type1, type))
3012                 {
3013                     g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3014                     gimple_seq_add_stmt_without_update (&seq, g);
3015                     exp = gimple_assign_lhs (g);
3016                 }
3017               g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3018                                              (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3019                                              op, exp);
3020               gimple_seq_add_stmt_without_update (&seq, g);
3021               op = gimple_assign_lhs (g);
3022             }
3023           candidates.pop ();
3024           if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3025                                      candidates.length (), opcode, ops, op,
3026                                      seq, true, ranges[k - 1].low,
3027                                      ranges[k - 1].low, strict_overflow_p))
3028             any_changes = true;
3029           else
3030             gimple_seq_discard (seq);
3031       }
3032 
3033   return any_changes;
3034 }
3035 
3036 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3037    a >= 0 && a < b into (unsigned) a < (unsigned) b
3038    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
3039 
3040 static bool
optimize_range_tests_var_bound(enum tree_code opcode,int first,int length,vec<operand_entry * > * ops,struct range_entry * ranges,basic_block first_bb)3041 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3042                                         vec<operand_entry *> *ops,
3043                                         struct range_entry *ranges,
3044                                         basic_block first_bb)
3045 {
3046   int i;
3047   bool any_changes = false;
3048   hash_map<tree, int> *map = NULL;
3049 
3050   for (i = first; i < length; i++)
3051     {
3052       if (ranges[i].exp == NULL_TREE
3053             || TREE_CODE (ranges[i].exp) != SSA_NAME
3054             || !ranges[i].in_p)
3055           continue;
3056 
3057       tree type = TREE_TYPE (ranges[i].exp);
3058       if (!INTEGRAL_TYPE_P (type)
3059             || TYPE_UNSIGNED (type)
3060             || ranges[i].low == NULL_TREE
3061             || !integer_zerop (ranges[i].low)
3062             || ranges[i].high != NULL_TREE)
3063           continue;
3064       /* EXP >= 0 here.  */
3065       if (map == NULL)
3066           map = new hash_map <tree, int>;
3067       map->put (ranges[i].exp, i);
3068     }
3069 
3070   if (map == NULL)
3071     return false;
3072 
3073   for (i = 0; i < length; i++)
3074     {
3075       bool in_p = ranges[i].in_p;
3076       if (ranges[i].low == NULL_TREE
3077             || ranges[i].high == NULL_TREE)
3078           continue;
3079       if (!integer_zerop (ranges[i].low)
3080             || !integer_zerop (ranges[i].high))
3081           {
3082             if (ranges[i].exp
3083                 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3084                 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3085                 && integer_onep (ranges[i].low)
3086                 && integer_onep (ranges[i].high))
3087               in_p = !in_p;
3088             else
3089               continue;
3090           }
3091 
3092       gimple *stmt;
3093       tree_code ccode;
3094       tree rhs1, rhs2;
3095       if (ranges[i].exp)
3096           {
3097             if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3098               continue;
3099             stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3100             if (!is_gimple_assign (stmt))
3101               continue;
3102             ccode = gimple_assign_rhs_code (stmt);
3103             rhs1 = gimple_assign_rhs1 (stmt);
3104             rhs2 = gimple_assign_rhs2 (stmt);
3105           }
3106       else
3107           {
3108             operand_entry *oe = (*ops)[ranges[i].idx];
3109             stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3110             if (gimple_code (stmt) != GIMPLE_COND)
3111               continue;
3112             ccode = gimple_cond_code (stmt);
3113             rhs1 = gimple_cond_lhs (stmt);
3114             rhs2 = gimple_cond_rhs (stmt);
3115           }
3116 
3117       if (TREE_CODE (rhs1) != SSA_NAME
3118             || rhs2 == NULL_TREE
3119             || TREE_CODE (rhs2) != SSA_NAME)
3120           continue;
3121 
3122       switch (ccode)
3123           {
3124           case GT_EXPR:
3125           case GE_EXPR:
3126           case LT_EXPR:
3127           case LE_EXPR:
3128             break;
3129           default:
3130             continue;
3131           }
3132       if (in_p)
3133           ccode = invert_tree_comparison (ccode, false);
3134       switch (ccode)
3135           {
3136           case GT_EXPR:
3137           case GE_EXPR:
3138             std::swap (rhs1, rhs2);
3139             ccode = swap_tree_comparison (ccode);
3140             break;
3141           case LT_EXPR:
3142           case LE_EXPR:
3143             break;
3144           default:
3145             gcc_unreachable ();
3146           }
3147 
3148       int *idx = map->get (rhs1);
3149       if (idx == NULL)
3150           continue;
3151 
3152       /* maybe_optimize_range_tests allows statements without side-effects
3153            in the basic blocks as long as they are consumed in the same bb.
3154            Make sure rhs2's def stmt is not among them, otherwise we can't
3155            use safely get_nonzero_bits on it.  E.g. in:
3156             # RANGE [-83, 1] NONZERO 173
3157             # k_32 = PHI <k_47(13), k_12(9)>
3158            ...
3159             if (k_32 >= 0)
3160               goto <bb 5>; [26.46%]
3161             else
3162               goto <bb 9>; [73.54%]
3163 
3164             <bb 5> [local count: 140323371]:
3165             # RANGE [0, 1] NONZERO 1
3166             _5 = (int) k_32;
3167             # RANGE [0, 4] NONZERO 4
3168             _21 = _5 << 2;
3169             # RANGE [0, 4] NONZERO 4
3170             iftmp.0_44 = (char) _21;
3171             if (k_32 < iftmp.0_44)
3172               goto <bb 6>; [84.48%]
3173             else
3174               goto <bb 9>; [15.52%]
3175            the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3176            k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3177            to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3178            those stmts even for negative k_32 and the value ranges would be no
3179            longer guaranteed and so the optimization would be invalid.  */
3180       if (opcode == ERROR_MARK)
3181           {
3182             gimple *g = SSA_NAME_DEF_STMT (rhs2);
3183             basic_block bb2 = gimple_bb (g);
3184             if (bb2
3185                 && bb2 != first_bb
3186                 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3187               {
3188                 /* As an exception, handle a few common cases.  */
3189                 if (gimple_assign_cast_p (g)
3190                       && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
3191                       && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
3192                       && (TYPE_PRECISION (TREE_TYPE (rhs2))
3193                           > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
3194                     /* Zero-extension is always ok.  */ ;
3195                 else if (is_gimple_assign (g)
3196                            && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3197                            && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3198                            && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3199                     /* Masking with INTEGER_CST with MSB clear is always ok
3200                        too.  */ ;
3201                 else
3202                     continue;
3203               }
3204           }
3205 
3206       wide_int nz = get_nonzero_bits (rhs2);
3207       if (wi::neg_p (nz))
3208           continue;
3209 
3210       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3211            and RHS2 is known to be RHS2 >= 0.  */
3212       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3213 
3214       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3215       if ((ranges[*idx].strict_overflow_p
3216              || ranges[i].strict_overflow_p)
3217             && issue_strict_overflow_warning (wc))
3218           warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3219                         "assuming signed overflow does not occur "
3220                         "when simplifying range test");
3221 
3222       if (dump_file && (dump_flags & TDF_DETAILS))
3223           {
3224             struct range_entry *r = &ranges[*idx];
3225             fprintf (dump_file, "Optimizing range test ");
3226             print_generic_expr (dump_file, r->exp);
3227             fprintf (dump_file, " +[");
3228             print_generic_expr (dump_file, r->low);
3229             fprintf (dump_file, ", ");
3230             print_generic_expr (dump_file, r->high);
3231             fprintf (dump_file, "] and comparison ");
3232             print_generic_expr (dump_file, rhs1);
3233             fprintf (dump_file, " %s ", op_symbol_code (ccode));
3234             print_generic_expr (dump_file, rhs2);
3235             fprintf (dump_file, "\n into (");
3236             print_generic_expr (dump_file, utype);
3237             fprintf (dump_file, ") ");
3238             print_generic_expr (dump_file, rhs1);
3239             fprintf (dump_file, " %s (", op_symbol_code (ccode));
3240             print_generic_expr (dump_file, utype);
3241             fprintf (dump_file, ") ");
3242             print_generic_expr (dump_file, rhs2);
3243             fprintf (dump_file, "\n");
3244           }
3245 
3246       operand_entry *oe = (*ops)[ranges[i].idx];
3247       ranges[i].in_p = 0;
3248       if (opcode == BIT_IOR_EXPR
3249             || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3250           {
3251             ranges[i].in_p = 1;
3252             ccode = invert_tree_comparison (ccode, false);
3253           }
3254 
3255       unsigned int uid = gimple_uid (stmt);
3256       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3257       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3258       gimple_set_uid (g, uid);
3259       rhs1 = gimple_assign_lhs (g);
3260       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3261       g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3262       gimple_set_uid (g, uid);
3263       rhs2 = gimple_assign_lhs (g);
3264       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3265       if (tree_swap_operands_p (rhs1, rhs2))
3266           {
3267             std::swap (rhs1, rhs2);
3268             ccode = swap_tree_comparison (ccode);
3269           }
3270       if (gimple_code (stmt) == GIMPLE_COND)
3271           {
3272             gcond *c = as_a <gcond *> (stmt);
3273             gimple_cond_set_code (c, ccode);
3274             gimple_cond_set_lhs (c, rhs1);
3275             gimple_cond_set_rhs (c, rhs2);
3276             update_stmt (stmt);
3277           }
3278       else
3279           {
3280             tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3281             if (!INTEGRAL_TYPE_P (ctype)
3282                 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3283                       && TYPE_PRECISION (ctype) != 1))
3284               ctype = boolean_type_node;
3285             g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3286             gimple_set_uid (g, uid);
3287             gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3288             if (oe->op && ctype != TREE_TYPE (oe->op))
3289               {
3290                 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3291                                                NOP_EXPR, gimple_assign_lhs (g));
3292                 gimple_set_uid (g, uid);
3293                 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3294               }
3295             ranges[i].exp = gimple_assign_lhs (g);
3296             oe->op = ranges[i].exp;
3297             ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3298             ranges[i].high = ranges[i].low;
3299           }
3300       ranges[i].strict_overflow_p = false;
3301       oe = (*ops)[ranges[*idx].idx];
3302       /* Now change all the other range test immediate uses, so that
3303            those tests will be optimized away.  */
3304       if (opcode == ERROR_MARK)
3305           {
3306             if (oe->op)
3307               oe->op = build_int_cst (TREE_TYPE (oe->op),
3308                                             oe->rank == BIT_IOR_EXPR ? 0 : 1);
3309             else
3310               oe->op = (oe->rank == BIT_IOR_EXPR
3311                           ? boolean_false_node : boolean_true_node);
3312           }
3313       else
3314           oe->op = error_mark_node;
3315       ranges[*idx].exp = NULL_TREE;
3316       ranges[*idx].low = NULL_TREE;
3317       ranges[*idx].high = NULL_TREE;
3318       any_changes = true;
3319     }
3320 
3321   delete map;
3322   return any_changes;
3323 }
3324 
3325 /* Optimize range tests, similarly how fold_range_test optimizes
3326    it on trees.  The tree code for the binary
3327    operation between all the operands is OPCODE.
3328    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3329    maybe_optimize_range_tests for inter-bb range optimization.
3330    In that case if oe->op is NULL, oe->id is bb->index whose
3331    GIMPLE_COND is && or ||ed into the test, and oe->rank says
3332    the actual opcode.
3333    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
3334 
3335 static bool
optimize_range_tests(enum tree_code opcode,vec<operand_entry * > * ops,basic_block first_bb)3336 optimize_range_tests (enum tree_code opcode,
3337                           vec<operand_entry *> *ops, basic_block first_bb)
3338 {
3339   unsigned int length = ops->length (), i, j, first;
3340   operand_entry *oe;
3341   struct range_entry *ranges;
3342   bool any_changes = false;
3343 
3344   if (length == 1)
3345     return false;
3346 
3347   ranges = XNEWVEC (struct range_entry, length);
3348   for (i = 0; i < length; i++)
3349     {
3350       oe = (*ops)[i];
3351       ranges[i].idx = i;
3352       init_range_entry (ranges + i, oe->op,
3353                               oe->op
3354                               ? NULL
3355                               : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3356       /* For | invert it now, we will invert it again before emitting
3357            the optimized expression.  */
3358       if (opcode == BIT_IOR_EXPR
3359             || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3360           ranges[i].in_p = !ranges[i].in_p;
3361     }
3362 
3363   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3364   for (i = 0; i < length; i++)
3365     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3366       break;
3367 
3368   /* Try to merge ranges.  */
3369   for (first = i; i < length; i++)
3370     {
3371       tree low = ranges[i].low;
3372       tree high = ranges[i].high;
3373       int in_p = ranges[i].in_p;
3374       bool strict_overflow_p = ranges[i].strict_overflow_p;
3375       int update_fail_count = 0;
3376 
3377       for (j = i + 1; j < length; j++)
3378           {
3379             if (ranges[i].exp != ranges[j].exp)
3380               break;
3381             if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3382                                    ranges[j].in_p, ranges[j].low, ranges[j].high))
3383               break;
3384             strict_overflow_p |= ranges[j].strict_overflow_p;
3385           }
3386 
3387       if (j == i + 1)
3388           continue;
3389 
3390       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3391                                    opcode, ops, ranges[i].exp, NULL, in_p,
3392                                    low, high, strict_overflow_p))
3393           {
3394             i = j - 1;
3395             any_changes = true;
3396           }
3397       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3398            while update_range_test would fail.  */
3399       else if (update_fail_count == 64)
3400           i = j - 1;
3401       else
3402           ++update_fail_count;
3403     }
3404 
3405   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3406                                                    ops, ranges);
3407 
3408   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3409     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3410                                                      ops, ranges);
3411   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3412     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3413                                                                  ops, ranges);
3414   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3415                                                                ops, ranges);
3416   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3417                                                              ranges, first_bb);
3418 
3419   if (any_changes && opcode != ERROR_MARK)
3420     {
3421       j = 0;
3422       FOR_EACH_VEC_ELT (*ops, i, oe)
3423           {
3424             if (oe->op == error_mark_node)
3425               continue;
3426             else if (i != j)
3427               (*ops)[j] = oe;
3428             j++;
3429           }
3430       ops->truncate (j);
3431     }
3432 
3433   XDELETEVEC (ranges);
3434   return any_changes;
3435 }
3436 
3437 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3438    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
3439    otherwise the comparison code.  */
3440 
3441 static tree_code
ovce_extract_ops(tree var,gassign ** rets,bool * reti)3442 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3443 {
3444   if (TREE_CODE (var) != SSA_NAME)
3445     return ERROR_MARK;
3446 
3447   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3448   if (stmt == NULL)
3449     return ERROR_MARK;
3450 
3451   /* ??? If we start creating more COND_EXPR, we could perform
3452      this same optimization with them.  For now, simplify.  */
3453   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3454     return ERROR_MARK;
3455 
3456   tree cond = gimple_assign_rhs1 (stmt);
3457   tree_code cmp = TREE_CODE (cond);
3458   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3459     return ERROR_MARK;
3460 
3461   /* ??? For now, allow only canonical true and false result vectors.
3462      We could expand this to other constants should the need arise,
3463      but at the moment we don't create them.  */
3464   tree t = gimple_assign_rhs2 (stmt);
3465   tree f = gimple_assign_rhs3 (stmt);
3466   bool inv;
3467   if (integer_all_onesp (t))
3468     inv = false;
3469   else if (integer_all_onesp (f))
3470     {
3471       cmp = invert_tree_comparison (cmp, false);
3472       inv = true;
3473     }
3474   else
3475     return ERROR_MARK;
3476   if (!integer_zerop (f))
3477     return ERROR_MARK;
3478 
3479   /* Success!  */
3480   if (rets)
3481     *rets = stmt;
3482   if (reti)
3483     *reti = inv;
3484   return cmp;
3485 }
3486 
3487 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3488    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
3489 
3490 static bool
optimize_vec_cond_expr(tree_code opcode,vec<operand_entry * > * ops)3491 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3492 {
3493   unsigned int length = ops->length (), i, j;
3494   bool any_changes = false;
3495 
3496   if (length == 1)
3497     return false;
3498 
3499   for (i = 0; i < length; ++i)
3500     {
3501       tree elt0 = (*ops)[i]->op;
3502 
3503       gassign *stmt0;
3504       bool invert;
3505       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3506       if (cmp0 == ERROR_MARK)
3507           continue;
3508 
3509       for (j = i + 1; j < length; ++j)
3510           {
3511             tree &elt1 = (*ops)[j]->op;
3512 
3513             gassign *stmt1;
3514             tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3515             if (cmp1 == ERROR_MARK)
3516               continue;
3517 
3518             tree cond0 = gimple_assign_rhs1 (stmt0);
3519             tree x0 = TREE_OPERAND (cond0, 0);
3520             tree y0 = TREE_OPERAND (cond0, 1);
3521 
3522             tree cond1 = gimple_assign_rhs1 (stmt1);
3523             tree x1 = TREE_OPERAND (cond1, 0);
3524             tree y1 = TREE_OPERAND (cond1, 1);
3525 
3526             tree comb;
3527             if (opcode == BIT_AND_EXPR)
3528               comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3529             else if (opcode == BIT_IOR_EXPR)
3530               comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3531             else
3532               gcc_unreachable ();
3533             if (comb == NULL)
3534               continue;
3535 
3536             /* Success! */
3537             if (dump_file && (dump_flags & TDF_DETAILS))
3538               {
3539                 fprintf (dump_file, "Transforming ");
3540                 print_generic_expr (dump_file, cond0);
3541                 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3542                 print_generic_expr (dump_file, cond1);
3543                 fprintf (dump_file, " into ");
3544                 print_generic_expr (dump_file, comb);
3545                 fputc ('\n', dump_file);
3546               }
3547 
3548             gimple_assign_set_rhs1 (stmt0, comb);
3549             if (invert)
3550               std::swap (*gimple_assign_rhs2_ptr (stmt0),
3551                            *gimple_assign_rhs3_ptr (stmt0));
3552             update_stmt (stmt0);
3553 
3554             elt1 = error_mark_node;
3555             any_changes = true;
3556           }
3557     }
3558 
3559   if (any_changes)
3560     {
3561       operand_entry *oe;
3562       j = 0;
3563       FOR_EACH_VEC_ELT (*ops, i, oe)
3564           {
3565             if (oe->op == error_mark_node)
3566               continue;
3567             else if (i != j)
3568               (*ops)[j] = oe;
3569             j++;
3570           }
3571       ops->truncate (j);
3572     }
3573 
3574   return any_changes;
3575 }
3576 
3577 /* Return true if STMT is a cast like:
3578    <bb N>:
3579    ...
3580    _123 = (int) _234;
3581 
3582    <bb M>:
3583    # _345 = PHI <_123(N), 1(...), 1(...)>
3584    where _234 has bool type, _123 has single use and
3585    bb N has a single successor M.  This is commonly used in
3586    the last block of a range test.
3587 
3588    Also Return true if STMT is tcc_compare like:
3589    <bb N>:
3590    ...
3591    _234 = a_2(D) == 2;
3592 
3593    <bb M>:
3594    # _345 = PHI <_234(N), 1(...), 1(...)>
3595    _346 = (int) _345;
3596    where _234 has booltype, single use and
3597    bb N has a single successor M.  This is commonly used in
3598    the last block of a range test.  */
3599 
3600 static bool
final_range_test_p(gimple * stmt)3601 final_range_test_p (gimple *stmt)
3602 {
3603   basic_block bb, rhs_bb, lhs_bb;
3604   edge e;
3605   tree lhs, rhs;
3606   use_operand_p use_p;
3607   gimple *use_stmt;
3608 
3609   if (!gimple_assign_cast_p (stmt)
3610       && (!is_gimple_assign (stmt)
3611             || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3612                 != tcc_comparison)))
3613     return false;
3614   bb = gimple_bb (stmt);
3615   if (!single_succ_p (bb))
3616     return false;
3617   e = single_succ_edge (bb);
3618   if (e->flags & EDGE_COMPLEX)
3619     return false;
3620 
3621   lhs = gimple_assign_lhs (stmt);
3622   rhs = gimple_assign_rhs1 (stmt);
3623   if (gimple_assign_cast_p (stmt)
3624       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3625             || TREE_CODE (rhs) != SSA_NAME
3626             || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3627     return false;
3628 
3629   if (!gimple_assign_cast_p (stmt)
3630       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3631       return false;
3632 
3633   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
3634   if (!single_imm_use (lhs, &use_p, &use_stmt))
3635     return false;
3636 
3637   if (gimple_code (use_stmt) != GIMPLE_PHI
3638       || gimple_bb (use_stmt) != e->dest)
3639     return false;
3640 
3641   /* And that the rhs is defined in the same loop.  */
3642   if (gimple_assign_cast_p (stmt))
3643     {
3644       if (TREE_CODE (rhs) != SSA_NAME
3645             || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3646             || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3647           return false;
3648     }
3649   else
3650     {
3651       if (TREE_CODE (lhs) != SSA_NAME
3652             || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3653             || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3654           return false;
3655     }
3656 
3657   return true;
3658 }
3659 
3660 /* Return true if BB is suitable basic block for inter-bb range test
3661    optimization.  If BACKWARD is true, BB should be the only predecessor
3662    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3663    or compared with to find a common basic block to which all conditions
3664    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
3665    be the only predecessor of BB.  */
3666 
3667 static bool
suitable_cond_bb(basic_block bb,basic_block test_bb,basic_block * other_bb,bool backward)3668 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3669                       bool backward)
3670 {
3671   edge_iterator ei, ei2;
3672   edge e, e2;
3673   gimple *stmt;
3674   gphi_iterator gsi;
3675   bool other_edge_seen = false;
3676   bool is_cond;
3677 
3678   if (test_bb == bb)
3679     return false;
3680   /* Check last stmt first.  */
3681   stmt = last_stmt (bb);
3682   if (stmt == NULL
3683       || (gimple_code (stmt) != GIMPLE_COND
3684             && (backward || !final_range_test_p (stmt)))
3685       || gimple_visited_p (stmt)
3686       || stmt_could_throw_p (stmt)
3687       || *other_bb == bb)
3688     return false;
3689   is_cond = gimple_code (stmt) == GIMPLE_COND;
3690   if (is_cond)
3691     {
3692       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3693            goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3694            to *OTHER_BB (if not set yet, try to find it out).  */
3695       if (EDGE_COUNT (bb->succs) != 2)
3696           return false;
3697       FOR_EACH_EDGE (e, ei, bb->succs)
3698           {
3699             if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3700               return false;
3701             if (e->dest == test_bb)
3702               {
3703                 if (backward)
3704                     continue;
3705                 else
3706                     return false;
3707               }
3708             if (e->dest == bb)
3709               return false;
3710             if (*other_bb == NULL)
3711               {
3712                 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3713                     if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3714                       return false;
3715                     else if (e->dest == e2->dest)
3716                       *other_bb = e->dest;
3717                 if (*other_bb == NULL)
3718                     return false;
3719               }
3720             if (e->dest == *other_bb)
3721               other_edge_seen = true;
3722             else if (backward)
3723               return false;
3724           }
3725       if (*other_bb == NULL || !other_edge_seen)
3726           return false;
3727     }
3728   else if (single_succ (bb) != *other_bb)
3729     return false;
3730 
3731   /* Now check all PHIs of *OTHER_BB.  */
3732   e = find_edge (bb, *other_bb);
3733   e2 = find_edge (test_bb, *other_bb);
3734   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3735     {
3736       gphi *phi = gsi.phi ();
3737       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3738            corresponding to BB and TEST_BB predecessor must be the same.  */
3739       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3740                                   gimple_phi_arg_def (phi, e2->dest_idx), 0))
3741           {
3742             /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3743                one of the PHIs should have the lhs of the last stmt in
3744                that block as PHI arg and that PHI should have 0 or 1
3745                corresponding to it in all other range test basic blocks
3746                considered.  */
3747             if (!is_cond)
3748               {
3749                 if (gimple_phi_arg_def (phi, e->dest_idx)
3750                       == gimple_assign_lhs (stmt)
3751                       && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3752                           || integer_onep (gimple_phi_arg_def (phi,
3753                                                                          e2->dest_idx))))
3754                     continue;
3755               }
3756             else
3757               {
3758                 gimple *test_last = last_stmt (test_bb);
3759                 if (gimple_code (test_last) != GIMPLE_COND
3760                       && gimple_phi_arg_def (phi, e2->dest_idx)
3761                          == gimple_assign_lhs (test_last)
3762                       && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3763                           || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3764                     continue;
3765               }
3766 
3767             return false;
3768           }
3769     }
3770   return true;
3771 }
3772 
3773 /* Return true if BB doesn't have side-effects that would disallow
3774    range test optimization, all SSA_NAMEs set in the bb are consumed
3775    in the bb and there are no PHIs.  */
3776 
3777 static bool
no_side_effect_bb(basic_block bb)3778 no_side_effect_bb (basic_block bb)
3779 {
3780   gimple_stmt_iterator gsi;
3781   gimple *last;
3782 
3783   if (!gimple_seq_empty_p (phi_nodes (bb)))
3784     return false;
3785   last = last_stmt (bb);
3786   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3787     {
3788       gimple *stmt = gsi_stmt (gsi);
3789       tree lhs;
3790       imm_use_iterator imm_iter;
3791       use_operand_p use_p;
3792 
3793       if (is_gimple_debug (stmt))
3794           continue;
3795       if (gimple_has_side_effects (stmt))
3796           return false;
3797       if (stmt == last)
3798           return true;
3799       if (!is_gimple_assign (stmt))
3800           return false;
3801       lhs = gimple_assign_lhs (stmt);
3802       if (TREE_CODE (lhs) != SSA_NAME)
3803           return false;
3804       if (gimple_assign_rhs_could_trap_p (stmt))
3805           return false;
3806       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3807           {
3808             gimple *use_stmt = USE_STMT (use_p);
3809             if (is_gimple_debug (use_stmt))
3810               continue;
3811             if (gimple_bb (use_stmt) != bb)
3812               return false;
3813           }
3814     }
3815   return false;
3816 }
3817 
3818 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3819    return true and fill in *OPS recursively.  */
3820 
3821 static bool
get_ops(tree var,enum tree_code code,vec<operand_entry * > * ops,struct loop * loop)3822 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3823            struct loop *loop)
3824 {
3825   gimple *stmt = SSA_NAME_DEF_STMT (var);
3826   tree rhs[2];
3827   int i;
3828 
3829   if (!is_reassociable_op (stmt, code, loop))
3830     return false;
3831 
3832   rhs[0] = gimple_assign_rhs1 (stmt);
3833   rhs[1] = gimple_assign_rhs2 (stmt);
3834   gimple_set_visited (stmt, true);
3835   for (i = 0; i < 2; i++)
3836     if (TREE_CODE (rhs[i]) == SSA_NAME
3837           && !get_ops (rhs[i], code, ops, loop)
3838           && has_single_use (rhs[i]))
3839       {
3840           operand_entry *oe = operand_entry_pool.allocate ();
3841 
3842           oe->op = rhs[i];
3843           oe->rank = code;
3844           oe->id = 0;
3845           oe->count = 1;
3846           oe->stmt_to_insert = NULL;
3847           ops->safe_push (oe);
3848       }
3849   return true;
3850 }
3851 
3852 /* Find the ops that were added by get_ops starting from VAR, see if
3853    they were changed during update_range_test and if yes, create new
3854    stmts.  */
3855 
3856 static tree
update_ops(tree var,enum tree_code code,vec<operand_entry * > ops,unsigned int * pidx,struct loop * loop)3857 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3858               unsigned int *pidx, struct loop *loop)
3859 {
3860   gimple *stmt = SSA_NAME_DEF_STMT (var);
3861   tree rhs[4];
3862   int i;
3863 
3864   if (!is_reassociable_op (stmt, code, loop))
3865     return NULL;
3866 
3867   rhs[0] = gimple_assign_rhs1 (stmt);
3868   rhs[1] = gimple_assign_rhs2 (stmt);
3869   rhs[2] = rhs[0];
3870   rhs[3] = rhs[1];
3871   for (i = 0; i < 2; i++)
3872     if (TREE_CODE (rhs[i]) == SSA_NAME)
3873       {
3874           rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3875           if (rhs[2 + i] == NULL_TREE)
3876             {
3877               if (has_single_use (rhs[i]))
3878                 rhs[2 + i] = ops[(*pidx)++]->op;
3879               else
3880                 rhs[2 + i] = rhs[i];
3881             }
3882       }
3883   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3884       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3885     {
3886       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3887       var = make_ssa_name (TREE_TYPE (var));
3888       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3889                                                   rhs[2], rhs[3]);
3890       gimple_set_uid (g, gimple_uid (stmt));
3891       gimple_set_visited (g, true);
3892       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3893     }
3894   return var;
3895 }
3896 
3897 /* Structure to track the initial value passed to get_ops and
3898    the range in the ops vector for each basic block.  */
3899 
3900 struct inter_bb_range_test_entry
3901 {
3902   tree op;
3903   unsigned int first_idx, last_idx;
3904 };
3905 
3906 /* Inter-bb range test optimization.
3907 
3908    Returns TRUE if a gimple conditional is optimized to a true/false,
3909    otherwise return FALSE.
3910 
3911    This indicates to the caller that it should run a CFG cleanup pass
3912    once reassociation is completed.  */
3913 
3914 static bool
maybe_optimize_range_tests(gimple * stmt)3915 maybe_optimize_range_tests (gimple *stmt)
3916 {
3917   basic_block first_bb = gimple_bb (stmt);
3918   basic_block last_bb = first_bb;
3919   basic_block other_bb = NULL;
3920   basic_block bb;
3921   edge_iterator ei;
3922   edge e;
3923   auto_vec<operand_entry *> ops;
3924   auto_vec<inter_bb_range_test_entry> bbinfo;
3925   bool any_changes = false;
3926   bool cfg_cleanup_needed = false;
3927 
3928   /* Consider only basic blocks that end with GIMPLE_COND or
3929      a cast statement satisfying final_range_test_p.  All
3930      but the last bb in the first_bb .. last_bb range
3931      should end with GIMPLE_COND.  */
3932   if (gimple_code (stmt) == GIMPLE_COND)
3933     {
3934       if (EDGE_COUNT (first_bb->succs) != 2)
3935           return cfg_cleanup_needed;
3936     }
3937   else if (final_range_test_p (stmt))
3938     other_bb = single_succ (first_bb);
3939   else
3940     return cfg_cleanup_needed;
3941 
3942   if (stmt_could_throw_p (stmt))
3943     return cfg_cleanup_needed;
3944 
3945   /* As relative ordering of post-dominator sons isn't fixed,
3946      maybe_optimize_range_tests can be called first on any
3947      bb in the range we want to optimize.  So, start searching
3948      backwards, if first_bb can be set to a predecessor.  */
3949   while (single_pred_p (first_bb))
3950     {
3951       basic_block pred_bb = single_pred (first_bb);
3952       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3953           break;
3954       if (!no_side_effect_bb (first_bb))
3955           break;
3956       first_bb = pred_bb;
3957     }
3958   /* If first_bb is last_bb, other_bb hasn't been computed yet.
3959      Before starting forward search in last_bb successors, find
3960      out the other_bb.  */
3961   if (first_bb == last_bb)
3962     {
3963       other_bb = NULL;
3964       /* As non-GIMPLE_COND last stmt always terminates the range,
3965            if forward search didn't discover anything, just give up.  */
3966       if (gimple_code (stmt) != GIMPLE_COND)
3967           return cfg_cleanup_needed;
3968       /* Look at both successors.  Either it ends with a GIMPLE_COND
3969            and satisfies suitable_cond_bb, or ends with a cast and
3970            other_bb is that cast's successor.  */
3971       FOR_EACH_EDGE (e, ei, first_bb->succs)
3972           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3973               || e->dest == first_bb)
3974             return cfg_cleanup_needed;
3975           else if (single_pred_p (e->dest))
3976             {
3977               stmt = last_stmt (e->dest);
3978               if (stmt
3979                     && gimple_code (stmt) == GIMPLE_COND
3980                     && EDGE_COUNT (e->dest->succs) == 2)
3981                 {
3982                     if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3983                       break;
3984                     else
3985                       other_bb = NULL;
3986                 }
3987               else if (stmt
3988                          && final_range_test_p (stmt)
3989                          && find_edge (first_bb, single_succ (e->dest)))
3990                 {
3991                     other_bb = single_succ (e->dest);
3992                     if (other_bb == first_bb)
3993                       other_bb = NULL;
3994                 }
3995             }
3996       if (other_bb == NULL)
3997           return cfg_cleanup_needed;
3998     }
3999   /* Now do the forward search, moving last_bb to successor bbs
4000      that aren't other_bb.  */
4001   while (EDGE_COUNT (last_bb->succs) == 2)
4002     {
4003       FOR_EACH_EDGE (e, ei, last_bb->succs)
4004           if (e->dest != other_bb)
4005             break;
4006       if (e == NULL)
4007           break;
4008       if (!single_pred_p (e->dest))
4009           break;
4010       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4011           break;
4012       if (!no_side_effect_bb (e->dest))
4013           break;
4014       last_bb = e->dest;
4015     }
4016   if (first_bb == last_bb)
4017     return cfg_cleanup_needed;
4018   /* Here basic blocks first_bb through last_bb's predecessor
4019      end with GIMPLE_COND, all of them have one of the edges to
4020      other_bb and another to another block in the range,
4021      all blocks except first_bb don't have side-effects and
4022      last_bb ends with either GIMPLE_COND, or cast satisfying
4023      final_range_test_p.  */
4024   for (bb = last_bb; ; bb = single_pred (bb))
4025     {
4026       enum tree_code code;
4027       tree lhs, rhs;
4028       inter_bb_range_test_entry bb_ent;
4029 
4030       bb_ent.op = NULL_TREE;
4031       bb_ent.first_idx = ops.length ();
4032       bb_ent.last_idx = bb_ent.first_idx;
4033       e = find_edge (bb, other_bb);
4034       stmt = last_stmt (bb);
4035       gimple_set_visited (stmt, true);
4036       if (gimple_code (stmt) != GIMPLE_COND)
4037           {
4038             use_operand_p use_p;
4039             gimple *phi;
4040             edge e2;
4041             unsigned int d;
4042 
4043             lhs = gimple_assign_lhs (stmt);
4044             rhs = gimple_assign_rhs1 (stmt);
4045             gcc_assert (bb == last_bb);
4046 
4047             /* stmt is
4048                _123 = (int) _234;
4049                OR
4050                _234 = a_2(D) == 2;
4051 
4052                followed by:
4053                <bb M>:
4054                # _345 = PHI <_123(N), 1(...), 1(...)>
4055 
4056                or 0 instead of 1.  If it is 0, the _234
4057                range test is anded together with all the
4058                other range tests, if it is 1, it is ored with
4059                them.  */
4060             single_imm_use (lhs, &use_p, &phi);
4061             gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4062             e2 = find_edge (first_bb, other_bb);
4063             d = e2->dest_idx;
4064             gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4065             if (integer_zerop (gimple_phi_arg_def (phi, d)))
4066               code = BIT_AND_EXPR;
4067             else
4068               {
4069                 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4070                 code = BIT_IOR_EXPR;
4071               }
4072 
4073             /* If _234 SSA_NAME_DEF_STMT is
4074                _234 = _567 | _789;
4075                (or &, corresponding to 1/0 in the phi arguments,
4076                push into ops the individual range test arguments
4077                of the bitwise or resp. and, recursively.  */
4078             if (TREE_CODE (rhs) == SSA_NAME
4079                 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4080                       != tcc_comparison)
4081                 && !get_ops (rhs, code, &ops,
4082                               loop_containing_stmt (stmt))
4083                 && has_single_use (rhs))
4084               {
4085                 /* Otherwise, push the _234 range test itself.  */
4086                 operand_entry *oe = operand_entry_pool.allocate ();
4087 
4088                 oe->op = rhs;
4089                 oe->rank = code;
4090                 oe->id = 0;
4091                 oe->count = 1;
4092                 oe->stmt_to_insert = NULL;
4093                 ops.safe_push (oe);
4094                 bb_ent.last_idx++;
4095                 bb_ent.op = rhs;
4096               }
4097             else if (is_gimple_assign (stmt)
4098                        && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4099                            == tcc_comparison)
4100                        && !get_ops (lhs, code, &ops,
4101                                         loop_containing_stmt (stmt))
4102                        && has_single_use (lhs))
4103               {
4104                 operand_entry *oe = operand_entry_pool.allocate ();
4105                 oe->op = lhs;
4106                 oe->rank = code;
4107                 oe->id = 0;
4108                 oe->count = 1;
4109                 ops.safe_push (oe);
4110                 bb_ent.last_idx++;
4111                 bb_ent.op = lhs;
4112               }
4113             else
4114               {
4115                 bb_ent.last_idx = ops.length ();
4116                 bb_ent.op = rhs;
4117               }
4118             bbinfo.safe_push (bb_ent);
4119             continue;
4120           }
4121       /* Otherwise stmt is GIMPLE_COND.  */
4122       code = gimple_cond_code (stmt);
4123       lhs = gimple_cond_lhs (stmt);
4124       rhs = gimple_cond_rhs (stmt);
4125       if (TREE_CODE (lhs) == SSA_NAME
4126             && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4127             && ((code != EQ_EXPR && code != NE_EXPR)
4128                 || rhs != boolean_false_node
4129                      /* Either push into ops the individual bitwise
4130                         or resp. and operands, depending on which
4131                         edge is other_bb.  */
4132                 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4133                                          ^ (code == EQ_EXPR))
4134                                         ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4135                                  loop_containing_stmt (stmt))))
4136           {
4137             /* Or push the GIMPLE_COND stmt itself.  */
4138             operand_entry *oe = operand_entry_pool.allocate ();
4139 
4140             oe->op = NULL;
4141             oe->rank = (e->flags & EDGE_TRUE_VALUE)
4142                          ? BIT_IOR_EXPR : BIT_AND_EXPR;
4143             /* oe->op = NULL signs that there is no SSA_NAME
4144                for the range test, and oe->id instead is the
4145                basic block number, at which's end the GIMPLE_COND
4146                is.  */
4147             oe->id = bb->index;
4148             oe->count = 1;
4149             oe->stmt_to_insert = NULL;
4150             ops.safe_push (oe);
4151             bb_ent.op = NULL;
4152             bb_ent.last_idx++;
4153           }
4154       else if (ops.length () > bb_ent.first_idx)
4155           {
4156             bb_ent.op = lhs;
4157             bb_ent.last_idx = ops.length ();
4158           }
4159       bbinfo.safe_push (bb_ent);
4160       if (bb == first_bb)
4161           break;
4162     }
4163   if (ops.length () > 1)
4164     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4165   if (any_changes)
4166     {
4167       unsigned int idx, max_idx = 0;
4168       /* update_ops relies on has_single_use predicates returning the
4169            same values as it did during get_ops earlier.  Additionally it
4170            never removes statements, only adds new ones and it should walk
4171            from the single imm use and check the predicate already before
4172            making those changes.
4173            On the other side, the handling of GIMPLE_COND directly can turn
4174            previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4175            it needs to be done in a separate loop afterwards.  */
4176       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4177           {
4178             if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4179                 && bbinfo[idx].op != NULL_TREE)
4180               {
4181                 tree new_op;
4182 
4183                 max_idx = idx;
4184                 stmt = last_stmt (bb);
4185                 new_op = update_ops (bbinfo[idx].op,
4186                                            (enum tree_code)
4187                                            ops[bbinfo[idx].first_idx]->rank,
4188                                            ops, &bbinfo[idx].first_idx,
4189                                            loop_containing_stmt (stmt));
4190                 if (new_op == NULL_TREE)
4191                     {
4192                       gcc_assert (bb == last_bb);
4193                       new_op = ops[bbinfo[idx].first_idx++]->op;
4194                     }
4195                 if (bbinfo[idx].op != new_op)
4196                     {
4197                       imm_use_iterator iter;
4198                       use_operand_p use_p;
4199                       gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4200 
4201                       FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4202                         if (is_gimple_debug (use_stmt))
4203                           continue;
4204                         else if (gimple_code (use_stmt) == GIMPLE_COND
4205                                    || gimple_code (use_stmt) == GIMPLE_PHI)
4206                           FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4207                               SET_USE (use_p, new_op);
4208                         else if ((is_gimple_assign (use_stmt)
4209                                     && (TREE_CODE_CLASS
4210                                           (gimple_assign_rhs_code (use_stmt))
4211                                           == tcc_comparison)))
4212                           cast_or_tcc_cmp_stmt = use_stmt;
4213                         else if (gimple_assign_cast_p (use_stmt))
4214                           cast_or_tcc_cmp_stmt = use_stmt;
4215                         else
4216                           gcc_unreachable ();
4217 
4218                       if (cast_or_tcc_cmp_stmt)
4219                         {
4220                           gcc_assert (bb == last_bb);
4221                           tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4222                           tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4223                           enum tree_code rhs_code
4224                               = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4225                               ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4226                               : CONVERT_EXPR;
4227                           gassign *g;
4228                           if (is_gimple_min_invariant (new_op))
4229                               {
4230                                 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4231                                 g = gimple_build_assign (new_lhs, new_op);
4232                               }
4233                           else
4234                               g = gimple_build_assign (new_lhs, rhs_code, new_op);
4235                           gimple_stmt_iterator gsi
4236                               = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4237                           gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4238                           gimple_set_visited (g, true);
4239                           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4240                           FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4241                               if (is_gimple_debug (use_stmt))
4242                                 continue;
4243                               else if (gimple_code (use_stmt) == GIMPLE_COND
4244                                          || gimple_code (use_stmt) == GIMPLE_PHI)
4245                                 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4246                                   SET_USE (use_p, new_lhs);
4247                               else
4248                                 gcc_unreachable ();
4249                         }
4250                     }
4251               }
4252             if (bb == first_bb)
4253               break;
4254           }
4255       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4256           {
4257             if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4258                 && bbinfo[idx].op == NULL_TREE
4259                 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4260               {
4261                 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4262 
4263                 if (idx > max_idx)
4264                     max_idx = idx;
4265 
4266                 /* If we collapse the conditional to a true/false
4267                      condition, then bubble that knowledge up to our caller.  */
4268                 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4269                     {
4270                       gimple_cond_make_false (cond_stmt);
4271                       cfg_cleanup_needed = true;
4272                     }
4273                 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4274                     {
4275                       gimple_cond_make_true (cond_stmt);
4276                       cfg_cleanup_needed = true;
4277                     }
4278                 else
4279                     {
4280                       gimple_cond_set_code (cond_stmt, NE_EXPR);
4281                       gimple_cond_set_lhs (cond_stmt,
4282                                                ops[bbinfo[idx].first_idx]->op);
4283                       gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4284                     }
4285                 update_stmt (cond_stmt);
4286               }
4287             if (bb == first_bb)
4288               break;
4289           }
4290 
4291       /* The above changes could result in basic blocks after the first
4292            modified one, up to and including last_bb, to be executed even if
4293            they would not be in the original program.  If the value ranges of
4294            assignment lhs' in those bbs were dependent on the conditions
4295            guarding those basic blocks which now can change, the VRs might
4296            be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
4297            are only used within the same bb, it should be not a big deal if
4298            we just reset all the VRs in those bbs.  See PR68671.  */
4299       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4300           reset_flow_sensitive_info_in_bb (bb);
4301     }
4302   return cfg_cleanup_needed;
4303 }
4304 
4305 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4306    of STMT in it's operands.  This is also known as a "destructive
4307    update" operation.  */
4308 
4309 static bool
is_phi_for_stmt(gimple * stmt,tree operand)4310 is_phi_for_stmt (gimple *stmt, tree operand)
4311 {
4312   gimple *def_stmt;
4313   gphi *def_phi;
4314   tree lhs;
4315   use_operand_p arg_p;
4316   ssa_op_iter i;
4317 
4318   if (TREE_CODE (operand) != SSA_NAME)
4319     return false;
4320 
4321   lhs = gimple_assign_lhs (stmt);
4322 
4323   def_stmt = SSA_NAME_DEF_STMT (operand);
4324   def_phi = dyn_cast <gphi *> (def_stmt);
4325   if (!def_phi)
4326     return false;
4327 
4328   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4329     if (lhs == USE_FROM_PTR (arg_p))
4330       return true;
4331   return false;
4332 }
4333 
4334 /* Remove def stmt of VAR if VAR has zero uses and recurse
4335    on rhs1 operand if so.  */
4336 
4337 static void
remove_visited_stmt_chain(tree var)4338 remove_visited_stmt_chain (tree var)
4339 {
4340   gimple *stmt;
4341   gimple_stmt_iterator gsi;
4342 
4343   while (1)
4344     {
4345       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4346           return;
4347       stmt = SSA_NAME_DEF_STMT (var);
4348       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4349           {
4350             var = gimple_assign_rhs1 (stmt);
4351             gsi = gsi_for_stmt (stmt);
4352             reassoc_remove_stmt (&gsi);
4353             release_defs (stmt);
4354           }
4355       else
4356           return;
4357     }
4358 }
4359 
4360 /* This function checks three consequtive operands in
4361    passed operands vector OPS starting from OPINDEX and
4362    swaps two operands if it is profitable for binary operation
4363    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4364 
4365    We pair ops with the same rank if possible.
4366 
4367    The alternative we try is to see if STMT is a destructive
4368    update style statement, which is like:
4369    b = phi (a, ...)
4370    a = c + b;
4371    In that case, we want to use the destructive update form to
4372    expose the possible vectorizer sum reduction opportunity.
4373    In that case, the third operand will be the phi node. This
4374    check is not performed if STMT is null.
4375 
4376    We could, of course, try to be better as noted above, and do a
4377    lot of work to try to find these opportunities in >3 operand
4378    cases, but it is unlikely to be worth it.  */
4379 
4380 static void
swap_ops_for_binary_stmt(vec<operand_entry * > ops,unsigned int opindex,gimple * stmt)4381 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4382                                 unsigned int opindex, gimple *stmt)
4383 {
4384   operand_entry *oe1, *oe2, *oe3;
4385 
4386   oe1 = ops[opindex];
4387   oe2 = ops[opindex + 1];
4388   oe3 = ops[opindex + 2];
4389 
4390   if ((oe1->rank == oe2->rank
4391        && oe2->rank != oe3->rank)
4392       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4393             && !is_phi_for_stmt (stmt, oe1->op)
4394             && !is_phi_for_stmt (stmt, oe2->op)))
4395     std::swap (*oe1, *oe3);
4396   else if ((oe1->rank == oe3->rank
4397               && oe2->rank != oe3->rank)
4398              || (stmt && is_phi_for_stmt (stmt, oe2->op)
4399                  && !is_phi_for_stmt (stmt, oe1->op)
4400                  && !is_phi_for_stmt (stmt, oe3->op)))
4401     std::swap (*oe1, *oe2);
4402 }
4403 
4404 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4405    two definitions, otherwise return STMT.  */
4406 
4407 static inline gimple *
find_insert_point(gimple * stmt,tree rhs1,tree rhs2)4408 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4409 {
4410   if (TREE_CODE (rhs1) == SSA_NAME
4411       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4412     stmt = SSA_NAME_DEF_STMT (rhs1);
4413   if (TREE_CODE (rhs2) == SSA_NAME
4414       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4415     stmt = SSA_NAME_DEF_STMT (rhs2);
4416   return stmt;
4417 }
4418 
4419 /* If the stmt that defines operand has to be inserted, insert it
4420    before the use.  */
4421 static void
insert_stmt_before_use(gimple * stmt,gimple * stmt_to_insert)4422 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4423 {
4424   gcc_assert (is_gimple_assign (stmt_to_insert));
4425   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4426   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4427   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4428   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4429   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4430 
4431   /* If the insert point is not stmt, then insert_point would be
4432      the point where operand rhs1 or rhs2 is defined. In this case,
4433      stmt_to_insert has to be inserted afterwards. This would
4434      only happen when the stmt insertion point is flexible. */
4435   if (stmt == insert_point)
4436     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4437   else
4438     insert_stmt_after (stmt_to_insert, insert_point);
4439 }
4440 
4441 
4442 /* Recursively rewrite our linearized statements so that the operators
4443    match those in OPS[OPINDEX], putting the computation in rank
4444    order.  Return new lhs.
4445    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4446    the current stmt and during recursive invocations.
4447    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4448    recursive invocations.  */
4449 
4450 static tree
rewrite_expr_tree(gimple * stmt,unsigned int opindex,vec<operand_entry * > ops,bool changed,bool next_changed)4451 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4452                        vec<operand_entry *> ops, bool changed, bool next_changed)
4453 {
4454   tree rhs1 = gimple_assign_rhs1 (stmt);
4455   tree rhs2 = gimple_assign_rhs2 (stmt);
4456   tree lhs = gimple_assign_lhs (stmt);
4457   operand_entry *oe;
4458 
4459   /* The final recursion case for this function is that you have
4460      exactly two operations left.
4461      If we had exactly one op in the entire list to start with, we
4462      would have never called this function, and the tail recursion
4463      rewrites them one at a time.  */
4464   if (opindex + 2 == ops.length ())
4465     {
4466       operand_entry *oe1, *oe2;
4467 
4468       oe1 = ops[opindex];
4469       oe2 = ops[opindex + 1];
4470 
4471       if (rhs1 != oe1->op || rhs2 != oe2->op)
4472           {
4473             gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4474             unsigned int uid = gimple_uid (stmt);
4475 
4476             if (dump_file && (dump_flags & TDF_DETAILS))
4477               {
4478                 fprintf (dump_file, "Transforming ");
4479                 print_gimple_stmt (dump_file, stmt, 0);
4480               }
4481 
4482             /* If the stmt that defines operand has to be inserted, insert it
4483                before the use.  */
4484             if (oe1->stmt_to_insert)
4485               insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4486             if (oe2->stmt_to_insert)
4487               insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4488             /* Even when changed is false, reassociation could have e.g. removed
4489                some redundant operations, so unless we are just swapping the
4490                arguments or unless there is no change at all (then we just
4491                return lhs), force creation of a new SSA_NAME.  */
4492             if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4493               {
4494                 gimple *insert_point
4495                     = find_insert_point (stmt, oe1->op, oe2->op);
4496                 lhs = make_ssa_name (TREE_TYPE (lhs));
4497                 stmt
4498                     = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4499                                                oe1->op, oe2->op);
4500                 gimple_set_uid (stmt, uid);
4501                 gimple_set_visited (stmt, true);
4502                 if (insert_point == gsi_stmt (gsi))
4503                     gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4504                 else
4505                     insert_stmt_after (stmt, insert_point);
4506               }
4507             else
4508               {
4509                 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4510                                            == stmt);
4511                 gimple_assign_set_rhs1 (stmt, oe1->op);
4512                 gimple_assign_set_rhs2 (stmt, oe2->op);
4513                 update_stmt (stmt);
4514               }
4515 
4516             if (rhs1 != oe1->op && rhs1 != oe2->op)
4517               remove_visited_stmt_chain (rhs1);
4518 
4519             if (dump_file && (dump_flags & TDF_DETAILS))
4520               {
4521                 fprintf (dump_file, " into ");
4522                 print_gimple_stmt (dump_file, stmt, 0);
4523               }
4524           }
4525       return lhs;
4526     }
4527 
4528   /* If we hit here, we should have 3 or more ops left.  */
4529   gcc_assert (opindex + 2 < ops.length ());
4530 
4531   /* Rewrite the next operator.  */
4532   oe = ops[opindex];
4533 
4534   /* If the stmt that defines operand has to be inserted, insert it
4535      before the use.  */
4536   if (oe->stmt_to_insert)
4537     insert_stmt_before_use (stmt, oe->stmt_to_insert);
4538 
4539   /* Recurse on the LHS of the binary operator, which is guaranteed to
4540      be the non-leaf side.  */
4541   tree new_rhs1
4542     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4543                                changed || oe->op != rhs2 || next_changed,
4544                                false);
4545 
4546   if (oe->op != rhs2 || new_rhs1 != rhs1)
4547     {
4548       if (dump_file && (dump_flags & TDF_DETAILS))
4549           {
4550             fprintf (dump_file, "Transforming ");
4551             print_gimple_stmt (dump_file, stmt, 0);
4552           }
4553 
4554       /* If changed is false, this is either opindex == 0
4555            or all outer rhs2's were equal to corresponding oe->op,
4556            and powi_result is NULL.
4557            That means lhs is equivalent before and after reassociation.
4558            Otherwise ensure the old lhs SSA_NAME is not reused and
4559            create a new stmt as well, so that any debug stmts will be
4560            properly adjusted.  */
4561       if (changed)
4562           {
4563             gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4564             unsigned int uid = gimple_uid (stmt);
4565             gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4566 
4567             lhs = make_ssa_name (TREE_TYPE (lhs));
4568             stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4569                                               new_rhs1, oe->op);
4570             gimple_set_uid (stmt, uid);
4571             gimple_set_visited (stmt, true);
4572             if (insert_point == gsi_stmt (gsi))
4573               gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4574             else
4575               insert_stmt_after (stmt, insert_point);
4576           }
4577       else
4578           {
4579             gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4580                                      == stmt);
4581             gimple_assign_set_rhs1 (stmt, new_rhs1);
4582             gimple_assign_set_rhs2 (stmt, oe->op);
4583             update_stmt (stmt);
4584           }
4585 
4586       if (dump_file && (dump_flags & TDF_DETAILS))
4587           {
4588             fprintf (dump_file, " into ");
4589             print_gimple_stmt (dump_file, stmt, 0);
4590           }
4591     }
4592   return lhs;
4593 }
4594 
4595 /* Find out how many cycles we need to compute statements chain.
4596    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
4597    maximum number of independent statements we may execute per cycle.  */
4598 
4599 static int
get_required_cycles(int ops_num,int cpu_width)4600 get_required_cycles (int ops_num, int cpu_width)
4601 {
4602   int res;
4603   int elog;
4604   unsigned int rest;
4605 
4606   /* While we have more than 2 * cpu_width operands
4607      we may reduce number of operands by cpu_width
4608      per cycle.  */
4609   res = ops_num / (2 * cpu_width);
4610 
4611   /* Remained operands count may be reduced twice per cycle
4612      until we have only one operand.  */
4613   rest = (unsigned)(ops_num - res * cpu_width);
4614   elog = exact_log2 (rest);
4615   if (elog >= 0)
4616     res += elog;
4617   else
4618     res += floor_log2 (rest) + 1;
4619 
4620   return res;
4621 }
4622 
4623 /* Returns an optimal number of registers to use for computation of
4624    given statements.  */
4625 
4626 static int
get_reassociation_width(int ops_num,enum tree_code opc,machine_mode mode)4627 get_reassociation_width (int ops_num, enum tree_code opc,
4628                                machine_mode mode)
4629 {
4630   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4631   int width;
4632   int width_min;
4633   int cycles_best;
4634 
4635   if (param_width > 0)
4636     width = param_width;
4637   else
4638     width = targetm.sched.reassociation_width (opc, mode);
4639 
4640   if (width == 1)
4641     return width;
4642 
4643   /* Get the minimal time required for sequence computation.  */
4644   cycles_best = get_required_cycles (ops_num, width);
4645 
4646   /* Check if we may use less width and still compute sequence for
4647      the same time.  It will allow us to reduce registers usage.
4648      get_required_cycles is monotonically increasing with lower width
4649      so we can perform a binary search for the minimal width that still
4650      results in the optimal cycle count.  */
4651   width_min = 1;
4652   while (width > width_min)
4653     {
4654       int width_mid = (width + width_min) / 2;
4655 
4656       if (get_required_cycles (ops_num, width_mid) == cycles_best)
4657           width = width_mid;
4658       else if (width_min < width_mid)
4659           width_min = width_mid;
4660       else
4661           break;
4662     }
4663 
4664   return width;
4665 }
4666 
4667 /* Recursively rewrite our linearized statements so that the operators
4668    match those in OPS[OPINDEX], putting the computation in rank
4669    order and trying to allow operations to be executed in
4670    parallel.  */
4671 
4672 static void
rewrite_expr_tree_parallel(gassign * stmt,int width,vec<operand_entry * > ops)4673 rewrite_expr_tree_parallel (gassign *stmt, int width,
4674                                   vec<operand_entry *> ops)
4675 {
4676   enum tree_code opcode = gimple_assign_rhs_code (stmt);
4677   int op_num = ops.length ();
4678   gcc_assert (op_num > 0);
4679   int stmt_num = op_num - 1;
4680   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4681   int op_index = op_num - 1;
4682   int stmt_index = 0;
4683   int ready_stmts_end = 0;
4684   int i = 0;
4685   gimple *stmt1 = NULL, *stmt2 = NULL;
4686   tree last_rhs1 = gimple_assign_rhs1 (stmt);
4687 
4688   /* We start expression rewriting from the top statements.
4689      So, in this loop we create a full list of statements
4690      we will work with.  */
4691   stmts[stmt_num - 1] = stmt;
4692   for (i = stmt_num - 2; i >= 0; i--)
4693     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4694 
4695   for (i = 0; i < stmt_num; i++)
4696     {
4697       tree op1, op2;
4698 
4699       /* Determine whether we should use results of
4700            already handled statements or not.  */
4701       if (ready_stmts_end == 0
4702             && (i - stmt_index >= width || op_index < 1))
4703           ready_stmts_end = i;
4704 
4705       /* Now we choose operands for the next statement.  Non zero
4706            value in ready_stmts_end means here that we should use
4707            the result of already generated statements as new operand.  */
4708       if (ready_stmts_end > 0)
4709           {
4710             op1 = gimple_assign_lhs (stmts[stmt_index++]);
4711             if (ready_stmts_end > stmt_index)
4712               op2 = gimple_assign_lhs (stmts[stmt_index++]);
4713             else if (op_index >= 0)
4714               {
4715                 operand_entry *oe = ops[op_index--];
4716                 stmt2 = oe->stmt_to_insert;
4717                 op2 = oe->op;
4718               }
4719             else
4720               {
4721                 gcc_assert (stmt_index < i);
4722                 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4723               }
4724 
4725             if (stmt_index >= ready_stmts_end)
4726               ready_stmts_end = 0;
4727           }
4728       else
4729           {
4730             if (op_index > 1)
4731               swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4732             operand_entry *oe2 = ops[op_index--];
4733             operand_entry *oe1 = ops[op_index--];
4734             op2 = oe2->op;
4735             stmt2 = oe2->stmt_to_insert;
4736             op1 = oe1->op;
4737             stmt1 = oe1->stmt_to_insert;
4738           }
4739 
4740       /* If we emit the last statement then we should put
4741            operands into the last statement.  It will also
4742            break the loop.  */
4743       if (op_index < 0 && stmt_index == i)
4744           i = stmt_num - 1;
4745 
4746       if (dump_file && (dump_flags & TDF_DETAILS))
4747           {
4748             fprintf (dump_file, "Transforming ");
4749             print_gimple_stmt (dump_file, stmts[i], 0);
4750           }
4751 
4752       /* If the stmt that defines operand has to be inserted, insert it
4753            before the use.  */
4754       if (stmt1)
4755           insert_stmt_before_use (stmts[i], stmt1);
4756       if (stmt2)
4757           insert_stmt_before_use (stmts[i], stmt2);
4758       stmt1 = stmt2 = NULL;
4759 
4760       /* We keep original statement only for the last one.  All
4761            others are recreated.  */
4762       if (i == stmt_num - 1)
4763           {
4764             gimple_assign_set_rhs1 (stmts[i], op1);
4765             gimple_assign_set_rhs2 (stmts[i], op2);
4766             update_stmt (stmts[i]);
4767           }
4768       else
4769           {
4770             stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4771           }
4772       if (dump_file && (dump_flags & TDF_DETAILS))
4773           {
4774             fprintf (dump_file, " into ");
4775             print_gimple_stmt (dump_file, stmts[i], 0);
4776           }
4777     }
4778 
4779   remove_visited_stmt_chain (last_rhs1);
4780 }
4781 
4782 /* Transform STMT, which is really (A +B) + (C + D) into the left
4783    linear form, ((A+B)+C)+D.
4784    Recurse on D if necessary.  */
4785 
4786 static void
linearize_expr(gimple * stmt)4787 linearize_expr (gimple *stmt)
4788 {
4789   gimple_stmt_iterator gsi;
4790   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4791   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4792   gimple *oldbinrhs = binrhs;
4793   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4794   gimple *newbinrhs = NULL;
4795   struct loop *loop = loop_containing_stmt (stmt);
4796   tree lhs = gimple_assign_lhs (stmt);
4797 
4798   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4799                 && is_reassociable_op (binrhs, rhscode, loop));
4800 
4801   gsi = gsi_for_stmt (stmt);
4802 
4803   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4804   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4805                                         gimple_assign_rhs_code (binrhs),
4806                                         gimple_assign_lhs (binlhs),
4807                                         gimple_assign_rhs2 (binrhs));
4808   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4809   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4810   gimple_set_uid (binrhs, gimple_uid (stmt));
4811 
4812   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4813     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4814 
4815   if (dump_file && (dump_flags & TDF_DETAILS))
4816     {
4817       fprintf (dump_file, "Linearized: ");
4818       print_gimple_stmt (dump_file, stmt, 0);
4819     }
4820 
4821   reassociate_stats.linearized++;
4822   update_stmt (stmt);
4823 
4824   gsi = gsi_for_stmt (oldbinrhs);
4825   reassoc_remove_stmt (&gsi);
4826   release_defs (oldbinrhs);
4827 
4828   gimple_set_visited (stmt, true);
4829   gimple_set_visited (binlhs, true);
4830   gimple_set_visited (binrhs, true);
4831 
4832   /* Tail recurse on the new rhs if it still needs reassociation.  */
4833   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4834     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4835              want to change the algorithm while converting to tuples.  */
4836     linearize_expr (stmt);
4837 }
4838 
4839 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4840    it.  Otherwise, return NULL.  */
4841 
4842 static gimple *
get_single_immediate_use(tree lhs)4843 get_single_immediate_use (tree lhs)
4844 {
4845   use_operand_p immuse;
4846   gimple *immusestmt;
4847 
4848   if (TREE_CODE (lhs) == SSA_NAME
4849       && single_imm_use (lhs, &immuse, &immusestmt)
4850       && is_gimple_assign (immusestmt))
4851     return immusestmt;
4852 
4853   return NULL;
4854 }
4855 
4856 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4857    representing the negated value.  Insertions of any necessary
4858    instructions go before GSI.
4859    This function is recursive in that, if you hand it "a_5" as the
4860    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4861    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
4862 
4863 static tree
negate_value(tree tonegate,gimple_stmt_iterator * gsip)4864 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4865 {
4866   gimple *negatedefstmt = NULL;
4867   tree resultofnegate;
4868   gimple_stmt_iterator gsi;
4869   unsigned int uid;
4870 
4871   /* If we are trying to negate a name, defined by an add, negate the
4872      add operands instead.  */
4873   if (TREE_CODE (tonegate) == SSA_NAME)
4874     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4875   if (TREE_CODE (tonegate) == SSA_NAME
4876       && is_gimple_assign (negatedefstmt)
4877       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4878       && has_single_use (gimple_assign_lhs (negatedefstmt))
4879       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4880     {
4881       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4882       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4883       tree lhs = gimple_assign_lhs (negatedefstmt);
4884       gimple *g;
4885 
4886       gsi = gsi_for_stmt (negatedefstmt);
4887       rhs1 = negate_value (rhs1, &gsi);
4888 
4889       gsi = gsi_for_stmt (negatedefstmt);
4890       rhs2 = negate_value (rhs2, &gsi);
4891 
4892       gsi = gsi_for_stmt (negatedefstmt);
4893       lhs = make_ssa_name (TREE_TYPE (lhs));
4894       gimple_set_visited (negatedefstmt, true);
4895       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4896       gimple_set_uid (g, gimple_uid (negatedefstmt));
4897       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4898       return lhs;
4899     }
4900 
4901   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4902   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4903                                                        NULL_TREE, true, GSI_SAME_STMT);
4904   gsi = *gsip;
4905   uid = gimple_uid (gsi_stmt (gsi));
4906   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4907     {
4908       gimple *stmt = gsi_stmt (gsi);
4909       if (gimple_uid (stmt) != 0)
4910           break;
4911       gimple_set_uid (stmt, uid);
4912     }
4913   return resultofnegate;
4914 }
4915 
4916 /* Return true if we should break up the subtract in STMT into an add
4917    with negate.  This is true when we the subtract operands are really
4918    adds, or the subtract itself is used in an add expression.  In
4919    either case, breaking up the subtract into an add with negate
4920    exposes the adds to reassociation.  */
4921 
4922 static bool
should_break_up_subtract(gimple * stmt)4923 should_break_up_subtract (gimple *stmt)
4924 {
4925   tree lhs = gimple_assign_lhs (stmt);
4926   tree binlhs = gimple_assign_rhs1 (stmt);
4927   tree binrhs = gimple_assign_rhs2 (stmt);
4928   gimple *immusestmt;
4929   struct loop *loop = loop_containing_stmt (stmt);
4930 
4931   if (TREE_CODE (binlhs) == SSA_NAME
4932       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4933     return true;
4934 
4935   if (TREE_CODE (binrhs) == SSA_NAME
4936       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4937     return true;
4938 
4939   if (TREE_CODE (lhs) == SSA_NAME
4940       && (immusestmt = get_single_immediate_use (lhs))
4941       && is_gimple_assign (immusestmt)
4942       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4943             || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4944                 && gimple_assign_rhs1 (immusestmt) == lhs)
4945             || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4946     return true;
4947   return false;
4948 }
4949 
4950 /* Transform STMT from A - B into A + -B.  */
4951 
4952 static void
break_up_subtract(gimple * stmt,gimple_stmt_iterator * gsip)4953 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4954 {
4955   tree rhs1 = gimple_assign_rhs1 (stmt);
4956   tree rhs2 = gimple_assign_rhs2 (stmt);
4957 
4958   if (dump_file && (dump_flags & TDF_DETAILS))
4959     {
4960       fprintf (dump_file, "Breaking up subtract ");
4961       print_gimple_stmt (dump_file, stmt, 0);
4962     }
4963 
4964   rhs2 = negate_value (rhs2, gsip);
4965   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4966   update_stmt (stmt);
4967 }
4968 
4969 /* Determine whether STMT is a builtin call that raises an SSA name
4970    to an integer power and has only one use.  If so, and this is early
4971    reassociation and unsafe math optimizations are permitted, place
4972    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4973    If any of these conditions does not hold, return FALSE.  */
4974 
4975 static bool
acceptable_pow_call(gcall * stmt,tree * base,HOST_WIDE_INT * exponent)4976 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4977 {
4978   tree arg1;
4979   REAL_VALUE_TYPE c, cint;
4980 
4981   switch (gimple_call_combined_fn (stmt))
4982     {
4983     CASE_CFN_POW:
4984       if (flag_errno_math)
4985           return false;
4986 
4987       *base = gimple_call_arg (stmt, 0);
4988       arg1 = gimple_call_arg (stmt, 1);
4989 
4990       if (TREE_CODE (arg1) != REAL_CST)
4991           return false;
4992 
4993       c = TREE_REAL_CST (arg1);
4994 
4995       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4996           return false;
4997 
4998       *exponent = real_to_integer (&c);
4999       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5000       if (!real_identical (&c, &cint))
5001           return false;
5002 
5003       break;
5004 
5005     CASE_CFN_POWI:
5006       *base = gimple_call_arg (stmt, 0);
5007       arg1 = gimple_call_arg (stmt, 1);
5008 
5009       if (!tree_fits_shwi_p (arg1))
5010           return false;
5011 
5012       *exponent = tree_to_shwi (arg1);
5013       break;
5014 
5015     default:
5016       return false;
5017     }
5018 
5019   /* Expanding negative exponents is generally unproductive, so we don't
5020      complicate matters with those.  Exponents of zero and one should
5021      have been handled by expression folding.  */
5022   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5023     return false;
5024 
5025   return true;
5026 }
5027 
5028 /* Try to derive and add operand entry for OP to *OPS.  Return false if
5029    unsuccessful.  */
5030 
5031 static bool
try_special_add_to_ops(vec<operand_entry * > * ops,enum tree_code code,tree op,gimple * def_stmt)5032 try_special_add_to_ops (vec<operand_entry *> *ops,
5033                               enum tree_code code,
5034                               tree op, gimple* def_stmt)
5035 {
5036   tree base = NULL_TREE;
5037   HOST_WIDE_INT exponent = 0;
5038 
5039   if (TREE_CODE (op) != SSA_NAME
5040       || ! has_single_use (op))
5041     return false;
5042 
5043   if (code == MULT_EXPR
5044       && reassoc_insert_powi_p
5045       && flag_unsafe_math_optimizations
5046       && is_gimple_call (def_stmt)
5047       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5048     {
5049       add_repeat_to_ops_vec (ops, base, exponent);
5050       gimple_set_visited (def_stmt, true);
5051       return true;
5052     }
5053   else if (code == MULT_EXPR
5054              && is_gimple_assign (def_stmt)
5055              && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5056              && !HONOR_SNANS (TREE_TYPE (op))
5057              && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5058                  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5059     {
5060       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5061       tree cst = build_minus_one_cst (TREE_TYPE (op));
5062       add_to_ops_vec (ops, rhs1);
5063       add_to_ops_vec (ops, cst);
5064       gimple_set_visited (def_stmt, true);
5065       return true;
5066     }
5067 
5068   return false;
5069 }
5070 
5071 /* Recursively linearize a binary expression that is the RHS of STMT.
5072    Place the operands of the expression tree in the vector named OPS.  */
5073 
5074 static void
linearize_expr_tree(vec<operand_entry * > * ops,gimple * stmt,bool is_associative,bool set_visited)5075 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5076                          bool is_associative, bool set_visited)
5077 {
5078   tree binlhs = gimple_assign_rhs1 (stmt);
5079   tree binrhs = gimple_assign_rhs2 (stmt);
5080   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5081   bool binlhsisreassoc = false;
5082   bool binrhsisreassoc = false;
5083   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5084   struct loop *loop = loop_containing_stmt (stmt);
5085 
5086   if (set_visited)
5087     gimple_set_visited (stmt, true);
5088 
5089   if (TREE_CODE (binlhs) == SSA_NAME)
5090     {
5091       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5092       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5093                                && !stmt_could_throw_p (binlhsdef));
5094     }
5095 
5096   if (TREE_CODE (binrhs) == SSA_NAME)
5097     {
5098       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5099       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5100                                && !stmt_could_throw_p (binrhsdef));
5101     }
5102 
5103   /* If the LHS is not reassociable, but the RHS is, we need to swap
5104      them.  If neither is reassociable, there is nothing we can do, so
5105      just put them in the ops vector.  If the LHS is reassociable,
5106      linearize it.  If both are reassociable, then linearize the RHS
5107      and the LHS.  */
5108 
5109   if (!binlhsisreassoc)
5110     {
5111       /* If this is not a associative operation like division, give up.  */
5112       if (!is_associative)
5113           {
5114             add_to_ops_vec (ops, binrhs);
5115             return;
5116           }
5117 
5118       if (!binrhsisreassoc)
5119           {
5120             if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5121               add_to_ops_vec (ops, binrhs);
5122 
5123             if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5124               add_to_ops_vec (ops, binlhs);
5125 
5126             return;
5127           }
5128 
5129       if (dump_file && (dump_flags & TDF_DETAILS))
5130           {
5131             fprintf (dump_file, "swapping operands of ");
5132             print_gimple_stmt (dump_file, stmt, 0);
5133           }
5134 
5135       swap_ssa_operands (stmt,
5136                                gimple_assign_rhs1_ptr (stmt),
5137                                gimple_assign_rhs2_ptr (stmt));
5138       update_stmt (stmt);
5139 
5140       if (dump_file && (dump_flags & TDF_DETAILS))
5141           {
5142             fprintf (dump_file, " is now ");
5143             print_gimple_stmt (dump_file, stmt, 0);
5144           }
5145 
5146       /* We want to make it so the lhs is always the reassociative op,
5147            so swap.  */
5148       std::swap (binlhs, binrhs);
5149     }
5150   else if (binrhsisreassoc)
5151     {
5152       linearize_expr (stmt);
5153       binlhs = gimple_assign_rhs1 (stmt);
5154       binrhs = gimple_assign_rhs2 (stmt);
5155     }
5156 
5157   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5158                 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5159                                               rhscode, loop));
5160   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5161                            is_associative, set_visited);
5162 
5163   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5164     add_to_ops_vec (ops, binrhs);
5165 }
5166 
5167 /* Repropagate the negates back into subtracts, since no other pass
5168    currently does it.  */
5169 
5170 static void
repropagate_negates(void)5171 repropagate_negates (void)
5172 {
5173   unsigned int i = 0;
5174   tree negate;
5175 
5176   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5177     {
5178       gimple *user = get_single_immediate_use (negate);
5179 
5180       if (!user || !is_gimple_assign (user))
5181           continue;
5182 
5183       /* The negate operand can be either operand of a PLUS_EXPR
5184            (it can be the LHS if the RHS is a constant for example).
5185 
5186            Force the negate operand to the RHS of the PLUS_EXPR, then
5187            transform the PLUS_EXPR into a MINUS_EXPR.  */
5188       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5189           {
5190             /* If the negated operand appears on the LHS of the
5191                PLUS_EXPR, exchange the operands of the PLUS_EXPR
5192                to force the negated operand to the RHS of the PLUS_EXPR.  */
5193             if (gimple_assign_rhs1 (user) == negate)
5194               {
5195                 swap_ssa_operands (user,
5196                                          gimple_assign_rhs1_ptr (user),
5197                                          gimple_assign_rhs2_ptr (user));
5198               }
5199 
5200             /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5201                the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5202             if (gimple_assign_rhs2 (user) == negate)
5203               {
5204                 tree rhs1 = gimple_assign_rhs1 (user);
5205                 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5206                 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5207                 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5208                 update_stmt (user);
5209               }
5210           }
5211       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5212           {
5213             if (gimple_assign_rhs1 (user) == negate)
5214               {
5215                 /* We have
5216                      x = -a
5217                        y = x - b
5218                      which we transform into
5219                        x = a + b
5220                        y = -x .
5221                      This pushes down the negate which we possibly can merge
5222                      into some other operation, hence insert it into the
5223                      plus_negates vector.  */
5224                 gimple *feed = SSA_NAME_DEF_STMT (negate);
5225                 tree a = gimple_assign_rhs1 (feed);
5226                 tree b = gimple_assign_rhs2 (user);
5227                 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5228                 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5229                 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5230                 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5231                 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5232                 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5233                 user = gsi_stmt (gsi2);
5234                 update_stmt (user);
5235                 reassoc_remove_stmt (&gsi);
5236                 release_defs (feed);
5237                 plus_negates.safe_push (gimple_assign_lhs (user));
5238               }
5239             else
5240               {
5241                 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5242                    rid of one operation.  */
5243                 gimple *feed = SSA_NAME_DEF_STMT (negate);
5244                 tree a = gimple_assign_rhs1 (feed);
5245                 tree rhs1 = gimple_assign_rhs1 (user);
5246                 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5247                 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5248                 update_stmt (gsi_stmt (gsi));
5249               }
5250           }
5251     }
5252 }
5253 
5254 /* Returns true if OP is of a type for which we can do reassociation.
5255    That is for integral or non-saturating fixed-point types, and for
5256    floating point type when associative-math is enabled.  */
5257 
5258 static bool
can_reassociate_p(tree op)5259 can_reassociate_p (tree op)
5260 {
5261   tree type = TREE_TYPE (op);
5262   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5263     return false;
5264   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5265       || NON_SAT_FIXED_POINT_TYPE_P (type)
5266       || (flag_associative_math && FLOAT_TYPE_P (type)))
5267     return true;
5268   return false;
5269 }
5270 
5271 /* Break up subtract operations in block BB.
5272 
5273    We do this top down because we don't know whether the subtract is
5274    part of a possible chain of reassociation except at the top.
5275 
5276    IE given
5277    d = f + g
5278    c = a + e
5279    b = c - d
5280    q = b - r
5281    k = t - q
5282 
5283    we want to break up k = t - q, but we won't until we've transformed q
5284    = b - r, which won't be broken up until we transform b = c - d.
5285 
5286    En passant, clear the GIMPLE visited flag on every statement
5287    and set UIDs within each basic block.  */
5288 
5289 static void
break_up_subtract_bb(basic_block bb)5290 break_up_subtract_bb (basic_block bb)
5291 {
5292   gimple_stmt_iterator gsi;
5293   basic_block son;
5294   unsigned int uid = 1;
5295 
5296   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5297     {
5298       gimple *stmt = gsi_stmt (gsi);
5299       gimple_set_visited (stmt, false);
5300       gimple_set_uid (stmt, uid++);
5301 
5302       if (!is_gimple_assign (stmt)
5303             || !can_reassociate_p (gimple_assign_lhs (stmt)))
5304           continue;
5305 
5306       /* Look for simple gimple subtract operations.  */
5307       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5308           {
5309             if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5310                 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5311               continue;
5312 
5313             /* Check for a subtract used only in an addition.  If this
5314                is the case, transform it into add of a negate for better
5315                reassociation.  IE transform C = A-B into C = A + -B if C
5316                is only used in an addition.  */
5317             if (should_break_up_subtract (stmt))
5318               break_up_subtract (stmt, &gsi);
5319           }
5320       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5321                  && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5322           plus_negates.safe_push (gimple_assign_lhs (stmt));
5323     }
5324   for (son = first_dom_son (CDI_DOMINATORS, bb);
5325        son;
5326        son = next_dom_son (CDI_DOMINATORS, son))
5327     break_up_subtract_bb (son);
5328 }
5329 
5330 /* Used for repeated factor analysis.  */
5331 struct repeat_factor
5332 {
5333   /* An SSA name that occurs in a multiply chain.  */
5334   tree factor;
5335 
5336   /* Cached rank of the factor.  */
5337   unsigned rank;
5338 
5339   /* Number of occurrences of the factor in the chain.  */
5340   HOST_WIDE_INT count;
5341 
5342   /* An SSA name representing the product of this factor and
5343      all factors appearing later in the repeated factor vector.  */
5344   tree repr;
5345 };
5346 
5347 
5348 static vec<repeat_factor> repeat_factor_vec;
5349 
5350 /* Used for sorting the repeat factor vector.  Sort primarily by
5351    ascending occurrence count, secondarily by descending rank.  */
5352 
5353 static int
compare_repeat_factors(const void * x1,const void * x2)5354 compare_repeat_factors (const void *x1, const void *x2)
5355 {
5356   const repeat_factor *rf1 = (const repeat_factor *) x1;
5357   const repeat_factor *rf2 = (const repeat_factor *) x2;
5358 
5359   if (rf1->count != rf2->count)
5360     return rf1->count - rf2->count;
5361 
5362   return rf2->rank - rf1->rank;
5363 }
5364 
5365 /* Look for repeated operands in OPS in the multiply tree rooted at
5366    STMT.  Replace them with an optimal sequence of multiplies and powi
5367    builtin calls, and remove the used operands from OPS.  Return an
5368    SSA name representing the value of the replacement sequence.  */
5369 
5370 static tree
attempt_builtin_powi(gimple * stmt,vec<operand_entry * > * ops)5371 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5372 {
5373   unsigned i, j, vec_len;
5374   int ii;
5375   operand_entry *oe;
5376   repeat_factor *rf1, *rf2;
5377   repeat_factor rfnew;
5378   tree result = NULL_TREE;
5379   tree target_ssa, iter_result;
5380   tree type = TREE_TYPE (gimple_get_lhs (stmt));
5381   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5382   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5383   gimple *mul_stmt, *pow_stmt;
5384 
5385   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5386      target.  */
5387   if (!powi_fndecl)
5388     return NULL_TREE;
5389 
5390   /* Allocate the repeated factor vector.  */
5391   repeat_factor_vec.create (10);
5392 
5393   /* Scan the OPS vector for all SSA names in the product and build
5394      up a vector of occurrence counts for each factor.  */
5395   FOR_EACH_VEC_ELT (*ops, i, oe)
5396     {
5397       if (TREE_CODE (oe->op) == SSA_NAME)
5398           {
5399             FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5400               {
5401                 if (rf1->factor == oe->op)
5402                     {
5403                       rf1->count += oe->count;
5404                       break;
5405                     }
5406               }
5407 
5408             if (j >= repeat_factor_vec.length ())
5409               {
5410                 rfnew.factor = oe->op;
5411                 rfnew.rank = oe->rank;
5412                 rfnew.count = oe->count;
5413                 rfnew.repr = NULL_TREE;
5414                 repeat_factor_vec.safe_push (rfnew);
5415               }
5416           }
5417     }
5418 
5419   /* Sort the repeated factor vector by (a) increasing occurrence count,
5420      and (b) decreasing rank.  */
5421   repeat_factor_vec.qsort (compare_repeat_factors);
5422 
5423   /* It is generally best to combine as many base factors as possible
5424      into a product before applying __builtin_powi to the result.
5425      However, the sort order chosen for the repeated factor vector
5426      allows us to cache partial results for the product of the base
5427      factors for subsequent use.  When we already have a cached partial
5428      result from a previous iteration, it is best to make use of it
5429      before looking for another __builtin_pow opportunity.
5430 
5431      As an example, consider x * x * y * y * y * z * z * z * z.
5432      We want to first compose the product x * y * z, raise it to the
5433      second power, then multiply this by y * z, and finally multiply
5434      by z.  This can be done in 5 multiplies provided we cache y * z
5435      for use in both expressions:
5436 
5437         t1 = y * z
5438           t2 = t1 * x
5439           t3 = t2 * t2
5440           t4 = t1 * t3
5441           result = t4 * z
5442 
5443      If we instead ignored the cached y * z and first multiplied by
5444      the __builtin_pow opportunity z * z, we would get the inferior:
5445 
5446         t1 = y * z
5447           t2 = t1 * x
5448           t3 = t2 * t2
5449           t4 = z * z
5450           t5 = t3 * t4
5451         result = t5 * y  */
5452 
5453   vec_len = repeat_factor_vec.length ();
5454 
5455   /* Repeatedly look for opportunities to create a builtin_powi call.  */
5456   while (true)
5457     {
5458       HOST_WIDE_INT power;
5459 
5460       /* First look for the largest cached product of factors from
5461            preceding iterations.  If found, create a builtin_powi for
5462            it if the minimum occurrence count for its factors is at
5463            least 2, or just use this cached product as our next
5464            multiplicand if the minimum occurrence count is 1.  */
5465       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5466           {
5467             if (rf1->repr && rf1->count > 0)
5468               break;
5469           }
5470 
5471       if (j < vec_len)
5472           {
5473             power = rf1->count;
5474 
5475             if (power == 1)
5476               {
5477                 iter_result = rf1->repr;
5478 
5479                 if (dump_file && (dump_flags & TDF_DETAILS))
5480                     {
5481                       unsigned elt;
5482                       repeat_factor *rf;
5483                       fputs ("Multiplying by cached product ", dump_file);
5484                       for (elt = j; elt < vec_len; elt++)
5485                         {
5486                           rf = &repeat_factor_vec[elt];
5487                           print_generic_expr (dump_file, rf->factor);
5488                           if (elt < vec_len - 1)
5489                               fputs (" * ", dump_file);
5490                         }
5491                       fputs ("\n", dump_file);
5492                     }
5493               }
5494             else
5495               {
5496                 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5497                 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5498                                                       build_int_cst (integer_type_node,
5499                                                                          power));
5500                 gimple_call_set_lhs (pow_stmt, iter_result);
5501                 gimple_set_location (pow_stmt, gimple_location (stmt));
5502                 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5503                 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5504 
5505                 if (dump_file && (dump_flags & TDF_DETAILS))
5506                     {
5507                       unsigned elt;
5508                       repeat_factor *rf;
5509                       fputs ("Building __builtin_pow call for cached product (",
5510                                dump_file);
5511                       for (elt = j; elt < vec_len; elt++)
5512                         {
5513                           rf = &repeat_factor_vec[elt];
5514                           print_generic_expr (dump_file, rf->factor);
5515                           if (elt < vec_len - 1)
5516                               fputs (" * ", dump_file);
5517                         }
5518                       fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5519                                  power);
5520                     }
5521               }
5522           }
5523       else
5524           {
5525             /* Otherwise, find the first factor in the repeated factor
5526                vector whose occurrence count is at least 2.  If no such
5527                factor exists, there are no builtin_powi opportunities
5528                remaining.  */
5529             FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5530               {
5531                 if (rf1->count >= 2)
5532                     break;
5533               }
5534 
5535             if (j >= vec_len)
5536               break;
5537 
5538             power = rf1->count;
5539 
5540             if (dump_file && (dump_flags & TDF_DETAILS))
5541               {
5542                 unsigned elt;
5543                 repeat_factor *rf;
5544                 fputs ("Building __builtin_pow call for (", dump_file);
5545                 for (elt = j; elt < vec_len; elt++)
5546                     {
5547                       rf = &repeat_factor_vec[elt];
5548                       print_generic_expr (dump_file, rf->factor);
5549                       if (elt < vec_len - 1)
5550                         fputs (" * ", dump_file);
5551                     }
5552                 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5553               }
5554 
5555             reassociate_stats.pows_created++;
5556 
5557             /* Visit each element of the vector in reverse order (so that
5558                high-occurrence elements are visited first, and within the
5559                same occurrence count, lower-ranked elements are visited
5560                first).  Form a linear product of all elements in this order
5561                whose occurrencce count is at least that of element J.
5562                Record the SSA name representing the product of each element
5563                with all subsequent elements in the vector.  */
5564             if (j == vec_len - 1)
5565               rf1->repr = rf1->factor;
5566             else
5567               {
5568                 for (ii = vec_len - 2; ii >= (int)j; ii--)
5569                     {
5570                       tree op1, op2;
5571 
5572                       rf1 = &repeat_factor_vec[ii];
5573                       rf2 = &repeat_factor_vec[ii + 1];
5574 
5575                       /* Init the last factor's representative to be itself.  */
5576                       if (!rf2->repr)
5577                         rf2->repr = rf2->factor;
5578 
5579                       op1 = rf1->factor;
5580                       op2 = rf2->repr;
5581 
5582                       target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5583                       mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5584                                                               op1, op2);
5585                       gimple_set_location (mul_stmt, gimple_location (stmt));
5586                       gimple_set_uid (mul_stmt, gimple_uid (stmt));
5587                       gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5588                       rf1->repr = target_ssa;
5589 
5590                       /* Don't reprocess the multiply we just introduced.  */
5591                       gimple_set_visited (mul_stmt, true);
5592                     }
5593               }
5594 
5595             /* Form a call to __builtin_powi for the maximum product
5596                just formed, raised to the power obtained earlier.  */
5597             rf1 = &repeat_factor_vec[j];
5598             iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5599             pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5600                                                   build_int_cst (integer_type_node,
5601                                                                    power));
5602             gimple_call_set_lhs (pow_stmt, iter_result);
5603             gimple_set_location (pow_stmt, gimple_location (stmt));
5604             gimple_set_uid (pow_stmt, gimple_uid (stmt));
5605             gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5606           }
5607 
5608       /* If we previously formed at least one other builtin_powi call,
5609            form the product of this one and those others.  */
5610       if (result)
5611           {
5612             tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5613             mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5614                                                     result, iter_result);
5615             gimple_set_location (mul_stmt, gimple_location (stmt));
5616             gimple_set_uid (mul_stmt, gimple_uid (stmt));
5617             gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5618             gimple_set_visited (mul_stmt, true);
5619             result = new_result;
5620           }
5621       else
5622           result = iter_result;
5623 
5624       /* Decrement the occurrence count of each element in the product
5625            by the count found above, and remove this many copies of each
5626            factor from OPS.  */
5627       for (i = j; i < vec_len; i++)
5628           {
5629             unsigned k = power;
5630             unsigned n;
5631 
5632             rf1 = &repeat_factor_vec[i];
5633             rf1->count -= power;
5634 
5635             FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5636               {
5637                 if (oe->op == rf1->factor)
5638                     {
5639                       if (oe->count <= k)
5640                         {
5641                           ops->ordered_remove (n);
5642                           k -= oe->count;
5643 
5644                           if (k == 0)
5645                               break;
5646                         }
5647                       else
5648                         {
5649                           oe->count -= k;
5650                           break;
5651                         }
5652                     }
5653               }
5654           }
5655     }
5656 
5657   /* At this point all elements in the repeated factor vector have a
5658      remaining occurrence count of 0 or 1, and those with a count of 1
5659      don't have cached representatives.  Re-sort the ops vector and
5660      clean up.  */
5661   ops->qsort (sort_by_operand_rank);
5662   repeat_factor_vec.release ();
5663 
5664   /* Return the final product computed herein.  Note that there may
5665      still be some elements with single occurrence count left in OPS;
5666      those will be handled by the normal reassociation logic.  */
5667   return result;
5668 }
5669 
5670 /* Attempt to optimize
5671    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5672    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
5673 
5674 static void
attempt_builtin_copysign(vec<operand_entry * > * ops)5675 attempt_builtin_copysign (vec<operand_entry *> *ops)
5676 {
5677   operand_entry *oe;
5678   unsigned int i;
5679   unsigned int length = ops->length ();
5680   tree cst = ops->last ()->op;
5681 
5682   if (length == 1 || TREE_CODE (cst) != REAL_CST)
5683     return;
5684 
5685   FOR_EACH_VEC_ELT (*ops, i, oe)
5686     {
5687       if (TREE_CODE (oe->op) == SSA_NAME
5688             && has_single_use (oe->op))
5689           {
5690             gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5691             if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5692               {
5693                 tree arg0, arg1;
5694                 switch (gimple_call_combined_fn (old_call))
5695                     {
5696                     CASE_CFN_COPYSIGN:
5697                     CASE_CFN_COPYSIGN_FN:
5698                       arg0 = gimple_call_arg (old_call, 0);
5699                       arg1 = gimple_call_arg (old_call, 1);
5700                       /* The first argument of copysign must be a constant,
5701                          otherwise there's nothing to do.  */
5702                       if (TREE_CODE (arg0) == REAL_CST)
5703                         {
5704                           tree type = TREE_TYPE (arg0);
5705                           tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5706                           /* If we couldn't fold to a single constant, skip it.
5707                                That happens e.g. for inexact multiplication when
5708                                -frounding-math.  */
5709                           if (mul == NULL_TREE)
5710                               break;
5711                           /* Instead of adjusting OLD_CALL, let's build a new
5712                                call to not leak the LHS and prevent keeping bogus
5713                                debug statements.  DCE will clean up the old call.  */
5714                           gcall *new_call;
5715                           if (gimple_call_internal_p (old_call))
5716                               new_call = gimple_build_call_internal
5717                                 (IFN_COPYSIGN, 2, mul, arg1);
5718                           else
5719                               new_call = gimple_build_call
5720                                 (gimple_call_fndecl (old_call), 2, mul, arg1);
5721                           tree lhs = make_ssa_name (type);
5722                           gimple_call_set_lhs (new_call, lhs);
5723                           gimple_set_location (new_call,
5724                                                      gimple_location (old_call));
5725                           insert_stmt_after (new_call, old_call);
5726                           /* We've used the constant, get rid of it.  */
5727                           ops->pop ();
5728                           bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5729                           /* Handle the CST1 < 0 case by negating the result.  */
5730                           if (cst1_neg)
5731                               {
5732                                 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5733                                 gimple *negate_stmt
5734                                   = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5735                                 insert_stmt_after (negate_stmt, new_call);
5736                                 oe->op = negrhs;
5737                               }
5738                           else
5739                               oe->op = lhs;
5740                           if (dump_file && (dump_flags & TDF_DETAILS))
5741                               {
5742                                 fprintf (dump_file, "Optimizing copysign: ");
5743                                 print_generic_expr (dump_file, cst);
5744                                 fprintf (dump_file, " * COPYSIGN (");
5745                                 print_generic_expr (dump_file, arg0);
5746                                 fprintf (dump_file, ", ");
5747                                 print_generic_expr (dump_file, arg1);
5748                                 fprintf (dump_file, ") into %sCOPYSIGN (",
5749                                            cst1_neg ? "-" : "");
5750                                 print_generic_expr (dump_file, mul);
5751                                 fprintf (dump_file, ", ");
5752                                 print_generic_expr (dump_file, arg1);
5753                                 fprintf (dump_file, "\n");
5754                               }
5755                           return;
5756                         }
5757                       break;
5758                     default:
5759                       break;
5760                     }
5761               }
5762           }
5763     }
5764 }
5765 
5766 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
5767 
5768 static void
transform_stmt_to_copy(gimple_stmt_iterator * gsi,gimple * stmt,tree new_rhs)5769 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5770 {
5771   tree rhs1;
5772 
5773   if (dump_file && (dump_flags & TDF_DETAILS))
5774     {
5775       fprintf (dump_file, "Transforming ");
5776       print_gimple_stmt (dump_file, stmt, 0);
5777     }
5778 
5779   rhs1 = gimple_assign_rhs1 (stmt);
5780   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5781   update_stmt (stmt);
5782   remove_visited_stmt_chain (rhs1);
5783 
5784   if (dump_file && (dump_flags & TDF_DETAILS))
5785     {
5786       fprintf (dump_file, " into ");
5787       print_gimple_stmt (dump_file, stmt, 0);
5788     }
5789 }
5790 
5791 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
5792 
5793 static void
transform_stmt_to_multiply(gimple_stmt_iterator * gsi,gimple * stmt,tree rhs1,tree rhs2)5794 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5795                                   tree rhs1, tree rhs2)
5796 {
5797   if (dump_file && (dump_flags & TDF_DETAILS))
5798     {
5799       fprintf (dump_file, "Transforming ");
5800       print_gimple_stmt (dump_file, stmt, 0);
5801     }
5802 
5803   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5804   update_stmt (gsi_stmt (*gsi));
5805   remove_visited_stmt_chain (rhs1);
5806 
5807   if (dump_file && (dump_flags & TDF_DETAILS))
5808     {
5809       fprintf (dump_file, " into ");
5810       print_gimple_stmt (dump_file, stmt, 0);
5811     }
5812 }
5813 
5814 /* Reassociate expressions in basic block BB and its post-dominator as
5815    children.
5816 
5817    Bubble up return status from maybe_optimize_range_tests.  */
5818 
5819 static bool
reassociate_bb(basic_block bb)5820 reassociate_bb (basic_block bb)
5821 {
5822   gimple_stmt_iterator gsi;
5823   basic_block son;
5824   gimple *stmt = last_stmt (bb);
5825   bool cfg_cleanup_needed = false;
5826 
5827   if (stmt && !gimple_visited_p (stmt))
5828     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5829 
5830   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5831     {
5832       stmt = gsi_stmt (gsi);
5833 
5834       if (is_gimple_assign (stmt)
5835             && !stmt_could_throw_p (stmt))
5836           {
5837             tree lhs, rhs1, rhs2;
5838             enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5839 
5840             /* If this is not a gimple binary expression, there is
5841                nothing for us to do with it.  */
5842             if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5843               continue;
5844 
5845             /* If this was part of an already processed statement,
5846                we don't need to touch it again. */
5847             if (gimple_visited_p (stmt))
5848               {
5849                 /* This statement might have become dead because of previous
5850                      reassociations.  */
5851                 if (has_zero_uses (gimple_get_lhs (stmt)))
5852                     {
5853                       reassoc_remove_stmt (&gsi);
5854                       release_defs (stmt);
5855                       /* We might end up removing the last stmt above which
5856                          places the iterator to the end of the sequence.
5857                          Reset it to the last stmt in this case which might
5858                          be the end of the sequence as well if we removed
5859                          the last statement of the sequence.  In which case
5860                          we need to bail out.  */
5861                       if (gsi_end_p (gsi))
5862                         {
5863                           gsi = gsi_last_bb (bb);
5864                           if (gsi_end_p (gsi))
5865                               break;
5866                         }
5867                     }
5868                 continue;
5869               }
5870 
5871             lhs = gimple_assign_lhs (stmt);
5872             rhs1 = gimple_assign_rhs1 (stmt);
5873             rhs2 = gimple_assign_rhs2 (stmt);
5874 
5875             /* For non-bit or min/max operations we can't associate
5876                all types.  Verify that here.  */
5877             if (rhs_code != BIT_IOR_EXPR
5878                 && rhs_code != BIT_AND_EXPR
5879                 && rhs_code != BIT_XOR_EXPR
5880                 && rhs_code != MIN_EXPR
5881                 && rhs_code != MAX_EXPR
5882                 && (!can_reassociate_p (lhs)
5883                       || !can_reassociate_p (rhs1)
5884                       || !can_reassociate_p (rhs2)))
5885               continue;
5886 
5887             if (associative_tree_code (rhs_code))
5888               {
5889                 auto_vec<operand_entry *> ops;
5890                 tree powi_result = NULL_TREE;
5891                 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5892 
5893                 /* There may be no immediate uses left by the time we
5894                      get here because we may have eliminated them all.  */
5895                 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5896                     continue;
5897 
5898                 gimple_set_visited (stmt, true);
5899                 linearize_expr_tree (&ops, stmt, true, true);
5900                 ops.qsort (sort_by_operand_rank);
5901                 int orig_len = ops.length ();
5902                 optimize_ops_list (rhs_code, &ops);
5903                 if (undistribute_ops_list (rhs_code, &ops,
5904                                                    loop_containing_stmt (stmt)))
5905                     {
5906                       ops.qsort (sort_by_operand_rank);
5907                       optimize_ops_list (rhs_code, &ops);
5908                     }
5909 
5910                 if (rhs_code == PLUS_EXPR
5911                       && transform_add_to_multiply (&ops))
5912                     ops.qsort (sort_by_operand_rank);
5913 
5914                 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5915                     {
5916                       if (is_vector)
5917                         optimize_vec_cond_expr (rhs_code, &ops);
5918                       else
5919                         optimize_range_tests (rhs_code, &ops, NULL);
5920                   }
5921 
5922                 if (rhs_code == MULT_EXPR && !is_vector)
5923                   {
5924                       attempt_builtin_copysign (&ops);
5925 
5926                       if (reassoc_insert_powi_p
5927                           && flag_unsafe_math_optimizations)
5928                         powi_result = attempt_builtin_powi (stmt, &ops);
5929                     }
5930 
5931                 operand_entry *last;
5932                 bool negate_result = false;
5933                 if (ops.length () > 1
5934                       && rhs_code == MULT_EXPR)
5935                     {
5936                       last = ops.last ();
5937                       if ((integer_minus_onep (last->op)
5938                            || real_minus_onep (last->op))
5939                           && !HONOR_SNANS (TREE_TYPE (lhs))
5940                           && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5941                                 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5942                         {
5943                           ops.pop ();
5944                           negate_result = true;
5945                         }
5946                     }
5947 
5948                 tree new_lhs = lhs;
5949                 /* If the operand vector is now empty, all operands were
5950                      consumed by the __builtin_powi optimization.  */
5951                 if (ops.length () == 0)
5952                     transform_stmt_to_copy (&gsi, stmt, powi_result);
5953                 else if (ops.length () == 1)
5954                     {
5955                       tree last_op = ops.last ()->op;
5956 
5957                       /* If the stmt that defines operand has to be inserted, insert it
5958                          before the use.  */
5959                       if (ops.last ()->stmt_to_insert)
5960                         insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5961                       if (powi_result)
5962                         transform_stmt_to_multiply (&gsi, stmt, last_op,
5963                                                             powi_result);
5964                       else
5965                         transform_stmt_to_copy (&gsi, stmt, last_op);
5966                     }
5967                 else
5968                     {
5969                       machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5970                       int ops_num = ops.length ();
5971                       int width = get_reassociation_width (ops_num, rhs_code, mode);
5972 
5973                       if (dump_file && (dump_flags & TDF_DETAILS))
5974                         fprintf (dump_file,
5975                                    "Width = %d was chosen for reassociation\n", width);
5976 
5977 
5978                       /* For binary bit operations, if there are at least 3
5979                          operands and the last last operand in OPS is a constant,
5980                          move it to the front.  This helps ensure that we generate
5981                          (X & Y) & C rather than (X & C) & Y.  The former will
5982                          often match a canonical bit test when we get to RTL.  */
5983                       if (ops.length () > 2
5984                           && (rhs_code == BIT_AND_EXPR
5985                               || rhs_code == BIT_IOR_EXPR
5986                               || rhs_code == BIT_XOR_EXPR)
5987                           && TREE_CODE (ops.last ()->op) == INTEGER_CST)
5988                         std::swap (*ops[0], *ops[ops_num - 1]);
5989 
5990                       if (width > 1
5991                           && ops.length () > 3)
5992                         rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5993                                                             width, ops);
5994                       else
5995                     {
5996                       /* When there are three operands left, we want
5997                          to make sure the ones that get the double
5998                          binary op are chosen wisely.  */
5999                       int len = ops.length ();
6000                       if (len >= 3)
6001                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6002 
6003                           new_lhs = rewrite_expr_tree (stmt, 0, ops,
6004                                                                powi_result != NULL
6005                                                                || negate_result,
6006                                                                len != orig_len);
6007                     }
6008 
6009                       /* If we combined some repeated factors into a
6010                          __builtin_powi call, multiply that result by the
6011                          reassociated operands.  */
6012                       if (powi_result)
6013                         {
6014                           gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6015                           tree type = TREE_TYPE (lhs);
6016                           tree target_ssa = make_temp_ssa_name (type, NULL,
6017                                                                           "reassocpow");
6018                           gimple_set_lhs (lhs_stmt, target_ssa);
6019                           update_stmt (lhs_stmt);
6020                           if (lhs != new_lhs)
6021                               {
6022                                 target_ssa = new_lhs;
6023                                 new_lhs = lhs;
6024                               }
6025                           mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6026                                                                   powi_result, target_ssa);
6027                           gimple_set_location (mul_stmt, gimple_location (stmt));
6028                           gimple_set_uid (mul_stmt, gimple_uid (stmt));
6029                           gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6030                         }
6031                     }
6032 
6033                 if (negate_result)
6034                     {
6035                       stmt = SSA_NAME_DEF_STMT (lhs);
6036                       tree tmp = make_ssa_name (TREE_TYPE (lhs));
6037                       gimple_set_lhs (stmt, tmp);
6038                       if (lhs != new_lhs)
6039                         tmp = new_lhs;
6040                       gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6041                                                                          tmp);
6042                       gimple_set_uid (neg_stmt, gimple_uid (stmt));
6043                       gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6044                       update_stmt (stmt);
6045                     }
6046               }
6047           }
6048     }
6049   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6050        son;
6051        son = next_dom_son (CDI_POST_DOMINATORS, son))
6052     cfg_cleanup_needed |= reassociate_bb (son);
6053 
6054   return cfg_cleanup_needed;
6055 }
6056 
6057 /* Add jumps around shifts for range tests turned into bit tests.
6058    For each SSA_NAME VAR we have code like:
6059    VAR = ...; // final stmt of range comparison
6060    // bit test here...;
6061    OTHERVAR = ...; // final stmt of the bit test sequence
6062    RES = VAR | OTHERVAR;
6063    Turn the above into:
6064    VAR = ...;
6065    if (VAR != 0)
6066      goto <l3>;
6067    else
6068      goto <l2>;
6069    <l2>:
6070    // bit test here...;
6071    OTHERVAR = ...;
6072    <l3>:
6073    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6074 
6075 static void
branch_fixup(void)6076 branch_fixup (void)
6077 {
6078   tree var;
6079   unsigned int i;
6080 
6081   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6082     {
6083       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6084       gimple *use_stmt;
6085       use_operand_p use;
6086       bool ok = single_imm_use (var, &use, &use_stmt);
6087       gcc_assert (ok
6088                       && is_gimple_assign (use_stmt)
6089                       && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6090                       && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6091 
6092       basic_block cond_bb = gimple_bb (def_stmt);
6093       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6094       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6095 
6096       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6097       gimple *g = gimple_build_cond (NE_EXPR, var,
6098                                              build_zero_cst (TREE_TYPE (var)),
6099                                              NULL_TREE, NULL_TREE);
6100       location_t loc = gimple_location (use_stmt);
6101       gimple_set_location (g, loc);
6102       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6103 
6104       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6105       etrue->probability = profile_probability::even ();
6106       edge efalse = find_edge (cond_bb, then_bb);
6107       efalse->flags = EDGE_FALSE_VALUE;
6108       efalse->probability -= etrue->probability;
6109       then_bb->count -= etrue->count ();
6110 
6111       tree othervar = NULL_TREE;
6112       if (gimple_assign_rhs1 (use_stmt) == var)
6113           othervar = gimple_assign_rhs2 (use_stmt);
6114       else if (gimple_assign_rhs2 (use_stmt) == var)
6115           othervar = gimple_assign_rhs1 (use_stmt);
6116       else
6117           gcc_unreachable ();
6118       tree lhs = gimple_assign_lhs (use_stmt);
6119       gphi *phi = create_phi_node (lhs, merge_bb);
6120       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6121       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6122       gsi = gsi_for_stmt (use_stmt);
6123       gsi_remove (&gsi, true);
6124 
6125       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6126       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6127     }
6128   reassoc_branch_fixups.release ();
6129 }
6130 
6131 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6132 void debug_ops_vector (vec<operand_entry *> ops);
6133 
6134 /* Dump the operand entry vector OPS to FILE.  */
6135 
6136 void
dump_ops_vector(FILE * file,vec<operand_entry * > ops)6137 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6138 {
6139   operand_entry *oe;
6140   unsigned int i;
6141 
6142   FOR_EACH_VEC_ELT (ops, i, oe)
6143     {
6144       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6145       print_generic_expr (file, oe->op);
6146       fprintf (file, "\n");
6147     }
6148 }
6149 
6150 /* Dump the operand entry vector OPS to STDERR.  */
6151 
6152 DEBUG_FUNCTION void
debug_ops_vector(vec<operand_entry * > ops)6153 debug_ops_vector (vec<operand_entry *> ops)
6154 {
6155   dump_ops_vector (stderr, ops);
6156 }
6157 
6158 /* Bubble up return status from reassociate_bb.  */
6159 
6160 static bool
do_reassoc(void)6161 do_reassoc (void)
6162 {
6163   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6164   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6165 }
6166 
6167 /* Initialize the reassociation pass.  */
6168 
6169 static void
init_reassoc(void)6170 init_reassoc (void)
6171 {
6172   int i;
6173   long rank = 2;
6174   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6175 
6176   /* Find the loops, so that we can prevent moving calculations in
6177      them.  */
6178   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6179 
6180   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6181 
6182   next_operand_entry_id = 0;
6183 
6184   /* Reverse RPO (Reverse Post Order) will give us something where
6185      deeper loops come later.  */
6186   pre_and_rev_post_order_compute (NULL, bbs, false);
6187   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6188   operand_rank = new hash_map<tree, long>;
6189 
6190   /* Give each default definition a distinct rank.  This includes
6191      parameters and the static chain.  Walk backwards over all
6192      SSA names so that we get proper rank ordering according
6193      to tree_swap_operands_p.  */
6194   for (i = num_ssa_names - 1; i > 0; --i)
6195     {
6196       tree name = ssa_name (i);
6197       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6198           insert_operand_rank (name, ++rank);
6199     }
6200 
6201   /* Set up rank for each BB  */
6202   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6203     bb_rank[bbs[i]] = ++rank << 16;
6204 
6205   free (bbs);
6206   calculate_dominance_info (CDI_POST_DOMINATORS);
6207   plus_negates = vNULL;
6208 }
6209 
6210 /* Cleanup after the reassociation pass, and print stats if
6211    requested.  */
6212 
6213 static void
fini_reassoc(void)6214 fini_reassoc (void)
6215 {
6216   statistics_counter_event (cfun, "Linearized",
6217                                   reassociate_stats.linearized);
6218   statistics_counter_event (cfun, "Constants eliminated",
6219                                   reassociate_stats.constants_eliminated);
6220   statistics_counter_event (cfun, "Ops eliminated",
6221                                   reassociate_stats.ops_eliminated);
6222   statistics_counter_event (cfun, "Statements rewritten",
6223                                   reassociate_stats.rewritten);
6224   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6225                                   reassociate_stats.pows_encountered);
6226   statistics_counter_event (cfun, "Built-in powi calls created",
6227                                   reassociate_stats.pows_created);
6228 
6229   delete operand_rank;
6230   operand_entry_pool.release ();
6231   free (bb_rank);
6232   plus_negates.release ();
6233   free_dominance_info (CDI_POST_DOMINATORS);
6234   loop_optimizer_finalize ();
6235 }
6236 
6237 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
6238    insertion of __builtin_powi calls.
6239 
6240    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6241    optimization of a gimple conditional.  Otherwise returns zero.  */
6242 
6243 static unsigned int
execute_reassoc(bool insert_powi_p)6244 execute_reassoc (bool insert_powi_p)
6245 {
6246   reassoc_insert_powi_p = insert_powi_p;
6247 
6248   init_reassoc ();
6249 
6250   bool cfg_cleanup_needed;
6251   cfg_cleanup_needed = do_reassoc ();
6252   repropagate_negates ();
6253   branch_fixup ();
6254 
6255   fini_reassoc ();
6256   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6257 }
6258 
6259 namespace {
6260 
6261 const pass_data pass_data_reassoc =
6262 {
6263   GIMPLE_PASS, /* type */
6264   "reassoc", /* name */
6265   OPTGROUP_NONE, /* optinfo_flags */
6266   TV_TREE_REASSOC, /* tv_id */
6267   ( PROP_cfg | PROP_ssa ), /* properties_required */
6268   0, /* properties_provided */
6269   0, /* properties_destroyed */
6270   0, /* todo_flags_start */
6271   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6272 };
6273 
6274 class pass_reassoc : public gimple_opt_pass
6275 {
6276 public:
pass_reassoc(gcc::context * ctxt)6277   pass_reassoc (gcc::context *ctxt)
6278     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6279   {}
6280 
6281   /* opt_pass methods: */
clone()6282   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
set_pass_param(unsigned int n,bool param)6283   void set_pass_param (unsigned int n, bool param)
6284     {
6285       gcc_assert (n == 0);
6286       insert_powi_p = param;
6287     }
gate(function *)6288   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
execute(function *)6289   virtual unsigned int execute (function *)
6290     { return execute_reassoc (insert_powi_p); }
6291 
6292  private:
6293   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
6294      point 3a in the pass header comment.  */
6295   bool insert_powi_p;
6296 }; // class pass_reassoc
6297 
6298 } // anon namespace
6299 
6300 gimple_opt_pass *
make_pass_reassoc(gcc::context * ctxt)6301 make_pass_reassoc (gcc::context *ctxt)
6302 {
6303   return new pass_reassoc (ctxt);
6304 }
6305